{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999603881956823, "eval_steps": 500, "global_step": 12622, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.922360863537334e-05, "grad_norm": 31.868705434213155, "learning_rate": 5.277044854881267e-08, "loss": 2.4055, "step": 1 }, { "epoch": 0.00015844721727074668, "grad_norm": 33.35421203082402, "learning_rate": 1.0554089709762534e-07, "loss": 2.229, "step": 2 }, { "epoch": 0.00023767082590612002, "grad_norm": 30.905772260462495, "learning_rate": 1.5831134564643802e-07, "loss": 2.5515, "step": 3 }, { "epoch": 0.00031689443454149336, "grad_norm": 31.82938654107664, "learning_rate": 2.1108179419525068e-07, "loss": 2.4607, "step": 4 }, { "epoch": 0.0003961180431768667, "grad_norm": 33.89602990583608, "learning_rate": 2.6385224274406334e-07, "loss": 2.7529, "step": 5 }, { "epoch": 0.00047534165181224003, "grad_norm": 30.444966472962584, "learning_rate": 3.1662269129287605e-07, "loss": 2.5701, "step": 6 }, { "epoch": 0.0005545652604476134, "grad_norm": 28.601879017937765, "learning_rate": 3.693931398416887e-07, "loss": 2.3982, "step": 7 }, { "epoch": 0.0006337888690829867, "grad_norm": 28.96128266138133, "learning_rate": 4.2216358839050136e-07, "loss": 2.4097, "step": 8 }, { "epoch": 0.00071301247771836, "grad_norm": 34.74863678489119, "learning_rate": 4.7493403693931397e-07, "loss": 2.4995, "step": 9 }, { "epoch": 0.0007922360863537334, "grad_norm": 27.248425019992244, "learning_rate": 5.277044854881267e-07, "loss": 2.4025, "step": 10 }, { "epoch": 0.0008714596949891067, "grad_norm": 26.19048251774951, "learning_rate": 5.804749340369393e-07, "loss": 2.4338, "step": 11 }, { "epoch": 0.0009506833036244801, "grad_norm": 29.591611096105435, "learning_rate": 6.332453825857521e-07, "loss": 2.4309, "step": 12 }, { "epoch": 0.0010299069122598535, "grad_norm": 28.882358214389345, "learning_rate": 6.860158311345646e-07, "loss": 2.2793, "step": 13 }, { "epoch": 0.0011091305208952267, "grad_norm": 28.514388351231172, "learning_rate": 7.387862796833774e-07, "loss": 2.2069, "step": 14 }, { "epoch": 0.0011883541295306002, "grad_norm": 29.394819635818916, "learning_rate": 7.915567282321901e-07, "loss": 2.2789, "step": 15 }, { "epoch": 0.0012675777381659734, "grad_norm": 28.82636668537433, "learning_rate": 8.443271767810027e-07, "loss": 2.1414, "step": 16 }, { "epoch": 0.0013468013468013469, "grad_norm": 27.172986033880207, "learning_rate": 8.970976253298154e-07, "loss": 2.0737, "step": 17 }, { "epoch": 0.00142602495543672, "grad_norm": 28.9492321515514, "learning_rate": 9.498680738786279e-07, "loss": 2.0167, "step": 18 }, { "epoch": 0.0015052485640720936, "grad_norm": 17.255700208930268, "learning_rate": 1.0026385224274407e-06, "loss": 1.6937, "step": 19 }, { "epoch": 0.0015844721727074668, "grad_norm": 13.912069084573742, "learning_rate": 1.0554089709762534e-06, "loss": 1.7679, "step": 20 }, { "epoch": 0.0016636957813428402, "grad_norm": 14.66426105865668, "learning_rate": 1.108179419525066e-06, "loss": 1.7548, "step": 21 }, { "epoch": 0.0017429193899782135, "grad_norm": 14.210612712641286, "learning_rate": 1.1609498680738787e-06, "loss": 1.7754, "step": 22 }, { "epoch": 0.001822142998613587, "grad_norm": 18.649014993687306, "learning_rate": 1.2137203166226915e-06, "loss": 1.8135, "step": 23 }, { "epoch": 0.0019013666072489601, "grad_norm": 16.151271281666702, "learning_rate": 1.2664907651715042e-06, "loss": 1.6643, "step": 24 }, { "epoch": 0.0019805902158843334, "grad_norm": 21.06738298338953, "learning_rate": 1.3192612137203166e-06, "loss": 1.8694, "step": 25 }, { "epoch": 0.002059813824519707, "grad_norm": 15.419672041356916, "learning_rate": 1.3720316622691293e-06, "loss": 1.5757, "step": 26 }, { "epoch": 0.0021390374331550803, "grad_norm": 15.600724016035702, "learning_rate": 1.4248021108179422e-06, "loss": 1.553, "step": 27 }, { "epoch": 0.0022182610417904535, "grad_norm": 12.229878834112258, "learning_rate": 1.4775725593667548e-06, "loss": 1.4652, "step": 28 }, { "epoch": 0.0022974846504258267, "grad_norm": 10.225961851553592, "learning_rate": 1.5303430079155673e-06, "loss": 1.4839, "step": 29 }, { "epoch": 0.0023767082590612004, "grad_norm": 12.164324439005954, "learning_rate": 1.5831134564643801e-06, "loss": 1.3534, "step": 30 }, { "epoch": 0.0024559318676965736, "grad_norm": 11.063575560268422, "learning_rate": 1.6358839050131928e-06, "loss": 1.4274, "step": 31 }, { "epoch": 0.002535155476331947, "grad_norm": 9.73448058354835, "learning_rate": 1.6886543535620054e-06, "loss": 1.4187, "step": 32 }, { "epoch": 0.00261437908496732, "grad_norm": 8.730204246763352, "learning_rate": 1.7414248021108183e-06, "loss": 1.1519, "step": 33 }, { "epoch": 0.0026936026936026937, "grad_norm": 6.924881468158276, "learning_rate": 1.7941952506596308e-06, "loss": 1.2454, "step": 34 }, { "epoch": 0.002772826302238067, "grad_norm": 6.629579400630702, "learning_rate": 1.8469656992084434e-06, "loss": 1.2072, "step": 35 }, { "epoch": 0.00285204991087344, "grad_norm": 6.657573499106914, "learning_rate": 1.8997361477572559e-06, "loss": 1.046, "step": 36 }, { "epoch": 0.0029312735195088134, "grad_norm": 8.025208743375664, "learning_rate": 1.9525065963060687e-06, "loss": 1.2457, "step": 37 }, { "epoch": 0.003010497128144187, "grad_norm": 6.679507832590588, "learning_rate": 2.0052770448548814e-06, "loss": 1.2994, "step": 38 }, { "epoch": 0.0030897207367795603, "grad_norm": 6.448298994490477, "learning_rate": 2.058047493403694e-06, "loss": 1.2889, "step": 39 }, { "epoch": 0.0031689443454149336, "grad_norm": 6.8153830133577005, "learning_rate": 2.1108179419525067e-06, "loss": 1.142, "step": 40 }, { "epoch": 0.003248167954050307, "grad_norm": 5.655776676536306, "learning_rate": 2.1635883905013194e-06, "loss": 1.1317, "step": 41 }, { "epoch": 0.0033273915626856805, "grad_norm": 7.634046937784093, "learning_rate": 2.216358839050132e-06, "loss": 0.9966, "step": 42 }, { "epoch": 0.0034066151713210537, "grad_norm": 6.154130288982867, "learning_rate": 2.2691292875989447e-06, "loss": 1.0714, "step": 43 }, { "epoch": 0.003485838779956427, "grad_norm": 6.599696603402416, "learning_rate": 2.3218997361477573e-06, "loss": 1.1753, "step": 44 }, { "epoch": 0.0035650623885918, "grad_norm": 6.1045473837052135, "learning_rate": 2.37467018469657e-06, "loss": 1.0945, "step": 45 }, { "epoch": 0.003644285997227174, "grad_norm": 6.639820010209079, "learning_rate": 2.427440633245383e-06, "loss": 1.2281, "step": 46 }, { "epoch": 0.003723509605862547, "grad_norm": 6.158858746048843, "learning_rate": 2.4802110817941953e-06, "loss": 1.1257, "step": 47 }, { "epoch": 0.0038027332144979203, "grad_norm": 6.448066336502261, "learning_rate": 2.5329815303430084e-06, "loss": 1.1324, "step": 48 }, { "epoch": 0.0038819568231332935, "grad_norm": 6.262066262387423, "learning_rate": 2.5857519788918206e-06, "loss": 1.1343, "step": 49 }, { "epoch": 0.003961180431768667, "grad_norm": 5.243397804131298, "learning_rate": 2.6385224274406333e-06, "loss": 0.9142, "step": 50 }, { "epoch": 0.00404040404040404, "grad_norm": 5.967104448204892, "learning_rate": 2.6912928759894464e-06, "loss": 1.0485, "step": 51 }, { "epoch": 0.004119627649039414, "grad_norm": 5.66949631539659, "learning_rate": 2.7440633245382586e-06, "loss": 1.1379, "step": 52 }, { "epoch": 0.004198851257674787, "grad_norm": 5.474294930789325, "learning_rate": 2.7968337730870717e-06, "loss": 1.0624, "step": 53 }, { "epoch": 0.0042780748663101605, "grad_norm": 4.8417338949953255, "learning_rate": 2.8496042216358843e-06, "loss": 1.1212, "step": 54 }, { "epoch": 0.004357298474945534, "grad_norm": 5.087923167229228, "learning_rate": 2.9023746701846966e-06, "loss": 0.9097, "step": 55 }, { "epoch": 0.004436522083580907, "grad_norm": 5.316728911094931, "learning_rate": 2.9551451187335096e-06, "loss": 0.9949, "step": 56 }, { "epoch": 0.004515745692216281, "grad_norm": 5.218219318853345, "learning_rate": 3.0079155672823223e-06, "loss": 0.9964, "step": 57 }, { "epoch": 0.0045949693008516534, "grad_norm": 5.0674909088177635, "learning_rate": 3.0606860158311345e-06, "loss": 0.9277, "step": 58 }, { "epoch": 0.004674192909487027, "grad_norm": 5.4550208422255535, "learning_rate": 3.1134564643799476e-06, "loss": 0.9852, "step": 59 }, { "epoch": 0.004753416518122401, "grad_norm": 5.001551321481967, "learning_rate": 3.1662269129287603e-06, "loss": 0.9319, "step": 60 }, { "epoch": 0.004832640126757774, "grad_norm": 5.447524685496177, "learning_rate": 3.2189973614775725e-06, "loss": 1.0637, "step": 61 }, { "epoch": 0.004911863735393147, "grad_norm": 5.218168830884699, "learning_rate": 3.2717678100263856e-06, "loss": 0.9396, "step": 62 }, { "epoch": 0.004991087344028521, "grad_norm": 4.880088882954173, "learning_rate": 3.3245382585751982e-06, "loss": 1.0132, "step": 63 }, { "epoch": 0.005070310952663894, "grad_norm": 5.0825664489953315, "learning_rate": 3.377308707124011e-06, "loss": 1.0607, "step": 64 }, { "epoch": 0.005149534561299267, "grad_norm": 4.47382446788315, "learning_rate": 3.4300791556728235e-06, "loss": 1.0139, "step": 65 }, { "epoch": 0.00522875816993464, "grad_norm": 5.145244155377481, "learning_rate": 3.4828496042216366e-06, "loss": 0.8655, "step": 66 }, { "epoch": 0.005307981778570014, "grad_norm": 5.083565443816077, "learning_rate": 3.535620052770449e-06, "loss": 1.017, "step": 67 }, { "epoch": 0.0053872053872053875, "grad_norm": 4.802988575118353, "learning_rate": 3.5883905013192615e-06, "loss": 0.9873, "step": 68 }, { "epoch": 0.00546642899584076, "grad_norm": 4.240379484307994, "learning_rate": 3.6411609498680746e-06, "loss": 0.9657, "step": 69 }, { "epoch": 0.005545652604476134, "grad_norm": 4.5666670004609236, "learning_rate": 3.693931398416887e-06, "loss": 0.9206, "step": 70 }, { "epoch": 0.005624876213111508, "grad_norm": 4.195885760168908, "learning_rate": 3.7467018469656995e-06, "loss": 1.0085, "step": 71 }, { "epoch": 0.00570409982174688, "grad_norm": 5.520513458463618, "learning_rate": 3.7994722955145117e-06, "loss": 1.1191, "step": 72 }, { "epoch": 0.005783323430382254, "grad_norm": 4.699359191613322, "learning_rate": 3.852242744063324e-06, "loss": 0.9894, "step": 73 }, { "epoch": 0.005862547039017627, "grad_norm": 5.172166281666409, "learning_rate": 3.9050131926121375e-06, "loss": 0.9746, "step": 74 }, { "epoch": 0.0059417706476530005, "grad_norm": 4.69763964850509, "learning_rate": 3.95778364116095e-06, "loss": 0.8907, "step": 75 }, { "epoch": 0.006020994256288374, "grad_norm": 4.9958513834839415, "learning_rate": 4.010554089709763e-06, "loss": 1.0053, "step": 76 }, { "epoch": 0.006100217864923747, "grad_norm": 4.774640599323078, "learning_rate": 4.063324538258576e-06, "loss": 0.9138, "step": 77 }, { "epoch": 0.006179441473559121, "grad_norm": 4.358778391915449, "learning_rate": 4.116094986807388e-06, "loss": 0.9861, "step": 78 }, { "epoch": 0.006258665082194494, "grad_norm": 4.605810160381926, "learning_rate": 4.168865435356201e-06, "loss": 0.9896, "step": 79 }, { "epoch": 0.006337888690829867, "grad_norm": 4.809837933134756, "learning_rate": 4.221635883905013e-06, "loss": 0.967, "step": 80 }, { "epoch": 0.006417112299465241, "grad_norm": 4.80021330417163, "learning_rate": 4.274406332453826e-06, "loss": 0.8566, "step": 81 }, { "epoch": 0.006496335908100614, "grad_norm": 5.462415067634996, "learning_rate": 4.327176781002639e-06, "loss": 0.8521, "step": 82 }, { "epoch": 0.006575559516735987, "grad_norm": 5.719895957749736, "learning_rate": 4.379947229551452e-06, "loss": 0.9843, "step": 83 }, { "epoch": 0.006654783125371361, "grad_norm": 4.544961628485232, "learning_rate": 4.432717678100264e-06, "loss": 0.9193, "step": 84 }, { "epoch": 0.006734006734006734, "grad_norm": 4.467983498834651, "learning_rate": 4.485488126649077e-06, "loss": 0.9251, "step": 85 }, { "epoch": 0.006813230342642107, "grad_norm": 4.102025313306762, "learning_rate": 4.538258575197889e-06, "loss": 0.8745, "step": 86 }, { "epoch": 0.006892453951277481, "grad_norm": 4.371674476843521, "learning_rate": 4.5910290237467024e-06, "loss": 0.9766, "step": 87 }, { "epoch": 0.006971677559912854, "grad_norm": 4.725585073534907, "learning_rate": 4.643799472295515e-06, "loss": 0.9342, "step": 88 }, { "epoch": 0.0070509011685482275, "grad_norm": 5.077637280352382, "learning_rate": 4.696569920844328e-06, "loss": 0.9327, "step": 89 }, { "epoch": 0.0071301247771836, "grad_norm": 4.659863741989866, "learning_rate": 4.74934036939314e-06, "loss": 0.8369, "step": 90 }, { "epoch": 0.007209348385818974, "grad_norm": 4.533011763036514, "learning_rate": 4.802110817941953e-06, "loss": 0.8696, "step": 91 }, { "epoch": 0.007288571994454348, "grad_norm": 4.65339049188555, "learning_rate": 4.854881266490766e-06, "loss": 1.0413, "step": 92 }, { "epoch": 0.00736779560308972, "grad_norm": 4.649252972991806, "learning_rate": 4.907651715039578e-06, "loss": 0.8295, "step": 93 }, { "epoch": 0.007447019211725094, "grad_norm": 5.084528879374284, "learning_rate": 4.960422163588391e-06, "loss": 0.947, "step": 94 }, { "epoch": 0.007526242820360468, "grad_norm": 5.365711702599791, "learning_rate": 5.013192612137203e-06, "loss": 0.9293, "step": 95 }, { "epoch": 0.0076054664289958405, "grad_norm": 5.046350203333127, "learning_rate": 5.065963060686017e-06, "loss": 0.79, "step": 96 }, { "epoch": 0.007684690037631214, "grad_norm": 5.3954948399293094, "learning_rate": 5.118733509234829e-06, "loss": 0.9778, "step": 97 }, { "epoch": 0.007763913646266587, "grad_norm": 4.688927569396289, "learning_rate": 5.171503957783641e-06, "loss": 0.8867, "step": 98 }, { "epoch": 0.00784313725490196, "grad_norm": 4.388795302213495, "learning_rate": 5.224274406332454e-06, "loss": 0.8771, "step": 99 }, { "epoch": 0.007922360863537333, "grad_norm": 3.8591469175181463, "learning_rate": 5.2770448548812665e-06, "loss": 0.8644, "step": 100 }, { "epoch": 0.008001584472172708, "grad_norm": 4.641112549258453, "learning_rate": 5.32981530343008e-06, "loss": 0.8712, "step": 101 }, { "epoch": 0.00808080808080808, "grad_norm": 4.555516172471135, "learning_rate": 5.382585751978893e-06, "loss": 0.9133, "step": 102 }, { "epoch": 0.008160031689443454, "grad_norm": 4.443942171385705, "learning_rate": 5.435356200527705e-06, "loss": 0.9634, "step": 103 }, { "epoch": 0.008239255298078828, "grad_norm": 4.106083206477799, "learning_rate": 5.488126649076517e-06, "loss": 0.9857, "step": 104 }, { "epoch": 0.008318478906714201, "grad_norm": 4.373637779113837, "learning_rate": 5.540897097625331e-06, "loss": 0.934, "step": 105 }, { "epoch": 0.008397702515349574, "grad_norm": 4.025247144093253, "learning_rate": 5.593667546174143e-06, "loss": 0.8653, "step": 106 }, { "epoch": 0.008476926123984948, "grad_norm": 4.11317321322005, "learning_rate": 5.6464379947229556e-06, "loss": 0.7644, "step": 107 }, { "epoch": 0.008556149732620321, "grad_norm": 5.3484256844048375, "learning_rate": 5.699208443271769e-06, "loss": 0.9789, "step": 108 }, { "epoch": 0.008635373341255694, "grad_norm": 4.572732992399881, "learning_rate": 5.751978891820581e-06, "loss": 0.8207, "step": 109 }, { "epoch": 0.008714596949891068, "grad_norm": 5.321804779554728, "learning_rate": 5.804749340369393e-06, "loss": 0.8386, "step": 110 }, { "epoch": 0.008793820558526441, "grad_norm": 4.244600615493288, "learning_rate": 5.857519788918207e-06, "loss": 0.7825, "step": 111 }, { "epoch": 0.008873044167161814, "grad_norm": 4.020909635380337, "learning_rate": 5.910290237467019e-06, "loss": 0.8229, "step": 112 }, { "epoch": 0.008952267775797187, "grad_norm": 5.803912944648255, "learning_rate": 5.9630606860158315e-06, "loss": 1.015, "step": 113 }, { "epoch": 0.009031491384432561, "grad_norm": 4.261138899479962, "learning_rate": 6.015831134564645e-06, "loss": 0.7781, "step": 114 }, { "epoch": 0.009110714993067934, "grad_norm": 4.61631624738624, "learning_rate": 6.068601583113457e-06, "loss": 0.8741, "step": 115 }, { "epoch": 0.009189938601703307, "grad_norm": 4.8776197021487775, "learning_rate": 6.121372031662269e-06, "loss": 0.8966, "step": 116 }, { "epoch": 0.009269162210338681, "grad_norm": 4.086195762521685, "learning_rate": 6.174142480211083e-06, "loss": 1.0242, "step": 117 }, { "epoch": 0.009348385818974054, "grad_norm": 5.630709412798026, "learning_rate": 6.226912928759895e-06, "loss": 0.9456, "step": 118 }, { "epoch": 0.009427609427609427, "grad_norm": 4.588901704970874, "learning_rate": 6.2796833773087074e-06, "loss": 0.8328, "step": 119 }, { "epoch": 0.009506833036244802, "grad_norm": 5.393860711539961, "learning_rate": 6.3324538258575205e-06, "loss": 0.9444, "step": 120 }, { "epoch": 0.009586056644880174, "grad_norm": 4.441173614780535, "learning_rate": 6.385224274406333e-06, "loss": 0.8644, "step": 121 }, { "epoch": 0.009665280253515547, "grad_norm": 4.494882352693371, "learning_rate": 6.437994722955145e-06, "loss": 0.8996, "step": 122 }, { "epoch": 0.009744503862150922, "grad_norm": 4.905180828585797, "learning_rate": 6.490765171503959e-06, "loss": 0.8668, "step": 123 }, { "epoch": 0.009823727470786294, "grad_norm": 5.089005617514571, "learning_rate": 6.543535620052771e-06, "loss": 0.9542, "step": 124 }, { "epoch": 0.009902951079421667, "grad_norm": 6.375768513761875, "learning_rate": 6.596306068601583e-06, "loss": 0.9587, "step": 125 }, { "epoch": 0.009982174688057042, "grad_norm": 5.0498298372319335, "learning_rate": 6.6490765171503965e-06, "loss": 0.9445, "step": 126 }, { "epoch": 0.010061398296692415, "grad_norm": 4.355769440314316, "learning_rate": 6.701846965699209e-06, "loss": 0.7922, "step": 127 }, { "epoch": 0.010140621905327787, "grad_norm": 4.156816938903829, "learning_rate": 6.754617414248022e-06, "loss": 0.8037, "step": 128 }, { "epoch": 0.01021984551396316, "grad_norm": 4.121763608581514, "learning_rate": 6.807387862796835e-06, "loss": 0.8335, "step": 129 }, { "epoch": 0.010299069122598535, "grad_norm": 4.04306955201829, "learning_rate": 6.860158311345647e-06, "loss": 0.8892, "step": 130 }, { "epoch": 0.010378292731233908, "grad_norm": 4.2830441388875045, "learning_rate": 6.912928759894459e-06, "loss": 0.8526, "step": 131 }, { "epoch": 0.01045751633986928, "grad_norm": 4.322006369790227, "learning_rate": 6.965699208443273e-06, "loss": 0.9522, "step": 132 }, { "epoch": 0.010536739948504655, "grad_norm": 4.014897802602603, "learning_rate": 7.0184696569920855e-06, "loss": 0.9428, "step": 133 }, { "epoch": 0.010615963557140028, "grad_norm": 4.168727587985997, "learning_rate": 7.071240105540898e-06, "loss": 0.9373, "step": 134 }, { "epoch": 0.0106951871657754, "grad_norm": 4.904887657583289, "learning_rate": 7.124010554089711e-06, "loss": 0.9598, "step": 135 }, { "epoch": 0.010774410774410775, "grad_norm": 5.235074439299815, "learning_rate": 7.176781002638523e-06, "loss": 0.8605, "step": 136 }, { "epoch": 0.010853634383046148, "grad_norm": 3.918901785131192, "learning_rate": 7.229551451187335e-06, "loss": 0.9594, "step": 137 }, { "epoch": 0.01093285799168152, "grad_norm": 5.361671346898265, "learning_rate": 7.282321899736149e-06, "loss": 0.9146, "step": 138 }, { "epoch": 0.011012081600316895, "grad_norm": 4.72013151628534, "learning_rate": 7.3350923482849614e-06, "loss": 0.888, "step": 139 }, { "epoch": 0.011091305208952268, "grad_norm": 4.310702462486159, "learning_rate": 7.387862796833774e-06, "loss": 0.8192, "step": 140 }, { "epoch": 0.01117052881758764, "grad_norm": 4.154478434577734, "learning_rate": 7.440633245382587e-06, "loss": 0.7708, "step": 141 }, { "epoch": 0.011249752426223015, "grad_norm": 3.8639612804964796, "learning_rate": 7.493403693931399e-06, "loss": 0.7781, "step": 142 }, { "epoch": 0.011328976034858388, "grad_norm": 4.124867136969742, "learning_rate": 7.546174142480211e-06, "loss": 0.785, "step": 143 }, { "epoch": 0.01140819964349376, "grad_norm": 4.248137504752284, "learning_rate": 7.5989445910290234e-06, "loss": 0.9768, "step": 144 }, { "epoch": 0.011487423252129134, "grad_norm": 4.226461559466392, "learning_rate": 7.651715039577837e-06, "loss": 0.8604, "step": 145 }, { "epoch": 0.011566646860764508, "grad_norm": 4.4624465699300115, "learning_rate": 7.704485488126649e-06, "loss": 0.8059, "step": 146 }, { "epoch": 0.011645870469399881, "grad_norm": 3.6667829398885305, "learning_rate": 7.757255936675462e-06, "loss": 0.8426, "step": 147 }, { "epoch": 0.011725094078035254, "grad_norm": 5.001089157790554, "learning_rate": 7.810026385224275e-06, "loss": 0.8442, "step": 148 }, { "epoch": 0.011804317686670628, "grad_norm": 4.330793339427337, "learning_rate": 7.862796833773088e-06, "loss": 0.9529, "step": 149 }, { "epoch": 0.011883541295306001, "grad_norm": 3.67472160005844, "learning_rate": 7.9155672823219e-06, "loss": 0.8692, "step": 150 }, { "epoch": 0.011962764903941374, "grad_norm": 3.845005744976269, "learning_rate": 7.968337730870712e-06, "loss": 0.7479, "step": 151 }, { "epoch": 0.012041988512576748, "grad_norm": 4.3384613812152155, "learning_rate": 8.021108179419526e-06, "loss": 0.7601, "step": 152 }, { "epoch": 0.012121212121212121, "grad_norm": 4.1343625213704405, "learning_rate": 8.073878627968339e-06, "loss": 0.8503, "step": 153 }, { "epoch": 0.012200435729847494, "grad_norm": 4.2433093058821045, "learning_rate": 8.126649076517152e-06, "loss": 0.7726, "step": 154 }, { "epoch": 0.012279659338482869, "grad_norm": 3.45255082771628, "learning_rate": 8.179419525065963e-06, "loss": 0.747, "step": 155 }, { "epoch": 0.012358882947118241, "grad_norm": 4.341564180857874, "learning_rate": 8.232189973614776e-06, "loss": 0.869, "step": 156 }, { "epoch": 0.012438106555753614, "grad_norm": 3.7577045631718127, "learning_rate": 8.28496042216359e-06, "loss": 0.6547, "step": 157 }, { "epoch": 0.012517330164388989, "grad_norm": 5.513269080787856, "learning_rate": 8.337730870712402e-06, "loss": 0.8728, "step": 158 }, { "epoch": 0.012596553773024361, "grad_norm": 4.37882606551242, "learning_rate": 8.390501319261214e-06, "loss": 0.8575, "step": 159 }, { "epoch": 0.012675777381659734, "grad_norm": 4.388520745167087, "learning_rate": 8.443271767810027e-06, "loss": 1.066, "step": 160 }, { "epoch": 0.012755000990295109, "grad_norm": 4.250390412888382, "learning_rate": 8.49604221635884e-06, "loss": 0.8017, "step": 161 }, { "epoch": 0.012834224598930482, "grad_norm": 3.9454617049590857, "learning_rate": 8.548812664907651e-06, "loss": 0.8749, "step": 162 }, { "epoch": 0.012913448207565854, "grad_norm": 4.430707449945308, "learning_rate": 8.601583113456466e-06, "loss": 0.6564, "step": 163 }, { "epoch": 0.012992671816201227, "grad_norm": 3.9202256632045724, "learning_rate": 8.654353562005277e-06, "loss": 0.8393, "step": 164 }, { "epoch": 0.013071895424836602, "grad_norm": 4.268136791602566, "learning_rate": 8.70712401055409e-06, "loss": 0.9371, "step": 165 }, { "epoch": 0.013151119033471975, "grad_norm": 3.9162695096517184, "learning_rate": 8.759894459102904e-06, "loss": 0.7281, "step": 166 }, { "epoch": 0.013230342642107347, "grad_norm": 4.553388186923074, "learning_rate": 8.812664907651715e-06, "loss": 0.8769, "step": 167 }, { "epoch": 0.013309566250742722, "grad_norm": 3.839789189950159, "learning_rate": 8.865435356200528e-06, "loss": 0.7697, "step": 168 }, { "epoch": 0.013388789859378095, "grad_norm": 3.8145935609705197, "learning_rate": 8.918205804749341e-06, "loss": 0.7364, "step": 169 }, { "epoch": 0.013468013468013467, "grad_norm": 4.268096339566533, "learning_rate": 8.970976253298154e-06, "loss": 0.9221, "step": 170 }, { "epoch": 0.013547237076648842, "grad_norm": 4.550504129757893, "learning_rate": 9.023746701846966e-06, "loss": 0.8489, "step": 171 }, { "epoch": 0.013626460685284215, "grad_norm": 4.283525417991337, "learning_rate": 9.076517150395779e-06, "loss": 0.9542, "step": 172 }, { "epoch": 0.013705684293919588, "grad_norm": 4.3678315522937385, "learning_rate": 9.129287598944592e-06, "loss": 0.8868, "step": 173 }, { "epoch": 0.013784907902554962, "grad_norm": 3.9556925817923583, "learning_rate": 9.182058047493405e-06, "loss": 0.81, "step": 174 }, { "epoch": 0.013864131511190335, "grad_norm": 3.704795715263707, "learning_rate": 9.234828496042218e-06, "loss": 0.8582, "step": 175 }, { "epoch": 0.013943355119825708, "grad_norm": 4.170391413752929, "learning_rate": 9.28759894459103e-06, "loss": 0.8575, "step": 176 }, { "epoch": 0.014022578728461082, "grad_norm": 3.7131669252291637, "learning_rate": 9.340369393139842e-06, "loss": 0.8142, "step": 177 }, { "epoch": 0.014101802337096455, "grad_norm": 3.4994250961024402, "learning_rate": 9.393139841688655e-06, "loss": 0.8888, "step": 178 }, { "epoch": 0.014181025945731828, "grad_norm": 4.2069622125317325, "learning_rate": 9.445910290237469e-06, "loss": 0.8666, "step": 179 }, { "epoch": 0.0142602495543672, "grad_norm": 3.9202895838726217, "learning_rate": 9.49868073878628e-06, "loss": 0.6881, "step": 180 }, { "epoch": 0.014339473163002575, "grad_norm": 3.795816921822793, "learning_rate": 9.551451187335093e-06, "loss": 0.9302, "step": 181 }, { "epoch": 0.014418696771637948, "grad_norm": 3.857082471296832, "learning_rate": 9.604221635883906e-06, "loss": 0.7497, "step": 182 }, { "epoch": 0.01449792038027332, "grad_norm": 3.425914907611555, "learning_rate": 9.656992084432717e-06, "loss": 0.74, "step": 183 }, { "epoch": 0.014577143988908695, "grad_norm": 4.827477679594857, "learning_rate": 9.709762532981532e-06, "loss": 0.9002, "step": 184 }, { "epoch": 0.014656367597544068, "grad_norm": 4.403657129664381, "learning_rate": 9.762532981530344e-06, "loss": 0.8277, "step": 185 }, { "epoch": 0.01473559120617944, "grad_norm": 3.837542836735528, "learning_rate": 9.815303430079157e-06, "loss": 0.7833, "step": 186 }, { "epoch": 0.014814814814814815, "grad_norm": 4.244820929131236, "learning_rate": 9.86807387862797e-06, "loss": 0.9134, "step": 187 }, { "epoch": 0.014894038423450188, "grad_norm": 3.492855668523716, "learning_rate": 9.920844327176781e-06, "loss": 0.8036, "step": 188 }, { "epoch": 0.014973262032085561, "grad_norm": 3.611719510210302, "learning_rate": 9.973614775725594e-06, "loss": 0.7717, "step": 189 }, { "epoch": 0.015052485640720936, "grad_norm": 3.741597374680162, "learning_rate": 1.0026385224274406e-05, "loss": 0.9001, "step": 190 }, { "epoch": 0.015131709249356308, "grad_norm": 4.527848826983816, "learning_rate": 1.007915567282322e-05, "loss": 0.827, "step": 191 }, { "epoch": 0.015210932857991681, "grad_norm": 3.354860688994162, "learning_rate": 1.0131926121372034e-05, "loss": 0.6931, "step": 192 }, { "epoch": 0.015290156466627056, "grad_norm": 4.219651178061965, "learning_rate": 1.0184696569920845e-05, "loss": 0.8936, "step": 193 }, { "epoch": 0.015369380075262428, "grad_norm": 3.6376167818913494, "learning_rate": 1.0237467018469658e-05, "loss": 0.7386, "step": 194 }, { "epoch": 0.015448603683897801, "grad_norm": 3.5033686642392685, "learning_rate": 1.0290237467018471e-05, "loss": 0.7582, "step": 195 }, { "epoch": 0.015527827292533174, "grad_norm": 4.679814278475639, "learning_rate": 1.0343007915567282e-05, "loss": 0.884, "step": 196 }, { "epoch": 0.015607050901168549, "grad_norm": 3.8639364280687487, "learning_rate": 1.0395778364116096e-05, "loss": 0.8326, "step": 197 }, { "epoch": 0.01568627450980392, "grad_norm": 3.634592082543115, "learning_rate": 1.0448548812664909e-05, "loss": 0.7581, "step": 198 }, { "epoch": 0.015765498118439296, "grad_norm": 4.118737206191648, "learning_rate": 1.050131926121372e-05, "loss": 0.7847, "step": 199 }, { "epoch": 0.015844721727074667, "grad_norm": 3.584822138410341, "learning_rate": 1.0554089709762533e-05, "loss": 0.6461, "step": 200 }, { "epoch": 0.01592394533571004, "grad_norm": 3.703795809615698, "learning_rate": 1.0606860158311348e-05, "loss": 0.7239, "step": 201 }, { "epoch": 0.016003168944345416, "grad_norm": 3.5727922420656153, "learning_rate": 1.065963060686016e-05, "loss": 0.8866, "step": 202 }, { "epoch": 0.016082392552980787, "grad_norm": 3.723572236030753, "learning_rate": 1.0712401055408972e-05, "loss": 0.895, "step": 203 }, { "epoch": 0.01616161616161616, "grad_norm": 4.3724041554593445, "learning_rate": 1.0765171503957785e-05, "loss": 0.7314, "step": 204 }, { "epoch": 0.016240839770251536, "grad_norm": 3.5208260272751803, "learning_rate": 1.0817941952506597e-05, "loss": 0.8545, "step": 205 }, { "epoch": 0.016320063378886907, "grad_norm": 3.6578900613380094, "learning_rate": 1.087071240105541e-05, "loss": 0.6679, "step": 206 }, { "epoch": 0.01639928698752228, "grad_norm": 4.5009170735929755, "learning_rate": 1.0923482849604223e-05, "loss": 0.7741, "step": 207 }, { "epoch": 0.016478510596157656, "grad_norm": 3.4412113654885594, "learning_rate": 1.0976253298153034e-05, "loss": 0.7697, "step": 208 }, { "epoch": 0.016557734204793027, "grad_norm": 4.514535209752686, "learning_rate": 1.1029023746701847e-05, "loss": 0.9767, "step": 209 }, { "epoch": 0.016636957813428402, "grad_norm": 4.138695914329832, "learning_rate": 1.1081794195250662e-05, "loss": 0.819, "step": 210 }, { "epoch": 0.016716181422063776, "grad_norm": 3.5464001325184724, "learning_rate": 1.1134564643799472e-05, "loss": 0.7432, "step": 211 }, { "epoch": 0.016795405030699147, "grad_norm": 3.268689749462536, "learning_rate": 1.1187335092348287e-05, "loss": 0.7539, "step": 212 }, { "epoch": 0.016874628639334522, "grad_norm": 3.6449475341487245, "learning_rate": 1.12401055408971e-05, "loss": 0.7753, "step": 213 }, { "epoch": 0.016953852247969897, "grad_norm": 4.216205799069353, "learning_rate": 1.1292875989445911e-05, "loss": 0.8787, "step": 214 }, { "epoch": 0.017033075856605268, "grad_norm": 4.299724070076182, "learning_rate": 1.1345646437994724e-05, "loss": 0.8365, "step": 215 }, { "epoch": 0.017112299465240642, "grad_norm": 3.74584938994572, "learning_rate": 1.1398416886543537e-05, "loss": 0.8036, "step": 216 }, { "epoch": 0.017191523073876017, "grad_norm": 4.077208167309988, "learning_rate": 1.1451187335092349e-05, "loss": 0.7797, "step": 217 }, { "epoch": 0.017270746682511388, "grad_norm": 3.565018608216613, "learning_rate": 1.1503957783641162e-05, "loss": 0.671, "step": 218 }, { "epoch": 0.017349970291146762, "grad_norm": 3.8567431211721, "learning_rate": 1.1556728232189975e-05, "loss": 0.7822, "step": 219 }, { "epoch": 0.017429193899782137, "grad_norm": 3.745620195928399, "learning_rate": 1.1609498680738786e-05, "loss": 0.8644, "step": 220 }, { "epoch": 0.017508417508417508, "grad_norm": 3.8005079906268007, "learning_rate": 1.16622691292876e-05, "loss": 0.8291, "step": 221 }, { "epoch": 0.017587641117052882, "grad_norm": 3.7551160204839547, "learning_rate": 1.1715039577836414e-05, "loss": 0.8835, "step": 222 }, { "epoch": 0.017666864725688253, "grad_norm": 3.7167090708993857, "learning_rate": 1.1767810026385225e-05, "loss": 0.8747, "step": 223 }, { "epoch": 0.017746088334323628, "grad_norm": 3.2585918653928063, "learning_rate": 1.1820580474934039e-05, "loss": 0.8071, "step": 224 }, { "epoch": 0.017825311942959002, "grad_norm": 3.7416082846867003, "learning_rate": 1.1873350923482852e-05, "loss": 0.6964, "step": 225 }, { "epoch": 0.017904535551594374, "grad_norm": 3.7342682163534016, "learning_rate": 1.1926121372031663e-05, "loss": 0.723, "step": 226 }, { "epoch": 0.017983759160229748, "grad_norm": 3.506114113377626, "learning_rate": 1.1978891820580476e-05, "loss": 0.7236, "step": 227 }, { "epoch": 0.018062982768865123, "grad_norm": 4.026296434389851, "learning_rate": 1.203166226912929e-05, "loss": 0.7678, "step": 228 }, { "epoch": 0.018142206377500494, "grad_norm": 4.059294464998192, "learning_rate": 1.20844327176781e-05, "loss": 0.9026, "step": 229 }, { "epoch": 0.018221429986135868, "grad_norm": 3.8225269226973237, "learning_rate": 1.2137203166226914e-05, "loss": 0.8269, "step": 230 }, { "epoch": 0.018300653594771243, "grad_norm": 4.168487101336105, "learning_rate": 1.2189973614775727e-05, "loss": 0.7832, "step": 231 }, { "epoch": 0.018379877203406614, "grad_norm": 4.366470054616233, "learning_rate": 1.2242744063324538e-05, "loss": 0.7617, "step": 232 }, { "epoch": 0.01845910081204199, "grad_norm": 4.157728193933643, "learning_rate": 1.2295514511873353e-05, "loss": 0.8242, "step": 233 }, { "epoch": 0.018538324420677363, "grad_norm": 3.5409903145818107, "learning_rate": 1.2348284960422166e-05, "loss": 0.7008, "step": 234 }, { "epoch": 0.018617548029312734, "grad_norm": 3.9399017253064925, "learning_rate": 1.2401055408970977e-05, "loss": 0.7594, "step": 235 }, { "epoch": 0.01869677163794811, "grad_norm": 4.072821094514031, "learning_rate": 1.245382585751979e-05, "loss": 0.794, "step": 236 }, { "epoch": 0.018775995246583483, "grad_norm": 3.587038963862682, "learning_rate": 1.2506596306068604e-05, "loss": 0.7905, "step": 237 }, { "epoch": 0.018855218855218854, "grad_norm": 4.222390342873257, "learning_rate": 1.2559366754617415e-05, "loss": 0.7026, "step": 238 }, { "epoch": 0.01893444246385423, "grad_norm": 3.6227628867044306, "learning_rate": 1.2612137203166228e-05, "loss": 0.7938, "step": 239 }, { "epoch": 0.019013666072489603, "grad_norm": 3.9591522550380924, "learning_rate": 1.2664907651715041e-05, "loss": 0.7552, "step": 240 }, { "epoch": 0.019092889681124974, "grad_norm": 3.465839747417068, "learning_rate": 1.2717678100263852e-05, "loss": 0.7554, "step": 241 }, { "epoch": 0.01917211328976035, "grad_norm": 5.25631605275512, "learning_rate": 1.2770448548812666e-05, "loss": 0.8187, "step": 242 }, { "epoch": 0.019251336898395723, "grad_norm": 4.231239499070505, "learning_rate": 1.282321899736148e-05, "loss": 0.6708, "step": 243 }, { "epoch": 0.019330560507031094, "grad_norm": 4.322833233877656, "learning_rate": 1.287598944591029e-05, "loss": 0.691, "step": 244 }, { "epoch": 0.01940978411566647, "grad_norm": 4.853147718334036, "learning_rate": 1.2928759894459105e-05, "loss": 0.78, "step": 245 }, { "epoch": 0.019489007724301843, "grad_norm": 3.7149811759579805, "learning_rate": 1.2981530343007918e-05, "loss": 0.8873, "step": 246 }, { "epoch": 0.019568231332937214, "grad_norm": 4.039438627767664, "learning_rate": 1.303430079155673e-05, "loss": 0.9019, "step": 247 }, { "epoch": 0.01964745494157259, "grad_norm": 4.334229332140161, "learning_rate": 1.3087071240105542e-05, "loss": 0.8289, "step": 248 }, { "epoch": 0.019726678550207963, "grad_norm": 3.8079823489206848, "learning_rate": 1.3139841688654355e-05, "loss": 0.7061, "step": 249 }, { "epoch": 0.019805902158843335, "grad_norm": 3.1954119319695016, "learning_rate": 1.3192612137203167e-05, "loss": 0.8081, "step": 250 }, { "epoch": 0.01988512576747871, "grad_norm": 3.563204029462328, "learning_rate": 1.324538258575198e-05, "loss": 0.7448, "step": 251 }, { "epoch": 0.019964349376114084, "grad_norm": 4.341566034690226, "learning_rate": 1.3298153034300793e-05, "loss": 0.8055, "step": 252 }, { "epoch": 0.020043572984749455, "grad_norm": 3.3198199379309274, "learning_rate": 1.3350923482849604e-05, "loss": 0.8121, "step": 253 }, { "epoch": 0.02012279659338483, "grad_norm": 4.06557739337397, "learning_rate": 1.3403693931398417e-05, "loss": 0.8344, "step": 254 }, { "epoch": 0.020202020202020204, "grad_norm": 3.499526476418784, "learning_rate": 1.3456464379947232e-05, "loss": 0.7689, "step": 255 }, { "epoch": 0.020281243810655575, "grad_norm": 3.926451846400831, "learning_rate": 1.3509234828496044e-05, "loss": 0.8759, "step": 256 }, { "epoch": 0.02036046741929095, "grad_norm": 3.8956301180414226, "learning_rate": 1.3562005277044857e-05, "loss": 0.7401, "step": 257 }, { "epoch": 0.02043969102792632, "grad_norm": 3.9302225168681155, "learning_rate": 1.361477572559367e-05, "loss": 0.8247, "step": 258 }, { "epoch": 0.020518914636561695, "grad_norm": 3.5472244564328763, "learning_rate": 1.3667546174142481e-05, "loss": 0.7644, "step": 259 }, { "epoch": 0.02059813824519707, "grad_norm": 3.56902753063322, "learning_rate": 1.3720316622691294e-05, "loss": 0.7661, "step": 260 }, { "epoch": 0.02067736185383244, "grad_norm": 4.206985293609494, "learning_rate": 1.3773087071240107e-05, "loss": 0.858, "step": 261 }, { "epoch": 0.020756585462467815, "grad_norm": 3.789742057549914, "learning_rate": 1.3825857519788919e-05, "loss": 0.7101, "step": 262 }, { "epoch": 0.02083580907110319, "grad_norm": 3.5289805722076224, "learning_rate": 1.3878627968337732e-05, "loss": 0.7384, "step": 263 }, { "epoch": 0.02091503267973856, "grad_norm": 3.4809773057379734, "learning_rate": 1.3931398416886547e-05, "loss": 0.9131, "step": 264 }, { "epoch": 0.020994256288373935, "grad_norm": 4.028840068111051, "learning_rate": 1.3984168865435356e-05, "loss": 0.8361, "step": 265 }, { "epoch": 0.02107347989700931, "grad_norm": 3.4749301836903337, "learning_rate": 1.4036939313984171e-05, "loss": 0.7317, "step": 266 }, { "epoch": 0.02115270350564468, "grad_norm": 3.467759847181791, "learning_rate": 1.4089709762532984e-05, "loss": 0.844, "step": 267 }, { "epoch": 0.021231927114280055, "grad_norm": 3.8743740199567394, "learning_rate": 1.4142480211081795e-05, "loss": 0.8776, "step": 268 }, { "epoch": 0.02131115072291543, "grad_norm": 4.110449384942725, "learning_rate": 1.4195250659630609e-05, "loss": 0.8366, "step": 269 }, { "epoch": 0.0213903743315508, "grad_norm": 3.8833441075642963, "learning_rate": 1.4248021108179422e-05, "loss": 0.7772, "step": 270 }, { "epoch": 0.021469597940186175, "grad_norm": 3.289699051116365, "learning_rate": 1.4300791556728233e-05, "loss": 0.7317, "step": 271 }, { "epoch": 0.02154882154882155, "grad_norm": 3.34801754211088, "learning_rate": 1.4353562005277046e-05, "loss": 0.7615, "step": 272 }, { "epoch": 0.02162804515745692, "grad_norm": 3.0799025441784265, "learning_rate": 1.440633245382586e-05, "loss": 0.8028, "step": 273 }, { "epoch": 0.021707268766092296, "grad_norm": 3.58422767361047, "learning_rate": 1.445910290237467e-05, "loss": 0.8324, "step": 274 }, { "epoch": 0.02178649237472767, "grad_norm": 4.023408860426394, "learning_rate": 1.4511873350923484e-05, "loss": 0.861, "step": 275 }, { "epoch": 0.02186571598336304, "grad_norm": 4.097140721055274, "learning_rate": 1.4564643799472298e-05, "loss": 0.9003, "step": 276 }, { "epoch": 0.021944939591998416, "grad_norm": 3.707139064155454, "learning_rate": 1.461741424802111e-05, "loss": 0.7211, "step": 277 }, { "epoch": 0.02202416320063379, "grad_norm": 3.418846745319288, "learning_rate": 1.4670184696569923e-05, "loss": 0.7495, "step": 278 }, { "epoch": 0.02210338680926916, "grad_norm": 4.817702318931237, "learning_rate": 1.4722955145118736e-05, "loss": 0.6855, "step": 279 }, { "epoch": 0.022182610417904536, "grad_norm": 5.710433622705206, "learning_rate": 1.4775725593667547e-05, "loss": 0.8139, "step": 280 }, { "epoch": 0.02226183402653991, "grad_norm": 3.664054971437985, "learning_rate": 1.482849604221636e-05, "loss": 0.8447, "step": 281 }, { "epoch": 0.02234105763517528, "grad_norm": 3.4240551483562376, "learning_rate": 1.4881266490765173e-05, "loss": 0.7682, "step": 282 }, { "epoch": 0.022420281243810656, "grad_norm": 3.9249114246971284, "learning_rate": 1.4934036939313985e-05, "loss": 0.7726, "step": 283 }, { "epoch": 0.02249950485244603, "grad_norm": 3.1316834100159534, "learning_rate": 1.4986807387862798e-05, "loss": 0.7919, "step": 284 }, { "epoch": 0.0225787284610814, "grad_norm": 3.7673726856479672, "learning_rate": 1.503957783641161e-05, "loss": 0.6971, "step": 285 }, { "epoch": 0.022657952069716776, "grad_norm": 3.502806068320617, "learning_rate": 1.5092348284960422e-05, "loss": 0.8588, "step": 286 }, { "epoch": 0.02273717567835215, "grad_norm": 3.674320971057659, "learning_rate": 1.5145118733509237e-05, "loss": 0.8545, "step": 287 }, { "epoch": 0.02281639928698752, "grad_norm": 3.65343275322434, "learning_rate": 1.5197889182058047e-05, "loss": 0.822, "step": 288 }, { "epoch": 0.022895622895622896, "grad_norm": 3.9145186417350097, "learning_rate": 1.5250659630606862e-05, "loss": 0.8197, "step": 289 }, { "epoch": 0.022974846504258267, "grad_norm": 3.587462611763387, "learning_rate": 1.5303430079155675e-05, "loss": 0.806, "step": 290 }, { "epoch": 0.023054070112893642, "grad_norm": 3.2429608145578976, "learning_rate": 1.5356200527704484e-05, "loss": 0.716, "step": 291 }, { "epoch": 0.023133293721529016, "grad_norm": 3.7327217962083674, "learning_rate": 1.5408970976253298e-05, "loss": 0.8026, "step": 292 }, { "epoch": 0.023212517330164387, "grad_norm": 3.7050830424089245, "learning_rate": 1.5461741424802114e-05, "loss": 0.7938, "step": 293 }, { "epoch": 0.023291740938799762, "grad_norm": 3.4178884118590487, "learning_rate": 1.5514511873350924e-05, "loss": 0.7051, "step": 294 }, { "epoch": 0.023370964547435136, "grad_norm": 4.149806296582689, "learning_rate": 1.5567282321899737e-05, "loss": 0.8055, "step": 295 }, { "epoch": 0.023450188156070507, "grad_norm": 3.595358031209693, "learning_rate": 1.562005277044855e-05, "loss": 0.7103, "step": 296 }, { "epoch": 0.023529411764705882, "grad_norm": 3.8238105633654027, "learning_rate": 1.5672823218997363e-05, "loss": 0.7502, "step": 297 }, { "epoch": 0.023608635373341257, "grad_norm": 4.105921446221804, "learning_rate": 1.5725593667546176e-05, "loss": 0.8152, "step": 298 }, { "epoch": 0.023687858981976628, "grad_norm": 3.3725631824917355, "learning_rate": 1.577836411609499e-05, "loss": 0.8643, "step": 299 }, { "epoch": 0.023767082590612002, "grad_norm": 3.3390397221383106, "learning_rate": 1.58311345646438e-05, "loss": 0.845, "step": 300 }, { "epoch": 0.023846306199247377, "grad_norm": 3.281381423728863, "learning_rate": 1.5883905013192612e-05, "loss": 0.696, "step": 301 }, { "epoch": 0.023925529807882748, "grad_norm": 4.143666183958962, "learning_rate": 1.5936675461741425e-05, "loss": 0.6892, "step": 302 }, { "epoch": 0.024004753416518122, "grad_norm": 3.6469645487470803, "learning_rate": 1.5989445910290238e-05, "loss": 0.7484, "step": 303 }, { "epoch": 0.024083977025153497, "grad_norm": 3.462031792761476, "learning_rate": 1.604221635883905e-05, "loss": 0.6692, "step": 304 }, { "epoch": 0.024163200633788868, "grad_norm": 4.382336173610418, "learning_rate": 1.6094986807387864e-05, "loss": 0.8901, "step": 305 }, { "epoch": 0.024242424242424242, "grad_norm": 3.615706822076657, "learning_rate": 1.6147757255936677e-05, "loss": 0.746, "step": 306 }, { "epoch": 0.024321647851059617, "grad_norm": 4.0891182726928506, "learning_rate": 1.620052770448549e-05, "loss": 0.7599, "step": 307 }, { "epoch": 0.024400871459694988, "grad_norm": 3.2599454134387145, "learning_rate": 1.6253298153034303e-05, "loss": 0.7387, "step": 308 }, { "epoch": 0.024480095068330363, "grad_norm": 3.077261991593952, "learning_rate": 1.6306068601583113e-05, "loss": 0.6451, "step": 309 }, { "epoch": 0.024559318676965737, "grad_norm": 3.3946669801299554, "learning_rate": 1.6358839050131926e-05, "loss": 0.7617, "step": 310 }, { "epoch": 0.024638542285601108, "grad_norm": 3.0909202813571754, "learning_rate": 1.641160949868074e-05, "loss": 0.7394, "step": 311 }, { "epoch": 0.024717765894236483, "grad_norm": 3.2303813414067646, "learning_rate": 1.6464379947229552e-05, "loss": 0.6724, "step": 312 }, { "epoch": 0.024796989502871857, "grad_norm": 4.148102731792718, "learning_rate": 1.6517150395778365e-05, "loss": 0.9289, "step": 313 }, { "epoch": 0.024876213111507228, "grad_norm": 3.1480484242615727, "learning_rate": 1.656992084432718e-05, "loss": 0.8047, "step": 314 }, { "epoch": 0.024955436720142603, "grad_norm": 3.0850680980658396, "learning_rate": 1.6622691292875988e-05, "loss": 0.646, "step": 315 }, { "epoch": 0.025034660328777977, "grad_norm": 3.847289304454283, "learning_rate": 1.6675461741424805e-05, "loss": 0.8803, "step": 316 }, { "epoch": 0.02511388393741335, "grad_norm": 3.4955441653574355, "learning_rate": 1.6728232189973618e-05, "loss": 0.6668, "step": 317 }, { "epoch": 0.025193107546048723, "grad_norm": 3.5042065798133177, "learning_rate": 1.6781002638522427e-05, "loss": 0.6678, "step": 318 }, { "epoch": 0.025272331154684097, "grad_norm": 3.1172788400995293, "learning_rate": 1.683377308707124e-05, "loss": 0.6891, "step": 319 }, { "epoch": 0.02535155476331947, "grad_norm": 3.314291280894821, "learning_rate": 1.6886543535620054e-05, "loss": 0.6419, "step": 320 }, { "epoch": 0.025430778371954843, "grad_norm": 4.01611550806511, "learning_rate": 1.6939313984168867e-05, "loss": 0.7323, "step": 321 }, { "epoch": 0.025510001980590218, "grad_norm": 3.7084401810504657, "learning_rate": 1.699208443271768e-05, "loss": 0.762, "step": 322 }, { "epoch": 0.02558922558922559, "grad_norm": 3.753149408505484, "learning_rate": 1.7044854881266493e-05, "loss": 0.8163, "step": 323 }, { "epoch": 0.025668449197860963, "grad_norm": 4.135777493301567, "learning_rate": 1.7097625329815303e-05, "loss": 0.7717, "step": 324 }, { "epoch": 0.025747672806496334, "grad_norm": 3.1570838097689085, "learning_rate": 1.7150395778364116e-05, "loss": 0.6832, "step": 325 }, { "epoch": 0.02582689641513171, "grad_norm": 3.484950768473148, "learning_rate": 1.7203166226912932e-05, "loss": 0.7892, "step": 326 }, { "epoch": 0.025906120023767083, "grad_norm": 4.095040453604106, "learning_rate": 1.7255936675461742e-05, "loss": 0.9254, "step": 327 }, { "epoch": 0.025985343632402454, "grad_norm": 3.552826016710888, "learning_rate": 1.7308707124010555e-05, "loss": 0.7753, "step": 328 }, { "epoch": 0.02606456724103783, "grad_norm": 3.1391953350397426, "learning_rate": 1.7361477572559368e-05, "loss": 0.7731, "step": 329 }, { "epoch": 0.026143790849673203, "grad_norm": 3.589391543210561, "learning_rate": 1.741424802110818e-05, "loss": 0.8643, "step": 330 }, { "epoch": 0.026223014458308574, "grad_norm": 3.345659315748946, "learning_rate": 1.7467018469656994e-05, "loss": 0.7583, "step": 331 }, { "epoch": 0.02630223806694395, "grad_norm": 3.3314584139164296, "learning_rate": 1.7519788918205807e-05, "loss": 0.7089, "step": 332 }, { "epoch": 0.026381461675579324, "grad_norm": 3.5738923858018365, "learning_rate": 1.7572559366754617e-05, "loss": 0.7971, "step": 333 }, { "epoch": 0.026460685284214695, "grad_norm": 2.9993215529920443, "learning_rate": 1.762532981530343e-05, "loss": 0.6821, "step": 334 }, { "epoch": 0.02653990889285007, "grad_norm": 3.2038798869042426, "learning_rate": 1.7678100263852246e-05, "loss": 0.6752, "step": 335 }, { "epoch": 0.026619132501485444, "grad_norm": 3.926245847094454, "learning_rate": 1.7730870712401056e-05, "loss": 0.6815, "step": 336 }, { "epoch": 0.026698356110120815, "grad_norm": 4.166435984990503, "learning_rate": 1.778364116094987e-05, "loss": 0.8396, "step": 337 }, { "epoch": 0.02677757971875619, "grad_norm": 3.32979714666828, "learning_rate": 1.7836411609498682e-05, "loss": 0.7585, "step": 338 }, { "epoch": 0.026856803327391564, "grad_norm": 3.231145519092424, "learning_rate": 1.7889182058047495e-05, "loss": 0.639, "step": 339 }, { "epoch": 0.026936026936026935, "grad_norm": 3.9174238979156164, "learning_rate": 1.794195250659631e-05, "loss": 0.8961, "step": 340 }, { "epoch": 0.02701525054466231, "grad_norm": 3.3408210835803076, "learning_rate": 1.799472295514512e-05, "loss": 0.8431, "step": 341 }, { "epoch": 0.027094474153297684, "grad_norm": 3.5027731677869878, "learning_rate": 1.804749340369393e-05, "loss": 0.7896, "step": 342 }, { "epoch": 0.027173697761933055, "grad_norm": 3.2871288694337113, "learning_rate": 1.8100263852242744e-05, "loss": 0.6117, "step": 343 }, { "epoch": 0.02725292137056843, "grad_norm": 3.350721831688502, "learning_rate": 1.8153034300791557e-05, "loss": 0.7742, "step": 344 }, { "epoch": 0.027332144979203804, "grad_norm": 3.4371672105624453, "learning_rate": 1.820580474934037e-05, "loss": 0.7458, "step": 345 }, { "epoch": 0.027411368587839175, "grad_norm": 3.450554693901827, "learning_rate": 1.8258575197889184e-05, "loss": 0.6961, "step": 346 }, { "epoch": 0.02749059219647455, "grad_norm": 3.6673363372608354, "learning_rate": 1.8311345646437997e-05, "loss": 0.6564, "step": 347 }, { "epoch": 0.027569815805109924, "grad_norm": 3.2710856783239604, "learning_rate": 1.836411609498681e-05, "loss": 0.661, "step": 348 }, { "epoch": 0.027649039413745295, "grad_norm": 4.205237087646808, "learning_rate": 1.8416886543535623e-05, "loss": 0.8297, "step": 349 }, { "epoch": 0.02772826302238067, "grad_norm": 3.2138147795983985, "learning_rate": 1.8469656992084436e-05, "loss": 0.6865, "step": 350 }, { "epoch": 0.027807486631016044, "grad_norm": 3.8356757431162443, "learning_rate": 1.8522427440633246e-05, "loss": 0.7885, "step": 351 }, { "epoch": 0.027886710239651415, "grad_norm": 3.2551435313043333, "learning_rate": 1.857519788918206e-05, "loss": 0.7511, "step": 352 }, { "epoch": 0.02796593384828679, "grad_norm": 3.0983731597894617, "learning_rate": 1.8627968337730872e-05, "loss": 0.6247, "step": 353 }, { "epoch": 0.028045157456922164, "grad_norm": 3.1731553515873396, "learning_rate": 1.8680738786279685e-05, "loss": 0.6205, "step": 354 }, { "epoch": 0.028124381065557535, "grad_norm": 3.287247997085444, "learning_rate": 1.8733509234828498e-05, "loss": 0.7181, "step": 355 }, { "epoch": 0.02820360467419291, "grad_norm": 3.327138725584456, "learning_rate": 1.878627968337731e-05, "loss": 0.8072, "step": 356 }, { "epoch": 0.028282828282828285, "grad_norm": 3.319797807388505, "learning_rate": 1.883905013192612e-05, "loss": 0.8466, "step": 357 }, { "epoch": 0.028362051891463656, "grad_norm": 3.570402366889043, "learning_rate": 1.8891820580474937e-05, "loss": 0.8399, "step": 358 }, { "epoch": 0.02844127550009903, "grad_norm": 3.137571438918123, "learning_rate": 1.894459102902375e-05, "loss": 0.7881, "step": 359 }, { "epoch": 0.0285204991087344, "grad_norm": 3.4841293423776003, "learning_rate": 1.899736147757256e-05, "loss": 0.8527, "step": 360 }, { "epoch": 0.028599722717369776, "grad_norm": 3.4820764436145932, "learning_rate": 1.9050131926121373e-05, "loss": 0.8243, "step": 361 }, { "epoch": 0.02867894632600515, "grad_norm": 3.3164503046184723, "learning_rate": 1.9102902374670186e-05, "loss": 0.8558, "step": 362 }, { "epoch": 0.02875816993464052, "grad_norm": 2.9865196186958567, "learning_rate": 1.9155672823219e-05, "loss": 0.6437, "step": 363 }, { "epoch": 0.028837393543275896, "grad_norm": 3.2335490271808904, "learning_rate": 1.9208443271767812e-05, "loss": 0.7301, "step": 364 }, { "epoch": 0.02891661715191127, "grad_norm": 3.3502128059371152, "learning_rate": 1.9261213720316625e-05, "loss": 0.8369, "step": 365 }, { "epoch": 0.02899584076054664, "grad_norm": 2.8743265293932776, "learning_rate": 1.9313984168865435e-05, "loss": 0.6829, "step": 366 }, { "epoch": 0.029075064369182016, "grad_norm": 3.280219616689533, "learning_rate": 1.9366754617414248e-05, "loss": 0.6537, "step": 367 }, { "epoch": 0.02915428797781739, "grad_norm": 2.732268199224151, "learning_rate": 1.9419525065963065e-05, "loss": 0.7224, "step": 368 }, { "epoch": 0.02923351158645276, "grad_norm": 2.984363660002938, "learning_rate": 1.9472295514511874e-05, "loss": 0.7127, "step": 369 }, { "epoch": 0.029312735195088136, "grad_norm": 3.371642454877917, "learning_rate": 1.9525065963060687e-05, "loss": 0.7686, "step": 370 }, { "epoch": 0.02939195880372351, "grad_norm": 3.1768839335393078, "learning_rate": 1.95778364116095e-05, "loss": 0.7357, "step": 371 }, { "epoch": 0.02947118241235888, "grad_norm": 2.7923715663376765, "learning_rate": 1.9630606860158313e-05, "loss": 0.6656, "step": 372 }, { "epoch": 0.029550406020994256, "grad_norm": 4.191541846532922, "learning_rate": 1.9683377308707127e-05, "loss": 0.7534, "step": 373 }, { "epoch": 0.02962962962962963, "grad_norm": 4.000464072905396, "learning_rate": 1.973614775725594e-05, "loss": 0.8135, "step": 374 }, { "epoch": 0.029708853238265002, "grad_norm": 3.0150323185962975, "learning_rate": 1.978891820580475e-05, "loss": 0.6979, "step": 375 }, { "epoch": 0.029788076846900376, "grad_norm": 3.154364618050119, "learning_rate": 1.9841688654353562e-05, "loss": 0.738, "step": 376 }, { "epoch": 0.02986730045553575, "grad_norm": 2.8574921657055388, "learning_rate": 1.9894459102902375e-05, "loss": 0.6646, "step": 377 }, { "epoch": 0.029946524064171122, "grad_norm": 4.068817276971112, "learning_rate": 1.994722955145119e-05, "loss": 0.7863, "step": 378 }, { "epoch": 0.030025747672806496, "grad_norm": 3.801094142270408, "learning_rate": 2e-05, "loss": 0.7429, "step": 379 }, { "epoch": 0.03010497128144187, "grad_norm": 2.9202579325609848, "learning_rate": 1.999999967077406e-05, "loss": 0.7807, "step": 380 }, { "epoch": 0.030184194890077242, "grad_norm": 3.664901234494972, "learning_rate": 1.9999998683096255e-05, "loss": 0.6887, "step": 381 }, { "epoch": 0.030263418498712617, "grad_norm": 3.7705786876071548, "learning_rate": 1.999999703696666e-05, "loss": 0.8314, "step": 382 }, { "epoch": 0.03034264210734799, "grad_norm": 2.9239267082931084, "learning_rate": 1.999999473238537e-05, "loss": 0.6929, "step": 383 }, { "epoch": 0.030421865715983362, "grad_norm": 3.1488338802910625, "learning_rate": 1.9999991769352545e-05, "loss": 0.78, "step": 384 }, { "epoch": 0.030501089324618737, "grad_norm": 3.1924989010982188, "learning_rate": 1.9999988147868384e-05, "loss": 0.774, "step": 385 }, { "epoch": 0.03058031293325411, "grad_norm": 2.9362412624508267, "learning_rate": 1.9999983867933114e-05, "loss": 0.6999, "step": 386 }, { "epoch": 0.030659536541889482, "grad_norm": 3.05407156990383, "learning_rate": 1.999997892954703e-05, "loss": 0.7604, "step": 387 }, { "epoch": 0.030738760150524857, "grad_norm": 3.0079815654319813, "learning_rate": 1.9999973332710443e-05, "loss": 0.7542, "step": 388 }, { "epoch": 0.03081798375916023, "grad_norm": 2.9564707196198143, "learning_rate": 1.9999967077423732e-05, "loss": 0.6401, "step": 389 }, { "epoch": 0.030897207367795602, "grad_norm": 3.3880826537854043, "learning_rate": 1.9999960163687307e-05, "loss": 0.6489, "step": 390 }, { "epoch": 0.030976430976430977, "grad_norm": 3.00111825208427, "learning_rate": 1.999995259150162e-05, "loss": 0.65, "step": 391 }, { "epoch": 0.031055654585066348, "grad_norm": 3.6544433389417113, "learning_rate": 1.999994436086717e-05, "loss": 0.7, "step": 392 }, { "epoch": 0.031134878193701723, "grad_norm": 3.660774889293522, "learning_rate": 1.9999935471784508e-05, "loss": 0.789, "step": 393 }, { "epoch": 0.031214101802337097, "grad_norm": 3.026834031119743, "learning_rate": 1.9999925924254203e-05, "loss": 0.6067, "step": 394 }, { "epoch": 0.03129332541097247, "grad_norm": 3.317335090945264, "learning_rate": 1.9999915718276898e-05, "loss": 0.7867, "step": 395 }, { "epoch": 0.03137254901960784, "grad_norm": 3.5844819431308514, "learning_rate": 1.9999904853853256e-05, "loss": 0.6939, "step": 396 }, { "epoch": 0.03145177262824322, "grad_norm": 2.762998963626351, "learning_rate": 1.9999893330983998e-05, "loss": 0.5991, "step": 397 }, { "epoch": 0.03153099623687859, "grad_norm": 3.3876228445124448, "learning_rate": 1.999988114966988e-05, "loss": 0.7375, "step": 398 }, { "epoch": 0.031610219845513966, "grad_norm": 4.3883250638148645, "learning_rate": 1.9999868309911704e-05, "loss": 0.7754, "step": 399 }, { "epoch": 0.031689443454149334, "grad_norm": 3.392568834553705, "learning_rate": 1.9999854811710317e-05, "loss": 0.7602, "step": 400 }, { "epoch": 0.03176866706278471, "grad_norm": 3.02439206130161, "learning_rate": 1.9999840655066608e-05, "loss": 0.8112, "step": 401 }, { "epoch": 0.03184789067142008, "grad_norm": 3.1035894348375943, "learning_rate": 1.9999825839981506e-05, "loss": 0.7849, "step": 402 }, { "epoch": 0.03192711428005546, "grad_norm": 3.3498760431884147, "learning_rate": 1.9999810366455986e-05, "loss": 0.7188, "step": 403 }, { "epoch": 0.03200633788869083, "grad_norm": 3.0627182954455456, "learning_rate": 1.9999794234491075e-05, "loss": 0.7826, "step": 404 }, { "epoch": 0.03208556149732621, "grad_norm": 3.2296720837773853, "learning_rate": 1.9999777444087826e-05, "loss": 0.7912, "step": 405 }, { "epoch": 0.032164785105961574, "grad_norm": 2.922693644758063, "learning_rate": 1.999975999524735e-05, "loss": 0.7255, "step": 406 }, { "epoch": 0.03224400871459695, "grad_norm": 2.8195517460950814, "learning_rate": 1.9999741887970795e-05, "loss": 0.6582, "step": 407 }, { "epoch": 0.03232323232323232, "grad_norm": 2.9473629113911604, "learning_rate": 1.999972312225935e-05, "loss": 0.7149, "step": 408 }, { "epoch": 0.0324024559318677, "grad_norm": 3.3702272349427056, "learning_rate": 1.999970369811425e-05, "loss": 0.8295, "step": 409 }, { "epoch": 0.03248167954050307, "grad_norm": 3.475859899358589, "learning_rate": 1.9999683615536784e-05, "loss": 0.6059, "step": 410 }, { "epoch": 0.03256090314913844, "grad_norm": 2.652656758009029, "learning_rate": 1.9999662874528264e-05, "loss": 0.6792, "step": 411 }, { "epoch": 0.032640126757773814, "grad_norm": 3.1009047844442295, "learning_rate": 1.999964147509006e-05, "loss": 0.678, "step": 412 }, { "epoch": 0.03271935036640919, "grad_norm": 3.1179207540858416, "learning_rate": 1.999961941722358e-05, "loss": 0.7409, "step": 413 }, { "epoch": 0.03279857397504456, "grad_norm": 2.8619668085480496, "learning_rate": 1.9999596700930274e-05, "loss": 0.6693, "step": 414 }, { "epoch": 0.03287779758367994, "grad_norm": 2.962266476851864, "learning_rate": 1.999957332621164e-05, "loss": 0.7561, "step": 415 }, { "epoch": 0.03295702119231531, "grad_norm": 3.224454599986635, "learning_rate": 1.999954929306922e-05, "loss": 0.677, "step": 416 }, { "epoch": 0.03303624480095068, "grad_norm": 3.3015275650816043, "learning_rate": 1.999952460150459e-05, "loss": 0.8423, "step": 417 }, { "epoch": 0.033115468409586055, "grad_norm": 2.855492149164199, "learning_rate": 1.9999499251519388e-05, "loss": 0.7511, "step": 418 }, { "epoch": 0.03319469201822143, "grad_norm": 3.8401180843939384, "learning_rate": 1.9999473243115268e-05, "loss": 0.8389, "step": 419 }, { "epoch": 0.033273915626856804, "grad_norm": 2.8469379162486432, "learning_rate": 1.999944657629395e-05, "loss": 0.7397, "step": 420 }, { "epoch": 0.03335313923549218, "grad_norm": 3.2696244839813335, "learning_rate": 1.999941925105719e-05, "loss": 0.7419, "step": 421 }, { "epoch": 0.03343236284412755, "grad_norm": 3.1230839546840063, "learning_rate": 1.9999391267406786e-05, "loss": 0.8035, "step": 422 }, { "epoch": 0.03351158645276292, "grad_norm": 2.9283918023315363, "learning_rate": 1.9999362625344584e-05, "loss": 0.6943, "step": 423 }, { "epoch": 0.033590810061398295, "grad_norm": 3.1936284231474725, "learning_rate": 1.9999333324872464e-05, "loss": 0.9064, "step": 424 }, { "epoch": 0.03367003367003367, "grad_norm": 2.8875438705586474, "learning_rate": 1.9999303365992357e-05, "loss": 0.7968, "step": 425 }, { "epoch": 0.033749257278669044, "grad_norm": 2.400006216231242, "learning_rate": 1.999927274870624e-05, "loss": 0.5922, "step": 426 }, { "epoch": 0.03382848088730442, "grad_norm": 2.999635943694363, "learning_rate": 1.9999241473016126e-05, "loss": 0.7664, "step": 427 }, { "epoch": 0.03390770449593979, "grad_norm": 2.958039689462677, "learning_rate": 1.999920953892407e-05, "loss": 0.7659, "step": 428 }, { "epoch": 0.03398692810457516, "grad_norm": 3.0957704885821955, "learning_rate": 1.9999176946432183e-05, "loss": 0.6943, "step": 429 }, { "epoch": 0.034066151713210535, "grad_norm": 2.924849074337557, "learning_rate": 1.9999143695542606e-05, "loss": 0.744, "step": 430 }, { "epoch": 0.03414537532184591, "grad_norm": 3.466886521758863, "learning_rate": 1.9999109786257528e-05, "loss": 0.8334, "step": 431 }, { "epoch": 0.034224598930481284, "grad_norm": 2.899606871934526, "learning_rate": 1.9999075218579184e-05, "loss": 0.722, "step": 432 }, { "epoch": 0.03430382253911666, "grad_norm": 2.438949140716664, "learning_rate": 1.999903999250985e-05, "loss": 0.6094, "step": 433 }, { "epoch": 0.03438304614775203, "grad_norm": 3.1741480519318954, "learning_rate": 1.9999004108051846e-05, "loss": 0.721, "step": 434 }, { "epoch": 0.0344622697563874, "grad_norm": 2.7943785874454408, "learning_rate": 1.999896756520753e-05, "loss": 0.6219, "step": 435 }, { "epoch": 0.034541493365022775, "grad_norm": 2.8887590844679036, "learning_rate": 1.9998930363979315e-05, "loss": 0.6477, "step": 436 }, { "epoch": 0.03462071697365815, "grad_norm": 3.4787657190867733, "learning_rate": 1.999889250436965e-05, "loss": 0.726, "step": 437 }, { "epoch": 0.034699940582293524, "grad_norm": 3.57284819177238, "learning_rate": 1.9998853986381018e-05, "loss": 0.6634, "step": 438 }, { "epoch": 0.0347791641909289, "grad_norm": 3.613442654515561, "learning_rate": 1.9998814810015968e-05, "loss": 0.6943, "step": 439 }, { "epoch": 0.034858387799564274, "grad_norm": 3.218407392773835, "learning_rate": 1.9998774975277074e-05, "loss": 0.7278, "step": 440 }, { "epoch": 0.03493761140819964, "grad_norm": 2.728775105672768, "learning_rate": 1.9998734482166954e-05, "loss": 0.5458, "step": 441 }, { "epoch": 0.035016835016835016, "grad_norm": 3.005926546632378, "learning_rate": 1.9998693330688283e-05, "loss": 0.7494, "step": 442 }, { "epoch": 0.03509605862547039, "grad_norm": 3.7011643987907146, "learning_rate": 1.9998651520843766e-05, "loss": 0.7735, "step": 443 }, { "epoch": 0.035175282234105765, "grad_norm": 2.9851244415240963, "learning_rate": 1.999860905263616e-05, "loss": 0.6974, "step": 444 }, { "epoch": 0.03525450584274114, "grad_norm": 3.428898050367164, "learning_rate": 1.9998565926068253e-05, "loss": 0.7623, "step": 445 }, { "epoch": 0.03533372945137651, "grad_norm": 2.8356099510017088, "learning_rate": 1.999852214114289e-05, "loss": 0.6526, "step": 446 }, { "epoch": 0.03541295306001188, "grad_norm": 2.594817507939691, "learning_rate": 1.9998477697862956e-05, "loss": 0.6711, "step": 447 }, { "epoch": 0.035492176668647256, "grad_norm": 3.235172623698129, "learning_rate": 1.9998432596231373e-05, "loss": 0.7407, "step": 448 }, { "epoch": 0.03557140027728263, "grad_norm": 2.7348183642356796, "learning_rate": 1.9998386836251116e-05, "loss": 0.6542, "step": 449 }, { "epoch": 0.035650623885918005, "grad_norm": 3.272985917072195, "learning_rate": 1.9998340417925193e-05, "loss": 0.6998, "step": 450 }, { "epoch": 0.03572984749455338, "grad_norm": 3.8264584256652254, "learning_rate": 1.9998293341256664e-05, "loss": 0.797, "step": 451 }, { "epoch": 0.03580907110318875, "grad_norm": 3.21843345949715, "learning_rate": 1.9998245606248627e-05, "loss": 0.7336, "step": 452 }, { "epoch": 0.03588829471182412, "grad_norm": 3.036525917040152, "learning_rate": 1.999819721290422e-05, "loss": 0.6946, "step": 453 }, { "epoch": 0.035967518320459496, "grad_norm": 2.8182487471269044, "learning_rate": 1.9998148161226645e-05, "loss": 0.6617, "step": 454 }, { "epoch": 0.03604674192909487, "grad_norm": 2.9361313245946223, "learning_rate": 1.9998098451219115e-05, "loss": 0.675, "step": 455 }, { "epoch": 0.036125965537730245, "grad_norm": 3.7030365958009726, "learning_rate": 1.999804808288491e-05, "loss": 0.796, "step": 456 }, { "epoch": 0.03620518914636562, "grad_norm": 3.5489158000641896, "learning_rate": 1.9997997056227347e-05, "loss": 0.8261, "step": 457 }, { "epoch": 0.03628441275500099, "grad_norm": 2.7238264832859973, "learning_rate": 1.9997945371249784e-05, "loss": 0.6541, "step": 458 }, { "epoch": 0.03636363636363636, "grad_norm": 3.3916413944293984, "learning_rate": 1.999789302795563e-05, "loss": 0.6755, "step": 459 }, { "epoch": 0.036442859972271736, "grad_norm": 3.0983398976573184, "learning_rate": 1.999784002634832e-05, "loss": 0.7259, "step": 460 }, { "epoch": 0.03652208358090711, "grad_norm": 2.8820099131603234, "learning_rate": 1.9997786366431354e-05, "loss": 0.7672, "step": 461 }, { "epoch": 0.036601307189542485, "grad_norm": 2.640988448612532, "learning_rate": 1.9997732048208264e-05, "loss": 0.6634, "step": 462 }, { "epoch": 0.03668053079817786, "grad_norm": 3.072518897736735, "learning_rate": 1.9997677071682623e-05, "loss": 0.6494, "step": 463 }, { "epoch": 0.03675975440681323, "grad_norm": 2.9755891729458304, "learning_rate": 1.9997621436858053e-05, "loss": 0.7039, "step": 464 }, { "epoch": 0.0368389780154486, "grad_norm": 2.6731496434863637, "learning_rate": 1.9997565143738216e-05, "loss": 0.6326, "step": 465 }, { "epoch": 0.03691820162408398, "grad_norm": 3.013420128072819, "learning_rate": 1.999750819232682e-05, "loss": 0.7497, "step": 466 }, { "epoch": 0.03699742523271935, "grad_norm": 2.8700835063291374, "learning_rate": 1.9997450582627614e-05, "loss": 0.6467, "step": 467 }, { "epoch": 0.037076648841354726, "grad_norm": 2.441887982804938, "learning_rate": 1.9997392314644392e-05, "loss": 0.5403, "step": 468 }, { "epoch": 0.0371558724499901, "grad_norm": 2.627724197173415, "learning_rate": 1.999733338838099e-05, "loss": 0.5625, "step": 469 }, { "epoch": 0.03723509605862547, "grad_norm": 3.2004314480087097, "learning_rate": 1.999727380384129e-05, "loss": 0.7292, "step": 470 }, { "epoch": 0.03731431966726084, "grad_norm": 2.6246971490359643, "learning_rate": 1.999721356102921e-05, "loss": 0.5749, "step": 471 }, { "epoch": 0.03739354327589622, "grad_norm": 2.8178098789578265, "learning_rate": 1.9997152659948727e-05, "loss": 0.6766, "step": 472 }, { "epoch": 0.03747276688453159, "grad_norm": 2.751576181721114, "learning_rate": 1.9997091100603842e-05, "loss": 0.751, "step": 473 }, { "epoch": 0.037551990493166966, "grad_norm": 2.8643044061564487, "learning_rate": 1.999702888299861e-05, "loss": 0.7831, "step": 474 }, { "epoch": 0.03763121410180234, "grad_norm": 3.117882607758987, "learning_rate": 1.9996966007137125e-05, "loss": 0.7705, "step": 475 }, { "epoch": 0.03771043771043771, "grad_norm": 2.9402830765122383, "learning_rate": 1.9996902473023537e-05, "loss": 0.6483, "step": 476 }, { "epoch": 0.03778966131907308, "grad_norm": 3.282518962096688, "learning_rate": 1.999683828066202e-05, "loss": 0.6724, "step": 477 }, { "epoch": 0.03786888492770846, "grad_norm": 3.2056871207163846, "learning_rate": 1.9996773430056806e-05, "loss": 0.7173, "step": 478 }, { "epoch": 0.03794810853634383, "grad_norm": 2.9749120725141576, "learning_rate": 1.999670792121216e-05, "loss": 0.6135, "step": 479 }, { "epoch": 0.038027332144979206, "grad_norm": 2.9635473200006155, "learning_rate": 1.99966417541324e-05, "loss": 0.6571, "step": 480 }, { "epoch": 0.038106555753614574, "grad_norm": 2.928957653416349, "learning_rate": 1.9996574928821883e-05, "loss": 0.5789, "step": 481 }, { "epoch": 0.03818577936224995, "grad_norm": 2.98391492816687, "learning_rate": 1.9996507445285003e-05, "loss": 0.7262, "step": 482 }, { "epoch": 0.03826500297088532, "grad_norm": 2.800091720516489, "learning_rate": 1.999643930352621e-05, "loss": 0.633, "step": 483 }, { "epoch": 0.0383442265795207, "grad_norm": 3.2330381177522893, "learning_rate": 1.999637050354999e-05, "loss": 0.6621, "step": 484 }, { "epoch": 0.03842345018815607, "grad_norm": 2.552098027501547, "learning_rate": 1.9996301045360874e-05, "loss": 0.6485, "step": 485 }, { "epoch": 0.038502673796791446, "grad_norm": 2.807944468047814, "learning_rate": 1.999623092896343e-05, "loss": 0.6129, "step": 486 }, { "epoch": 0.038581897405426814, "grad_norm": 4.063652502957622, "learning_rate": 1.9996160154362275e-05, "loss": 0.835, "step": 487 }, { "epoch": 0.03866112101406219, "grad_norm": 2.661945008145644, "learning_rate": 1.9996088721562076e-05, "loss": 0.6821, "step": 488 }, { "epoch": 0.03874034462269756, "grad_norm": 2.6965061288315693, "learning_rate": 1.9996016630567535e-05, "loss": 0.6292, "step": 489 }, { "epoch": 0.03881956823133294, "grad_norm": 3.0953770748312848, "learning_rate": 1.9995943881383393e-05, "loss": 0.6311, "step": 490 }, { "epoch": 0.03889879183996831, "grad_norm": 2.785452994251254, "learning_rate": 1.9995870474014444e-05, "loss": 0.6612, "step": 491 }, { "epoch": 0.03897801544860369, "grad_norm": 2.9224780599158673, "learning_rate": 1.9995796408465523e-05, "loss": 0.6748, "step": 492 }, { "epoch": 0.039057239057239054, "grad_norm": 4.149683108294913, "learning_rate": 1.9995721684741505e-05, "loss": 0.8614, "step": 493 }, { "epoch": 0.03913646266587443, "grad_norm": 3.1582695477828517, "learning_rate": 1.9995646302847307e-05, "loss": 0.7074, "step": 494 }, { "epoch": 0.0392156862745098, "grad_norm": 2.7600043242543313, "learning_rate": 1.9995570262787903e-05, "loss": 0.6541, "step": 495 }, { "epoch": 0.03929490988314518, "grad_norm": 3.1489370384526953, "learning_rate": 1.9995493564568286e-05, "loss": 0.7382, "step": 496 }, { "epoch": 0.03937413349178055, "grad_norm": 2.8524636809738855, "learning_rate": 1.9995416208193518e-05, "loss": 0.7343, "step": 497 }, { "epoch": 0.03945335710041593, "grad_norm": 2.958917817282755, "learning_rate": 1.999533819366868e-05, "loss": 0.6717, "step": 498 }, { "epoch": 0.039532580709051295, "grad_norm": 2.9267099722503582, "learning_rate": 1.9995259520998927e-05, "loss": 0.7438, "step": 499 }, { "epoch": 0.03961180431768667, "grad_norm": 2.695233361159774, "learning_rate": 1.9995180190189424e-05, "loss": 0.6826, "step": 500 }, { "epoch": 0.039691027926322044, "grad_norm": 2.5503050358416206, "learning_rate": 1.9995100201245397e-05, "loss": 0.6195, "step": 501 }, { "epoch": 0.03977025153495742, "grad_norm": 3.1679747148764723, "learning_rate": 1.999501955417212e-05, "loss": 0.5611, "step": 502 }, { "epoch": 0.03984947514359279, "grad_norm": 2.673508667082099, "learning_rate": 1.999493824897489e-05, "loss": 0.6866, "step": 503 }, { "epoch": 0.03992869875222817, "grad_norm": 2.712319456941955, "learning_rate": 1.9994856285659073e-05, "loss": 0.7074, "step": 504 }, { "epoch": 0.040007922360863535, "grad_norm": 3.1805104047232295, "learning_rate": 1.9994773664230064e-05, "loss": 0.6495, "step": 505 }, { "epoch": 0.04008714596949891, "grad_norm": 3.0759353713991215, "learning_rate": 1.99946903846933e-05, "loss": 0.7408, "step": 506 }, { "epoch": 0.040166369578134284, "grad_norm": 3.7609544068127594, "learning_rate": 1.9994606447054265e-05, "loss": 0.7069, "step": 507 }, { "epoch": 0.04024559318676966, "grad_norm": 3.1347559824941316, "learning_rate": 1.999452185131849e-05, "loss": 0.8218, "step": 508 }, { "epoch": 0.04032481679540503, "grad_norm": 2.926264130145264, "learning_rate": 1.9994436597491537e-05, "loss": 0.7224, "step": 509 }, { "epoch": 0.04040404040404041, "grad_norm": 3.2332216646277736, "learning_rate": 1.9994350685579024e-05, "loss": 0.7147, "step": 510 }, { "epoch": 0.040483264012675775, "grad_norm": 2.999816638567332, "learning_rate": 1.999426411558661e-05, "loss": 0.7248, "step": 511 }, { "epoch": 0.04056248762131115, "grad_norm": 2.7011088575440314, "learning_rate": 1.9994176887519994e-05, "loss": 0.6763, "step": 512 }, { "epoch": 0.040641711229946524, "grad_norm": 2.875825212497789, "learning_rate": 1.9994089001384918e-05, "loss": 0.652, "step": 513 }, { "epoch": 0.0407209348385819, "grad_norm": 2.7613176935656845, "learning_rate": 1.9994000457187167e-05, "loss": 0.6271, "step": 514 }, { "epoch": 0.04080015844721727, "grad_norm": 3.0338670732042847, "learning_rate": 1.999391125493258e-05, "loss": 0.6945, "step": 515 }, { "epoch": 0.04087938205585264, "grad_norm": 2.9271041032335794, "learning_rate": 1.9993821394627018e-05, "loss": 0.7362, "step": 516 }, { "epoch": 0.040958605664488015, "grad_norm": 2.764063485558089, "learning_rate": 1.9993730876276407e-05, "loss": 0.6527, "step": 517 }, { "epoch": 0.04103782927312339, "grad_norm": 3.2356110659940462, "learning_rate": 1.9993639699886707e-05, "loss": 0.8494, "step": 518 }, { "epoch": 0.041117052881758764, "grad_norm": 2.711864109774637, "learning_rate": 1.9993547865463916e-05, "loss": 0.7577, "step": 519 }, { "epoch": 0.04119627649039414, "grad_norm": 2.7076231482128525, "learning_rate": 1.9993455373014087e-05, "loss": 0.7267, "step": 520 }, { "epoch": 0.04127550009902951, "grad_norm": 2.522840786648438, "learning_rate": 1.99933622225433e-05, "loss": 0.6705, "step": 521 }, { "epoch": 0.04135472370766488, "grad_norm": 2.4498342280290197, "learning_rate": 1.9993268414057704e-05, "loss": 0.6374, "step": 522 }, { "epoch": 0.041433947316300256, "grad_norm": 2.779138934794209, "learning_rate": 1.9993173947563466e-05, "loss": 0.6527, "step": 523 }, { "epoch": 0.04151317092493563, "grad_norm": 3.209255526032306, "learning_rate": 1.9993078823066804e-05, "loss": 0.7538, "step": 524 }, { "epoch": 0.041592394533571005, "grad_norm": 2.7573208701047967, "learning_rate": 1.9992983040573986e-05, "loss": 0.7038, "step": 525 }, { "epoch": 0.04167161814220638, "grad_norm": 3.277805689495167, "learning_rate": 1.9992886600091318e-05, "loss": 0.6516, "step": 526 }, { "epoch": 0.041750841750841754, "grad_norm": 3.1060003358891355, "learning_rate": 1.9992789501625155e-05, "loss": 0.7219, "step": 527 }, { "epoch": 0.04183006535947712, "grad_norm": 3.558104529708639, "learning_rate": 1.9992691745181882e-05, "loss": 0.787, "step": 528 }, { "epoch": 0.041909288968112496, "grad_norm": 3.511454003505485, "learning_rate": 1.9992593330767938e-05, "loss": 0.6802, "step": 529 }, { "epoch": 0.04198851257674787, "grad_norm": 2.8584552202792386, "learning_rate": 1.9992494258389805e-05, "loss": 0.67, "step": 530 }, { "epoch": 0.042067736185383245, "grad_norm": 2.844480677624732, "learning_rate": 1.9992394528054006e-05, "loss": 0.6093, "step": 531 }, { "epoch": 0.04214695979401862, "grad_norm": 2.6071483043285983, "learning_rate": 1.9992294139767106e-05, "loss": 0.5991, "step": 532 }, { "epoch": 0.042226183402653994, "grad_norm": 2.7460919041493788, "learning_rate": 1.999219309353572e-05, "loss": 0.6853, "step": 533 }, { "epoch": 0.04230540701128936, "grad_norm": 2.5187884497002933, "learning_rate": 1.9992091389366497e-05, "loss": 0.6424, "step": 534 }, { "epoch": 0.042384630619924736, "grad_norm": 2.897588748423445, "learning_rate": 1.9991989027266134e-05, "loss": 0.5928, "step": 535 }, { "epoch": 0.04246385422856011, "grad_norm": 2.696778484381764, "learning_rate": 1.9991886007241375e-05, "loss": 0.697, "step": 536 }, { "epoch": 0.042543077837195485, "grad_norm": 3.1141253510387403, "learning_rate": 1.9991782329298998e-05, "loss": 0.638, "step": 537 }, { "epoch": 0.04262230144583086, "grad_norm": 2.7855091294178185, "learning_rate": 1.9991677993445832e-05, "loss": 0.7393, "step": 538 }, { "epoch": 0.042701525054466234, "grad_norm": 3.211922209710532, "learning_rate": 1.9991572999688746e-05, "loss": 0.7502, "step": 539 }, { "epoch": 0.0427807486631016, "grad_norm": 2.7847666934413233, "learning_rate": 1.9991467348034653e-05, "loss": 0.7073, "step": 540 }, { "epoch": 0.042859972271736976, "grad_norm": 3.1466928505805254, "learning_rate": 1.9991361038490515e-05, "loss": 0.7464, "step": 541 }, { "epoch": 0.04293919588037235, "grad_norm": 2.9109729397270394, "learning_rate": 1.9991254071063327e-05, "loss": 0.6223, "step": 542 }, { "epoch": 0.043018419489007725, "grad_norm": 2.9595310235056376, "learning_rate": 1.9991146445760133e-05, "loss": 0.7574, "step": 543 }, { "epoch": 0.0430976430976431, "grad_norm": 2.578088025546306, "learning_rate": 1.9991038162588018e-05, "loss": 0.729, "step": 544 }, { "epoch": 0.043176866706278474, "grad_norm": 2.5866156442876784, "learning_rate": 1.9990929221554117e-05, "loss": 0.5977, "step": 545 }, { "epoch": 0.04325609031491384, "grad_norm": 2.615899734064934, "learning_rate": 1.99908196226656e-05, "loss": 0.7644, "step": 546 }, { "epoch": 0.04333531392354922, "grad_norm": 3.012293580248462, "learning_rate": 1.9990709365929678e-05, "loss": 0.5903, "step": 547 }, { "epoch": 0.04341453753218459, "grad_norm": 2.6347402725839952, "learning_rate": 1.999059845135362e-05, "loss": 0.7533, "step": 548 }, { "epoch": 0.043493761140819966, "grad_norm": 2.804277972290371, "learning_rate": 1.9990486878944727e-05, "loss": 0.7683, "step": 549 }, { "epoch": 0.04357298474945534, "grad_norm": 3.298628451797767, "learning_rate": 1.9990374648710343e-05, "loss": 0.7281, "step": 550 }, { "epoch": 0.04365220835809071, "grad_norm": 2.5461350179164435, "learning_rate": 1.9990261760657858e-05, "loss": 0.5977, "step": 551 }, { "epoch": 0.04373143196672608, "grad_norm": 2.878080238406779, "learning_rate": 1.9990148214794713e-05, "loss": 0.6163, "step": 552 }, { "epoch": 0.04381065557536146, "grad_norm": 2.8437619434504833, "learning_rate": 1.999003401112837e-05, "loss": 0.5998, "step": 553 }, { "epoch": 0.04388987918399683, "grad_norm": 3.257285964911966, "learning_rate": 1.9989919149666356e-05, "loss": 0.7809, "step": 554 }, { "epoch": 0.043969102792632206, "grad_norm": 2.6453604089495237, "learning_rate": 1.998980363041624e-05, "loss": 0.6726, "step": 555 }, { "epoch": 0.04404832640126758, "grad_norm": 2.9857843193323936, "learning_rate": 1.9989687453385617e-05, "loss": 0.6915, "step": 556 }, { "epoch": 0.04412755000990295, "grad_norm": 2.574734905677725, "learning_rate": 1.9989570618582145e-05, "loss": 0.5921, "step": 557 }, { "epoch": 0.04420677361853832, "grad_norm": 2.9648203643839453, "learning_rate": 1.9989453126013515e-05, "loss": 0.6709, "step": 558 }, { "epoch": 0.0442859972271737, "grad_norm": 2.9420517654172564, "learning_rate": 1.9989334975687462e-05, "loss": 0.6243, "step": 559 }, { "epoch": 0.04436522083580907, "grad_norm": 2.7075965434973446, "learning_rate": 1.9989216167611766e-05, "loss": 0.6519, "step": 560 }, { "epoch": 0.044444444444444446, "grad_norm": 3.4922142754295864, "learning_rate": 1.998909670179425e-05, "loss": 0.7702, "step": 561 }, { "epoch": 0.04452366805307982, "grad_norm": 2.471925624577639, "learning_rate": 1.9988976578242785e-05, "loss": 0.5786, "step": 562 }, { "epoch": 0.04460289166171519, "grad_norm": 2.6487943441010824, "learning_rate": 1.9988855796965275e-05, "loss": 0.5797, "step": 563 }, { "epoch": 0.04468211527035056, "grad_norm": 2.947701025591218, "learning_rate": 1.998873435796967e-05, "loss": 0.6425, "step": 564 }, { "epoch": 0.04476133887898594, "grad_norm": 2.9218173867935384, "learning_rate": 1.9988612261263972e-05, "loss": 0.6799, "step": 565 }, { "epoch": 0.04484056248762131, "grad_norm": 3.177205720590406, "learning_rate": 1.9988489506856218e-05, "loss": 0.7271, "step": 566 }, { "epoch": 0.044919786096256686, "grad_norm": 2.2078488201996302, "learning_rate": 1.9988366094754493e-05, "loss": 0.5412, "step": 567 }, { "epoch": 0.04499900970489206, "grad_norm": 2.777484065929777, "learning_rate": 1.9988242024966924e-05, "loss": 0.7146, "step": 568 }, { "epoch": 0.04507823331352743, "grad_norm": 2.6607829463343857, "learning_rate": 1.9988117297501674e-05, "loss": 0.5175, "step": 569 }, { "epoch": 0.0451574569221628, "grad_norm": 3.1262530114276723, "learning_rate": 1.998799191236696e-05, "loss": 0.5948, "step": 570 }, { "epoch": 0.04523668053079818, "grad_norm": 2.6460522397025845, "learning_rate": 1.998786586957104e-05, "loss": 0.58, "step": 571 }, { "epoch": 0.04531590413943355, "grad_norm": 2.338051691139102, "learning_rate": 1.998773916912221e-05, "loss": 0.5258, "step": 572 }, { "epoch": 0.04539512774806893, "grad_norm": 3.1732048495489216, "learning_rate": 1.9987611811028814e-05, "loss": 0.7305, "step": 573 }, { "epoch": 0.0454743513567043, "grad_norm": 3.2720050513201784, "learning_rate": 1.9987483795299236e-05, "loss": 0.7008, "step": 574 }, { "epoch": 0.04555357496533967, "grad_norm": 2.6673925623928763, "learning_rate": 1.9987355121941907e-05, "loss": 0.5623, "step": 575 }, { "epoch": 0.04563279857397504, "grad_norm": 2.9195876692283096, "learning_rate": 1.99872257909653e-05, "loss": 0.6321, "step": 576 }, { "epoch": 0.04571202218261042, "grad_norm": 2.7217386213900614, "learning_rate": 1.9987095802377933e-05, "loss": 0.6518, "step": 577 }, { "epoch": 0.04579124579124579, "grad_norm": 3.2974273164470427, "learning_rate": 1.9986965156188357e-05, "loss": 0.7561, "step": 578 }, { "epoch": 0.04587046939988117, "grad_norm": 2.9400032161999388, "learning_rate": 1.9986833852405183e-05, "loss": 0.6642, "step": 579 }, { "epoch": 0.045949693008516534, "grad_norm": 2.7068679155876376, "learning_rate": 1.9986701891037053e-05, "loss": 0.6378, "step": 580 }, { "epoch": 0.04602891661715191, "grad_norm": 3.9742121301651583, "learning_rate": 1.9986569272092656e-05, "loss": 0.7511, "step": 581 }, { "epoch": 0.046108140225787284, "grad_norm": 3.242659719526242, "learning_rate": 1.9986435995580725e-05, "loss": 0.761, "step": 582 }, { "epoch": 0.04618736383442266, "grad_norm": 2.895531543417738, "learning_rate": 1.9986302061510036e-05, "loss": 0.6983, "step": 583 }, { "epoch": 0.04626658744305803, "grad_norm": 2.8702758756080415, "learning_rate": 1.9986167469889405e-05, "loss": 0.6306, "step": 584 }, { "epoch": 0.04634581105169341, "grad_norm": 2.1347346059102748, "learning_rate": 1.9986032220727698e-05, "loss": 0.6373, "step": 585 }, { "epoch": 0.046425034660328775, "grad_norm": 2.8633299012680866, "learning_rate": 1.9985896314033816e-05, "loss": 0.6229, "step": 586 }, { "epoch": 0.04650425826896415, "grad_norm": 3.186312914456255, "learning_rate": 1.9985759749816715e-05, "loss": 0.7059, "step": 587 }, { "epoch": 0.046583481877599524, "grad_norm": 3.238427936423136, "learning_rate": 1.9985622528085382e-05, "loss": 0.8296, "step": 588 }, { "epoch": 0.0466627054862349, "grad_norm": 2.4922093733575577, "learning_rate": 1.9985484648848854e-05, "loss": 0.6537, "step": 589 }, { "epoch": 0.04674192909487027, "grad_norm": 2.7315794081336238, "learning_rate": 1.9985346112116207e-05, "loss": 0.6153, "step": 590 }, { "epoch": 0.04682115270350565, "grad_norm": 2.9502974462280163, "learning_rate": 1.9985206917896563e-05, "loss": 0.7039, "step": 591 }, { "epoch": 0.046900376312141015, "grad_norm": 2.7589366963533455, "learning_rate": 1.9985067066199093e-05, "loss": 0.712, "step": 592 }, { "epoch": 0.04697959992077639, "grad_norm": 2.6013313259748894, "learning_rate": 1.9984926557033003e-05, "loss": 0.5692, "step": 593 }, { "epoch": 0.047058823529411764, "grad_norm": 2.539902134310688, "learning_rate": 1.998478539040754e-05, "loss": 0.6376, "step": 594 }, { "epoch": 0.04713804713804714, "grad_norm": 2.8541696879373997, "learning_rate": 1.9984643566332005e-05, "loss": 0.6713, "step": 595 }, { "epoch": 0.04721727074668251, "grad_norm": 2.4711420630229806, "learning_rate": 1.9984501084815734e-05, "loss": 0.648, "step": 596 }, { "epoch": 0.04729649435531789, "grad_norm": 2.5613465734917105, "learning_rate": 1.9984357945868106e-05, "loss": 0.5956, "step": 597 }, { "epoch": 0.047375717963953255, "grad_norm": 2.8631944466667556, "learning_rate": 1.998421414949855e-05, "loss": 0.6392, "step": 598 }, { "epoch": 0.04745494157258863, "grad_norm": 3.0172239664770606, "learning_rate": 1.9984069695716534e-05, "loss": 0.7078, "step": 599 }, { "epoch": 0.047534165181224004, "grad_norm": 3.0444262679152603, "learning_rate": 1.998392458453157e-05, "loss": 0.6846, "step": 600 }, { "epoch": 0.04761338878985938, "grad_norm": 3.0072222650922678, "learning_rate": 1.998377881595321e-05, "loss": 0.79, "step": 601 }, { "epoch": 0.04769261239849475, "grad_norm": 3.5902124668081044, "learning_rate": 1.9983632389991056e-05, "loss": 0.7086, "step": 602 }, { "epoch": 0.04777183600713013, "grad_norm": 3.1212771496113625, "learning_rate": 1.9983485306654745e-05, "loss": 0.6768, "step": 603 }, { "epoch": 0.047851059615765495, "grad_norm": 2.616003039410528, "learning_rate": 1.9983337565953968e-05, "loss": 0.6232, "step": 604 }, { "epoch": 0.04793028322440087, "grad_norm": 2.591166475711505, "learning_rate": 1.9983189167898446e-05, "loss": 0.5417, "step": 605 }, { "epoch": 0.048009506833036245, "grad_norm": 2.779868819625606, "learning_rate": 1.998304011249795e-05, "loss": 0.6163, "step": 606 }, { "epoch": 0.04808873044167162, "grad_norm": 2.829172799196641, "learning_rate": 1.9982890399762303e-05, "loss": 0.6054, "step": 607 }, { "epoch": 0.048167954050306994, "grad_norm": 2.809769704582584, "learning_rate": 1.9982740029701356e-05, "loss": 0.6892, "step": 608 }, { "epoch": 0.04824717765894237, "grad_norm": 2.605794862233877, "learning_rate": 1.998258900232501e-05, "loss": 0.5969, "step": 609 }, { "epoch": 0.048326401267577736, "grad_norm": 2.7851336729890046, "learning_rate": 1.9982437317643218e-05, "loss": 0.6597, "step": 610 }, { "epoch": 0.04840562487621311, "grad_norm": 2.5018389523938454, "learning_rate": 1.9982284975665952e-05, "loss": 0.5533, "step": 611 }, { "epoch": 0.048484848484848485, "grad_norm": 2.777958124265028, "learning_rate": 1.998213197640326e-05, "loss": 0.7352, "step": 612 }, { "epoch": 0.04856407209348386, "grad_norm": 2.2012757624853174, "learning_rate": 1.9981978319865204e-05, "loss": 0.5475, "step": 613 }, { "epoch": 0.048643295702119234, "grad_norm": 2.704370088430054, "learning_rate": 1.9981824006061904e-05, "loss": 0.6201, "step": 614 }, { "epoch": 0.0487225193107546, "grad_norm": 2.5669896657704405, "learning_rate": 1.998166903500353e-05, "loss": 0.6223, "step": 615 }, { "epoch": 0.048801742919389976, "grad_norm": 2.323261161206825, "learning_rate": 1.998151340670027e-05, "loss": 0.6524, "step": 616 }, { "epoch": 0.04888096652802535, "grad_norm": 2.8889437618273837, "learning_rate": 1.9981357121162385e-05, "loss": 0.6029, "step": 617 }, { "epoch": 0.048960190136660725, "grad_norm": 2.402792617113219, "learning_rate": 1.998120017840016e-05, "loss": 0.609, "step": 618 }, { "epoch": 0.0490394137452961, "grad_norm": 2.6508978559231022, "learning_rate": 1.998104257842393e-05, "loss": 0.5763, "step": 619 }, { "epoch": 0.049118637353931474, "grad_norm": 2.4978265070674053, "learning_rate": 1.9980884321244072e-05, "loss": 0.6082, "step": 620 }, { "epoch": 0.04919786096256684, "grad_norm": 2.7797387302993912, "learning_rate": 1.9980725406871007e-05, "loss": 0.6711, "step": 621 }, { "epoch": 0.049277084571202216, "grad_norm": 3.043533496043816, "learning_rate": 1.9980565835315196e-05, "loss": 0.7228, "step": 622 }, { "epoch": 0.04935630817983759, "grad_norm": 2.536032972949813, "learning_rate": 1.9980405606587148e-05, "loss": 0.5813, "step": 623 }, { "epoch": 0.049435531788472965, "grad_norm": 2.742709408107949, "learning_rate": 1.9980244720697417e-05, "loss": 0.6454, "step": 624 }, { "epoch": 0.04951475539710834, "grad_norm": 2.7586212198110602, "learning_rate": 1.9980083177656588e-05, "loss": 0.6294, "step": 625 }, { "epoch": 0.049593979005743714, "grad_norm": 2.670816947943694, "learning_rate": 1.9979920977475306e-05, "loss": 0.678, "step": 626 }, { "epoch": 0.04967320261437908, "grad_norm": 2.6129776162312037, "learning_rate": 1.9979758120164248e-05, "loss": 0.5139, "step": 627 }, { "epoch": 0.049752426223014456, "grad_norm": 3.464283442736191, "learning_rate": 1.997959460573414e-05, "loss": 0.752, "step": 628 }, { "epoch": 0.04983164983164983, "grad_norm": 2.468377768061911, "learning_rate": 1.9979430434195742e-05, "loss": 0.6196, "step": 629 }, { "epoch": 0.049910873440285206, "grad_norm": 2.5856202402355626, "learning_rate": 1.9979265605559868e-05, "loss": 0.5284, "step": 630 }, { "epoch": 0.04999009704892058, "grad_norm": 2.8478163456612466, "learning_rate": 1.997910011983737e-05, "loss": 0.656, "step": 631 }, { "epoch": 0.050069320657555955, "grad_norm": 2.66801621363886, "learning_rate": 1.997893397703915e-05, "loss": 0.7702, "step": 632 }, { "epoch": 0.05014854426619132, "grad_norm": 2.6782434329778253, "learning_rate": 1.997876717717614e-05, "loss": 0.6588, "step": 633 }, { "epoch": 0.0502277678748267, "grad_norm": 2.8357230739787807, "learning_rate": 1.9978599720259325e-05, "loss": 0.636, "step": 634 }, { "epoch": 0.05030699148346207, "grad_norm": 2.67219616582363, "learning_rate": 1.9978431606299736e-05, "loss": 0.6349, "step": 635 }, { "epoch": 0.050386215092097446, "grad_norm": 2.7799780644999563, "learning_rate": 1.9978262835308437e-05, "loss": 0.604, "step": 636 }, { "epoch": 0.05046543870073282, "grad_norm": 2.6727525862396773, "learning_rate": 1.997809340729654e-05, "loss": 0.6337, "step": 637 }, { "epoch": 0.050544662309368195, "grad_norm": 2.910480479175523, "learning_rate": 1.9977923322275206e-05, "loss": 0.7388, "step": 638 }, { "epoch": 0.05062388591800356, "grad_norm": 2.600217686259559, "learning_rate": 1.997775258025563e-05, "loss": 0.696, "step": 639 }, { "epoch": 0.05070310952663894, "grad_norm": 2.627565437899395, "learning_rate": 1.997758118124906e-05, "loss": 0.6522, "step": 640 }, { "epoch": 0.05078233313527431, "grad_norm": 2.8081736199031337, "learning_rate": 1.997740912526678e-05, "loss": 0.5927, "step": 641 }, { "epoch": 0.050861556743909686, "grad_norm": 2.552202695982183, "learning_rate": 1.9977236412320112e-05, "loss": 0.6015, "step": 642 }, { "epoch": 0.05094078035254506, "grad_norm": 2.8910998372845014, "learning_rate": 1.9977063042420438e-05, "loss": 0.7683, "step": 643 }, { "epoch": 0.051020003961180435, "grad_norm": 2.9512496299669198, "learning_rate": 1.9976889015579167e-05, "loss": 0.729, "step": 644 }, { "epoch": 0.0510992275698158, "grad_norm": 2.5518080864931387, "learning_rate": 1.997671433180776e-05, "loss": 0.6124, "step": 645 }, { "epoch": 0.05117845117845118, "grad_norm": 2.37320117472414, "learning_rate": 1.997653899111772e-05, "loss": 0.5442, "step": 646 }, { "epoch": 0.05125767478708655, "grad_norm": 2.795261688628885, "learning_rate": 1.9976362993520587e-05, "loss": 0.6342, "step": 647 }, { "epoch": 0.051336898395721926, "grad_norm": 3.4267679656696703, "learning_rate": 1.9976186339027958e-05, "loss": 0.7155, "step": 648 }, { "epoch": 0.0514161220043573, "grad_norm": 3.1752769459428363, "learning_rate": 1.9976009027651463e-05, "loss": 0.7265, "step": 649 }, { "epoch": 0.05149534561299267, "grad_norm": 2.8347118851828905, "learning_rate": 1.9975831059402774e-05, "loss": 0.7014, "step": 650 }, { "epoch": 0.05157456922162804, "grad_norm": 2.6490634250358607, "learning_rate": 1.9975652434293607e-05, "loss": 0.6357, "step": 651 }, { "epoch": 0.05165379283026342, "grad_norm": 2.3694212990980352, "learning_rate": 1.9975473152335726e-05, "loss": 0.5775, "step": 652 }, { "epoch": 0.05173301643889879, "grad_norm": 2.3910356841694957, "learning_rate": 1.9975293213540942e-05, "loss": 0.5648, "step": 653 }, { "epoch": 0.05181224004753417, "grad_norm": 2.764664832140692, "learning_rate": 1.9975112617921097e-05, "loss": 0.6438, "step": 654 }, { "epoch": 0.05189146365616954, "grad_norm": 2.4686374239659363, "learning_rate": 1.997493136548808e-05, "loss": 0.6736, "step": 655 }, { "epoch": 0.05197068726480491, "grad_norm": 2.7663992114573945, "learning_rate": 1.9974749456253834e-05, "loss": 0.6061, "step": 656 }, { "epoch": 0.05204991087344028, "grad_norm": 2.811873503676391, "learning_rate": 1.9974566890230327e-05, "loss": 0.6387, "step": 657 }, { "epoch": 0.05212913448207566, "grad_norm": 2.521915684645968, "learning_rate": 1.9974383667429585e-05, "loss": 0.544, "step": 658 }, { "epoch": 0.05220835809071103, "grad_norm": 3.0204963682286685, "learning_rate": 1.9974199787863674e-05, "loss": 0.7979, "step": 659 }, { "epoch": 0.05228758169934641, "grad_norm": 2.4868389956062953, "learning_rate": 1.99740152515447e-05, "loss": 0.5599, "step": 660 }, { "epoch": 0.05236680530798178, "grad_norm": 2.3646509533453766, "learning_rate": 1.9973830058484813e-05, "loss": 0.5704, "step": 661 }, { "epoch": 0.05244602891661715, "grad_norm": 2.679785776657189, "learning_rate": 1.9973644208696208e-05, "loss": 0.6531, "step": 662 }, { "epoch": 0.052525252525252523, "grad_norm": 2.5504100604708824, "learning_rate": 1.9973457702191123e-05, "loss": 0.6136, "step": 663 }, { "epoch": 0.0526044761338879, "grad_norm": 2.4527852886555537, "learning_rate": 1.9973270538981835e-05, "loss": 0.6394, "step": 664 }, { "epoch": 0.05268369974252327, "grad_norm": 2.633989018059531, "learning_rate": 1.9973082719080673e-05, "loss": 0.635, "step": 665 }, { "epoch": 0.05276292335115865, "grad_norm": 2.6275397938688188, "learning_rate": 1.9972894242499997e-05, "loss": 0.6583, "step": 666 }, { "epoch": 0.05284214695979402, "grad_norm": 2.6031168190738745, "learning_rate": 1.9972705109252227e-05, "loss": 0.4877, "step": 667 }, { "epoch": 0.05292137056842939, "grad_norm": 2.7618723427671794, "learning_rate": 1.997251531934981e-05, "loss": 0.6604, "step": 668 }, { "epoch": 0.053000594177064764, "grad_norm": 2.4246126961990746, "learning_rate": 1.997232487280524e-05, "loss": 0.6461, "step": 669 }, { "epoch": 0.05307981778570014, "grad_norm": 2.71269251734555, "learning_rate": 1.9972133769631065e-05, "loss": 0.5875, "step": 670 }, { "epoch": 0.05315904139433551, "grad_norm": 2.842160240744585, "learning_rate": 1.9971942009839862e-05, "loss": 0.5902, "step": 671 }, { "epoch": 0.05323826500297089, "grad_norm": 2.5058083969029608, "learning_rate": 1.997174959344426e-05, "loss": 0.5104, "step": 672 }, { "epoch": 0.05331748861160626, "grad_norm": 3.095755714523418, "learning_rate": 1.9971556520456928e-05, "loss": 0.7196, "step": 673 }, { "epoch": 0.05339671222024163, "grad_norm": 2.6440602877302766, "learning_rate": 1.997136279089058e-05, "loss": 0.5158, "step": 674 }, { "epoch": 0.053475935828877004, "grad_norm": 2.380378443290473, "learning_rate": 1.9971168404757972e-05, "loss": 0.5321, "step": 675 }, { "epoch": 0.05355515943751238, "grad_norm": 2.943670103029875, "learning_rate": 1.99709733620719e-05, "loss": 0.5452, "step": 676 }, { "epoch": 0.05363438304614775, "grad_norm": 2.698056488057479, "learning_rate": 1.9970777662845212e-05, "loss": 0.5922, "step": 677 }, { "epoch": 0.05371360665478313, "grad_norm": 2.485391823486526, "learning_rate": 1.997058130709079e-05, "loss": 0.6386, "step": 678 }, { "epoch": 0.0537928302634185, "grad_norm": 2.371974473127211, "learning_rate": 1.9970384294821565e-05, "loss": 0.6121, "step": 679 }, { "epoch": 0.05387205387205387, "grad_norm": 2.287721938999729, "learning_rate": 1.9970186626050507e-05, "loss": 0.5171, "step": 680 }, { "epoch": 0.053951277480689244, "grad_norm": 2.6430969086515033, "learning_rate": 1.9969988300790636e-05, "loss": 0.6118, "step": 681 }, { "epoch": 0.05403050108932462, "grad_norm": 2.741916159906486, "learning_rate": 1.9969789319055007e-05, "loss": 0.5779, "step": 682 }, { "epoch": 0.05410972469795999, "grad_norm": 3.158648452548921, "learning_rate": 1.996958968085672e-05, "loss": 0.6893, "step": 683 }, { "epoch": 0.05418894830659537, "grad_norm": 3.020262211509068, "learning_rate": 1.9969389386208927e-05, "loss": 0.5984, "step": 684 }, { "epoch": 0.054268171915230735, "grad_norm": 2.5100076680365424, "learning_rate": 1.9969188435124812e-05, "loss": 0.6247, "step": 685 }, { "epoch": 0.05434739552386611, "grad_norm": 2.4949733461128303, "learning_rate": 1.9968986827617603e-05, "loss": 0.6169, "step": 686 }, { "epoch": 0.054426619132501484, "grad_norm": 2.429748799371095, "learning_rate": 1.9968784563700586e-05, "loss": 0.6555, "step": 687 }, { "epoch": 0.05450584274113686, "grad_norm": 2.3898313595164744, "learning_rate": 1.9968581643387065e-05, "loss": 0.5174, "step": 688 }, { "epoch": 0.054585066349772234, "grad_norm": 2.637504743240127, "learning_rate": 1.9968378066690414e-05, "loss": 0.6571, "step": 689 }, { "epoch": 0.05466428995840761, "grad_norm": 2.6575214702891583, "learning_rate": 1.996817383362403e-05, "loss": 0.5789, "step": 690 }, { "epoch": 0.054743513567042976, "grad_norm": 2.786459308227437, "learning_rate": 1.996796894420136e-05, "loss": 0.7131, "step": 691 }, { "epoch": 0.05482273717567835, "grad_norm": 2.4692945603362593, "learning_rate": 1.9967763398435904e-05, "loss": 0.5474, "step": 692 }, { "epoch": 0.054901960784313725, "grad_norm": 2.134773643144399, "learning_rate": 1.9967557196341184e-05, "loss": 0.5043, "step": 693 }, { "epoch": 0.0549811843929491, "grad_norm": 2.3494652818548194, "learning_rate": 1.996735033793079e-05, "loss": 0.5797, "step": 694 }, { "epoch": 0.055060408001584474, "grad_norm": 2.301315502766717, "learning_rate": 1.996714282321833e-05, "loss": 0.5363, "step": 695 }, { "epoch": 0.05513963161021985, "grad_norm": 2.646761240488808, "learning_rate": 1.9966934652217477e-05, "loss": 0.6053, "step": 696 }, { "epoch": 0.055218855218855216, "grad_norm": 2.415552203020133, "learning_rate": 1.9966725824941933e-05, "loss": 0.5301, "step": 697 }, { "epoch": 0.05529807882749059, "grad_norm": 3.0458682362950897, "learning_rate": 1.9966516341405452e-05, "loss": 0.6386, "step": 698 }, { "epoch": 0.055377302436125965, "grad_norm": 2.7936587781734903, "learning_rate": 1.9966306201621826e-05, "loss": 0.7439, "step": 699 }, { "epoch": 0.05545652604476134, "grad_norm": 2.8706291071527685, "learning_rate": 1.996609540560489e-05, "loss": 0.6652, "step": 700 }, { "epoch": 0.055535749653396714, "grad_norm": 2.3161894148621514, "learning_rate": 1.9965883953368527e-05, "loss": 0.5688, "step": 701 }, { "epoch": 0.05561497326203209, "grad_norm": 2.8461840065841497, "learning_rate": 1.9965671844926656e-05, "loss": 0.5667, "step": 702 }, { "epoch": 0.055694196870667456, "grad_norm": 2.9600619125581993, "learning_rate": 1.9965459080293247e-05, "loss": 0.6043, "step": 703 }, { "epoch": 0.05577342047930283, "grad_norm": 2.397708845722197, "learning_rate": 1.9965245659482312e-05, "loss": 0.5245, "step": 704 }, { "epoch": 0.055852644087938205, "grad_norm": 2.405841582394235, "learning_rate": 1.9965031582507896e-05, "loss": 0.5556, "step": 705 }, { "epoch": 0.05593186769657358, "grad_norm": 3.100420003235673, "learning_rate": 1.99648168493841e-05, "loss": 0.6072, "step": 706 }, { "epoch": 0.056011091305208954, "grad_norm": 2.889026280113388, "learning_rate": 1.996460146012506e-05, "loss": 0.7871, "step": 707 }, { "epoch": 0.05609031491384433, "grad_norm": 2.3461840103487672, "learning_rate": 1.996438541474496e-05, "loss": 0.6629, "step": 708 }, { "epoch": 0.056169538522479696, "grad_norm": 2.923544197881536, "learning_rate": 1.996416871325803e-05, "loss": 0.7147, "step": 709 }, { "epoch": 0.05624876213111507, "grad_norm": 2.3962285535277017, "learning_rate": 1.9963951355678533e-05, "loss": 0.5197, "step": 710 }, { "epoch": 0.056327985739750445, "grad_norm": 3.027128432204251, "learning_rate": 1.996373334202078e-05, "loss": 0.7684, "step": 711 }, { "epoch": 0.05640720934838582, "grad_norm": 2.477609611604304, "learning_rate": 1.9963514672299135e-05, "loss": 0.6056, "step": 712 }, { "epoch": 0.056486432957021195, "grad_norm": 2.3765566687849207, "learning_rate": 1.9963295346527984e-05, "loss": 0.5918, "step": 713 }, { "epoch": 0.05656565656565657, "grad_norm": 2.939459877213976, "learning_rate": 1.996307536472178e-05, "loss": 0.6208, "step": 714 }, { "epoch": 0.05664488017429194, "grad_norm": 2.9043336498673287, "learning_rate": 1.9962854726894997e-05, "loss": 0.6694, "step": 715 }, { "epoch": 0.05672410378292731, "grad_norm": 2.4184683194597203, "learning_rate": 1.9962633433062174e-05, "loss": 0.5481, "step": 716 }, { "epoch": 0.056803327391562686, "grad_norm": 2.4163807137940245, "learning_rate": 1.996241148323787e-05, "loss": 0.5587, "step": 717 }, { "epoch": 0.05688255100019806, "grad_norm": 2.891850644404592, "learning_rate": 1.996218887743671e-05, "loss": 0.6685, "step": 718 }, { "epoch": 0.056961774608833435, "grad_norm": 2.651254931314016, "learning_rate": 1.996196561567335e-05, "loss": 0.653, "step": 719 }, { "epoch": 0.0570409982174688, "grad_norm": 2.57945858033426, "learning_rate": 1.996174169796248e-05, "loss": 0.5611, "step": 720 }, { "epoch": 0.05712022182610418, "grad_norm": 2.2201838539606813, "learning_rate": 1.996151712431886e-05, "loss": 0.4784, "step": 721 }, { "epoch": 0.05719944543473955, "grad_norm": 2.5032642726550627, "learning_rate": 1.9961291894757267e-05, "loss": 0.6104, "step": 722 }, { "epoch": 0.057278669043374926, "grad_norm": 2.5685714936150292, "learning_rate": 1.9961066009292532e-05, "loss": 0.6313, "step": 723 }, { "epoch": 0.0573578926520103, "grad_norm": 2.8109719313699215, "learning_rate": 1.9960839467939534e-05, "loss": 0.5291, "step": 724 }, { "epoch": 0.057437116260645675, "grad_norm": 2.7771055788225065, "learning_rate": 1.996061227071318e-05, "loss": 0.6983, "step": 725 }, { "epoch": 0.05751633986928104, "grad_norm": 2.0332474807315934, "learning_rate": 1.996038441762844e-05, "loss": 0.5158, "step": 726 }, { "epoch": 0.05759556347791642, "grad_norm": 2.531385531966582, "learning_rate": 1.9960155908700306e-05, "loss": 0.3911, "step": 727 }, { "epoch": 0.05767478708655179, "grad_norm": 2.5610767100035656, "learning_rate": 1.9959926743943836e-05, "loss": 0.7105, "step": 728 }, { "epoch": 0.057754010695187166, "grad_norm": 2.4959407649057144, "learning_rate": 1.9959696923374113e-05, "loss": 0.5186, "step": 729 }, { "epoch": 0.05783323430382254, "grad_norm": 2.509142926211617, "learning_rate": 1.995946644700627e-05, "loss": 0.4419, "step": 730 }, { "epoch": 0.057912457912457915, "grad_norm": 2.222831618451293, "learning_rate": 1.9959235314855485e-05, "loss": 0.6019, "step": 731 }, { "epoch": 0.05799168152109328, "grad_norm": 2.889053167249598, "learning_rate": 1.9959003526936972e-05, "loss": 0.6789, "step": 732 }, { "epoch": 0.05807090512972866, "grad_norm": 2.5681290397033747, "learning_rate": 1.9958771083266e-05, "loss": 0.4969, "step": 733 }, { "epoch": 0.05815012873836403, "grad_norm": 3.39418674973344, "learning_rate": 1.995853798385787e-05, "loss": 0.7614, "step": 734 }, { "epoch": 0.058229352346999406, "grad_norm": 2.5749721111731576, "learning_rate": 1.9958304228727928e-05, "loss": 0.5518, "step": 735 }, { "epoch": 0.05830857595563478, "grad_norm": 2.0726530624526394, "learning_rate": 1.995806981789157e-05, "loss": 0.5375, "step": 736 }, { "epoch": 0.058387799564270156, "grad_norm": 2.3779351058228593, "learning_rate": 1.9957834751364232e-05, "loss": 0.6003, "step": 737 }, { "epoch": 0.05846702317290552, "grad_norm": 2.4830709763837526, "learning_rate": 1.995759902916139e-05, "loss": 0.6315, "step": 738 }, { "epoch": 0.0585462467815409, "grad_norm": 2.7198984614433095, "learning_rate": 1.995736265129856e-05, "loss": 0.5255, "step": 739 }, { "epoch": 0.05862547039017627, "grad_norm": 2.491938822626573, "learning_rate": 1.9957125617791314e-05, "loss": 0.6831, "step": 740 }, { "epoch": 0.05870469399881165, "grad_norm": 2.5592724079088436, "learning_rate": 1.995688792865526e-05, "loss": 0.5711, "step": 741 }, { "epoch": 0.05878391760744702, "grad_norm": 2.334868396839467, "learning_rate": 1.995664958390604e-05, "loss": 0.7081, "step": 742 }, { "epoch": 0.058863141216082396, "grad_norm": 2.1865386690707487, "learning_rate": 1.995641058355936e-05, "loss": 0.489, "step": 743 }, { "epoch": 0.05894236482471776, "grad_norm": 2.3231421524398934, "learning_rate": 1.9956170927630946e-05, "loss": 0.4743, "step": 744 }, { "epoch": 0.05902158843335314, "grad_norm": 2.494813284093219, "learning_rate": 1.9955930616136582e-05, "loss": 0.6098, "step": 745 }, { "epoch": 0.05910081204198851, "grad_norm": 2.599401725454122, "learning_rate": 1.995568964909209e-05, "loss": 0.623, "step": 746 }, { "epoch": 0.05918003565062389, "grad_norm": 2.3179147089299192, "learning_rate": 1.995544802651334e-05, "loss": 0.584, "step": 747 }, { "epoch": 0.05925925925925926, "grad_norm": 2.691895102421814, "learning_rate": 1.995520574841624e-05, "loss": 0.5988, "step": 748 }, { "epoch": 0.059338482867894636, "grad_norm": 3.3258773585574244, "learning_rate": 1.9954962814816744e-05, "loss": 0.6456, "step": 749 }, { "epoch": 0.059417706476530004, "grad_norm": 2.6380093104467717, "learning_rate": 1.9954719225730847e-05, "loss": 0.5185, "step": 750 }, { "epoch": 0.05949693008516538, "grad_norm": 3.212343708527098, "learning_rate": 1.995447498117459e-05, "loss": 0.551, "step": 751 }, { "epoch": 0.05957615369380075, "grad_norm": 2.697230578361117, "learning_rate": 1.9954230081164047e-05, "loss": 0.5654, "step": 752 }, { "epoch": 0.05965537730243613, "grad_norm": 2.9450702913170868, "learning_rate": 1.9953984525715354e-05, "loss": 0.6749, "step": 753 }, { "epoch": 0.0597346009110715, "grad_norm": 2.55178518989023, "learning_rate": 1.9953738314844676e-05, "loss": 0.572, "step": 754 }, { "epoch": 0.05981382451970687, "grad_norm": 3.200334715793516, "learning_rate": 1.9953491448568222e-05, "loss": 0.66, "step": 755 }, { "epoch": 0.059893048128342244, "grad_norm": 3.345363979215846, "learning_rate": 1.9953243926902254e-05, "loss": 0.6548, "step": 756 }, { "epoch": 0.05997227173697762, "grad_norm": 2.5378054199272144, "learning_rate": 1.995299574986306e-05, "loss": 0.6511, "step": 757 }, { "epoch": 0.06005149534561299, "grad_norm": 2.452376974233593, "learning_rate": 1.9952746917466988e-05, "loss": 0.6255, "step": 758 }, { "epoch": 0.06013071895424837, "grad_norm": 2.7109833783005177, "learning_rate": 1.9952497429730423e-05, "loss": 0.6317, "step": 759 }, { "epoch": 0.06020994256288374, "grad_norm": 3.591650209262339, "learning_rate": 1.9952247286669787e-05, "loss": 0.6245, "step": 760 }, { "epoch": 0.06028916617151911, "grad_norm": 2.663116080398295, "learning_rate": 1.995199648830156e-05, "loss": 0.5889, "step": 761 }, { "epoch": 0.060368389780154484, "grad_norm": 2.355172346699632, "learning_rate": 1.9951745034642245e-05, "loss": 0.6441, "step": 762 }, { "epoch": 0.06044761338878986, "grad_norm": 2.7028345660740127, "learning_rate": 1.995149292570841e-05, "loss": 0.7464, "step": 763 }, { "epoch": 0.06052683699742523, "grad_norm": 2.340022724644699, "learning_rate": 1.9951240161516643e-05, "loss": 0.5095, "step": 764 }, { "epoch": 0.06060606060606061, "grad_norm": 2.714666785154811, "learning_rate": 1.9950986742083594e-05, "loss": 0.7877, "step": 765 }, { "epoch": 0.06068528421469598, "grad_norm": 2.580043567565731, "learning_rate": 1.9950732667425953e-05, "loss": 0.7249, "step": 766 }, { "epoch": 0.06076450782333135, "grad_norm": 2.752091896509089, "learning_rate": 1.9950477937560442e-05, "loss": 0.6021, "step": 767 }, { "epoch": 0.060843731431966724, "grad_norm": 2.8974863989681463, "learning_rate": 1.995022255250384e-05, "loss": 0.6006, "step": 768 }, { "epoch": 0.0609229550406021, "grad_norm": 2.607699970135225, "learning_rate": 1.9949966512272964e-05, "loss": 0.6395, "step": 769 }, { "epoch": 0.06100217864923747, "grad_norm": 2.615834203622895, "learning_rate": 1.994970981688466e-05, "loss": 0.6573, "step": 770 }, { "epoch": 0.06108140225787285, "grad_norm": 2.9657669350241207, "learning_rate": 1.9949452466355847e-05, "loss": 0.5741, "step": 771 }, { "epoch": 0.06116062586650822, "grad_norm": 2.5923109185032245, "learning_rate": 1.9949194460703462e-05, "loss": 0.5857, "step": 772 }, { "epoch": 0.06123984947514359, "grad_norm": 2.561067586714167, "learning_rate": 1.9948935799944492e-05, "loss": 0.627, "step": 773 }, { "epoch": 0.061319073083778965, "grad_norm": 2.581691505754139, "learning_rate": 1.994867648409597e-05, "loss": 0.5704, "step": 774 }, { "epoch": 0.06139829669241434, "grad_norm": 2.734930223592304, "learning_rate": 1.9948416513174976e-05, "loss": 0.6628, "step": 775 }, { "epoch": 0.061477520301049714, "grad_norm": 2.3722126461915667, "learning_rate": 1.994815588719862e-05, "loss": 0.5661, "step": 776 }, { "epoch": 0.06155674390968509, "grad_norm": 2.7298165839923345, "learning_rate": 1.9947894606184065e-05, "loss": 0.6022, "step": 777 }, { "epoch": 0.06163596751832046, "grad_norm": 2.750632686742977, "learning_rate": 1.9947632670148517e-05, "loss": 0.663, "step": 778 }, { "epoch": 0.06171519112695583, "grad_norm": 2.6867070031547122, "learning_rate": 1.9947370079109224e-05, "loss": 0.6674, "step": 779 }, { "epoch": 0.061794414735591205, "grad_norm": 2.981532808935078, "learning_rate": 1.9947106833083474e-05, "loss": 0.6409, "step": 780 }, { "epoch": 0.06187363834422658, "grad_norm": 2.6442669257600557, "learning_rate": 1.9946842932088603e-05, "loss": 0.6983, "step": 781 }, { "epoch": 0.061952861952861954, "grad_norm": 2.3548220132098945, "learning_rate": 1.9946578376141985e-05, "loss": 0.6266, "step": 782 }, { "epoch": 0.06203208556149733, "grad_norm": 2.358681961536268, "learning_rate": 1.9946313165261042e-05, "loss": 0.4969, "step": 783 }, { "epoch": 0.062111309170132696, "grad_norm": 2.2581926458343697, "learning_rate": 1.9946047299463234e-05, "loss": 0.4531, "step": 784 }, { "epoch": 0.06219053277876807, "grad_norm": 2.962108618442753, "learning_rate": 1.994578077876607e-05, "loss": 0.6063, "step": 785 }, { "epoch": 0.062269756387403445, "grad_norm": 3.110260041430944, "learning_rate": 1.9945513603187096e-05, "loss": 0.6105, "step": 786 }, { "epoch": 0.06234897999603882, "grad_norm": 2.346190564223158, "learning_rate": 1.994524577274391e-05, "loss": 0.5769, "step": 787 }, { "epoch": 0.062428203604674194, "grad_norm": 2.5600674199496574, "learning_rate": 1.994497728745414e-05, "loss": 0.6509, "step": 788 }, { "epoch": 0.06250742721330957, "grad_norm": 2.431098877667765, "learning_rate": 1.9944708147335466e-05, "loss": 0.5991, "step": 789 }, { "epoch": 0.06258665082194494, "grad_norm": 3.2183602707719987, "learning_rate": 1.9944438352405614e-05, "loss": 0.6811, "step": 790 }, { "epoch": 0.06266587443058032, "grad_norm": 2.3598556923531757, "learning_rate": 1.9944167902682345e-05, "loss": 0.5922, "step": 791 }, { "epoch": 0.06274509803921569, "grad_norm": 2.43703512731962, "learning_rate": 1.994389679818347e-05, "loss": 0.5734, "step": 792 }, { "epoch": 0.06282432164785105, "grad_norm": 2.9309176599034057, "learning_rate": 1.9943625038926834e-05, "loss": 0.6582, "step": 793 }, { "epoch": 0.06290354525648643, "grad_norm": 2.161936410664361, "learning_rate": 1.9943352624930336e-05, "loss": 0.567, "step": 794 }, { "epoch": 0.0629827688651218, "grad_norm": 2.3171996758184026, "learning_rate": 1.9943079556211915e-05, "loss": 0.6401, "step": 795 }, { "epoch": 0.06306199247375718, "grad_norm": 2.6941908990766374, "learning_rate": 1.9942805832789548e-05, "loss": 0.5644, "step": 796 }, { "epoch": 0.06314121608239255, "grad_norm": 2.902003772442965, "learning_rate": 1.9942531454681254e-05, "loss": 0.5836, "step": 797 }, { "epoch": 0.06322043969102793, "grad_norm": 2.6670085133384935, "learning_rate": 1.994225642190511e-05, "loss": 0.6748, "step": 798 }, { "epoch": 0.0632996632996633, "grad_norm": 3.0540072374606573, "learning_rate": 1.9941980734479214e-05, "loss": 0.7282, "step": 799 }, { "epoch": 0.06337888690829867, "grad_norm": 2.753682430792006, "learning_rate": 1.994170439242173e-05, "loss": 0.5858, "step": 800 }, { "epoch": 0.06345811051693405, "grad_norm": 2.8482096534878725, "learning_rate": 1.9941427395750844e-05, "loss": 0.703, "step": 801 }, { "epoch": 0.06353733412556942, "grad_norm": 2.412255560545647, "learning_rate": 1.99411497444848e-05, "loss": 0.5168, "step": 802 }, { "epoch": 0.0636165577342048, "grad_norm": 2.3058684501414577, "learning_rate": 1.994087143864188e-05, "loss": 0.5101, "step": 803 }, { "epoch": 0.06369578134284017, "grad_norm": 2.3312432210310003, "learning_rate": 1.994059247824041e-05, "loss": 0.6279, "step": 804 }, { "epoch": 0.06377500495147553, "grad_norm": 2.4541675562418814, "learning_rate": 1.994031286329875e-05, "loss": 0.4586, "step": 805 }, { "epoch": 0.06385422856011091, "grad_norm": 1.861930279281903, "learning_rate": 1.9940032593835324e-05, "loss": 0.499, "step": 806 }, { "epoch": 0.06393345216874628, "grad_norm": 2.5420727710667137, "learning_rate": 1.993975166986858e-05, "loss": 0.6231, "step": 807 }, { "epoch": 0.06401267577738166, "grad_norm": 2.488083310242034, "learning_rate": 1.9939470091417012e-05, "loss": 0.5683, "step": 808 }, { "epoch": 0.06409189938601703, "grad_norm": 2.361916836508993, "learning_rate": 1.9939187858499166e-05, "loss": 0.6398, "step": 809 }, { "epoch": 0.06417112299465241, "grad_norm": 2.6141111543939446, "learning_rate": 1.9938904971133626e-05, "loss": 0.5686, "step": 810 }, { "epoch": 0.06425034660328778, "grad_norm": 3.0303566545321767, "learning_rate": 1.9938621429339012e-05, "loss": 0.5517, "step": 811 }, { "epoch": 0.06432957021192315, "grad_norm": 2.524730690336445, "learning_rate": 1.9938337233134e-05, "loss": 0.4779, "step": 812 }, { "epoch": 0.06440879382055853, "grad_norm": 2.6543283733083998, "learning_rate": 1.9938052382537304e-05, "loss": 0.5784, "step": 813 }, { "epoch": 0.0644880174291939, "grad_norm": 2.3359737642548395, "learning_rate": 1.9937766877567676e-05, "loss": 0.622, "step": 814 }, { "epoch": 0.06456724103782928, "grad_norm": 2.53017447133631, "learning_rate": 1.9937480718243914e-05, "loss": 0.6311, "step": 815 }, { "epoch": 0.06464646464646465, "grad_norm": 2.709306314384742, "learning_rate": 1.9937193904584865e-05, "loss": 0.6351, "step": 816 }, { "epoch": 0.06472568825510001, "grad_norm": 2.366553536621406, "learning_rate": 1.9936906436609413e-05, "loss": 0.5853, "step": 817 }, { "epoch": 0.0648049118637354, "grad_norm": 3.01701329921683, "learning_rate": 1.9936618314336486e-05, "loss": 0.5695, "step": 818 }, { "epoch": 0.06488413547237076, "grad_norm": 2.388744593155798, "learning_rate": 1.9936329537785054e-05, "loss": 0.4993, "step": 819 }, { "epoch": 0.06496335908100614, "grad_norm": 2.310655509082259, "learning_rate": 1.9936040106974132e-05, "loss": 0.5861, "step": 820 }, { "epoch": 0.06504258268964151, "grad_norm": 2.336793716650803, "learning_rate": 1.9935750021922778e-05, "loss": 0.5873, "step": 821 }, { "epoch": 0.06512180629827688, "grad_norm": 2.114529169594679, "learning_rate": 1.993545928265009e-05, "loss": 0.5935, "step": 822 }, { "epoch": 0.06520102990691226, "grad_norm": 2.692553344124309, "learning_rate": 1.993516788917522e-05, "loss": 0.6219, "step": 823 }, { "epoch": 0.06528025351554763, "grad_norm": 2.389893720618022, "learning_rate": 1.9934875841517346e-05, "loss": 0.5645, "step": 824 }, { "epoch": 0.06535947712418301, "grad_norm": 2.484716621739794, "learning_rate": 1.9934583139695703e-05, "loss": 0.5553, "step": 825 }, { "epoch": 0.06543870073281838, "grad_norm": 3.0462642760778618, "learning_rate": 1.9934289783729564e-05, "loss": 0.6167, "step": 826 }, { "epoch": 0.06551792434145376, "grad_norm": 2.2452716816803076, "learning_rate": 1.993399577363824e-05, "loss": 0.5349, "step": 827 }, { "epoch": 0.06559714795008913, "grad_norm": 2.8885614555168337, "learning_rate": 1.9933701109441093e-05, "loss": 0.6317, "step": 828 }, { "epoch": 0.0656763715587245, "grad_norm": 2.6602159378281987, "learning_rate": 1.993340579115753e-05, "loss": 0.6245, "step": 829 }, { "epoch": 0.06575559516735988, "grad_norm": 2.6165913358272075, "learning_rate": 1.993310981880699e-05, "loss": 0.6299, "step": 830 }, { "epoch": 0.06583481877599524, "grad_norm": 2.371449325243517, "learning_rate": 1.9932813192408964e-05, "loss": 0.5249, "step": 831 }, { "epoch": 0.06591404238463063, "grad_norm": 2.357942686977399, "learning_rate": 1.9932515911982983e-05, "loss": 0.6708, "step": 832 }, { "epoch": 0.06599326599326599, "grad_norm": 2.880553589653365, "learning_rate": 1.993221797754862e-05, "loss": 0.653, "step": 833 }, { "epoch": 0.06607248960190136, "grad_norm": 2.310695758148777, "learning_rate": 1.9931919389125496e-05, "loss": 0.5041, "step": 834 }, { "epoch": 0.06615171321053674, "grad_norm": 2.4386376078799747, "learning_rate": 1.9931620146733264e-05, "loss": 0.5969, "step": 835 }, { "epoch": 0.06623093681917211, "grad_norm": 2.400077208564521, "learning_rate": 1.993132025039164e-05, "loss": 0.5913, "step": 836 }, { "epoch": 0.06631016042780749, "grad_norm": 2.3442029871090146, "learning_rate": 1.9931019700120363e-05, "loss": 0.5431, "step": 837 }, { "epoch": 0.06638938403644286, "grad_norm": 2.2883192943085717, "learning_rate": 1.9930718495939222e-05, "loss": 0.5296, "step": 838 }, { "epoch": 0.06646860764507824, "grad_norm": 2.346789549641395, "learning_rate": 1.9930416637868053e-05, "loss": 0.4923, "step": 839 }, { "epoch": 0.06654783125371361, "grad_norm": 2.614121672017315, "learning_rate": 1.993011412592673e-05, "loss": 0.6086, "step": 840 }, { "epoch": 0.06662705486234898, "grad_norm": 3.0534538592411766, "learning_rate": 1.992981096013517e-05, "loss": 0.6922, "step": 841 }, { "epoch": 0.06670627847098436, "grad_norm": 3.017815351014294, "learning_rate": 1.9929507140513342e-05, "loss": 0.6948, "step": 842 }, { "epoch": 0.06678550207961972, "grad_norm": 2.360489736437008, "learning_rate": 1.9929202667081246e-05, "loss": 0.5377, "step": 843 }, { "epoch": 0.0668647256882551, "grad_norm": 2.5539470848512007, "learning_rate": 1.9928897539858926e-05, "loss": 0.5094, "step": 844 }, { "epoch": 0.06694394929689047, "grad_norm": 2.2777283063451534, "learning_rate": 1.992859175886648e-05, "loss": 0.5961, "step": 845 }, { "epoch": 0.06702317290552584, "grad_norm": 2.0505572977239828, "learning_rate": 1.9928285324124038e-05, "loss": 0.5134, "step": 846 }, { "epoch": 0.06710239651416122, "grad_norm": 2.5129650303866047, "learning_rate": 1.9927978235651782e-05, "loss": 0.5376, "step": 847 }, { "epoch": 0.06718162012279659, "grad_norm": 3.221723448132506, "learning_rate": 1.992767049346993e-05, "loss": 0.6556, "step": 848 }, { "epoch": 0.06726084373143197, "grad_norm": 2.76830146639477, "learning_rate": 1.9927362097598746e-05, "loss": 0.6053, "step": 849 }, { "epoch": 0.06734006734006734, "grad_norm": 2.233404358137078, "learning_rate": 1.9927053048058534e-05, "loss": 0.5713, "step": 850 }, { "epoch": 0.06741929094870272, "grad_norm": 2.9886397534117837, "learning_rate": 1.9926743344869645e-05, "loss": 0.5631, "step": 851 }, { "epoch": 0.06749851455733809, "grad_norm": 2.7133581210801947, "learning_rate": 1.992643298805247e-05, "loss": 0.63, "step": 852 }, { "epoch": 0.06757773816597346, "grad_norm": 2.7880183431147634, "learning_rate": 1.9926121977627447e-05, "loss": 0.5981, "step": 853 }, { "epoch": 0.06765696177460884, "grad_norm": 2.283195629083946, "learning_rate": 1.9925810313615052e-05, "loss": 0.5193, "step": 854 }, { "epoch": 0.0677361853832442, "grad_norm": 2.4685472107825013, "learning_rate": 1.9925497996035807e-05, "loss": 0.608, "step": 855 }, { "epoch": 0.06781540899187959, "grad_norm": 2.340252814536461, "learning_rate": 1.992518502491028e-05, "loss": 0.6015, "step": 856 }, { "epoch": 0.06789463260051495, "grad_norm": 2.670901520367536, "learning_rate": 1.9924871400259074e-05, "loss": 0.617, "step": 857 }, { "epoch": 0.06797385620915032, "grad_norm": 2.5836058464362823, "learning_rate": 1.9924557122102843e-05, "loss": 0.6712, "step": 858 }, { "epoch": 0.0680530798177857, "grad_norm": 2.373915203317525, "learning_rate": 1.9924242190462276e-05, "loss": 0.6829, "step": 859 }, { "epoch": 0.06813230342642107, "grad_norm": 2.9855954138436975, "learning_rate": 1.992392660535812e-05, "loss": 0.7835, "step": 860 }, { "epoch": 0.06821152703505645, "grad_norm": 2.922510848840876, "learning_rate": 1.9923610366811142e-05, "loss": 0.7843, "step": 861 }, { "epoch": 0.06829075064369182, "grad_norm": 2.279622275703524, "learning_rate": 1.9923293474842175e-05, "loss": 0.6269, "step": 862 }, { "epoch": 0.06836997425232719, "grad_norm": 2.5275665929230935, "learning_rate": 1.9922975929472076e-05, "loss": 0.669, "step": 863 }, { "epoch": 0.06844919786096257, "grad_norm": 2.6381708069523375, "learning_rate": 1.9922657730721758e-05, "loss": 0.6102, "step": 864 }, { "epoch": 0.06852842146959794, "grad_norm": 2.4006202588321903, "learning_rate": 1.9922338878612177e-05, "loss": 0.5943, "step": 865 }, { "epoch": 0.06860764507823332, "grad_norm": 2.2801290873509177, "learning_rate": 1.9922019373164324e-05, "loss": 0.473, "step": 866 }, { "epoch": 0.06868686868686869, "grad_norm": 2.2776237544692366, "learning_rate": 1.9921699214399238e-05, "loss": 0.5833, "step": 867 }, { "epoch": 0.06876609229550407, "grad_norm": 2.67064385104672, "learning_rate": 1.9921378402337996e-05, "loss": 0.5563, "step": 868 }, { "epoch": 0.06884531590413943, "grad_norm": 2.3761074502160997, "learning_rate": 1.9921056937001725e-05, "loss": 0.5406, "step": 869 }, { "epoch": 0.0689245395127748, "grad_norm": 2.6282969248165413, "learning_rate": 1.9920734818411592e-05, "loss": 0.5174, "step": 870 }, { "epoch": 0.06900376312141018, "grad_norm": 2.768547190282355, "learning_rate": 1.9920412046588807e-05, "loss": 0.5631, "step": 871 }, { "epoch": 0.06908298673004555, "grad_norm": 2.738677217126626, "learning_rate": 1.992008862155462e-05, "loss": 0.5762, "step": 872 }, { "epoch": 0.06916221033868093, "grad_norm": 2.5047554390696862, "learning_rate": 1.9919764543330334e-05, "loss": 0.4896, "step": 873 }, { "epoch": 0.0692414339473163, "grad_norm": 2.7292928450358644, "learning_rate": 1.9919439811937283e-05, "loss": 0.651, "step": 874 }, { "epoch": 0.06932065755595167, "grad_norm": 2.4716401373212395, "learning_rate": 1.991911442739685e-05, "loss": 0.6114, "step": 875 }, { "epoch": 0.06939988116458705, "grad_norm": 2.427938301520501, "learning_rate": 1.9918788389730457e-05, "loss": 0.5364, "step": 876 }, { "epoch": 0.06947910477322242, "grad_norm": 2.3805652833263915, "learning_rate": 1.9918461698959576e-05, "loss": 0.6165, "step": 877 }, { "epoch": 0.0695583283818578, "grad_norm": 2.147475091562914, "learning_rate": 1.9918134355105717e-05, "loss": 0.4989, "step": 878 }, { "epoch": 0.06963755199049317, "grad_norm": 2.2841456029641796, "learning_rate": 1.9917806358190434e-05, "loss": 0.4942, "step": 879 }, { "epoch": 0.06971677559912855, "grad_norm": 2.1208825659876975, "learning_rate": 1.9917477708235324e-05, "loss": 0.5941, "step": 880 }, { "epoch": 0.06979599920776391, "grad_norm": 2.4247452574228547, "learning_rate": 1.9917148405262027e-05, "loss": 0.6446, "step": 881 }, { "epoch": 0.06987522281639928, "grad_norm": 2.5975848014190666, "learning_rate": 1.9916818449292223e-05, "loss": 0.5433, "step": 882 }, { "epoch": 0.06995444642503466, "grad_norm": 2.238138002042094, "learning_rate": 1.9916487840347644e-05, "loss": 0.6243, "step": 883 }, { "epoch": 0.07003367003367003, "grad_norm": 2.6730194021003677, "learning_rate": 1.9916156578450052e-05, "loss": 0.7004, "step": 884 }, { "epoch": 0.07011289364230541, "grad_norm": 2.22251494252007, "learning_rate": 1.9915824663621267e-05, "loss": 0.5966, "step": 885 }, { "epoch": 0.07019211725094078, "grad_norm": 2.102441178207004, "learning_rate": 1.991549209588314e-05, "loss": 0.4584, "step": 886 }, { "epoch": 0.07027134085957615, "grad_norm": 2.480618541833406, "learning_rate": 1.9915158875257566e-05, "loss": 0.5416, "step": 887 }, { "epoch": 0.07035056446821153, "grad_norm": 2.2487807086735523, "learning_rate": 1.991482500176649e-05, "loss": 0.4912, "step": 888 }, { "epoch": 0.0704297880768469, "grad_norm": 2.3755036554761855, "learning_rate": 1.9914490475431892e-05, "loss": 0.5761, "step": 889 }, { "epoch": 0.07050901168548228, "grad_norm": 2.0612764095223457, "learning_rate": 1.9914155296275804e-05, "loss": 0.5633, "step": 890 }, { "epoch": 0.07058823529411765, "grad_norm": 2.450533835665693, "learning_rate": 1.9913819464320295e-05, "loss": 0.446, "step": 891 }, { "epoch": 0.07066745890275301, "grad_norm": 2.085665128858314, "learning_rate": 1.9913482979587473e-05, "loss": 0.543, "step": 892 }, { "epoch": 0.0707466825113884, "grad_norm": 2.311629385161046, "learning_rate": 1.9913145842099503e-05, "loss": 0.5764, "step": 893 }, { "epoch": 0.07082590612002376, "grad_norm": 2.384799683655331, "learning_rate": 1.9912808051878575e-05, "loss": 0.5429, "step": 894 }, { "epoch": 0.07090512972865914, "grad_norm": 2.5044835997295856, "learning_rate": 1.9912469608946932e-05, "loss": 0.5312, "step": 895 }, { "epoch": 0.07098435333729451, "grad_norm": 2.1919015800368196, "learning_rate": 1.9912130513326863e-05, "loss": 0.6025, "step": 896 }, { "epoch": 0.0710635769459299, "grad_norm": 2.9519507325462317, "learning_rate": 1.9911790765040697e-05, "loss": 0.6424, "step": 897 }, { "epoch": 0.07114280055456526, "grad_norm": 2.50623357058056, "learning_rate": 1.9911450364110798e-05, "loss": 0.6322, "step": 898 }, { "epoch": 0.07122202416320063, "grad_norm": 2.2688446065718675, "learning_rate": 1.9911109310559583e-05, "loss": 0.5411, "step": 899 }, { "epoch": 0.07130124777183601, "grad_norm": 2.1621910834786737, "learning_rate": 1.991076760440951e-05, "loss": 0.5956, "step": 900 }, { "epoch": 0.07138047138047138, "grad_norm": 2.323059316434927, "learning_rate": 1.991042524568308e-05, "loss": 0.5644, "step": 901 }, { "epoch": 0.07145969498910676, "grad_norm": 2.139321254896373, "learning_rate": 1.991008223440283e-05, "loss": 0.5794, "step": 902 }, { "epoch": 0.07153891859774213, "grad_norm": 2.255598119425402, "learning_rate": 1.9909738570591352e-05, "loss": 0.5271, "step": 903 }, { "epoch": 0.0716181422063775, "grad_norm": 2.4163889519624657, "learning_rate": 1.990939425427127e-05, "loss": 0.5916, "step": 904 }, { "epoch": 0.07169736581501288, "grad_norm": 2.457627836614485, "learning_rate": 1.9909049285465258e-05, "loss": 0.6217, "step": 905 }, { "epoch": 0.07177658942364824, "grad_norm": 2.284404778088907, "learning_rate": 1.990870366419603e-05, "loss": 0.5575, "step": 906 }, { "epoch": 0.07185581303228362, "grad_norm": 2.2658243473432287, "learning_rate": 1.9908357390486342e-05, "loss": 0.5829, "step": 907 }, { "epoch": 0.07193503664091899, "grad_norm": 2.527317543491671, "learning_rate": 1.9908010464358997e-05, "loss": 0.5473, "step": 908 }, { "epoch": 0.07201426024955437, "grad_norm": 2.578772380055916, "learning_rate": 1.9907662885836836e-05, "loss": 0.6369, "step": 909 }, { "epoch": 0.07209348385818974, "grad_norm": 2.6512834477426526, "learning_rate": 1.9907314654942748e-05, "loss": 0.5202, "step": 910 }, { "epoch": 0.07217270746682511, "grad_norm": 2.7526226665382842, "learning_rate": 1.990696577169966e-05, "loss": 0.6889, "step": 911 }, { "epoch": 0.07225193107546049, "grad_norm": 2.25739059904445, "learning_rate": 1.9906616236130543e-05, "loss": 0.4722, "step": 912 }, { "epoch": 0.07233115468409586, "grad_norm": 2.1917311953311533, "learning_rate": 1.990626604825842e-05, "loss": 0.5757, "step": 913 }, { "epoch": 0.07241037829273124, "grad_norm": 2.5090957024568743, "learning_rate": 1.9905915208106342e-05, "loss": 0.4883, "step": 914 }, { "epoch": 0.07248960190136661, "grad_norm": 2.4103195657044925, "learning_rate": 1.990556371569741e-05, "loss": 0.6352, "step": 915 }, { "epoch": 0.07256882551000197, "grad_norm": 2.741830933995138, "learning_rate": 1.990521157105477e-05, "loss": 0.5133, "step": 916 }, { "epoch": 0.07264804911863736, "grad_norm": 2.4749257639171796, "learning_rate": 1.990485877420161e-05, "loss": 0.5508, "step": 917 }, { "epoch": 0.07272727272727272, "grad_norm": 2.7743935671534246, "learning_rate": 1.990450532516116e-05, "loss": 0.5859, "step": 918 }, { "epoch": 0.0728064963359081, "grad_norm": 2.6901306746022775, "learning_rate": 1.9904151223956688e-05, "loss": 0.5363, "step": 919 }, { "epoch": 0.07288571994454347, "grad_norm": 2.270580183753606, "learning_rate": 1.9903796470611515e-05, "loss": 0.4755, "step": 920 }, { "epoch": 0.07296494355317884, "grad_norm": 2.4964563143971383, "learning_rate": 1.9903441065149e-05, "loss": 0.5628, "step": 921 }, { "epoch": 0.07304416716181422, "grad_norm": 2.3148142675786, "learning_rate": 1.990308500759254e-05, "loss": 0.5291, "step": 922 }, { "epoch": 0.07312339077044959, "grad_norm": 2.759305898022117, "learning_rate": 1.9902728297965586e-05, "loss": 0.7155, "step": 923 }, { "epoch": 0.07320261437908497, "grad_norm": 2.391039701229346, "learning_rate": 1.990237093629162e-05, "loss": 0.6283, "step": 924 }, { "epoch": 0.07328183798772034, "grad_norm": 2.4341409272036567, "learning_rate": 1.9902012922594178e-05, "loss": 0.6436, "step": 925 }, { "epoch": 0.07336106159635572, "grad_norm": 2.629153944855865, "learning_rate": 1.990165425689683e-05, "loss": 0.5817, "step": 926 }, { "epoch": 0.07344028520499109, "grad_norm": 2.999800542377723, "learning_rate": 1.9901294939223192e-05, "loss": 0.7025, "step": 927 }, { "epoch": 0.07351950881362646, "grad_norm": 2.3315986739064147, "learning_rate": 1.9900934969596925e-05, "loss": 0.5782, "step": 928 }, { "epoch": 0.07359873242226184, "grad_norm": 2.19586525154139, "learning_rate": 1.9900574348041728e-05, "loss": 0.4544, "step": 929 }, { "epoch": 0.0736779560308972, "grad_norm": 2.333981826493017, "learning_rate": 1.990021307458135e-05, "loss": 0.5572, "step": 930 }, { "epoch": 0.07375717963953259, "grad_norm": 2.762353900418872, "learning_rate": 1.989985114923958e-05, "loss": 0.622, "step": 931 }, { "epoch": 0.07383640324816795, "grad_norm": 2.2765380969186335, "learning_rate": 1.9899488572040244e-05, "loss": 0.4791, "step": 932 }, { "epoch": 0.07391562685680332, "grad_norm": 2.3601711611150793, "learning_rate": 1.989912534300722e-05, "loss": 0.5191, "step": 933 }, { "epoch": 0.0739948504654387, "grad_norm": 2.588364259136482, "learning_rate": 1.9898761462164425e-05, "loss": 0.6849, "step": 934 }, { "epoch": 0.07407407407407407, "grad_norm": 2.64012916369795, "learning_rate": 1.989839692953581e-05, "loss": 0.59, "step": 935 }, { "epoch": 0.07415329768270945, "grad_norm": 2.384470253660101, "learning_rate": 1.9898031745145397e-05, "loss": 0.5794, "step": 936 }, { "epoch": 0.07423252129134482, "grad_norm": 2.7702633138713377, "learning_rate": 1.989766590901721e-05, "loss": 0.6083, "step": 937 }, { "epoch": 0.0743117448999802, "grad_norm": 3.1284189033255627, "learning_rate": 1.9897299421175353e-05, "loss": 0.7033, "step": 938 }, { "epoch": 0.07439096850861557, "grad_norm": 3.0776085969214386, "learning_rate": 1.989693228164395e-05, "loss": 0.6533, "step": 939 }, { "epoch": 0.07447019211725094, "grad_norm": 2.4372575665676286, "learning_rate": 1.989656449044718e-05, "loss": 0.5605, "step": 940 }, { "epoch": 0.07454941572588632, "grad_norm": 2.3548609576621025, "learning_rate": 1.9896196047609255e-05, "loss": 0.6597, "step": 941 }, { "epoch": 0.07462863933452168, "grad_norm": 2.5150272857529754, "learning_rate": 1.9895826953154437e-05, "loss": 0.5906, "step": 942 }, { "epoch": 0.07470786294315707, "grad_norm": 2.34275629040273, "learning_rate": 1.9895457207107032e-05, "loss": 0.593, "step": 943 }, { "epoch": 0.07478708655179243, "grad_norm": 2.321333121037487, "learning_rate": 1.9895086809491384e-05, "loss": 0.5768, "step": 944 }, { "epoch": 0.0748663101604278, "grad_norm": 2.6139333663849125, "learning_rate": 1.989471576033188e-05, "loss": 0.5813, "step": 945 }, { "epoch": 0.07494553376906318, "grad_norm": 2.6229210683811566, "learning_rate": 1.9894344059652953e-05, "loss": 0.5276, "step": 946 }, { "epoch": 0.07502475737769855, "grad_norm": 2.691579310859323, "learning_rate": 1.989397170747908e-05, "loss": 0.6681, "step": 947 }, { "epoch": 0.07510398098633393, "grad_norm": 2.2628838224488996, "learning_rate": 1.9893598703834773e-05, "loss": 0.5727, "step": 948 }, { "epoch": 0.0751832045949693, "grad_norm": 2.5417131403763076, "learning_rate": 1.98932250487446e-05, "loss": 0.5811, "step": 949 }, { "epoch": 0.07526242820360468, "grad_norm": 2.6594808728801533, "learning_rate": 1.989285074223316e-05, "loss": 0.6061, "step": 950 }, { "epoch": 0.07534165181224005, "grad_norm": 2.1338151286853706, "learning_rate": 1.98924757843251e-05, "loss": 0.6154, "step": 951 }, { "epoch": 0.07542087542087542, "grad_norm": 2.2963129103002236, "learning_rate": 1.989210017504511e-05, "loss": 0.5281, "step": 952 }, { "epoch": 0.0755000990295108, "grad_norm": 2.457216823104847, "learning_rate": 1.989172391441792e-05, "loss": 0.6455, "step": 953 }, { "epoch": 0.07557932263814617, "grad_norm": 2.893209777574209, "learning_rate": 1.9891347002468307e-05, "loss": 0.6824, "step": 954 }, { "epoch": 0.07565854624678155, "grad_norm": 2.0640640545519116, "learning_rate": 1.9890969439221086e-05, "loss": 0.4339, "step": 955 }, { "epoch": 0.07573776985541691, "grad_norm": 2.543695427342178, "learning_rate": 1.989059122470112e-05, "loss": 0.6042, "step": 956 }, { "epoch": 0.07581699346405228, "grad_norm": 2.2370983000132925, "learning_rate": 1.9890212358933316e-05, "loss": 0.5077, "step": 957 }, { "epoch": 0.07589621707268766, "grad_norm": 2.4802801842018987, "learning_rate": 1.9889832841942613e-05, "loss": 0.629, "step": 958 }, { "epoch": 0.07597544068132303, "grad_norm": 2.8108090868001896, "learning_rate": 1.988945267375401e-05, "loss": 0.6172, "step": 959 }, { "epoch": 0.07605466428995841, "grad_norm": 2.268798044596798, "learning_rate": 1.9889071854392528e-05, "loss": 0.5143, "step": 960 }, { "epoch": 0.07613388789859378, "grad_norm": 2.4041933150755406, "learning_rate": 1.9888690383883247e-05, "loss": 0.4789, "step": 961 }, { "epoch": 0.07621311150722915, "grad_norm": 2.2787345334316584, "learning_rate": 1.9888308262251286e-05, "loss": 0.5178, "step": 962 }, { "epoch": 0.07629233511586453, "grad_norm": 2.3698462560557805, "learning_rate": 1.988792548952181e-05, "loss": 0.5473, "step": 963 }, { "epoch": 0.0763715587244999, "grad_norm": 2.444233132166127, "learning_rate": 1.9887542065720013e-05, "loss": 0.6192, "step": 964 }, { "epoch": 0.07645078233313528, "grad_norm": 2.219272390994219, "learning_rate": 1.988715799087115e-05, "loss": 0.575, "step": 965 }, { "epoch": 0.07653000594177065, "grad_norm": 2.298572045122237, "learning_rate": 1.9886773265000502e-05, "loss": 0.527, "step": 966 }, { "epoch": 0.07660922955040603, "grad_norm": 2.1482108494168135, "learning_rate": 1.9886387888133413e-05, "loss": 0.4366, "step": 967 }, { "epoch": 0.0766884531590414, "grad_norm": 2.1101315267819203, "learning_rate": 1.988600186029525e-05, "loss": 0.5012, "step": 968 }, { "epoch": 0.07676767676767676, "grad_norm": 2.7321741556839667, "learning_rate": 1.988561518151143e-05, "loss": 0.477, "step": 969 }, { "epoch": 0.07684690037631214, "grad_norm": 2.480594396625295, "learning_rate": 1.988522785180742e-05, "loss": 0.6292, "step": 970 }, { "epoch": 0.07692612398494751, "grad_norm": 2.1796398221460196, "learning_rate": 1.9884839871208717e-05, "loss": 0.5449, "step": 971 }, { "epoch": 0.07700534759358289, "grad_norm": 2.072570670527955, "learning_rate": 1.9884451239740877e-05, "loss": 0.4721, "step": 972 }, { "epoch": 0.07708457120221826, "grad_norm": 2.299809558046713, "learning_rate": 1.988406195742948e-05, "loss": 0.5759, "step": 973 }, { "epoch": 0.07716379481085363, "grad_norm": 2.13576691936463, "learning_rate": 1.9883672024300163e-05, "loss": 0.4433, "step": 974 }, { "epoch": 0.07724301841948901, "grad_norm": 2.4520707098449064, "learning_rate": 1.98832814403786e-05, "loss": 0.5766, "step": 975 }, { "epoch": 0.07732224202812438, "grad_norm": 2.766327843132799, "learning_rate": 1.988289020569051e-05, "loss": 0.6727, "step": 976 }, { "epoch": 0.07740146563675976, "grad_norm": 2.5064951346112, "learning_rate": 1.9882498320261652e-05, "loss": 0.5935, "step": 977 }, { "epoch": 0.07748068924539513, "grad_norm": 2.425900782785911, "learning_rate": 1.9882105784117835e-05, "loss": 0.5033, "step": 978 }, { "epoch": 0.07755991285403051, "grad_norm": 2.5647047758604216, "learning_rate": 1.98817125972849e-05, "loss": 0.6163, "step": 979 }, { "epoch": 0.07763913646266588, "grad_norm": 2.3390776043312136, "learning_rate": 1.9881318759788738e-05, "loss": 0.6479, "step": 980 }, { "epoch": 0.07771836007130124, "grad_norm": 2.29016226420427, "learning_rate": 1.988092427165528e-05, "loss": 0.4766, "step": 981 }, { "epoch": 0.07779758367993662, "grad_norm": 2.93436897178446, "learning_rate": 1.98805291329105e-05, "loss": 0.6694, "step": 982 }, { "epoch": 0.07787680728857199, "grad_norm": 2.2474360030425125, "learning_rate": 1.9880133343580423e-05, "loss": 0.6023, "step": 983 }, { "epoch": 0.07795603089720737, "grad_norm": 2.459388822542379, "learning_rate": 1.9879736903691107e-05, "loss": 0.6372, "step": 984 }, { "epoch": 0.07803525450584274, "grad_norm": 2.619687500158315, "learning_rate": 1.9879339813268653e-05, "loss": 0.6342, "step": 985 }, { "epoch": 0.07811447811447811, "grad_norm": 2.145655543525274, "learning_rate": 1.9878942072339208e-05, "loss": 0.4446, "step": 986 }, { "epoch": 0.07819370172311349, "grad_norm": 2.647277753965703, "learning_rate": 1.987854368092896e-05, "loss": 0.704, "step": 987 }, { "epoch": 0.07827292533174886, "grad_norm": 2.2017066413096207, "learning_rate": 1.9878144639064145e-05, "loss": 0.5217, "step": 988 }, { "epoch": 0.07835214894038424, "grad_norm": 2.095758426892819, "learning_rate": 1.9877744946771034e-05, "loss": 0.4057, "step": 989 }, { "epoch": 0.0784313725490196, "grad_norm": 2.2117560897301702, "learning_rate": 1.987734460407595e-05, "loss": 0.4695, "step": 990 }, { "epoch": 0.07851059615765497, "grad_norm": 2.3987320414172473, "learning_rate": 1.9876943611005252e-05, "loss": 0.5071, "step": 991 }, { "epoch": 0.07858981976629036, "grad_norm": 2.939845595210767, "learning_rate": 1.9876541967585337e-05, "loss": 0.6177, "step": 992 }, { "epoch": 0.07866904337492572, "grad_norm": 2.7068922017867645, "learning_rate": 1.987613967384266e-05, "loss": 0.5082, "step": 993 }, { "epoch": 0.0787482669835611, "grad_norm": 2.503160603491663, "learning_rate": 1.9875736729803705e-05, "loss": 0.6302, "step": 994 }, { "epoch": 0.07882749059219647, "grad_norm": 2.56265855673719, "learning_rate": 1.9875333135495e-05, "loss": 0.6615, "step": 995 }, { "epoch": 0.07890671420083185, "grad_norm": 2.2596065545697566, "learning_rate": 1.9874928890943134e-05, "loss": 0.6682, "step": 996 }, { "epoch": 0.07898593780946722, "grad_norm": 2.448360339483264, "learning_rate": 1.9874523996174714e-05, "loss": 0.57, "step": 997 }, { "epoch": 0.07906516141810259, "grad_norm": 2.3321655062409046, "learning_rate": 1.98741184512164e-05, "loss": 0.667, "step": 998 }, { "epoch": 0.07914438502673797, "grad_norm": 2.3746430097998656, "learning_rate": 1.9873712256094898e-05, "loss": 0.6208, "step": 999 }, { "epoch": 0.07922360863537334, "grad_norm": 2.4382360386725024, "learning_rate": 1.987330541083695e-05, "loss": 0.6245, "step": 1000 }, { "epoch": 0.07930283224400872, "grad_norm": 2.502665713196127, "learning_rate": 1.9872897915469353e-05, "loss": 0.4758, "step": 1001 }, { "epoch": 0.07938205585264409, "grad_norm": 2.203990802523697, "learning_rate": 1.987248977001893e-05, "loss": 0.5672, "step": 1002 }, { "epoch": 0.07946127946127945, "grad_norm": 2.162036536603663, "learning_rate": 1.987208097451256e-05, "loss": 0.555, "step": 1003 }, { "epoch": 0.07954050306991484, "grad_norm": 2.473045243499081, "learning_rate": 1.987167152897716e-05, "loss": 0.5935, "step": 1004 }, { "epoch": 0.0796197266785502, "grad_norm": 2.119374803626877, "learning_rate": 1.987126143343969e-05, "loss": 0.5198, "step": 1005 }, { "epoch": 0.07969895028718559, "grad_norm": 2.659645048479735, "learning_rate": 1.987085068792715e-05, "loss": 0.5732, "step": 1006 }, { "epoch": 0.07977817389582095, "grad_norm": 2.3463721828983473, "learning_rate": 1.9870439292466587e-05, "loss": 0.5767, "step": 1007 }, { "epoch": 0.07985739750445633, "grad_norm": 2.901642126328267, "learning_rate": 1.9870027247085093e-05, "loss": 0.7066, "step": 1008 }, { "epoch": 0.0799366211130917, "grad_norm": 2.180806212088373, "learning_rate": 1.9869614551809793e-05, "loss": 0.4841, "step": 1009 }, { "epoch": 0.08001584472172707, "grad_norm": 2.660905041614718, "learning_rate": 1.986920120666787e-05, "loss": 0.4858, "step": 1010 }, { "epoch": 0.08009506833036245, "grad_norm": 2.368586242448567, "learning_rate": 1.986878721168653e-05, "loss": 0.5848, "step": 1011 }, { "epoch": 0.08017429193899782, "grad_norm": 2.4271483938049956, "learning_rate": 1.986837256689304e-05, "loss": 0.5724, "step": 1012 }, { "epoch": 0.0802535155476332, "grad_norm": 2.4509693602785463, "learning_rate": 1.98679572723147e-05, "loss": 0.5219, "step": 1013 }, { "epoch": 0.08033273915626857, "grad_norm": 2.443496597980648, "learning_rate": 1.9867541327978853e-05, "loss": 0.6714, "step": 1014 }, { "epoch": 0.08041196276490394, "grad_norm": 2.367671588770985, "learning_rate": 1.986712473391289e-05, "loss": 0.5335, "step": 1015 }, { "epoch": 0.08049118637353932, "grad_norm": 2.565088295429447, "learning_rate": 1.986670749014424e-05, "loss": 0.59, "step": 1016 }, { "epoch": 0.08057040998217468, "grad_norm": 2.0604190565475773, "learning_rate": 1.9866289596700383e-05, "loss": 0.4956, "step": 1017 }, { "epoch": 0.08064963359081007, "grad_norm": 2.1988560970066233, "learning_rate": 1.9865871053608823e-05, "loss": 0.5635, "step": 1018 }, { "epoch": 0.08072885719944543, "grad_norm": 2.558336155054883, "learning_rate": 1.9865451860897126e-05, "loss": 0.5646, "step": 1019 }, { "epoch": 0.08080808080808081, "grad_norm": 2.454308626909449, "learning_rate": 1.98650320185929e-05, "loss": 0.547, "step": 1020 }, { "epoch": 0.08088730441671618, "grad_norm": 2.268676352822214, "learning_rate": 1.986461152672378e-05, "loss": 0.6198, "step": 1021 }, { "epoch": 0.08096652802535155, "grad_norm": 2.3917768390278527, "learning_rate": 1.986419038531745e-05, "loss": 0.5774, "step": 1022 }, { "epoch": 0.08104575163398693, "grad_norm": 2.0974142950247554, "learning_rate": 1.9863768594401654e-05, "loss": 0.5313, "step": 1023 }, { "epoch": 0.0811249752426223, "grad_norm": 2.3658289548688693, "learning_rate": 1.9863346154004155e-05, "loss": 0.5298, "step": 1024 }, { "epoch": 0.08120419885125768, "grad_norm": 2.2853381004335653, "learning_rate": 1.986292306415277e-05, "loss": 0.5569, "step": 1025 }, { "epoch": 0.08128342245989305, "grad_norm": 2.2461119361208315, "learning_rate": 1.9862499324875362e-05, "loss": 0.4962, "step": 1026 }, { "epoch": 0.08136264606852842, "grad_norm": 2.064477503023414, "learning_rate": 1.9862074936199827e-05, "loss": 0.4665, "step": 1027 }, { "epoch": 0.0814418696771638, "grad_norm": 2.6328229695413503, "learning_rate": 1.9861649898154107e-05, "loss": 0.6401, "step": 1028 }, { "epoch": 0.08152109328579916, "grad_norm": 2.1821967129970434, "learning_rate": 1.98612242107662e-05, "loss": 0.5117, "step": 1029 }, { "epoch": 0.08160031689443455, "grad_norm": 1.9693769603604045, "learning_rate": 1.9860797874064123e-05, "loss": 0.406, "step": 1030 }, { "epoch": 0.08167954050306991, "grad_norm": 2.411832476912573, "learning_rate": 1.9860370888075954e-05, "loss": 0.5463, "step": 1031 }, { "epoch": 0.08175876411170528, "grad_norm": 2.291319632797604, "learning_rate": 1.9859943252829804e-05, "loss": 0.5611, "step": 1032 }, { "epoch": 0.08183798772034066, "grad_norm": 2.7509008792279346, "learning_rate": 1.9859514968353836e-05, "loss": 0.6889, "step": 1033 }, { "epoch": 0.08191721132897603, "grad_norm": 2.117279359438013, "learning_rate": 1.985908603467625e-05, "loss": 0.5329, "step": 1034 }, { "epoch": 0.08199643493761141, "grad_norm": 2.3927478088787963, "learning_rate": 1.985865645182529e-05, "loss": 0.4715, "step": 1035 }, { "epoch": 0.08207565854624678, "grad_norm": 2.4279236573667364, "learning_rate": 1.9858226219829234e-05, "loss": 0.5258, "step": 1036 }, { "epoch": 0.08215488215488216, "grad_norm": 2.2393890880065763, "learning_rate": 1.985779533871642e-05, "loss": 0.547, "step": 1037 }, { "epoch": 0.08223410576351753, "grad_norm": 2.0590795623077476, "learning_rate": 1.985736380851521e-05, "loss": 0.5285, "step": 1038 }, { "epoch": 0.0823133293721529, "grad_norm": 2.235554088389397, "learning_rate": 1.9856931629254032e-05, "loss": 0.4274, "step": 1039 }, { "epoch": 0.08239255298078828, "grad_norm": 2.206928814171015, "learning_rate": 1.9856498800961328e-05, "loss": 0.5413, "step": 1040 }, { "epoch": 0.08247177658942365, "grad_norm": 2.7100518442397945, "learning_rate": 1.9856065323665606e-05, "loss": 0.5916, "step": 1041 }, { "epoch": 0.08255100019805903, "grad_norm": 2.291630426847014, "learning_rate": 1.9855631197395406e-05, "loss": 0.6018, "step": 1042 }, { "epoch": 0.0826302238066944, "grad_norm": 2.3776700075246553, "learning_rate": 1.985519642217932e-05, "loss": 0.5523, "step": 1043 }, { "epoch": 0.08270944741532976, "grad_norm": 2.565118367367117, "learning_rate": 1.9854760998045964e-05, "loss": 0.5729, "step": 1044 }, { "epoch": 0.08278867102396514, "grad_norm": 2.7186306604147688, "learning_rate": 1.9854324925024017e-05, "loss": 0.5959, "step": 1045 }, { "epoch": 0.08286789463260051, "grad_norm": 2.280808570677283, "learning_rate": 1.9853888203142184e-05, "loss": 0.5527, "step": 1046 }, { "epoch": 0.08294711824123589, "grad_norm": 2.19729709498443, "learning_rate": 1.9853450832429234e-05, "loss": 0.4914, "step": 1047 }, { "epoch": 0.08302634184987126, "grad_norm": 2.37706804529759, "learning_rate": 1.9853012812913956e-05, "loss": 0.4247, "step": 1048 }, { "epoch": 0.08310556545850664, "grad_norm": 2.4265574672280357, "learning_rate": 1.9852574144625193e-05, "loss": 0.5721, "step": 1049 }, { "epoch": 0.08318478906714201, "grad_norm": 2.626927345585469, "learning_rate": 1.985213482759183e-05, "loss": 0.5253, "step": 1050 }, { "epoch": 0.08326401267577738, "grad_norm": 2.866786707581821, "learning_rate": 1.9851694861842795e-05, "loss": 0.6334, "step": 1051 }, { "epoch": 0.08334323628441276, "grad_norm": 2.371213699585761, "learning_rate": 1.9851254247407053e-05, "loss": 0.4334, "step": 1052 }, { "epoch": 0.08342245989304813, "grad_norm": 2.2498020645676218, "learning_rate": 1.9850812984313626e-05, "loss": 0.585, "step": 1053 }, { "epoch": 0.08350168350168351, "grad_norm": 2.1204360173482395, "learning_rate": 1.985037107259156e-05, "loss": 0.5654, "step": 1054 }, { "epoch": 0.08358090711031887, "grad_norm": 2.08748411123756, "learning_rate": 1.984992851226996e-05, "loss": 0.5612, "step": 1055 }, { "epoch": 0.08366013071895424, "grad_norm": 2.455402448279221, "learning_rate": 1.9849485303377955e-05, "loss": 0.5161, "step": 1056 }, { "epoch": 0.08373935432758962, "grad_norm": 2.5213630334970847, "learning_rate": 1.984904144594474e-05, "loss": 0.6956, "step": 1057 }, { "epoch": 0.08381857793622499, "grad_norm": 2.4166977001787777, "learning_rate": 1.9848596939999534e-05, "loss": 0.658, "step": 1058 }, { "epoch": 0.08389780154486037, "grad_norm": 2.1928000964600307, "learning_rate": 1.984815178557161e-05, "loss": 0.4511, "step": 1059 }, { "epoch": 0.08397702515349574, "grad_norm": 2.4245482282394377, "learning_rate": 1.9847705982690275e-05, "loss": 0.6044, "step": 1060 }, { "epoch": 0.08405624876213111, "grad_norm": 2.2615363598612728, "learning_rate": 1.984725953138489e-05, "loss": 0.5518, "step": 1061 }, { "epoch": 0.08413547237076649, "grad_norm": 2.115417015206091, "learning_rate": 1.9846812431684843e-05, "loss": 0.4682, "step": 1062 }, { "epoch": 0.08421469597940186, "grad_norm": 2.3264873794250747, "learning_rate": 1.9846364683619575e-05, "loss": 0.6484, "step": 1063 }, { "epoch": 0.08429391958803724, "grad_norm": 2.25754548439187, "learning_rate": 1.9845916287218575e-05, "loss": 0.5645, "step": 1064 }, { "epoch": 0.0843731431966726, "grad_norm": 2.6747068352563312, "learning_rate": 1.9845467242511362e-05, "loss": 0.5747, "step": 1065 }, { "epoch": 0.08445236680530799, "grad_norm": 2.837796690514525, "learning_rate": 1.9845017549527502e-05, "loss": 0.55, "step": 1066 }, { "epoch": 0.08453159041394336, "grad_norm": 2.3349890027042304, "learning_rate": 1.984456720829661e-05, "loss": 0.5326, "step": 1067 }, { "epoch": 0.08461081402257872, "grad_norm": 2.6760624736859575, "learning_rate": 1.9844116218848335e-05, "loss": 0.5845, "step": 1068 }, { "epoch": 0.0846900376312141, "grad_norm": 2.3164545198774347, "learning_rate": 1.9843664581212374e-05, "loss": 0.4944, "step": 1069 }, { "epoch": 0.08476926123984947, "grad_norm": 2.4392096315760377, "learning_rate": 1.9843212295418464e-05, "loss": 0.549, "step": 1070 }, { "epoch": 0.08484848484848485, "grad_norm": 2.807065304480291, "learning_rate": 1.984275936149639e-05, "loss": 0.5743, "step": 1071 }, { "epoch": 0.08492770845712022, "grad_norm": 2.2653560895895457, "learning_rate": 1.984230577947597e-05, "loss": 0.5345, "step": 1072 }, { "epoch": 0.08500693206575559, "grad_norm": 2.392687671543606, "learning_rate": 1.9841851549387074e-05, "loss": 0.5886, "step": 1073 }, { "epoch": 0.08508615567439097, "grad_norm": 2.0049570218640143, "learning_rate": 1.9841396671259606e-05, "loss": 0.5386, "step": 1074 }, { "epoch": 0.08516537928302634, "grad_norm": 2.58416099512533, "learning_rate": 1.9840941145123524e-05, "loss": 0.5644, "step": 1075 }, { "epoch": 0.08524460289166172, "grad_norm": 2.2363938724823105, "learning_rate": 1.984048497100882e-05, "loss": 0.5658, "step": 1076 }, { "epoch": 0.08532382650029709, "grad_norm": 2.1951930411728773, "learning_rate": 1.9840028148945526e-05, "loss": 0.6041, "step": 1077 }, { "epoch": 0.08540305010893247, "grad_norm": 2.196124730052582, "learning_rate": 1.983957067896373e-05, "loss": 0.3754, "step": 1078 }, { "epoch": 0.08548227371756784, "grad_norm": 2.1384632951667246, "learning_rate": 1.9839112561093548e-05, "loss": 0.4146, "step": 1079 }, { "epoch": 0.0855614973262032, "grad_norm": 2.5943666634004696, "learning_rate": 1.983865379536515e-05, "loss": 0.5801, "step": 1080 }, { "epoch": 0.08564072093483859, "grad_norm": 2.4808275383811194, "learning_rate": 1.9838194381808737e-05, "loss": 0.5941, "step": 1081 }, { "epoch": 0.08571994454347395, "grad_norm": 2.1275604317129106, "learning_rate": 1.983773432045456e-05, "loss": 0.4698, "step": 1082 }, { "epoch": 0.08579916815210933, "grad_norm": 2.360931024696737, "learning_rate": 1.9837273611332918e-05, "loss": 0.6583, "step": 1083 }, { "epoch": 0.0858783917607447, "grad_norm": 2.1226193362488455, "learning_rate": 1.983681225447414e-05, "loss": 0.5295, "step": 1084 }, { "epoch": 0.08595761536938007, "grad_norm": 2.484145678782945, "learning_rate": 1.9836350249908606e-05, "loss": 0.7594, "step": 1085 }, { "epoch": 0.08603683897801545, "grad_norm": 2.035756418623099, "learning_rate": 1.983588759766674e-05, "loss": 0.3336, "step": 1086 }, { "epoch": 0.08611606258665082, "grad_norm": 2.2760914643266874, "learning_rate": 1.9835424297779002e-05, "loss": 0.503, "step": 1087 }, { "epoch": 0.0861952861952862, "grad_norm": 2.1878326301137827, "learning_rate": 1.98349603502759e-05, "loss": 0.4444, "step": 1088 }, { "epoch": 0.08627450980392157, "grad_norm": 2.0592784746412325, "learning_rate": 1.983449575518798e-05, "loss": 0.4367, "step": 1089 }, { "epoch": 0.08635373341255695, "grad_norm": 2.2643735590126486, "learning_rate": 1.983403051254584e-05, "loss": 0.4014, "step": 1090 }, { "epoch": 0.08643295702119232, "grad_norm": 2.701903165426508, "learning_rate": 1.9833564622380105e-05, "loss": 0.5097, "step": 1091 }, { "epoch": 0.08651218062982768, "grad_norm": 2.5365783980407643, "learning_rate": 1.9833098084721455e-05, "loss": 0.565, "step": 1092 }, { "epoch": 0.08659140423846307, "grad_norm": 2.312263058319201, "learning_rate": 1.9832630899600607e-05, "loss": 0.5472, "step": 1093 }, { "epoch": 0.08667062784709843, "grad_norm": 2.6998444078873174, "learning_rate": 1.9832163067048335e-05, "loss": 0.5052, "step": 1094 }, { "epoch": 0.08674985145573381, "grad_norm": 2.718849308212614, "learning_rate": 1.9831694587095428e-05, "loss": 0.495, "step": 1095 }, { "epoch": 0.08682907506436918, "grad_norm": 2.2805710437838465, "learning_rate": 1.983122545977274e-05, "loss": 0.559, "step": 1096 }, { "epoch": 0.08690829867300455, "grad_norm": 2.194525402699448, "learning_rate": 1.983075568511116e-05, "loss": 0.5577, "step": 1097 }, { "epoch": 0.08698752228163993, "grad_norm": 2.3875645035218125, "learning_rate": 1.983028526314162e-05, "loss": 0.6205, "step": 1098 }, { "epoch": 0.0870667458902753, "grad_norm": 2.0795549229475863, "learning_rate": 1.98298141938951e-05, "loss": 0.5904, "step": 1099 }, { "epoch": 0.08714596949891068, "grad_norm": 1.9693597637996387, "learning_rate": 1.982934247740261e-05, "loss": 0.4593, "step": 1100 }, { "epoch": 0.08722519310754605, "grad_norm": 2.3509050828577576, "learning_rate": 1.9828870113695217e-05, "loss": 0.6522, "step": 1101 }, { "epoch": 0.08730441671618142, "grad_norm": 2.503366566979004, "learning_rate": 1.9828397102804016e-05, "loss": 0.4066, "step": 1102 }, { "epoch": 0.0873836403248168, "grad_norm": 2.402161114785244, "learning_rate": 1.982792344476016e-05, "loss": 0.519, "step": 1103 }, { "epoch": 0.08746286393345216, "grad_norm": 2.5738632917015827, "learning_rate": 1.982744913959483e-05, "loss": 0.6475, "step": 1104 }, { "epoch": 0.08754208754208755, "grad_norm": 2.0720674857952384, "learning_rate": 1.9826974187339267e-05, "loss": 0.5736, "step": 1105 }, { "epoch": 0.08762131115072291, "grad_norm": 2.0509287976489996, "learning_rate": 1.9826498588024738e-05, "loss": 0.4438, "step": 1106 }, { "epoch": 0.0877005347593583, "grad_norm": 2.358130101245386, "learning_rate": 1.982602234168255e-05, "loss": 0.621, "step": 1107 }, { "epoch": 0.08777975836799366, "grad_norm": 2.2116427993237946, "learning_rate": 1.9825545448344078e-05, "loss": 0.5359, "step": 1108 }, { "epoch": 0.08785898197662903, "grad_norm": 2.7682717064872837, "learning_rate": 1.9825067908040716e-05, "loss": 0.4988, "step": 1109 }, { "epoch": 0.08793820558526441, "grad_norm": 2.4150945446945795, "learning_rate": 1.9824589720803906e-05, "loss": 0.4937, "step": 1110 }, { "epoch": 0.08801742919389978, "grad_norm": 2.588873075138313, "learning_rate": 1.9824110886665137e-05, "loss": 0.5405, "step": 1111 }, { "epoch": 0.08809665280253516, "grad_norm": 2.5414871777571184, "learning_rate": 1.9823631405655933e-05, "loss": 0.5157, "step": 1112 }, { "epoch": 0.08817587641117053, "grad_norm": 2.4131642122876213, "learning_rate": 1.9823151277807873e-05, "loss": 0.5428, "step": 1113 }, { "epoch": 0.0882551000198059, "grad_norm": 2.1832959392334, "learning_rate": 1.9822670503152567e-05, "loss": 0.4526, "step": 1114 }, { "epoch": 0.08833432362844128, "grad_norm": 2.1678480146487313, "learning_rate": 1.982218908172167e-05, "loss": 0.3613, "step": 1115 }, { "epoch": 0.08841354723707665, "grad_norm": 2.411533307772899, "learning_rate": 1.9821707013546885e-05, "loss": 0.677, "step": 1116 }, { "epoch": 0.08849277084571203, "grad_norm": 2.0337302013065544, "learning_rate": 1.9821224298659953e-05, "loss": 0.4903, "step": 1117 }, { "epoch": 0.0885719944543474, "grad_norm": 2.9432963459147716, "learning_rate": 1.9820740937092656e-05, "loss": 0.6193, "step": 1118 }, { "epoch": 0.08865121806298278, "grad_norm": 2.6375121992616, "learning_rate": 1.982025692887682e-05, "loss": 0.6142, "step": 1119 }, { "epoch": 0.08873044167161814, "grad_norm": 2.2098968055286083, "learning_rate": 1.9819772274044323e-05, "loss": 0.4294, "step": 1120 }, { "epoch": 0.08880966528025351, "grad_norm": 1.9195425193672744, "learning_rate": 1.9819286972627066e-05, "loss": 0.4044, "step": 1121 }, { "epoch": 0.08888888888888889, "grad_norm": 2.4016411822158292, "learning_rate": 1.9818801024657014e-05, "loss": 0.5483, "step": 1122 }, { "epoch": 0.08896811249752426, "grad_norm": 2.4155108762995323, "learning_rate": 1.9818314430166158e-05, "loss": 0.4949, "step": 1123 }, { "epoch": 0.08904733610615964, "grad_norm": 2.78135300689024, "learning_rate": 1.981782718918654e-05, "loss": 0.5327, "step": 1124 }, { "epoch": 0.08912655971479501, "grad_norm": 2.346909948332823, "learning_rate": 1.981733930175024e-05, "loss": 0.4374, "step": 1125 }, { "epoch": 0.08920578332343038, "grad_norm": 2.1665616960343685, "learning_rate": 1.9816850767889387e-05, "loss": 0.4909, "step": 1126 }, { "epoch": 0.08928500693206576, "grad_norm": 2.496609749684578, "learning_rate": 1.9816361587636143e-05, "loss": 0.5709, "step": 1127 }, { "epoch": 0.08936423054070113, "grad_norm": 2.349931343807538, "learning_rate": 1.9815871761022727e-05, "loss": 0.6173, "step": 1128 }, { "epoch": 0.08944345414933651, "grad_norm": 2.269799196215516, "learning_rate": 1.9815381288081382e-05, "loss": 0.4906, "step": 1129 }, { "epoch": 0.08952267775797187, "grad_norm": 2.673440163310611, "learning_rate": 1.9814890168844412e-05, "loss": 0.5888, "step": 1130 }, { "epoch": 0.08960190136660724, "grad_norm": 2.118942300398741, "learning_rate": 1.981439840334415e-05, "loss": 0.528, "step": 1131 }, { "epoch": 0.08968112497524262, "grad_norm": 2.271213627250479, "learning_rate": 1.9813905991612974e-05, "loss": 0.5625, "step": 1132 }, { "epoch": 0.08976034858387799, "grad_norm": 2.580820532147506, "learning_rate": 1.9813412933683312e-05, "loss": 0.4875, "step": 1133 }, { "epoch": 0.08983957219251337, "grad_norm": 2.4625476284905776, "learning_rate": 1.9812919229587626e-05, "loss": 0.4058, "step": 1134 }, { "epoch": 0.08991879580114874, "grad_norm": 2.191370044534374, "learning_rate": 1.9812424879358424e-05, "loss": 0.5066, "step": 1135 }, { "epoch": 0.08999801940978412, "grad_norm": 2.351674586942393, "learning_rate": 1.981192988302826e-05, "loss": 0.6397, "step": 1136 }, { "epoch": 0.09007724301841949, "grad_norm": 2.2191575410966253, "learning_rate": 1.981143424062973e-05, "loss": 0.4388, "step": 1137 }, { "epoch": 0.09015646662705486, "grad_norm": 2.377638942591902, "learning_rate": 1.981093795219546e-05, "loss": 0.5633, "step": 1138 }, { "epoch": 0.09023569023569024, "grad_norm": 2.456143566746411, "learning_rate": 1.9810441017758132e-05, "loss": 0.5233, "step": 1139 }, { "epoch": 0.0903149138443256, "grad_norm": 2.1400603190019813, "learning_rate": 1.980994343735047e-05, "loss": 0.4868, "step": 1140 }, { "epoch": 0.09039413745296099, "grad_norm": 2.4365862201405344, "learning_rate": 1.9809445211005235e-05, "loss": 0.527, "step": 1141 }, { "epoch": 0.09047336106159636, "grad_norm": 2.3171574143156324, "learning_rate": 1.980894633875523e-05, "loss": 0.5416, "step": 1142 }, { "epoch": 0.09055258467023172, "grad_norm": 2.2672458176524035, "learning_rate": 1.980844682063331e-05, "loss": 0.5176, "step": 1143 }, { "epoch": 0.0906318082788671, "grad_norm": 2.336202433378965, "learning_rate": 1.980794665667236e-05, "loss": 0.57, "step": 1144 }, { "epoch": 0.09071103188750247, "grad_norm": 2.213737381332928, "learning_rate": 1.9807445846905316e-05, "loss": 0.5883, "step": 1145 }, { "epoch": 0.09079025549613785, "grad_norm": 2.4832808174617442, "learning_rate": 1.980694439136515e-05, "loss": 0.6151, "step": 1146 }, { "epoch": 0.09086947910477322, "grad_norm": 2.035309695248436, "learning_rate": 1.980644229008489e-05, "loss": 0.5427, "step": 1147 }, { "epoch": 0.0909487027134086, "grad_norm": 2.3437276485275036, "learning_rate": 1.9805939543097586e-05, "loss": 0.5379, "step": 1148 }, { "epoch": 0.09102792632204397, "grad_norm": 2.1635536174164813, "learning_rate": 1.9805436150436352e-05, "loss": 0.4341, "step": 1149 }, { "epoch": 0.09110714993067934, "grad_norm": 2.1598382643032106, "learning_rate": 1.9804932112134323e-05, "loss": 0.4478, "step": 1150 }, { "epoch": 0.09118637353931472, "grad_norm": 2.621712715581236, "learning_rate": 1.9804427428224696e-05, "loss": 0.5489, "step": 1151 }, { "epoch": 0.09126559714795009, "grad_norm": 2.741732777540899, "learning_rate": 1.9803922098740696e-05, "loss": 0.5285, "step": 1152 }, { "epoch": 0.09134482075658547, "grad_norm": 2.2823604314766195, "learning_rate": 1.98034161237156e-05, "loss": 0.5482, "step": 1153 }, { "epoch": 0.09142404436522084, "grad_norm": 2.2583037186943073, "learning_rate": 1.9802909503182722e-05, "loss": 0.5335, "step": 1154 }, { "epoch": 0.0915032679738562, "grad_norm": 2.336794350316473, "learning_rate": 1.9802402237175426e-05, "loss": 0.4927, "step": 1155 }, { "epoch": 0.09158249158249158, "grad_norm": 2.109334400919138, "learning_rate": 1.9801894325727104e-05, "loss": 0.426, "step": 1156 }, { "epoch": 0.09166171519112695, "grad_norm": 2.6767397455004978, "learning_rate": 1.980138576887121e-05, "loss": 0.7275, "step": 1157 }, { "epoch": 0.09174093879976233, "grad_norm": 2.1200292119942468, "learning_rate": 1.980087656664122e-05, "loss": 0.4499, "step": 1158 }, { "epoch": 0.0918201624083977, "grad_norm": 2.4409068807120162, "learning_rate": 1.9800366719070668e-05, "loss": 0.603, "step": 1159 }, { "epoch": 0.09189938601703307, "grad_norm": 2.502240881484407, "learning_rate": 1.9799856226193125e-05, "loss": 0.5064, "step": 1160 }, { "epoch": 0.09197860962566845, "grad_norm": 2.5107586371318655, "learning_rate": 1.97993450880422e-05, "loss": 0.4761, "step": 1161 }, { "epoch": 0.09205783323430382, "grad_norm": 2.52045368777118, "learning_rate": 1.9798833304651555e-05, "loss": 0.5551, "step": 1162 }, { "epoch": 0.0921370568429392, "grad_norm": 2.100859266252998, "learning_rate": 1.9798320876054882e-05, "loss": 0.491, "step": 1163 }, { "epoch": 0.09221628045157457, "grad_norm": 2.629915640264029, "learning_rate": 1.9797807802285933e-05, "loss": 0.5826, "step": 1164 }, { "epoch": 0.09229550406020995, "grad_norm": 2.410017640664231, "learning_rate": 1.979729408337848e-05, "loss": 0.5008, "step": 1165 }, { "epoch": 0.09237472766884532, "grad_norm": 2.2935743009461493, "learning_rate": 1.9796779719366355e-05, "loss": 0.5593, "step": 1166 }, { "epoch": 0.09245395127748068, "grad_norm": 2.2391645715067505, "learning_rate": 1.9796264710283425e-05, "loss": 0.6607, "step": 1167 }, { "epoch": 0.09253317488611607, "grad_norm": 2.2076146526130525, "learning_rate": 1.9795749056163595e-05, "loss": 0.5248, "step": 1168 }, { "epoch": 0.09261239849475143, "grad_norm": 1.8103433460053742, "learning_rate": 1.9795232757040827e-05, "loss": 0.3633, "step": 1169 }, { "epoch": 0.09269162210338681, "grad_norm": 2.2385250682269557, "learning_rate": 1.9794715812949117e-05, "loss": 0.4679, "step": 1170 }, { "epoch": 0.09277084571202218, "grad_norm": 2.230733998375238, "learning_rate": 1.9794198223922496e-05, "loss": 0.5891, "step": 1171 }, { "epoch": 0.09285006932065755, "grad_norm": 2.309282906613327, "learning_rate": 1.979367998999505e-05, "loss": 0.4029, "step": 1172 }, { "epoch": 0.09292929292929293, "grad_norm": 2.4423149386148384, "learning_rate": 1.97931611112009e-05, "loss": 0.4837, "step": 1173 }, { "epoch": 0.0930085165379283, "grad_norm": 2.1330231983301196, "learning_rate": 1.9792641587574212e-05, "loss": 0.4331, "step": 1174 }, { "epoch": 0.09308774014656368, "grad_norm": 2.2764042540338076, "learning_rate": 1.9792121419149196e-05, "loss": 0.4995, "step": 1175 }, { "epoch": 0.09316696375519905, "grad_norm": 2.304755667573725, "learning_rate": 1.97916006059601e-05, "loss": 0.4912, "step": 1176 }, { "epoch": 0.09324618736383443, "grad_norm": 2.481570323484625, "learning_rate": 1.979107914804122e-05, "loss": 0.549, "step": 1177 }, { "epoch": 0.0933254109724698, "grad_norm": 2.8406646903648682, "learning_rate": 1.979055704542689e-05, "loss": 0.5797, "step": 1178 }, { "epoch": 0.09340463458110516, "grad_norm": 2.3209900863875386, "learning_rate": 1.9790034298151486e-05, "loss": 0.4511, "step": 1179 }, { "epoch": 0.09348385818974055, "grad_norm": 2.386819623697311, "learning_rate": 1.9789510906249432e-05, "loss": 0.4525, "step": 1180 }, { "epoch": 0.09356308179837591, "grad_norm": 2.284857855201307, "learning_rate": 1.9788986869755187e-05, "loss": 0.529, "step": 1181 }, { "epoch": 0.0936423054070113, "grad_norm": 2.574328508526162, "learning_rate": 1.978846218870326e-05, "loss": 0.8237, "step": 1182 }, { "epoch": 0.09372152901564666, "grad_norm": 2.0393555958534635, "learning_rate": 1.9787936863128195e-05, "loss": 0.4356, "step": 1183 }, { "epoch": 0.09380075262428203, "grad_norm": 2.444795081415653, "learning_rate": 1.9787410893064584e-05, "loss": 0.5858, "step": 1184 }, { "epoch": 0.09387997623291741, "grad_norm": 2.6478975907834936, "learning_rate": 1.978688427854706e-05, "loss": 0.4975, "step": 1185 }, { "epoch": 0.09395919984155278, "grad_norm": 2.2927301216239138, "learning_rate": 1.97863570196103e-05, "loss": 0.5294, "step": 1186 }, { "epoch": 0.09403842345018816, "grad_norm": 2.2859707838860808, "learning_rate": 1.9785829116289017e-05, "loss": 0.5564, "step": 1187 }, { "epoch": 0.09411764705882353, "grad_norm": 2.242425012732724, "learning_rate": 1.9785300568617973e-05, "loss": 0.5383, "step": 1188 }, { "epoch": 0.09419687066745891, "grad_norm": 2.2826212487488258, "learning_rate": 1.978477137663197e-05, "loss": 0.5753, "step": 1189 }, { "epoch": 0.09427609427609428, "grad_norm": 2.0926510648331527, "learning_rate": 1.9784241540365856e-05, "loss": 0.609, "step": 1190 }, { "epoch": 0.09435531788472964, "grad_norm": 1.8044647273101764, "learning_rate": 1.9783711059854514e-05, "loss": 0.5156, "step": 1191 }, { "epoch": 0.09443454149336503, "grad_norm": 2.3067515982080926, "learning_rate": 1.9783179935132874e-05, "loss": 0.5784, "step": 1192 }, { "epoch": 0.0945137651020004, "grad_norm": 2.3706657264655155, "learning_rate": 1.978264816623591e-05, "loss": 0.4633, "step": 1193 }, { "epoch": 0.09459298871063578, "grad_norm": 2.175872253953192, "learning_rate": 1.9782115753198633e-05, "loss": 0.5168, "step": 1194 }, { "epoch": 0.09467221231927114, "grad_norm": 2.109943903248783, "learning_rate": 1.9781582696056105e-05, "loss": 0.5042, "step": 1195 }, { "epoch": 0.09475143592790651, "grad_norm": 2.267825030136225, "learning_rate": 1.9781048994843423e-05, "loss": 0.531, "step": 1196 }, { "epoch": 0.09483065953654189, "grad_norm": 1.9213490245286327, "learning_rate": 1.9780514649595727e-05, "loss": 0.3961, "step": 1197 }, { "epoch": 0.09490988314517726, "grad_norm": 2.729647736955396, "learning_rate": 1.97799796603482e-05, "loss": 0.5893, "step": 1198 }, { "epoch": 0.09498910675381264, "grad_norm": 2.0323492123321825, "learning_rate": 1.9779444027136075e-05, "loss": 0.5612, "step": 1199 }, { "epoch": 0.09506833036244801, "grad_norm": 2.23912741901332, "learning_rate": 1.977890774999461e-05, "loss": 0.4758, "step": 1200 }, { "epoch": 0.09514755397108338, "grad_norm": 2.3558358687105567, "learning_rate": 1.977837082895913e-05, "loss": 0.5562, "step": 1201 }, { "epoch": 0.09522677757971876, "grad_norm": 2.2231082077832207, "learning_rate": 1.9777833264064977e-05, "loss": 0.449, "step": 1202 }, { "epoch": 0.09530600118835413, "grad_norm": 2.3733770072652747, "learning_rate": 1.9777295055347553e-05, "loss": 0.4318, "step": 1203 }, { "epoch": 0.0953852247969895, "grad_norm": 2.302576114795998, "learning_rate": 1.9776756202842297e-05, "loss": 0.4529, "step": 1204 }, { "epoch": 0.09546444840562487, "grad_norm": 2.786686457430988, "learning_rate": 1.9776216706584682e-05, "loss": 0.6389, "step": 1205 }, { "epoch": 0.09554367201426026, "grad_norm": 2.1640912723222296, "learning_rate": 1.977567656661024e-05, "loss": 0.492, "step": 1206 }, { "epoch": 0.09562289562289562, "grad_norm": 2.0051288858369505, "learning_rate": 1.9775135782954534e-05, "loss": 0.4634, "step": 1207 }, { "epoch": 0.09570211923153099, "grad_norm": 2.52693859178143, "learning_rate": 1.9774594355653175e-05, "loss": 0.5035, "step": 1208 }, { "epoch": 0.09578134284016637, "grad_norm": 2.0072780933553402, "learning_rate": 1.9774052284741804e-05, "loss": 0.4775, "step": 1209 }, { "epoch": 0.09586056644880174, "grad_norm": 2.5248290352741196, "learning_rate": 1.9773509570256124e-05, "loss": 0.6186, "step": 1210 }, { "epoch": 0.09593979005743712, "grad_norm": 2.7138568006642743, "learning_rate": 1.9772966212231863e-05, "loss": 0.5514, "step": 1211 }, { "epoch": 0.09601901366607249, "grad_norm": 2.4705366215900186, "learning_rate": 1.9772422210704803e-05, "loss": 0.5646, "step": 1212 }, { "epoch": 0.09609823727470786, "grad_norm": 1.8637671901095176, "learning_rate": 1.977187756571076e-05, "loss": 0.4846, "step": 1213 }, { "epoch": 0.09617746088334324, "grad_norm": 2.393257399096758, "learning_rate": 1.9771332277285603e-05, "loss": 0.5056, "step": 1214 }, { "epoch": 0.0962566844919786, "grad_norm": 2.4048595101962946, "learning_rate": 1.977078634546523e-05, "loss": 0.5664, "step": 1215 }, { "epoch": 0.09633590810061399, "grad_norm": 2.3086846618645778, "learning_rate": 1.977023977028559e-05, "loss": 0.6001, "step": 1216 }, { "epoch": 0.09641513170924935, "grad_norm": 2.375784985667264, "learning_rate": 1.9769692551782672e-05, "loss": 0.6104, "step": 1217 }, { "epoch": 0.09649435531788474, "grad_norm": 1.9729956772425852, "learning_rate": 1.976914468999251e-05, "loss": 0.5019, "step": 1218 }, { "epoch": 0.0965735789265201, "grad_norm": 2.777477555441641, "learning_rate": 1.9768596184951174e-05, "loss": 0.6158, "step": 1219 }, { "epoch": 0.09665280253515547, "grad_norm": 2.4295227812181426, "learning_rate": 1.9768047036694785e-05, "loss": 0.6095, "step": 1220 }, { "epoch": 0.09673202614379085, "grad_norm": 2.431015576210552, "learning_rate": 1.9767497245259496e-05, "loss": 0.5322, "step": 1221 }, { "epoch": 0.09681124975242622, "grad_norm": 1.9937253550706193, "learning_rate": 1.9766946810681517e-05, "loss": 0.5104, "step": 1222 }, { "epoch": 0.0968904733610616, "grad_norm": 2.3508815834530217, "learning_rate": 1.9766395732997082e-05, "loss": 0.4864, "step": 1223 }, { "epoch": 0.09696969696969697, "grad_norm": 2.1732492944130612, "learning_rate": 1.9765844012242482e-05, "loss": 0.4722, "step": 1224 }, { "epoch": 0.09704892057833234, "grad_norm": 2.1792013845006792, "learning_rate": 1.9765291648454042e-05, "loss": 0.4567, "step": 1225 }, { "epoch": 0.09712814418696772, "grad_norm": 2.216061633850962, "learning_rate": 1.9764738641668137e-05, "loss": 0.5426, "step": 1226 }, { "epoch": 0.09720736779560309, "grad_norm": 2.3564778208703823, "learning_rate": 1.9764184991921178e-05, "loss": 0.5025, "step": 1227 }, { "epoch": 0.09728659140423847, "grad_norm": 2.20319294384829, "learning_rate": 1.9763630699249615e-05, "loss": 0.4232, "step": 1228 }, { "epoch": 0.09736581501287384, "grad_norm": 2.071192665133225, "learning_rate": 1.9763075763689956e-05, "loss": 0.4236, "step": 1229 }, { "epoch": 0.0974450386215092, "grad_norm": 1.9634205377783411, "learning_rate": 1.9762520185278734e-05, "loss": 0.416, "step": 1230 }, { "epoch": 0.09752426223014458, "grad_norm": 2.741419263392541, "learning_rate": 1.9761963964052528e-05, "loss": 0.5124, "step": 1231 }, { "epoch": 0.09760348583877995, "grad_norm": 2.146037377471575, "learning_rate": 1.976140710004797e-05, "loss": 0.5936, "step": 1232 }, { "epoch": 0.09768270944741533, "grad_norm": 2.216905805926447, "learning_rate": 1.976084959330172e-05, "loss": 0.5554, "step": 1233 }, { "epoch": 0.0977619330560507, "grad_norm": 2.0828080968632268, "learning_rate": 1.9760291443850496e-05, "loss": 0.5043, "step": 1234 }, { "epoch": 0.09784115666468608, "grad_norm": 2.353320748286769, "learning_rate": 1.9759732651731037e-05, "loss": 0.4785, "step": 1235 }, { "epoch": 0.09792038027332145, "grad_norm": 2.645633383650802, "learning_rate": 1.975917321698015e-05, "loss": 0.5462, "step": 1236 }, { "epoch": 0.09799960388195682, "grad_norm": 1.977526715455474, "learning_rate": 1.9758613139634662e-05, "loss": 0.5362, "step": 1237 }, { "epoch": 0.0980788274905922, "grad_norm": 2.3591579859369203, "learning_rate": 1.975805241973145e-05, "loss": 0.599, "step": 1238 }, { "epoch": 0.09815805109922757, "grad_norm": 2.1680867235864634, "learning_rate": 1.9757491057307448e-05, "loss": 0.6011, "step": 1239 }, { "epoch": 0.09823727470786295, "grad_norm": 1.9512126136847754, "learning_rate": 1.9756929052399606e-05, "loss": 0.4796, "step": 1240 }, { "epoch": 0.09831649831649832, "grad_norm": 2.173023721130156, "learning_rate": 1.9756366405044928e-05, "loss": 0.4921, "step": 1241 }, { "epoch": 0.09839572192513368, "grad_norm": 2.1913021786140483, "learning_rate": 1.9755803115280476e-05, "loss": 0.5157, "step": 1242 }, { "epoch": 0.09847494553376906, "grad_norm": 2.766369888233421, "learning_rate": 1.9755239183143323e-05, "loss": 0.5181, "step": 1243 }, { "epoch": 0.09855416914240443, "grad_norm": 2.2744231441113434, "learning_rate": 1.9754674608670613e-05, "loss": 0.6038, "step": 1244 }, { "epoch": 0.09863339275103981, "grad_norm": 2.267371434111587, "learning_rate": 1.9754109391899514e-05, "loss": 0.555, "step": 1245 }, { "epoch": 0.09871261635967518, "grad_norm": 2.25288566372562, "learning_rate": 1.975354353286725e-05, "loss": 0.4967, "step": 1246 }, { "epoch": 0.09879183996831056, "grad_norm": 2.1701598042655204, "learning_rate": 1.9752977031611072e-05, "loss": 0.4921, "step": 1247 }, { "epoch": 0.09887106357694593, "grad_norm": 2.6282075195812613, "learning_rate": 1.9752409888168285e-05, "loss": 0.5678, "step": 1248 }, { "epoch": 0.0989502871855813, "grad_norm": 2.543478358588145, "learning_rate": 1.975184210257623e-05, "loss": 0.4775, "step": 1249 }, { "epoch": 0.09902951079421668, "grad_norm": 1.9627462180607333, "learning_rate": 1.97512736748723e-05, "loss": 0.4183, "step": 1250 }, { "epoch": 0.09910873440285205, "grad_norm": 2.4357277056177944, "learning_rate": 1.975070460509392e-05, "loss": 0.5256, "step": 1251 }, { "epoch": 0.09918795801148743, "grad_norm": 2.5276796768433734, "learning_rate": 1.9750134893278553e-05, "loss": 0.5206, "step": 1252 }, { "epoch": 0.0992671816201228, "grad_norm": 2.0876619760814834, "learning_rate": 1.974956453946372e-05, "loss": 0.4243, "step": 1253 }, { "epoch": 0.09934640522875816, "grad_norm": 2.187654364862005, "learning_rate": 1.9748993543686973e-05, "loss": 0.4743, "step": 1254 }, { "epoch": 0.09942562883739355, "grad_norm": 2.424028485743396, "learning_rate": 1.9748421905985915e-05, "loss": 0.5166, "step": 1255 }, { "epoch": 0.09950485244602891, "grad_norm": 2.1951241985135312, "learning_rate": 1.9747849626398176e-05, "loss": 0.5178, "step": 1256 }, { "epoch": 0.0995840760546643, "grad_norm": 2.0811995259320386, "learning_rate": 1.9747276704961447e-05, "loss": 0.4329, "step": 1257 }, { "epoch": 0.09966329966329966, "grad_norm": 2.279498263179114, "learning_rate": 1.9746703141713444e-05, "loss": 0.5354, "step": 1258 }, { "epoch": 0.09974252327193504, "grad_norm": 1.9895821829242717, "learning_rate": 1.974612893669194e-05, "loss": 0.4478, "step": 1259 }, { "epoch": 0.09982174688057041, "grad_norm": 2.9207547936648877, "learning_rate": 1.974555408993474e-05, "loss": 0.5537, "step": 1260 }, { "epoch": 0.09990097048920578, "grad_norm": 2.3649710729599565, "learning_rate": 1.9744978601479693e-05, "loss": 0.5399, "step": 1261 }, { "epoch": 0.09998019409784116, "grad_norm": 2.3819317618700215, "learning_rate": 1.97444024713647e-05, "loss": 0.5064, "step": 1262 }, { "epoch": 0.10005941770647653, "grad_norm": 2.1908432352275855, "learning_rate": 1.9743825699627687e-05, "loss": 0.5147, "step": 1263 }, { "epoch": 0.10013864131511191, "grad_norm": 2.287527706598705, "learning_rate": 1.974324828630664e-05, "loss": 0.5913, "step": 1264 }, { "epoch": 0.10021786492374728, "grad_norm": 2.406175882809149, "learning_rate": 1.974267023143957e-05, "loss": 0.4876, "step": 1265 }, { "epoch": 0.10029708853238264, "grad_norm": 2.6522953771586018, "learning_rate": 1.974209153506455e-05, "loss": 0.6547, "step": 1266 }, { "epoch": 0.10037631214101803, "grad_norm": 2.6774454544898947, "learning_rate": 1.9741512197219675e-05, "loss": 0.5214, "step": 1267 }, { "epoch": 0.1004555357496534, "grad_norm": 2.284614930368995, "learning_rate": 1.9740932217943095e-05, "loss": 0.4807, "step": 1268 }, { "epoch": 0.10053475935828877, "grad_norm": 2.0462928836102816, "learning_rate": 1.9740351597272998e-05, "loss": 0.5032, "step": 1269 }, { "epoch": 0.10061398296692414, "grad_norm": 2.3892670018590443, "learning_rate": 1.9739770335247616e-05, "loss": 0.6089, "step": 1270 }, { "epoch": 0.10069320657555951, "grad_norm": 2.1011891572579957, "learning_rate": 1.9739188431905223e-05, "loss": 0.6247, "step": 1271 }, { "epoch": 0.10077243018419489, "grad_norm": 2.069333043250612, "learning_rate": 1.9738605887284134e-05, "loss": 0.5171, "step": 1272 }, { "epoch": 0.10085165379283026, "grad_norm": 1.9830086566428888, "learning_rate": 1.9738022701422705e-05, "loss": 0.604, "step": 1273 }, { "epoch": 0.10093087740146564, "grad_norm": 1.9189881387192915, "learning_rate": 1.973743887435934e-05, "loss": 0.4395, "step": 1274 }, { "epoch": 0.10101010101010101, "grad_norm": 2.158610283217366, "learning_rate": 1.9736854406132476e-05, "loss": 0.4273, "step": 1275 }, { "epoch": 0.10108932461873639, "grad_norm": 2.479550264026852, "learning_rate": 1.9736269296780603e-05, "loss": 0.475, "step": 1276 }, { "epoch": 0.10116854822737176, "grad_norm": 2.2418316267104403, "learning_rate": 1.9735683546342243e-05, "loss": 0.5047, "step": 1277 }, { "epoch": 0.10124777183600712, "grad_norm": 2.2333766677349445, "learning_rate": 1.9735097154855968e-05, "loss": 0.5292, "step": 1278 }, { "epoch": 0.1013269954446425, "grad_norm": 2.3155805068531956, "learning_rate": 1.9734510122360383e-05, "loss": 0.6675, "step": 1279 }, { "epoch": 0.10140621905327787, "grad_norm": 2.233679218583782, "learning_rate": 1.973392244889415e-05, "loss": 0.5609, "step": 1280 }, { "epoch": 0.10148544266191326, "grad_norm": 2.2578654982470776, "learning_rate": 1.9733334134495963e-05, "loss": 0.5459, "step": 1281 }, { "epoch": 0.10156466627054862, "grad_norm": 2.235927539895228, "learning_rate": 1.9732745179204553e-05, "loss": 0.5324, "step": 1282 }, { "epoch": 0.10164388987918399, "grad_norm": 2.2734544531055936, "learning_rate": 1.9732155583058705e-05, "loss": 0.5646, "step": 1283 }, { "epoch": 0.10172311348781937, "grad_norm": 2.3225330071712644, "learning_rate": 1.973156534609724e-05, "loss": 0.5223, "step": 1284 }, { "epoch": 0.10180233709645474, "grad_norm": 2.1874778254951797, "learning_rate": 1.973097446835902e-05, "loss": 0.5121, "step": 1285 }, { "epoch": 0.10188156070509012, "grad_norm": 1.7930253360562414, "learning_rate": 1.9730382949882955e-05, "loss": 0.3641, "step": 1286 }, { "epoch": 0.10196078431372549, "grad_norm": 2.1533712556327, "learning_rate": 1.9729790790707995e-05, "loss": 0.478, "step": 1287 }, { "epoch": 0.10204000792236087, "grad_norm": 2.188433134663067, "learning_rate": 1.9729197990873127e-05, "loss": 0.4731, "step": 1288 }, { "epoch": 0.10211923153099624, "grad_norm": 3.7277942529749044, "learning_rate": 1.9728604550417385e-05, "loss": 0.5092, "step": 1289 }, { "epoch": 0.1021984551396316, "grad_norm": 2.917114218579833, "learning_rate": 1.9728010469379844e-05, "loss": 0.4841, "step": 1290 }, { "epoch": 0.10227767874826699, "grad_norm": 2.467788717668694, "learning_rate": 1.972741574779962e-05, "loss": 0.5663, "step": 1291 }, { "epoch": 0.10235690235690235, "grad_norm": 2.166985276940756, "learning_rate": 1.9726820385715877e-05, "loss": 0.428, "step": 1292 }, { "epoch": 0.10243612596553774, "grad_norm": 2.734128340973256, "learning_rate": 1.9726224383167815e-05, "loss": 0.5253, "step": 1293 }, { "epoch": 0.1025153495741731, "grad_norm": 2.0422415700032412, "learning_rate": 1.9725627740194673e-05, "loss": 0.4421, "step": 1294 }, { "epoch": 0.10259457318280847, "grad_norm": 2.089549173570466, "learning_rate": 1.9725030456835745e-05, "loss": 0.4337, "step": 1295 }, { "epoch": 0.10267379679144385, "grad_norm": 2.459132497291346, "learning_rate": 1.9724432533130355e-05, "loss": 0.629, "step": 1296 }, { "epoch": 0.10275302040007922, "grad_norm": 2.2745960847474733, "learning_rate": 1.972383396911787e-05, "loss": 0.5844, "step": 1297 }, { "epoch": 0.1028322440087146, "grad_norm": 2.4412656469129512, "learning_rate": 1.9723234764837708e-05, "loss": 0.4796, "step": 1298 }, { "epoch": 0.10291146761734997, "grad_norm": 2.092936022873505, "learning_rate": 1.9722634920329323e-05, "loss": 0.4889, "step": 1299 }, { "epoch": 0.10299069122598534, "grad_norm": 2.376181841165693, "learning_rate": 1.9722034435632207e-05, "loss": 0.6405, "step": 1300 }, { "epoch": 0.10306991483462072, "grad_norm": 2.6309636850104483, "learning_rate": 1.972143331078591e-05, "loss": 0.6636, "step": 1301 }, { "epoch": 0.10314913844325609, "grad_norm": 2.125338740252849, "learning_rate": 1.972083154583e-05, "loss": 0.4635, "step": 1302 }, { "epoch": 0.10322836205189147, "grad_norm": 2.015033388272449, "learning_rate": 1.972022914080411e-05, "loss": 0.4261, "step": 1303 }, { "epoch": 0.10330758566052684, "grad_norm": 2.2598935735723997, "learning_rate": 1.9719626095747897e-05, "loss": 0.5767, "step": 1304 }, { "epoch": 0.10338680926916222, "grad_norm": 1.8116809672507894, "learning_rate": 1.971902241070108e-05, "loss": 0.4311, "step": 1305 }, { "epoch": 0.10346603287779758, "grad_norm": 2.628031805876651, "learning_rate": 1.9718418085703397e-05, "loss": 0.6494, "step": 1306 }, { "epoch": 0.10354525648643295, "grad_norm": 2.04617520653546, "learning_rate": 1.971781312079465e-05, "loss": 0.4754, "step": 1307 }, { "epoch": 0.10362448009506833, "grad_norm": 1.95774659537593, "learning_rate": 1.9717207516014664e-05, "loss": 0.4569, "step": 1308 }, { "epoch": 0.1037037037037037, "grad_norm": 2.117897026087532, "learning_rate": 1.9716601271403322e-05, "loss": 0.4425, "step": 1309 }, { "epoch": 0.10378292731233908, "grad_norm": 2.815940052246426, "learning_rate": 1.9715994387000537e-05, "loss": 0.5858, "step": 1310 }, { "epoch": 0.10386215092097445, "grad_norm": 2.4192115644317846, "learning_rate": 1.9715386862846272e-05, "loss": 0.5589, "step": 1311 }, { "epoch": 0.10394137452960982, "grad_norm": 2.779990687395962, "learning_rate": 1.971477869898053e-05, "loss": 0.4776, "step": 1312 }, { "epoch": 0.1040205981382452, "grad_norm": 2.3997539806034967, "learning_rate": 1.9714169895443357e-05, "loss": 0.4748, "step": 1313 }, { "epoch": 0.10409982174688057, "grad_norm": 2.4012592625019913, "learning_rate": 1.971356045227484e-05, "loss": 0.5633, "step": 1314 }, { "epoch": 0.10417904535551595, "grad_norm": 2.2427710239188583, "learning_rate": 1.97129503695151e-05, "loss": 0.5419, "step": 1315 }, { "epoch": 0.10425826896415132, "grad_norm": 2.28467564395694, "learning_rate": 1.9712339647204313e-05, "loss": 0.4789, "step": 1316 }, { "epoch": 0.1043374925727867, "grad_norm": 3.0056755188812057, "learning_rate": 1.97117282853827e-05, "loss": 0.3512, "step": 1317 }, { "epoch": 0.10441671618142206, "grad_norm": 2.7055295071619554, "learning_rate": 1.9711116284090506e-05, "loss": 0.6086, "step": 1318 }, { "epoch": 0.10449593979005743, "grad_norm": 3.3132267338184374, "learning_rate": 1.971050364336803e-05, "loss": 0.5974, "step": 1319 }, { "epoch": 0.10457516339869281, "grad_norm": 3.565549385918516, "learning_rate": 1.9709890363255617e-05, "loss": 0.5051, "step": 1320 }, { "epoch": 0.10465438700732818, "grad_norm": 3.882957308066483, "learning_rate": 1.9709276443793638e-05, "loss": 0.6568, "step": 1321 }, { "epoch": 0.10473361061596356, "grad_norm": 2.776105672136285, "learning_rate": 1.970866188502253e-05, "loss": 0.6399, "step": 1322 }, { "epoch": 0.10481283422459893, "grad_norm": 4.536532459565255, "learning_rate": 1.970804668698275e-05, "loss": 0.4474, "step": 1323 }, { "epoch": 0.1048920578332343, "grad_norm": 2.7320798978149883, "learning_rate": 1.970743084971481e-05, "loss": 0.5845, "step": 1324 }, { "epoch": 0.10497128144186968, "grad_norm": 1.8859528357289852, "learning_rate": 1.970681437325925e-05, "loss": 0.4127, "step": 1325 }, { "epoch": 0.10505050505050505, "grad_norm": 2.351724306388326, "learning_rate": 1.9706197257656675e-05, "loss": 0.6094, "step": 1326 }, { "epoch": 0.10512972865914043, "grad_norm": 2.1602675824098365, "learning_rate": 1.9705579502947712e-05, "loss": 0.4579, "step": 1327 }, { "epoch": 0.1052089522677758, "grad_norm": 2.1566597592068426, "learning_rate": 1.9704961109173042e-05, "loss": 0.6245, "step": 1328 }, { "epoch": 0.10528817587641116, "grad_norm": 2.456475444549144, "learning_rate": 1.9704342076373378e-05, "loss": 0.5567, "step": 1329 }, { "epoch": 0.10536739948504655, "grad_norm": 2.361525570523304, "learning_rate": 1.9703722404589484e-05, "loss": 0.4448, "step": 1330 }, { "epoch": 0.10544662309368191, "grad_norm": 2.089131308783069, "learning_rate": 1.970310209386216e-05, "loss": 0.4669, "step": 1331 }, { "epoch": 0.1055258467023173, "grad_norm": 2.215335156635847, "learning_rate": 1.9702481144232253e-05, "loss": 0.462, "step": 1332 }, { "epoch": 0.10560507031095266, "grad_norm": 2.1499694316036804, "learning_rate": 1.9701859555740647e-05, "loss": 0.4532, "step": 1333 }, { "epoch": 0.10568429391958804, "grad_norm": 2.0674533804386206, "learning_rate": 1.9701237328428272e-05, "loss": 0.5602, "step": 1334 }, { "epoch": 0.10576351752822341, "grad_norm": 2.2777257235135764, "learning_rate": 1.9700614462336096e-05, "loss": 0.5804, "step": 1335 }, { "epoch": 0.10584274113685878, "grad_norm": 2.450934116180437, "learning_rate": 1.9699990957505136e-05, "loss": 0.552, "step": 1336 }, { "epoch": 0.10592196474549416, "grad_norm": 2.074400066835221, "learning_rate": 1.9699366813976443e-05, "loss": 0.4621, "step": 1337 }, { "epoch": 0.10600118835412953, "grad_norm": 2.0631351800189734, "learning_rate": 1.9698742031791118e-05, "loss": 0.582, "step": 1338 }, { "epoch": 0.10608041196276491, "grad_norm": 2.252480410369213, "learning_rate": 1.96981166109903e-05, "loss": 0.4432, "step": 1339 }, { "epoch": 0.10615963557140028, "grad_norm": 2.383505553669203, "learning_rate": 1.9697490551615162e-05, "loss": 0.5494, "step": 1340 }, { "epoch": 0.10623885918003564, "grad_norm": 2.4455835402480877, "learning_rate": 1.9696863853706937e-05, "loss": 0.4431, "step": 1341 }, { "epoch": 0.10631808278867103, "grad_norm": 1.7569167065533893, "learning_rate": 1.969623651730688e-05, "loss": 0.3387, "step": 1342 }, { "epoch": 0.1063973063973064, "grad_norm": 2.409163867936381, "learning_rate": 1.969560854245631e-05, "loss": 0.5591, "step": 1343 }, { "epoch": 0.10647653000594177, "grad_norm": 1.9511644788047329, "learning_rate": 1.9694979929196566e-05, "loss": 0.4673, "step": 1344 }, { "epoch": 0.10655575361457714, "grad_norm": 2.057363452047913, "learning_rate": 1.9694350677569043e-05, "loss": 0.4632, "step": 1345 }, { "epoch": 0.10663497722321252, "grad_norm": 2.214010071480097, "learning_rate": 1.9693720787615174e-05, "loss": 0.5368, "step": 1346 }, { "epoch": 0.10671420083184789, "grad_norm": 1.8988530420406244, "learning_rate": 1.9693090259376436e-05, "loss": 0.3494, "step": 1347 }, { "epoch": 0.10679342444048326, "grad_norm": 2.074989357202546, "learning_rate": 1.9692459092894343e-05, "loss": 0.5136, "step": 1348 }, { "epoch": 0.10687264804911864, "grad_norm": 2.207738979422821, "learning_rate": 1.969182728821046e-05, "loss": 0.5687, "step": 1349 }, { "epoch": 0.10695187165775401, "grad_norm": 2.4109550297326314, "learning_rate": 1.969119484536638e-05, "loss": 0.4685, "step": 1350 }, { "epoch": 0.10703109526638939, "grad_norm": 2.1401189992372496, "learning_rate": 1.969056176440375e-05, "loss": 0.4236, "step": 1351 }, { "epoch": 0.10711031887502476, "grad_norm": 2.0901917705160407, "learning_rate": 1.9689928045364258e-05, "loss": 0.4008, "step": 1352 }, { "epoch": 0.10718954248366012, "grad_norm": 2.191145863247345, "learning_rate": 1.9689293688289627e-05, "loss": 0.5192, "step": 1353 }, { "epoch": 0.1072687660922955, "grad_norm": 1.9571435588422554, "learning_rate": 1.968865869322163e-05, "loss": 0.4484, "step": 1354 }, { "epoch": 0.10734798970093087, "grad_norm": 2.5446998878971017, "learning_rate": 1.968802306020208e-05, "loss": 0.587, "step": 1355 }, { "epoch": 0.10742721330956626, "grad_norm": 2.396013303266199, "learning_rate": 1.968738678927282e-05, "loss": 0.6018, "step": 1356 }, { "epoch": 0.10750643691820162, "grad_norm": 2.3787680427272186, "learning_rate": 1.9686749880475756e-05, "loss": 0.4426, "step": 1357 }, { "epoch": 0.107585660526837, "grad_norm": 2.2608244331470178, "learning_rate": 1.9686112333852826e-05, "loss": 0.5174, "step": 1358 }, { "epoch": 0.10766488413547237, "grad_norm": 2.1900368568921755, "learning_rate": 1.9685474149446e-05, "loss": 0.5377, "step": 1359 }, { "epoch": 0.10774410774410774, "grad_norm": 2.2683388133593625, "learning_rate": 1.9684835327297306e-05, "loss": 0.4892, "step": 1360 }, { "epoch": 0.10782333135274312, "grad_norm": 2.377612781702832, "learning_rate": 1.9684195867448806e-05, "loss": 0.4858, "step": 1361 }, { "epoch": 0.10790255496137849, "grad_norm": 2.721941567580665, "learning_rate": 1.9683555769942608e-05, "loss": 0.5264, "step": 1362 }, { "epoch": 0.10798177857001387, "grad_norm": 2.1630529412613586, "learning_rate": 1.968291503482086e-05, "loss": 0.4109, "step": 1363 }, { "epoch": 0.10806100217864924, "grad_norm": 2.532963871025193, "learning_rate": 1.968227366212574e-05, "loss": 0.5461, "step": 1364 }, { "epoch": 0.1081402257872846, "grad_norm": 2.0279043105152805, "learning_rate": 1.968163165189949e-05, "loss": 0.5266, "step": 1365 }, { "epoch": 0.10821944939591999, "grad_norm": 2.181519166040357, "learning_rate": 1.9680989004184383e-05, "loss": 0.4409, "step": 1366 }, { "epoch": 0.10829867300455535, "grad_norm": 2.0071130131159545, "learning_rate": 1.968034571902273e-05, "loss": 0.5636, "step": 1367 }, { "epoch": 0.10837789661319074, "grad_norm": 2.0732762064516606, "learning_rate": 1.967970179645689e-05, "loss": 0.3248, "step": 1368 }, { "epoch": 0.1084571202218261, "grad_norm": 2.323664796075749, "learning_rate": 1.9679057236529266e-05, "loss": 0.5848, "step": 1369 }, { "epoch": 0.10853634383046147, "grad_norm": 2.770374447369148, "learning_rate": 1.9678412039282292e-05, "loss": 0.6797, "step": 1370 }, { "epoch": 0.10861556743909685, "grad_norm": 2.418803718526639, "learning_rate": 1.967776620475846e-05, "loss": 0.443, "step": 1371 }, { "epoch": 0.10869479104773222, "grad_norm": 2.2391812851513375, "learning_rate": 1.9677119733000283e-05, "loss": 0.5881, "step": 1372 }, { "epoch": 0.1087740146563676, "grad_norm": 2.5260582610737243, "learning_rate": 1.967647262405034e-05, "loss": 0.4752, "step": 1373 }, { "epoch": 0.10885323826500297, "grad_norm": 2.0597832877880284, "learning_rate": 1.967582487795123e-05, "loss": 0.3699, "step": 1374 }, { "epoch": 0.10893246187363835, "grad_norm": 2.0817716920621945, "learning_rate": 1.967517649474561e-05, "loss": 0.4187, "step": 1375 }, { "epoch": 0.10901168548227372, "grad_norm": 1.9458466176770466, "learning_rate": 1.9674527474476175e-05, "loss": 0.4809, "step": 1376 }, { "epoch": 0.10909090909090909, "grad_norm": 2.10176318772313, "learning_rate": 1.9673877817185656e-05, "loss": 0.4342, "step": 1377 }, { "epoch": 0.10917013269954447, "grad_norm": 2.2836851871431145, "learning_rate": 1.9673227522916827e-05, "loss": 0.5271, "step": 1378 }, { "epoch": 0.10924935630817983, "grad_norm": 1.90201178977515, "learning_rate": 1.9672576591712517e-05, "loss": 0.4403, "step": 1379 }, { "epoch": 0.10932857991681522, "grad_norm": 1.985590042962168, "learning_rate": 1.9671925023615572e-05, "loss": 0.4756, "step": 1380 }, { "epoch": 0.10940780352545058, "grad_norm": 2.0719820061206757, "learning_rate": 1.9671272818668906e-05, "loss": 0.4786, "step": 1381 }, { "epoch": 0.10948702713408595, "grad_norm": 2.1166472216413292, "learning_rate": 1.967061997691546e-05, "loss": 0.394, "step": 1382 }, { "epoch": 0.10956625074272133, "grad_norm": 2.3947940276839836, "learning_rate": 1.966996649839822e-05, "loss": 0.5613, "step": 1383 }, { "epoch": 0.1096454743513567, "grad_norm": 1.9176786321029364, "learning_rate": 1.9669312383160217e-05, "loss": 0.4751, "step": 1384 }, { "epoch": 0.10972469795999208, "grad_norm": 2.4938684252566565, "learning_rate": 1.966865763124452e-05, "loss": 0.5545, "step": 1385 }, { "epoch": 0.10980392156862745, "grad_norm": 2.1487134280280507, "learning_rate": 1.966800224269424e-05, "loss": 0.4548, "step": 1386 }, { "epoch": 0.10988314517726283, "grad_norm": 2.1239835539558767, "learning_rate": 1.9667346217552528e-05, "loss": 0.5169, "step": 1387 }, { "epoch": 0.1099623687858982, "grad_norm": 2.0449107987637816, "learning_rate": 1.9666689555862586e-05, "loss": 0.4512, "step": 1388 }, { "epoch": 0.11004159239453357, "grad_norm": 1.8557761825547496, "learning_rate": 1.966603225766765e-05, "loss": 0.4105, "step": 1389 }, { "epoch": 0.11012081600316895, "grad_norm": 1.9996639683551252, "learning_rate": 1.9665374323011002e-05, "loss": 0.4897, "step": 1390 }, { "epoch": 0.11020003961180432, "grad_norm": 1.9564383286329299, "learning_rate": 1.9664715751935958e-05, "loss": 0.3754, "step": 1391 }, { "epoch": 0.1102792632204397, "grad_norm": 1.9868567196309925, "learning_rate": 1.9664056544485887e-05, "loss": 0.5438, "step": 1392 }, { "epoch": 0.11035848682907506, "grad_norm": 2.327529420723129, "learning_rate": 1.9663396700704195e-05, "loss": 0.4414, "step": 1393 }, { "epoch": 0.11043771043771043, "grad_norm": 1.8880730434393933, "learning_rate": 1.9662736220634325e-05, "loss": 0.4322, "step": 1394 }, { "epoch": 0.11051693404634581, "grad_norm": 2.1486232593957486, "learning_rate": 1.966207510431977e-05, "loss": 0.4426, "step": 1395 }, { "epoch": 0.11059615765498118, "grad_norm": 1.9612609312177982, "learning_rate": 1.966141335180406e-05, "loss": 0.4507, "step": 1396 }, { "epoch": 0.11067538126361656, "grad_norm": 1.9754263857552985, "learning_rate": 1.966075096313077e-05, "loss": 0.4621, "step": 1397 }, { "epoch": 0.11075460487225193, "grad_norm": 2.0349815536675075, "learning_rate": 1.966008793834351e-05, "loss": 0.5256, "step": 1398 }, { "epoch": 0.1108338284808873, "grad_norm": 1.9520324765110302, "learning_rate": 1.9659424277485943e-05, "loss": 0.4565, "step": 1399 }, { "epoch": 0.11091305208952268, "grad_norm": 2.2345829054299577, "learning_rate": 1.9658759980601766e-05, "loss": 0.3995, "step": 1400 }, { "epoch": 0.11099227569815805, "grad_norm": 2.1041284053669203, "learning_rate": 1.9658095047734718e-05, "loss": 0.507, "step": 1401 }, { "epoch": 0.11107149930679343, "grad_norm": 1.851611024030357, "learning_rate": 1.965742947892858e-05, "loss": 0.3452, "step": 1402 }, { "epoch": 0.1111507229154288, "grad_norm": 2.1897687510782746, "learning_rate": 1.9656763274227188e-05, "loss": 0.4197, "step": 1403 }, { "epoch": 0.11122994652406418, "grad_norm": 2.0738946031359617, "learning_rate": 1.9656096433674393e-05, "loss": 0.4331, "step": 1404 }, { "epoch": 0.11130917013269954, "grad_norm": 2.077409424094191, "learning_rate": 1.965542895731411e-05, "loss": 0.4854, "step": 1405 }, { "epoch": 0.11138839374133491, "grad_norm": 2.1631282545969426, "learning_rate": 1.965476084519029e-05, "loss": 0.4984, "step": 1406 }, { "epoch": 0.1114676173499703, "grad_norm": 2.1110815671663454, "learning_rate": 1.9654092097346925e-05, "loss": 0.548, "step": 1407 }, { "epoch": 0.11154684095860566, "grad_norm": 2.350641338008749, "learning_rate": 1.965342271382805e-05, "loss": 0.4873, "step": 1408 }, { "epoch": 0.11162606456724104, "grad_norm": 2.3327262249107763, "learning_rate": 1.9652752694677735e-05, "loss": 0.467, "step": 1409 }, { "epoch": 0.11170528817587641, "grad_norm": 2.1904935250138218, "learning_rate": 1.9652082039940102e-05, "loss": 0.4448, "step": 1410 }, { "epoch": 0.11178451178451178, "grad_norm": 2.1684146924547227, "learning_rate": 1.965141074965931e-05, "loss": 0.5793, "step": 1411 }, { "epoch": 0.11186373539314716, "grad_norm": 2.394931541780458, "learning_rate": 1.965073882387956e-05, "loss": 0.5232, "step": 1412 }, { "epoch": 0.11194295900178253, "grad_norm": 1.8008395756504112, "learning_rate": 1.9650066262645097e-05, "loss": 0.3579, "step": 1413 }, { "epoch": 0.11202218261041791, "grad_norm": 1.7880135468872924, "learning_rate": 1.96493930660002e-05, "loss": 0.4693, "step": 1414 }, { "epoch": 0.11210140621905328, "grad_norm": 2.2555014374847113, "learning_rate": 1.9648719233989202e-05, "loss": 0.4871, "step": 1415 }, { "epoch": 0.11218062982768866, "grad_norm": 2.123410435394117, "learning_rate": 1.9648044766656466e-05, "loss": 0.4627, "step": 1416 }, { "epoch": 0.11225985343632403, "grad_norm": 1.939601690782903, "learning_rate": 1.9647369664046407e-05, "loss": 0.4424, "step": 1417 }, { "epoch": 0.11233907704495939, "grad_norm": 2.281327273341833, "learning_rate": 1.9646693926203477e-05, "loss": 0.6591, "step": 1418 }, { "epoch": 0.11241830065359477, "grad_norm": 2.1106466856930286, "learning_rate": 1.964601755317217e-05, "loss": 0.4574, "step": 1419 }, { "epoch": 0.11249752426223014, "grad_norm": 1.9544101324947374, "learning_rate": 1.9645340544997017e-05, "loss": 0.4516, "step": 1420 }, { "epoch": 0.11257674787086552, "grad_norm": 1.9559669198281695, "learning_rate": 1.9644662901722603e-05, "loss": 0.469, "step": 1421 }, { "epoch": 0.11265597147950089, "grad_norm": 2.117404733421457, "learning_rate": 1.9643984623393542e-05, "loss": 0.3933, "step": 1422 }, { "epoch": 0.11273519508813626, "grad_norm": 1.9197144873396272, "learning_rate": 1.96433057100545e-05, "loss": 0.5246, "step": 1423 }, { "epoch": 0.11281441869677164, "grad_norm": 2.154385418286073, "learning_rate": 1.9642626161750176e-05, "loss": 0.5501, "step": 1424 }, { "epoch": 0.11289364230540701, "grad_norm": 2.1084457698898036, "learning_rate": 1.9641945978525318e-05, "loss": 0.4261, "step": 1425 }, { "epoch": 0.11297286591404239, "grad_norm": 2.2243457030478746, "learning_rate": 1.9641265160424705e-05, "loss": 0.568, "step": 1426 }, { "epoch": 0.11305208952267776, "grad_norm": 2.5108768972557707, "learning_rate": 1.9640583707493176e-05, "loss": 0.4744, "step": 1427 }, { "epoch": 0.11313131313131314, "grad_norm": 2.0892895286912894, "learning_rate": 1.96399016197756e-05, "loss": 0.4505, "step": 1428 }, { "epoch": 0.1132105367399485, "grad_norm": 2.3158352460670066, "learning_rate": 1.9639218897316885e-05, "loss": 0.5378, "step": 1429 }, { "epoch": 0.11328976034858387, "grad_norm": 2.340186869923995, "learning_rate": 1.9638535540161988e-05, "loss": 0.4724, "step": 1430 }, { "epoch": 0.11336898395721925, "grad_norm": 2.6492115525694704, "learning_rate": 1.96378515483559e-05, "loss": 0.496, "step": 1431 }, { "epoch": 0.11344820756585462, "grad_norm": 2.4004721933968187, "learning_rate": 1.9637166921943663e-05, "loss": 0.5341, "step": 1432 }, { "epoch": 0.11352743117449, "grad_norm": 1.9682433988575243, "learning_rate": 1.963648166097036e-05, "loss": 0.4253, "step": 1433 }, { "epoch": 0.11360665478312537, "grad_norm": 2.1500121868081763, "learning_rate": 1.9635795765481102e-05, "loss": 0.4655, "step": 1434 }, { "epoch": 0.11368587839176074, "grad_norm": 2.1591082401339805, "learning_rate": 1.9635109235521057e-05, "loss": 0.5482, "step": 1435 }, { "epoch": 0.11376510200039612, "grad_norm": 2.1846671812269802, "learning_rate": 1.963442207113543e-05, "loss": 0.5818, "step": 1436 }, { "epoch": 0.11384432560903149, "grad_norm": 1.9687121902879365, "learning_rate": 1.9633734272369473e-05, "loss": 0.5662, "step": 1437 }, { "epoch": 0.11392354921766687, "grad_norm": 2.3071778648916594, "learning_rate": 1.9633045839268464e-05, "loss": 0.4821, "step": 1438 }, { "epoch": 0.11400277282630224, "grad_norm": 2.140676102643196, "learning_rate": 1.9632356771877735e-05, "loss": 0.4727, "step": 1439 }, { "epoch": 0.1140819964349376, "grad_norm": 1.9071046885629488, "learning_rate": 1.9631667070242667e-05, "loss": 0.444, "step": 1440 }, { "epoch": 0.11416122004357299, "grad_norm": 2.291727933785507, "learning_rate": 1.963097673440866e-05, "loss": 0.5207, "step": 1441 }, { "epoch": 0.11424044365220835, "grad_norm": 2.34082057747678, "learning_rate": 1.9630285764421183e-05, "loss": 0.5034, "step": 1442 }, { "epoch": 0.11431966726084374, "grad_norm": 1.8877917306566079, "learning_rate": 1.9629594160325725e-05, "loss": 0.425, "step": 1443 }, { "epoch": 0.1143988908694791, "grad_norm": 2.3941728626559975, "learning_rate": 1.9628901922167823e-05, "loss": 0.5708, "step": 1444 }, { "epoch": 0.11447811447811448, "grad_norm": 2.434665449999772, "learning_rate": 1.9628209049993064e-05, "loss": 0.5163, "step": 1445 }, { "epoch": 0.11455733808674985, "grad_norm": 1.956106966260622, "learning_rate": 1.9627515543847068e-05, "loss": 0.5267, "step": 1446 }, { "epoch": 0.11463656169538522, "grad_norm": 2.2697471101411324, "learning_rate": 1.9626821403775494e-05, "loss": 0.4266, "step": 1447 }, { "epoch": 0.1147157853040206, "grad_norm": 2.248906086278467, "learning_rate": 1.9626126629824056e-05, "loss": 0.5469, "step": 1448 }, { "epoch": 0.11479500891265597, "grad_norm": 2.111508000638065, "learning_rate": 1.9625431222038494e-05, "loss": 0.5685, "step": 1449 }, { "epoch": 0.11487423252129135, "grad_norm": 2.236947028253924, "learning_rate": 1.9624735180464602e-05, "loss": 0.5866, "step": 1450 }, { "epoch": 0.11495345612992672, "grad_norm": 2.7930780903054147, "learning_rate": 1.962403850514821e-05, "loss": 0.6704, "step": 1451 }, { "epoch": 0.11503267973856209, "grad_norm": 1.918162529432279, "learning_rate": 1.962334119613519e-05, "loss": 0.4194, "step": 1452 }, { "epoch": 0.11511190334719747, "grad_norm": 1.8806060879712632, "learning_rate": 1.9622643253471457e-05, "loss": 0.432, "step": 1453 }, { "epoch": 0.11519112695583283, "grad_norm": 2.4150856120857673, "learning_rate": 1.9621944677202966e-05, "loss": 0.5439, "step": 1454 }, { "epoch": 0.11527035056446822, "grad_norm": 1.8366272493038598, "learning_rate": 1.9621245467375715e-05, "loss": 0.4434, "step": 1455 }, { "epoch": 0.11534957417310358, "grad_norm": 2.2143508571733865, "learning_rate": 1.9620545624035748e-05, "loss": 0.6679, "step": 1456 }, { "epoch": 0.11542879778173896, "grad_norm": 2.0768577960184995, "learning_rate": 1.961984514722914e-05, "loss": 0.419, "step": 1457 }, { "epoch": 0.11550802139037433, "grad_norm": 2.19651941167457, "learning_rate": 1.9619144037002015e-05, "loss": 0.3883, "step": 1458 }, { "epoch": 0.1155872449990097, "grad_norm": 2.0519791683137574, "learning_rate": 1.9618442293400544e-05, "loss": 0.4742, "step": 1459 }, { "epoch": 0.11566646860764508, "grad_norm": 2.041938262311429, "learning_rate": 1.9617739916470926e-05, "loss": 0.5295, "step": 1460 }, { "epoch": 0.11574569221628045, "grad_norm": 2.291458185449157, "learning_rate": 1.9617036906259416e-05, "loss": 0.563, "step": 1461 }, { "epoch": 0.11582491582491583, "grad_norm": 2.2623712408457335, "learning_rate": 1.9616333262812298e-05, "loss": 0.4523, "step": 1462 }, { "epoch": 0.1159041394335512, "grad_norm": 2.121016687689861, "learning_rate": 1.9615628986175902e-05, "loss": 0.4096, "step": 1463 }, { "epoch": 0.11598336304218657, "grad_norm": 1.8709177349527826, "learning_rate": 1.9614924076396605e-05, "loss": 0.461, "step": 1464 }, { "epoch": 0.11606258665082195, "grad_norm": 1.9476571336567652, "learning_rate": 1.9614218533520827e-05, "loss": 0.4157, "step": 1465 }, { "epoch": 0.11614181025945731, "grad_norm": 1.7256799191617038, "learning_rate": 1.9613512357595014e-05, "loss": 0.374, "step": 1466 }, { "epoch": 0.1162210338680927, "grad_norm": 2.8457128537080214, "learning_rate": 1.9612805548665673e-05, "loss": 0.503, "step": 1467 }, { "epoch": 0.11630025747672806, "grad_norm": 2.518083460143727, "learning_rate": 1.961209810677934e-05, "loss": 0.5476, "step": 1468 }, { "epoch": 0.11637948108536343, "grad_norm": 2.23278768736079, "learning_rate": 1.9611390031982595e-05, "loss": 0.525, "step": 1469 }, { "epoch": 0.11645870469399881, "grad_norm": 1.9076000265786033, "learning_rate": 1.9610681324322068e-05, "loss": 0.3774, "step": 1470 }, { "epoch": 0.11653792830263418, "grad_norm": 2.2776735570874536, "learning_rate": 1.9609971983844412e-05, "loss": 0.5078, "step": 1471 }, { "epoch": 0.11661715191126956, "grad_norm": 1.8411926260518676, "learning_rate": 1.9609262010596346e-05, "loss": 0.3922, "step": 1472 }, { "epoch": 0.11669637551990493, "grad_norm": 1.9701215709698423, "learning_rate": 1.9608551404624613e-05, "loss": 0.5038, "step": 1473 }, { "epoch": 0.11677559912854031, "grad_norm": 1.9612736617063422, "learning_rate": 1.9607840165976003e-05, "loss": 0.4469, "step": 1474 }, { "epoch": 0.11685482273717568, "grad_norm": 2.2336520853868955, "learning_rate": 1.960712829469735e-05, "loss": 0.5173, "step": 1475 }, { "epoch": 0.11693404634581105, "grad_norm": 1.9833072142438084, "learning_rate": 1.9606415790835523e-05, "loss": 0.4824, "step": 1476 }, { "epoch": 0.11701326995444643, "grad_norm": 1.9177108554461322, "learning_rate": 1.9605702654437438e-05, "loss": 0.3869, "step": 1477 }, { "epoch": 0.1170924935630818, "grad_norm": 1.981158209605318, "learning_rate": 1.9604988885550056e-05, "loss": 0.4094, "step": 1478 }, { "epoch": 0.11717171717171718, "grad_norm": 2.0259390748646298, "learning_rate": 1.960427448422037e-05, "loss": 0.5093, "step": 1479 }, { "epoch": 0.11725094078035254, "grad_norm": 2.0448675329673716, "learning_rate": 1.9603559450495423e-05, "loss": 0.4534, "step": 1480 }, { "epoch": 0.11733016438898791, "grad_norm": 1.8984610544193488, "learning_rate": 1.9602843784422297e-05, "loss": 0.3554, "step": 1481 }, { "epoch": 0.1174093879976233, "grad_norm": 1.9395398700099775, "learning_rate": 1.9602127486048112e-05, "loss": 0.4022, "step": 1482 }, { "epoch": 0.11748861160625866, "grad_norm": 2.0163720396854505, "learning_rate": 1.9601410555420035e-05, "loss": 0.3974, "step": 1483 }, { "epoch": 0.11756783521489404, "grad_norm": 2.05054203776624, "learning_rate": 1.9600692992585275e-05, "loss": 0.4397, "step": 1484 }, { "epoch": 0.11764705882352941, "grad_norm": 2.214079054262502, "learning_rate": 1.959997479759107e-05, "loss": 0.4751, "step": 1485 }, { "epoch": 0.11772628243216479, "grad_norm": 2.2950353059232116, "learning_rate": 1.959925597048472e-05, "loss": 0.5284, "step": 1486 }, { "epoch": 0.11780550604080016, "grad_norm": 1.9920905764637256, "learning_rate": 1.9598536511313553e-05, "loss": 0.4613, "step": 1487 }, { "epoch": 0.11788472964943553, "grad_norm": 2.1118488569468488, "learning_rate": 1.9597816420124945e-05, "loss": 0.5998, "step": 1488 }, { "epoch": 0.11796395325807091, "grad_norm": 2.2433354406277664, "learning_rate": 1.95970956969663e-05, "loss": 0.5527, "step": 1489 }, { "epoch": 0.11804317686670628, "grad_norm": 1.8367627795571106, "learning_rate": 1.9596374341885093e-05, "loss": 0.5335, "step": 1490 }, { "epoch": 0.11812240047534166, "grad_norm": 1.7407102159496246, "learning_rate": 1.95956523549288e-05, "loss": 0.4402, "step": 1491 }, { "epoch": 0.11820162408397702, "grad_norm": 2.112722245176321, "learning_rate": 1.9594929736144978e-05, "loss": 0.4479, "step": 1492 }, { "epoch": 0.11828084769261239, "grad_norm": 2.462689716995226, "learning_rate": 1.9594206485581196e-05, "loss": 0.557, "step": 1493 }, { "epoch": 0.11836007130124777, "grad_norm": 2.056292590324996, "learning_rate": 1.959348260328508e-05, "loss": 0.4044, "step": 1494 }, { "epoch": 0.11843929490988314, "grad_norm": 2.172687419632236, "learning_rate": 1.95927580893043e-05, "loss": 0.6337, "step": 1495 }, { "epoch": 0.11851851851851852, "grad_norm": 2.114823681771656, "learning_rate": 1.9592032943686554e-05, "loss": 0.5385, "step": 1496 }, { "epoch": 0.11859774212715389, "grad_norm": 2.3100892535716655, "learning_rate": 1.9591307166479595e-05, "loss": 0.4535, "step": 1497 }, { "epoch": 0.11867696573578927, "grad_norm": 2.349652632487777, "learning_rate": 1.959058075773121e-05, "loss": 0.5193, "step": 1498 }, { "epoch": 0.11875618934442464, "grad_norm": 1.8137312542239925, "learning_rate": 1.9589853717489228e-05, "loss": 0.4829, "step": 1499 }, { "epoch": 0.11883541295306001, "grad_norm": 1.9286817746349163, "learning_rate": 1.958912604580152e-05, "loss": 0.4112, "step": 1500 }, { "epoch": 0.11891463656169539, "grad_norm": 1.8759861059939869, "learning_rate": 1.9588397742716004e-05, "loss": 0.5042, "step": 1501 }, { "epoch": 0.11899386017033076, "grad_norm": 2.023392474184696, "learning_rate": 1.9587668808280632e-05, "loss": 0.5244, "step": 1502 }, { "epoch": 0.11907308377896614, "grad_norm": 2.338711609491881, "learning_rate": 1.9586939242543402e-05, "loss": 0.4811, "step": 1503 }, { "epoch": 0.1191523073876015, "grad_norm": 2.2632293860196713, "learning_rate": 1.9586209045552355e-05, "loss": 0.48, "step": 1504 }, { "epoch": 0.11923153099623687, "grad_norm": 2.0604617678476598, "learning_rate": 1.9585478217355563e-05, "loss": 0.4963, "step": 1505 }, { "epoch": 0.11931075460487225, "grad_norm": 2.2328978445179337, "learning_rate": 1.9584746758001156e-05, "loss": 0.5735, "step": 1506 }, { "epoch": 0.11938997821350762, "grad_norm": 2.1402047021513386, "learning_rate": 1.9584014667537293e-05, "loss": 0.5495, "step": 1507 }, { "epoch": 0.119469201822143, "grad_norm": 2.275821810381484, "learning_rate": 1.9583281946012183e-05, "loss": 0.5686, "step": 1508 }, { "epoch": 0.11954842543077837, "grad_norm": 2.205633271036908, "learning_rate": 1.9582548593474064e-05, "loss": 0.597, "step": 1509 }, { "epoch": 0.11962764903941374, "grad_norm": 2.032498172198259, "learning_rate": 1.9581814609971232e-05, "loss": 0.3864, "step": 1510 }, { "epoch": 0.11970687264804912, "grad_norm": 2.1809595038271077, "learning_rate": 1.958107999555201e-05, "loss": 0.4366, "step": 1511 }, { "epoch": 0.11978609625668449, "grad_norm": 2.0966040877975693, "learning_rate": 1.958034475026477e-05, "loss": 0.4563, "step": 1512 }, { "epoch": 0.11986531986531987, "grad_norm": 2.0053545068303653, "learning_rate": 1.957960887415793e-05, "loss": 0.4182, "step": 1513 }, { "epoch": 0.11994454347395524, "grad_norm": 1.967509465069574, "learning_rate": 1.9578872367279937e-05, "loss": 0.4695, "step": 1514 }, { "epoch": 0.12002376708259062, "grad_norm": 2.31215641518515, "learning_rate": 1.957813522967929e-05, "loss": 0.4742, "step": 1515 }, { "epoch": 0.12010299069122599, "grad_norm": 1.6709110781226622, "learning_rate": 1.9577397461404527e-05, "loss": 0.3521, "step": 1516 }, { "epoch": 0.12018221429986135, "grad_norm": 2.45691631192335, "learning_rate": 1.957665906250422e-05, "loss": 0.6144, "step": 1517 }, { "epoch": 0.12026143790849673, "grad_norm": 1.7185842208518693, "learning_rate": 1.9575920033027002e-05, "loss": 0.4392, "step": 1518 }, { "epoch": 0.1203406615171321, "grad_norm": 2.1658119142884322, "learning_rate": 1.9575180373021516e-05, "loss": 0.4573, "step": 1519 }, { "epoch": 0.12041988512576748, "grad_norm": 2.01196096927822, "learning_rate": 1.9574440082536482e-05, "loss": 0.419, "step": 1520 }, { "epoch": 0.12049910873440285, "grad_norm": 2.3063711390334407, "learning_rate": 1.9573699161620635e-05, "loss": 0.575, "step": 1521 }, { "epoch": 0.12057833234303822, "grad_norm": 1.8541806380421149, "learning_rate": 1.9572957610322766e-05, "loss": 0.3356, "step": 1522 }, { "epoch": 0.1206575559516736, "grad_norm": 2.224184920772965, "learning_rate": 1.95722154286917e-05, "loss": 0.4105, "step": 1523 }, { "epoch": 0.12073677956030897, "grad_norm": 2.1852759463791425, "learning_rate": 1.9571472616776304e-05, "loss": 0.4712, "step": 1524 }, { "epoch": 0.12081600316894435, "grad_norm": 2.2581373778888603, "learning_rate": 1.9570729174625493e-05, "loss": 0.4556, "step": 1525 }, { "epoch": 0.12089522677757972, "grad_norm": 1.9576966899336323, "learning_rate": 1.956998510228822e-05, "loss": 0.4633, "step": 1526 }, { "epoch": 0.1209744503862151, "grad_norm": 1.8843490040594182, "learning_rate": 1.956924039981347e-05, "loss": 0.5199, "step": 1527 }, { "epoch": 0.12105367399485047, "grad_norm": 1.9876327531780724, "learning_rate": 1.956849506725029e-05, "loss": 0.4823, "step": 1528 }, { "epoch": 0.12113289760348583, "grad_norm": 2.1313006638182825, "learning_rate": 1.9567749104647746e-05, "loss": 0.543, "step": 1529 }, { "epoch": 0.12121212121212122, "grad_norm": 1.9858746753460998, "learning_rate": 1.9567002512054964e-05, "loss": 0.4229, "step": 1530 }, { "epoch": 0.12129134482075658, "grad_norm": 2.048676644917086, "learning_rate": 1.9566255289521096e-05, "loss": 0.5403, "step": 1531 }, { "epoch": 0.12137056842939196, "grad_norm": 1.862883832172104, "learning_rate": 1.956550743709535e-05, "loss": 0.3393, "step": 1532 }, { "epoch": 0.12144979203802733, "grad_norm": 2.284319341639327, "learning_rate": 1.9564758954826964e-05, "loss": 0.4484, "step": 1533 }, { "epoch": 0.1215290156466627, "grad_norm": 2.1742344707512498, "learning_rate": 1.9564009842765225e-05, "loss": 0.4469, "step": 1534 }, { "epoch": 0.12160823925529808, "grad_norm": 2.417652281543239, "learning_rate": 1.956326010095946e-05, "loss": 0.552, "step": 1535 }, { "epoch": 0.12168746286393345, "grad_norm": 2.0660780166131603, "learning_rate": 1.9562509729459024e-05, "loss": 0.4635, "step": 1536 }, { "epoch": 0.12176668647256883, "grad_norm": 2.256056768108262, "learning_rate": 1.956175872831334e-05, "loss": 0.6252, "step": 1537 }, { "epoch": 0.1218459100812042, "grad_norm": 2.1447276940724276, "learning_rate": 1.9561007097571853e-05, "loss": 0.4759, "step": 1538 }, { "epoch": 0.12192513368983957, "grad_norm": 2.0856712082665125, "learning_rate": 1.9560254837284053e-05, "loss": 0.3735, "step": 1539 }, { "epoch": 0.12200435729847495, "grad_norm": 2.0140312749191587, "learning_rate": 1.955950194749947e-05, "loss": 0.5297, "step": 1540 }, { "epoch": 0.12208358090711031, "grad_norm": 2.1950721780582634, "learning_rate": 1.9558748428267682e-05, "loss": 0.4727, "step": 1541 }, { "epoch": 0.1221628045157457, "grad_norm": 2.107878681326026, "learning_rate": 1.9557994279638307e-05, "loss": 0.4633, "step": 1542 }, { "epoch": 0.12224202812438106, "grad_norm": 2.129355868492411, "learning_rate": 1.9557239501660995e-05, "loss": 0.4734, "step": 1543 }, { "epoch": 0.12232125173301645, "grad_norm": 2.183122441993006, "learning_rate": 1.955648409438545e-05, "loss": 0.4621, "step": 1544 }, { "epoch": 0.12240047534165181, "grad_norm": 2.469968994570861, "learning_rate": 1.955572805786141e-05, "loss": 0.5069, "step": 1545 }, { "epoch": 0.12247969895028718, "grad_norm": 2.0663506941577903, "learning_rate": 1.9554971392138655e-05, "loss": 0.4811, "step": 1546 }, { "epoch": 0.12255892255892256, "grad_norm": 2.040254687777327, "learning_rate": 1.955421409726701e-05, "loss": 0.5074, "step": 1547 }, { "epoch": 0.12263814616755793, "grad_norm": 2.0151917059884146, "learning_rate": 1.9553456173296342e-05, "loss": 0.5792, "step": 1548 }, { "epoch": 0.12271736977619331, "grad_norm": 2.342460576824375, "learning_rate": 1.9552697620276547e-05, "loss": 0.55, "step": 1549 }, { "epoch": 0.12279659338482868, "grad_norm": 1.9577816400953072, "learning_rate": 1.9551938438257583e-05, "loss": 0.33, "step": 1550 }, { "epoch": 0.12287581699346405, "grad_norm": 2.2596378182288372, "learning_rate": 1.9551178627289436e-05, "loss": 0.4823, "step": 1551 }, { "epoch": 0.12295504060209943, "grad_norm": 1.9260888550406368, "learning_rate": 1.9550418187422127e-05, "loss": 0.3066, "step": 1552 }, { "epoch": 0.1230342642107348, "grad_norm": 1.9401354769111385, "learning_rate": 1.954965711870574e-05, "loss": 0.4799, "step": 1553 }, { "epoch": 0.12311348781937018, "grad_norm": 2.435901679159972, "learning_rate": 1.954889542119038e-05, "loss": 0.5224, "step": 1554 }, { "epoch": 0.12319271142800554, "grad_norm": 2.109284057685913, "learning_rate": 1.9548133094926203e-05, "loss": 0.4227, "step": 1555 }, { "epoch": 0.12327193503664093, "grad_norm": 2.264513968471351, "learning_rate": 1.9547370139963406e-05, "loss": 0.5184, "step": 1556 }, { "epoch": 0.1233511586452763, "grad_norm": 1.9963833903416612, "learning_rate": 1.954660655635222e-05, "loss": 0.493, "step": 1557 }, { "epoch": 0.12343038225391166, "grad_norm": 2.3394609325354683, "learning_rate": 1.954584234414293e-05, "loss": 0.5902, "step": 1558 }, { "epoch": 0.12350960586254704, "grad_norm": 2.1173850208042855, "learning_rate": 1.954507750338585e-05, "loss": 0.4574, "step": 1559 }, { "epoch": 0.12358882947118241, "grad_norm": 2.784984556302951, "learning_rate": 1.954431203413135e-05, "loss": 0.5032, "step": 1560 }, { "epoch": 0.12366805307981779, "grad_norm": 2.4832464418214664, "learning_rate": 1.9543545936429824e-05, "loss": 0.5774, "step": 1561 }, { "epoch": 0.12374727668845316, "grad_norm": 2.249905926437713, "learning_rate": 1.954277921033172e-05, "loss": 0.6038, "step": 1562 }, { "epoch": 0.12382650029708853, "grad_norm": 2.270271926697608, "learning_rate": 1.954201185588752e-05, "loss": 0.5766, "step": 1563 }, { "epoch": 0.12390572390572391, "grad_norm": 2.1328028405772845, "learning_rate": 1.9541243873147752e-05, "loss": 0.426, "step": 1564 }, { "epoch": 0.12398494751435928, "grad_norm": 2.5046424985676885, "learning_rate": 1.9540475262162988e-05, "loss": 0.6653, "step": 1565 }, { "epoch": 0.12406417112299466, "grad_norm": 2.1343082635875197, "learning_rate": 1.9539706022983827e-05, "loss": 0.4462, "step": 1566 }, { "epoch": 0.12414339473163002, "grad_norm": 2.2156024292847767, "learning_rate": 1.9538936155660934e-05, "loss": 0.5407, "step": 1567 }, { "epoch": 0.12422261834026539, "grad_norm": 2.579028877772703, "learning_rate": 1.953816566024499e-05, "loss": 0.6548, "step": 1568 }, { "epoch": 0.12430184194890077, "grad_norm": 2.0779856118130016, "learning_rate": 1.9537394536786734e-05, "loss": 0.4593, "step": 1569 }, { "epoch": 0.12438106555753614, "grad_norm": 2.31625464332722, "learning_rate": 1.9536622785336936e-05, "loss": 0.5234, "step": 1570 }, { "epoch": 0.12446028916617152, "grad_norm": 2.0622730352018035, "learning_rate": 1.953585040594642e-05, "loss": 0.3372, "step": 1571 }, { "epoch": 0.12453951277480689, "grad_norm": 2.4841316876648607, "learning_rate": 1.9535077398666034e-05, "loss": 0.4423, "step": 1572 }, { "epoch": 0.12461873638344227, "grad_norm": 1.9787853395709356, "learning_rate": 1.953430376354668e-05, "loss": 0.3854, "step": 1573 }, { "epoch": 0.12469795999207764, "grad_norm": 2.525922564771538, "learning_rate": 1.9533529500639302e-05, "loss": 0.5425, "step": 1574 }, { "epoch": 0.12477718360071301, "grad_norm": 1.93523597768252, "learning_rate": 1.9532754609994878e-05, "loss": 0.3317, "step": 1575 }, { "epoch": 0.12485640720934839, "grad_norm": 2.0219349869300443, "learning_rate": 1.953197909166443e-05, "loss": 0.4876, "step": 1576 }, { "epoch": 0.12493563081798376, "grad_norm": 1.78265919950567, "learning_rate": 1.9531202945699027e-05, "loss": 0.3151, "step": 1577 }, { "epoch": 0.12501485442661914, "grad_norm": 2.5949093041009017, "learning_rate": 1.953042617214977e-05, "loss": 0.4207, "step": 1578 }, { "epoch": 0.12509407803525452, "grad_norm": 2.3174313296879925, "learning_rate": 1.9529648771067805e-05, "loss": 0.4594, "step": 1579 }, { "epoch": 0.12517330164388987, "grad_norm": 2.1455311092567535, "learning_rate": 1.9528870742504328e-05, "loss": 0.447, "step": 1580 }, { "epoch": 0.12525252525252525, "grad_norm": 2.1268622439915683, "learning_rate": 1.9528092086510556e-05, "loss": 0.5086, "step": 1581 }, { "epoch": 0.12533174886116064, "grad_norm": 2.2348117985011973, "learning_rate": 1.9527312803137767e-05, "loss": 0.4691, "step": 1582 }, { "epoch": 0.125410972469796, "grad_norm": 2.6368223200522363, "learning_rate": 1.9526532892437275e-05, "loss": 0.6259, "step": 1583 }, { "epoch": 0.12549019607843137, "grad_norm": 2.284528147692858, "learning_rate": 1.9525752354460433e-05, "loss": 0.5743, "step": 1584 }, { "epoch": 0.12556941968706675, "grad_norm": 2.2208798173703976, "learning_rate": 1.9524971189258627e-05, "loss": 0.5342, "step": 1585 }, { "epoch": 0.1256486432957021, "grad_norm": 2.2671382435804603, "learning_rate": 1.9524189396883307e-05, "loss": 0.4803, "step": 1586 }, { "epoch": 0.1257278669043375, "grad_norm": 1.9844528087045685, "learning_rate": 1.9523406977385937e-05, "loss": 0.5166, "step": 1587 }, { "epoch": 0.12580709051297287, "grad_norm": 1.7632181098371553, "learning_rate": 1.9522623930818043e-05, "loss": 0.4377, "step": 1588 }, { "epoch": 0.12588631412160825, "grad_norm": 2.283692140176902, "learning_rate": 1.9521840257231183e-05, "loss": 0.4947, "step": 1589 }, { "epoch": 0.1259655377302436, "grad_norm": 2.3164307111854368, "learning_rate": 1.9521055956676956e-05, "loss": 0.4708, "step": 1590 }, { "epoch": 0.12604476133887899, "grad_norm": 1.7116145701711083, "learning_rate": 1.9520271029207008e-05, "loss": 0.4086, "step": 1591 }, { "epoch": 0.12612398494751437, "grad_norm": 1.9182914083948066, "learning_rate": 1.9519485474873027e-05, "loss": 0.4887, "step": 1592 }, { "epoch": 0.12620320855614972, "grad_norm": 2.023898036529861, "learning_rate": 1.9518699293726727e-05, "loss": 0.4496, "step": 1593 }, { "epoch": 0.1262824321647851, "grad_norm": 1.7625947344225925, "learning_rate": 1.9517912485819878e-05, "loss": 0.3721, "step": 1594 }, { "epoch": 0.12636165577342048, "grad_norm": 2.208037197461247, "learning_rate": 1.9517125051204292e-05, "loss": 0.4905, "step": 1595 }, { "epoch": 0.12644087938205587, "grad_norm": 2.2507473829483047, "learning_rate": 1.9516336989931813e-05, "loss": 0.5883, "step": 1596 }, { "epoch": 0.12652010299069122, "grad_norm": 2.0932088228907055, "learning_rate": 1.9515548302054335e-05, "loss": 0.5015, "step": 1597 }, { "epoch": 0.1265993265993266, "grad_norm": 1.787315191367274, "learning_rate": 1.9514758987623784e-05, "loss": 0.3419, "step": 1598 }, { "epoch": 0.12667855020796198, "grad_norm": 1.9059191388049546, "learning_rate": 1.9513969046692137e-05, "loss": 0.4962, "step": 1599 }, { "epoch": 0.12675777381659734, "grad_norm": 2.0120350854369553, "learning_rate": 1.951317847931141e-05, "loss": 0.4746, "step": 1600 }, { "epoch": 0.12683699742523272, "grad_norm": 2.002509790584961, "learning_rate": 1.9512387285533655e-05, "loss": 0.439, "step": 1601 }, { "epoch": 0.1269162210338681, "grad_norm": 2.334436469329716, "learning_rate": 1.951159546541096e-05, "loss": 0.6096, "step": 1602 }, { "epoch": 0.12699544464250345, "grad_norm": 1.859265258937115, "learning_rate": 1.9510803018995477e-05, "loss": 0.377, "step": 1603 }, { "epoch": 0.12707466825113883, "grad_norm": 1.900511346149793, "learning_rate": 1.9510009946339377e-05, "loss": 0.5022, "step": 1604 }, { "epoch": 0.12715389185977422, "grad_norm": 2.050205491252545, "learning_rate": 1.9509216247494882e-05, "loss": 0.4027, "step": 1605 }, { "epoch": 0.1272331154684096, "grad_norm": 2.197658630941255, "learning_rate": 1.950842192251425e-05, "loss": 0.5213, "step": 1606 }, { "epoch": 0.12731233907704495, "grad_norm": 1.832814046622772, "learning_rate": 1.950762697144979e-05, "loss": 0.381, "step": 1607 }, { "epoch": 0.12739156268568033, "grad_norm": 1.897873626707906, "learning_rate": 1.950683139435384e-05, "loss": 0.3199, "step": 1608 }, { "epoch": 0.1274707862943157, "grad_norm": 2.3630339918533414, "learning_rate": 1.9506035191278784e-05, "loss": 0.561, "step": 1609 }, { "epoch": 0.12755000990295107, "grad_norm": 2.056307252757091, "learning_rate": 1.9505238362277054e-05, "loss": 0.4708, "step": 1610 }, { "epoch": 0.12762923351158645, "grad_norm": 1.8252485722983463, "learning_rate": 1.9504440907401113e-05, "loss": 0.3927, "step": 1611 }, { "epoch": 0.12770845712022183, "grad_norm": 1.9059585629214528, "learning_rate": 1.9503642826703468e-05, "loss": 0.395, "step": 1612 }, { "epoch": 0.1277876807288572, "grad_norm": 1.9259285645711433, "learning_rate": 1.950284412023668e-05, "loss": 0.4115, "step": 1613 }, { "epoch": 0.12786690433749257, "grad_norm": 2.3218748090406005, "learning_rate": 1.9502044788053322e-05, "loss": 0.5312, "step": 1614 }, { "epoch": 0.12794612794612795, "grad_norm": 1.8636654565152415, "learning_rate": 1.9501244830206037e-05, "loss": 0.4334, "step": 1615 }, { "epoch": 0.12802535155476333, "grad_norm": 2.2423125778198454, "learning_rate": 1.9500444246747502e-05, "loss": 0.4985, "step": 1616 }, { "epoch": 0.12810457516339868, "grad_norm": 2.2631406096595867, "learning_rate": 1.9499643037730422e-05, "loss": 0.4807, "step": 1617 }, { "epoch": 0.12818379877203406, "grad_norm": 2.009836839593306, "learning_rate": 1.949884120320756e-05, "loss": 0.4357, "step": 1618 }, { "epoch": 0.12826302238066944, "grad_norm": 2.0614390156081823, "learning_rate": 1.949803874323171e-05, "loss": 0.4016, "step": 1619 }, { "epoch": 0.12834224598930483, "grad_norm": 2.4047935362273134, "learning_rate": 1.949723565785571e-05, "loss": 0.5006, "step": 1620 }, { "epoch": 0.12842146959794018, "grad_norm": 2.340608266658904, "learning_rate": 1.9496431947132438e-05, "loss": 0.5462, "step": 1621 }, { "epoch": 0.12850069320657556, "grad_norm": 1.902774466757955, "learning_rate": 1.9495627611114817e-05, "loss": 0.3877, "step": 1622 }, { "epoch": 0.12857991681521094, "grad_norm": 2.6526359364203653, "learning_rate": 1.949482264985581e-05, "loss": 0.5816, "step": 1623 }, { "epoch": 0.1286591404238463, "grad_norm": 2.224691379426264, "learning_rate": 1.9494017063408415e-05, "loss": 0.5562, "step": 1624 }, { "epoch": 0.12873836403248168, "grad_norm": 2.252047688919355, "learning_rate": 1.9493210851825682e-05, "loss": 0.4172, "step": 1625 }, { "epoch": 0.12881758764111706, "grad_norm": 2.020222266558436, "learning_rate": 1.949240401516069e-05, "loss": 0.4992, "step": 1626 }, { "epoch": 0.1288968112497524, "grad_norm": 2.087228576009431, "learning_rate": 1.9491596553466568e-05, "loss": 0.383, "step": 1627 }, { "epoch": 0.1289760348583878, "grad_norm": 1.9397511634762696, "learning_rate": 1.9490788466796483e-05, "loss": 0.4283, "step": 1628 }, { "epoch": 0.12905525846702318, "grad_norm": 2.134617262902257, "learning_rate": 1.9489979755203646e-05, "loss": 0.3847, "step": 1629 }, { "epoch": 0.12913448207565856, "grad_norm": 2.2783258272898244, "learning_rate": 1.9489170418741306e-05, "loss": 0.5288, "step": 1630 }, { "epoch": 0.1292137056842939, "grad_norm": 2.627729679675159, "learning_rate": 1.948836045746275e-05, "loss": 0.4422, "step": 1631 }, { "epoch": 0.1292929292929293, "grad_norm": 2.0808924413119847, "learning_rate": 1.9487549871421316e-05, "loss": 0.5224, "step": 1632 }, { "epoch": 0.12937215290156467, "grad_norm": 2.734266585355052, "learning_rate": 1.9486738660670373e-05, "loss": 0.5707, "step": 1633 }, { "epoch": 0.12945137651020003, "grad_norm": 1.9749476541489894, "learning_rate": 1.9485926825263334e-05, "loss": 0.4319, "step": 1634 }, { "epoch": 0.1295306001188354, "grad_norm": 2.1730236224927646, "learning_rate": 1.948511436525366e-05, "loss": 0.5097, "step": 1635 }, { "epoch": 0.1296098237274708, "grad_norm": 1.9952733290539528, "learning_rate": 1.9484301280694845e-05, "loss": 0.3531, "step": 1636 }, { "epoch": 0.12968904733610617, "grad_norm": 2.142154481154024, "learning_rate": 1.9483487571640424e-05, "loss": 0.5847, "step": 1637 }, { "epoch": 0.12976827094474153, "grad_norm": 2.098423162700416, "learning_rate": 1.948267323814398e-05, "loss": 0.3985, "step": 1638 }, { "epoch": 0.1298474945533769, "grad_norm": 1.8687273876495234, "learning_rate": 1.948185828025913e-05, "loss": 0.3977, "step": 1639 }, { "epoch": 0.1299267181620123, "grad_norm": 1.966885840639226, "learning_rate": 1.9481042698039534e-05, "loss": 0.4246, "step": 1640 }, { "epoch": 0.13000594177064764, "grad_norm": 1.772690548251816, "learning_rate": 1.94802264915389e-05, "loss": 0.3666, "step": 1641 }, { "epoch": 0.13008516537928302, "grad_norm": 2.284571102529604, "learning_rate": 1.9479409660810965e-05, "loss": 0.4862, "step": 1642 }, { "epoch": 0.1301643889879184, "grad_norm": 1.79577134966779, "learning_rate": 1.9478592205909517e-05, "loss": 0.4195, "step": 1643 }, { "epoch": 0.13024361259655376, "grad_norm": 1.9873410139883068, "learning_rate": 1.947777412688838e-05, "loss": 0.4572, "step": 1644 }, { "epoch": 0.13032283620518914, "grad_norm": 2.0112424630111896, "learning_rate": 1.947695542380142e-05, "loss": 0.4602, "step": 1645 }, { "epoch": 0.13040205981382452, "grad_norm": 2.2439224519208976, "learning_rate": 1.9476136096702546e-05, "loss": 0.4055, "step": 1646 }, { "epoch": 0.1304812834224599, "grad_norm": 2.08499264584321, "learning_rate": 1.9475316145645706e-05, "loss": 0.4869, "step": 1647 }, { "epoch": 0.13056050703109526, "grad_norm": 1.866420939233387, "learning_rate": 1.947449557068489e-05, "loss": 0.398, "step": 1648 }, { "epoch": 0.13063973063973064, "grad_norm": 1.9962876757412493, "learning_rate": 1.947367437187413e-05, "loss": 0.5062, "step": 1649 }, { "epoch": 0.13071895424836602, "grad_norm": 2.4789946542886434, "learning_rate": 1.9472852549267496e-05, "loss": 0.5333, "step": 1650 }, { "epoch": 0.13079817785700137, "grad_norm": 2.3722255312534912, "learning_rate": 1.9472030102919102e-05, "loss": 0.5509, "step": 1651 }, { "epoch": 0.13087740146563676, "grad_norm": 1.785553283135544, "learning_rate": 1.9471207032883103e-05, "loss": 0.4859, "step": 1652 }, { "epoch": 0.13095662507427214, "grad_norm": 1.7755155125517021, "learning_rate": 1.9470383339213693e-05, "loss": 0.5293, "step": 1653 }, { "epoch": 0.13103584868290752, "grad_norm": 1.9955128080505007, "learning_rate": 1.946955902196511e-05, "loss": 0.4461, "step": 1654 }, { "epoch": 0.13111507229154287, "grad_norm": 1.8357218407037454, "learning_rate": 1.9468734081191627e-05, "loss": 0.4582, "step": 1655 }, { "epoch": 0.13119429590017825, "grad_norm": 2.1395018904148255, "learning_rate": 1.9467908516947568e-05, "loss": 0.5928, "step": 1656 }, { "epoch": 0.13127351950881364, "grad_norm": 2.3972933487425863, "learning_rate": 1.946708232928729e-05, "loss": 0.529, "step": 1657 }, { "epoch": 0.131352743117449, "grad_norm": 1.9151294624797697, "learning_rate": 1.9466255518265193e-05, "loss": 0.4837, "step": 1658 }, { "epoch": 0.13143196672608437, "grad_norm": 2.449250233769008, "learning_rate": 1.946542808393572e-05, "loss": 0.5345, "step": 1659 }, { "epoch": 0.13151119033471975, "grad_norm": 1.8208164851237159, "learning_rate": 1.946460002635335e-05, "loss": 0.4359, "step": 1660 }, { "epoch": 0.13159041394335513, "grad_norm": 1.8595550911542422, "learning_rate": 1.946377134557261e-05, "loss": 0.5135, "step": 1661 }, { "epoch": 0.1316696375519905, "grad_norm": 1.8927095736270318, "learning_rate": 1.9462942041648062e-05, "loss": 0.3981, "step": 1662 }, { "epoch": 0.13174886116062587, "grad_norm": 1.9692179918122763, "learning_rate": 1.9462112114634316e-05, "loss": 0.4599, "step": 1663 }, { "epoch": 0.13182808476926125, "grad_norm": 1.8405268038449931, "learning_rate": 1.9461281564586014e-05, "loss": 0.3978, "step": 1664 }, { "epoch": 0.1319073083778966, "grad_norm": 1.8747009013460822, "learning_rate": 1.9460450391557847e-05, "loss": 0.4791, "step": 1665 }, { "epoch": 0.13198653198653199, "grad_norm": 1.8437471781988803, "learning_rate": 1.945961859560454e-05, "loss": 0.4044, "step": 1666 }, { "epoch": 0.13206575559516737, "grad_norm": 2.343887922901139, "learning_rate": 1.9458786176780868e-05, "loss": 0.479, "step": 1667 }, { "epoch": 0.13214497920380272, "grad_norm": 2.1212067978883047, "learning_rate": 1.945795313514164e-05, "loss": 0.4229, "step": 1668 }, { "epoch": 0.1322242028124381, "grad_norm": 2.1224087212384464, "learning_rate": 1.9457119470741707e-05, "loss": 0.5046, "step": 1669 }, { "epoch": 0.13230342642107348, "grad_norm": 2.2257522080975014, "learning_rate": 1.9456285183635958e-05, "loss": 0.5205, "step": 1670 }, { "epoch": 0.13238265002970886, "grad_norm": 2.1916722752343043, "learning_rate": 1.9455450273879332e-05, "loss": 0.378, "step": 1671 }, { "epoch": 0.13246187363834422, "grad_norm": 2.253134935717543, "learning_rate": 1.94546147415268e-05, "loss": 0.4761, "step": 1672 }, { "epoch": 0.1325410972469796, "grad_norm": 1.9484350906708843, "learning_rate": 1.9453778586633386e-05, "loss": 0.5103, "step": 1673 }, { "epoch": 0.13262032085561498, "grad_norm": 2.034277497863812, "learning_rate": 1.9452941809254136e-05, "loss": 0.5718, "step": 1674 }, { "epoch": 0.13269954446425034, "grad_norm": 2.1139733755876446, "learning_rate": 1.9452104409444153e-05, "loss": 0.4832, "step": 1675 }, { "epoch": 0.13277876807288572, "grad_norm": 2.345538241691021, "learning_rate": 1.9451266387258576e-05, "loss": 0.4459, "step": 1676 }, { "epoch": 0.1328579916815211, "grad_norm": 2.374230006452594, "learning_rate": 1.9450427742752583e-05, "loss": 0.4718, "step": 1677 }, { "epoch": 0.13293721529015648, "grad_norm": 1.8458156884277443, "learning_rate": 1.9449588475981394e-05, "loss": 0.4513, "step": 1678 }, { "epoch": 0.13301643889879183, "grad_norm": 1.8872002622842183, "learning_rate": 1.9448748587000277e-05, "loss": 0.4412, "step": 1679 }, { "epoch": 0.13309566250742721, "grad_norm": 2.225454113668978, "learning_rate": 1.944790807586453e-05, "loss": 0.4744, "step": 1680 }, { "epoch": 0.1331748861160626, "grad_norm": 1.4787705331846077, "learning_rate": 1.9447066942629495e-05, "loss": 0.3266, "step": 1681 }, { "epoch": 0.13325410972469795, "grad_norm": 2.104026365802812, "learning_rate": 1.9446225187350558e-05, "loss": 0.5449, "step": 1682 }, { "epoch": 0.13333333333333333, "grad_norm": 2.00456098540276, "learning_rate": 1.9445382810083143e-05, "loss": 0.4949, "step": 1683 }, { "epoch": 0.1334125569419687, "grad_norm": 1.7312061856591376, "learning_rate": 1.944453981088272e-05, "loss": 0.3797, "step": 1684 }, { "epoch": 0.13349178055060407, "grad_norm": 1.7597842704681006, "learning_rate": 1.9443696189804793e-05, "loss": 0.4461, "step": 1685 }, { "epoch": 0.13357100415923945, "grad_norm": 1.7799676338091903, "learning_rate": 1.9442851946904914e-05, "loss": 0.4498, "step": 1686 }, { "epoch": 0.13365022776787483, "grad_norm": 2.1520571804664677, "learning_rate": 1.9442007082238673e-05, "loss": 0.3885, "step": 1687 }, { "epoch": 0.1337294513765102, "grad_norm": 1.882594705224177, "learning_rate": 1.944116159586169e-05, "loss": 0.3983, "step": 1688 }, { "epoch": 0.13380867498514556, "grad_norm": 1.8917192419641224, "learning_rate": 1.944031548782965e-05, "loss": 0.4186, "step": 1689 }, { "epoch": 0.13388789859378095, "grad_norm": 1.960842937343515, "learning_rate": 1.9439468758198258e-05, "loss": 0.3934, "step": 1690 }, { "epoch": 0.13396712220241633, "grad_norm": 2.0689676580915037, "learning_rate": 1.943862140702327e-05, "loss": 0.4452, "step": 1691 }, { "epoch": 0.13404634581105168, "grad_norm": 2.0448959404311684, "learning_rate": 1.9437773434360476e-05, "loss": 0.4711, "step": 1692 }, { "epoch": 0.13412556941968706, "grad_norm": 2.2481434596155467, "learning_rate": 1.943692484026571e-05, "loss": 0.4853, "step": 1693 }, { "epoch": 0.13420479302832244, "grad_norm": 2.023476681537764, "learning_rate": 1.9436075624794853e-05, "loss": 0.4078, "step": 1694 }, { "epoch": 0.13428401663695783, "grad_norm": 2.479704968592698, "learning_rate": 1.9435225788003822e-05, "loss": 0.4518, "step": 1695 }, { "epoch": 0.13436324024559318, "grad_norm": 2.5161634152042636, "learning_rate": 1.943437532994857e-05, "loss": 0.5347, "step": 1696 }, { "epoch": 0.13444246385422856, "grad_norm": 2.4083826215696504, "learning_rate": 1.9433524250685098e-05, "loss": 0.5727, "step": 1697 }, { "epoch": 0.13452168746286394, "grad_norm": 1.7881820050007933, "learning_rate": 1.9432672550269446e-05, "loss": 0.3996, "step": 1698 }, { "epoch": 0.1346009110714993, "grad_norm": 1.7457137282820732, "learning_rate": 1.943182022875769e-05, "loss": 0.4058, "step": 1699 }, { "epoch": 0.13468013468013468, "grad_norm": 1.8356547487301496, "learning_rate": 1.9430967286205962e-05, "loss": 0.4334, "step": 1700 }, { "epoch": 0.13475935828877006, "grad_norm": 1.7984240074881184, "learning_rate": 1.9430113722670412e-05, "loss": 0.4133, "step": 1701 }, { "epoch": 0.13483858189740544, "grad_norm": 1.8746421845702204, "learning_rate": 1.942925953820725e-05, "loss": 0.4348, "step": 1702 }, { "epoch": 0.1349178055060408, "grad_norm": 1.9845443254777095, "learning_rate": 1.9428404732872716e-05, "loss": 0.3993, "step": 1703 }, { "epoch": 0.13499702911467618, "grad_norm": 2.4156276039784244, "learning_rate": 1.94275493067231e-05, "loss": 0.6108, "step": 1704 }, { "epoch": 0.13507625272331156, "grad_norm": 1.809568454283512, "learning_rate": 1.9426693259814725e-05, "loss": 0.4645, "step": 1705 }, { "epoch": 0.1351554763319469, "grad_norm": 1.8675048103801162, "learning_rate": 1.9425836592203954e-05, "loss": 0.3901, "step": 1706 }, { "epoch": 0.1352346999405823, "grad_norm": 2.32064376753328, "learning_rate": 1.94249793039472e-05, "loss": 0.5329, "step": 1707 }, { "epoch": 0.13531392354921767, "grad_norm": 1.7546194849759331, "learning_rate": 1.9424121395100907e-05, "loss": 0.4295, "step": 1708 }, { "epoch": 0.13539314715785303, "grad_norm": 2.0775656344676037, "learning_rate": 1.9423262865721567e-05, "loss": 0.444, "step": 1709 }, { "epoch": 0.1354723707664884, "grad_norm": 1.9905151480595324, "learning_rate": 1.9422403715865708e-05, "loss": 0.5129, "step": 1710 }, { "epoch": 0.1355515943751238, "grad_norm": 1.8947681729403973, "learning_rate": 1.9421543945589904e-05, "loss": 0.4244, "step": 1711 }, { "epoch": 0.13563081798375917, "grad_norm": 1.8364447732380378, "learning_rate": 1.9420683554950765e-05, "loss": 0.4196, "step": 1712 }, { "epoch": 0.13571004159239453, "grad_norm": 2.497857229275654, "learning_rate": 1.9419822544004942e-05, "loss": 0.5594, "step": 1713 }, { "epoch": 0.1357892652010299, "grad_norm": 2.0029454551093964, "learning_rate": 1.941896091280913e-05, "loss": 0.5197, "step": 1714 }, { "epoch": 0.1358684888096653, "grad_norm": 2.408637352901282, "learning_rate": 1.9418098661420064e-05, "loss": 0.4725, "step": 1715 }, { "epoch": 0.13594771241830064, "grad_norm": 2.1314088052580527, "learning_rate": 1.9417235789894517e-05, "loss": 0.6064, "step": 1716 }, { "epoch": 0.13602693602693602, "grad_norm": 2.323294655439459, "learning_rate": 1.9416372298289306e-05, "loss": 0.4667, "step": 1717 }, { "epoch": 0.1361061596355714, "grad_norm": 2.0042268492920785, "learning_rate": 1.941550818666129e-05, "loss": 0.3919, "step": 1718 }, { "epoch": 0.1361853832442068, "grad_norm": 2.405695532988341, "learning_rate": 1.941464345506736e-05, "loss": 0.5169, "step": 1719 }, { "epoch": 0.13626460685284214, "grad_norm": 1.7786793195492034, "learning_rate": 1.9413778103564462e-05, "loss": 0.5789, "step": 1720 }, { "epoch": 0.13634383046147752, "grad_norm": 1.9578453884853981, "learning_rate": 1.9412912132209573e-05, "loss": 0.4823, "step": 1721 }, { "epoch": 0.1364230540701129, "grad_norm": 2.1062554513907252, "learning_rate": 1.941204554105971e-05, "loss": 0.4481, "step": 1722 }, { "epoch": 0.13650227767874826, "grad_norm": 1.786579999483879, "learning_rate": 1.941117833017194e-05, "loss": 0.4559, "step": 1723 }, { "epoch": 0.13658150128738364, "grad_norm": 1.8123789371957015, "learning_rate": 1.9410310499603356e-05, "loss": 0.5092, "step": 1724 }, { "epoch": 0.13666072489601902, "grad_norm": 1.8015305568837592, "learning_rate": 1.9409442049411104e-05, "loss": 0.3541, "step": 1725 }, { "epoch": 0.13673994850465437, "grad_norm": 2.2860736215597877, "learning_rate": 1.9408572979652373e-05, "loss": 0.5113, "step": 1726 }, { "epoch": 0.13681917211328976, "grad_norm": 2.069428158328441, "learning_rate": 1.940770329038438e-05, "loss": 0.4118, "step": 1727 }, { "epoch": 0.13689839572192514, "grad_norm": 2.0410027061518705, "learning_rate": 1.9406832981664392e-05, "loss": 0.3555, "step": 1728 }, { "epoch": 0.13697761933056052, "grad_norm": 1.9822005259271056, "learning_rate": 1.9405962053549717e-05, "loss": 0.5067, "step": 1729 }, { "epoch": 0.13705684293919587, "grad_norm": 2.313602099290828, "learning_rate": 1.9405090506097698e-05, "loss": 0.4683, "step": 1730 }, { "epoch": 0.13713606654783125, "grad_norm": 2.209547323402575, "learning_rate": 1.9404218339365724e-05, "loss": 0.3983, "step": 1731 }, { "epoch": 0.13721529015646663, "grad_norm": 2.622615156077461, "learning_rate": 1.940334555341122e-05, "loss": 0.4484, "step": 1732 }, { "epoch": 0.137294513765102, "grad_norm": 1.981892743092382, "learning_rate": 1.940247214829166e-05, "loss": 0.393, "step": 1733 }, { "epoch": 0.13737373737373737, "grad_norm": 2.1220095542393382, "learning_rate": 1.9401598124064552e-05, "loss": 0.4899, "step": 1734 }, { "epoch": 0.13745296098237275, "grad_norm": 1.7769057895679274, "learning_rate": 1.9400723480787446e-05, "loss": 0.4501, "step": 1735 }, { "epoch": 0.13753218459100813, "grad_norm": 2.2016975897855655, "learning_rate": 1.9399848218517927e-05, "loss": 0.4238, "step": 1736 }, { "epoch": 0.1376114081996435, "grad_norm": 1.6178729306084192, "learning_rate": 1.9398972337313634e-05, "loss": 0.3724, "step": 1737 }, { "epoch": 0.13769063180827887, "grad_norm": 1.8502194092890032, "learning_rate": 1.939809583723224e-05, "loss": 0.3597, "step": 1738 }, { "epoch": 0.13776985541691425, "grad_norm": 2.158042318325267, "learning_rate": 1.9397218718331455e-05, "loss": 0.6125, "step": 1739 }, { "epoch": 0.1378490790255496, "grad_norm": 1.9061422703867463, "learning_rate": 1.939634098066903e-05, "loss": 0.5138, "step": 1740 }, { "epoch": 0.13792830263418498, "grad_norm": 1.9098883872894814, "learning_rate": 1.9395462624302768e-05, "loss": 0.4624, "step": 1741 }, { "epoch": 0.13800752624282037, "grad_norm": 1.7123011474279195, "learning_rate": 1.93945836492905e-05, "loss": 0.4588, "step": 1742 }, { "epoch": 0.13808674985145572, "grad_norm": 2.0568765939877585, "learning_rate": 1.93937040556901e-05, "loss": 0.6117, "step": 1743 }, { "epoch": 0.1381659734600911, "grad_norm": 2.861098225849574, "learning_rate": 1.939282384355949e-05, "loss": 0.5326, "step": 1744 }, { "epoch": 0.13824519706872648, "grad_norm": 1.8175794975317248, "learning_rate": 1.9391943012956623e-05, "loss": 0.4212, "step": 1745 }, { "epoch": 0.13832442067736186, "grad_norm": 1.838516328275049, "learning_rate": 1.93910615639395e-05, "loss": 0.4089, "step": 1746 }, { "epoch": 0.13840364428599722, "grad_norm": 2.1572337500925274, "learning_rate": 1.9390179496566162e-05, "loss": 0.4446, "step": 1747 }, { "epoch": 0.1384828678946326, "grad_norm": 2.418764482717121, "learning_rate": 1.938929681089469e-05, "loss": 0.5034, "step": 1748 }, { "epoch": 0.13856209150326798, "grad_norm": 2.2109820477973634, "learning_rate": 1.9388413506983196e-05, "loss": 0.5481, "step": 1749 }, { "epoch": 0.13864131511190333, "grad_norm": 2.136195707929235, "learning_rate": 1.938752958488985e-05, "loss": 0.5183, "step": 1750 }, { "epoch": 0.13872053872053872, "grad_norm": 1.9372198262370093, "learning_rate": 1.9386645044672848e-05, "loss": 0.3758, "step": 1751 }, { "epoch": 0.1387997623291741, "grad_norm": 1.968787028856834, "learning_rate": 1.9385759886390433e-05, "loss": 0.4315, "step": 1752 }, { "epoch": 0.13887898593780948, "grad_norm": 2.1157436968704757, "learning_rate": 1.9384874110100897e-05, "loss": 0.4934, "step": 1753 }, { "epoch": 0.13895820954644483, "grad_norm": 2.627711551232593, "learning_rate": 1.9383987715862554e-05, "loss": 0.4437, "step": 1754 }, { "epoch": 0.13903743315508021, "grad_norm": 2.0759798286362887, "learning_rate": 1.9383100703733774e-05, "loss": 0.5854, "step": 1755 }, { "epoch": 0.1391166567637156, "grad_norm": 2.2200247530146053, "learning_rate": 1.9382213073772962e-05, "loss": 0.5481, "step": 1756 }, { "epoch": 0.13919588037235095, "grad_norm": 2.3220177650487273, "learning_rate": 1.938132482603856e-05, "loss": 0.5872, "step": 1757 }, { "epoch": 0.13927510398098633, "grad_norm": 1.904674940657998, "learning_rate": 1.9380435960589065e-05, "loss": 0.4909, "step": 1758 }, { "epoch": 0.1393543275896217, "grad_norm": 1.8204153175975037, "learning_rate": 1.937954647748299e-05, "loss": 0.4696, "step": 1759 }, { "epoch": 0.1394335511982571, "grad_norm": 1.9541158849152975, "learning_rate": 1.9378656376778914e-05, "loss": 0.5564, "step": 1760 }, { "epoch": 0.13951277480689245, "grad_norm": 1.9878054715321214, "learning_rate": 1.9377765658535445e-05, "loss": 0.4724, "step": 1761 }, { "epoch": 0.13959199841552783, "grad_norm": 1.7890825933234422, "learning_rate": 1.937687432281123e-05, "loss": 0.2845, "step": 1762 }, { "epoch": 0.1396712220241632, "grad_norm": 1.7643246744742345, "learning_rate": 1.9375982369664958e-05, "loss": 0.4345, "step": 1763 }, { "epoch": 0.13975044563279856, "grad_norm": 2.020045225419193, "learning_rate": 1.937508979915536e-05, "loss": 0.4975, "step": 1764 }, { "epoch": 0.13982966924143395, "grad_norm": 2.467895531181798, "learning_rate": 1.9374196611341212e-05, "loss": 0.7063, "step": 1765 }, { "epoch": 0.13990889285006933, "grad_norm": 2.232716844846986, "learning_rate": 1.937330280628132e-05, "loss": 0.625, "step": 1766 }, { "epoch": 0.13998811645870468, "grad_norm": 2.0246533429342373, "learning_rate": 1.937240838403454e-05, "loss": 0.4467, "step": 1767 }, { "epoch": 0.14006734006734006, "grad_norm": 2.0035787659594453, "learning_rate": 1.9371513344659764e-05, "loss": 0.4551, "step": 1768 }, { "epoch": 0.14014656367597544, "grad_norm": 1.674694238789154, "learning_rate": 1.937061768821593e-05, "loss": 0.3754, "step": 1769 }, { "epoch": 0.14022578728461083, "grad_norm": 2.022031124885922, "learning_rate": 1.936972141476201e-05, "loss": 0.3525, "step": 1770 }, { "epoch": 0.14030501089324618, "grad_norm": 1.9498652413194093, "learning_rate": 1.936882452435702e-05, "loss": 0.4232, "step": 1771 }, { "epoch": 0.14038423450188156, "grad_norm": 1.6798047629882495, "learning_rate": 1.936792701706001e-05, "loss": 0.3754, "step": 1772 }, { "epoch": 0.14046345811051694, "grad_norm": 2.0061424050036796, "learning_rate": 1.9367028892930088e-05, "loss": 0.383, "step": 1773 }, { "epoch": 0.1405426817191523, "grad_norm": 2.390564364769597, "learning_rate": 1.9366130152026378e-05, "loss": 0.5046, "step": 1774 }, { "epoch": 0.14062190532778768, "grad_norm": 1.624583928388353, "learning_rate": 1.936523079440807e-05, "loss": 0.3932, "step": 1775 }, { "epoch": 0.14070112893642306, "grad_norm": 2.1716089822125673, "learning_rate": 1.936433082013437e-05, "loss": 0.5159, "step": 1776 }, { "epoch": 0.14078035254505844, "grad_norm": 1.7393439467030505, "learning_rate": 1.936343022926455e-05, "loss": 0.4323, "step": 1777 }, { "epoch": 0.1408595761536938, "grad_norm": 2.163720813492295, "learning_rate": 1.93625290218579e-05, "loss": 0.5656, "step": 1778 }, { "epoch": 0.14093879976232918, "grad_norm": 1.816942167119474, "learning_rate": 1.9361627197973767e-05, "loss": 0.3822, "step": 1779 }, { "epoch": 0.14101802337096456, "grad_norm": 2.0497407275673174, "learning_rate": 1.9360724757671525e-05, "loss": 0.4217, "step": 1780 }, { "epoch": 0.1410972469795999, "grad_norm": 1.5973374977032029, "learning_rate": 1.93598217010106e-05, "loss": 0.4664, "step": 1781 }, { "epoch": 0.1411764705882353, "grad_norm": 2.0599105378559264, "learning_rate": 1.9358918028050453e-05, "loss": 0.5569, "step": 1782 }, { "epoch": 0.14125569419687067, "grad_norm": 1.6677554130424028, "learning_rate": 1.9358013738850586e-05, "loss": 0.43, "step": 1783 }, { "epoch": 0.14133491780550603, "grad_norm": 2.215116502783159, "learning_rate": 1.935710883347054e-05, "loss": 0.4736, "step": 1784 }, { "epoch": 0.1414141414141414, "grad_norm": 2.270922316355081, "learning_rate": 1.9356203311969903e-05, "loss": 0.5182, "step": 1785 }, { "epoch": 0.1414933650227768, "grad_norm": 1.7845245648001509, "learning_rate": 1.9355297174408298e-05, "loss": 0.3727, "step": 1786 }, { "epoch": 0.14157258863141217, "grad_norm": 1.7965048440585847, "learning_rate": 1.9354390420845387e-05, "loss": 0.4023, "step": 1787 }, { "epoch": 0.14165181224004753, "grad_norm": 2.4450551216498755, "learning_rate": 1.9353483051340876e-05, "loss": 0.4747, "step": 1788 }, { "epoch": 0.1417310358486829, "grad_norm": 2.138546589514695, "learning_rate": 1.9352575065954515e-05, "loss": 0.6135, "step": 1789 }, { "epoch": 0.1418102594573183, "grad_norm": 2.131733890328888, "learning_rate": 1.9351666464746087e-05, "loss": 0.4813, "step": 1790 }, { "epoch": 0.14188948306595364, "grad_norm": 1.857900351570552, "learning_rate": 1.935075724777542e-05, "loss": 0.4552, "step": 1791 }, { "epoch": 0.14196870667458902, "grad_norm": 1.8538230130208877, "learning_rate": 1.9349847415102378e-05, "loss": 0.4836, "step": 1792 }, { "epoch": 0.1420479302832244, "grad_norm": 1.7066402987548663, "learning_rate": 1.9348936966786874e-05, "loss": 0.384, "step": 1793 }, { "epoch": 0.1421271538918598, "grad_norm": 2.1433453566457112, "learning_rate": 1.9348025902888858e-05, "loss": 0.5182, "step": 1794 }, { "epoch": 0.14220637750049514, "grad_norm": 2.1828203151582346, "learning_rate": 1.9347114223468316e-05, "loss": 0.4587, "step": 1795 }, { "epoch": 0.14228560110913052, "grad_norm": 2.2696694482378073, "learning_rate": 1.9346201928585273e-05, "loss": 0.6383, "step": 1796 }, { "epoch": 0.1423648247177659, "grad_norm": 1.9021557835183327, "learning_rate": 1.9345289018299807e-05, "loss": 0.3727, "step": 1797 }, { "epoch": 0.14244404832640126, "grad_norm": 1.9451941303057496, "learning_rate": 1.9344375492672024e-05, "loss": 0.4042, "step": 1798 }, { "epoch": 0.14252327193503664, "grad_norm": 1.503029071705186, "learning_rate": 1.934346135176208e-05, "loss": 0.2743, "step": 1799 }, { "epoch": 0.14260249554367202, "grad_norm": 2.0780442067088725, "learning_rate": 1.9342546595630162e-05, "loss": 0.4638, "step": 1800 }, { "epoch": 0.1426817191523074, "grad_norm": 1.9043616225453697, "learning_rate": 1.9341631224336503e-05, "loss": 0.4801, "step": 1801 }, { "epoch": 0.14276094276094276, "grad_norm": 2.0991769456846012, "learning_rate": 1.934071523794138e-05, "loss": 0.4705, "step": 1802 }, { "epoch": 0.14284016636957814, "grad_norm": 2.035425321656529, "learning_rate": 1.9339798636505102e-05, "loss": 0.3996, "step": 1803 }, { "epoch": 0.14291938997821352, "grad_norm": 2.0135250284271895, "learning_rate": 1.9338881420088023e-05, "loss": 0.478, "step": 1804 }, { "epoch": 0.14299861358684887, "grad_norm": 2.220003798769769, "learning_rate": 1.933796358875054e-05, "loss": 0.5016, "step": 1805 }, { "epoch": 0.14307783719548425, "grad_norm": 2.2951484077224986, "learning_rate": 1.9337045142553085e-05, "loss": 0.5116, "step": 1806 }, { "epoch": 0.14315706080411963, "grad_norm": 2.0687508524836313, "learning_rate": 1.9336126081556134e-05, "loss": 0.4933, "step": 1807 }, { "epoch": 0.143236284412755, "grad_norm": 2.159079806953251, "learning_rate": 1.9335206405820208e-05, "loss": 0.4002, "step": 1808 }, { "epoch": 0.14331550802139037, "grad_norm": 2.1994063427890596, "learning_rate": 1.933428611540585e-05, "loss": 0.4737, "step": 1809 }, { "epoch": 0.14339473163002575, "grad_norm": 2.307142705416731, "learning_rate": 1.9333365210373668e-05, "loss": 0.4908, "step": 1810 }, { "epoch": 0.14347395523866113, "grad_norm": 1.685965820228709, "learning_rate": 1.93324436907843e-05, "loss": 0.3631, "step": 1811 }, { "epoch": 0.1435531788472965, "grad_norm": 2.0599784451619927, "learning_rate": 1.9331521556698415e-05, "loss": 0.4934, "step": 1812 }, { "epoch": 0.14363240245593187, "grad_norm": 2.0466443169909954, "learning_rate": 1.9330598808176736e-05, "loss": 0.4656, "step": 1813 }, { "epoch": 0.14371162606456725, "grad_norm": 2.1132047205863733, "learning_rate": 1.9329675445280024e-05, "loss": 0.4509, "step": 1814 }, { "epoch": 0.1437908496732026, "grad_norm": 1.7822908149601173, "learning_rate": 1.9328751468069075e-05, "loss": 0.3163, "step": 1815 }, { "epoch": 0.14387007328183798, "grad_norm": 1.838990777874387, "learning_rate": 1.932782687660473e-05, "loss": 0.5313, "step": 1816 }, { "epoch": 0.14394929689047337, "grad_norm": 2.2461276342862044, "learning_rate": 1.9326901670947868e-05, "loss": 0.5057, "step": 1817 }, { "epoch": 0.14402852049910875, "grad_norm": 1.8609101306501556, "learning_rate": 1.9325975851159406e-05, "loss": 0.3807, "step": 1818 }, { "epoch": 0.1441077441077441, "grad_norm": 1.981105678074427, "learning_rate": 1.932504941730031e-05, "loss": 0.3796, "step": 1819 }, { "epoch": 0.14418696771637948, "grad_norm": 2.306442711644101, "learning_rate": 1.932412236943158e-05, "loss": 0.5326, "step": 1820 }, { "epoch": 0.14426619132501486, "grad_norm": 1.8029045918670155, "learning_rate": 1.9323194707614253e-05, "loss": 0.3696, "step": 1821 }, { "epoch": 0.14434541493365022, "grad_norm": 1.7897509339138913, "learning_rate": 1.932226643190942e-05, "loss": 0.4776, "step": 1822 }, { "epoch": 0.1444246385422856, "grad_norm": 2.340700481179847, "learning_rate": 1.9321337542378193e-05, "loss": 0.5961, "step": 1823 }, { "epoch": 0.14450386215092098, "grad_norm": 2.076583010875345, "learning_rate": 1.9320408039081745e-05, "loss": 0.447, "step": 1824 }, { "epoch": 0.14458308575955633, "grad_norm": 1.8023699948273684, "learning_rate": 1.9319477922081273e-05, "loss": 0.4057, "step": 1825 }, { "epoch": 0.14466230936819172, "grad_norm": 1.7624331472698438, "learning_rate": 1.9318547191438018e-05, "loss": 0.393, "step": 1826 }, { "epoch": 0.1447415329768271, "grad_norm": 2.1307732710959746, "learning_rate": 1.9317615847213274e-05, "loss": 0.4143, "step": 1827 }, { "epoch": 0.14482075658546248, "grad_norm": 2.15431151013332, "learning_rate": 1.931668388946836e-05, "loss": 0.4426, "step": 1828 }, { "epoch": 0.14489998019409783, "grad_norm": 2.167344316732097, "learning_rate": 1.9315751318264636e-05, "loss": 0.5725, "step": 1829 }, { "epoch": 0.14497920380273321, "grad_norm": 2.3659972389832844, "learning_rate": 1.9314818133663516e-05, "loss": 0.5478, "step": 1830 }, { "epoch": 0.1450584274113686, "grad_norm": 2.2549618688831155, "learning_rate": 1.9313884335726443e-05, "loss": 0.476, "step": 1831 }, { "epoch": 0.14513765102000395, "grad_norm": 1.698471366718964, "learning_rate": 1.93129499245149e-05, "loss": 0.4373, "step": 1832 }, { "epoch": 0.14521687462863933, "grad_norm": 2.1759911317876854, "learning_rate": 1.9312014900090416e-05, "loss": 0.4952, "step": 1833 }, { "epoch": 0.1452960982372747, "grad_norm": 2.2611368908528875, "learning_rate": 1.931107926251456e-05, "loss": 0.4248, "step": 1834 }, { "epoch": 0.1453753218459101, "grad_norm": 2.0360940514568315, "learning_rate": 1.931014301184893e-05, "loss": 0.4159, "step": 1835 }, { "epoch": 0.14545454545454545, "grad_norm": 2.117510818133951, "learning_rate": 1.9309206148155188e-05, "loss": 0.4959, "step": 1836 }, { "epoch": 0.14553376906318083, "grad_norm": 1.8158404215969388, "learning_rate": 1.930826867149501e-05, "loss": 0.4113, "step": 1837 }, { "epoch": 0.1456129926718162, "grad_norm": 2.209456584755991, "learning_rate": 1.9307330581930127e-05, "loss": 0.449, "step": 1838 }, { "epoch": 0.14569221628045156, "grad_norm": 2.011804955761324, "learning_rate": 1.930639187952231e-05, "loss": 0.4077, "step": 1839 }, { "epoch": 0.14577143988908695, "grad_norm": 1.9777091067568096, "learning_rate": 1.930545256433337e-05, "loss": 0.4008, "step": 1840 }, { "epoch": 0.14585066349772233, "grad_norm": 2.2823522263468665, "learning_rate": 1.930451263642515e-05, "loss": 0.4894, "step": 1841 }, { "epoch": 0.14592988710635768, "grad_norm": 1.989066747530846, "learning_rate": 1.9303572095859545e-05, "loss": 0.4688, "step": 1842 }, { "epoch": 0.14600911071499306, "grad_norm": 1.518781692342767, "learning_rate": 1.9302630942698487e-05, "loss": 0.3336, "step": 1843 }, { "epoch": 0.14608833432362844, "grad_norm": 2.1014901119358766, "learning_rate": 1.9301689177003938e-05, "loss": 0.4732, "step": 1844 }, { "epoch": 0.14616755793226383, "grad_norm": 1.9598769017842552, "learning_rate": 1.9300746798837913e-05, "loss": 0.4883, "step": 1845 }, { "epoch": 0.14624678154089918, "grad_norm": 2.3493637045983506, "learning_rate": 1.9299803808262466e-05, "loss": 0.5128, "step": 1846 }, { "epoch": 0.14632600514953456, "grad_norm": 2.2166559785155435, "learning_rate": 1.9298860205339685e-05, "loss": 0.4094, "step": 1847 }, { "epoch": 0.14640522875816994, "grad_norm": 1.999818778507222, "learning_rate": 1.9297915990131704e-05, "loss": 0.362, "step": 1848 }, { "epoch": 0.1464844523668053, "grad_norm": 2.0642269726819746, "learning_rate": 1.9296971162700696e-05, "loss": 0.4919, "step": 1849 }, { "epoch": 0.14656367597544068, "grad_norm": 1.847261442593121, "learning_rate": 1.9296025723108867e-05, "loss": 0.4321, "step": 1850 }, { "epoch": 0.14664289958407606, "grad_norm": 1.9549805277293442, "learning_rate": 1.9295079671418474e-05, "loss": 0.4691, "step": 1851 }, { "epoch": 0.14672212319271144, "grad_norm": 1.85507622284027, "learning_rate": 1.929413300769181e-05, "loss": 0.4271, "step": 1852 }, { "epoch": 0.1468013468013468, "grad_norm": 2.130972390981168, "learning_rate": 1.9293185731991212e-05, "loss": 0.534, "step": 1853 }, { "epoch": 0.14688057040998218, "grad_norm": 1.8403258636881825, "learning_rate": 1.9292237844379043e-05, "loss": 0.4804, "step": 1854 }, { "epoch": 0.14695979401861756, "grad_norm": 2.1049113638663886, "learning_rate": 1.929128934491773e-05, "loss": 0.4035, "step": 1855 }, { "epoch": 0.1470390176272529, "grad_norm": 1.8290146848525395, "learning_rate": 1.929034023366972e-05, "loss": 0.3942, "step": 1856 }, { "epoch": 0.1471182412358883, "grad_norm": 2.0293885839440127, "learning_rate": 1.92893905106975e-05, "loss": 0.4333, "step": 1857 }, { "epoch": 0.14719746484452367, "grad_norm": 2.143315362216103, "learning_rate": 1.9288440176063617e-05, "loss": 0.4627, "step": 1858 }, { "epoch": 0.14727668845315905, "grad_norm": 2.1624607141174375, "learning_rate": 1.9287489229830645e-05, "loss": 0.5045, "step": 1859 }, { "epoch": 0.1473559120617944, "grad_norm": 1.9613800936249302, "learning_rate": 1.9286537672061192e-05, "loss": 0.5062, "step": 1860 }, { "epoch": 0.1474351356704298, "grad_norm": 1.9648995675783334, "learning_rate": 1.9285585502817917e-05, "loss": 0.3821, "step": 1861 }, { "epoch": 0.14751435927906517, "grad_norm": 1.9786698165372079, "learning_rate": 1.9284632722163515e-05, "loss": 0.4164, "step": 1862 }, { "epoch": 0.14759358288770053, "grad_norm": 1.9033845131427516, "learning_rate": 1.9283679330160726e-05, "loss": 0.5049, "step": 1863 }, { "epoch": 0.1476728064963359, "grad_norm": 1.4399827623338708, "learning_rate": 1.9282725326872324e-05, "loss": 0.3297, "step": 1864 }, { "epoch": 0.1477520301049713, "grad_norm": 1.860708722176362, "learning_rate": 1.9281770712361123e-05, "loss": 0.3911, "step": 1865 }, { "epoch": 0.14783125371360664, "grad_norm": 1.8495892211555531, "learning_rate": 1.928081548668998e-05, "loss": 0.4586, "step": 1866 }, { "epoch": 0.14791047732224202, "grad_norm": 1.8420053359851232, "learning_rate": 1.9279859649921797e-05, "loss": 0.476, "step": 1867 }, { "epoch": 0.1479897009308774, "grad_norm": 1.974134326275655, "learning_rate": 1.9278903202119508e-05, "loss": 0.506, "step": 1868 }, { "epoch": 0.1480689245395128, "grad_norm": 1.8203063542485634, "learning_rate": 1.9277946143346086e-05, "loss": 0.4331, "step": 1869 }, { "epoch": 0.14814814814814814, "grad_norm": 1.9668506928461797, "learning_rate": 1.9276988473664557e-05, "loss": 0.4762, "step": 1870 }, { "epoch": 0.14822737175678352, "grad_norm": 2.1191496630185642, "learning_rate": 1.9276030193137974e-05, "loss": 0.5223, "step": 1871 }, { "epoch": 0.1483065953654189, "grad_norm": 1.7254225381192794, "learning_rate": 1.927507130182944e-05, "loss": 0.313, "step": 1872 }, { "epoch": 0.14838581897405426, "grad_norm": 1.7873173208811577, "learning_rate": 1.9274111799802084e-05, "loss": 0.3964, "step": 1873 }, { "epoch": 0.14846504258268964, "grad_norm": 2.2364493024943513, "learning_rate": 1.9273151687119093e-05, "loss": 0.4798, "step": 1874 }, { "epoch": 0.14854426619132502, "grad_norm": 2.2831714507851073, "learning_rate": 1.927219096384368e-05, "loss": 0.3919, "step": 1875 }, { "epoch": 0.1486234897999604, "grad_norm": 2.23354123887157, "learning_rate": 1.9271229630039107e-05, "loss": 0.4747, "step": 1876 }, { "epoch": 0.14870271340859575, "grad_norm": 1.603236364029779, "learning_rate": 1.9270267685768676e-05, "loss": 0.3655, "step": 1877 }, { "epoch": 0.14878193701723114, "grad_norm": 2.168529420784789, "learning_rate": 1.9269305131095722e-05, "loss": 0.3981, "step": 1878 }, { "epoch": 0.14886116062586652, "grad_norm": 2.0085573977685494, "learning_rate": 1.9268341966083627e-05, "loss": 0.4899, "step": 1879 }, { "epoch": 0.14894038423450187, "grad_norm": 2.2243447349940806, "learning_rate": 1.9267378190795812e-05, "loss": 0.4673, "step": 1880 }, { "epoch": 0.14901960784313725, "grad_norm": 1.9868982898608372, "learning_rate": 1.9266413805295732e-05, "loss": 0.3923, "step": 1881 }, { "epoch": 0.14909883145177263, "grad_norm": 1.7397097370050283, "learning_rate": 1.9265448809646893e-05, "loss": 0.3642, "step": 1882 }, { "epoch": 0.149178055060408, "grad_norm": 1.94205580602043, "learning_rate": 1.9264483203912826e-05, "loss": 0.4176, "step": 1883 }, { "epoch": 0.14925727866904337, "grad_norm": 2.3367070651409754, "learning_rate": 1.9263516988157123e-05, "loss": 0.5332, "step": 1884 }, { "epoch": 0.14933650227767875, "grad_norm": 2.689015395247916, "learning_rate": 1.92625501624434e-05, "loss": 0.4557, "step": 1885 }, { "epoch": 0.14941572588631413, "grad_norm": 1.7186251042579286, "learning_rate": 1.9261582726835316e-05, "loss": 0.3935, "step": 1886 }, { "epoch": 0.1494949494949495, "grad_norm": 1.89128621765505, "learning_rate": 1.926061468139657e-05, "loss": 0.3861, "step": 1887 }, { "epoch": 0.14957417310358487, "grad_norm": 1.8673714239430046, "learning_rate": 1.9259646026190913e-05, "loss": 0.3528, "step": 1888 }, { "epoch": 0.14965339671222025, "grad_norm": 2.0300503501002876, "learning_rate": 1.9258676761282117e-05, "loss": 0.3455, "step": 1889 }, { "epoch": 0.1497326203208556, "grad_norm": 1.7113529266372025, "learning_rate": 1.9257706886734e-05, "loss": 0.3368, "step": 1890 }, { "epoch": 0.14981184392949098, "grad_norm": 1.8920505991175196, "learning_rate": 1.9256736402610437e-05, "loss": 0.3931, "step": 1891 }, { "epoch": 0.14989106753812637, "grad_norm": 2.0710426675113234, "learning_rate": 1.9255765308975322e-05, "loss": 0.3905, "step": 1892 }, { "epoch": 0.14997029114676175, "grad_norm": 2.3245060771205983, "learning_rate": 1.9254793605892596e-05, "loss": 0.5304, "step": 1893 }, { "epoch": 0.1500495147553971, "grad_norm": 1.762178470731661, "learning_rate": 1.9253821293426242e-05, "loss": 0.43, "step": 1894 }, { "epoch": 0.15012873836403248, "grad_norm": 1.9129250808745424, "learning_rate": 1.9252848371640284e-05, "loss": 0.4565, "step": 1895 }, { "epoch": 0.15020796197266786, "grad_norm": 1.868247456948084, "learning_rate": 1.925187484059878e-05, "loss": 0.4063, "step": 1896 }, { "epoch": 0.15028718558130322, "grad_norm": 2.0298751791691827, "learning_rate": 1.9250900700365837e-05, "loss": 0.5094, "step": 1897 }, { "epoch": 0.1503664091899386, "grad_norm": 1.6624511402079098, "learning_rate": 1.9249925951005593e-05, "loss": 0.3305, "step": 1898 }, { "epoch": 0.15044563279857398, "grad_norm": 1.8351561038727953, "learning_rate": 1.9248950592582235e-05, "loss": 0.479, "step": 1899 }, { "epoch": 0.15052485640720936, "grad_norm": 1.7694296875548985, "learning_rate": 1.9247974625159983e-05, "loss": 0.3434, "step": 1900 }, { "epoch": 0.15060408001584472, "grad_norm": 2.032063298012281, "learning_rate": 1.92469980488031e-05, "loss": 0.4861, "step": 1901 }, { "epoch": 0.1506833036244801, "grad_norm": 2.224114977159279, "learning_rate": 1.924602086357589e-05, "loss": 0.5321, "step": 1902 }, { "epoch": 0.15076252723311548, "grad_norm": 1.8468503194943078, "learning_rate": 1.9245043069542696e-05, "loss": 0.4219, "step": 1903 }, { "epoch": 0.15084175084175083, "grad_norm": 1.8128736638197256, "learning_rate": 1.92440646667679e-05, "loss": 0.3802, "step": 1904 }, { "epoch": 0.1509209744503862, "grad_norm": 1.9532221440247153, "learning_rate": 1.9243085655315924e-05, "loss": 0.5408, "step": 1905 }, { "epoch": 0.1510001980590216, "grad_norm": 1.820683007173657, "learning_rate": 1.924210603525123e-05, "loss": 0.4453, "step": 1906 }, { "epoch": 0.15107942166765695, "grad_norm": 2.304764565458607, "learning_rate": 1.924112580663833e-05, "loss": 0.431, "step": 1907 }, { "epoch": 0.15115864527629233, "grad_norm": 2.1196869594549077, "learning_rate": 1.9240144969541754e-05, "loss": 0.4517, "step": 1908 }, { "epoch": 0.1512378688849277, "grad_norm": 1.9947772795376557, "learning_rate": 1.9239163524026097e-05, "loss": 0.4483, "step": 1909 }, { "epoch": 0.1513170924935631, "grad_norm": 1.684761660778757, "learning_rate": 1.9238181470155978e-05, "loss": 0.4046, "step": 1910 }, { "epoch": 0.15139631610219845, "grad_norm": 2.130858326720803, "learning_rate": 1.923719880799606e-05, "loss": 0.5245, "step": 1911 }, { "epoch": 0.15147553971083383, "grad_norm": 2.3276100423522386, "learning_rate": 1.9236215537611044e-05, "loss": 0.4491, "step": 1912 }, { "epoch": 0.1515547633194692, "grad_norm": 1.89893713295418, "learning_rate": 1.923523165906568e-05, "loss": 0.4959, "step": 1913 }, { "epoch": 0.15163398692810456, "grad_norm": 1.914576502782711, "learning_rate": 1.923424717242475e-05, "loss": 0.4415, "step": 1914 }, { "epoch": 0.15171321053673995, "grad_norm": 1.843912458264141, "learning_rate": 1.923326207775307e-05, "loss": 0.4721, "step": 1915 }, { "epoch": 0.15179243414537533, "grad_norm": 1.8891873478621077, "learning_rate": 1.9232276375115517e-05, "loss": 0.4514, "step": 1916 }, { "epoch": 0.1518716577540107, "grad_norm": 2.106252079033078, "learning_rate": 1.9231290064576985e-05, "loss": 0.4155, "step": 1917 }, { "epoch": 0.15195088136264606, "grad_norm": 1.695659540473576, "learning_rate": 1.923030314620242e-05, "loss": 0.4002, "step": 1918 }, { "epoch": 0.15203010497128144, "grad_norm": 2.28437970043238, "learning_rate": 1.9229315620056805e-05, "loss": 0.5403, "step": 1919 }, { "epoch": 0.15210932857991682, "grad_norm": 2.183536217192937, "learning_rate": 1.9228327486205166e-05, "loss": 0.5063, "step": 1920 }, { "epoch": 0.15218855218855218, "grad_norm": 1.9305139961497506, "learning_rate": 1.9227338744712565e-05, "loss": 0.4946, "step": 1921 }, { "epoch": 0.15226777579718756, "grad_norm": 1.942805626215468, "learning_rate": 1.9226349395644106e-05, "loss": 0.5179, "step": 1922 }, { "epoch": 0.15234699940582294, "grad_norm": 2.187066801878686, "learning_rate": 1.9225359439064934e-05, "loss": 0.4609, "step": 1923 }, { "epoch": 0.1524262230144583, "grad_norm": 1.7678449602437118, "learning_rate": 1.9224368875040235e-05, "loss": 0.4618, "step": 1924 }, { "epoch": 0.15250544662309368, "grad_norm": 1.8783593693212375, "learning_rate": 1.922337770363523e-05, "loss": 0.3983, "step": 1925 }, { "epoch": 0.15258467023172906, "grad_norm": 1.6685539071365887, "learning_rate": 1.922238592491518e-05, "loss": 0.4266, "step": 1926 }, { "epoch": 0.15266389384036444, "grad_norm": 1.83736043297227, "learning_rate": 1.9221393538945397e-05, "loss": 0.5038, "step": 1927 }, { "epoch": 0.1527431174489998, "grad_norm": 2.065717585181778, "learning_rate": 1.9220400545791216e-05, "loss": 0.4098, "step": 1928 }, { "epoch": 0.15282234105763517, "grad_norm": 2.194196464387683, "learning_rate": 1.9219406945518028e-05, "loss": 0.503, "step": 1929 }, { "epoch": 0.15290156466627056, "grad_norm": 1.6921395712465421, "learning_rate": 1.921841273819125e-05, "loss": 0.4009, "step": 1930 }, { "epoch": 0.1529807882749059, "grad_norm": 2.3982264520278855, "learning_rate": 1.9217417923876352e-05, "loss": 0.6454, "step": 1931 }, { "epoch": 0.1530600118835413, "grad_norm": 2.434815057296822, "learning_rate": 1.9216422502638836e-05, "loss": 0.4516, "step": 1932 }, { "epoch": 0.15313923549217667, "grad_norm": 1.5625483543103351, "learning_rate": 1.9215426474544242e-05, "loss": 0.3211, "step": 1933 }, { "epoch": 0.15321845910081205, "grad_norm": 1.6859384136778546, "learning_rate": 1.9214429839658156e-05, "loss": 0.3147, "step": 1934 }, { "epoch": 0.1532976827094474, "grad_norm": 1.8659018671223904, "learning_rate": 1.9213432598046205e-05, "loss": 0.4216, "step": 1935 }, { "epoch": 0.1533769063180828, "grad_norm": 1.950069094228493, "learning_rate": 1.9212434749774048e-05, "loss": 0.3841, "step": 1936 }, { "epoch": 0.15345612992671817, "grad_norm": 1.9437979084349992, "learning_rate": 1.921143629490739e-05, "loss": 0.4203, "step": 1937 }, { "epoch": 0.15353535353535352, "grad_norm": 2.132083856118412, "learning_rate": 1.9210437233511974e-05, "loss": 0.3643, "step": 1938 }, { "epoch": 0.1536145771439889, "grad_norm": 2.0621296382892886, "learning_rate": 1.9209437565653587e-05, "loss": 0.3834, "step": 1939 }, { "epoch": 0.1536938007526243, "grad_norm": 2.3378011508499488, "learning_rate": 1.9208437291398045e-05, "loss": 0.5445, "step": 1940 }, { "epoch": 0.15377302436125967, "grad_norm": 1.8673915945006168, "learning_rate": 1.920743641081122e-05, "loss": 0.4372, "step": 1941 }, { "epoch": 0.15385224796989502, "grad_norm": 1.559541513346412, "learning_rate": 1.920643492395901e-05, "loss": 0.3706, "step": 1942 }, { "epoch": 0.1539314715785304, "grad_norm": 1.9106238247941263, "learning_rate": 1.9205432830907353e-05, "loss": 0.4876, "step": 1943 }, { "epoch": 0.15401069518716579, "grad_norm": 1.8895165530515177, "learning_rate": 1.9204430131722243e-05, "loss": 0.4114, "step": 1944 }, { "epoch": 0.15408991879580114, "grad_norm": 2.1240288066456925, "learning_rate": 1.9203426826469695e-05, "loss": 0.464, "step": 1945 }, { "epoch": 0.15416914240443652, "grad_norm": 2.2132213437258508, "learning_rate": 1.9202422915215777e-05, "loss": 0.4162, "step": 1946 }, { "epoch": 0.1542483660130719, "grad_norm": 1.9889021149191182, "learning_rate": 1.920141839802659e-05, "loss": 0.4147, "step": 1947 }, { "epoch": 0.15432758962170726, "grad_norm": 2.0598692042699227, "learning_rate": 1.9200413274968276e-05, "loss": 0.3997, "step": 1948 }, { "epoch": 0.15440681323034264, "grad_norm": 2.0430074413352903, "learning_rate": 1.9199407546107014e-05, "loss": 0.4796, "step": 1949 }, { "epoch": 0.15448603683897802, "grad_norm": 2.3283017916561928, "learning_rate": 1.919840121150903e-05, "loss": 0.3975, "step": 1950 }, { "epoch": 0.1545652604476134, "grad_norm": 2.3288207185637506, "learning_rate": 1.9197394271240587e-05, "loss": 0.5509, "step": 1951 }, { "epoch": 0.15464448405624875, "grad_norm": 1.972040550521486, "learning_rate": 1.919638672536799e-05, "loss": 0.4874, "step": 1952 }, { "epoch": 0.15472370766488414, "grad_norm": 2.2835353620288976, "learning_rate": 1.9195378573957574e-05, "loss": 0.4073, "step": 1953 }, { "epoch": 0.15480293127351952, "grad_norm": 2.3055396461467703, "learning_rate": 1.9194369817075725e-05, "loss": 0.5435, "step": 1954 }, { "epoch": 0.15488215488215487, "grad_norm": 1.7468913721066341, "learning_rate": 1.9193360454788864e-05, "loss": 0.4197, "step": 1955 }, { "epoch": 0.15496137849079025, "grad_norm": 1.8901320146770375, "learning_rate": 1.919235048716345e-05, "loss": 0.3991, "step": 1956 }, { "epoch": 0.15504060209942563, "grad_norm": 1.8694540215549391, "learning_rate": 1.919133991426599e-05, "loss": 0.4191, "step": 1957 }, { "epoch": 0.15511982570806102, "grad_norm": 2.111349903660736, "learning_rate": 1.919032873616302e-05, "loss": 0.5526, "step": 1958 }, { "epoch": 0.15519904931669637, "grad_norm": 1.8547085662579925, "learning_rate": 1.918931695292113e-05, "loss": 0.5017, "step": 1959 }, { "epoch": 0.15527827292533175, "grad_norm": 1.8235165024553577, "learning_rate": 1.918830456460693e-05, "loss": 0.4983, "step": 1960 }, { "epoch": 0.15535749653396713, "grad_norm": 2.154537625909922, "learning_rate": 1.9187291571287088e-05, "loss": 0.4045, "step": 1961 }, { "epoch": 0.15543672014260249, "grad_norm": 1.7149052536133882, "learning_rate": 1.91862779730283e-05, "loss": 0.3957, "step": 1962 }, { "epoch": 0.15551594375123787, "grad_norm": 2.0026327116423372, "learning_rate": 1.918526376989731e-05, "loss": 0.4579, "step": 1963 }, { "epoch": 0.15559516735987325, "grad_norm": 1.7958870638110085, "learning_rate": 1.9184248961960895e-05, "loss": 0.4649, "step": 1964 }, { "epoch": 0.1556743909685086, "grad_norm": 1.9881404823689581, "learning_rate": 1.918323354928588e-05, "loss": 0.5235, "step": 1965 }, { "epoch": 0.15575361457714398, "grad_norm": 1.6403889304204737, "learning_rate": 1.918221753193912e-05, "loss": 0.4186, "step": 1966 }, { "epoch": 0.15583283818577937, "grad_norm": 1.8395212180387734, "learning_rate": 1.9181200909987524e-05, "loss": 0.5151, "step": 1967 }, { "epoch": 0.15591206179441475, "grad_norm": 1.8932017186535837, "learning_rate": 1.918018368349802e-05, "loss": 0.461, "step": 1968 }, { "epoch": 0.1559912854030501, "grad_norm": 1.9612754693647139, "learning_rate": 1.9179165852537596e-05, "loss": 0.4759, "step": 1969 }, { "epoch": 0.15607050901168548, "grad_norm": 2.142376187267396, "learning_rate": 1.9178147417173265e-05, "loss": 0.5838, "step": 1970 }, { "epoch": 0.15614973262032086, "grad_norm": 2.072107257313081, "learning_rate": 1.917712837747209e-05, "loss": 0.4795, "step": 1971 }, { "epoch": 0.15622895622895622, "grad_norm": 2.1710238774166597, "learning_rate": 1.917610873350117e-05, "loss": 0.4474, "step": 1972 }, { "epoch": 0.1563081798375916, "grad_norm": 2.1051730723179514, "learning_rate": 1.917508848532764e-05, "loss": 0.4373, "step": 1973 }, { "epoch": 0.15638740344622698, "grad_norm": 1.7526336787370713, "learning_rate": 1.9174067633018682e-05, "loss": 0.4352, "step": 1974 }, { "epoch": 0.15646662705486236, "grad_norm": 1.8533115293675426, "learning_rate": 1.9173046176641515e-05, "loss": 0.3838, "step": 1975 }, { "epoch": 0.15654585066349772, "grad_norm": 2.019297783913294, "learning_rate": 1.917202411626339e-05, "loss": 0.3946, "step": 1976 }, { "epoch": 0.1566250742721331, "grad_norm": 1.9613658949742254, "learning_rate": 1.9171001451951616e-05, "loss": 0.4629, "step": 1977 }, { "epoch": 0.15670429788076848, "grad_norm": 2.316414806630757, "learning_rate": 1.916997818377352e-05, "loss": 0.476, "step": 1978 }, { "epoch": 0.15678352148940383, "grad_norm": 2.0416898656544595, "learning_rate": 1.9168954311796487e-05, "loss": 0.4168, "step": 1979 }, { "epoch": 0.1568627450980392, "grad_norm": 1.8983919071654909, "learning_rate": 1.9167929836087932e-05, "loss": 0.4465, "step": 1980 }, { "epoch": 0.1569419687066746, "grad_norm": 1.8743570587315996, "learning_rate": 1.9166904756715307e-05, "loss": 0.4146, "step": 1981 }, { "epoch": 0.15702119231530995, "grad_norm": 2.2083717297570997, "learning_rate": 1.9165879073746112e-05, "loss": 0.6243, "step": 1982 }, { "epoch": 0.15710041592394533, "grad_norm": 1.617666459264679, "learning_rate": 1.9164852787247887e-05, "loss": 0.4738, "step": 1983 }, { "epoch": 0.1571796395325807, "grad_norm": 1.875389729531953, "learning_rate": 1.91638258972882e-05, "loss": 0.42, "step": 1984 }, { "epoch": 0.1572588631412161, "grad_norm": 1.8603775762284498, "learning_rate": 1.916279840393467e-05, "loss": 0.4651, "step": 1985 }, { "epoch": 0.15733808674985145, "grad_norm": 2.231502971943156, "learning_rate": 1.916177030725496e-05, "loss": 0.5369, "step": 1986 }, { "epoch": 0.15741731035848683, "grad_norm": 1.7585322187350656, "learning_rate": 1.9160741607316755e-05, "loss": 0.402, "step": 1987 }, { "epoch": 0.1574965339671222, "grad_norm": 1.9824088280783905, "learning_rate": 1.9159712304187795e-05, "loss": 0.4537, "step": 1988 }, { "epoch": 0.15757575757575756, "grad_norm": 1.7478757597559542, "learning_rate": 1.9158682397935852e-05, "loss": 0.3418, "step": 1989 }, { "epoch": 0.15765498118439294, "grad_norm": 2.0178883333436697, "learning_rate": 1.9157651888628744e-05, "loss": 0.4895, "step": 1990 }, { "epoch": 0.15773420479302833, "grad_norm": 2.004160681129957, "learning_rate": 1.915662077633432e-05, "loss": 0.4939, "step": 1991 }, { "epoch": 0.1578134284016637, "grad_norm": 1.596888270010446, "learning_rate": 1.915558906112048e-05, "loss": 0.361, "step": 1992 }, { "epoch": 0.15789265201029906, "grad_norm": 1.7962284825872572, "learning_rate": 1.915455674305515e-05, "loss": 0.3198, "step": 1993 }, { "epoch": 0.15797187561893444, "grad_norm": 2.3687125225646013, "learning_rate": 1.9153523822206312e-05, "loss": 0.435, "step": 1994 }, { "epoch": 0.15805109922756982, "grad_norm": 2.018520997961375, "learning_rate": 1.9152490298641973e-05, "loss": 0.4507, "step": 1995 }, { "epoch": 0.15813032283620518, "grad_norm": 1.9315785147219517, "learning_rate": 1.9151456172430186e-05, "loss": 0.4238, "step": 1996 }, { "epoch": 0.15820954644484056, "grad_norm": 1.99264143017953, "learning_rate": 1.9150421443639045e-05, "loss": 0.4224, "step": 1997 }, { "epoch": 0.15828877005347594, "grad_norm": 2.4910885234633198, "learning_rate": 1.9149386112336682e-05, "loss": 0.4694, "step": 1998 }, { "epoch": 0.15836799366211132, "grad_norm": 1.8709733923205423, "learning_rate": 1.9148350178591264e-05, "loss": 0.3645, "step": 1999 }, { "epoch": 0.15844721727074668, "grad_norm": 1.9463881954475855, "learning_rate": 1.914731364247101e-05, "loss": 0.4791, "step": 2000 }, { "epoch": 0.15852644087938206, "grad_norm": 2.017351418628687, "learning_rate": 1.914627650404416e-05, "loss": 0.459, "step": 2001 }, { "epoch": 0.15860566448801744, "grad_norm": 2.4404235710193176, "learning_rate": 1.9145238763379016e-05, "loss": 0.4703, "step": 2002 }, { "epoch": 0.1586848880966528, "grad_norm": 2.159753927165498, "learning_rate": 1.9144200420543905e-05, "loss": 0.4714, "step": 2003 }, { "epoch": 0.15876411170528817, "grad_norm": 1.841447863879372, "learning_rate": 1.9143161475607194e-05, "loss": 0.4276, "step": 2004 }, { "epoch": 0.15884333531392356, "grad_norm": 2.019703730189138, "learning_rate": 1.9142121928637292e-05, "loss": 0.4465, "step": 2005 }, { "epoch": 0.1589225589225589, "grad_norm": 1.7159889083633495, "learning_rate": 1.914108177970265e-05, "loss": 0.3886, "step": 2006 }, { "epoch": 0.1590017825311943, "grad_norm": 2.138110793928573, "learning_rate": 1.914004102887176e-05, "loss": 0.4477, "step": 2007 }, { "epoch": 0.15908100613982967, "grad_norm": 1.9513403361777697, "learning_rate": 1.9138999676213146e-05, "loss": 0.5159, "step": 2008 }, { "epoch": 0.15916022974846505, "grad_norm": 2.0677284631210897, "learning_rate": 1.9137957721795376e-05, "loss": 0.4755, "step": 2009 }, { "epoch": 0.1592394533571004, "grad_norm": 2.2030131498243293, "learning_rate": 1.913691516568706e-05, "loss": 0.5203, "step": 2010 }, { "epoch": 0.1593186769657358, "grad_norm": 1.9704861041232478, "learning_rate": 1.9135872007956846e-05, "loss": 0.4124, "step": 2011 }, { "epoch": 0.15939790057437117, "grad_norm": 1.8426520951644911, "learning_rate": 1.9134828248673415e-05, "loss": 0.3733, "step": 2012 }, { "epoch": 0.15947712418300652, "grad_norm": 2.0257096131206302, "learning_rate": 1.9133783887905502e-05, "loss": 0.5265, "step": 2013 }, { "epoch": 0.1595563477916419, "grad_norm": 1.6164377945708577, "learning_rate": 1.913273892572187e-05, "loss": 0.3541, "step": 2014 }, { "epoch": 0.1596355714002773, "grad_norm": 2.0665921569618275, "learning_rate": 1.9131693362191318e-05, "loss": 0.4187, "step": 2015 }, { "epoch": 0.15971479500891267, "grad_norm": 2.3161746982649007, "learning_rate": 1.91306471973827e-05, "loss": 0.4786, "step": 2016 }, { "epoch": 0.15979401861754802, "grad_norm": 2.2556797725522566, "learning_rate": 1.91296004313649e-05, "loss": 0.4511, "step": 2017 }, { "epoch": 0.1598732422261834, "grad_norm": 2.256879583081843, "learning_rate": 1.9128553064206835e-05, "loss": 0.4731, "step": 2018 }, { "epoch": 0.15995246583481879, "grad_norm": 1.9858378208952217, "learning_rate": 1.9127505095977483e-05, "loss": 0.5413, "step": 2019 }, { "epoch": 0.16003168944345414, "grad_norm": 2.214168557110572, "learning_rate": 1.9126456526745833e-05, "loss": 0.4752, "step": 2020 }, { "epoch": 0.16011091305208952, "grad_norm": 1.998546795067003, "learning_rate": 1.9125407356580932e-05, "loss": 0.4276, "step": 2021 }, { "epoch": 0.1601901366607249, "grad_norm": 2.1717770604291156, "learning_rate": 1.9124357585551872e-05, "loss": 0.5396, "step": 2022 }, { "epoch": 0.16026936026936026, "grad_norm": 1.8499838317063686, "learning_rate": 1.9123307213727764e-05, "loss": 0.4366, "step": 2023 }, { "epoch": 0.16034858387799564, "grad_norm": 2.069710774423531, "learning_rate": 1.9122256241177776e-05, "loss": 0.4577, "step": 2024 }, { "epoch": 0.16042780748663102, "grad_norm": 1.9744168513151665, "learning_rate": 1.9121204667971107e-05, "loss": 0.4787, "step": 2025 }, { "epoch": 0.1605070310952664, "grad_norm": 2.1224475634786195, "learning_rate": 1.9120152494177e-05, "loss": 0.4443, "step": 2026 }, { "epoch": 0.16058625470390175, "grad_norm": 1.5698929079671524, "learning_rate": 1.9119099719864735e-05, "loss": 0.4655, "step": 2027 }, { "epoch": 0.16066547831253714, "grad_norm": 2.3474613616238753, "learning_rate": 1.911804634510363e-05, "loss": 0.435, "step": 2028 }, { "epoch": 0.16074470192117252, "grad_norm": 2.032901151342805, "learning_rate": 1.911699236996305e-05, "loss": 0.4536, "step": 2029 }, { "epoch": 0.16082392552980787, "grad_norm": 1.511792177635146, "learning_rate": 1.911593779451239e-05, "loss": 0.3334, "step": 2030 }, { "epoch": 0.16090314913844325, "grad_norm": 2.0327297256623638, "learning_rate": 1.911488261882109e-05, "loss": 0.5448, "step": 2031 }, { "epoch": 0.16098237274707863, "grad_norm": 2.029856153994575, "learning_rate": 1.911382684295862e-05, "loss": 0.4393, "step": 2032 }, { "epoch": 0.16106159635571402, "grad_norm": 2.2739213651011267, "learning_rate": 1.911277046699451e-05, "loss": 0.4683, "step": 2033 }, { "epoch": 0.16114081996434937, "grad_norm": 1.6415975123062514, "learning_rate": 1.9111713490998316e-05, "loss": 0.3008, "step": 2034 }, { "epoch": 0.16122004357298475, "grad_norm": 1.8590560329796424, "learning_rate": 1.911065591503963e-05, "loss": 0.483, "step": 2035 }, { "epoch": 0.16129926718162013, "grad_norm": 1.703794023995196, "learning_rate": 1.9109597739188088e-05, "loss": 0.4398, "step": 2036 }, { "epoch": 0.16137849079025549, "grad_norm": 2.079177596949361, "learning_rate": 1.9108538963513366e-05, "loss": 0.4747, "step": 2037 }, { "epoch": 0.16145771439889087, "grad_norm": 2.0291583347975672, "learning_rate": 1.9107479588085182e-05, "loss": 0.3828, "step": 2038 }, { "epoch": 0.16153693800752625, "grad_norm": 1.9075007919021594, "learning_rate": 1.910641961297329e-05, "loss": 0.4762, "step": 2039 }, { "epoch": 0.16161616161616163, "grad_norm": 2.2221860674462475, "learning_rate": 1.9105359038247484e-05, "loss": 0.4476, "step": 2040 }, { "epoch": 0.16169538522479698, "grad_norm": 1.9098952614991478, "learning_rate": 1.9104297863977595e-05, "loss": 0.4687, "step": 2041 }, { "epoch": 0.16177460883343237, "grad_norm": 2.2812926157485798, "learning_rate": 1.9103236090233507e-05, "loss": 0.4885, "step": 2042 }, { "epoch": 0.16185383244206775, "grad_norm": 1.903474218188879, "learning_rate": 1.9102173717085114e-05, "loss": 0.4055, "step": 2043 }, { "epoch": 0.1619330560507031, "grad_norm": 2.3890631174378254, "learning_rate": 1.9101110744602384e-05, "loss": 0.4792, "step": 2044 }, { "epoch": 0.16201227965933848, "grad_norm": 2.2426265314474194, "learning_rate": 1.9100047172855306e-05, "loss": 0.5375, "step": 2045 }, { "epoch": 0.16209150326797386, "grad_norm": 1.64981596146523, "learning_rate": 1.9098983001913903e-05, "loss": 0.3908, "step": 2046 }, { "epoch": 0.16217072687660922, "grad_norm": 1.9634646988029925, "learning_rate": 1.909791823184825e-05, "loss": 0.4321, "step": 2047 }, { "epoch": 0.1622499504852446, "grad_norm": 1.8697621624055465, "learning_rate": 1.909685286272846e-05, "loss": 0.4648, "step": 2048 }, { "epoch": 0.16232917409387998, "grad_norm": 1.7425545839507501, "learning_rate": 1.9095786894624685e-05, "loss": 0.4435, "step": 2049 }, { "epoch": 0.16240839770251536, "grad_norm": 1.9943424804669774, "learning_rate": 1.9094720327607102e-05, "loss": 0.4894, "step": 2050 }, { "epoch": 0.16248762131115072, "grad_norm": 1.803249905651218, "learning_rate": 1.909365316174595e-05, "loss": 0.4681, "step": 2051 }, { "epoch": 0.1625668449197861, "grad_norm": 1.5984663033682562, "learning_rate": 1.9092585397111492e-05, "loss": 0.2981, "step": 2052 }, { "epoch": 0.16264606852842148, "grad_norm": 2.2081089528667444, "learning_rate": 1.9091517033774038e-05, "loss": 0.5272, "step": 2053 }, { "epoch": 0.16272529213705683, "grad_norm": 1.8387584633591254, "learning_rate": 1.9090448071803932e-05, "loss": 0.4184, "step": 2054 }, { "epoch": 0.1628045157456922, "grad_norm": 1.7075200875404835, "learning_rate": 1.908937851127156e-05, "loss": 0.4085, "step": 2055 }, { "epoch": 0.1628837393543276, "grad_norm": 1.6881624219173819, "learning_rate": 1.908830835224735e-05, "loss": 0.3926, "step": 2056 }, { "epoch": 0.16296296296296298, "grad_norm": 1.8902781401712885, "learning_rate": 1.9087237594801762e-05, "loss": 0.4746, "step": 2057 }, { "epoch": 0.16304218657159833, "grad_norm": 2.1921377278812217, "learning_rate": 1.9086166239005305e-05, "loss": 0.5582, "step": 2058 }, { "epoch": 0.1631214101802337, "grad_norm": 1.8514505977716071, "learning_rate": 1.908509428492852e-05, "loss": 0.3789, "step": 2059 }, { "epoch": 0.1632006337888691, "grad_norm": 1.8186435845825861, "learning_rate": 1.9084021732641994e-05, "loss": 0.4499, "step": 2060 }, { "epoch": 0.16327985739750445, "grad_norm": 1.9738670012443507, "learning_rate": 1.9082948582216344e-05, "loss": 0.4885, "step": 2061 }, { "epoch": 0.16335908100613983, "grad_norm": 2.3557387508860597, "learning_rate": 1.9081874833722234e-05, "loss": 0.4721, "step": 2062 }, { "epoch": 0.1634383046147752, "grad_norm": 2.283746955716872, "learning_rate": 1.908080048723037e-05, "loss": 0.5479, "step": 2063 }, { "epoch": 0.16351752822341056, "grad_norm": 1.8879144260508687, "learning_rate": 1.9079725542811484e-05, "loss": 0.4572, "step": 2064 }, { "epoch": 0.16359675183204594, "grad_norm": 1.8688000657124575, "learning_rate": 1.907865000053636e-05, "loss": 0.4312, "step": 2065 }, { "epoch": 0.16367597544068133, "grad_norm": 1.7126821263919434, "learning_rate": 1.9077573860475815e-05, "loss": 0.4382, "step": 2066 }, { "epoch": 0.1637551990493167, "grad_norm": 1.9439573365348657, "learning_rate": 1.9076497122700713e-05, "loss": 0.4075, "step": 2067 }, { "epoch": 0.16383442265795206, "grad_norm": 1.7635609787546924, "learning_rate": 1.9075419787281948e-05, "loss": 0.4871, "step": 2068 }, { "epoch": 0.16391364626658744, "grad_norm": 1.9910760462873718, "learning_rate": 1.9074341854290458e-05, "loss": 0.3818, "step": 2069 }, { "epoch": 0.16399286987522282, "grad_norm": 1.7261206009996528, "learning_rate": 1.907326332379722e-05, "loss": 0.3531, "step": 2070 }, { "epoch": 0.16407209348385818, "grad_norm": 1.772590764593171, "learning_rate": 1.9072184195873248e-05, "loss": 0.3428, "step": 2071 }, { "epoch": 0.16415131709249356, "grad_norm": 1.7374034073598783, "learning_rate": 1.9071104470589603e-05, "loss": 0.3992, "step": 2072 }, { "epoch": 0.16423054070112894, "grad_norm": 2.137588648482301, "learning_rate": 1.9070024148017375e-05, "loss": 0.4891, "step": 2073 }, { "epoch": 0.16430976430976432, "grad_norm": 1.8073878554775773, "learning_rate": 1.9068943228227695e-05, "loss": 0.3693, "step": 2074 }, { "epoch": 0.16438898791839968, "grad_norm": 2.0353060672381114, "learning_rate": 1.9067861711291744e-05, "loss": 0.404, "step": 2075 }, { "epoch": 0.16446821152703506, "grad_norm": 1.8381303585687634, "learning_rate": 1.906677959728073e-05, "loss": 0.434, "step": 2076 }, { "epoch": 0.16454743513567044, "grad_norm": 1.7978064015330337, "learning_rate": 1.9065696886265906e-05, "loss": 0.4621, "step": 2077 }, { "epoch": 0.1646266587443058, "grad_norm": 1.9251036960685879, "learning_rate": 1.9064613578318564e-05, "loss": 0.334, "step": 2078 }, { "epoch": 0.16470588235294117, "grad_norm": 1.734373021904076, "learning_rate": 1.9063529673510036e-05, "loss": 0.299, "step": 2079 }, { "epoch": 0.16478510596157656, "grad_norm": 1.9358714590221786, "learning_rate": 1.9062445171911688e-05, "loss": 0.469, "step": 2080 }, { "epoch": 0.1648643295702119, "grad_norm": 1.9175926311797722, "learning_rate": 1.9061360073594933e-05, "loss": 0.4203, "step": 2081 }, { "epoch": 0.1649435531788473, "grad_norm": 2.081708246257512, "learning_rate": 1.9060274378631215e-05, "loss": 0.4775, "step": 2082 }, { "epoch": 0.16502277678748267, "grad_norm": 2.3725404126742777, "learning_rate": 1.9059188087092025e-05, "loss": 0.5039, "step": 2083 }, { "epoch": 0.16510200039611805, "grad_norm": 2.1449702764161676, "learning_rate": 1.905810119904889e-05, "loss": 0.3308, "step": 2084 }, { "epoch": 0.1651812240047534, "grad_norm": 1.9393740275276423, "learning_rate": 1.9057013714573375e-05, "loss": 0.4446, "step": 2085 }, { "epoch": 0.1652604476133888, "grad_norm": 2.0224354747188973, "learning_rate": 1.9055925633737088e-05, "loss": 0.453, "step": 2086 }, { "epoch": 0.16533967122202417, "grad_norm": 2.0719452740776876, "learning_rate": 1.905483695661167e-05, "loss": 0.458, "step": 2087 }, { "epoch": 0.16541889483065952, "grad_norm": 1.7243595554861604, "learning_rate": 1.905374768326881e-05, "loss": 0.4261, "step": 2088 }, { "epoch": 0.1654981184392949, "grad_norm": 2.1132599060514368, "learning_rate": 1.9052657813780226e-05, "loss": 0.5107, "step": 2089 }, { "epoch": 0.1655773420479303, "grad_norm": 1.764948832320103, "learning_rate": 1.9051567348217686e-05, "loss": 0.3729, "step": 2090 }, { "epoch": 0.16565656565656567, "grad_norm": 1.7770507985614987, "learning_rate": 1.905047628665299e-05, "loss": 0.3085, "step": 2091 }, { "epoch": 0.16573578926520102, "grad_norm": 1.9481151339485177, "learning_rate": 1.9049384629157974e-05, "loss": 0.4357, "step": 2092 }, { "epoch": 0.1658150128738364, "grad_norm": 2.08903763187559, "learning_rate": 1.9048292375804527e-05, "loss": 0.5798, "step": 2093 }, { "epoch": 0.16589423648247179, "grad_norm": 1.912963993717447, "learning_rate": 1.9047199526664565e-05, "loss": 0.5083, "step": 2094 }, { "epoch": 0.16597346009110714, "grad_norm": 1.8950488329303423, "learning_rate": 1.9046106081810047e-05, "loss": 0.4688, "step": 2095 }, { "epoch": 0.16605268369974252, "grad_norm": 1.8322721980137766, "learning_rate": 1.9045012041312966e-05, "loss": 0.4893, "step": 2096 }, { "epoch": 0.1661319073083779, "grad_norm": 1.6160660139896603, "learning_rate": 1.904391740524537e-05, "loss": 0.3787, "step": 2097 }, { "epoch": 0.16621113091701328, "grad_norm": 1.748887779213861, "learning_rate": 1.9042822173679325e-05, "loss": 0.4242, "step": 2098 }, { "epoch": 0.16629035452564864, "grad_norm": 1.6657150099368654, "learning_rate": 1.9041726346686952e-05, "loss": 0.3845, "step": 2099 }, { "epoch": 0.16636957813428402, "grad_norm": 2.080950012323449, "learning_rate": 1.9040629924340406e-05, "loss": 0.4616, "step": 2100 }, { "epoch": 0.1664488017429194, "grad_norm": 1.944120818298703, "learning_rate": 1.903953290671188e-05, "loss": 0.5048, "step": 2101 }, { "epoch": 0.16652802535155475, "grad_norm": 1.9155882044296995, "learning_rate": 1.903843529387361e-05, "loss": 0.4695, "step": 2102 }, { "epoch": 0.16660724896019014, "grad_norm": 1.7618387395184265, "learning_rate": 1.903733708589786e-05, "loss": 0.4762, "step": 2103 }, { "epoch": 0.16668647256882552, "grad_norm": 2.220991776497542, "learning_rate": 1.9036238282856952e-05, "loss": 0.6267, "step": 2104 }, { "epoch": 0.16676569617746087, "grad_norm": 1.9177135363531168, "learning_rate": 1.903513888482323e-05, "loss": 0.4821, "step": 2105 }, { "epoch": 0.16684491978609625, "grad_norm": 1.530413558382587, "learning_rate": 1.903403889186909e-05, "loss": 0.3674, "step": 2106 }, { "epoch": 0.16692414339473163, "grad_norm": 1.9414858457253144, "learning_rate": 1.903293830406696e-05, "loss": 0.5525, "step": 2107 }, { "epoch": 0.16700336700336701, "grad_norm": 2.0608747651108983, "learning_rate": 1.9031837121489303e-05, "loss": 0.518, "step": 2108 }, { "epoch": 0.16708259061200237, "grad_norm": 1.7072264140131648, "learning_rate": 1.903073534420863e-05, "loss": 0.4484, "step": 2109 }, { "epoch": 0.16716181422063775, "grad_norm": 1.6015691891846717, "learning_rate": 1.9029632972297488e-05, "loss": 0.3609, "step": 2110 }, { "epoch": 0.16724103782927313, "grad_norm": 1.8441132771224842, "learning_rate": 1.9028530005828462e-05, "loss": 0.4383, "step": 2111 }, { "epoch": 0.16732026143790849, "grad_norm": 1.9824977295494668, "learning_rate": 1.9027426444874177e-05, "loss": 0.5459, "step": 2112 }, { "epoch": 0.16739948504654387, "grad_norm": 1.7882499964851173, "learning_rate": 1.90263222895073e-05, "loss": 0.3986, "step": 2113 }, { "epoch": 0.16747870865517925, "grad_norm": 2.0990733023350563, "learning_rate": 1.902521753980053e-05, "loss": 0.5134, "step": 2114 }, { "epoch": 0.16755793226381463, "grad_norm": 2.3701923845142927, "learning_rate": 1.9024112195826614e-05, "loss": 0.465, "step": 2115 }, { "epoch": 0.16763715587244998, "grad_norm": 1.8813385397244755, "learning_rate": 1.902300625765833e-05, "loss": 0.3947, "step": 2116 }, { "epoch": 0.16771637948108536, "grad_norm": 1.8744528326878467, "learning_rate": 1.9021899725368498e-05, "loss": 0.346, "step": 2117 }, { "epoch": 0.16779560308972075, "grad_norm": 2.2864476645229317, "learning_rate": 1.902079259902998e-05, "loss": 0.4422, "step": 2118 }, { "epoch": 0.1678748266983561, "grad_norm": 1.7235774467986877, "learning_rate": 1.901968487871568e-05, "loss": 0.3719, "step": 2119 }, { "epoch": 0.16795405030699148, "grad_norm": 1.7635146709495053, "learning_rate": 1.9018576564498527e-05, "loss": 0.4148, "step": 2120 }, { "epoch": 0.16803327391562686, "grad_norm": 2.3921189681760775, "learning_rate": 1.9017467656451498e-05, "loss": 0.5032, "step": 2121 }, { "epoch": 0.16811249752426222, "grad_norm": 1.8731702615933892, "learning_rate": 1.9016358154647618e-05, "loss": 0.4955, "step": 2122 }, { "epoch": 0.1681917211328976, "grad_norm": 2.0615634278881028, "learning_rate": 1.9015248059159937e-05, "loss": 0.4585, "step": 2123 }, { "epoch": 0.16827094474153298, "grad_norm": 2.104829927777596, "learning_rate": 1.901413737006155e-05, "loss": 0.4737, "step": 2124 }, { "epoch": 0.16835016835016836, "grad_norm": 2.1271160020437585, "learning_rate": 1.901302608742559e-05, "loss": 0.4759, "step": 2125 }, { "epoch": 0.16842939195880371, "grad_norm": 1.9955275056730297, "learning_rate": 1.9011914211325225e-05, "loss": 0.4339, "step": 2126 }, { "epoch": 0.1685086155674391, "grad_norm": 2.0453468189057378, "learning_rate": 1.9010801741833678e-05, "loss": 0.3957, "step": 2127 }, { "epoch": 0.16858783917607448, "grad_norm": 2.0515103807351527, "learning_rate": 1.900968867902419e-05, "loss": 0.4234, "step": 2128 }, { "epoch": 0.16866706278470983, "grad_norm": 1.8188145322226423, "learning_rate": 1.900857502297006e-05, "loss": 0.3967, "step": 2129 }, { "epoch": 0.1687462863933452, "grad_norm": 1.6357454774535003, "learning_rate": 1.9007460773744605e-05, "loss": 0.3148, "step": 2130 }, { "epoch": 0.1688255100019806, "grad_norm": 1.672921216228675, "learning_rate": 1.90063459314212e-05, "loss": 0.3223, "step": 2131 }, { "epoch": 0.16890473361061598, "grad_norm": 1.7180600630608942, "learning_rate": 1.9005230496073256e-05, "loss": 0.3086, "step": 2132 }, { "epoch": 0.16898395721925133, "grad_norm": 1.8721917325562165, "learning_rate": 1.900411446777421e-05, "loss": 0.4684, "step": 2133 }, { "epoch": 0.1690631808278867, "grad_norm": 1.645104471147386, "learning_rate": 1.900299784659755e-05, "loss": 0.388, "step": 2134 }, { "epoch": 0.1691424044365221, "grad_norm": 2.22111790445004, "learning_rate": 1.9001880632616806e-05, "loss": 0.5104, "step": 2135 }, { "epoch": 0.16922162804515745, "grad_norm": 1.8280414426193994, "learning_rate": 1.9000762825905535e-05, "loss": 0.4788, "step": 2136 }, { "epoch": 0.16930085165379283, "grad_norm": 1.7297646239170181, "learning_rate": 1.899964442653734e-05, "loss": 0.3546, "step": 2137 }, { "epoch": 0.1693800752624282, "grad_norm": 1.8872502511626468, "learning_rate": 1.8998525434585862e-05, "loss": 0.4687, "step": 2138 }, { "epoch": 0.1694592988710636, "grad_norm": 1.6529560957691534, "learning_rate": 1.8997405850124786e-05, "loss": 0.3574, "step": 2139 }, { "epoch": 0.16953852247969894, "grad_norm": 1.8860667886597298, "learning_rate": 1.8996285673227826e-05, "loss": 0.4449, "step": 2140 }, { "epoch": 0.16961774608833433, "grad_norm": 1.9445173946011252, "learning_rate": 1.899516490396874e-05, "loss": 0.5066, "step": 2141 }, { "epoch": 0.1696969696969697, "grad_norm": 1.6820637648159442, "learning_rate": 1.8994043542421328e-05, "loss": 0.4024, "step": 2142 }, { "epoch": 0.16977619330560506, "grad_norm": 1.9563454784742638, "learning_rate": 1.8992921588659424e-05, "loss": 0.4198, "step": 2143 }, { "epoch": 0.16985541691424044, "grad_norm": 1.9033491209101583, "learning_rate": 1.8991799042756906e-05, "loss": 0.3132, "step": 2144 }, { "epoch": 0.16993464052287582, "grad_norm": 1.8633506564375288, "learning_rate": 1.8990675904787688e-05, "loss": 0.3507, "step": 2145 }, { "epoch": 0.17001386413151118, "grad_norm": 2.146509904970231, "learning_rate": 1.898955217482572e-05, "loss": 0.4511, "step": 2146 }, { "epoch": 0.17009308774014656, "grad_norm": 1.8306317274246138, "learning_rate": 1.8988427852944997e-05, "loss": 0.3992, "step": 2147 }, { "epoch": 0.17017231134878194, "grad_norm": 1.9880179087695329, "learning_rate": 1.898730293921955e-05, "loss": 0.4553, "step": 2148 }, { "epoch": 0.17025153495741732, "grad_norm": 2.448597104416439, "learning_rate": 1.8986177433723446e-05, "loss": 0.4902, "step": 2149 }, { "epoch": 0.17033075856605268, "grad_norm": 1.9976358383312918, "learning_rate": 1.89850513365308e-05, "loss": 0.4545, "step": 2150 }, { "epoch": 0.17040998217468806, "grad_norm": 2.202723894983435, "learning_rate": 1.8983924647715756e-05, "loss": 0.513, "step": 2151 }, { "epoch": 0.17048920578332344, "grad_norm": 1.70808889054141, "learning_rate": 1.89827973673525e-05, "loss": 0.3365, "step": 2152 }, { "epoch": 0.1705684293919588, "grad_norm": 2.098168455340224, "learning_rate": 1.8981669495515264e-05, "loss": 0.5016, "step": 2153 }, { "epoch": 0.17064765300059417, "grad_norm": 1.9088495150572768, "learning_rate": 1.8980541032278302e-05, "loss": 0.5515, "step": 2154 }, { "epoch": 0.17072687660922956, "grad_norm": 1.9722023800135497, "learning_rate": 1.8979411977715928e-05, "loss": 0.4859, "step": 2155 }, { "epoch": 0.17080610021786494, "grad_norm": 1.8623219955418344, "learning_rate": 1.8978282331902483e-05, "loss": 0.3516, "step": 2156 }, { "epoch": 0.1708853238265003, "grad_norm": 2.1371829738240833, "learning_rate": 1.8977152094912346e-05, "loss": 0.504, "step": 2157 }, { "epoch": 0.17096454743513567, "grad_norm": 1.843012693582406, "learning_rate": 1.897602126681994e-05, "loss": 0.3696, "step": 2158 }, { "epoch": 0.17104377104377105, "grad_norm": 1.926789243703306, "learning_rate": 1.897488984769972e-05, "loss": 0.3904, "step": 2159 }, { "epoch": 0.1711229946524064, "grad_norm": 2.0824324285525537, "learning_rate": 1.8973757837626193e-05, "loss": 0.3856, "step": 2160 }, { "epoch": 0.1712022182610418, "grad_norm": 1.8621294104250126, "learning_rate": 1.8972625236673887e-05, "loss": 0.3375, "step": 2161 }, { "epoch": 0.17128144186967717, "grad_norm": 1.737299237088058, "learning_rate": 1.8971492044917386e-05, "loss": 0.4387, "step": 2162 }, { "epoch": 0.17136066547831252, "grad_norm": 1.989813285249737, "learning_rate": 1.8970358262431297e-05, "loss": 0.5958, "step": 2163 }, { "epoch": 0.1714398890869479, "grad_norm": 2.022091188701207, "learning_rate": 1.8969223889290283e-05, "loss": 0.5144, "step": 2164 }, { "epoch": 0.1715191126955833, "grad_norm": 2.388596662473868, "learning_rate": 1.8968088925569032e-05, "loss": 0.5097, "step": 2165 }, { "epoch": 0.17159833630421867, "grad_norm": 1.8249422246841323, "learning_rate": 1.896695337134228e-05, "loss": 0.3813, "step": 2166 }, { "epoch": 0.17167755991285402, "grad_norm": 1.402784176995135, "learning_rate": 1.8965817226684794e-05, "loss": 0.3669, "step": 2167 }, { "epoch": 0.1717567835214894, "grad_norm": 2.0367399975948417, "learning_rate": 1.896468049167138e-05, "loss": 0.3647, "step": 2168 }, { "epoch": 0.17183600713012478, "grad_norm": 1.4095077651953454, "learning_rate": 1.896354316637689e-05, "loss": 0.3241, "step": 2169 }, { "epoch": 0.17191523073876014, "grad_norm": 1.8942162604065602, "learning_rate": 1.8962405250876218e-05, "loss": 0.3848, "step": 2170 }, { "epoch": 0.17199445434739552, "grad_norm": 1.7726567893062553, "learning_rate": 1.896126674524428e-05, "loss": 0.3696, "step": 2171 }, { "epoch": 0.1720736779560309, "grad_norm": 1.6502099044234704, "learning_rate": 1.896012764955605e-05, "loss": 0.3754, "step": 2172 }, { "epoch": 0.17215290156466628, "grad_norm": 1.864638018255017, "learning_rate": 1.8958987963886526e-05, "loss": 0.3498, "step": 2173 }, { "epoch": 0.17223212517330164, "grad_norm": 1.783206741932751, "learning_rate": 1.8957847688310752e-05, "loss": 0.4461, "step": 2174 }, { "epoch": 0.17231134878193702, "grad_norm": 2.047774975163439, "learning_rate": 1.8956706822903812e-05, "loss": 0.3177, "step": 2175 }, { "epoch": 0.1723905723905724, "grad_norm": 2.035527189904597, "learning_rate": 1.8955565367740824e-05, "loss": 0.4246, "step": 2176 }, { "epoch": 0.17246979599920775, "grad_norm": 1.7519343645244982, "learning_rate": 1.8954423322896944e-05, "loss": 0.3945, "step": 2177 }, { "epoch": 0.17254901960784313, "grad_norm": 1.831292065828642, "learning_rate": 1.895328068844738e-05, "loss": 0.4323, "step": 2178 }, { "epoch": 0.17262824321647852, "grad_norm": 1.8576091695913817, "learning_rate": 1.8952137464467358e-05, "loss": 0.4357, "step": 2179 }, { "epoch": 0.1727074668251139, "grad_norm": 1.9984153169578844, "learning_rate": 1.895099365103216e-05, "loss": 0.3837, "step": 2180 }, { "epoch": 0.17278669043374925, "grad_norm": 1.7284041636644616, "learning_rate": 1.89498492482171e-05, "loss": 0.3989, "step": 2181 }, { "epoch": 0.17286591404238463, "grad_norm": 2.022492091073739, "learning_rate": 1.8948704256097533e-05, "loss": 0.483, "step": 2182 }, { "epoch": 0.17294513765102001, "grad_norm": 1.6676594627685584, "learning_rate": 1.8947558674748844e-05, "loss": 0.3916, "step": 2183 }, { "epoch": 0.17302436125965537, "grad_norm": 1.8786959682061501, "learning_rate": 1.8946412504246474e-05, "loss": 0.3982, "step": 2184 }, { "epoch": 0.17310358486829075, "grad_norm": 1.7347395440781879, "learning_rate": 1.8945265744665886e-05, "loss": 0.4129, "step": 2185 }, { "epoch": 0.17318280847692613, "grad_norm": 1.839906345040177, "learning_rate": 1.8944118396082594e-05, "loss": 0.4373, "step": 2186 }, { "epoch": 0.17326203208556148, "grad_norm": 1.8808715706556598, "learning_rate": 1.8942970458572138e-05, "loss": 0.3705, "step": 2187 }, { "epoch": 0.17334125569419687, "grad_norm": 1.465523771651254, "learning_rate": 1.894182193221011e-05, "loss": 0.3839, "step": 2188 }, { "epoch": 0.17342047930283225, "grad_norm": 2.045584338401776, "learning_rate": 1.894067281707213e-05, "loss": 0.438, "step": 2189 }, { "epoch": 0.17349970291146763, "grad_norm": 1.7244714163141794, "learning_rate": 1.893952311323387e-05, "loss": 0.3122, "step": 2190 }, { "epoch": 0.17357892652010298, "grad_norm": 2.087892759180738, "learning_rate": 1.8938372820771024e-05, "loss": 0.4992, "step": 2191 }, { "epoch": 0.17365815012873836, "grad_norm": 1.5864066017656742, "learning_rate": 1.8937221939759334e-05, "loss": 0.3433, "step": 2192 }, { "epoch": 0.17373737373737375, "grad_norm": 1.810884640645776, "learning_rate": 1.8936070470274587e-05, "loss": 0.3559, "step": 2193 }, { "epoch": 0.1738165973460091, "grad_norm": 2.034336914336385, "learning_rate": 1.8934918412392596e-05, "loss": 0.4277, "step": 2194 }, { "epoch": 0.17389582095464448, "grad_norm": 2.170366699440581, "learning_rate": 1.893376576618922e-05, "loss": 0.4649, "step": 2195 }, { "epoch": 0.17397504456327986, "grad_norm": 2.1226685532874807, "learning_rate": 1.8932612531740354e-05, "loss": 0.4181, "step": 2196 }, { "epoch": 0.17405426817191524, "grad_norm": 1.9851458292958541, "learning_rate": 1.893145870912193e-05, "loss": 0.3979, "step": 2197 }, { "epoch": 0.1741334917805506, "grad_norm": 1.853482817886539, "learning_rate": 1.8930304298409933e-05, "loss": 0.3808, "step": 2198 }, { "epoch": 0.17421271538918598, "grad_norm": 1.6820010225809787, "learning_rate": 1.8929149299680364e-05, "loss": 0.3381, "step": 2199 }, { "epoch": 0.17429193899782136, "grad_norm": 1.7140596998394408, "learning_rate": 1.8927993713009275e-05, "loss": 0.3823, "step": 2200 }, { "epoch": 0.17437116260645671, "grad_norm": 1.8606979503810697, "learning_rate": 1.892683753847276e-05, "loss": 0.407, "step": 2201 }, { "epoch": 0.1744503862150921, "grad_norm": 1.7655200320709468, "learning_rate": 1.892568077614695e-05, "loss": 0.3668, "step": 2202 }, { "epoch": 0.17452960982372748, "grad_norm": 2.07433274871448, "learning_rate": 1.892452342610801e-05, "loss": 0.5242, "step": 2203 }, { "epoch": 0.17460883343236283, "grad_norm": 2.1122841979181217, "learning_rate": 1.892336548843214e-05, "loss": 0.4286, "step": 2204 }, { "epoch": 0.1746880570409982, "grad_norm": 1.6973340263343, "learning_rate": 1.892220696319559e-05, "loss": 0.347, "step": 2205 }, { "epoch": 0.1747672806496336, "grad_norm": 2.055431202364881, "learning_rate": 1.8921047850474645e-05, "loss": 0.4514, "step": 2206 }, { "epoch": 0.17484650425826898, "grad_norm": 1.7403543808111084, "learning_rate": 1.891988815034562e-05, "loss": 0.3488, "step": 2207 }, { "epoch": 0.17492572786690433, "grad_norm": 2.1790220862502063, "learning_rate": 1.891872786288488e-05, "loss": 0.6366, "step": 2208 }, { "epoch": 0.1750049514755397, "grad_norm": 1.71837675448399, "learning_rate": 1.8917566988168826e-05, "loss": 0.3661, "step": 2209 }, { "epoch": 0.1750841750841751, "grad_norm": 1.6733013712942197, "learning_rate": 1.8916405526273894e-05, "loss": 0.4033, "step": 2210 }, { "epoch": 0.17516339869281045, "grad_norm": 1.9810764788718538, "learning_rate": 1.8915243477276563e-05, "loss": 0.5247, "step": 2211 }, { "epoch": 0.17524262230144583, "grad_norm": 1.623001545856307, "learning_rate": 1.8914080841253348e-05, "loss": 0.3908, "step": 2212 }, { "epoch": 0.1753218459100812, "grad_norm": 2.406920649554192, "learning_rate": 1.8912917618280796e-05, "loss": 0.5585, "step": 2213 }, { "epoch": 0.1754010695187166, "grad_norm": 2.0108397831424187, "learning_rate": 1.8911753808435508e-05, "loss": 0.4854, "step": 2214 }, { "epoch": 0.17548029312735194, "grad_norm": 1.7907118992550852, "learning_rate": 1.891058941179411e-05, "loss": 0.3247, "step": 2215 }, { "epoch": 0.17555951673598733, "grad_norm": 1.5781424876269412, "learning_rate": 1.8909424428433278e-05, "loss": 0.4004, "step": 2216 }, { "epoch": 0.1756387403446227, "grad_norm": 1.7942504313275773, "learning_rate": 1.8908258858429716e-05, "loss": 0.3783, "step": 2217 }, { "epoch": 0.17571796395325806, "grad_norm": 2.010130021109717, "learning_rate": 1.890709270186017e-05, "loss": 0.4597, "step": 2218 }, { "epoch": 0.17579718756189344, "grad_norm": 1.7787478256826252, "learning_rate": 1.890592595880143e-05, "loss": 0.3464, "step": 2219 }, { "epoch": 0.17587641117052882, "grad_norm": 1.8029842841402366, "learning_rate": 1.890475862933032e-05, "loss": 0.5667, "step": 2220 }, { "epoch": 0.17595563477916418, "grad_norm": 1.7161573015522174, "learning_rate": 1.8903590713523698e-05, "loss": 0.3594, "step": 2221 }, { "epoch": 0.17603485838779956, "grad_norm": 1.4311308529672409, "learning_rate": 1.8902422211458466e-05, "loss": 0.3366, "step": 2222 }, { "epoch": 0.17611408199643494, "grad_norm": 1.6691948159474395, "learning_rate": 1.890125312321157e-05, "loss": 0.4429, "step": 2223 }, { "epoch": 0.17619330560507032, "grad_norm": 1.8912113260284953, "learning_rate": 1.8900083448859986e-05, "loss": 0.3895, "step": 2224 }, { "epoch": 0.17627252921370568, "grad_norm": 1.64699710502356, "learning_rate": 1.8898913188480733e-05, "loss": 0.3213, "step": 2225 }, { "epoch": 0.17635175282234106, "grad_norm": 2.556629372696625, "learning_rate": 1.8897742342150863e-05, "loss": 0.5572, "step": 2226 }, { "epoch": 0.17643097643097644, "grad_norm": 2.045487012558576, "learning_rate": 1.8896570909947477e-05, "loss": 0.539, "step": 2227 }, { "epoch": 0.1765102000396118, "grad_norm": 2.0179074922465023, "learning_rate": 1.88953988919477e-05, "loss": 0.3821, "step": 2228 }, { "epoch": 0.17658942364824717, "grad_norm": 1.8211579487525722, "learning_rate": 1.8894226288228707e-05, "loss": 0.4121, "step": 2229 }, { "epoch": 0.17666864725688255, "grad_norm": 2.615598291706729, "learning_rate": 1.8893053098867714e-05, "loss": 0.3657, "step": 2230 }, { "epoch": 0.17674787086551794, "grad_norm": 1.9369807536093724, "learning_rate": 1.889187932394196e-05, "loss": 0.5303, "step": 2231 }, { "epoch": 0.1768270944741533, "grad_norm": 1.6452689917937342, "learning_rate": 1.889070496352874e-05, "loss": 0.4002, "step": 2232 }, { "epoch": 0.17690631808278867, "grad_norm": 1.8744234724362385, "learning_rate": 1.888953001770538e-05, "loss": 0.4721, "step": 2233 }, { "epoch": 0.17698554169142405, "grad_norm": 2.2046879278370866, "learning_rate": 1.8888354486549238e-05, "loss": 0.4631, "step": 2234 }, { "epoch": 0.1770647653000594, "grad_norm": 1.8778360823220894, "learning_rate": 1.888717837013772e-05, "loss": 0.3896, "step": 2235 }, { "epoch": 0.1771439889086948, "grad_norm": 1.8536637664604478, "learning_rate": 1.8886001668548273e-05, "loss": 0.4214, "step": 2236 }, { "epoch": 0.17722321251733017, "grad_norm": 1.6890399945181858, "learning_rate": 1.8884824381858368e-05, "loss": 0.4159, "step": 2237 }, { "epoch": 0.17730243612596555, "grad_norm": 1.7342792892104857, "learning_rate": 1.888364651014553e-05, "loss": 0.3457, "step": 2238 }, { "epoch": 0.1773816597346009, "grad_norm": 1.7625808294094478, "learning_rate": 1.888246805348732e-05, "loss": 0.3966, "step": 2239 }, { "epoch": 0.1774608833432363, "grad_norm": 1.7118432210807184, "learning_rate": 1.8881289011961323e-05, "loss": 0.4413, "step": 2240 }, { "epoch": 0.17754010695187167, "grad_norm": 1.838074358096047, "learning_rate": 1.8880109385645184e-05, "loss": 0.4436, "step": 2241 }, { "epoch": 0.17761933056050702, "grad_norm": 1.633478721072027, "learning_rate": 1.8878929174616566e-05, "loss": 0.3417, "step": 2242 }, { "epoch": 0.1776985541691424, "grad_norm": 1.9819808143150355, "learning_rate": 1.887774837895318e-05, "loss": 0.3732, "step": 2243 }, { "epoch": 0.17777777777777778, "grad_norm": 1.6699514643121105, "learning_rate": 1.887656699873279e-05, "loss": 0.5105, "step": 2244 }, { "epoch": 0.17785700138641314, "grad_norm": 1.7868109276929764, "learning_rate": 1.887538503403317e-05, "loss": 0.4515, "step": 2245 }, { "epoch": 0.17793622499504852, "grad_norm": 2.029234407698369, "learning_rate": 1.8874202484932148e-05, "loss": 0.6043, "step": 2246 }, { "epoch": 0.1780154486036839, "grad_norm": 1.9147865702362916, "learning_rate": 1.8873019351507596e-05, "loss": 0.501, "step": 2247 }, { "epoch": 0.17809467221231928, "grad_norm": 1.8624715434446462, "learning_rate": 1.887183563383741e-05, "loss": 0.4854, "step": 2248 }, { "epoch": 0.17817389582095464, "grad_norm": 1.556742610453188, "learning_rate": 1.8870651331999542e-05, "loss": 0.3313, "step": 2249 }, { "epoch": 0.17825311942959002, "grad_norm": 1.841256103307755, "learning_rate": 1.886946644607196e-05, "loss": 0.3702, "step": 2250 }, { "epoch": 0.1783323430382254, "grad_norm": 1.6772844439285954, "learning_rate": 1.8868280976132697e-05, "loss": 0.361, "step": 2251 }, { "epoch": 0.17841156664686075, "grad_norm": 1.6568975210615398, "learning_rate": 1.8867094922259798e-05, "loss": 0.3319, "step": 2252 }, { "epoch": 0.17849079025549613, "grad_norm": 1.5713953858385998, "learning_rate": 1.8865908284531368e-05, "loss": 0.3441, "step": 2253 }, { "epoch": 0.17857001386413152, "grad_norm": 1.8025363517663684, "learning_rate": 1.8864721063025536e-05, "loss": 0.4084, "step": 2254 }, { "epoch": 0.1786492374727669, "grad_norm": 1.7126671880469564, "learning_rate": 1.8863533257820475e-05, "loss": 0.3185, "step": 2255 }, { "epoch": 0.17872846108140225, "grad_norm": 1.7851946841043358, "learning_rate": 1.8862344868994395e-05, "loss": 0.4637, "step": 2256 }, { "epoch": 0.17880768469003763, "grad_norm": 1.5654988003945565, "learning_rate": 1.8861155896625553e-05, "loss": 0.3847, "step": 2257 }, { "epoch": 0.17888690829867301, "grad_norm": 1.9567725757796477, "learning_rate": 1.885996634079223e-05, "loss": 0.398, "step": 2258 }, { "epoch": 0.17896613190730837, "grad_norm": 1.6956798680506862, "learning_rate": 1.8858776201572758e-05, "loss": 0.4264, "step": 2259 }, { "epoch": 0.17904535551594375, "grad_norm": 1.6010016102637785, "learning_rate": 1.8857585479045493e-05, "loss": 0.2754, "step": 2260 }, { "epoch": 0.17912457912457913, "grad_norm": 2.4283995901520323, "learning_rate": 1.8856394173288848e-05, "loss": 0.5484, "step": 2261 }, { "epoch": 0.17920380273321448, "grad_norm": 1.8478325552885637, "learning_rate": 1.8855202284381264e-05, "loss": 0.4718, "step": 2262 }, { "epoch": 0.17928302634184987, "grad_norm": 1.6448562805599607, "learning_rate": 1.8854009812401213e-05, "loss": 0.355, "step": 2263 }, { "epoch": 0.17936224995048525, "grad_norm": 1.7881800379246628, "learning_rate": 1.885281675742722e-05, "loss": 0.4924, "step": 2264 }, { "epoch": 0.17944147355912063, "grad_norm": 1.4895489922092775, "learning_rate": 1.885162311953784e-05, "loss": 0.3156, "step": 2265 }, { "epoch": 0.17952069716775598, "grad_norm": 1.5722179443363025, "learning_rate": 1.885042889881167e-05, "loss": 0.3398, "step": 2266 }, { "epoch": 0.17959992077639136, "grad_norm": 2.073310183346257, "learning_rate": 1.8849234095327343e-05, "loss": 0.5563, "step": 2267 }, { "epoch": 0.17967914438502675, "grad_norm": 2.7623455888483717, "learning_rate": 1.884803870916353e-05, "loss": 0.4301, "step": 2268 }, { "epoch": 0.1797583679936621, "grad_norm": 1.8925565254847532, "learning_rate": 1.884684274039894e-05, "loss": 0.4399, "step": 2269 }, { "epoch": 0.17983759160229748, "grad_norm": 2.0557675513682216, "learning_rate": 1.8845646189112327e-05, "loss": 0.4875, "step": 2270 }, { "epoch": 0.17991681521093286, "grad_norm": 1.898396110131812, "learning_rate": 1.8844449055382473e-05, "loss": 0.3867, "step": 2271 }, { "epoch": 0.17999603881956824, "grad_norm": 1.7200747344175484, "learning_rate": 1.8843251339288207e-05, "loss": 0.3697, "step": 2272 }, { "epoch": 0.1800752624282036, "grad_norm": 1.7646438533908584, "learning_rate": 1.884205304090839e-05, "loss": 0.4222, "step": 2273 }, { "epoch": 0.18015448603683898, "grad_norm": 2.07752988484997, "learning_rate": 1.8840854160321926e-05, "loss": 0.309, "step": 2274 }, { "epoch": 0.18023370964547436, "grad_norm": 1.9290459038600012, "learning_rate": 1.8839654697607756e-05, "loss": 0.3583, "step": 2275 }, { "epoch": 0.18031293325410971, "grad_norm": 1.9685104820992123, "learning_rate": 1.8838454652844857e-05, "loss": 0.3555, "step": 2276 }, { "epoch": 0.1803921568627451, "grad_norm": 1.53254060447503, "learning_rate": 1.8837254026112245e-05, "loss": 0.2805, "step": 2277 }, { "epoch": 0.18047138047138048, "grad_norm": 1.4558650278677392, "learning_rate": 1.883605281748898e-05, "loss": 0.3279, "step": 2278 }, { "epoch": 0.18055060408001586, "grad_norm": 1.4837920476190676, "learning_rate": 1.8834851027054152e-05, "loss": 0.3208, "step": 2279 }, { "epoch": 0.1806298276886512, "grad_norm": 1.7923882954292831, "learning_rate": 1.8833648654886898e-05, "loss": 0.5319, "step": 2280 }, { "epoch": 0.1807090512972866, "grad_norm": 1.6702166683689448, "learning_rate": 1.883244570106638e-05, "loss": 0.3297, "step": 2281 }, { "epoch": 0.18078827490592198, "grad_norm": 1.6943542703288907, "learning_rate": 1.8831242165671816e-05, "loss": 0.3677, "step": 2282 }, { "epoch": 0.18086749851455733, "grad_norm": 1.8095776228294747, "learning_rate": 1.8830038048782445e-05, "loss": 0.4291, "step": 2283 }, { "epoch": 0.1809467221231927, "grad_norm": 2.104819798483281, "learning_rate": 1.8828833350477556e-05, "loss": 0.3804, "step": 2284 }, { "epoch": 0.1810259457318281, "grad_norm": 2.3131001287007913, "learning_rate": 1.8827628070836477e-05, "loss": 0.5896, "step": 2285 }, { "epoch": 0.18110516934046345, "grad_norm": 1.433658407859947, "learning_rate": 1.8826422209938563e-05, "loss": 0.2805, "step": 2286 }, { "epoch": 0.18118439294909883, "grad_norm": 2.161279939067316, "learning_rate": 1.8825215767863215e-05, "loss": 0.4456, "step": 2287 }, { "epoch": 0.1812636165577342, "grad_norm": 2.216487613370055, "learning_rate": 1.8824008744689873e-05, "loss": 0.4306, "step": 2288 }, { "epoch": 0.1813428401663696, "grad_norm": 2.0995617196942513, "learning_rate": 1.8822801140498014e-05, "loss": 0.4203, "step": 2289 }, { "epoch": 0.18142206377500494, "grad_norm": 1.8671765245175258, "learning_rate": 1.8821592955367154e-05, "loss": 0.5573, "step": 2290 }, { "epoch": 0.18150128738364033, "grad_norm": 2.2008061758558997, "learning_rate": 1.8820384189376845e-05, "loss": 0.506, "step": 2291 }, { "epoch": 0.1815805109922757, "grad_norm": 1.8644571066637654, "learning_rate": 1.8819174842606675e-05, "loss": 0.3825, "step": 2292 }, { "epoch": 0.18165973460091106, "grad_norm": 1.6594249224595061, "learning_rate": 1.8817964915136277e-05, "loss": 0.3296, "step": 2293 }, { "epoch": 0.18173895820954644, "grad_norm": 2.129810835139294, "learning_rate": 1.881675440704532e-05, "loss": 0.3965, "step": 2294 }, { "epoch": 0.18181818181818182, "grad_norm": 1.5873513756967357, "learning_rate": 1.881554331841351e-05, "loss": 0.3441, "step": 2295 }, { "epoch": 0.1818974054268172, "grad_norm": 2.0750169233732354, "learning_rate": 1.881433164932059e-05, "loss": 0.4623, "step": 2296 }, { "epoch": 0.18197662903545256, "grad_norm": 1.7549889422274445, "learning_rate": 1.881311939984634e-05, "loss": 0.3912, "step": 2297 }, { "epoch": 0.18205585264408794, "grad_norm": 1.573779645385025, "learning_rate": 1.8811906570070583e-05, "loss": 0.3316, "step": 2298 }, { "epoch": 0.18213507625272332, "grad_norm": 2.0709601923959, "learning_rate": 1.8810693160073184e-05, "loss": 0.4057, "step": 2299 }, { "epoch": 0.18221429986135868, "grad_norm": 2.097543494234802, "learning_rate": 1.880947916993403e-05, "loss": 0.3673, "step": 2300 }, { "epoch": 0.18229352346999406, "grad_norm": 1.5298726500406574, "learning_rate": 1.8808264599733065e-05, "loss": 0.2659, "step": 2301 }, { "epoch": 0.18237274707862944, "grad_norm": 1.5216584193933604, "learning_rate": 1.8807049449550254e-05, "loss": 0.2266, "step": 2302 }, { "epoch": 0.1824519706872648, "grad_norm": 1.9837962569782617, "learning_rate": 1.8805833719465617e-05, "loss": 0.4794, "step": 2303 }, { "epoch": 0.18253119429590017, "grad_norm": 2.0585730297186937, "learning_rate": 1.88046174095592e-05, "loss": 0.4592, "step": 2304 }, { "epoch": 0.18261041790453555, "grad_norm": 1.7416208978356076, "learning_rate": 1.880340051991109e-05, "loss": 0.3332, "step": 2305 }, { "epoch": 0.18268964151317094, "grad_norm": 2.265459999298335, "learning_rate": 1.8802183050601417e-05, "loss": 0.4646, "step": 2306 }, { "epoch": 0.1827688651218063, "grad_norm": 2.203347640226268, "learning_rate": 1.8800965001710342e-05, "loss": 0.5617, "step": 2307 }, { "epoch": 0.18284808873044167, "grad_norm": 1.7305201048658656, "learning_rate": 1.879974637331807e-05, "loss": 0.4589, "step": 2308 }, { "epoch": 0.18292731233907705, "grad_norm": 1.9013759269040318, "learning_rate": 1.879852716550484e-05, "loss": 0.4891, "step": 2309 }, { "epoch": 0.1830065359477124, "grad_norm": 1.5753954653956812, "learning_rate": 1.8797307378350935e-05, "loss": 0.4267, "step": 2310 }, { "epoch": 0.1830857595563478, "grad_norm": 1.8518004712075684, "learning_rate": 1.8796087011936665e-05, "loss": 0.4864, "step": 2311 }, { "epoch": 0.18316498316498317, "grad_norm": 1.899141150680386, "learning_rate": 1.8794866066342394e-05, "loss": 0.4532, "step": 2312 }, { "epoch": 0.18324420677361855, "grad_norm": 2.040495096945813, "learning_rate": 1.879364454164851e-05, "loss": 0.3518, "step": 2313 }, { "epoch": 0.1833234303822539, "grad_norm": 1.7950597727607651, "learning_rate": 1.879242243793544e-05, "loss": 0.4003, "step": 2314 }, { "epoch": 0.18340265399088929, "grad_norm": 1.8331303240602623, "learning_rate": 1.8791199755283664e-05, "loss": 0.3965, "step": 2315 }, { "epoch": 0.18348187759952467, "grad_norm": 1.7683662036436285, "learning_rate": 1.878997649377368e-05, "loss": 0.4382, "step": 2316 }, { "epoch": 0.18356110120816002, "grad_norm": 1.9893096676356994, "learning_rate": 1.8788752653486045e-05, "loss": 0.5534, "step": 2317 }, { "epoch": 0.1836403248167954, "grad_norm": 2.5506165340396256, "learning_rate": 1.878752823450133e-05, "loss": 0.5076, "step": 2318 }, { "epoch": 0.18371954842543078, "grad_norm": 1.6382932677607578, "learning_rate": 1.878630323690017e-05, "loss": 0.2987, "step": 2319 }, { "epoch": 0.18379877203406614, "grad_norm": 1.7383952777398874, "learning_rate": 1.8785077660763217e-05, "loss": 0.3365, "step": 2320 }, { "epoch": 0.18387799564270152, "grad_norm": 1.8154240396472983, "learning_rate": 1.8783851506171166e-05, "loss": 0.3896, "step": 2321 }, { "epoch": 0.1839572192513369, "grad_norm": 1.7578521501095088, "learning_rate": 1.8782624773204764e-05, "loss": 0.3561, "step": 2322 }, { "epoch": 0.18403644285997228, "grad_norm": 2.0584519642640684, "learning_rate": 1.8781397461944777e-05, "loss": 0.3873, "step": 2323 }, { "epoch": 0.18411566646860764, "grad_norm": 1.7436913212805967, "learning_rate": 1.8780169572472024e-05, "loss": 0.4053, "step": 2324 }, { "epoch": 0.18419489007724302, "grad_norm": 1.5158083241245237, "learning_rate": 1.8778941104867347e-05, "loss": 0.256, "step": 2325 }, { "epoch": 0.1842741136858784, "grad_norm": 1.9183068256449582, "learning_rate": 1.8777712059211643e-05, "loss": 0.4625, "step": 2326 }, { "epoch": 0.18435333729451375, "grad_norm": 1.7576413609117723, "learning_rate": 1.8776482435585836e-05, "loss": 0.4278, "step": 2327 }, { "epoch": 0.18443256090314913, "grad_norm": 2.419763721225595, "learning_rate": 1.877525223407089e-05, "loss": 0.4879, "step": 2328 }, { "epoch": 0.18451178451178452, "grad_norm": 1.6680153888882208, "learning_rate": 1.877402145474781e-05, "loss": 0.4523, "step": 2329 }, { "epoch": 0.1845910081204199, "grad_norm": 1.9967123683697205, "learning_rate": 1.877279009769763e-05, "loss": 0.5596, "step": 2330 }, { "epoch": 0.18467023172905525, "grad_norm": 1.927182763885645, "learning_rate": 1.8771558163001438e-05, "loss": 0.4686, "step": 2331 }, { "epoch": 0.18474945533769063, "grad_norm": 1.747050931247662, "learning_rate": 1.8770325650740347e-05, "loss": 0.3459, "step": 2332 }, { "epoch": 0.184828678946326, "grad_norm": 1.9548417004629488, "learning_rate": 1.876909256099551e-05, "loss": 0.4088, "step": 2333 }, { "epoch": 0.18490790255496137, "grad_norm": 2.144296732351858, "learning_rate": 1.876785889384812e-05, "loss": 0.4656, "step": 2334 }, { "epoch": 0.18498712616359675, "grad_norm": 2.031294617666716, "learning_rate": 1.8766624649379415e-05, "loss": 0.4511, "step": 2335 }, { "epoch": 0.18506634977223213, "grad_norm": 2.1992577270489178, "learning_rate": 1.8765389827670657e-05, "loss": 0.5271, "step": 2336 }, { "epoch": 0.1851455733808675, "grad_norm": 1.8328758846218425, "learning_rate": 1.8764154428803155e-05, "loss": 0.3273, "step": 2337 }, { "epoch": 0.18522479698950287, "grad_norm": 1.7319106122120445, "learning_rate": 1.8762918452858256e-05, "loss": 0.3204, "step": 2338 }, { "epoch": 0.18530402059813825, "grad_norm": 1.7051350317911467, "learning_rate": 1.876168189991734e-05, "loss": 0.3261, "step": 2339 }, { "epoch": 0.18538324420677363, "grad_norm": 1.635363196019197, "learning_rate": 1.876044477006183e-05, "loss": 0.3294, "step": 2340 }, { "epoch": 0.18546246781540898, "grad_norm": 1.8682510709027873, "learning_rate": 1.8759207063373183e-05, "loss": 0.4412, "step": 2341 }, { "epoch": 0.18554169142404436, "grad_norm": 1.9523791002269242, "learning_rate": 1.87579687799329e-05, "loss": 0.3822, "step": 2342 }, { "epoch": 0.18562091503267975, "grad_norm": 2.1880988821770866, "learning_rate": 1.875672991982251e-05, "loss": 0.4145, "step": 2343 }, { "epoch": 0.1857001386413151, "grad_norm": 1.7331520326443695, "learning_rate": 1.875549048312359e-05, "loss": 0.3571, "step": 2344 }, { "epoch": 0.18577936224995048, "grad_norm": 2.275734147404578, "learning_rate": 1.8754250469917753e-05, "loss": 0.6173, "step": 2345 }, { "epoch": 0.18585858585858586, "grad_norm": 1.9917886538701886, "learning_rate": 1.8753009880286647e-05, "loss": 0.4861, "step": 2346 }, { "epoch": 0.18593780946722124, "grad_norm": 2.156289567617249, "learning_rate": 1.8751768714311952e-05, "loss": 0.4442, "step": 2347 }, { "epoch": 0.1860170330758566, "grad_norm": 1.8264008770166533, "learning_rate": 1.87505269720754e-05, "loss": 0.4578, "step": 2348 }, { "epoch": 0.18609625668449198, "grad_norm": 1.687707281552878, "learning_rate": 1.8749284653658754e-05, "loss": 0.4556, "step": 2349 }, { "epoch": 0.18617548029312736, "grad_norm": 1.8904634684342112, "learning_rate": 1.874804175914381e-05, "loss": 0.4188, "step": 2350 }, { "epoch": 0.1862547039017627, "grad_norm": 1.5538120984870052, "learning_rate": 1.8746798288612405e-05, "loss": 0.3356, "step": 2351 }, { "epoch": 0.1863339275103981, "grad_norm": 1.8939729130562217, "learning_rate": 1.8745554242146428e-05, "loss": 0.4321, "step": 2352 }, { "epoch": 0.18641315111903348, "grad_norm": 1.781224552420944, "learning_rate": 1.874430961982778e-05, "loss": 0.4184, "step": 2353 }, { "epoch": 0.18649237472766886, "grad_norm": 1.670711878932282, "learning_rate": 1.874306442173842e-05, "loss": 0.391, "step": 2354 }, { "epoch": 0.1865715983363042, "grad_norm": 1.6724785368717014, "learning_rate": 1.8741818647960337e-05, "loss": 0.3589, "step": 2355 }, { "epoch": 0.1866508219449396, "grad_norm": 1.7555861838819646, "learning_rate": 1.8740572298575558e-05, "loss": 0.3607, "step": 2356 }, { "epoch": 0.18673004555357497, "grad_norm": 1.9545398762094552, "learning_rate": 1.8739325373666152e-05, "loss": 0.4158, "step": 2357 }, { "epoch": 0.18680926916221033, "grad_norm": 1.7468752504306375, "learning_rate": 1.8738077873314218e-05, "loss": 0.3636, "step": 2358 }, { "epoch": 0.1868884927708457, "grad_norm": 2.06051487039993, "learning_rate": 1.8736829797601903e-05, "loss": 0.5174, "step": 2359 }, { "epoch": 0.1869677163794811, "grad_norm": 2.0622390544462497, "learning_rate": 1.8735581146611387e-05, "loss": 0.4825, "step": 2360 }, { "epoch": 0.18704693998811645, "grad_norm": 1.8789107491141992, "learning_rate": 1.873433192042488e-05, "loss": 0.3824, "step": 2361 }, { "epoch": 0.18712616359675183, "grad_norm": 1.6979003987512717, "learning_rate": 1.8733082119124646e-05, "loss": 0.3843, "step": 2362 }, { "epoch": 0.1872053872053872, "grad_norm": 2.035126859611104, "learning_rate": 1.8731831742792974e-05, "loss": 0.5086, "step": 2363 }, { "epoch": 0.1872846108140226, "grad_norm": 1.9832439389715169, "learning_rate": 1.87305807915122e-05, "loss": 0.4475, "step": 2364 }, { "epoch": 0.18736383442265794, "grad_norm": 1.8418365032068778, "learning_rate": 1.8729329265364685e-05, "loss": 0.413, "step": 2365 }, { "epoch": 0.18744305803129332, "grad_norm": 1.8788030236637507, "learning_rate": 1.8728077164432844e-05, "loss": 0.4368, "step": 2366 }, { "epoch": 0.1875222816399287, "grad_norm": 1.7870952935908009, "learning_rate": 1.872682448879912e-05, "loss": 0.325, "step": 2367 }, { "epoch": 0.18760150524856406, "grad_norm": 1.7754411529992522, "learning_rate": 1.8725571238545992e-05, "loss": 0.3682, "step": 2368 }, { "epoch": 0.18768072885719944, "grad_norm": 1.7655766330990534, "learning_rate": 1.872431741375598e-05, "loss": 0.4078, "step": 2369 }, { "epoch": 0.18775995246583482, "grad_norm": 1.6194993772848008, "learning_rate": 1.872306301451165e-05, "loss": 0.268, "step": 2370 }, { "epoch": 0.1878391760744702, "grad_norm": 1.7114325339534924, "learning_rate": 1.872180804089559e-05, "loss": 0.3151, "step": 2371 }, { "epoch": 0.18791839968310556, "grad_norm": 1.8473532529845424, "learning_rate": 1.8720552492990438e-05, "loss": 0.3935, "step": 2372 }, { "epoch": 0.18799762329174094, "grad_norm": 1.8641753718218268, "learning_rate": 1.8719296370878866e-05, "loss": 0.3951, "step": 2373 }, { "epoch": 0.18807684690037632, "grad_norm": 1.8870948327931774, "learning_rate": 1.871803967464358e-05, "loss": 0.3629, "step": 2374 }, { "epoch": 0.18815607050901167, "grad_norm": 2.196962506361073, "learning_rate": 1.8716782404367333e-05, "loss": 0.3604, "step": 2375 }, { "epoch": 0.18823529411764706, "grad_norm": 1.7294808074470036, "learning_rate": 1.8715524560132906e-05, "loss": 0.3808, "step": 2376 }, { "epoch": 0.18831451772628244, "grad_norm": 2.0256550860295954, "learning_rate": 1.8714266142023124e-05, "loss": 0.4235, "step": 2377 }, { "epoch": 0.18839374133491782, "grad_norm": 1.7510176391458774, "learning_rate": 1.8713007150120846e-05, "loss": 0.375, "step": 2378 }, { "epoch": 0.18847296494355317, "grad_norm": 1.892410094595781, "learning_rate": 1.871174758450897e-05, "loss": 0.4023, "step": 2379 }, { "epoch": 0.18855218855218855, "grad_norm": 1.9373251719769788, "learning_rate": 1.8710487445270436e-05, "loss": 0.4978, "step": 2380 }, { "epoch": 0.18863141216082394, "grad_norm": 2.0180704993445766, "learning_rate": 1.8709226732488216e-05, "loss": 0.3978, "step": 2381 }, { "epoch": 0.1887106357694593, "grad_norm": 1.7390286886625042, "learning_rate": 1.8707965446245317e-05, "loss": 0.3696, "step": 2382 }, { "epoch": 0.18878985937809467, "grad_norm": 1.8298931479275429, "learning_rate": 1.87067035866248e-05, "loss": 0.3391, "step": 2383 }, { "epoch": 0.18886908298673005, "grad_norm": 1.4915163930334776, "learning_rate": 1.8705441153709742e-05, "loss": 0.3487, "step": 2384 }, { "epoch": 0.1889483065953654, "grad_norm": 1.7732297991573869, "learning_rate": 1.8704178147583273e-05, "loss": 0.4103, "step": 2385 }, { "epoch": 0.1890275302040008, "grad_norm": 1.5095699225614836, "learning_rate": 1.8702914568328555e-05, "loss": 0.375, "step": 2386 }, { "epoch": 0.18910675381263617, "grad_norm": 1.4673082236948103, "learning_rate": 1.8701650416028788e-05, "loss": 0.2898, "step": 2387 }, { "epoch": 0.18918597742127155, "grad_norm": 1.7899505532539894, "learning_rate": 1.870038569076721e-05, "loss": 0.3584, "step": 2388 }, { "epoch": 0.1892652010299069, "grad_norm": 1.7333453460030324, "learning_rate": 1.86991203926271e-05, "loss": 0.3415, "step": 2389 }, { "epoch": 0.18934442463854229, "grad_norm": 2.0920851841960606, "learning_rate": 1.8697854521691767e-05, "loss": 0.4613, "step": 2390 }, { "epoch": 0.18942364824717767, "grad_norm": 2.2864957706129863, "learning_rate": 1.8696588078044566e-05, "loss": 0.4501, "step": 2391 }, { "epoch": 0.18950287185581302, "grad_norm": 2.063547774954329, "learning_rate": 1.8695321061768886e-05, "loss": 0.4536, "step": 2392 }, { "epoch": 0.1895820954644484, "grad_norm": 1.6002743802277264, "learning_rate": 1.8694053472948154e-05, "loss": 0.3797, "step": 2393 }, { "epoch": 0.18966131907308378, "grad_norm": 1.888273259717488, "learning_rate": 1.8692785311665835e-05, "loss": 0.3678, "step": 2394 }, { "epoch": 0.18974054268171917, "grad_norm": 1.635737304559724, "learning_rate": 1.8691516578005426e-05, "loss": 0.3704, "step": 2395 }, { "epoch": 0.18981976629035452, "grad_norm": 1.4483407927362657, "learning_rate": 1.8690247272050474e-05, "loss": 0.2832, "step": 2396 }, { "epoch": 0.1898989898989899, "grad_norm": 1.5334735991646946, "learning_rate": 1.8688977393884555e-05, "loss": 0.3018, "step": 2397 }, { "epoch": 0.18997821350762528, "grad_norm": 1.9058660957587708, "learning_rate": 1.868770694359128e-05, "loss": 0.3983, "step": 2398 }, { "epoch": 0.19005743711626064, "grad_norm": 2.0388866409711555, "learning_rate": 1.868643592125431e-05, "loss": 0.4391, "step": 2399 }, { "epoch": 0.19013666072489602, "grad_norm": 2.287289222407347, "learning_rate": 1.8685164326957327e-05, "loss": 0.5266, "step": 2400 }, { "epoch": 0.1902158843335314, "grad_norm": 1.932741715497152, "learning_rate": 1.8683892160784066e-05, "loss": 0.4238, "step": 2401 }, { "epoch": 0.19029510794216675, "grad_norm": 1.8191490322617931, "learning_rate": 1.868261942281829e-05, "loss": 0.4583, "step": 2402 }, { "epoch": 0.19037433155080213, "grad_norm": 2.303602836911234, "learning_rate": 1.86813461131438e-05, "loss": 0.4993, "step": 2403 }, { "epoch": 0.19045355515943752, "grad_norm": 1.7777739596052975, "learning_rate": 1.8680072231844445e-05, "loss": 0.3685, "step": 2404 }, { "epoch": 0.1905327787680729, "grad_norm": 1.4140370681319647, "learning_rate": 1.8678797779004096e-05, "loss": 0.3052, "step": 2405 }, { "epoch": 0.19061200237670825, "grad_norm": 1.8499230873998467, "learning_rate": 1.8677522754706677e-05, "loss": 0.4748, "step": 2406 }, { "epoch": 0.19069122598534363, "grad_norm": 1.6907549028453686, "learning_rate": 1.8676247159036132e-05, "loss": 0.413, "step": 2407 }, { "epoch": 0.190770449593979, "grad_norm": 1.957102796744527, "learning_rate": 1.8674970992076465e-05, "loss": 0.3538, "step": 2408 }, { "epoch": 0.19084967320261437, "grad_norm": 2.1751572747829657, "learning_rate": 1.8673694253911696e-05, "loss": 0.5967, "step": 2409 }, { "epoch": 0.19092889681124975, "grad_norm": 1.8679825237373502, "learning_rate": 1.8672416944625896e-05, "loss": 0.4704, "step": 2410 }, { "epoch": 0.19100812041988513, "grad_norm": 1.8916616445963068, "learning_rate": 1.867113906430317e-05, "loss": 0.4638, "step": 2411 }, { "epoch": 0.1910873440285205, "grad_norm": 1.5037310405266053, "learning_rate": 1.8669860613027657e-05, "loss": 0.343, "step": 2412 }, { "epoch": 0.19116656763715587, "grad_norm": 2.342439312107435, "learning_rate": 1.8668581590883544e-05, "loss": 0.5825, "step": 2413 }, { "epoch": 0.19124579124579125, "grad_norm": 2.116267144376701, "learning_rate": 1.8667301997955038e-05, "loss": 0.4826, "step": 2414 }, { "epoch": 0.19132501485442663, "grad_norm": 1.789201147244534, "learning_rate": 1.8666021834326404e-05, "loss": 0.4607, "step": 2415 }, { "epoch": 0.19140423846306198, "grad_norm": 2.1631592228638614, "learning_rate": 1.866474110008193e-05, "loss": 0.4965, "step": 2416 }, { "epoch": 0.19148346207169736, "grad_norm": 1.7570021410851158, "learning_rate": 1.8663459795305946e-05, "loss": 0.4359, "step": 2417 }, { "epoch": 0.19156268568033274, "grad_norm": 1.7415731620720836, "learning_rate": 1.866217792008282e-05, "loss": 0.4227, "step": 2418 }, { "epoch": 0.1916419092889681, "grad_norm": 1.6731262756305025, "learning_rate": 1.866089547449696e-05, "loss": 0.3284, "step": 2419 }, { "epoch": 0.19172113289760348, "grad_norm": 1.9295648640892022, "learning_rate": 1.8659612458632802e-05, "loss": 0.4063, "step": 2420 }, { "epoch": 0.19180035650623886, "grad_norm": 1.6838959609857238, "learning_rate": 1.8658328872574833e-05, "loss": 0.3641, "step": 2421 }, { "epoch": 0.19187958011487424, "grad_norm": 1.8323934603776113, "learning_rate": 1.8657044716407573e-05, "loss": 0.4275, "step": 2422 }, { "epoch": 0.1919588037235096, "grad_norm": 1.8700447133185338, "learning_rate": 1.865575999021557e-05, "loss": 0.534, "step": 2423 }, { "epoch": 0.19203802733214498, "grad_norm": 2.230692013912159, "learning_rate": 1.8654474694083416e-05, "loss": 0.4678, "step": 2424 }, { "epoch": 0.19211725094078036, "grad_norm": 1.8800928379643222, "learning_rate": 1.8653188828095754e-05, "loss": 0.3926, "step": 2425 }, { "epoch": 0.1921964745494157, "grad_norm": 2.064351405857103, "learning_rate": 1.865190239233724e-05, "loss": 0.4101, "step": 2426 }, { "epoch": 0.1922756981580511, "grad_norm": 1.8977278493163616, "learning_rate": 1.8650615386892587e-05, "loss": 0.3694, "step": 2427 }, { "epoch": 0.19235492176668648, "grad_norm": 1.909276467843947, "learning_rate": 1.8649327811846533e-05, "loss": 0.5535, "step": 2428 }, { "epoch": 0.19243414537532186, "grad_norm": 1.5399040700323343, "learning_rate": 1.8648039667283857e-05, "loss": 0.4441, "step": 2429 }, { "epoch": 0.1925133689839572, "grad_norm": 1.6651636991409458, "learning_rate": 1.8646750953289384e-05, "loss": 0.3652, "step": 2430 }, { "epoch": 0.1925925925925926, "grad_norm": 1.6815956236704828, "learning_rate": 1.8645461669947966e-05, "loss": 0.3155, "step": 2431 }, { "epoch": 0.19267181620122797, "grad_norm": 1.6718262583049504, "learning_rate": 1.8644171817344497e-05, "loss": 0.3481, "step": 2432 }, { "epoch": 0.19275103980986333, "grad_norm": 2.7799199239515766, "learning_rate": 1.8642881395563904e-05, "loss": 0.3483, "step": 2433 }, { "epoch": 0.1928302634184987, "grad_norm": 1.9756974054244152, "learning_rate": 1.864159040469116e-05, "loss": 0.4401, "step": 2434 }, { "epoch": 0.1929094870271341, "grad_norm": 1.8071163200499114, "learning_rate": 1.864029884481127e-05, "loss": 0.4145, "step": 2435 }, { "epoch": 0.19298871063576947, "grad_norm": 2.543240114206302, "learning_rate": 1.8639006716009275e-05, "loss": 0.5372, "step": 2436 }, { "epoch": 0.19306793424440483, "grad_norm": 2.228588993655016, "learning_rate": 1.8637714018370255e-05, "loss": 0.4201, "step": 2437 }, { "epoch": 0.1931471578530402, "grad_norm": 1.8494149784248002, "learning_rate": 1.8636420751979328e-05, "loss": 0.4531, "step": 2438 }, { "epoch": 0.1932263814616756, "grad_norm": 2.0253791399897874, "learning_rate": 1.863512691692165e-05, "loss": 0.456, "step": 2439 }, { "epoch": 0.19330560507031094, "grad_norm": 1.6588983512442654, "learning_rate": 1.863383251328242e-05, "loss": 0.424, "step": 2440 }, { "epoch": 0.19338482867894632, "grad_norm": 1.8389834487870507, "learning_rate": 1.8632537541146856e-05, "loss": 0.3149, "step": 2441 }, { "epoch": 0.1934640522875817, "grad_norm": 1.9858381854276652, "learning_rate": 1.8631242000600235e-05, "loss": 0.5007, "step": 2442 }, { "epoch": 0.19354327589621706, "grad_norm": 2.266124060357365, "learning_rate": 1.8629945891727856e-05, "loss": 0.4956, "step": 2443 }, { "epoch": 0.19362249950485244, "grad_norm": 1.593089901704416, "learning_rate": 1.8628649214615066e-05, "loss": 0.3618, "step": 2444 }, { "epoch": 0.19370172311348782, "grad_norm": 1.679824462137367, "learning_rate": 1.8627351969347246e-05, "loss": 0.3759, "step": 2445 }, { "epoch": 0.1937809467221232, "grad_norm": 1.8874286946173535, "learning_rate": 1.8626054156009807e-05, "loss": 0.4039, "step": 2446 }, { "epoch": 0.19386017033075856, "grad_norm": 1.7848707595708873, "learning_rate": 1.862475577468821e-05, "loss": 0.3609, "step": 2447 }, { "epoch": 0.19393939393939394, "grad_norm": 1.8519651811855438, "learning_rate": 1.8623456825467948e-05, "loss": 0.4146, "step": 2448 }, { "epoch": 0.19401861754802932, "grad_norm": 1.7979330256148234, "learning_rate": 1.8622157308434544e-05, "loss": 0.4959, "step": 2449 }, { "epoch": 0.19409784115666467, "grad_norm": 1.966534088434029, "learning_rate": 1.8620857223673567e-05, "loss": 0.4544, "step": 2450 }, { "epoch": 0.19417706476530006, "grad_norm": 1.9639912803367112, "learning_rate": 1.8619556571270624e-05, "loss": 0.357, "step": 2451 }, { "epoch": 0.19425628837393544, "grad_norm": 2.1894482909531607, "learning_rate": 1.8618255351311355e-05, "loss": 0.3974, "step": 2452 }, { "epoch": 0.19433551198257082, "grad_norm": 1.7694374669375357, "learning_rate": 1.8616953563881444e-05, "loss": 0.4088, "step": 2453 }, { "epoch": 0.19441473559120617, "grad_norm": 1.7142932207290247, "learning_rate": 1.8615651209066598e-05, "loss": 0.3559, "step": 2454 }, { "epoch": 0.19449395919984155, "grad_norm": 1.9300198351645033, "learning_rate": 1.8614348286952577e-05, "loss": 0.3958, "step": 2455 }, { "epoch": 0.19457318280847694, "grad_norm": 2.214933640628995, "learning_rate": 1.8613044797625173e-05, "loss": 0.4106, "step": 2456 }, { "epoch": 0.1946524064171123, "grad_norm": 1.6909762560643709, "learning_rate": 1.861174074117021e-05, "loss": 0.367, "step": 2457 }, { "epoch": 0.19473163002574767, "grad_norm": 1.9584800927859016, "learning_rate": 1.8610436117673557e-05, "loss": 0.3804, "step": 2458 }, { "epoch": 0.19481085363438305, "grad_norm": 2.2511836739384603, "learning_rate": 1.8609130927221116e-05, "loss": 0.4379, "step": 2459 }, { "epoch": 0.1948900772430184, "grad_norm": 1.599297769846532, "learning_rate": 1.8607825169898827e-05, "loss": 0.4615, "step": 2460 }, { "epoch": 0.1949693008516538, "grad_norm": 1.901142235594116, "learning_rate": 1.8606518845792672e-05, "loss": 0.6061, "step": 2461 }, { "epoch": 0.19504852446028917, "grad_norm": 2.0458999263956397, "learning_rate": 1.860521195498866e-05, "loss": 0.4046, "step": 2462 }, { "epoch": 0.19512774806892455, "grad_norm": 1.759581313745163, "learning_rate": 1.8603904497572846e-05, "loss": 0.3766, "step": 2463 }, { "epoch": 0.1952069716775599, "grad_norm": 1.9240944001896176, "learning_rate": 1.8602596473631323e-05, "loss": 0.4983, "step": 2464 }, { "epoch": 0.19528619528619529, "grad_norm": 1.522758996737219, "learning_rate": 1.8601287883250215e-05, "loss": 0.3731, "step": 2465 }, { "epoch": 0.19536541889483067, "grad_norm": 1.8265426742019597, "learning_rate": 1.8599978726515685e-05, "loss": 0.3485, "step": 2466 }, { "epoch": 0.19544464250346602, "grad_norm": 2.4480816777113112, "learning_rate": 1.8598669003513934e-05, "loss": 0.5251, "step": 2467 }, { "epoch": 0.1955238661121014, "grad_norm": 1.8224430982709632, "learning_rate": 1.8597358714331207e-05, "loss": 0.3705, "step": 2468 }, { "epoch": 0.19560308972073678, "grad_norm": 1.7001483763139453, "learning_rate": 1.8596047859053776e-05, "loss": 0.4021, "step": 2469 }, { "epoch": 0.19568231332937217, "grad_norm": 1.7204409613521898, "learning_rate": 1.8594736437767954e-05, "loss": 0.4864, "step": 2470 }, { "epoch": 0.19576153693800752, "grad_norm": 2.091932585980975, "learning_rate": 1.8593424450560094e-05, "loss": 0.4887, "step": 2471 }, { "epoch": 0.1958407605466429, "grad_norm": 1.7633716664478725, "learning_rate": 1.8592111897516583e-05, "loss": 0.4496, "step": 2472 }, { "epoch": 0.19591998415527828, "grad_norm": 1.6653392322170877, "learning_rate": 1.8590798778723843e-05, "loss": 0.4546, "step": 2473 }, { "epoch": 0.19599920776391364, "grad_norm": 1.7320120374400938, "learning_rate": 1.8589485094268344e-05, "loss": 0.373, "step": 2474 }, { "epoch": 0.19607843137254902, "grad_norm": 1.6322324489444844, "learning_rate": 1.858817084423658e-05, "loss": 0.3944, "step": 2475 }, { "epoch": 0.1961576549811844, "grad_norm": 1.795466811829931, "learning_rate": 1.8586856028715087e-05, "loss": 0.469, "step": 2476 }, { "epoch": 0.19623687858981978, "grad_norm": 2.3937840988692707, "learning_rate": 1.8585540647790445e-05, "loss": 0.4013, "step": 2477 }, { "epoch": 0.19631610219845513, "grad_norm": 1.6164375785474248, "learning_rate": 1.858422470154926e-05, "loss": 0.4214, "step": 2478 }, { "epoch": 0.19639532580709052, "grad_norm": 1.9199350130245678, "learning_rate": 1.8582908190078184e-05, "loss": 0.5453, "step": 2479 }, { "epoch": 0.1964745494157259, "grad_norm": 1.7824278600148742, "learning_rate": 1.8581591113463903e-05, "loss": 0.5164, "step": 2480 }, { "epoch": 0.19655377302436125, "grad_norm": 1.7286654271742554, "learning_rate": 1.858027347179314e-05, "loss": 0.3957, "step": 2481 }, { "epoch": 0.19663299663299663, "grad_norm": 2.0083320030792726, "learning_rate": 1.8578955265152652e-05, "loss": 0.374, "step": 2482 }, { "epoch": 0.196712220241632, "grad_norm": 1.9580549598132762, "learning_rate": 1.857763649362924e-05, "loss": 0.5356, "step": 2483 }, { "epoch": 0.19679144385026737, "grad_norm": 1.7602610015289, "learning_rate": 1.857631715730974e-05, "loss": 0.4486, "step": 2484 }, { "epoch": 0.19687066745890275, "grad_norm": 1.6370968567701094, "learning_rate": 1.857499725628102e-05, "loss": 0.3566, "step": 2485 }, { "epoch": 0.19694989106753813, "grad_norm": 1.922573016685483, "learning_rate": 1.8573676790629988e-05, "loss": 0.3174, "step": 2486 }, { "epoch": 0.1970291146761735, "grad_norm": 1.4623496852922742, "learning_rate": 1.8572355760443597e-05, "loss": 0.2664, "step": 2487 }, { "epoch": 0.19710833828480886, "grad_norm": 1.6989502090112747, "learning_rate": 1.8571034165808826e-05, "loss": 0.3267, "step": 2488 }, { "epoch": 0.19718756189344425, "grad_norm": 1.6543509790456885, "learning_rate": 1.85697120068127e-05, "loss": 0.2347, "step": 2489 }, { "epoch": 0.19726678550207963, "grad_norm": 2.162622394196891, "learning_rate": 1.8568389283542263e-05, "loss": 0.4326, "step": 2490 }, { "epoch": 0.19734600911071498, "grad_norm": 3.2740540955986495, "learning_rate": 1.8567065996084628e-05, "loss": 0.4848, "step": 2491 }, { "epoch": 0.19742523271935036, "grad_norm": 1.8739873451937548, "learning_rate": 1.8565742144526917e-05, "loss": 0.4019, "step": 2492 }, { "epoch": 0.19750445632798574, "grad_norm": 1.7344103057979974, "learning_rate": 1.85644177289563e-05, "loss": 0.4771, "step": 2493 }, { "epoch": 0.19758367993662113, "grad_norm": 2.0293785876315154, "learning_rate": 1.856309274945999e-05, "loss": 0.3886, "step": 2494 }, { "epoch": 0.19766290354525648, "grad_norm": 1.876725659866697, "learning_rate": 1.8561767206125223e-05, "loss": 0.3855, "step": 2495 }, { "epoch": 0.19774212715389186, "grad_norm": 2.630788697747225, "learning_rate": 1.856044109903928e-05, "loss": 0.4633, "step": 2496 }, { "epoch": 0.19782135076252724, "grad_norm": 2.225504228548519, "learning_rate": 1.8559114428289482e-05, "loss": 0.5706, "step": 2497 }, { "epoch": 0.1979005743711626, "grad_norm": 1.8209421607723064, "learning_rate": 1.8557787193963184e-05, "loss": 0.4335, "step": 2498 }, { "epoch": 0.19797979797979798, "grad_norm": 1.962437757913541, "learning_rate": 1.8556459396147777e-05, "loss": 0.4608, "step": 2499 }, { "epoch": 0.19805902158843336, "grad_norm": 2.0788544979334023, "learning_rate": 1.8555131034930686e-05, "loss": 0.4806, "step": 2500 }, { "epoch": 0.1981382451970687, "grad_norm": 1.7789781442235573, "learning_rate": 1.8553802110399385e-05, "loss": 0.3791, "step": 2501 }, { "epoch": 0.1982174688057041, "grad_norm": 1.8436094826417708, "learning_rate": 1.8552472622641372e-05, "loss": 0.3799, "step": 2502 }, { "epoch": 0.19829669241433948, "grad_norm": 2.1843434642231268, "learning_rate": 1.8551142571744188e-05, "loss": 0.5619, "step": 2503 }, { "epoch": 0.19837591602297486, "grad_norm": 1.8538465015068417, "learning_rate": 1.854981195779541e-05, "loss": 0.325, "step": 2504 }, { "epoch": 0.1984551396316102, "grad_norm": 1.8608829464420031, "learning_rate": 1.8548480780882658e-05, "loss": 0.4484, "step": 2505 }, { "epoch": 0.1985343632402456, "grad_norm": 2.0326954963360837, "learning_rate": 1.8547149041093574e-05, "loss": 0.4729, "step": 2506 }, { "epoch": 0.19861358684888097, "grad_norm": 1.5760119244850919, "learning_rate": 1.8545816738515855e-05, "loss": 0.4157, "step": 2507 }, { "epoch": 0.19869281045751633, "grad_norm": 1.497014462165689, "learning_rate": 1.854448387323722e-05, "loss": 0.371, "step": 2508 }, { "epoch": 0.1987720340661517, "grad_norm": 1.7390940156536723, "learning_rate": 1.8543150445345443e-05, "loss": 0.3971, "step": 2509 }, { "epoch": 0.1988512576747871, "grad_norm": 1.5371060435271777, "learning_rate": 1.854181645492831e-05, "loss": 0.3336, "step": 2510 }, { "epoch": 0.19893048128342247, "grad_norm": 1.9229194455839491, "learning_rate": 1.8540481902073664e-05, "loss": 0.4252, "step": 2511 }, { "epoch": 0.19900970489205783, "grad_norm": 2.2818319290416724, "learning_rate": 1.8539146786869385e-05, "loss": 0.4121, "step": 2512 }, { "epoch": 0.1990889285006932, "grad_norm": 1.7807339419216095, "learning_rate": 1.8537811109403372e-05, "loss": 0.3523, "step": 2513 }, { "epoch": 0.1991681521093286, "grad_norm": 2.208398926240744, "learning_rate": 1.853647486976358e-05, "loss": 0.547, "step": 2514 }, { "epoch": 0.19924737571796394, "grad_norm": 1.5602544904924176, "learning_rate": 1.8535138068037995e-05, "loss": 0.3616, "step": 2515 }, { "epoch": 0.19932659932659932, "grad_norm": 2.2095884820039893, "learning_rate": 1.8533800704314633e-05, "loss": 0.5332, "step": 2516 }, { "epoch": 0.1994058229352347, "grad_norm": 1.7618684601616275, "learning_rate": 1.8532462778681558e-05, "loss": 0.3672, "step": 2517 }, { "epoch": 0.1994850465438701, "grad_norm": 1.8240924869890305, "learning_rate": 1.8531124291226866e-05, "loss": 0.3825, "step": 2518 }, { "epoch": 0.19956427015250544, "grad_norm": 1.7317104373236238, "learning_rate": 1.8529785242038688e-05, "loss": 0.3898, "step": 2519 }, { "epoch": 0.19964349376114082, "grad_norm": 1.6280770333620187, "learning_rate": 1.8528445631205195e-05, "loss": 0.3855, "step": 2520 }, { "epoch": 0.1997227173697762, "grad_norm": 1.7566863403512298, "learning_rate": 1.852710545881459e-05, "loss": 0.4474, "step": 2521 }, { "epoch": 0.19980194097841156, "grad_norm": 1.878815093583782, "learning_rate": 1.8525764724955123e-05, "loss": 0.4035, "step": 2522 }, { "epoch": 0.19988116458704694, "grad_norm": 1.8298324286185195, "learning_rate": 1.8524423429715072e-05, "loss": 0.3532, "step": 2523 }, { "epoch": 0.19996038819568232, "grad_norm": 1.677771114182212, "learning_rate": 1.8523081573182754e-05, "loss": 0.3794, "step": 2524 }, { "epoch": 0.20003961180431767, "grad_norm": 1.8449111661378463, "learning_rate": 1.8521739155446527e-05, "loss": 0.4851, "step": 2525 }, { "epoch": 0.20011883541295306, "grad_norm": 1.686290781787805, "learning_rate": 1.852039617659478e-05, "loss": 0.3886, "step": 2526 }, { "epoch": 0.20019805902158844, "grad_norm": 1.769262906449984, "learning_rate": 1.851905263671594e-05, "loss": 0.4177, "step": 2527 }, { "epoch": 0.20027728263022382, "grad_norm": 2.213461401505922, "learning_rate": 1.8517708535898477e-05, "loss": 0.3671, "step": 2528 }, { "epoch": 0.20035650623885917, "grad_norm": 1.8336084314787529, "learning_rate": 1.851636387423089e-05, "loss": 0.3184, "step": 2529 }, { "epoch": 0.20043572984749455, "grad_norm": 1.767729823809732, "learning_rate": 1.8515018651801723e-05, "loss": 0.3668, "step": 2530 }, { "epoch": 0.20051495345612994, "grad_norm": 1.72123658107767, "learning_rate": 1.8513672868699547e-05, "loss": 0.409, "step": 2531 }, { "epoch": 0.2005941770647653, "grad_norm": 1.9114359791423503, "learning_rate": 1.851232652501298e-05, "loss": 0.3531, "step": 2532 }, { "epoch": 0.20067340067340067, "grad_norm": 2.0924360141875415, "learning_rate": 1.851097962083067e-05, "loss": 0.4826, "step": 2533 }, { "epoch": 0.20075262428203605, "grad_norm": 1.552165791094521, "learning_rate": 1.85096321562413e-05, "loss": 0.305, "step": 2534 }, { "epoch": 0.20083184789067143, "grad_norm": 1.770835993604429, "learning_rate": 1.8508284131333604e-05, "loss": 0.3868, "step": 2535 }, { "epoch": 0.2009110714993068, "grad_norm": 1.9056575372469093, "learning_rate": 1.850693554619633e-05, "loss": 0.4677, "step": 2536 }, { "epoch": 0.20099029510794217, "grad_norm": 1.8964029082208331, "learning_rate": 1.8505586400918288e-05, "loss": 0.368, "step": 2537 }, { "epoch": 0.20106951871657755, "grad_norm": 2.0384928936642686, "learning_rate": 1.8504236695588308e-05, "loss": 0.3827, "step": 2538 }, { "epoch": 0.2011487423252129, "grad_norm": 1.7720548147958977, "learning_rate": 1.8502886430295262e-05, "loss": 0.4841, "step": 2539 }, { "epoch": 0.20122796593384829, "grad_norm": 1.992105236078617, "learning_rate": 1.8501535605128054e-05, "loss": 0.3567, "step": 2540 }, { "epoch": 0.20130718954248367, "grad_norm": 1.6796399304185092, "learning_rate": 1.8500184220175636e-05, "loss": 0.4534, "step": 2541 }, { "epoch": 0.20138641315111902, "grad_norm": 1.7008230592972515, "learning_rate": 1.8498832275526988e-05, "loss": 0.4597, "step": 2542 }, { "epoch": 0.2014656367597544, "grad_norm": 1.5855692661266696, "learning_rate": 1.8497479771271125e-05, "loss": 0.3148, "step": 2543 }, { "epoch": 0.20154486036838978, "grad_norm": 1.7779826536689631, "learning_rate": 1.8496126707497112e-05, "loss": 0.407, "step": 2544 }, { "epoch": 0.20162408397702516, "grad_norm": 1.8264384362815282, "learning_rate": 1.849477308429403e-05, "loss": 0.3947, "step": 2545 }, { "epoch": 0.20170330758566052, "grad_norm": 2.158459029956277, "learning_rate": 1.8493418901751016e-05, "loss": 0.4558, "step": 2546 }, { "epoch": 0.2017825311942959, "grad_norm": 1.786016789552016, "learning_rate": 1.849206415995724e-05, "loss": 0.4428, "step": 2547 }, { "epoch": 0.20186175480293128, "grad_norm": 1.700575434704635, "learning_rate": 1.8490708859001896e-05, "loss": 0.4093, "step": 2548 }, { "epoch": 0.20194097841156664, "grad_norm": 2.018488079044388, "learning_rate": 1.8489352998974227e-05, "loss": 0.4784, "step": 2549 }, { "epoch": 0.20202020202020202, "grad_norm": 1.8071537132065065, "learning_rate": 1.8487996579963515e-05, "loss": 0.3771, "step": 2550 }, { "epoch": 0.2020994256288374, "grad_norm": 2.0026948751367173, "learning_rate": 1.8486639602059066e-05, "loss": 0.4292, "step": 2551 }, { "epoch": 0.20217864923747278, "grad_norm": 2.069044623731319, "learning_rate": 1.8485282065350237e-05, "loss": 0.4185, "step": 2552 }, { "epoch": 0.20225787284610813, "grad_norm": 1.5654862434381134, "learning_rate": 1.848392396992641e-05, "loss": 0.3382, "step": 2553 }, { "epoch": 0.20233709645474351, "grad_norm": 1.7818155360512213, "learning_rate": 1.8482565315877013e-05, "loss": 0.4563, "step": 2554 }, { "epoch": 0.2024163200633789, "grad_norm": 1.813607767893193, "learning_rate": 1.8481206103291506e-05, "loss": 0.4047, "step": 2555 }, { "epoch": 0.20249554367201425, "grad_norm": 2.014937066860306, "learning_rate": 1.8479846332259388e-05, "loss": 0.4206, "step": 2556 }, { "epoch": 0.20257476728064963, "grad_norm": 2.202476476404529, "learning_rate": 1.847848600287019e-05, "loss": 0.4824, "step": 2557 }, { "epoch": 0.202653990889285, "grad_norm": 1.9251910412474011, "learning_rate": 1.8477125115213484e-05, "loss": 0.5128, "step": 2558 }, { "epoch": 0.20273321449792037, "grad_norm": 1.8213524522171174, "learning_rate": 1.8475763669378878e-05, "loss": 0.4291, "step": 2559 }, { "epoch": 0.20281243810655575, "grad_norm": 1.6161176638402914, "learning_rate": 1.8474401665456016e-05, "loss": 0.3686, "step": 2560 }, { "epoch": 0.20289166171519113, "grad_norm": 1.9537352090698643, "learning_rate": 1.8473039103534583e-05, "loss": 0.4901, "step": 2561 }, { "epoch": 0.2029708853238265, "grad_norm": 1.8927710174886234, "learning_rate": 1.8471675983704295e-05, "loss": 0.5194, "step": 2562 }, { "epoch": 0.20305010893246186, "grad_norm": 1.8293950759224924, "learning_rate": 1.8470312306054903e-05, "loss": 0.3808, "step": 2563 }, { "epoch": 0.20312933254109725, "grad_norm": 1.4585840390039595, "learning_rate": 1.8468948070676205e-05, "loss": 0.3287, "step": 2564 }, { "epoch": 0.20320855614973263, "grad_norm": 1.8672224498645469, "learning_rate": 1.8467583277658026e-05, "loss": 0.3643, "step": 2565 }, { "epoch": 0.20328777975836798, "grad_norm": 1.6749160631893312, "learning_rate": 1.8466217927090232e-05, "loss": 0.3501, "step": 2566 }, { "epoch": 0.20336700336700336, "grad_norm": 2.2147584958701785, "learning_rate": 1.8464852019062726e-05, "loss": 0.4364, "step": 2567 }, { "epoch": 0.20344622697563874, "grad_norm": 1.822011263780815, "learning_rate": 1.846348555366544e-05, "loss": 0.4391, "step": 2568 }, { "epoch": 0.20352545058427413, "grad_norm": 1.8808694610123304, "learning_rate": 1.8462118530988356e-05, "loss": 0.5218, "step": 2569 }, { "epoch": 0.20360467419290948, "grad_norm": 1.7285091505358392, "learning_rate": 1.8460750951121487e-05, "loss": 0.4514, "step": 2570 }, { "epoch": 0.20368389780154486, "grad_norm": 1.7192328423533971, "learning_rate": 1.8459382814154874e-05, "loss": 0.4094, "step": 2571 }, { "epoch": 0.20376312141018024, "grad_norm": 1.727594784011334, "learning_rate": 1.845801412017861e-05, "loss": 0.3623, "step": 2572 }, { "epoch": 0.2038423450188156, "grad_norm": 1.7380797862194362, "learning_rate": 1.845664486928281e-05, "loss": 0.4399, "step": 2573 }, { "epoch": 0.20392156862745098, "grad_norm": 1.821043741521068, "learning_rate": 1.8455275061557643e-05, "loss": 0.346, "step": 2574 }, { "epoch": 0.20400079223608636, "grad_norm": 1.7039374485497736, "learning_rate": 1.845390469709329e-05, "loss": 0.4366, "step": 2575 }, { "epoch": 0.20408001584472174, "grad_norm": 1.6809238091037046, "learning_rate": 1.8452533775979992e-05, "loss": 0.3451, "step": 2576 }, { "epoch": 0.2041592394533571, "grad_norm": 2.0909572192450225, "learning_rate": 1.845116229830802e-05, "loss": 0.4011, "step": 2577 }, { "epoch": 0.20423846306199248, "grad_norm": 1.5025030242150463, "learning_rate": 1.8449790264167672e-05, "loss": 0.281, "step": 2578 }, { "epoch": 0.20431768667062786, "grad_norm": 1.911453706834977, "learning_rate": 1.8448417673649292e-05, "loss": 0.5453, "step": 2579 }, { "epoch": 0.2043969102792632, "grad_norm": 1.955871959095756, "learning_rate": 1.844704452684326e-05, "loss": 0.3655, "step": 2580 }, { "epoch": 0.2044761338878986, "grad_norm": 1.7339672713256262, "learning_rate": 1.844567082383999e-05, "loss": 0.4493, "step": 2581 }, { "epoch": 0.20455535749653397, "grad_norm": 1.6377121197368745, "learning_rate": 1.8444296564729935e-05, "loss": 0.4747, "step": 2582 }, { "epoch": 0.20463458110516933, "grad_norm": 1.4724385422967265, "learning_rate": 1.8442921749603586e-05, "loss": 0.3875, "step": 2583 }, { "epoch": 0.2047138047138047, "grad_norm": 1.6695951246345508, "learning_rate": 1.8441546378551457e-05, "loss": 0.4559, "step": 2584 }, { "epoch": 0.2047930283224401, "grad_norm": 2.36697265882477, "learning_rate": 1.8440170451664122e-05, "loss": 0.4361, "step": 2585 }, { "epoch": 0.20487225193107547, "grad_norm": 1.8857696783758962, "learning_rate": 1.8438793969032175e-05, "loss": 0.4893, "step": 2586 }, { "epoch": 0.20495147553971083, "grad_norm": 1.6431798521518761, "learning_rate": 1.8437416930746248e-05, "loss": 0.4379, "step": 2587 }, { "epoch": 0.2050306991483462, "grad_norm": 1.3155267301350255, "learning_rate": 1.8436039336897015e-05, "loss": 0.2707, "step": 2588 }, { "epoch": 0.2051099227569816, "grad_norm": 1.8913709165051489, "learning_rate": 1.8434661187575183e-05, "loss": 0.4872, "step": 2589 }, { "epoch": 0.20518914636561694, "grad_norm": 1.5169523132561975, "learning_rate": 1.8433282482871497e-05, "loss": 0.3853, "step": 2590 }, { "epoch": 0.20526836997425232, "grad_norm": 1.8298331090397917, "learning_rate": 1.8431903222876737e-05, "loss": 0.4041, "step": 2591 }, { "epoch": 0.2053475935828877, "grad_norm": 1.603435168941413, "learning_rate": 1.8430523407681723e-05, "loss": 0.379, "step": 2592 }, { "epoch": 0.2054268171915231, "grad_norm": 2.1434324550226522, "learning_rate": 1.8429143037377305e-05, "loss": 0.5042, "step": 2593 }, { "epoch": 0.20550604080015844, "grad_norm": 1.9199817324725568, "learning_rate": 1.8427762112054378e-05, "loss": 0.4582, "step": 2594 }, { "epoch": 0.20558526440879382, "grad_norm": 1.447826585214332, "learning_rate": 1.842638063180387e-05, "loss": 0.3226, "step": 2595 }, { "epoch": 0.2056644880174292, "grad_norm": 1.8198817247789638, "learning_rate": 1.8424998596716743e-05, "loss": 0.454, "step": 2596 }, { "epoch": 0.20574371162606456, "grad_norm": 1.659408602894229, "learning_rate": 1.8423616006883994e-05, "loss": 0.2472, "step": 2597 }, { "epoch": 0.20582293523469994, "grad_norm": 1.7604255855472575, "learning_rate": 1.8422232862396663e-05, "loss": 0.3568, "step": 2598 }, { "epoch": 0.20590215884333532, "grad_norm": 1.9274813194205778, "learning_rate": 1.8420849163345824e-05, "loss": 0.4587, "step": 2599 }, { "epoch": 0.20598138245197067, "grad_norm": 2.561083447581465, "learning_rate": 1.8419464909822585e-05, "loss": 0.4281, "step": 2600 }, { "epoch": 0.20606060606060606, "grad_norm": 1.962994187728574, "learning_rate": 1.8418080101918095e-05, "loss": 0.4124, "step": 2601 }, { "epoch": 0.20613982966924144, "grad_norm": 1.368983002235622, "learning_rate": 1.8416694739723535e-05, "loss": 0.3553, "step": 2602 }, { "epoch": 0.20621905327787682, "grad_norm": 2.3986732703255464, "learning_rate": 1.841530882333012e-05, "loss": 0.5015, "step": 2603 }, { "epoch": 0.20629827688651217, "grad_norm": 1.9263548372278987, "learning_rate": 1.8413922352829118e-05, "loss": 0.3741, "step": 2604 }, { "epoch": 0.20637750049514755, "grad_norm": 1.6655041187445303, "learning_rate": 1.8412535328311813e-05, "loss": 0.3234, "step": 2605 }, { "epoch": 0.20645672410378293, "grad_norm": 1.963328820654345, "learning_rate": 1.8411147749869536e-05, "loss": 0.3981, "step": 2606 }, { "epoch": 0.2065359477124183, "grad_norm": 1.8272096800893565, "learning_rate": 1.840975961759365e-05, "loss": 0.4043, "step": 2607 }, { "epoch": 0.20661517132105367, "grad_norm": 1.900238551080896, "learning_rate": 1.8408370931575556e-05, "loss": 0.4265, "step": 2608 }, { "epoch": 0.20669439492968905, "grad_norm": 1.7544930611735488, "learning_rate": 1.84069816919067e-05, "loss": 0.3841, "step": 2609 }, { "epoch": 0.20677361853832443, "grad_norm": 1.7199774777341128, "learning_rate": 1.8405591898678546e-05, "loss": 0.3779, "step": 2610 }, { "epoch": 0.2068528421469598, "grad_norm": 1.9442477804276905, "learning_rate": 1.8404201551982612e-05, "loss": 0.4593, "step": 2611 }, { "epoch": 0.20693206575559517, "grad_norm": 1.8343352215609086, "learning_rate": 1.8402810651910444e-05, "loss": 0.3389, "step": 2612 }, { "epoch": 0.20701128936423055, "grad_norm": 1.8315102033785051, "learning_rate": 1.840141919855363e-05, "loss": 0.55, "step": 2613 }, { "epoch": 0.2070905129728659, "grad_norm": 1.849525054536266, "learning_rate": 1.8400027192003782e-05, "loss": 0.4543, "step": 2614 }, { "epoch": 0.20716973658150128, "grad_norm": 1.6906378152268264, "learning_rate": 1.8398634632352562e-05, "loss": 0.4921, "step": 2615 }, { "epoch": 0.20724896019013667, "grad_norm": 1.6673508123235308, "learning_rate": 1.8397241519691667e-05, "loss": 0.3689, "step": 2616 }, { "epoch": 0.20732818379877205, "grad_norm": 1.6086499528209952, "learning_rate": 1.839584785411282e-05, "loss": 0.395, "step": 2617 }, { "epoch": 0.2074074074074074, "grad_norm": 1.6810559848762459, "learning_rate": 1.839445363570779e-05, "loss": 0.3798, "step": 2618 }, { "epoch": 0.20748663101604278, "grad_norm": 1.7597778783938374, "learning_rate": 1.8393058864568383e-05, "loss": 0.4065, "step": 2619 }, { "epoch": 0.20756585462467816, "grad_norm": 1.6485520861819096, "learning_rate": 1.839166354078643e-05, "loss": 0.3813, "step": 2620 }, { "epoch": 0.20764507823331352, "grad_norm": 1.5647611079734072, "learning_rate": 1.8390267664453815e-05, "loss": 0.4694, "step": 2621 }, { "epoch": 0.2077243018419489, "grad_norm": 1.6148699561855906, "learning_rate": 1.8388871235662442e-05, "loss": 0.3077, "step": 2622 }, { "epoch": 0.20780352545058428, "grad_norm": 1.8008911503103382, "learning_rate": 1.8387474254504265e-05, "loss": 0.3734, "step": 2623 }, { "epoch": 0.20788274905921963, "grad_norm": 1.9166702698900357, "learning_rate": 1.8386076721071265e-05, "loss": 0.5234, "step": 2624 }, { "epoch": 0.20796197266785502, "grad_norm": 2.03191312770713, "learning_rate": 1.8384678635455467e-05, "loss": 0.3718, "step": 2625 }, { "epoch": 0.2080411962764904, "grad_norm": 1.5176904417514294, "learning_rate": 1.838327999774892e-05, "loss": 0.2921, "step": 2626 }, { "epoch": 0.20812041988512578, "grad_norm": 1.7179604568536229, "learning_rate": 1.838188080804373e-05, "loss": 0.3821, "step": 2627 }, { "epoch": 0.20819964349376113, "grad_norm": 1.889777973718155, "learning_rate": 1.8380481066432014e-05, "loss": 0.3609, "step": 2628 }, { "epoch": 0.20827886710239651, "grad_norm": 1.774755717518551, "learning_rate": 1.8379080773005947e-05, "loss": 0.3565, "step": 2629 }, { "epoch": 0.2083580907110319, "grad_norm": 1.5162758369751226, "learning_rate": 1.8377679927857727e-05, "loss": 0.3431, "step": 2630 }, { "epoch": 0.20843731431966725, "grad_norm": 1.3024030545869711, "learning_rate": 1.8376278531079594e-05, "loss": 0.325, "step": 2631 }, { "epoch": 0.20851653792830263, "grad_norm": 1.6443629547466578, "learning_rate": 1.8374876582763828e-05, "loss": 0.4108, "step": 2632 }, { "epoch": 0.208595761536938, "grad_norm": 1.962643786954383, "learning_rate": 1.8373474083002732e-05, "loss": 0.4563, "step": 2633 }, { "epoch": 0.2086749851455734, "grad_norm": 1.44752969385219, "learning_rate": 1.837207103188866e-05, "loss": 0.3447, "step": 2634 }, { "epoch": 0.20875420875420875, "grad_norm": 1.793074278860773, "learning_rate": 1.8370667429513992e-05, "loss": 0.4019, "step": 2635 }, { "epoch": 0.20883343236284413, "grad_norm": 2.170145185577981, "learning_rate": 1.8369263275971153e-05, "loss": 0.4981, "step": 2636 }, { "epoch": 0.2089126559714795, "grad_norm": 1.672341674158899, "learning_rate": 1.8367858571352603e-05, "loss": 0.3985, "step": 2637 }, { "epoch": 0.20899187958011486, "grad_norm": 1.8188718279869924, "learning_rate": 1.8366453315750822e-05, "loss": 0.434, "step": 2638 }, { "epoch": 0.20907110318875025, "grad_norm": 1.9095407206261972, "learning_rate": 1.8365047509258346e-05, "loss": 0.4344, "step": 2639 }, { "epoch": 0.20915032679738563, "grad_norm": 1.6736104526022306, "learning_rate": 1.8363641151967747e-05, "loss": 0.4236, "step": 2640 }, { "epoch": 0.20922955040602098, "grad_norm": 1.6438871798450496, "learning_rate": 1.836223424397162e-05, "loss": 0.3421, "step": 2641 }, { "epoch": 0.20930877401465636, "grad_norm": 1.8883599819287094, "learning_rate": 1.8360826785362603e-05, "loss": 0.3268, "step": 2642 }, { "epoch": 0.20938799762329174, "grad_norm": 1.9001925714115733, "learning_rate": 1.835941877623337e-05, "loss": 0.4171, "step": 2643 }, { "epoch": 0.20946722123192713, "grad_norm": 1.5832669542194682, "learning_rate": 1.835801021667664e-05, "loss": 0.3351, "step": 2644 }, { "epoch": 0.20954644484056248, "grad_norm": 1.8703361485334735, "learning_rate": 1.8356601106785148e-05, "loss": 0.4406, "step": 2645 }, { "epoch": 0.20962566844919786, "grad_norm": 1.595550029336583, "learning_rate": 1.8355191446651687e-05, "loss": 0.3235, "step": 2646 }, { "epoch": 0.20970489205783324, "grad_norm": 1.686953363833381, "learning_rate": 1.8353781236369065e-05, "loss": 0.4281, "step": 2647 }, { "epoch": 0.2097841156664686, "grad_norm": 1.8655691053204475, "learning_rate": 1.8352370476030147e-05, "loss": 0.3926, "step": 2648 }, { "epoch": 0.20986333927510398, "grad_norm": 2.191073565044196, "learning_rate": 1.8350959165727826e-05, "loss": 0.4275, "step": 2649 }, { "epoch": 0.20994256288373936, "grad_norm": 1.9816194150389066, "learning_rate": 1.8349547305555023e-05, "loss": 0.3713, "step": 2650 }, { "epoch": 0.21002178649237474, "grad_norm": 1.653602573804687, "learning_rate": 1.8348134895604708e-05, "loss": 0.3924, "step": 2651 }, { "epoch": 0.2101010101010101, "grad_norm": 1.7238723669521951, "learning_rate": 1.8346721935969878e-05, "loss": 0.4094, "step": 2652 }, { "epoch": 0.21018023370964548, "grad_norm": 1.7249791325183572, "learning_rate": 1.8345308426743568e-05, "loss": 0.3891, "step": 2653 }, { "epoch": 0.21025945731828086, "grad_norm": 1.8682894618405272, "learning_rate": 1.8343894368018854e-05, "loss": 0.4323, "step": 2654 }, { "epoch": 0.2103386809269162, "grad_norm": 1.4840531948415532, "learning_rate": 1.8342479759888844e-05, "loss": 0.2546, "step": 2655 }, { "epoch": 0.2104179045355516, "grad_norm": 1.9482152996128668, "learning_rate": 1.8341064602446686e-05, "loss": 0.416, "step": 2656 }, { "epoch": 0.21049712814418697, "grad_norm": 1.535018377750315, "learning_rate": 1.8339648895785556e-05, "loss": 0.2934, "step": 2657 }, { "epoch": 0.21057635175282233, "grad_norm": 1.8747616061324945, "learning_rate": 1.8338232639998672e-05, "loss": 0.3534, "step": 2658 }, { "epoch": 0.2106555753614577, "grad_norm": 1.6466467667253228, "learning_rate": 1.8336815835179295e-05, "loss": 0.357, "step": 2659 }, { "epoch": 0.2107347989700931, "grad_norm": 1.637187848391672, "learning_rate": 1.8335398481420705e-05, "loss": 0.4285, "step": 2660 }, { "epoch": 0.21081402257872847, "grad_norm": 1.9957554368431647, "learning_rate": 1.8333980578816234e-05, "loss": 0.5636, "step": 2661 }, { "epoch": 0.21089324618736383, "grad_norm": 1.63191350712576, "learning_rate": 1.8332562127459242e-05, "loss": 0.4423, "step": 2662 }, { "epoch": 0.2109724697959992, "grad_norm": 1.754744359268684, "learning_rate": 1.833114312744313e-05, "loss": 0.4373, "step": 2663 }, { "epoch": 0.2110516934046346, "grad_norm": 1.6104142975151547, "learning_rate": 1.8329723578861328e-05, "loss": 0.2937, "step": 2664 }, { "epoch": 0.21113091701326994, "grad_norm": 1.7110792611516918, "learning_rate": 1.8328303481807306e-05, "loss": 0.3618, "step": 2665 }, { "epoch": 0.21121014062190532, "grad_norm": 1.7434590964141203, "learning_rate": 1.832688283637458e-05, "loss": 0.4039, "step": 2666 }, { "epoch": 0.2112893642305407, "grad_norm": 1.6515419905805533, "learning_rate": 1.8325461642656676e-05, "loss": 0.3125, "step": 2667 }, { "epoch": 0.2113685878391761, "grad_norm": 1.5834906996870692, "learning_rate": 1.832403990074719e-05, "loss": 0.3741, "step": 2668 }, { "epoch": 0.21144781144781144, "grad_norm": 1.788676311711513, "learning_rate": 1.8322617610739726e-05, "loss": 0.3731, "step": 2669 }, { "epoch": 0.21152703505644682, "grad_norm": 1.6559201523777785, "learning_rate": 1.8321194772727938e-05, "loss": 0.3825, "step": 2670 }, { "epoch": 0.2116062586650822, "grad_norm": 1.6989067194342768, "learning_rate": 1.8319771386805514e-05, "loss": 0.3829, "step": 2671 }, { "epoch": 0.21168548227371756, "grad_norm": 1.4968904444907245, "learning_rate": 1.8318347453066176e-05, "loss": 0.3181, "step": 2672 }, { "epoch": 0.21176470588235294, "grad_norm": 1.686285837004851, "learning_rate": 1.8316922971603685e-05, "loss": 0.41, "step": 2673 }, { "epoch": 0.21184392949098832, "grad_norm": 1.7089345831056113, "learning_rate": 1.8315497942511836e-05, "loss": 0.4845, "step": 2674 }, { "epoch": 0.2119231530996237, "grad_norm": 1.6623978142932054, "learning_rate": 1.8314072365884455e-05, "loss": 0.4018, "step": 2675 }, { "epoch": 0.21200237670825905, "grad_norm": 1.7047144285076845, "learning_rate": 1.831264624181542e-05, "loss": 0.4098, "step": 2676 }, { "epoch": 0.21208160031689444, "grad_norm": 1.622948788936126, "learning_rate": 1.8311219570398618e-05, "loss": 0.4357, "step": 2677 }, { "epoch": 0.21216082392552982, "grad_norm": 1.7635284502071558, "learning_rate": 1.8309792351728006e-05, "loss": 0.3783, "step": 2678 }, { "epoch": 0.21224004753416517, "grad_norm": 2.014714300841769, "learning_rate": 1.830836458589755e-05, "loss": 0.4123, "step": 2679 }, { "epoch": 0.21231927114280055, "grad_norm": 1.7231774570827907, "learning_rate": 1.8306936273001258e-05, "loss": 0.3878, "step": 2680 }, { "epoch": 0.21239849475143593, "grad_norm": 1.7366222996862124, "learning_rate": 1.830550741313319e-05, "loss": 0.3738, "step": 2681 }, { "epoch": 0.2124777183600713, "grad_norm": 1.6061900708391434, "learning_rate": 1.830407800638742e-05, "loss": 0.3153, "step": 2682 }, { "epoch": 0.21255694196870667, "grad_norm": 1.9188884305998029, "learning_rate": 1.830264805285807e-05, "loss": 0.3542, "step": 2683 }, { "epoch": 0.21263616557734205, "grad_norm": 1.7415687603133443, "learning_rate": 1.8301217552639294e-05, "loss": 0.3654, "step": 2684 }, { "epoch": 0.21271538918597743, "grad_norm": 1.8435612091661784, "learning_rate": 1.8299786505825286e-05, "loss": 0.4261, "step": 2685 }, { "epoch": 0.2127946127946128, "grad_norm": 1.746758974317949, "learning_rate": 1.8298354912510273e-05, "loss": 0.3495, "step": 2686 }, { "epoch": 0.21287383640324817, "grad_norm": 1.824189883935391, "learning_rate": 1.8296922772788522e-05, "loss": 0.5282, "step": 2687 }, { "epoch": 0.21295306001188355, "grad_norm": 2.15997463525689, "learning_rate": 1.8295490086754325e-05, "loss": 0.4156, "step": 2688 }, { "epoch": 0.2130322836205189, "grad_norm": 1.850894787197073, "learning_rate": 1.829405685450202e-05, "loss": 0.3915, "step": 2689 }, { "epoch": 0.21311150722915428, "grad_norm": 1.5642180755623767, "learning_rate": 1.8292623076125983e-05, "loss": 0.4266, "step": 2690 }, { "epoch": 0.21319073083778967, "grad_norm": 1.610509821557913, "learning_rate": 1.8291188751720615e-05, "loss": 0.3243, "step": 2691 }, { "epoch": 0.21326995444642505, "grad_norm": 1.904450189224998, "learning_rate": 1.828975388138036e-05, "loss": 0.4101, "step": 2692 }, { "epoch": 0.2133491780550604, "grad_norm": 1.5324099979749521, "learning_rate": 1.8288318465199705e-05, "loss": 0.3095, "step": 2693 }, { "epoch": 0.21342840166369578, "grad_norm": 1.9523717857796727, "learning_rate": 1.8286882503273157e-05, "loss": 0.4412, "step": 2694 }, { "epoch": 0.21350762527233116, "grad_norm": 2.105285197083128, "learning_rate": 1.828544599569527e-05, "loss": 0.3897, "step": 2695 }, { "epoch": 0.21358684888096652, "grad_norm": 1.8330348790057602, "learning_rate": 1.8284008942560634e-05, "loss": 0.4494, "step": 2696 }, { "epoch": 0.2136660724896019, "grad_norm": 1.7439570106157116, "learning_rate": 1.8282571343963865e-05, "loss": 0.4093, "step": 2697 }, { "epoch": 0.21374529609823728, "grad_norm": 1.571739755819498, "learning_rate": 1.8281133199999628e-05, "loss": 0.314, "step": 2698 }, { "epoch": 0.21382451970687263, "grad_norm": 2.1162420098285546, "learning_rate": 1.8279694510762616e-05, "loss": 0.5275, "step": 2699 }, { "epoch": 0.21390374331550802, "grad_norm": 1.8263679132809585, "learning_rate": 1.8278255276347563e-05, "loss": 0.3863, "step": 2700 }, { "epoch": 0.2139829669241434, "grad_norm": 1.6512850796616958, "learning_rate": 1.8276815496849227e-05, "loss": 0.3041, "step": 2701 }, { "epoch": 0.21406219053277878, "grad_norm": 2.0732941465069255, "learning_rate": 1.827537517236242e-05, "loss": 0.5025, "step": 2702 }, { "epoch": 0.21414141414141413, "grad_norm": 1.6156169966806242, "learning_rate": 1.8273934302981975e-05, "loss": 0.3564, "step": 2703 }, { "epoch": 0.21422063775004951, "grad_norm": 1.54888986829638, "learning_rate": 1.8272492888802767e-05, "loss": 0.3598, "step": 2704 }, { "epoch": 0.2142998613586849, "grad_norm": 1.694783413216878, "learning_rate": 1.8271050929919707e-05, "loss": 0.3489, "step": 2705 }, { "epoch": 0.21437908496732025, "grad_norm": 1.8513059171948416, "learning_rate": 1.8269608426427743e-05, "loss": 0.4858, "step": 2706 }, { "epoch": 0.21445830857595563, "grad_norm": 1.632221511025351, "learning_rate": 1.8268165378421852e-05, "loss": 0.4084, "step": 2707 }, { "epoch": 0.214537532184591, "grad_norm": 1.4226988550174064, "learning_rate": 1.826672178599706e-05, "loss": 0.3306, "step": 2708 }, { "epoch": 0.2146167557932264, "grad_norm": 1.984979993246484, "learning_rate": 1.826527764924841e-05, "loss": 0.456, "step": 2709 }, { "epoch": 0.21469597940186175, "grad_norm": 2.0728136191318725, "learning_rate": 1.8263832968271e-05, "loss": 0.4243, "step": 2710 }, { "epoch": 0.21477520301049713, "grad_norm": 1.7036106920163208, "learning_rate": 1.826238774315995e-05, "loss": 0.3183, "step": 2711 }, { "epoch": 0.2148544266191325, "grad_norm": 1.6966715810963966, "learning_rate": 1.8260941974010425e-05, "loss": 0.3235, "step": 2712 }, { "epoch": 0.21493365022776786, "grad_norm": 1.7073250040361634, "learning_rate": 1.825949566091762e-05, "loss": 0.282, "step": 2713 }, { "epoch": 0.21501287383640325, "grad_norm": 1.7404582039089134, "learning_rate": 1.8258048803976763e-05, "loss": 0.3556, "step": 2714 }, { "epoch": 0.21509209744503863, "grad_norm": 1.7623883626518861, "learning_rate": 1.8256601403283133e-05, "loss": 0.3379, "step": 2715 }, { "epoch": 0.215171321053674, "grad_norm": 1.7377620999413776, "learning_rate": 1.8255153458932028e-05, "loss": 0.3402, "step": 2716 }, { "epoch": 0.21525054466230936, "grad_norm": 1.9517174103467538, "learning_rate": 1.825370497101879e-05, "loss": 0.3924, "step": 2717 }, { "epoch": 0.21532976827094474, "grad_norm": 2.3478029991604417, "learning_rate": 1.825225593963879e-05, "loss": 0.4048, "step": 2718 }, { "epoch": 0.21540899187958013, "grad_norm": 1.6706776008472004, "learning_rate": 1.8250806364887446e-05, "loss": 0.3984, "step": 2719 }, { "epoch": 0.21548821548821548, "grad_norm": 1.6604358040092195, "learning_rate": 1.8249356246860205e-05, "loss": 0.3053, "step": 2720 }, { "epoch": 0.21556743909685086, "grad_norm": 1.7716968969404763, "learning_rate": 1.8247905585652545e-05, "loss": 0.4634, "step": 2721 }, { "epoch": 0.21564666270548624, "grad_norm": 1.5601521197172752, "learning_rate": 1.824645438135999e-05, "loss": 0.3034, "step": 2722 }, { "epoch": 0.2157258863141216, "grad_norm": 2.037816349771718, "learning_rate": 1.8245002634078095e-05, "loss": 0.4467, "step": 2723 }, { "epoch": 0.21580510992275698, "grad_norm": 1.7916226037576128, "learning_rate": 1.8243550343902447e-05, "loss": 0.3722, "step": 2724 }, { "epoch": 0.21588433353139236, "grad_norm": 1.5707068563122943, "learning_rate": 1.8242097510928672e-05, "loss": 0.4196, "step": 2725 }, { "epoch": 0.21596355714002774, "grad_norm": 1.564041407140951, "learning_rate": 1.824064413525244e-05, "loss": 0.3884, "step": 2726 }, { "epoch": 0.2160427807486631, "grad_norm": 1.7789757202395033, "learning_rate": 1.823919021696944e-05, "loss": 0.3709, "step": 2727 }, { "epoch": 0.21612200435729848, "grad_norm": 1.802203886486031, "learning_rate": 1.8237735756175408e-05, "loss": 0.4548, "step": 2728 }, { "epoch": 0.21620122796593386, "grad_norm": 1.4956295081667574, "learning_rate": 1.8236280752966115e-05, "loss": 0.3315, "step": 2729 }, { "epoch": 0.2162804515745692, "grad_norm": 1.8126358749315155, "learning_rate": 1.8234825207437365e-05, "loss": 0.3911, "step": 2730 }, { "epoch": 0.2163596751832046, "grad_norm": 1.6198279392032298, "learning_rate": 1.8233369119685e-05, "loss": 0.3624, "step": 2731 }, { "epoch": 0.21643889879183997, "grad_norm": 1.7352450828221937, "learning_rate": 1.8231912489804893e-05, "loss": 0.3932, "step": 2732 }, { "epoch": 0.21651812240047535, "grad_norm": 1.7283964724711196, "learning_rate": 1.8230455317892957e-05, "loss": 0.2665, "step": 2733 }, { "epoch": 0.2165973460091107, "grad_norm": 1.680340402489577, "learning_rate": 1.822899760404514e-05, "loss": 0.3792, "step": 2734 }, { "epoch": 0.2166765696177461, "grad_norm": 1.7220318125592986, "learning_rate": 1.822753934835743e-05, "loss": 0.4716, "step": 2735 }, { "epoch": 0.21675579322638147, "grad_norm": 1.965210521744279, "learning_rate": 1.822608055092584e-05, "loss": 0.4193, "step": 2736 }, { "epoch": 0.21683501683501682, "grad_norm": 1.9109587540279396, "learning_rate": 1.8224621211846426e-05, "loss": 0.5519, "step": 2737 }, { "epoch": 0.2169142404436522, "grad_norm": 1.7767288955712042, "learning_rate": 1.8223161331215285e-05, "loss": 0.4097, "step": 2738 }, { "epoch": 0.2169934640522876, "grad_norm": 1.9563216952486175, "learning_rate": 1.822170090912853e-05, "loss": 0.3947, "step": 2739 }, { "epoch": 0.21707268766092294, "grad_norm": 1.4339421794399885, "learning_rate": 1.8220239945682337e-05, "loss": 0.2488, "step": 2740 }, { "epoch": 0.21715191126955832, "grad_norm": 1.6193319390191063, "learning_rate": 1.8218778440972893e-05, "loss": 0.4883, "step": 2741 }, { "epoch": 0.2172311348781937, "grad_norm": 1.5099276519890195, "learning_rate": 1.8217316395096438e-05, "loss": 0.3304, "step": 2742 }, { "epoch": 0.21731035848682909, "grad_norm": 1.978902675384766, "learning_rate": 1.8215853808149237e-05, "loss": 0.3174, "step": 2743 }, { "epoch": 0.21738958209546444, "grad_norm": 1.8533347508442921, "learning_rate": 1.8214390680227588e-05, "loss": 0.3355, "step": 2744 }, { "epoch": 0.21746880570409982, "grad_norm": 2.0702849627853666, "learning_rate": 1.8212927011427847e-05, "loss": 0.4839, "step": 2745 }, { "epoch": 0.2175480293127352, "grad_norm": 1.7558025106203288, "learning_rate": 1.8211462801846375e-05, "loss": 0.4176, "step": 2746 }, { "epoch": 0.21762725292137056, "grad_norm": 2.3722666384626603, "learning_rate": 1.820999805157959e-05, "loss": 0.3824, "step": 2747 }, { "epoch": 0.21770647653000594, "grad_norm": 1.704705465251105, "learning_rate": 1.8208532760723937e-05, "loss": 0.3603, "step": 2748 }, { "epoch": 0.21778570013864132, "grad_norm": 2.014080694163564, "learning_rate": 1.82070669293759e-05, "loss": 0.4171, "step": 2749 }, { "epoch": 0.2178649237472767, "grad_norm": 1.8452739910897396, "learning_rate": 1.8205600557631995e-05, "loss": 0.4338, "step": 2750 }, { "epoch": 0.21794414735591205, "grad_norm": 1.8242509132529519, "learning_rate": 1.8204133645588774e-05, "loss": 0.4571, "step": 2751 }, { "epoch": 0.21802337096454744, "grad_norm": 2.0740125033935666, "learning_rate": 1.8202666193342834e-05, "loss": 0.4194, "step": 2752 }, { "epoch": 0.21810259457318282, "grad_norm": 2.1307657354917966, "learning_rate": 1.8201198200990787e-05, "loss": 0.3519, "step": 2753 }, { "epoch": 0.21818181818181817, "grad_norm": 2.1252030817159673, "learning_rate": 1.8199729668629303e-05, "loss": 0.4235, "step": 2754 }, { "epoch": 0.21826104179045355, "grad_norm": 1.8031173174969104, "learning_rate": 1.8198260596355077e-05, "loss": 0.3511, "step": 2755 }, { "epoch": 0.21834026539908893, "grad_norm": 1.964246706829193, "learning_rate": 1.8196790984264835e-05, "loss": 0.5037, "step": 2756 }, { "epoch": 0.21841948900772432, "grad_norm": 2.284880724024758, "learning_rate": 1.8195320832455347e-05, "loss": 0.4277, "step": 2757 }, { "epoch": 0.21849871261635967, "grad_norm": 1.6023385673515118, "learning_rate": 1.819385014102342e-05, "loss": 0.3717, "step": 2758 }, { "epoch": 0.21857793622499505, "grad_norm": 2.2202046474705766, "learning_rate": 1.8192378910065882e-05, "loss": 0.3839, "step": 2759 }, { "epoch": 0.21865715983363043, "grad_norm": 1.6111925963514728, "learning_rate": 1.8190907139679614e-05, "loss": 0.364, "step": 2760 }, { "epoch": 0.21873638344226579, "grad_norm": 2.3162717874605097, "learning_rate": 1.8189434829961525e-05, "loss": 0.4177, "step": 2761 }, { "epoch": 0.21881560705090117, "grad_norm": 1.8462321518533145, "learning_rate": 1.8187961981008554e-05, "loss": 0.3343, "step": 2762 }, { "epoch": 0.21889483065953655, "grad_norm": 1.8098231600461183, "learning_rate": 1.8186488592917686e-05, "loss": 0.3498, "step": 2763 }, { "epoch": 0.2189740542681719, "grad_norm": 1.9612214156109713, "learning_rate": 1.8185014665785936e-05, "loss": 0.4596, "step": 2764 }, { "epoch": 0.21905327787680728, "grad_norm": 1.7332165589616413, "learning_rate": 1.8183540199710354e-05, "loss": 0.3906, "step": 2765 }, { "epoch": 0.21913250148544267, "grad_norm": 1.82242732245487, "learning_rate": 1.8182065194788024e-05, "loss": 0.4079, "step": 2766 }, { "epoch": 0.21921172509407805, "grad_norm": 2.221807084570096, "learning_rate": 1.8180589651116073e-05, "loss": 0.4526, "step": 2767 }, { "epoch": 0.2192909487027134, "grad_norm": 1.7656027807902084, "learning_rate": 1.8179113568791656e-05, "loss": 0.4196, "step": 2768 }, { "epoch": 0.21937017231134878, "grad_norm": 2.0644646990812494, "learning_rate": 1.8177636947911964e-05, "loss": 0.5511, "step": 2769 }, { "epoch": 0.21944939591998416, "grad_norm": 1.9476187845744724, "learning_rate": 1.817615978857423e-05, "loss": 0.4359, "step": 2770 }, { "epoch": 0.21952861952861952, "grad_norm": 1.9106731416280978, "learning_rate": 1.8174682090875713e-05, "loss": 0.5247, "step": 2771 }, { "epoch": 0.2196078431372549, "grad_norm": 1.6803802827016179, "learning_rate": 1.8173203854913714e-05, "loss": 0.361, "step": 2772 }, { "epoch": 0.21968706674589028, "grad_norm": 1.6536349493697018, "learning_rate": 1.817172508078557e-05, "loss": 0.3716, "step": 2773 }, { "epoch": 0.21976629035452566, "grad_norm": 1.8612504702984671, "learning_rate": 1.817024576858865e-05, "loss": 0.4081, "step": 2774 }, { "epoch": 0.21984551396316102, "grad_norm": 2.1841022591359147, "learning_rate": 1.8168765918420358e-05, "loss": 0.4454, "step": 2775 }, { "epoch": 0.2199247375717964, "grad_norm": 1.8181876620385793, "learning_rate": 1.8167285530378134e-05, "loss": 0.3965, "step": 2776 }, { "epoch": 0.22000396118043178, "grad_norm": 1.6906033996564962, "learning_rate": 1.8165804604559455e-05, "loss": 0.3544, "step": 2777 }, { "epoch": 0.22008318478906713, "grad_norm": 1.6687408918256246, "learning_rate": 1.816432314106184e-05, "loss": 0.3091, "step": 2778 }, { "epoch": 0.2201624083977025, "grad_norm": 1.5988539486597675, "learning_rate": 1.8162841139982827e-05, "loss": 0.4036, "step": 2779 }, { "epoch": 0.2202416320063379, "grad_norm": 1.8254445729767457, "learning_rate": 1.816135860142e-05, "loss": 0.378, "step": 2780 }, { "epoch": 0.22032085561497325, "grad_norm": 1.634245166269344, "learning_rate": 1.8159875525470984e-05, "loss": 0.368, "step": 2781 }, { "epoch": 0.22040007922360863, "grad_norm": 1.7144973469948466, "learning_rate": 1.815839191223342e-05, "loss": 0.3423, "step": 2782 }, { "epoch": 0.220479302832244, "grad_norm": 2.1038388548711833, "learning_rate": 1.815690776180501e-05, "loss": 0.4847, "step": 2783 }, { "epoch": 0.2205585264408794, "grad_norm": 1.9930947548245068, "learning_rate": 1.815542307428347e-05, "loss": 0.4618, "step": 2784 }, { "epoch": 0.22063775004951475, "grad_norm": 1.8684329676605897, "learning_rate": 1.8153937849766567e-05, "loss": 0.3254, "step": 2785 }, { "epoch": 0.22071697365815013, "grad_norm": 1.7788005760488583, "learning_rate": 1.8152452088352084e-05, "loss": 0.4312, "step": 2786 }, { "epoch": 0.2207961972667855, "grad_norm": 1.9557187726641876, "learning_rate": 1.8150965790137863e-05, "loss": 0.4146, "step": 2787 }, { "epoch": 0.22087542087542086, "grad_norm": 1.7557325322986894, "learning_rate": 1.814947895522176e-05, "loss": 0.3931, "step": 2788 }, { "epoch": 0.22095464448405625, "grad_norm": 1.8565829578528064, "learning_rate": 1.8147991583701685e-05, "loss": 0.442, "step": 2789 }, { "epoch": 0.22103386809269163, "grad_norm": 1.6727126799031429, "learning_rate": 1.8146503675675568e-05, "loss": 0.3671, "step": 2790 }, { "epoch": 0.221113091701327, "grad_norm": 1.7328901425564518, "learning_rate": 1.814501523124138e-05, "loss": 0.4493, "step": 2791 }, { "epoch": 0.22119231530996236, "grad_norm": 1.7676771472000006, "learning_rate": 1.8143526250497134e-05, "loss": 0.3841, "step": 2792 }, { "epoch": 0.22127153891859774, "grad_norm": 1.74560570394591, "learning_rate": 1.8142036733540868e-05, "loss": 0.3427, "step": 2793 }, { "epoch": 0.22135076252723312, "grad_norm": 2.306174376238809, "learning_rate": 1.814054668047066e-05, "loss": 0.5288, "step": 2794 }, { "epoch": 0.22142998613586848, "grad_norm": 1.850007955548742, "learning_rate": 1.8139056091384623e-05, "loss": 0.4305, "step": 2795 }, { "epoch": 0.22150920974450386, "grad_norm": 1.5887679853443888, "learning_rate": 1.8137564966380905e-05, "loss": 0.3317, "step": 2796 }, { "epoch": 0.22158843335313924, "grad_norm": 2.2075788649659263, "learning_rate": 1.813607330555769e-05, "loss": 0.5342, "step": 2797 }, { "epoch": 0.2216676569617746, "grad_norm": 1.9089197191582707, "learning_rate": 1.8134581109013193e-05, "loss": 0.4259, "step": 2798 }, { "epoch": 0.22174688057040998, "grad_norm": 1.6996826694914087, "learning_rate": 1.8133088376845675e-05, "loss": 0.4196, "step": 2799 }, { "epoch": 0.22182610417904536, "grad_norm": 1.6764364786181034, "learning_rate": 1.8131595109153416e-05, "loss": 0.3646, "step": 2800 }, { "epoch": 0.22190532778768074, "grad_norm": 1.6130005178902656, "learning_rate": 1.813010130603475e-05, "loss": 0.3698, "step": 2801 }, { "epoch": 0.2219845513963161, "grad_norm": 1.6594515205964468, "learning_rate": 1.812860696758803e-05, "loss": 0.3955, "step": 2802 }, { "epoch": 0.22206377500495147, "grad_norm": 1.8965793100215345, "learning_rate": 1.8127112093911655e-05, "loss": 0.3717, "step": 2803 }, { "epoch": 0.22214299861358686, "grad_norm": 1.926036864584367, "learning_rate": 1.8125616685104055e-05, "loss": 0.4084, "step": 2804 }, { "epoch": 0.2222222222222222, "grad_norm": 2.416660042537055, "learning_rate": 1.8124120741263692e-05, "loss": 0.525, "step": 2805 }, { "epoch": 0.2223014458308576, "grad_norm": 2.344228003804821, "learning_rate": 1.812262426248907e-05, "loss": 0.3643, "step": 2806 }, { "epoch": 0.22238066943949297, "grad_norm": 1.4144655318403223, "learning_rate": 1.8121127248878726e-05, "loss": 0.3615, "step": 2807 }, { "epoch": 0.22245989304812835, "grad_norm": 2.033237364665356, "learning_rate": 1.8119629700531228e-05, "loss": 0.4994, "step": 2808 }, { "epoch": 0.2225391166567637, "grad_norm": 1.532489518273933, "learning_rate": 1.8118131617545183e-05, "loss": 0.3889, "step": 2809 }, { "epoch": 0.2226183402653991, "grad_norm": 1.8949282816876172, "learning_rate": 1.8116633000019233e-05, "loss": 0.4014, "step": 2810 }, { "epoch": 0.22269756387403447, "grad_norm": 1.7934510997309172, "learning_rate": 1.8115133848052052e-05, "loss": 0.4296, "step": 2811 }, { "epoch": 0.22277678748266982, "grad_norm": 1.6066842324156256, "learning_rate": 1.8113634161742356e-05, "loss": 0.3598, "step": 2812 }, { "epoch": 0.2228560110913052, "grad_norm": 1.6098163215461445, "learning_rate": 1.8112133941188892e-05, "loss": 0.3267, "step": 2813 }, { "epoch": 0.2229352346999406, "grad_norm": 1.5888658804890845, "learning_rate": 1.811063318649044e-05, "loss": 0.4491, "step": 2814 }, { "epoch": 0.22301445830857597, "grad_norm": 1.8857632465348089, "learning_rate": 1.8109131897745823e-05, "loss": 0.6655, "step": 2815 }, { "epoch": 0.22309368191721132, "grad_norm": 1.4956635509226806, "learning_rate": 1.8107630075053883e-05, "loss": 0.3003, "step": 2816 }, { "epoch": 0.2231729055258467, "grad_norm": 1.774628240460687, "learning_rate": 1.810612771851352e-05, "loss": 0.4392, "step": 2817 }, { "epoch": 0.22325212913448209, "grad_norm": 1.6533402235959829, "learning_rate": 1.8104624828223644e-05, "loss": 0.3316, "step": 2818 }, { "epoch": 0.22333135274311744, "grad_norm": 1.6262057208466414, "learning_rate": 1.8103121404283222e-05, "loss": 0.2931, "step": 2819 }, { "epoch": 0.22341057635175282, "grad_norm": 2.148579006569893, "learning_rate": 1.8101617446791248e-05, "loss": 0.4501, "step": 2820 }, { "epoch": 0.2234897999603882, "grad_norm": 1.870992128770809, "learning_rate": 1.8100112955846746e-05, "loss": 0.4275, "step": 2821 }, { "epoch": 0.22356902356902356, "grad_norm": 1.539892464142909, "learning_rate": 1.8098607931548782e-05, "loss": 0.3532, "step": 2822 }, { "epoch": 0.22364824717765894, "grad_norm": 1.9338981499494745, "learning_rate": 1.8097102373996453e-05, "loss": 0.3456, "step": 2823 }, { "epoch": 0.22372747078629432, "grad_norm": 2.076227453999808, "learning_rate": 1.809559628328889e-05, "loss": 0.4136, "step": 2824 }, { "epoch": 0.2238066943949297, "grad_norm": 1.7753133288999525, "learning_rate": 1.8094089659525274e-05, "loss": 0.3267, "step": 2825 }, { "epoch": 0.22388591800356505, "grad_norm": 1.7890022175119427, "learning_rate": 1.8092582502804793e-05, "loss": 0.3531, "step": 2826 }, { "epoch": 0.22396514161220044, "grad_norm": 1.8333210772697812, "learning_rate": 1.8091074813226696e-05, "loss": 0.3815, "step": 2827 }, { "epoch": 0.22404436522083582, "grad_norm": 1.748560068670978, "learning_rate": 1.8089566590890253e-05, "loss": 0.3506, "step": 2828 }, { "epoch": 0.22412358882947117, "grad_norm": 1.5594139065430026, "learning_rate": 1.8088057835894775e-05, "loss": 0.2637, "step": 2829 }, { "epoch": 0.22420281243810655, "grad_norm": 1.4579240095448367, "learning_rate": 1.8086548548339604e-05, "loss": 0.3963, "step": 2830 }, { "epoch": 0.22428203604674193, "grad_norm": 1.805063003062504, "learning_rate": 1.8085038728324123e-05, "loss": 0.5023, "step": 2831 }, { "epoch": 0.22436125965537732, "grad_norm": 1.5288625833991882, "learning_rate": 1.8083528375947744e-05, "loss": 0.3372, "step": 2832 }, { "epoch": 0.22444048326401267, "grad_norm": 1.8536161647210274, "learning_rate": 1.808201749130992e-05, "loss": 0.4984, "step": 2833 }, { "epoch": 0.22451970687264805, "grad_norm": 1.7108498254541658, "learning_rate": 1.8080506074510128e-05, "loss": 0.4324, "step": 2834 }, { "epoch": 0.22459893048128343, "grad_norm": 1.6716518554392972, "learning_rate": 1.8078994125647896e-05, "loss": 0.3309, "step": 2835 }, { "epoch": 0.22467815408991879, "grad_norm": 1.6324568286415593, "learning_rate": 1.807748164482277e-05, "loss": 0.2995, "step": 2836 }, { "epoch": 0.22475737769855417, "grad_norm": 1.75501913264354, "learning_rate": 1.8075968632134343e-05, "loss": 0.3935, "step": 2837 }, { "epoch": 0.22483660130718955, "grad_norm": 1.409315770587425, "learning_rate": 1.8074455087682247e-05, "loss": 0.3316, "step": 2838 }, { "epoch": 0.2249158249158249, "grad_norm": 1.6932029895477951, "learning_rate": 1.8072941011566133e-05, "loss": 0.3507, "step": 2839 }, { "epoch": 0.22499504852446028, "grad_norm": 2.0566322095679914, "learning_rate": 1.8071426403885698e-05, "loss": 0.4278, "step": 2840 }, { "epoch": 0.22507427213309567, "grad_norm": 1.9683848925073446, "learning_rate": 1.8069911264740667e-05, "loss": 0.4925, "step": 2841 }, { "epoch": 0.22515349574173105, "grad_norm": 1.3995831132540693, "learning_rate": 1.8068395594230815e-05, "loss": 0.3162, "step": 2842 }, { "epoch": 0.2252327193503664, "grad_norm": 1.5986686953594085, "learning_rate": 1.8066879392455932e-05, "loss": 0.3469, "step": 2843 }, { "epoch": 0.22531194295900178, "grad_norm": 1.7270814562069732, "learning_rate": 1.8065362659515856e-05, "loss": 0.3735, "step": 2844 }, { "epoch": 0.22539116656763716, "grad_norm": 1.3954595344249512, "learning_rate": 1.806384539551046e-05, "loss": 0.2429, "step": 2845 }, { "epoch": 0.22547039017627252, "grad_norm": 1.7833586740106087, "learning_rate": 1.8062327600539643e-05, "loss": 0.4897, "step": 2846 }, { "epoch": 0.2255496137849079, "grad_norm": 1.4157658693692063, "learning_rate": 1.8060809274703352e-05, "loss": 0.3503, "step": 2847 }, { "epoch": 0.22562883739354328, "grad_norm": 1.5738266902854912, "learning_rate": 1.805929041810155e-05, "loss": 0.3345, "step": 2848 }, { "epoch": 0.22570806100217866, "grad_norm": 1.6559105230974744, "learning_rate": 1.8057771030834255e-05, "loss": 0.3446, "step": 2849 }, { "epoch": 0.22578728461081402, "grad_norm": 1.6637403875726988, "learning_rate": 1.8056251113001508e-05, "loss": 0.4226, "step": 2850 }, { "epoch": 0.2258665082194494, "grad_norm": 1.9436512872529066, "learning_rate": 1.8054730664703393e-05, "loss": 0.4183, "step": 2851 }, { "epoch": 0.22594573182808478, "grad_norm": 2.232681603583807, "learning_rate": 1.8053209686040017e-05, "loss": 0.434, "step": 2852 }, { "epoch": 0.22602495543672013, "grad_norm": 1.4864096813534597, "learning_rate": 1.8051688177111532e-05, "loss": 0.2568, "step": 2853 }, { "epoch": 0.2261041790453555, "grad_norm": 1.3801866271700975, "learning_rate": 1.805016613801813e-05, "loss": 0.2735, "step": 2854 }, { "epoch": 0.2261834026539909, "grad_norm": 1.6727955446182847, "learning_rate": 1.8048643568860015e-05, "loss": 0.4645, "step": 2855 }, { "epoch": 0.22626262626262628, "grad_norm": 1.6927497343517535, "learning_rate": 1.804712046973745e-05, "loss": 0.408, "step": 2856 }, { "epoch": 0.22634184987126163, "grad_norm": 2.1394470654309297, "learning_rate": 1.8045596840750722e-05, "loss": 0.5207, "step": 2857 }, { "epoch": 0.226421073479897, "grad_norm": 1.9842827002618435, "learning_rate": 1.804407268200016e-05, "loss": 0.6046, "step": 2858 }, { "epoch": 0.2265002970885324, "grad_norm": 1.6780279488009289, "learning_rate": 1.8042547993586114e-05, "loss": 0.3369, "step": 2859 }, { "epoch": 0.22657952069716775, "grad_norm": 1.704333753821867, "learning_rate": 1.8041022775608977e-05, "loss": 0.3989, "step": 2860 }, { "epoch": 0.22665874430580313, "grad_norm": 1.7536418479352935, "learning_rate": 1.803949702816919e-05, "loss": 0.4659, "step": 2861 }, { "epoch": 0.2267379679144385, "grad_norm": 1.6839339653323206, "learning_rate": 1.80379707513672e-05, "loss": 0.439, "step": 2862 }, { "epoch": 0.22681719152307386, "grad_norm": 1.7368961407423944, "learning_rate": 1.8036443945303514e-05, "loss": 0.3599, "step": 2863 }, { "epoch": 0.22689641513170924, "grad_norm": 1.8001728866348243, "learning_rate": 1.8034916610078665e-05, "loss": 0.4409, "step": 2864 }, { "epoch": 0.22697563874034463, "grad_norm": 1.6520122965800768, "learning_rate": 1.8033388745793218e-05, "loss": 0.4133, "step": 2865 }, { "epoch": 0.22705486234898, "grad_norm": 2.157868520460293, "learning_rate": 1.8031860352547777e-05, "loss": 0.5633, "step": 2866 }, { "epoch": 0.22713408595761536, "grad_norm": 1.9273442565279941, "learning_rate": 1.8030331430442974e-05, "loss": 0.4675, "step": 2867 }, { "epoch": 0.22721330956625074, "grad_norm": 1.5075170720673747, "learning_rate": 1.8028801979579487e-05, "loss": 0.4169, "step": 2868 }, { "epoch": 0.22729253317488612, "grad_norm": 2.1337842069408945, "learning_rate": 1.8027272000058028e-05, "loss": 0.4258, "step": 2869 }, { "epoch": 0.22737175678352148, "grad_norm": 1.5442012032111099, "learning_rate": 1.8025741491979326e-05, "loss": 0.3592, "step": 2870 }, { "epoch": 0.22745098039215686, "grad_norm": 1.5826346077276343, "learning_rate": 1.8024210455444168e-05, "loss": 0.4305, "step": 2871 }, { "epoch": 0.22753020400079224, "grad_norm": 1.6139297922178206, "learning_rate": 1.8022678890553364e-05, "loss": 0.4016, "step": 2872 }, { "epoch": 0.22760942760942762, "grad_norm": 1.6565453135192383, "learning_rate": 1.8021146797407752e-05, "loss": 0.397, "step": 2873 }, { "epoch": 0.22768865121806298, "grad_norm": 1.7703330209919277, "learning_rate": 1.801961417610822e-05, "loss": 0.4182, "step": 2874 }, { "epoch": 0.22776787482669836, "grad_norm": 2.0172280662088165, "learning_rate": 1.801808102675568e-05, "loss": 0.461, "step": 2875 }, { "epoch": 0.22784709843533374, "grad_norm": 1.5261982453685994, "learning_rate": 1.801654734945109e-05, "loss": 0.293, "step": 2876 }, { "epoch": 0.2279263220439691, "grad_norm": 1.7126641685235788, "learning_rate": 1.801501314429543e-05, "loss": 0.3706, "step": 2877 }, { "epoch": 0.22800554565260447, "grad_norm": 1.698682478180822, "learning_rate": 1.801347841138972e-05, "loss": 0.3803, "step": 2878 }, { "epoch": 0.22808476926123986, "grad_norm": 1.7174235401223599, "learning_rate": 1.8011943150835013e-05, "loss": 0.3983, "step": 2879 }, { "epoch": 0.2281639928698752, "grad_norm": 1.8542662705192385, "learning_rate": 1.80104073627324e-05, "loss": 0.3471, "step": 2880 }, { "epoch": 0.2282432164785106, "grad_norm": 1.936275162929922, "learning_rate": 1.8008871047183005e-05, "loss": 0.392, "step": 2881 }, { "epoch": 0.22832244008714597, "grad_norm": 1.5733888253616128, "learning_rate": 1.800733420428799e-05, "loss": 0.2975, "step": 2882 }, { "epoch": 0.22840166369578135, "grad_norm": 1.8543446428176829, "learning_rate": 1.8005796834148545e-05, "loss": 0.2992, "step": 2883 }, { "epoch": 0.2284808873044167, "grad_norm": 1.7376819187485757, "learning_rate": 1.8004258936865902e-05, "loss": 0.4457, "step": 2884 }, { "epoch": 0.2285601109130521, "grad_norm": 1.856015607302783, "learning_rate": 1.800272051254132e-05, "loss": 0.3806, "step": 2885 }, { "epoch": 0.22863933452168747, "grad_norm": 1.9483049095172358, "learning_rate": 1.80011815612761e-05, "loss": 0.4488, "step": 2886 }, { "epoch": 0.22871855813032282, "grad_norm": 1.5645732638142311, "learning_rate": 1.7999642083171576e-05, "loss": 0.3392, "step": 2887 }, { "epoch": 0.2287977817389582, "grad_norm": 1.5651473662952953, "learning_rate": 1.799810207832911e-05, "loss": 0.3407, "step": 2888 }, { "epoch": 0.2288770053475936, "grad_norm": 1.7473844301113626, "learning_rate": 1.7996561546850105e-05, "loss": 0.399, "step": 2889 }, { "epoch": 0.22895622895622897, "grad_norm": 1.4118640375580034, "learning_rate": 1.7995020488836e-05, "loss": 0.2476, "step": 2890 }, { "epoch": 0.22903545256486432, "grad_norm": 1.9536733110432936, "learning_rate": 1.799347890438827e-05, "loss": 0.4379, "step": 2891 }, { "epoch": 0.2291146761734997, "grad_norm": 1.677645126704345, "learning_rate": 1.799193679360841e-05, "loss": 0.3418, "step": 2892 }, { "epoch": 0.22919389978213509, "grad_norm": 1.3672033850914929, "learning_rate": 1.799039415659797e-05, "loss": 0.314, "step": 2893 }, { "epoch": 0.22927312339077044, "grad_norm": 1.7380627763572427, "learning_rate": 1.798885099345852e-05, "loss": 0.3811, "step": 2894 }, { "epoch": 0.22935234699940582, "grad_norm": 1.9222401112288479, "learning_rate": 1.7987307304291676e-05, "loss": 0.3831, "step": 2895 }, { "epoch": 0.2294315706080412, "grad_norm": 1.758899882712589, "learning_rate": 1.7985763089199073e-05, "loss": 0.4029, "step": 2896 }, { "epoch": 0.22951079421667656, "grad_norm": 1.4954132267643259, "learning_rate": 1.79842183482824e-05, "loss": 0.3289, "step": 2897 }, { "epoch": 0.22959001782531194, "grad_norm": 1.4396245519738804, "learning_rate": 1.7982673081643364e-05, "loss": 0.254, "step": 2898 }, { "epoch": 0.22966924143394732, "grad_norm": 1.610872647853128, "learning_rate": 1.7981127289383718e-05, "loss": 0.3171, "step": 2899 }, { "epoch": 0.2297484650425827, "grad_norm": 1.9379536913485538, "learning_rate": 1.797958097160524e-05, "loss": 0.4497, "step": 2900 }, { "epoch": 0.22982768865121805, "grad_norm": 1.7177603196683155, "learning_rate": 1.797803412840975e-05, "loss": 0.3667, "step": 2901 }, { "epoch": 0.22990691225985344, "grad_norm": 1.5813242064588038, "learning_rate": 1.7976486759899103e-05, "loss": 0.3622, "step": 2902 }, { "epoch": 0.22998613586848882, "grad_norm": 1.9633317059023883, "learning_rate": 1.797493886617518e-05, "loss": 0.3923, "step": 2903 }, { "epoch": 0.23006535947712417, "grad_norm": 1.4550866977961818, "learning_rate": 1.797339044733991e-05, "loss": 0.3863, "step": 2904 }, { "epoch": 0.23014458308575955, "grad_norm": 1.7266205264993428, "learning_rate": 1.797184150349524e-05, "loss": 0.4353, "step": 2905 }, { "epoch": 0.23022380669439493, "grad_norm": 1.5303695192337168, "learning_rate": 1.7970292034743172e-05, "loss": 0.3354, "step": 2906 }, { "epoch": 0.23030303030303031, "grad_norm": 1.7882675421821683, "learning_rate": 1.7968742041185718e-05, "loss": 0.3862, "step": 2907 }, { "epoch": 0.23038225391166567, "grad_norm": 1.9859746256308541, "learning_rate": 1.7967191522924946e-05, "loss": 0.4517, "step": 2908 }, { "epoch": 0.23046147752030105, "grad_norm": 1.730100149151367, "learning_rate": 1.7965640480062945e-05, "loss": 0.4266, "step": 2909 }, { "epoch": 0.23054070112893643, "grad_norm": 1.6802458168425851, "learning_rate": 1.796408891270185e-05, "loss": 0.4475, "step": 2910 }, { "epoch": 0.23061992473757179, "grad_norm": 1.860548057412297, "learning_rate": 1.7962536820943822e-05, "loss": 0.3543, "step": 2911 }, { "epoch": 0.23069914834620717, "grad_norm": 2.192719843354401, "learning_rate": 1.7960984204891055e-05, "loss": 0.578, "step": 2912 }, { "epoch": 0.23077837195484255, "grad_norm": 1.436751565769236, "learning_rate": 1.7959431064645786e-05, "loss": 0.3694, "step": 2913 }, { "epoch": 0.23085759556347793, "grad_norm": 1.8045081227975395, "learning_rate": 1.7957877400310275e-05, "loss": 0.4476, "step": 2914 }, { "epoch": 0.23093681917211328, "grad_norm": 1.808268774958562, "learning_rate": 1.7956323211986833e-05, "loss": 0.3806, "step": 2915 }, { "epoch": 0.23101604278074866, "grad_norm": 1.6331590983584408, "learning_rate": 1.795476849977779e-05, "loss": 0.3581, "step": 2916 }, { "epoch": 0.23109526638938405, "grad_norm": 1.8663096504901544, "learning_rate": 1.7953213263785513e-05, "loss": 0.3988, "step": 2917 }, { "epoch": 0.2311744899980194, "grad_norm": 1.4572632980616669, "learning_rate": 1.7951657504112416e-05, "loss": 0.3965, "step": 2918 }, { "epoch": 0.23125371360665478, "grad_norm": 1.926773228579224, "learning_rate": 1.795010122086093e-05, "loss": 0.4426, "step": 2919 }, { "epoch": 0.23133293721529016, "grad_norm": 1.5085816174035294, "learning_rate": 1.7948544414133534e-05, "loss": 0.3722, "step": 2920 }, { "epoch": 0.23141216082392552, "grad_norm": 1.3529200149568625, "learning_rate": 1.7946987084032733e-05, "loss": 0.3335, "step": 2921 }, { "epoch": 0.2314913844325609, "grad_norm": 1.7407449660299779, "learning_rate": 1.794542923066107e-05, "loss": 0.3561, "step": 2922 }, { "epoch": 0.23157060804119628, "grad_norm": 1.7414893197132069, "learning_rate": 1.7943870854121126e-05, "loss": 0.3892, "step": 2923 }, { "epoch": 0.23164983164983166, "grad_norm": 1.7620768326162055, "learning_rate": 1.794231195451551e-05, "loss": 0.3609, "step": 2924 }, { "epoch": 0.23172905525846701, "grad_norm": 2.054803373558721, "learning_rate": 1.7940752531946867e-05, "loss": 0.4284, "step": 2925 }, { "epoch": 0.2318082788671024, "grad_norm": 1.6818791261292518, "learning_rate": 1.793919258651788e-05, "loss": 0.4182, "step": 2926 }, { "epoch": 0.23188750247573778, "grad_norm": 1.7027782079665075, "learning_rate": 1.7937632118331255e-05, "loss": 0.3961, "step": 2927 }, { "epoch": 0.23196672608437313, "grad_norm": 2.065425586137854, "learning_rate": 1.7936071127489755e-05, "loss": 0.4461, "step": 2928 }, { "epoch": 0.2320459496930085, "grad_norm": 1.5259608841093395, "learning_rate": 1.7934509614096156e-05, "loss": 0.328, "step": 2929 }, { "epoch": 0.2321251733016439, "grad_norm": 1.7155426004156702, "learning_rate": 1.7932947578253273e-05, "loss": 0.3728, "step": 2930 }, { "epoch": 0.23220439691027928, "grad_norm": 1.6718865487410788, "learning_rate": 1.793138502006397e-05, "loss": 0.3523, "step": 2931 }, { "epoch": 0.23228362051891463, "grad_norm": 1.4028943894615473, "learning_rate": 1.792982193963112e-05, "loss": 0.2667, "step": 2932 }, { "epoch": 0.23236284412755, "grad_norm": 1.9196503526629345, "learning_rate": 1.7928258337057657e-05, "loss": 0.4449, "step": 2933 }, { "epoch": 0.2324420677361854, "grad_norm": 1.7750915914790288, "learning_rate": 1.792669421244653e-05, "loss": 0.3895, "step": 2934 }, { "epoch": 0.23252129134482075, "grad_norm": 1.9564237157450146, "learning_rate": 1.7925129565900728e-05, "loss": 0.4574, "step": 2935 }, { "epoch": 0.23260051495345613, "grad_norm": 1.7732758780228715, "learning_rate": 1.792356439752328e-05, "loss": 0.4024, "step": 2936 }, { "epoch": 0.2326797385620915, "grad_norm": 1.5978501727914904, "learning_rate": 1.792199870741724e-05, "loss": 0.3584, "step": 2937 }, { "epoch": 0.23275896217072686, "grad_norm": 1.7155700712566424, "learning_rate": 1.79204324956857e-05, "loss": 0.4193, "step": 2938 }, { "epoch": 0.23283818577936224, "grad_norm": 1.5635593332620223, "learning_rate": 1.7918865762431794e-05, "loss": 0.3368, "step": 2939 }, { "epoch": 0.23291740938799763, "grad_norm": 1.5212810402499088, "learning_rate": 1.7917298507758684e-05, "loss": 0.3131, "step": 2940 }, { "epoch": 0.232996632996633, "grad_norm": 1.569751981873757, "learning_rate": 1.7915730731769558e-05, "loss": 0.3247, "step": 2941 }, { "epoch": 0.23307585660526836, "grad_norm": 1.7934693993413744, "learning_rate": 1.7914162434567653e-05, "loss": 0.4306, "step": 2942 }, { "epoch": 0.23315508021390374, "grad_norm": 1.8631704579174753, "learning_rate": 1.791259361625623e-05, "loss": 0.4466, "step": 2943 }, { "epoch": 0.23323430382253912, "grad_norm": 1.5854774008709904, "learning_rate": 1.7911024276938595e-05, "loss": 0.3638, "step": 2944 }, { "epoch": 0.23331352743117448, "grad_norm": 1.8713170820793896, "learning_rate": 1.7909454416718075e-05, "loss": 0.3622, "step": 2945 }, { "epoch": 0.23339275103980986, "grad_norm": 1.7270872899188183, "learning_rate": 1.790788403569804e-05, "loss": 0.419, "step": 2946 }, { "epoch": 0.23347197464844524, "grad_norm": 1.7276439062271296, "learning_rate": 1.7906313133981887e-05, "loss": 0.4048, "step": 2947 }, { "epoch": 0.23355119825708062, "grad_norm": 1.830198475896751, "learning_rate": 1.7904741711673064e-05, "loss": 0.4446, "step": 2948 }, { "epoch": 0.23363042186571598, "grad_norm": 1.2875092236252006, "learning_rate": 1.790316976887503e-05, "loss": 0.3, "step": 2949 }, { "epoch": 0.23370964547435136, "grad_norm": 1.6726229280516625, "learning_rate": 1.7901597305691294e-05, "loss": 0.3287, "step": 2950 }, { "epoch": 0.23378886908298674, "grad_norm": 1.6690163118178876, "learning_rate": 1.7900024322225394e-05, "loss": 0.3246, "step": 2951 }, { "epoch": 0.2338680926916221, "grad_norm": 1.8325950046398163, "learning_rate": 1.789845081858091e-05, "loss": 0.4545, "step": 2952 }, { "epoch": 0.23394731630025747, "grad_norm": 1.5981268543349332, "learning_rate": 1.7896876794861443e-05, "loss": 0.3624, "step": 2953 }, { "epoch": 0.23402653990889286, "grad_norm": 1.5063216184604684, "learning_rate": 1.7895302251170636e-05, "loss": 0.3761, "step": 2954 }, { "epoch": 0.23410576351752824, "grad_norm": 1.6326728612627766, "learning_rate": 1.789372718761216e-05, "loss": 0.3614, "step": 2955 }, { "epoch": 0.2341849871261636, "grad_norm": 1.7282467328344246, "learning_rate": 1.7892151604289738e-05, "loss": 0.5239, "step": 2956 }, { "epoch": 0.23426421073479897, "grad_norm": 1.701786411258173, "learning_rate": 1.7890575501307105e-05, "loss": 0.3916, "step": 2957 }, { "epoch": 0.23434343434343435, "grad_norm": 1.5787622454027879, "learning_rate": 1.7888998878768045e-05, "loss": 0.3971, "step": 2958 }, { "epoch": 0.2344226579520697, "grad_norm": 1.5276091746722464, "learning_rate": 1.7887421736776364e-05, "loss": 0.2233, "step": 2959 }, { "epoch": 0.2345018815607051, "grad_norm": 1.7421391705065867, "learning_rate": 1.7885844075435915e-05, "loss": 0.3938, "step": 2960 }, { "epoch": 0.23458110516934047, "grad_norm": 1.8126659099638778, "learning_rate": 1.788426589485058e-05, "loss": 0.4188, "step": 2961 }, { "epoch": 0.23466032877797582, "grad_norm": 1.9265067213446623, "learning_rate": 1.788268719512427e-05, "loss": 0.4063, "step": 2962 }, { "epoch": 0.2347395523866112, "grad_norm": 1.4853653300698935, "learning_rate": 1.788110797636094e-05, "loss": 0.3921, "step": 2963 }, { "epoch": 0.2348187759952466, "grad_norm": 2.1927828779338037, "learning_rate": 1.7879528238664567e-05, "loss": 0.4026, "step": 2964 }, { "epoch": 0.23489799960388197, "grad_norm": 1.7671482596742614, "learning_rate": 1.7877947982139177e-05, "loss": 0.437, "step": 2965 }, { "epoch": 0.23497722321251732, "grad_norm": 2.0796720775994446, "learning_rate": 1.7876367206888817e-05, "loss": 0.4492, "step": 2966 }, { "epoch": 0.2350564468211527, "grad_norm": 1.9580159398190233, "learning_rate": 1.7874785913017575e-05, "loss": 0.3741, "step": 2967 }, { "epoch": 0.23513567042978809, "grad_norm": 1.7011607919402372, "learning_rate": 1.7873204100629572e-05, "loss": 0.3527, "step": 2968 }, { "epoch": 0.23521489403842344, "grad_norm": 1.9644319886555555, "learning_rate": 1.7871621769828965e-05, "loss": 0.5007, "step": 2969 }, { "epoch": 0.23529411764705882, "grad_norm": 1.6525374695924118, "learning_rate": 1.7870038920719935e-05, "loss": 0.2847, "step": 2970 }, { "epoch": 0.2353733412556942, "grad_norm": 2.253349584992222, "learning_rate": 1.7868455553406713e-05, "loss": 0.5174, "step": 2971 }, { "epoch": 0.23545256486432958, "grad_norm": 1.8088971622140557, "learning_rate": 1.7866871667993554e-05, "loss": 0.4381, "step": 2972 }, { "epoch": 0.23553178847296494, "grad_norm": 1.967009078398569, "learning_rate": 1.786528726458475e-05, "loss": 0.3866, "step": 2973 }, { "epoch": 0.23561101208160032, "grad_norm": 1.469212006549645, "learning_rate": 1.786370234328462e-05, "loss": 0.3598, "step": 2974 }, { "epoch": 0.2356902356902357, "grad_norm": 1.7371199479170925, "learning_rate": 1.7862116904197534e-05, "loss": 0.3473, "step": 2975 }, { "epoch": 0.23576945929887105, "grad_norm": 1.913597333169268, "learning_rate": 1.7860530947427878e-05, "loss": 0.5021, "step": 2976 }, { "epoch": 0.23584868290750644, "grad_norm": 1.8952711321426534, "learning_rate": 1.785894447308008e-05, "loss": 0.5066, "step": 2977 }, { "epoch": 0.23592790651614182, "grad_norm": 1.4181389132884858, "learning_rate": 1.7857357481258603e-05, "loss": 0.2819, "step": 2978 }, { "epoch": 0.23600713012477717, "grad_norm": 1.4438255022898114, "learning_rate": 1.7855769972067944e-05, "loss": 0.2693, "step": 2979 }, { "epoch": 0.23608635373341255, "grad_norm": 1.9154337634779175, "learning_rate": 1.785418194561263e-05, "loss": 0.4551, "step": 2980 }, { "epoch": 0.23616557734204793, "grad_norm": 1.8723701004890083, "learning_rate": 1.7852593401997232e-05, "loss": 0.3461, "step": 2981 }, { "epoch": 0.23624480095068331, "grad_norm": 2.0466176268691534, "learning_rate": 1.785100434132634e-05, "loss": 0.4119, "step": 2982 }, { "epoch": 0.23632402455931867, "grad_norm": 1.6920718337267746, "learning_rate": 1.7849414763704587e-05, "loss": 0.3585, "step": 2983 }, { "epoch": 0.23640324816795405, "grad_norm": 1.7472886952156605, "learning_rate": 1.7847824669236643e-05, "loss": 0.3363, "step": 2984 }, { "epoch": 0.23648247177658943, "grad_norm": 1.8665463353039125, "learning_rate": 1.7846234058027207e-05, "loss": 0.3498, "step": 2985 }, { "epoch": 0.23656169538522479, "grad_norm": 1.7971249645359173, "learning_rate": 1.7844642930181008e-05, "loss": 0.4522, "step": 2986 }, { "epoch": 0.23664091899386017, "grad_norm": 1.6625591585831032, "learning_rate": 1.7843051285802823e-05, "loss": 0.4483, "step": 2987 }, { "epoch": 0.23672014260249555, "grad_norm": 2.0507712255392403, "learning_rate": 1.7841459124997445e-05, "loss": 0.4121, "step": 2988 }, { "epoch": 0.23679936621113093, "grad_norm": 1.7118752008905507, "learning_rate": 1.7839866447869717e-05, "loss": 0.3981, "step": 2989 }, { "epoch": 0.23687858981976628, "grad_norm": 1.5289274953893544, "learning_rate": 1.7838273254524505e-05, "loss": 0.3272, "step": 2990 }, { "epoch": 0.23695781342840166, "grad_norm": 1.9012071846949739, "learning_rate": 1.7836679545066712e-05, "loss": 0.3739, "step": 2991 }, { "epoch": 0.23703703703703705, "grad_norm": 2.2138697661068454, "learning_rate": 1.7835085319601283e-05, "loss": 0.4013, "step": 2992 }, { "epoch": 0.2371162606456724, "grad_norm": 1.6662270080382835, "learning_rate": 1.783349057823318e-05, "loss": 0.3968, "step": 2993 }, { "epoch": 0.23719548425430778, "grad_norm": 1.742504282670805, "learning_rate": 1.783189532106742e-05, "loss": 0.4317, "step": 2994 }, { "epoch": 0.23727470786294316, "grad_norm": 1.7391509132236265, "learning_rate": 1.783029954820904e-05, "loss": 0.51, "step": 2995 }, { "epoch": 0.23735393147157854, "grad_norm": 1.486493206822086, "learning_rate": 1.7828703259763107e-05, "loss": 0.2872, "step": 2996 }, { "epoch": 0.2374331550802139, "grad_norm": 1.6780228412202491, "learning_rate": 1.782710645583473e-05, "loss": 0.3689, "step": 2997 }, { "epoch": 0.23751237868884928, "grad_norm": 1.8037090424314197, "learning_rate": 1.7825509136529065e-05, "loss": 0.4099, "step": 2998 }, { "epoch": 0.23759160229748466, "grad_norm": 1.4838606463914465, "learning_rate": 1.782391130195127e-05, "loss": 0.4174, "step": 2999 }, { "epoch": 0.23767082590612001, "grad_norm": 1.7007785737085417, "learning_rate": 1.7822312952206565e-05, "loss": 0.4335, "step": 3000 }, { "epoch": 0.2377500495147554, "grad_norm": 1.5601984226245849, "learning_rate": 1.782071408740019e-05, "loss": 0.4329, "step": 3001 }, { "epoch": 0.23782927312339078, "grad_norm": 1.9321930118327455, "learning_rate": 1.781911470763742e-05, "loss": 0.4113, "step": 3002 }, { "epoch": 0.23790849673202613, "grad_norm": 1.6775462521648703, "learning_rate": 1.7817514813023577e-05, "loss": 0.4919, "step": 3003 }, { "epoch": 0.2379877203406615, "grad_norm": 1.6890843011783088, "learning_rate": 1.781591440366399e-05, "loss": 0.3913, "step": 3004 }, { "epoch": 0.2380669439492969, "grad_norm": 1.6815843237143824, "learning_rate": 1.7814313479664054e-05, "loss": 0.3549, "step": 3005 }, { "epoch": 0.23814616755793228, "grad_norm": 1.630168929724418, "learning_rate": 1.781271204112917e-05, "loss": 0.402, "step": 3006 }, { "epoch": 0.23822539116656763, "grad_norm": 1.8758683036216994, "learning_rate": 1.7811110088164797e-05, "loss": 0.3401, "step": 3007 }, { "epoch": 0.238304614775203, "grad_norm": 1.7197732446151963, "learning_rate": 1.7809507620876406e-05, "loss": 0.3072, "step": 3008 }, { "epoch": 0.2383838383838384, "grad_norm": 1.775250984240573, "learning_rate": 1.7807904639369512e-05, "loss": 0.5199, "step": 3009 }, { "epoch": 0.23846306199247375, "grad_norm": 1.5126074857013367, "learning_rate": 1.7806301143749672e-05, "loss": 0.3181, "step": 3010 }, { "epoch": 0.23854228560110913, "grad_norm": 1.610330348021287, "learning_rate": 1.780469713412246e-05, "loss": 0.3136, "step": 3011 }, { "epoch": 0.2386215092097445, "grad_norm": 1.7474691127532895, "learning_rate": 1.78030926105935e-05, "loss": 0.4079, "step": 3012 }, { "epoch": 0.2387007328183799, "grad_norm": 1.8788682983942224, "learning_rate": 1.7801487573268433e-05, "loss": 0.3756, "step": 3013 }, { "epoch": 0.23877995642701524, "grad_norm": 1.545590004326463, "learning_rate": 1.7799882022252948e-05, "loss": 0.3509, "step": 3014 }, { "epoch": 0.23885918003565063, "grad_norm": 1.8924300162713565, "learning_rate": 1.7798275957652764e-05, "loss": 0.4448, "step": 3015 }, { "epoch": 0.238938403644286, "grad_norm": 1.9670811136959074, "learning_rate": 1.779666937957363e-05, "loss": 0.4056, "step": 3016 }, { "epoch": 0.23901762725292136, "grad_norm": 1.5933054658189323, "learning_rate": 1.7795062288121335e-05, "loss": 0.3795, "step": 3017 }, { "epoch": 0.23909685086155674, "grad_norm": 1.7144014119415023, "learning_rate": 1.7793454683401692e-05, "loss": 0.3452, "step": 3018 }, { "epoch": 0.23917607447019212, "grad_norm": 1.815888246301984, "learning_rate": 1.779184656552056e-05, "loss": 0.3239, "step": 3019 }, { "epoch": 0.23925529807882748, "grad_norm": 1.5971836595814837, "learning_rate": 1.7790237934583824e-05, "loss": 0.3462, "step": 3020 }, { "epoch": 0.23933452168746286, "grad_norm": 1.9646704316874855, "learning_rate": 1.7788628790697404e-05, "loss": 0.3804, "step": 3021 }, { "epoch": 0.23941374529609824, "grad_norm": 1.6747798166136836, "learning_rate": 1.7787019133967252e-05, "loss": 0.3547, "step": 3022 }, { "epoch": 0.23949296890473362, "grad_norm": 1.9264676329828028, "learning_rate": 1.778540896449936e-05, "loss": 0.4613, "step": 3023 }, { "epoch": 0.23957219251336898, "grad_norm": 1.6132476701095293, "learning_rate": 1.778379828239975e-05, "loss": 0.4111, "step": 3024 }, { "epoch": 0.23965141612200436, "grad_norm": 1.4309644043664016, "learning_rate": 1.778218708777448e-05, "loss": 0.2995, "step": 3025 }, { "epoch": 0.23973063973063974, "grad_norm": 1.6887063840388452, "learning_rate": 1.7780575380729626e-05, "loss": 0.371, "step": 3026 }, { "epoch": 0.2398098633392751, "grad_norm": 1.6360906298003395, "learning_rate": 1.777896316137133e-05, "loss": 0.3086, "step": 3027 }, { "epoch": 0.23988908694791047, "grad_norm": 1.6362852227495086, "learning_rate": 1.7777350429805734e-05, "loss": 0.3738, "step": 3028 }, { "epoch": 0.23996831055654586, "grad_norm": 2.0635917716487584, "learning_rate": 1.777573718613904e-05, "loss": 0.4594, "step": 3029 }, { "epoch": 0.24004753416518124, "grad_norm": 1.525494481033945, "learning_rate": 1.7774123430477464e-05, "loss": 0.3678, "step": 3030 }, { "epoch": 0.2401267577738166, "grad_norm": 2.013675252469297, "learning_rate": 1.7772509162927266e-05, "loss": 0.3841, "step": 3031 }, { "epoch": 0.24020598138245197, "grad_norm": 1.7014065313927174, "learning_rate": 1.7770894383594737e-05, "loss": 0.3373, "step": 3032 }, { "epoch": 0.24028520499108735, "grad_norm": 1.5976352232612612, "learning_rate": 1.7769279092586205e-05, "loss": 0.3187, "step": 3033 }, { "epoch": 0.2403644285997227, "grad_norm": 1.3225149809736216, "learning_rate": 1.776766329000803e-05, "loss": 0.2451, "step": 3034 }, { "epoch": 0.2404436522083581, "grad_norm": 1.4403620008154954, "learning_rate": 1.7766046975966603e-05, "loss": 0.3146, "step": 3035 }, { "epoch": 0.24052287581699347, "grad_norm": 1.9039601851649306, "learning_rate": 1.7764430150568347e-05, "loss": 0.4125, "step": 3036 }, { "epoch": 0.24060209942562882, "grad_norm": 1.779349077077079, "learning_rate": 1.776281281391973e-05, "loss": 0.4304, "step": 3037 }, { "epoch": 0.2406813230342642, "grad_norm": 1.7453775037164776, "learning_rate": 1.776119496612724e-05, "loss": 0.4112, "step": 3038 }, { "epoch": 0.2407605466428996, "grad_norm": 1.9217349353308082, "learning_rate": 1.7759576607297405e-05, "loss": 0.401, "step": 3039 }, { "epoch": 0.24083977025153497, "grad_norm": 1.8946796518967006, "learning_rate": 1.7757957737536785e-05, "loss": 0.3424, "step": 3040 }, { "epoch": 0.24091899386017032, "grad_norm": 2.1324307793213104, "learning_rate": 1.775633835695198e-05, "loss": 0.4827, "step": 3041 }, { "epoch": 0.2409982174688057, "grad_norm": 1.9373241398516923, "learning_rate": 1.7754718465649618e-05, "loss": 0.386, "step": 3042 }, { "epoch": 0.24107744107744108, "grad_norm": 1.716237797792207, "learning_rate": 1.7753098063736355e-05, "loss": 0.3793, "step": 3043 }, { "epoch": 0.24115666468607644, "grad_norm": 1.7525684948363685, "learning_rate": 1.775147715131889e-05, "loss": 0.4361, "step": 3044 }, { "epoch": 0.24123588829471182, "grad_norm": 1.612076879378781, "learning_rate": 1.7749855728503952e-05, "loss": 0.3075, "step": 3045 }, { "epoch": 0.2413151119033472, "grad_norm": 1.7327545991172746, "learning_rate": 1.7748233795398308e-05, "loss": 0.4417, "step": 3046 }, { "epoch": 0.24139433551198258, "grad_norm": 1.7073355632547802, "learning_rate": 1.7746611352108744e-05, "loss": 0.3665, "step": 3047 }, { "epoch": 0.24147355912061794, "grad_norm": 1.5098829303663128, "learning_rate": 1.7744988398742102e-05, "loss": 0.346, "step": 3048 }, { "epoch": 0.24155278272925332, "grad_norm": 1.6746666098265324, "learning_rate": 1.7743364935405238e-05, "loss": 0.4539, "step": 3049 }, { "epoch": 0.2416320063378887, "grad_norm": 1.570900488242882, "learning_rate": 1.7741740962205053e-05, "loss": 0.3007, "step": 3050 }, { "epoch": 0.24171122994652405, "grad_norm": 1.8628105363132954, "learning_rate": 1.7740116479248474e-05, "loss": 0.4446, "step": 3051 }, { "epoch": 0.24179045355515943, "grad_norm": 1.8509043499665498, "learning_rate": 1.773849148664247e-05, "loss": 0.4163, "step": 3052 }, { "epoch": 0.24186967716379482, "grad_norm": 1.900746073434233, "learning_rate": 1.773686598449404e-05, "loss": 0.3962, "step": 3053 }, { "epoch": 0.2419489007724302, "grad_norm": 1.7785979488267507, "learning_rate": 1.7735239972910208e-05, "loss": 0.3946, "step": 3054 }, { "epoch": 0.24202812438106555, "grad_norm": 1.6141601899754905, "learning_rate": 1.7733613451998043e-05, "loss": 0.3045, "step": 3055 }, { "epoch": 0.24210734798970093, "grad_norm": 1.6990711528937419, "learning_rate": 1.7731986421864645e-05, "loss": 0.4688, "step": 3056 }, { "epoch": 0.24218657159833631, "grad_norm": 1.7152333008636496, "learning_rate": 1.7730358882617148e-05, "loss": 0.4961, "step": 3057 }, { "epoch": 0.24226579520697167, "grad_norm": 1.7344426087816067, "learning_rate": 1.772873083436271e-05, "loss": 0.3569, "step": 3058 }, { "epoch": 0.24234501881560705, "grad_norm": 1.700652081978969, "learning_rate": 1.7727102277208538e-05, "loss": 0.3215, "step": 3059 }, { "epoch": 0.24242424242424243, "grad_norm": 2.014740699375079, "learning_rate": 1.772547321126186e-05, "loss": 0.3551, "step": 3060 }, { "epoch": 0.24250346603287778, "grad_norm": 1.5192035457231223, "learning_rate": 1.7723843636629945e-05, "loss": 0.3115, "step": 3061 }, { "epoch": 0.24258268964151317, "grad_norm": 1.6300265788745925, "learning_rate": 1.772221355342009e-05, "loss": 0.364, "step": 3062 }, { "epoch": 0.24266191325014855, "grad_norm": 1.4934620481195857, "learning_rate": 1.7720582961739628e-05, "loss": 0.4145, "step": 3063 }, { "epoch": 0.24274113685878393, "grad_norm": 1.514277909480501, "learning_rate": 1.771895186169593e-05, "loss": 0.2893, "step": 3064 }, { "epoch": 0.24282036046741928, "grad_norm": 1.4571352183046191, "learning_rate": 1.7717320253396393e-05, "loss": 0.3356, "step": 3065 }, { "epoch": 0.24289958407605466, "grad_norm": 1.6276661897388882, "learning_rate": 1.771568813694845e-05, "loss": 0.3816, "step": 3066 }, { "epoch": 0.24297880768469005, "grad_norm": 1.591625283464681, "learning_rate": 1.771405551245957e-05, "loss": 0.3179, "step": 3067 }, { "epoch": 0.2430580312933254, "grad_norm": 1.961787620391812, "learning_rate": 1.771242238003725e-05, "loss": 0.4525, "step": 3068 }, { "epoch": 0.24313725490196078, "grad_norm": 1.9240313938905484, "learning_rate": 1.7710788739789025e-05, "loss": 0.3429, "step": 3069 }, { "epoch": 0.24321647851059616, "grad_norm": 1.6863130474500412, "learning_rate": 1.7709154591822466e-05, "loss": 0.3725, "step": 3070 }, { "epoch": 0.24329570211923154, "grad_norm": 2.001387621587086, "learning_rate": 1.770751993624517e-05, "loss": 0.4086, "step": 3071 }, { "epoch": 0.2433749257278669, "grad_norm": 1.5142532804538547, "learning_rate": 1.770588477316477e-05, "loss": 0.299, "step": 3072 }, { "epoch": 0.24345414933650228, "grad_norm": 1.857428628276661, "learning_rate": 1.770424910268894e-05, "loss": 0.3723, "step": 3073 }, { "epoch": 0.24353337294513766, "grad_norm": 1.7462419498643962, "learning_rate": 1.7702612924925377e-05, "loss": 0.3731, "step": 3074 }, { "epoch": 0.24361259655377301, "grad_norm": 1.8710645082225665, "learning_rate": 1.7700976239981815e-05, "loss": 0.3863, "step": 3075 }, { "epoch": 0.2436918201624084, "grad_norm": 1.5347069367482018, "learning_rate": 1.769933904796602e-05, "loss": 0.3049, "step": 3076 }, { "epoch": 0.24377104377104378, "grad_norm": 1.8587373108909016, "learning_rate": 1.76977013489858e-05, "loss": 0.4202, "step": 3077 }, { "epoch": 0.24385026737967913, "grad_norm": 1.619430852618424, "learning_rate": 1.7696063143148982e-05, "loss": 0.3421, "step": 3078 }, { "epoch": 0.2439294909883145, "grad_norm": 1.6464314393924742, "learning_rate": 1.7694424430563436e-05, "loss": 0.4486, "step": 3079 }, { "epoch": 0.2440087145969499, "grad_norm": 1.6898695277486135, "learning_rate": 1.769278521133707e-05, "loss": 0.364, "step": 3080 }, { "epoch": 0.24408793820558528, "grad_norm": 1.6357901919241997, "learning_rate": 1.769114548557781e-05, "loss": 0.2768, "step": 3081 }, { "epoch": 0.24416716181422063, "grad_norm": 1.4944061972271023, "learning_rate": 1.768950525339362e-05, "loss": 0.3361, "step": 3082 }, { "epoch": 0.244246385422856, "grad_norm": 1.4083502535547918, "learning_rate": 1.7687864514892516e-05, "loss": 0.3275, "step": 3083 }, { "epoch": 0.2443256090314914, "grad_norm": 2.167969114380632, "learning_rate": 1.7686223270182524e-05, "loss": 0.4334, "step": 3084 }, { "epoch": 0.24440483264012675, "grad_norm": 2.0972020452737086, "learning_rate": 1.7684581519371714e-05, "loss": 0.3704, "step": 3085 }, { "epoch": 0.24448405624876213, "grad_norm": 1.5880282774029748, "learning_rate": 1.768293926256819e-05, "loss": 0.4288, "step": 3086 }, { "epoch": 0.2445632798573975, "grad_norm": 1.6393280801288992, "learning_rate": 1.7681296499880077e-05, "loss": 0.3685, "step": 3087 }, { "epoch": 0.2446425034660329, "grad_norm": 1.7906286050299878, "learning_rate": 1.767965323141555e-05, "loss": 0.3569, "step": 3088 }, { "epoch": 0.24472172707466824, "grad_norm": 1.4097848857249107, "learning_rate": 1.7678009457282816e-05, "loss": 0.3159, "step": 3089 }, { "epoch": 0.24480095068330363, "grad_norm": 1.7815584433961174, "learning_rate": 1.7676365177590097e-05, "loss": 0.3152, "step": 3090 }, { "epoch": 0.244880174291939, "grad_norm": 1.652073647775185, "learning_rate": 1.7674720392445672e-05, "loss": 0.329, "step": 3091 }, { "epoch": 0.24495939790057436, "grad_norm": 1.7839111747695318, "learning_rate": 1.7673075101957837e-05, "loss": 0.4997, "step": 3092 }, { "epoch": 0.24503862150920974, "grad_norm": 1.5159647073828213, "learning_rate": 1.7671429306234924e-05, "loss": 0.356, "step": 3093 }, { "epoch": 0.24511784511784512, "grad_norm": 1.6781157454696225, "learning_rate": 1.7669783005385305e-05, "loss": 0.3434, "step": 3094 }, { "epoch": 0.2451970687264805, "grad_norm": 1.9992196666224964, "learning_rate": 1.766813619951738e-05, "loss": 0.4459, "step": 3095 }, { "epoch": 0.24527629233511586, "grad_norm": 1.6531818851633784, "learning_rate": 1.7666488888739587e-05, "loss": 0.3399, "step": 3096 }, { "epoch": 0.24535551594375124, "grad_norm": 1.686594843713856, "learning_rate": 1.7664841073160383e-05, "loss": 0.4389, "step": 3097 }, { "epoch": 0.24543473955238662, "grad_norm": 1.854276648134284, "learning_rate": 1.766319275288828e-05, "loss": 0.4581, "step": 3098 }, { "epoch": 0.24551396316102198, "grad_norm": 1.7112353364898552, "learning_rate": 1.7661543928031802e-05, "loss": 0.325, "step": 3099 }, { "epoch": 0.24559318676965736, "grad_norm": 1.9011164612139102, "learning_rate": 1.7659894598699527e-05, "loss": 0.367, "step": 3100 }, { "epoch": 0.24567241037829274, "grad_norm": 1.6314720605941708, "learning_rate": 1.765824476500005e-05, "loss": 0.3954, "step": 3101 }, { "epoch": 0.2457516339869281, "grad_norm": 1.6024620008153847, "learning_rate": 1.7656594427041997e-05, "loss": 0.4288, "step": 3102 }, { "epoch": 0.24583085759556347, "grad_norm": 1.8448318751583703, "learning_rate": 1.765494358493405e-05, "loss": 0.3834, "step": 3103 }, { "epoch": 0.24591008120419885, "grad_norm": 1.5315277339381745, "learning_rate": 1.7653292238784897e-05, "loss": 0.3936, "step": 3104 }, { "epoch": 0.24598930481283424, "grad_norm": 1.6824810285535965, "learning_rate": 1.7651640388703275e-05, "loss": 0.4767, "step": 3105 }, { "epoch": 0.2460685284214696, "grad_norm": 1.618077403185247, "learning_rate": 1.7649988034797952e-05, "loss": 0.401, "step": 3106 }, { "epoch": 0.24614775203010497, "grad_norm": 1.736405319429256, "learning_rate": 1.7648335177177725e-05, "loss": 0.2936, "step": 3107 }, { "epoch": 0.24622697563874035, "grad_norm": 1.5671705213711955, "learning_rate": 1.764668181595143e-05, "loss": 0.3629, "step": 3108 }, { "epoch": 0.2463061992473757, "grad_norm": 1.8258693894173534, "learning_rate": 1.764502795122793e-05, "loss": 0.6055, "step": 3109 }, { "epoch": 0.2463854228560111, "grad_norm": 1.9598912574689136, "learning_rate": 1.7643373583116123e-05, "loss": 0.4291, "step": 3110 }, { "epoch": 0.24646464646464647, "grad_norm": 1.7132876626298001, "learning_rate": 1.7641718711724947e-05, "loss": 0.3735, "step": 3111 }, { "epoch": 0.24654387007328185, "grad_norm": 1.345583144252865, "learning_rate": 1.764006333716336e-05, "loss": 0.2853, "step": 3112 }, { "epoch": 0.2466230936819172, "grad_norm": 1.7463840669564408, "learning_rate": 1.7638407459540364e-05, "loss": 0.4549, "step": 3113 }, { "epoch": 0.2467023172905526, "grad_norm": 1.9929654102104863, "learning_rate": 1.7636751078964995e-05, "loss": 0.4265, "step": 3114 }, { "epoch": 0.24678154089918797, "grad_norm": 1.7252340845541883, "learning_rate": 1.763509419554631e-05, "loss": 0.3636, "step": 3115 }, { "epoch": 0.24686076450782332, "grad_norm": 1.537441125450611, "learning_rate": 1.763343680939341e-05, "loss": 0.3439, "step": 3116 }, { "epoch": 0.2469399881164587, "grad_norm": 1.8478322196130217, "learning_rate": 1.7631778920615427e-05, "loss": 0.3978, "step": 3117 }, { "epoch": 0.24701921172509408, "grad_norm": 1.9885081331537757, "learning_rate": 1.7630120529321518e-05, "loss": 0.4383, "step": 3118 }, { "epoch": 0.24709843533372944, "grad_norm": 1.7683438747751496, "learning_rate": 1.7628461635620895e-05, "loss": 0.3372, "step": 3119 }, { "epoch": 0.24717765894236482, "grad_norm": 1.6878995620613646, "learning_rate": 1.7626802239622772e-05, "loss": 0.367, "step": 3120 }, { "epoch": 0.2472568825510002, "grad_norm": 1.6331811307900859, "learning_rate": 1.7625142341436423e-05, "loss": 0.3765, "step": 3121 }, { "epoch": 0.24733610615963558, "grad_norm": 1.5326916621265476, "learning_rate": 1.762348194117114e-05, "loss": 0.2831, "step": 3122 }, { "epoch": 0.24741532976827094, "grad_norm": 1.6542354847422005, "learning_rate": 1.7621821038936257e-05, "loss": 0.41, "step": 3123 }, { "epoch": 0.24749455337690632, "grad_norm": 1.6825881558525462, "learning_rate": 1.7620159634841127e-05, "loss": 0.4372, "step": 3124 }, { "epoch": 0.2475737769855417, "grad_norm": 1.8215056536469243, "learning_rate": 1.761849772899515e-05, "loss": 0.4171, "step": 3125 }, { "epoch": 0.24765300059417705, "grad_norm": 1.6850118382127774, "learning_rate": 1.7616835321507757e-05, "loss": 0.3661, "step": 3126 }, { "epoch": 0.24773222420281243, "grad_norm": 1.3341692323234686, "learning_rate": 1.761517241248841e-05, "loss": 0.2281, "step": 3127 }, { "epoch": 0.24781144781144782, "grad_norm": 1.6464597112168649, "learning_rate": 1.76135090020466e-05, "loss": 0.3422, "step": 3128 }, { "epoch": 0.2478906714200832, "grad_norm": 1.8514499850295798, "learning_rate": 1.7611845090291858e-05, "loss": 0.4611, "step": 3129 }, { "epoch": 0.24796989502871855, "grad_norm": 1.8596657964559526, "learning_rate": 1.761018067733374e-05, "loss": 0.4425, "step": 3130 }, { "epoch": 0.24804911863735393, "grad_norm": 1.5980531539496576, "learning_rate": 1.7608515763281843e-05, "loss": 0.413, "step": 3131 }, { "epoch": 0.24812834224598931, "grad_norm": 1.7036543217940978, "learning_rate": 1.760685034824579e-05, "loss": 0.4034, "step": 3132 }, { "epoch": 0.24820756585462467, "grad_norm": 1.6625956746508452, "learning_rate": 1.760518443233525e-05, "loss": 0.3812, "step": 3133 }, { "epoch": 0.24828678946326005, "grad_norm": 1.823827034449052, "learning_rate": 1.7603518015659905e-05, "loss": 0.4981, "step": 3134 }, { "epoch": 0.24836601307189543, "grad_norm": 1.5951347269817877, "learning_rate": 1.7601851098329484e-05, "loss": 0.4152, "step": 3135 }, { "epoch": 0.24844523668053078, "grad_norm": 1.6333279705640327, "learning_rate": 1.7600183680453745e-05, "loss": 0.3817, "step": 3136 }, { "epoch": 0.24852446028916617, "grad_norm": 1.612506329116598, "learning_rate": 1.7598515762142484e-05, "loss": 0.3262, "step": 3137 }, { "epoch": 0.24860368389780155, "grad_norm": 1.667174698200025, "learning_rate": 1.759684734350552e-05, "loss": 0.482, "step": 3138 }, { "epoch": 0.24868290750643693, "grad_norm": 1.5860244743119838, "learning_rate": 1.759517842465271e-05, "loss": 0.3543, "step": 3139 }, { "epoch": 0.24876213111507228, "grad_norm": 1.6657968318462428, "learning_rate": 1.759350900569395e-05, "loss": 0.4006, "step": 3140 }, { "epoch": 0.24884135472370766, "grad_norm": 1.4512463582173374, "learning_rate": 1.759183908673916e-05, "loss": 0.3589, "step": 3141 }, { "epoch": 0.24892057833234305, "grad_norm": 1.9940519905085468, "learning_rate": 1.759016866789829e-05, "loss": 0.3304, "step": 3142 }, { "epoch": 0.2489998019409784, "grad_norm": 2.0032225011465505, "learning_rate": 1.7588497749281338e-05, "loss": 0.3591, "step": 3143 }, { "epoch": 0.24907902554961378, "grad_norm": 1.711719266045336, "learning_rate": 1.7586826330998324e-05, "loss": 0.3738, "step": 3144 }, { "epoch": 0.24915824915824916, "grad_norm": 2.13916379198605, "learning_rate": 1.7585154413159304e-05, "loss": 0.3603, "step": 3145 }, { "epoch": 0.24923747276688454, "grad_norm": 1.7765353635847283, "learning_rate": 1.758348199587436e-05, "loss": 0.3535, "step": 3146 }, { "epoch": 0.2493166963755199, "grad_norm": 1.2951752704028645, "learning_rate": 1.7581809079253616e-05, "loss": 0.3375, "step": 3147 }, { "epoch": 0.24939591998415528, "grad_norm": 1.7800642657783186, "learning_rate": 1.7580135663407226e-05, "loss": 0.4327, "step": 3148 }, { "epoch": 0.24947514359279066, "grad_norm": 1.991001877459909, "learning_rate": 1.7578461748445374e-05, "loss": 0.4391, "step": 3149 }, { "epoch": 0.24955436720142601, "grad_norm": 1.8319689521742195, "learning_rate": 1.7576787334478283e-05, "loss": 0.3643, "step": 3150 }, { "epoch": 0.2496335908100614, "grad_norm": 1.532594908192309, "learning_rate": 1.7575112421616203e-05, "loss": 0.3371, "step": 3151 }, { "epoch": 0.24971281441869678, "grad_norm": 1.9404567341999421, "learning_rate": 1.757343700996942e-05, "loss": 0.3831, "step": 3152 }, { "epoch": 0.24979203802733216, "grad_norm": 1.6412692424730824, "learning_rate": 1.757176109964825e-05, "loss": 0.4111, "step": 3153 }, { "epoch": 0.2498712616359675, "grad_norm": 1.8372979184013194, "learning_rate": 1.7570084690763042e-05, "loss": 0.4445, "step": 3154 }, { "epoch": 0.2499504852446029, "grad_norm": 1.5492598635557118, "learning_rate": 1.7568407783424187e-05, "loss": 0.3488, "step": 3155 }, { "epoch": 0.2500297088532383, "grad_norm": 1.6773650227703059, "learning_rate": 1.7566730377742093e-05, "loss": 0.3525, "step": 3156 }, { "epoch": 0.25010893246187366, "grad_norm": 1.3484960434550548, "learning_rate": 1.7565052473827213e-05, "loss": 0.3274, "step": 3157 }, { "epoch": 0.25018815607050904, "grad_norm": 1.7040834307595112, "learning_rate": 1.7563374071790028e-05, "loss": 0.3084, "step": 3158 }, { "epoch": 0.25026737967914436, "grad_norm": 1.7158746015627382, "learning_rate": 1.7561695171741054e-05, "loss": 0.4214, "step": 3159 }, { "epoch": 0.25034660328777975, "grad_norm": 1.5382135792599925, "learning_rate": 1.7560015773790837e-05, "loss": 0.3863, "step": 3160 }, { "epoch": 0.2504258268964151, "grad_norm": 1.6423749980612374, "learning_rate": 1.7558335878049955e-05, "loss": 0.4629, "step": 3161 }, { "epoch": 0.2505050505050505, "grad_norm": 1.6094871513361309, "learning_rate": 1.7556655484629028e-05, "loss": 0.4016, "step": 3162 }, { "epoch": 0.2505842741136859, "grad_norm": 1.9636668367552694, "learning_rate": 1.7554974593638697e-05, "loss": 0.3705, "step": 3163 }, { "epoch": 0.25066349772232127, "grad_norm": 1.4463471760821986, "learning_rate": 1.755329320518964e-05, "loss": 0.355, "step": 3164 }, { "epoch": 0.25074272133095665, "grad_norm": 2.218429902654406, "learning_rate": 1.7551611319392573e-05, "loss": 0.4233, "step": 3165 }, { "epoch": 0.250821944939592, "grad_norm": 1.692074924221293, "learning_rate": 1.7549928936358232e-05, "loss": 0.256, "step": 3166 }, { "epoch": 0.25090116854822736, "grad_norm": 1.9555349113087643, "learning_rate": 1.75482460561974e-05, "loss": 0.4198, "step": 3167 }, { "epoch": 0.25098039215686274, "grad_norm": 1.8304323777677494, "learning_rate": 1.7546562679020884e-05, "loss": 0.3067, "step": 3168 }, { "epoch": 0.2510596157654981, "grad_norm": 1.6974395122185204, "learning_rate": 1.7544878804939528e-05, "loss": 0.3313, "step": 3169 }, { "epoch": 0.2511388393741335, "grad_norm": 1.5269362241735296, "learning_rate": 1.7543194434064208e-05, "loss": 0.3688, "step": 3170 }, { "epoch": 0.2512180629827689, "grad_norm": 1.4531822613412166, "learning_rate": 1.754150956650583e-05, "loss": 0.3558, "step": 3171 }, { "epoch": 0.2512972865914042, "grad_norm": 2.3714676792356273, "learning_rate": 1.753982420237533e-05, "loss": 0.399, "step": 3172 }, { "epoch": 0.2513765102000396, "grad_norm": 1.77194609758977, "learning_rate": 1.753813834178369e-05, "loss": 0.4464, "step": 3173 }, { "epoch": 0.251455733808675, "grad_norm": 1.5697093494779089, "learning_rate": 1.753645198484191e-05, "loss": 0.2932, "step": 3174 }, { "epoch": 0.25153495741731036, "grad_norm": 1.8609773962843452, "learning_rate": 1.753476513166103e-05, "loss": 0.3814, "step": 3175 }, { "epoch": 0.25161418102594574, "grad_norm": 1.5335317957817038, "learning_rate": 1.7533077782352123e-05, "loss": 0.3822, "step": 3176 }, { "epoch": 0.2516934046345811, "grad_norm": 1.4065319394040334, "learning_rate": 1.753138993702629e-05, "loss": 0.3144, "step": 3177 }, { "epoch": 0.2517726282432165, "grad_norm": 1.8547513542068992, "learning_rate": 1.752970159579467e-05, "loss": 0.382, "step": 3178 }, { "epoch": 0.2518518518518518, "grad_norm": 1.5701521302473442, "learning_rate": 1.7528012758768426e-05, "loss": 0.3788, "step": 3179 }, { "epoch": 0.2519310754604872, "grad_norm": 1.9113128812111697, "learning_rate": 1.7526323426058767e-05, "loss": 0.4386, "step": 3180 }, { "epoch": 0.2520102990691226, "grad_norm": 1.928975548651707, "learning_rate": 1.7524633597776923e-05, "loss": 0.4944, "step": 3181 }, { "epoch": 0.25208952267775797, "grad_norm": 1.7284713786438117, "learning_rate": 1.7522943274034165e-05, "loss": 0.3802, "step": 3182 }, { "epoch": 0.25216874628639335, "grad_norm": 1.5495622369767326, "learning_rate": 1.752125245494179e-05, "loss": 0.371, "step": 3183 }, { "epoch": 0.25224796989502873, "grad_norm": 1.540973369500184, "learning_rate": 1.751956114061113e-05, "loss": 0.4582, "step": 3184 }, { "epoch": 0.2523271935036641, "grad_norm": 2.0746743217922674, "learning_rate": 1.751786933115355e-05, "loss": 0.4601, "step": 3185 }, { "epoch": 0.25240641711229944, "grad_norm": 1.448268277596241, "learning_rate": 1.751617702668045e-05, "loss": 0.2938, "step": 3186 }, { "epoch": 0.2524856407209348, "grad_norm": 1.9235119463678907, "learning_rate": 1.751448422730326e-05, "loss": 0.406, "step": 3187 }, { "epoch": 0.2525648643295702, "grad_norm": 1.8716330620152517, "learning_rate": 1.7512790933133435e-05, "loss": 0.4377, "step": 3188 }, { "epoch": 0.2526440879382056, "grad_norm": 1.7226498304368687, "learning_rate": 1.7511097144282482e-05, "loss": 0.417, "step": 3189 }, { "epoch": 0.25272331154684097, "grad_norm": 1.7647831656485702, "learning_rate": 1.7509402860861923e-05, "loss": 0.4161, "step": 3190 }, { "epoch": 0.25280253515547635, "grad_norm": 2.128200677494788, "learning_rate": 1.7507708082983313e-05, "loss": 0.3842, "step": 3191 }, { "epoch": 0.25288175876411173, "grad_norm": 1.7179748091492708, "learning_rate": 1.7506012810758254e-05, "loss": 0.4917, "step": 3192 }, { "epoch": 0.25296098237274706, "grad_norm": 1.7216071210366086, "learning_rate": 1.750431704429837e-05, "loss": 0.4393, "step": 3193 }, { "epoch": 0.25304020598138244, "grad_norm": 1.906337990626111, "learning_rate": 1.7502620783715316e-05, "loss": 0.3848, "step": 3194 }, { "epoch": 0.2531194295900178, "grad_norm": 1.9206035963370554, "learning_rate": 1.7500924029120782e-05, "loss": 0.3593, "step": 3195 }, { "epoch": 0.2531986531986532, "grad_norm": 1.7121886205047347, "learning_rate": 1.7499226780626494e-05, "loss": 0.375, "step": 3196 }, { "epoch": 0.2532778768072886, "grad_norm": 1.6569079521209802, "learning_rate": 1.7497529038344208e-05, "loss": 0.3634, "step": 3197 }, { "epoch": 0.25335710041592396, "grad_norm": 1.499445551334616, "learning_rate": 1.7495830802385707e-05, "loss": 0.3811, "step": 3198 }, { "epoch": 0.25343632402455935, "grad_norm": 1.672430062163695, "learning_rate": 1.7494132072862818e-05, "loss": 0.3561, "step": 3199 }, { "epoch": 0.25351554763319467, "grad_norm": 1.684210748301114, "learning_rate": 1.7492432849887387e-05, "loss": 0.343, "step": 3200 }, { "epoch": 0.25359477124183005, "grad_norm": 2.0828978127171087, "learning_rate": 1.749073313357131e-05, "loss": 0.4565, "step": 3201 }, { "epoch": 0.25367399485046543, "grad_norm": 1.3221965180283575, "learning_rate": 1.7489032924026496e-05, "loss": 0.2588, "step": 3202 }, { "epoch": 0.2537532184591008, "grad_norm": 1.4379988377589241, "learning_rate": 1.74873322213649e-05, "loss": 0.3154, "step": 3203 }, { "epoch": 0.2538324420677362, "grad_norm": 1.6805473159948106, "learning_rate": 1.7485631025698504e-05, "loss": 0.4549, "step": 3204 }, { "epoch": 0.2539116656763716, "grad_norm": 1.669544371262534, "learning_rate": 1.7483929337139326e-05, "loss": 0.4012, "step": 3205 }, { "epoch": 0.2539908892850069, "grad_norm": 1.4702193400041064, "learning_rate": 1.748222715579941e-05, "loss": 0.2759, "step": 3206 }, { "epoch": 0.2540701128936423, "grad_norm": 1.7922742957576494, "learning_rate": 1.7480524481790835e-05, "loss": 0.4298, "step": 3207 }, { "epoch": 0.25414933650227767, "grad_norm": 1.8066914837276697, "learning_rate": 1.7478821315225717e-05, "loss": 0.2921, "step": 3208 }, { "epoch": 0.25422856011091305, "grad_norm": 2.02909534486103, "learning_rate": 1.7477117656216206e-05, "loss": 0.5853, "step": 3209 }, { "epoch": 0.25430778371954843, "grad_norm": 1.6108195464393773, "learning_rate": 1.7475413504874474e-05, "loss": 0.3208, "step": 3210 }, { "epoch": 0.2543870073281838, "grad_norm": 1.7398891844169309, "learning_rate": 1.7473708861312727e-05, "loss": 0.3715, "step": 3211 }, { "epoch": 0.2544662309368192, "grad_norm": 1.7312257525084316, "learning_rate": 1.7472003725643215e-05, "loss": 0.3793, "step": 3212 }, { "epoch": 0.2545454545454545, "grad_norm": 1.7429281544251238, "learning_rate": 1.747029809797821e-05, "loss": 0.3524, "step": 3213 }, { "epoch": 0.2546246781540899, "grad_norm": 1.6307735119147664, "learning_rate": 1.7468591978430024e-05, "loss": 0.3206, "step": 3214 }, { "epoch": 0.2547039017627253, "grad_norm": 1.6272110635613188, "learning_rate": 1.746688536711099e-05, "loss": 0.3269, "step": 3215 }, { "epoch": 0.25478312537136066, "grad_norm": 1.9045383812607315, "learning_rate": 1.7465178264133482e-05, "loss": 0.3967, "step": 3216 }, { "epoch": 0.25486234897999605, "grad_norm": 1.7515333861950393, "learning_rate": 1.7463470669609907e-05, "loss": 0.3921, "step": 3217 }, { "epoch": 0.2549415725886314, "grad_norm": 1.5532013247525969, "learning_rate": 1.74617625836527e-05, "loss": 0.3645, "step": 3218 }, { "epoch": 0.2550207961972668, "grad_norm": 1.5985882784825727, "learning_rate": 1.746005400637433e-05, "loss": 0.3814, "step": 3219 }, { "epoch": 0.25510001980590213, "grad_norm": 1.615170264841815, "learning_rate": 1.74583449378873e-05, "loss": 0.3451, "step": 3220 }, { "epoch": 0.2551792434145375, "grad_norm": 1.5959340879303858, "learning_rate": 1.7456635378304143e-05, "loss": 0.3722, "step": 3221 }, { "epoch": 0.2552584670231729, "grad_norm": 1.8074639912496229, "learning_rate": 1.7454925327737426e-05, "loss": 0.4414, "step": 3222 }, { "epoch": 0.2553376906318083, "grad_norm": 1.665254380750593, "learning_rate": 1.7453214786299746e-05, "loss": 0.3456, "step": 3223 }, { "epoch": 0.25541691424044366, "grad_norm": 1.7293852982090319, "learning_rate": 1.7451503754103735e-05, "loss": 0.375, "step": 3224 }, { "epoch": 0.25549613784907904, "grad_norm": 1.6662525049862082, "learning_rate": 1.7449792231262056e-05, "loss": 0.3708, "step": 3225 }, { "epoch": 0.2555753614577144, "grad_norm": 1.7728653922123545, "learning_rate": 1.7448080217887403e-05, "loss": 0.4221, "step": 3226 }, { "epoch": 0.25565458506634975, "grad_norm": 1.7349799473578216, "learning_rate": 1.7446367714092508e-05, "loss": 0.3862, "step": 3227 }, { "epoch": 0.25573380867498513, "grad_norm": 1.5609924658347427, "learning_rate": 1.7444654719990128e-05, "loss": 0.2764, "step": 3228 }, { "epoch": 0.2558130322836205, "grad_norm": 1.6115811337131152, "learning_rate": 1.7442941235693058e-05, "loss": 0.4664, "step": 3229 }, { "epoch": 0.2558922558922559, "grad_norm": 1.7156195058998613, "learning_rate": 1.744122726131412e-05, "loss": 0.4192, "step": 3230 }, { "epoch": 0.2559714795008913, "grad_norm": 1.9625487708419151, "learning_rate": 1.7439512796966165e-05, "loss": 0.4409, "step": 3231 }, { "epoch": 0.25605070310952666, "grad_norm": 1.4591860207755285, "learning_rate": 1.7437797842762098e-05, "loss": 0.3282, "step": 3232 }, { "epoch": 0.25612992671816204, "grad_norm": 1.77300957976758, "learning_rate": 1.743608239881483e-05, "loss": 0.3466, "step": 3233 }, { "epoch": 0.25620915032679736, "grad_norm": 1.3599882542683719, "learning_rate": 1.7434366465237312e-05, "loss": 0.2755, "step": 3234 }, { "epoch": 0.25628837393543275, "grad_norm": 1.7019073937903981, "learning_rate": 1.7432650042142535e-05, "loss": 0.4418, "step": 3235 }, { "epoch": 0.2563675975440681, "grad_norm": 1.8245809277019283, "learning_rate": 1.743093312964352e-05, "loss": 0.453, "step": 3236 }, { "epoch": 0.2564468211527035, "grad_norm": 1.709948353151467, "learning_rate": 1.742921572785331e-05, "loss": 0.49, "step": 3237 }, { "epoch": 0.2565260447613389, "grad_norm": 1.6703575384654685, "learning_rate": 1.7427497836884995e-05, "loss": 0.3408, "step": 3238 }, { "epoch": 0.25660526836997427, "grad_norm": 1.5532255982849867, "learning_rate": 1.7425779456851683e-05, "loss": 0.3392, "step": 3239 }, { "epoch": 0.25668449197860965, "grad_norm": 1.7879256118800355, "learning_rate": 1.7424060587866526e-05, "loss": 0.5296, "step": 3240 }, { "epoch": 0.256763715587245, "grad_norm": 2.206171587689333, "learning_rate": 1.74223412300427e-05, "loss": 0.4342, "step": 3241 }, { "epoch": 0.25684293919588036, "grad_norm": 1.4817479413885015, "learning_rate": 1.7420621383493423e-05, "loss": 0.271, "step": 3242 }, { "epoch": 0.25692216280451574, "grad_norm": 1.7800792075635854, "learning_rate": 1.7418901048331927e-05, "loss": 0.4097, "step": 3243 }, { "epoch": 0.2570013864131511, "grad_norm": 1.6663641434176393, "learning_rate": 1.7417180224671497e-05, "loss": 0.4495, "step": 3244 }, { "epoch": 0.2570806100217865, "grad_norm": 1.6227235508242879, "learning_rate": 1.741545891262544e-05, "loss": 0.3635, "step": 3245 }, { "epoch": 0.2571598336304219, "grad_norm": 1.8956225105118398, "learning_rate": 1.7413737112307092e-05, "loss": 0.4964, "step": 3246 }, { "epoch": 0.2572390572390572, "grad_norm": 1.4715692775762617, "learning_rate": 1.741201482382983e-05, "loss": 0.3367, "step": 3247 }, { "epoch": 0.2573182808476926, "grad_norm": 1.5415910700017137, "learning_rate": 1.7410292047307054e-05, "loss": 0.3601, "step": 3248 }, { "epoch": 0.257397504456328, "grad_norm": 1.3956682106832476, "learning_rate": 1.7408568782852204e-05, "loss": 0.3177, "step": 3249 }, { "epoch": 0.25747672806496336, "grad_norm": 1.6911254880516622, "learning_rate": 1.7406845030578747e-05, "loss": 0.4653, "step": 3250 }, { "epoch": 0.25755595167359874, "grad_norm": 1.5339807389303075, "learning_rate": 1.7405120790600185e-05, "loss": 0.3131, "step": 3251 }, { "epoch": 0.2576351752822341, "grad_norm": 1.6271599584279126, "learning_rate": 1.740339606303005e-05, "loss": 0.3545, "step": 3252 }, { "epoch": 0.2577143988908695, "grad_norm": 1.1668484645949957, "learning_rate": 1.7401670847981906e-05, "loss": 0.3353, "step": 3253 }, { "epoch": 0.2577936224995048, "grad_norm": 1.634604353439678, "learning_rate": 1.7399945145569353e-05, "loss": 0.359, "step": 3254 }, { "epoch": 0.2578728461081402, "grad_norm": 1.4617874407314366, "learning_rate": 1.7398218955906017e-05, "loss": 0.3457, "step": 3255 }, { "epoch": 0.2579520697167756, "grad_norm": 1.8205880583034841, "learning_rate": 1.7396492279105562e-05, "loss": 0.4108, "step": 3256 }, { "epoch": 0.25803129332541097, "grad_norm": 2.040877014179288, "learning_rate": 1.7394765115281678e-05, "loss": 0.3658, "step": 3257 }, { "epoch": 0.25811051693404635, "grad_norm": 1.6206207362456535, "learning_rate": 1.7393037464548094e-05, "loss": 0.3229, "step": 3258 }, { "epoch": 0.25818974054268173, "grad_norm": 1.6121485523473766, "learning_rate": 1.7391309327018566e-05, "loss": 0.3437, "step": 3259 }, { "epoch": 0.2582689641513171, "grad_norm": 1.4122326007324562, "learning_rate": 1.7389580702806884e-05, "loss": 0.3247, "step": 3260 }, { "epoch": 0.25834818775995244, "grad_norm": 2.0386769280130657, "learning_rate": 1.7387851592026868e-05, "loss": 0.4364, "step": 3261 }, { "epoch": 0.2584274113685878, "grad_norm": 1.7136696001880456, "learning_rate": 1.738612199479237e-05, "loss": 0.4282, "step": 3262 }, { "epoch": 0.2585066349772232, "grad_norm": 1.8339871927007187, "learning_rate": 1.7384391911217283e-05, "loss": 0.4445, "step": 3263 }, { "epoch": 0.2585858585858586, "grad_norm": 1.5219299345390735, "learning_rate": 1.738266134141552e-05, "loss": 0.4529, "step": 3264 }, { "epoch": 0.25866508219449397, "grad_norm": 1.9120484964893236, "learning_rate": 1.738093028550103e-05, "loss": 0.4504, "step": 3265 }, { "epoch": 0.25874430580312935, "grad_norm": 1.753636552923268, "learning_rate": 1.7379198743587794e-05, "loss": 0.3837, "step": 3266 }, { "epoch": 0.25882352941176473, "grad_norm": 1.3001908948057963, "learning_rate": 1.7377466715789828e-05, "loss": 0.3077, "step": 3267 }, { "epoch": 0.25890275302040006, "grad_norm": 1.5332872581989534, "learning_rate": 1.7375734202221174e-05, "loss": 0.3004, "step": 3268 }, { "epoch": 0.25898197662903544, "grad_norm": 1.3683319559779654, "learning_rate": 1.7374001202995918e-05, "loss": 0.273, "step": 3269 }, { "epoch": 0.2590612002376708, "grad_norm": 1.3785631799196048, "learning_rate": 1.7372267718228163e-05, "loss": 0.2715, "step": 3270 }, { "epoch": 0.2591404238463062, "grad_norm": 1.6523402383212709, "learning_rate": 1.7370533748032047e-05, "loss": 0.3255, "step": 3271 }, { "epoch": 0.2592196474549416, "grad_norm": 1.7100681060049254, "learning_rate": 1.7368799292521754e-05, "loss": 0.367, "step": 3272 }, { "epoch": 0.25929887106357696, "grad_norm": 1.8381154197824574, "learning_rate": 1.736706435181148e-05, "loss": 0.4882, "step": 3273 }, { "epoch": 0.25937809467221234, "grad_norm": 1.7769185714791518, "learning_rate": 1.736532892601547e-05, "loss": 0.3668, "step": 3274 }, { "epoch": 0.25945731828084767, "grad_norm": 1.614739880032071, "learning_rate": 1.7363593015247987e-05, "loss": 0.4068, "step": 3275 }, { "epoch": 0.25953654188948305, "grad_norm": 1.6686526054706903, "learning_rate": 1.7361856619623338e-05, "loss": 0.3517, "step": 3276 }, { "epoch": 0.25961576549811843, "grad_norm": 1.3845379465465386, "learning_rate": 1.736011973925585e-05, "loss": 0.2664, "step": 3277 }, { "epoch": 0.2596949891067538, "grad_norm": 1.8041947719154796, "learning_rate": 1.7358382374259895e-05, "loss": 0.4285, "step": 3278 }, { "epoch": 0.2597742127153892, "grad_norm": 2.101592644959429, "learning_rate": 1.7356644524749867e-05, "loss": 0.4763, "step": 3279 }, { "epoch": 0.2598534363240246, "grad_norm": 1.7127252719806318, "learning_rate": 1.7354906190840194e-05, "loss": 0.3894, "step": 3280 }, { "epoch": 0.25993265993265996, "grad_norm": 1.9155344662945042, "learning_rate": 1.7353167372645337e-05, "loss": 0.4061, "step": 3281 }, { "epoch": 0.2600118835412953, "grad_norm": 1.537353996431871, "learning_rate": 1.735142807027979e-05, "loss": 0.3862, "step": 3282 }, { "epoch": 0.26009110714993067, "grad_norm": 1.8639382077306432, "learning_rate": 1.734968828385808e-05, "loss": 0.4072, "step": 3283 }, { "epoch": 0.26017033075856605, "grad_norm": 1.5517712988859211, "learning_rate": 1.7347948013494758e-05, "loss": 0.332, "step": 3284 }, { "epoch": 0.26024955436720143, "grad_norm": 1.5701971241474542, "learning_rate": 1.7346207259304415e-05, "loss": 0.4073, "step": 3285 }, { "epoch": 0.2603287779758368, "grad_norm": 1.619479916136069, "learning_rate": 1.7344466021401673e-05, "loss": 0.429, "step": 3286 }, { "epoch": 0.2604080015844722, "grad_norm": 1.3688675002118789, "learning_rate": 1.734272429990118e-05, "loss": 0.3021, "step": 3287 }, { "epoch": 0.2604872251931075, "grad_norm": 1.7996011501403537, "learning_rate": 1.7340982094917627e-05, "loss": 0.4407, "step": 3288 }, { "epoch": 0.2605664488017429, "grad_norm": 1.5205398410631559, "learning_rate": 1.7339239406565723e-05, "loss": 0.2782, "step": 3289 }, { "epoch": 0.2606456724103783, "grad_norm": 1.5345973807606839, "learning_rate": 1.733749623496022e-05, "loss": 0.3287, "step": 3290 }, { "epoch": 0.26072489601901366, "grad_norm": 1.4237933502086417, "learning_rate": 1.7335752580215898e-05, "loss": 0.2837, "step": 3291 }, { "epoch": 0.26080411962764904, "grad_norm": 1.5194990844878373, "learning_rate": 1.733400844244756e-05, "loss": 0.3551, "step": 3292 }, { "epoch": 0.2608833432362844, "grad_norm": 1.4201031104343034, "learning_rate": 1.733226382177006e-05, "loss": 0.3439, "step": 3293 }, { "epoch": 0.2609625668449198, "grad_norm": 1.9310239268394354, "learning_rate": 1.7330518718298263e-05, "loss": 0.3837, "step": 3294 }, { "epoch": 0.26104179045355513, "grad_norm": 1.8131192379395416, "learning_rate": 1.7328773132147086e-05, "loss": 0.3797, "step": 3295 }, { "epoch": 0.2611210140621905, "grad_norm": 1.8352342399026462, "learning_rate": 1.732702706343146e-05, "loss": 0.4536, "step": 3296 }, { "epoch": 0.2612002376708259, "grad_norm": 1.7530039791797933, "learning_rate": 1.7325280512266357e-05, "loss": 0.4423, "step": 3297 }, { "epoch": 0.2612794612794613, "grad_norm": 1.5180542285600627, "learning_rate": 1.7323533478766777e-05, "loss": 0.3463, "step": 3298 }, { "epoch": 0.26135868488809666, "grad_norm": 1.5185056587650425, "learning_rate": 1.732178596304776e-05, "loss": 0.3702, "step": 3299 }, { "epoch": 0.26143790849673204, "grad_norm": 1.4751522151610112, "learning_rate": 1.7320037965224365e-05, "loss": 0.2908, "step": 3300 }, { "epoch": 0.2615171321053674, "grad_norm": 1.6631568912451127, "learning_rate": 1.731828948541169e-05, "loss": 0.3441, "step": 3301 }, { "epoch": 0.26159635571400275, "grad_norm": 1.9358517600779925, "learning_rate": 1.731654052372487e-05, "loss": 0.3505, "step": 3302 }, { "epoch": 0.26167557932263813, "grad_norm": 1.7128564417616743, "learning_rate": 1.731479108027906e-05, "loss": 0.4425, "step": 3303 }, { "epoch": 0.2617548029312735, "grad_norm": 1.9449670266261203, "learning_rate": 1.7313041155189454e-05, "loss": 0.5081, "step": 3304 }, { "epoch": 0.2618340265399089, "grad_norm": 1.7524071242720565, "learning_rate": 1.7311290748571273e-05, "loss": 0.3776, "step": 3305 }, { "epoch": 0.2619132501485443, "grad_norm": 1.6417450037095627, "learning_rate": 1.7309539860539783e-05, "loss": 0.3596, "step": 3306 }, { "epoch": 0.26199247375717966, "grad_norm": 1.5566731603845965, "learning_rate": 1.7307788491210257e-05, "loss": 0.3483, "step": 3307 }, { "epoch": 0.26207169736581504, "grad_norm": 1.7211189297295166, "learning_rate": 1.7306036640698024e-05, "loss": 0.3337, "step": 3308 }, { "epoch": 0.26215092097445036, "grad_norm": 1.8411600333616034, "learning_rate": 1.7304284309118436e-05, "loss": 0.533, "step": 3309 }, { "epoch": 0.26223014458308574, "grad_norm": 1.5722581157641682, "learning_rate": 1.7302531496586866e-05, "loss": 0.4684, "step": 3310 }, { "epoch": 0.2623093681917211, "grad_norm": 1.5763170390840733, "learning_rate": 1.730077820321874e-05, "loss": 0.3724, "step": 3311 }, { "epoch": 0.2623885918003565, "grad_norm": 1.5864109089882439, "learning_rate": 1.7299024429129497e-05, "loss": 0.3388, "step": 3312 }, { "epoch": 0.2624678154089919, "grad_norm": 1.423259744318166, "learning_rate": 1.7297270174434613e-05, "loss": 0.36, "step": 3313 }, { "epoch": 0.26254703901762727, "grad_norm": 1.61098808275931, "learning_rate": 1.7295515439249608e-05, "loss": 0.3721, "step": 3314 }, { "epoch": 0.26262626262626265, "grad_norm": 1.6476951663832888, "learning_rate": 1.7293760223690008e-05, "loss": 0.5019, "step": 3315 }, { "epoch": 0.262705486234898, "grad_norm": 1.7424649832854857, "learning_rate": 1.729200452787139e-05, "loss": 0.3878, "step": 3316 }, { "epoch": 0.26278470984353336, "grad_norm": 1.4927138998244718, "learning_rate": 1.729024835190937e-05, "loss": 0.3379, "step": 3317 }, { "epoch": 0.26286393345216874, "grad_norm": 1.3498204992430085, "learning_rate": 1.7288491695919567e-05, "loss": 0.2768, "step": 3318 }, { "epoch": 0.2629431570608041, "grad_norm": 1.792856069684127, "learning_rate": 1.728673456001766e-05, "loss": 0.3524, "step": 3319 }, { "epoch": 0.2630223806694395, "grad_norm": 1.4390018950690866, "learning_rate": 1.728497694431934e-05, "loss": 0.3075, "step": 3320 }, { "epoch": 0.2631016042780749, "grad_norm": 1.9079963667902557, "learning_rate": 1.7283218848940344e-05, "loss": 0.4084, "step": 3321 }, { "epoch": 0.26318082788671027, "grad_norm": 1.986725560597437, "learning_rate": 1.728146027399643e-05, "loss": 0.3944, "step": 3322 }, { "epoch": 0.2632600514953456, "grad_norm": 1.7253385507472883, "learning_rate": 1.7279701219603394e-05, "loss": 0.4065, "step": 3323 }, { "epoch": 0.263339275103981, "grad_norm": 1.5853531029605916, "learning_rate": 1.727794168587706e-05, "loss": 0.3435, "step": 3324 }, { "epoch": 0.26341849871261636, "grad_norm": 1.5907427790014843, "learning_rate": 1.7276181672933287e-05, "loss": 0.4163, "step": 3325 }, { "epoch": 0.26349772232125174, "grad_norm": 1.5624424177004532, "learning_rate": 1.7274421180887958e-05, "loss": 0.3467, "step": 3326 }, { "epoch": 0.2635769459298871, "grad_norm": 1.583032504542314, "learning_rate": 1.7272660209857e-05, "loss": 0.3606, "step": 3327 }, { "epoch": 0.2636561695385225, "grad_norm": 1.7219062864971715, "learning_rate": 1.727089875995636e-05, "loss": 0.3901, "step": 3328 }, { "epoch": 0.2637353931471578, "grad_norm": 2.0512689513697424, "learning_rate": 1.726913683130202e-05, "loss": 0.4817, "step": 3329 }, { "epoch": 0.2638146167557932, "grad_norm": 1.9662867938735469, "learning_rate": 1.7267374424009998e-05, "loss": 0.3973, "step": 3330 }, { "epoch": 0.2638938403644286, "grad_norm": 1.7380230192425585, "learning_rate": 1.726561153819634e-05, "loss": 0.4458, "step": 3331 }, { "epoch": 0.26397306397306397, "grad_norm": 1.4692365484664618, "learning_rate": 1.7263848173977122e-05, "loss": 0.301, "step": 3332 }, { "epoch": 0.26405228758169935, "grad_norm": 1.407761262354165, "learning_rate": 1.726208433146845e-05, "loss": 0.3355, "step": 3333 }, { "epoch": 0.26413151119033473, "grad_norm": 1.561809652869918, "learning_rate": 1.726032001078647e-05, "loss": 0.3287, "step": 3334 }, { "epoch": 0.2642107347989701, "grad_norm": 1.527523248887085, "learning_rate": 1.725855521204735e-05, "loss": 0.3405, "step": 3335 }, { "epoch": 0.26428995840760544, "grad_norm": 1.45554849608881, "learning_rate": 1.7256789935367296e-05, "loss": 0.3084, "step": 3336 }, { "epoch": 0.2643691820162408, "grad_norm": 1.3737044393835163, "learning_rate": 1.7255024180862546e-05, "loss": 0.2936, "step": 3337 }, { "epoch": 0.2644484056248762, "grad_norm": 1.7634863946887358, "learning_rate": 1.7253257948649357e-05, "loss": 0.3462, "step": 3338 }, { "epoch": 0.2645276292335116, "grad_norm": 1.4938558323530604, "learning_rate": 1.7251491238844038e-05, "loss": 0.318, "step": 3339 }, { "epoch": 0.26460685284214697, "grad_norm": 1.5452151131431804, "learning_rate": 1.7249724051562905e-05, "loss": 0.2942, "step": 3340 }, { "epoch": 0.26468607645078235, "grad_norm": 1.660362710985787, "learning_rate": 1.7247956386922334e-05, "loss": 0.3156, "step": 3341 }, { "epoch": 0.26476530005941773, "grad_norm": 1.9626762286954302, "learning_rate": 1.7246188245038705e-05, "loss": 0.414, "step": 3342 }, { "epoch": 0.26484452366805306, "grad_norm": 2.1202140891212604, "learning_rate": 1.7244419626028454e-05, "loss": 0.5136, "step": 3343 }, { "epoch": 0.26492374727668844, "grad_norm": 1.3501153366686796, "learning_rate": 1.724265053000802e-05, "loss": 0.2859, "step": 3344 }, { "epoch": 0.2650029708853238, "grad_norm": 1.6756306919223005, "learning_rate": 1.7240880957093903e-05, "loss": 0.3199, "step": 3345 }, { "epoch": 0.2650821944939592, "grad_norm": 1.6808831385906877, "learning_rate": 1.7239110907402615e-05, "loss": 0.3344, "step": 3346 }, { "epoch": 0.2651614181025946, "grad_norm": 1.7225330801956422, "learning_rate": 1.72373403810507e-05, "loss": 0.3757, "step": 3347 }, { "epoch": 0.26524064171122996, "grad_norm": 1.6804858499925897, "learning_rate": 1.7235569378154752e-05, "loss": 0.3361, "step": 3348 }, { "epoch": 0.26531986531986534, "grad_norm": 1.6250276167029933, "learning_rate": 1.7233797898831376e-05, "loss": 0.3741, "step": 3349 }, { "epoch": 0.26539908892850067, "grad_norm": 1.6045888552538323, "learning_rate": 1.7232025943197213e-05, "loss": 0.3235, "step": 3350 }, { "epoch": 0.26547831253713605, "grad_norm": 1.6531223265752097, "learning_rate": 1.723025351136894e-05, "loss": 0.2859, "step": 3351 }, { "epoch": 0.26555753614577143, "grad_norm": 1.7519217981551096, "learning_rate": 1.722848060346326e-05, "loss": 0.3767, "step": 3352 }, { "epoch": 0.2656367597544068, "grad_norm": 1.4914775938723737, "learning_rate": 1.7226707219596918e-05, "loss": 0.3947, "step": 3353 }, { "epoch": 0.2657159833630422, "grad_norm": 2.4259006023562626, "learning_rate": 1.7224933359886676e-05, "loss": 0.4762, "step": 3354 }, { "epoch": 0.2657952069716776, "grad_norm": 2.3933572506990406, "learning_rate": 1.7223159024449338e-05, "loss": 0.5025, "step": 3355 }, { "epoch": 0.26587443058031296, "grad_norm": 1.9928635626609925, "learning_rate": 1.7221384213401732e-05, "loss": 0.3798, "step": 3356 }, { "epoch": 0.2659536541889483, "grad_norm": 1.6264149159310208, "learning_rate": 1.7219608926860726e-05, "loss": 0.3026, "step": 3357 }, { "epoch": 0.26603287779758367, "grad_norm": 1.505716507760458, "learning_rate": 1.721783316494321e-05, "loss": 0.3416, "step": 3358 }, { "epoch": 0.26611210140621905, "grad_norm": 1.7892726599381132, "learning_rate": 1.7216056927766106e-05, "loss": 0.4391, "step": 3359 }, { "epoch": 0.26619132501485443, "grad_norm": 1.8659356473520252, "learning_rate": 1.721428021544638e-05, "loss": 0.3934, "step": 3360 }, { "epoch": 0.2662705486234898, "grad_norm": 1.78167583266563, "learning_rate": 1.7212503028101012e-05, "loss": 0.4821, "step": 3361 }, { "epoch": 0.2663497722321252, "grad_norm": 2.01762735741912, "learning_rate": 1.721072536584702e-05, "loss": 0.5091, "step": 3362 }, { "epoch": 0.2664289958407606, "grad_norm": 1.5625136026564748, "learning_rate": 1.7208947228801464e-05, "loss": 0.3349, "step": 3363 }, { "epoch": 0.2665082194493959, "grad_norm": 1.553926046566275, "learning_rate": 1.7207168617081418e-05, "loss": 0.3269, "step": 3364 }, { "epoch": 0.2665874430580313, "grad_norm": 1.4846642854500622, "learning_rate": 1.7205389530804e-05, "loss": 0.2674, "step": 3365 }, { "epoch": 0.26666666666666666, "grad_norm": 1.495955137748792, "learning_rate": 1.7203609970086347e-05, "loss": 0.3027, "step": 3366 }, { "epoch": 0.26674589027530204, "grad_norm": 1.5461248926759212, "learning_rate": 1.720182993504564e-05, "loss": 0.3788, "step": 3367 }, { "epoch": 0.2668251138839374, "grad_norm": 1.6554169100869451, "learning_rate": 1.7200049425799087e-05, "loss": 0.3942, "step": 3368 }, { "epoch": 0.2669043374925728, "grad_norm": 1.4257443636439064, "learning_rate": 1.7198268442463923e-05, "loss": 0.3629, "step": 3369 }, { "epoch": 0.26698356110120813, "grad_norm": 1.8431318748270273, "learning_rate": 1.719648698515742e-05, "loss": 0.2901, "step": 3370 }, { "epoch": 0.2670627847098435, "grad_norm": 1.946288264880617, "learning_rate": 1.7194705053996873e-05, "loss": 0.3552, "step": 3371 }, { "epoch": 0.2671420083184789, "grad_norm": 1.6385894155512677, "learning_rate": 1.719292264909962e-05, "loss": 0.4054, "step": 3372 }, { "epoch": 0.2672212319271143, "grad_norm": 1.7888456740127965, "learning_rate": 1.7191139770583015e-05, "loss": 0.3932, "step": 3373 }, { "epoch": 0.26730045553574966, "grad_norm": 1.491248320105741, "learning_rate": 1.7189356418564463e-05, "loss": 0.3565, "step": 3374 }, { "epoch": 0.26737967914438504, "grad_norm": 1.5460736581095083, "learning_rate": 1.7187572593161382e-05, "loss": 0.2965, "step": 3375 }, { "epoch": 0.2674589027530204, "grad_norm": 1.5293542309810222, "learning_rate": 1.7185788294491232e-05, "loss": 0.3729, "step": 3376 }, { "epoch": 0.26753812636165575, "grad_norm": 1.3706419719168244, "learning_rate": 1.7184003522671497e-05, "loss": 0.2951, "step": 3377 }, { "epoch": 0.26761734997029113, "grad_norm": 1.608347860554224, "learning_rate": 1.7182218277819697e-05, "loss": 0.2526, "step": 3378 }, { "epoch": 0.2676965735789265, "grad_norm": 1.7595006193398166, "learning_rate": 1.718043256005338e-05, "loss": 0.3859, "step": 3379 }, { "epoch": 0.2677757971875619, "grad_norm": 1.7559585724803755, "learning_rate": 1.717864636949013e-05, "loss": 0.2931, "step": 3380 }, { "epoch": 0.2678550207961973, "grad_norm": 1.310024941510185, "learning_rate": 1.7176859706247563e-05, "loss": 0.3288, "step": 3381 }, { "epoch": 0.26793424440483266, "grad_norm": 1.513550056577479, "learning_rate": 1.717507257044331e-05, "loss": 0.4051, "step": 3382 }, { "epoch": 0.26801346801346804, "grad_norm": 1.2878885549535424, "learning_rate": 1.717328496219506e-05, "loss": 0.2257, "step": 3383 }, { "epoch": 0.26809269162210336, "grad_norm": 2.0902664750080624, "learning_rate": 1.7171496881620507e-05, "loss": 0.4951, "step": 3384 }, { "epoch": 0.26817191523073874, "grad_norm": 1.5848800451269245, "learning_rate": 1.716970832883739e-05, "loss": 0.3506, "step": 3385 }, { "epoch": 0.2682511388393741, "grad_norm": 1.610290992074119, "learning_rate": 1.716791930396348e-05, "loss": 0.394, "step": 3386 }, { "epoch": 0.2683303624480095, "grad_norm": 1.7897502883514513, "learning_rate": 1.716612980711657e-05, "loss": 0.4322, "step": 3387 }, { "epoch": 0.2684095860566449, "grad_norm": 1.5437628925966387, "learning_rate": 1.7164339838414496e-05, "loss": 0.3179, "step": 3388 }, { "epoch": 0.26848880966528027, "grad_norm": 1.4805249462135643, "learning_rate": 1.7162549397975118e-05, "loss": 0.2903, "step": 3389 }, { "epoch": 0.26856803327391565, "grad_norm": 1.4974867630290967, "learning_rate": 1.7160758485916325e-05, "loss": 0.2499, "step": 3390 }, { "epoch": 0.268647256882551, "grad_norm": 1.9093299717142924, "learning_rate": 1.715896710235604e-05, "loss": 0.3616, "step": 3391 }, { "epoch": 0.26872648049118636, "grad_norm": 1.7232279379099156, "learning_rate": 1.715717524741222e-05, "loss": 0.3729, "step": 3392 }, { "epoch": 0.26880570409982174, "grad_norm": 1.8772630322521775, "learning_rate": 1.7155382921202844e-05, "loss": 0.489, "step": 3393 }, { "epoch": 0.2688849277084571, "grad_norm": 1.8797427093619954, "learning_rate": 1.7153590123845938e-05, "loss": 0.4683, "step": 3394 }, { "epoch": 0.2689641513170925, "grad_norm": 1.7251500808938338, "learning_rate": 1.715179685545954e-05, "loss": 0.4664, "step": 3395 }, { "epoch": 0.2690433749257279, "grad_norm": 1.500267294745199, "learning_rate": 1.7150003116161734e-05, "loss": 0.3243, "step": 3396 }, { "epoch": 0.26912259853436327, "grad_norm": 1.6275369323286235, "learning_rate": 1.714820890607062e-05, "loss": 0.3648, "step": 3397 }, { "epoch": 0.2692018221429986, "grad_norm": 1.7570445115889985, "learning_rate": 1.714641422530435e-05, "loss": 0.5036, "step": 3398 }, { "epoch": 0.269281045751634, "grad_norm": 1.3641120873671535, "learning_rate": 1.7144619073981088e-05, "loss": 0.3056, "step": 3399 }, { "epoch": 0.26936026936026936, "grad_norm": 1.4855812915601856, "learning_rate": 1.7142823452219036e-05, "loss": 0.3156, "step": 3400 }, { "epoch": 0.26943949296890474, "grad_norm": 1.486186001565436, "learning_rate": 1.714102736013643e-05, "loss": 0.3461, "step": 3401 }, { "epoch": 0.2695187165775401, "grad_norm": 1.218889634796814, "learning_rate": 1.7139230797851537e-05, "loss": 0.3196, "step": 3402 }, { "epoch": 0.2695979401861755, "grad_norm": 1.756841247467928, "learning_rate": 1.7137433765482644e-05, "loss": 0.3885, "step": 3403 }, { "epoch": 0.2696771637948109, "grad_norm": 1.8770515314755276, "learning_rate": 1.713563626314808e-05, "loss": 0.5336, "step": 3404 }, { "epoch": 0.2697563874034462, "grad_norm": 1.4177512401656744, "learning_rate": 1.71338382909662e-05, "loss": 0.2604, "step": 3405 }, { "epoch": 0.2698356110120816, "grad_norm": 1.5552309993682953, "learning_rate": 1.71320398490554e-05, "loss": 0.3866, "step": 3406 }, { "epoch": 0.26991483462071697, "grad_norm": 1.6592529694386382, "learning_rate": 1.713024093753409e-05, "loss": 0.3642, "step": 3407 }, { "epoch": 0.26999405822935235, "grad_norm": 1.7573719912049914, "learning_rate": 1.7128441556520723e-05, "loss": 0.333, "step": 3408 }, { "epoch": 0.27007328183798773, "grad_norm": 1.5499733294923757, "learning_rate": 1.7126641706133782e-05, "loss": 0.3346, "step": 3409 }, { "epoch": 0.2701525054466231, "grad_norm": 1.6953161001930295, "learning_rate": 1.7124841386491774e-05, "loss": 0.3896, "step": 3410 }, { "epoch": 0.27023172905525844, "grad_norm": 1.3858870935401675, "learning_rate": 1.7123040597713242e-05, "loss": 0.2548, "step": 3411 }, { "epoch": 0.2703109526638938, "grad_norm": 1.59478076231617, "learning_rate": 1.7121239339916763e-05, "loss": 0.3358, "step": 3412 }, { "epoch": 0.2703901762725292, "grad_norm": 1.6838183742554305, "learning_rate": 1.7119437613220936e-05, "loss": 0.2646, "step": 3413 }, { "epoch": 0.2704693998811646, "grad_norm": 1.6209859192175429, "learning_rate": 1.71176354177444e-05, "loss": 0.4011, "step": 3414 }, { "epoch": 0.27054862348979997, "grad_norm": 1.5075837943245174, "learning_rate": 1.711583275360582e-05, "loss": 0.2835, "step": 3415 }, { "epoch": 0.27062784709843535, "grad_norm": 1.655221148733233, "learning_rate": 1.711402962092389e-05, "loss": 0.3185, "step": 3416 }, { "epoch": 0.27070707070707073, "grad_norm": 1.533622757074057, "learning_rate": 1.7112226019817345e-05, "loss": 0.2713, "step": 3417 }, { "epoch": 0.27078629431570606, "grad_norm": 1.6629319939851415, "learning_rate": 1.7110421950404935e-05, "loss": 0.4364, "step": 3418 }, { "epoch": 0.27086551792434144, "grad_norm": 1.6914745944315743, "learning_rate": 1.710861741280545e-05, "loss": 0.4079, "step": 3419 }, { "epoch": 0.2709447415329768, "grad_norm": 1.4721714867478424, "learning_rate": 1.710681240713772e-05, "loss": 0.2924, "step": 3420 }, { "epoch": 0.2710239651416122, "grad_norm": 1.7452123015392331, "learning_rate": 1.7105006933520584e-05, "loss": 0.3267, "step": 3421 }, { "epoch": 0.2711031887502476, "grad_norm": 1.5274338558979625, "learning_rate": 1.710320099207293e-05, "loss": 0.2803, "step": 3422 }, { "epoch": 0.27118241235888296, "grad_norm": 1.4656147474770578, "learning_rate": 1.7101394582913667e-05, "loss": 0.3104, "step": 3423 }, { "epoch": 0.27126163596751834, "grad_norm": 1.3700754454017707, "learning_rate": 1.709958770616174e-05, "loss": 0.2974, "step": 3424 }, { "epoch": 0.27134085957615367, "grad_norm": 1.3210020683231187, "learning_rate": 1.7097780361936128e-05, "loss": 0.2361, "step": 3425 }, { "epoch": 0.27142008318478905, "grad_norm": 1.4043889058183885, "learning_rate": 1.709597255035583e-05, "loss": 0.3122, "step": 3426 }, { "epoch": 0.27149930679342443, "grad_norm": 1.7570200327027756, "learning_rate": 1.709416427153988e-05, "loss": 0.4325, "step": 3427 }, { "epoch": 0.2715785304020598, "grad_norm": 1.5317336147697496, "learning_rate": 1.7092355525607352e-05, "loss": 0.3623, "step": 3428 }, { "epoch": 0.2716577540106952, "grad_norm": 1.6145151680008565, "learning_rate": 1.7090546312677335e-05, "loss": 0.333, "step": 3429 }, { "epoch": 0.2717369776193306, "grad_norm": 1.6430748318588881, "learning_rate": 1.7088736632868964e-05, "loss": 0.3505, "step": 3430 }, { "epoch": 0.27181620122796596, "grad_norm": 1.6904084585860188, "learning_rate": 1.7086926486301393e-05, "loss": 0.3139, "step": 3431 }, { "epoch": 0.2718954248366013, "grad_norm": 1.8389760852783352, "learning_rate": 1.7085115873093814e-05, "loss": 0.3519, "step": 3432 }, { "epoch": 0.27197464844523667, "grad_norm": 1.4048243211770552, "learning_rate": 1.7083304793365445e-05, "loss": 0.3459, "step": 3433 }, { "epoch": 0.27205387205387205, "grad_norm": 1.4151686475921654, "learning_rate": 1.7081493247235537e-05, "loss": 0.3709, "step": 3434 }, { "epoch": 0.27213309566250743, "grad_norm": 1.625735936780301, "learning_rate": 1.7079681234823374e-05, "loss": 0.3908, "step": 3435 }, { "epoch": 0.2722123192711428, "grad_norm": 1.6669548596155164, "learning_rate": 1.7077868756248265e-05, "loss": 0.4416, "step": 3436 }, { "epoch": 0.2722915428797782, "grad_norm": 1.6006588096745782, "learning_rate": 1.7076055811629556e-05, "loss": 0.4119, "step": 3437 }, { "epoch": 0.2723707664884136, "grad_norm": 1.667564668993029, "learning_rate": 1.7074242401086623e-05, "loss": 0.3719, "step": 3438 }, { "epoch": 0.2724499900970489, "grad_norm": 1.7789692914448836, "learning_rate": 1.7072428524738865e-05, "loss": 0.4219, "step": 3439 }, { "epoch": 0.2725292137056843, "grad_norm": 1.4521293138855655, "learning_rate": 1.707061418270572e-05, "loss": 0.357, "step": 3440 }, { "epoch": 0.27260843731431966, "grad_norm": 1.5012763092846912, "learning_rate": 1.706879937510665e-05, "loss": 0.409, "step": 3441 }, { "epoch": 0.27268766092295504, "grad_norm": 1.6692016419563391, "learning_rate": 1.7066984102061155e-05, "loss": 0.379, "step": 3442 }, { "epoch": 0.2727668845315904, "grad_norm": 1.7448511882401803, "learning_rate": 1.706516836368876e-05, "loss": 0.4669, "step": 3443 }, { "epoch": 0.2728461081402258, "grad_norm": 1.3450045257317975, "learning_rate": 1.7063352160109026e-05, "loss": 0.3617, "step": 3444 }, { "epoch": 0.27292533174886113, "grad_norm": 1.6888020855972348, "learning_rate": 1.7061535491441538e-05, "loss": 0.485, "step": 3445 }, { "epoch": 0.2730045553574965, "grad_norm": 2.0743685029768355, "learning_rate": 1.7059718357805915e-05, "loss": 0.4779, "step": 3446 }, { "epoch": 0.2730837789661319, "grad_norm": 1.6506907663741655, "learning_rate": 1.705790075932181e-05, "loss": 0.3791, "step": 3447 }, { "epoch": 0.2731630025747673, "grad_norm": 2.0570297165198115, "learning_rate": 1.7056082696108896e-05, "loss": 0.4215, "step": 3448 }, { "epoch": 0.27324222618340266, "grad_norm": 1.7779728335131182, "learning_rate": 1.7054264168286892e-05, "loss": 0.3329, "step": 3449 }, { "epoch": 0.27332144979203804, "grad_norm": 1.3885423578645373, "learning_rate": 1.7052445175975533e-05, "loss": 0.2732, "step": 3450 }, { "epoch": 0.2734006734006734, "grad_norm": 1.8481845829444703, "learning_rate": 1.7050625719294593e-05, "loss": 0.3973, "step": 3451 }, { "epoch": 0.27347989700930875, "grad_norm": 1.5123705819660078, "learning_rate": 1.7048805798363876e-05, "loss": 0.2943, "step": 3452 }, { "epoch": 0.27355912061794413, "grad_norm": 1.786858506354238, "learning_rate": 1.7046985413303215e-05, "loss": 0.4477, "step": 3453 }, { "epoch": 0.2736383442265795, "grad_norm": 1.5471780854791461, "learning_rate": 1.7045164564232474e-05, "loss": 0.3578, "step": 3454 }, { "epoch": 0.2737175678352149, "grad_norm": 1.7805195904238957, "learning_rate": 1.704334325127154e-05, "loss": 0.2828, "step": 3455 }, { "epoch": 0.2737967914438503, "grad_norm": 1.732430938778358, "learning_rate": 1.704152147454035e-05, "loss": 0.2986, "step": 3456 }, { "epoch": 0.27387601505248566, "grad_norm": 1.758498757053166, "learning_rate": 1.7039699234158846e-05, "loss": 0.423, "step": 3457 }, { "epoch": 0.27395523866112104, "grad_norm": 1.6006205311132922, "learning_rate": 1.7037876530247025e-05, "loss": 0.3457, "step": 3458 }, { "epoch": 0.27403446226975636, "grad_norm": 1.3896396579463255, "learning_rate": 1.7036053362924896e-05, "loss": 0.3312, "step": 3459 }, { "epoch": 0.27411368587839174, "grad_norm": 1.764344372472803, "learning_rate": 1.7034229732312512e-05, "loss": 0.3944, "step": 3460 }, { "epoch": 0.2741929094870271, "grad_norm": 1.7081658344683073, "learning_rate": 1.703240563852994e-05, "loss": 0.4221, "step": 3461 }, { "epoch": 0.2742721330956625, "grad_norm": 1.7209613237632448, "learning_rate": 1.70305810816973e-05, "loss": 0.3428, "step": 3462 }, { "epoch": 0.2743513567042979, "grad_norm": 1.8368380787938738, "learning_rate": 1.7028756061934722e-05, "loss": 0.3526, "step": 3463 }, { "epoch": 0.27443058031293327, "grad_norm": 1.5457859204276865, "learning_rate": 1.702693057936238e-05, "loss": 0.3908, "step": 3464 }, { "epoch": 0.27450980392156865, "grad_norm": 1.756414280743525, "learning_rate": 1.702510463410047e-05, "loss": 0.4255, "step": 3465 }, { "epoch": 0.274589027530204, "grad_norm": 1.5175551884141112, "learning_rate": 1.7023278226269222e-05, "loss": 0.3431, "step": 3466 }, { "epoch": 0.27466825113883936, "grad_norm": 1.763552152346543, "learning_rate": 1.7021451355988895e-05, "loss": 0.4228, "step": 3467 }, { "epoch": 0.27474747474747474, "grad_norm": 1.2026595961010276, "learning_rate": 1.7019624023379784e-05, "loss": 0.2514, "step": 3468 }, { "epoch": 0.2748266983561101, "grad_norm": 1.691867059457729, "learning_rate": 1.7017796228562206e-05, "loss": 0.5292, "step": 3469 }, { "epoch": 0.2749059219647455, "grad_norm": 1.560317059000484, "learning_rate": 1.7015967971656513e-05, "loss": 0.3913, "step": 3470 }, { "epoch": 0.2749851455733809, "grad_norm": 1.4002738026635486, "learning_rate": 1.7014139252783092e-05, "loss": 0.3197, "step": 3471 }, { "epoch": 0.27506436918201627, "grad_norm": 1.6314215963213896, "learning_rate": 1.7012310072062348e-05, "loss": 0.2881, "step": 3472 }, { "epoch": 0.2751435927906516, "grad_norm": 1.4151646134979659, "learning_rate": 1.7010480429614726e-05, "loss": 0.3346, "step": 3473 }, { "epoch": 0.275222816399287, "grad_norm": 1.650463046409657, "learning_rate": 1.70086503255607e-05, "loss": 0.3368, "step": 3474 }, { "epoch": 0.27530204000792236, "grad_norm": 1.624409202663714, "learning_rate": 1.7006819760020773e-05, "loss": 0.4098, "step": 3475 }, { "epoch": 0.27538126361655774, "grad_norm": 1.4761181870737787, "learning_rate": 1.700498873311548e-05, "loss": 0.3473, "step": 3476 }, { "epoch": 0.2754604872251931, "grad_norm": 1.5863268081605195, "learning_rate": 1.7003157244965387e-05, "loss": 0.4174, "step": 3477 }, { "epoch": 0.2755397108338285, "grad_norm": 1.6749235256213921, "learning_rate": 1.700132529569109e-05, "loss": 0.3959, "step": 3478 }, { "epoch": 0.2756189344424639, "grad_norm": 1.7993624870938862, "learning_rate": 1.69994928854132e-05, "loss": 0.4267, "step": 3479 }, { "epoch": 0.2756981580510992, "grad_norm": 1.8977725110457364, "learning_rate": 1.6997660014252392e-05, "loss": 0.4097, "step": 3480 }, { "epoch": 0.2757773816597346, "grad_norm": 1.6812032829572645, "learning_rate": 1.699582668232934e-05, "loss": 0.4293, "step": 3481 }, { "epoch": 0.27585660526836997, "grad_norm": 1.4960697291126734, "learning_rate": 1.6993992889764758e-05, "loss": 0.3356, "step": 3482 }, { "epoch": 0.27593582887700535, "grad_norm": 1.5295950997830625, "learning_rate": 1.69921586366794e-05, "loss": 0.3528, "step": 3483 }, { "epoch": 0.27601505248564073, "grad_norm": 1.629124127734205, "learning_rate": 1.6990323923194042e-05, "loss": 0.4128, "step": 3484 }, { "epoch": 0.2760942760942761, "grad_norm": 1.799205153426002, "learning_rate": 1.698848874942949e-05, "loss": 0.4616, "step": 3485 }, { "epoch": 0.27617349970291144, "grad_norm": 1.4499513586437884, "learning_rate": 1.698665311550658e-05, "loss": 0.3323, "step": 3486 }, { "epoch": 0.2762527233115468, "grad_norm": 1.6006614593568531, "learning_rate": 1.6984817021546177e-05, "loss": 0.3606, "step": 3487 }, { "epoch": 0.2763319469201822, "grad_norm": 1.6802327549415106, "learning_rate": 1.6982980467669183e-05, "loss": 0.4688, "step": 3488 }, { "epoch": 0.2764111705288176, "grad_norm": 1.5479145940067485, "learning_rate": 1.6981143453996524e-05, "loss": 0.2289, "step": 3489 }, { "epoch": 0.27649039413745297, "grad_norm": 1.506043670783407, "learning_rate": 1.697930598064916e-05, "loss": 0.3444, "step": 3490 }, { "epoch": 0.27656961774608835, "grad_norm": 1.7625349504827108, "learning_rate": 1.697746804774808e-05, "loss": 0.4255, "step": 3491 }, { "epoch": 0.27664884135472373, "grad_norm": 1.5368530244966128, "learning_rate": 1.6975629655414304e-05, "loss": 0.303, "step": 3492 }, { "epoch": 0.27672806496335906, "grad_norm": 1.6029409993459238, "learning_rate": 1.6973790803768875e-05, "loss": 0.3902, "step": 3493 }, { "epoch": 0.27680728857199444, "grad_norm": 1.5809861960759883, "learning_rate": 1.6971951492932882e-05, "loss": 0.2595, "step": 3494 }, { "epoch": 0.2768865121806298, "grad_norm": 1.9003975628676633, "learning_rate": 1.697011172302743e-05, "loss": 0.4297, "step": 3495 }, { "epoch": 0.2769657357892652, "grad_norm": 1.7487107537465267, "learning_rate": 1.696827149417366e-05, "loss": 0.4236, "step": 3496 }, { "epoch": 0.2770449593979006, "grad_norm": 1.5212680768840097, "learning_rate": 1.696643080649274e-05, "loss": 0.3356, "step": 3497 }, { "epoch": 0.27712418300653596, "grad_norm": 1.9136349947741031, "learning_rate": 1.696458966010587e-05, "loss": 0.4792, "step": 3498 }, { "epoch": 0.27720340661517134, "grad_norm": 1.4732937295891226, "learning_rate": 1.6962748055134283e-05, "loss": 0.3877, "step": 3499 }, { "epoch": 0.27728263022380667, "grad_norm": 1.8798186862878368, "learning_rate": 1.696090599169924e-05, "loss": 0.4363, "step": 3500 }, { "epoch": 0.27736185383244205, "grad_norm": 1.768072113898181, "learning_rate": 1.695906346992203e-05, "loss": 0.405, "step": 3501 }, { "epoch": 0.27744107744107743, "grad_norm": 1.5366420685646789, "learning_rate": 1.6957220489923978e-05, "loss": 0.334, "step": 3502 }, { "epoch": 0.2775203010497128, "grad_norm": 1.467712076375398, "learning_rate": 1.695537705182643e-05, "loss": 0.2649, "step": 3503 }, { "epoch": 0.2775995246583482, "grad_norm": 1.7573735086257902, "learning_rate": 1.695353315575077e-05, "loss": 0.3206, "step": 3504 }, { "epoch": 0.2776787482669836, "grad_norm": 1.6932444911950197, "learning_rate": 1.6951688801818413e-05, "loss": 0.2739, "step": 3505 }, { "epoch": 0.27775797187561896, "grad_norm": 1.8150111543318028, "learning_rate": 1.6949843990150798e-05, "loss": 0.4105, "step": 3506 }, { "epoch": 0.2778371954842543, "grad_norm": 1.6464606181851476, "learning_rate": 1.6947998720869394e-05, "loss": 0.3744, "step": 3507 }, { "epoch": 0.27791641909288967, "grad_norm": 1.47741556209451, "learning_rate": 1.6946152994095705e-05, "loss": 0.3519, "step": 3508 }, { "epoch": 0.27799564270152505, "grad_norm": 1.466738540705516, "learning_rate": 1.6944306809951264e-05, "loss": 0.2829, "step": 3509 }, { "epoch": 0.27807486631016043, "grad_norm": 1.6131810392094053, "learning_rate": 1.694246016855764e-05, "loss": 0.3526, "step": 3510 }, { "epoch": 0.2781540899187958, "grad_norm": 1.4707793401323985, "learning_rate": 1.694061307003641e-05, "loss": 0.2998, "step": 3511 }, { "epoch": 0.2782333135274312, "grad_norm": 1.6707136558403675, "learning_rate": 1.693876551450921e-05, "loss": 0.3658, "step": 3512 }, { "epoch": 0.2783125371360666, "grad_norm": 1.646277141807071, "learning_rate": 1.693691750209769e-05, "loss": 0.3348, "step": 3513 }, { "epoch": 0.2783917607447019, "grad_norm": 1.589557915750613, "learning_rate": 1.6935069032923525e-05, "loss": 0.4126, "step": 3514 }, { "epoch": 0.2784709843533373, "grad_norm": 1.744988982843674, "learning_rate": 1.6933220107108438e-05, "loss": 0.5218, "step": 3515 }, { "epoch": 0.27855020796197266, "grad_norm": 1.8068087976162832, "learning_rate": 1.6931370724774166e-05, "loss": 0.3489, "step": 3516 }, { "epoch": 0.27862943157060804, "grad_norm": 1.601528820318888, "learning_rate": 1.6929520886042486e-05, "loss": 0.3275, "step": 3517 }, { "epoch": 0.2787086551792434, "grad_norm": 1.5622175615918097, "learning_rate": 1.6927670591035195e-05, "loss": 0.4182, "step": 3518 }, { "epoch": 0.2787878787878788, "grad_norm": 1.8951382408782513, "learning_rate": 1.692581983987413e-05, "loss": 0.3882, "step": 3519 }, { "epoch": 0.2788671023965142, "grad_norm": 1.6295321212478375, "learning_rate": 1.6923968632681155e-05, "loss": 0.3463, "step": 3520 }, { "epoch": 0.2789463260051495, "grad_norm": 1.4200048304005606, "learning_rate": 1.6922116969578163e-05, "loss": 0.2261, "step": 3521 }, { "epoch": 0.2790255496137849, "grad_norm": 1.7766957545518904, "learning_rate": 1.692026485068707e-05, "loss": 0.4466, "step": 3522 }, { "epoch": 0.2791047732224203, "grad_norm": 1.785747832773107, "learning_rate": 1.6918412276129837e-05, "loss": 0.4344, "step": 3523 }, { "epoch": 0.27918399683105566, "grad_norm": 1.6077267271447753, "learning_rate": 1.691655924602845e-05, "loss": 0.3834, "step": 3524 }, { "epoch": 0.27926322043969104, "grad_norm": 1.9594099423058808, "learning_rate": 1.6914705760504913e-05, "loss": 0.465, "step": 3525 }, { "epoch": 0.2793424440483264, "grad_norm": 1.492125004500428, "learning_rate": 1.6912851819681272e-05, "loss": 0.266, "step": 3526 }, { "epoch": 0.27942166765696175, "grad_norm": 1.8400968383905338, "learning_rate": 1.69109974236796e-05, "loss": 0.4018, "step": 3527 }, { "epoch": 0.27950089126559713, "grad_norm": 1.526204915859762, "learning_rate": 1.6909142572622003e-05, "loss": 0.3215, "step": 3528 }, { "epoch": 0.2795801148742325, "grad_norm": 1.7297234727761364, "learning_rate": 1.6907287266630614e-05, "loss": 0.3751, "step": 3529 }, { "epoch": 0.2796593384828679, "grad_norm": 1.602004668749506, "learning_rate": 1.6905431505827595e-05, "loss": 0.3161, "step": 3530 }, { "epoch": 0.2797385620915033, "grad_norm": 1.7382902472599635, "learning_rate": 1.6903575290335136e-05, "loss": 0.2935, "step": 3531 }, { "epoch": 0.27981778570013865, "grad_norm": 1.4289480413399747, "learning_rate": 1.690171862027546e-05, "loss": 0.2746, "step": 3532 }, { "epoch": 0.27989700930877404, "grad_norm": 1.7974312161166537, "learning_rate": 1.6899861495770827e-05, "loss": 0.4417, "step": 3533 }, { "epoch": 0.27997623291740936, "grad_norm": 1.7742212172800194, "learning_rate": 1.689800391694351e-05, "loss": 0.2631, "step": 3534 }, { "epoch": 0.28005545652604474, "grad_norm": 1.6929891102217385, "learning_rate": 1.689614588391583e-05, "loss": 0.4269, "step": 3535 }, { "epoch": 0.2801346801346801, "grad_norm": 1.3175051341640325, "learning_rate": 1.689428739681012e-05, "loss": 0.3223, "step": 3536 }, { "epoch": 0.2802139037433155, "grad_norm": 1.409101625512571, "learning_rate": 1.6892428455748762e-05, "loss": 0.2801, "step": 3537 }, { "epoch": 0.2802931273519509, "grad_norm": 1.2630127014473755, "learning_rate": 1.6890569060854156e-05, "loss": 0.26, "step": 3538 }, { "epoch": 0.28037235096058627, "grad_norm": 1.8192073478133002, "learning_rate": 1.6888709212248728e-05, "loss": 0.4691, "step": 3539 }, { "epoch": 0.28045157456922165, "grad_norm": 1.9492757929953899, "learning_rate": 1.6886848910054947e-05, "loss": 0.5208, "step": 3540 }, { "epoch": 0.280530798177857, "grad_norm": 1.7001053794245082, "learning_rate": 1.6884988154395304e-05, "loss": 0.3743, "step": 3541 }, { "epoch": 0.28061002178649236, "grad_norm": 1.555818105861714, "learning_rate": 1.688312694539232e-05, "loss": 0.4427, "step": 3542 }, { "epoch": 0.28068924539512774, "grad_norm": 1.7617632297817103, "learning_rate": 1.6881265283168543e-05, "loss": 0.4516, "step": 3543 }, { "epoch": 0.2807684690037631, "grad_norm": 1.7065956849786186, "learning_rate": 1.6879403167846556e-05, "loss": 0.3538, "step": 3544 }, { "epoch": 0.2808476926123985, "grad_norm": 1.592015712746443, "learning_rate": 1.6877540599548977e-05, "loss": 0.4562, "step": 3545 }, { "epoch": 0.2809269162210339, "grad_norm": 1.7462708848976471, "learning_rate": 1.6875677578398442e-05, "loss": 0.4419, "step": 3546 }, { "epoch": 0.28100613982966927, "grad_norm": 2.0023386855341196, "learning_rate": 1.6873814104517617e-05, "loss": 0.4469, "step": 3547 }, { "epoch": 0.2810853634383046, "grad_norm": 1.6070230401212557, "learning_rate": 1.6871950178029216e-05, "loss": 0.3239, "step": 3548 }, { "epoch": 0.28116458704694, "grad_norm": 1.5582492370040515, "learning_rate": 1.6870085799055956e-05, "loss": 0.3928, "step": 3549 }, { "epoch": 0.28124381065557535, "grad_norm": 1.4845910874482904, "learning_rate": 1.6868220967720604e-05, "loss": 0.3363, "step": 3550 }, { "epoch": 0.28132303426421074, "grad_norm": 1.7708579870049843, "learning_rate": 1.686635568414595e-05, "loss": 0.4809, "step": 3551 }, { "epoch": 0.2814022578728461, "grad_norm": 1.5147265926372622, "learning_rate": 1.686448994845481e-05, "loss": 0.3501, "step": 3552 }, { "epoch": 0.2814814814814815, "grad_norm": 1.9661251600762675, "learning_rate": 1.6862623760770038e-05, "loss": 0.5826, "step": 3553 }, { "epoch": 0.2815607050901169, "grad_norm": 1.5511779143150988, "learning_rate": 1.6860757121214513e-05, "loss": 0.229, "step": 3554 }, { "epoch": 0.2816399286987522, "grad_norm": 1.6877606595650976, "learning_rate": 1.685889002991114e-05, "loss": 0.477, "step": 3555 }, { "epoch": 0.2817191523073876, "grad_norm": 1.550230499538908, "learning_rate": 1.6857022486982865e-05, "loss": 0.3786, "step": 3556 }, { "epoch": 0.28179837591602297, "grad_norm": 1.767368902490723, "learning_rate": 1.6855154492552656e-05, "loss": 0.3383, "step": 3557 }, { "epoch": 0.28187759952465835, "grad_norm": 1.465005676072625, "learning_rate": 1.6853286046743505e-05, "loss": 0.3332, "step": 3558 }, { "epoch": 0.28195682313329373, "grad_norm": 1.6038488986910835, "learning_rate": 1.6851417149678442e-05, "loss": 0.3883, "step": 3559 }, { "epoch": 0.2820360467419291, "grad_norm": 1.636018145618707, "learning_rate": 1.684954780148053e-05, "loss": 0.3857, "step": 3560 }, { "epoch": 0.2821152703505645, "grad_norm": 1.5417685126993563, "learning_rate": 1.684767800227285e-05, "loss": 0.3102, "step": 3561 }, { "epoch": 0.2821944939591998, "grad_norm": 1.5205095980407326, "learning_rate": 1.6845807752178528e-05, "loss": 0.361, "step": 3562 }, { "epoch": 0.2822737175678352, "grad_norm": 1.5356691480212799, "learning_rate": 1.68439370513207e-05, "loss": 0.2851, "step": 3563 }, { "epoch": 0.2823529411764706, "grad_norm": 1.7413950399991796, "learning_rate": 1.6842065899822548e-05, "loss": 0.3764, "step": 3564 }, { "epoch": 0.28243216478510597, "grad_norm": 1.9135949586534597, "learning_rate": 1.6840194297807283e-05, "loss": 0.4358, "step": 3565 }, { "epoch": 0.28251138839374135, "grad_norm": 1.9011800764645568, "learning_rate": 1.6838322245398135e-05, "loss": 0.3765, "step": 3566 }, { "epoch": 0.28259061200237673, "grad_norm": 1.6927354842088744, "learning_rate": 1.6836449742718367e-05, "loss": 0.3494, "step": 3567 }, { "epoch": 0.28266983561101205, "grad_norm": 1.7830644087708565, "learning_rate": 1.6834576789891282e-05, "loss": 0.4242, "step": 3568 }, { "epoch": 0.28274905921964744, "grad_norm": 1.8936036889716845, "learning_rate": 1.68327033870402e-05, "loss": 0.4626, "step": 3569 }, { "epoch": 0.2828282828282828, "grad_norm": 1.5418801057911977, "learning_rate": 1.6830829534288475e-05, "loss": 0.3039, "step": 3570 }, { "epoch": 0.2829075064369182, "grad_norm": 1.9611365615816758, "learning_rate": 1.6828955231759495e-05, "loss": 0.3371, "step": 3571 }, { "epoch": 0.2829867300455536, "grad_norm": 1.54659309175576, "learning_rate": 1.682708047957667e-05, "loss": 0.3619, "step": 3572 }, { "epoch": 0.28306595365418896, "grad_norm": 1.86565103419617, "learning_rate": 1.682520527786345e-05, "loss": 0.4165, "step": 3573 }, { "epoch": 0.28314517726282434, "grad_norm": 1.6522161904280512, "learning_rate": 1.6823329626743298e-05, "loss": 0.2955, "step": 3574 }, { "epoch": 0.28322440087145967, "grad_norm": 1.4667178434231913, "learning_rate": 1.6821453526339727e-05, "loss": 0.3278, "step": 3575 }, { "epoch": 0.28330362448009505, "grad_norm": 1.838070548155054, "learning_rate": 1.6819576976776262e-05, "loss": 0.3991, "step": 3576 }, { "epoch": 0.28338284808873043, "grad_norm": 1.8244461812184616, "learning_rate": 1.6817699978176464e-05, "loss": 0.4738, "step": 3577 }, { "epoch": 0.2834620716973658, "grad_norm": 1.759398180587922, "learning_rate": 1.681582253066393e-05, "loss": 0.3788, "step": 3578 }, { "epoch": 0.2835412953060012, "grad_norm": 1.4334897523217673, "learning_rate": 1.681394463436228e-05, "loss": 0.2866, "step": 3579 }, { "epoch": 0.2836205189146366, "grad_norm": 1.7906084844323114, "learning_rate": 1.6812066289395157e-05, "loss": 0.5063, "step": 3580 }, { "epoch": 0.28369974252327196, "grad_norm": 1.6472053512655485, "learning_rate": 1.681018749588625e-05, "loss": 0.3431, "step": 3581 }, { "epoch": 0.2837789661319073, "grad_norm": 1.5857126964109334, "learning_rate": 1.6808308253959263e-05, "loss": 0.3562, "step": 3582 }, { "epoch": 0.28385818974054267, "grad_norm": 1.5514486881035434, "learning_rate": 1.680642856373794e-05, "loss": 0.4192, "step": 3583 }, { "epoch": 0.28393741334917805, "grad_norm": 1.6444978527983385, "learning_rate": 1.680454842534604e-05, "loss": 0.3861, "step": 3584 }, { "epoch": 0.28401663695781343, "grad_norm": 1.7347252108863274, "learning_rate": 1.6802667838907374e-05, "loss": 0.3744, "step": 3585 }, { "epoch": 0.2840958605664488, "grad_norm": 1.7459594993989513, "learning_rate": 1.680078680454576e-05, "loss": 0.4487, "step": 3586 }, { "epoch": 0.2841750841750842, "grad_norm": 1.5893724205079869, "learning_rate": 1.6798905322385063e-05, "loss": 0.3463, "step": 3587 }, { "epoch": 0.2842543077837196, "grad_norm": 1.9394135541698432, "learning_rate": 1.6797023392549157e-05, "loss": 0.4756, "step": 3588 }, { "epoch": 0.2843335313923549, "grad_norm": 1.6215229282396264, "learning_rate": 1.679514101516197e-05, "loss": 0.3123, "step": 3589 }, { "epoch": 0.2844127550009903, "grad_norm": 1.5051473318343307, "learning_rate": 1.6793258190347445e-05, "loss": 0.3647, "step": 3590 }, { "epoch": 0.28449197860962566, "grad_norm": 1.6319692724433477, "learning_rate": 1.679137491822955e-05, "loss": 0.3785, "step": 3591 }, { "epoch": 0.28457120221826104, "grad_norm": 1.2785138619559915, "learning_rate": 1.6789491198932302e-05, "loss": 0.3282, "step": 3592 }, { "epoch": 0.2846504258268964, "grad_norm": 1.5242877948579932, "learning_rate": 1.6787607032579724e-05, "loss": 0.355, "step": 3593 }, { "epoch": 0.2847296494355318, "grad_norm": 1.4239024085664382, "learning_rate": 1.678572241929588e-05, "loss": 0.3155, "step": 3594 }, { "epoch": 0.2848088730441672, "grad_norm": 1.7002248937531939, "learning_rate": 1.6783837359204868e-05, "loss": 0.4815, "step": 3595 }, { "epoch": 0.2848880966528025, "grad_norm": 1.8248986115549153, "learning_rate": 1.6781951852430813e-05, "loss": 0.3612, "step": 3596 }, { "epoch": 0.2849673202614379, "grad_norm": 1.3955557988943879, "learning_rate": 1.6780065899097853e-05, "loss": 0.2804, "step": 3597 }, { "epoch": 0.2850465438700733, "grad_norm": 1.6120218485253899, "learning_rate": 1.677817949933018e-05, "loss": 0.3879, "step": 3598 }, { "epoch": 0.28512576747870866, "grad_norm": 1.4285686904092474, "learning_rate": 1.6776292653252e-05, "loss": 0.2915, "step": 3599 }, { "epoch": 0.28520499108734404, "grad_norm": 1.9313834081163612, "learning_rate": 1.6774405360987556e-05, "loss": 0.4056, "step": 3600 }, { "epoch": 0.2852842146959794, "grad_norm": 1.496380441023455, "learning_rate": 1.6772517622661115e-05, "loss": 0.2844, "step": 3601 }, { "epoch": 0.2853634383046148, "grad_norm": 1.4792953236678106, "learning_rate": 1.6770629438396973e-05, "loss": 0.3116, "step": 3602 }, { "epoch": 0.28544266191325013, "grad_norm": 1.5181475927751238, "learning_rate": 1.676874080831947e-05, "loss": 0.4181, "step": 3603 }, { "epoch": 0.2855218855218855, "grad_norm": 1.4877365513085468, "learning_rate": 1.676685173255294e-05, "loss": 0.342, "step": 3604 }, { "epoch": 0.2856011091305209, "grad_norm": 1.8036031255484326, "learning_rate": 1.6764962211221796e-05, "loss": 0.4385, "step": 3605 }, { "epoch": 0.2856803327391563, "grad_norm": 1.486537671144364, "learning_rate": 1.6763072244450435e-05, "loss": 0.3813, "step": 3606 }, { "epoch": 0.28575955634779165, "grad_norm": 1.4513846095705085, "learning_rate": 1.676118183236331e-05, "loss": 0.2682, "step": 3607 }, { "epoch": 0.28583877995642704, "grad_norm": 1.550222438443536, "learning_rate": 1.6759290975084894e-05, "loss": 0.301, "step": 3608 }, { "epoch": 0.28591800356506236, "grad_norm": 1.3022403793706947, "learning_rate": 1.675739967273969e-05, "loss": 0.2648, "step": 3609 }, { "epoch": 0.28599722717369774, "grad_norm": 1.6172990474791722, "learning_rate": 1.675550792545223e-05, "loss": 0.4253, "step": 3610 }, { "epoch": 0.2860764507823331, "grad_norm": 1.5438222467284817, "learning_rate": 1.6753615733347085e-05, "loss": 0.4286, "step": 3611 }, { "epoch": 0.2861556743909685, "grad_norm": 1.8211315822390446, "learning_rate": 1.6751723096548834e-05, "loss": 0.4643, "step": 3612 }, { "epoch": 0.2862348979996039, "grad_norm": 1.4013422933530293, "learning_rate": 1.6749830015182106e-05, "loss": 0.297, "step": 3613 }, { "epoch": 0.28631412160823927, "grad_norm": 1.7063383931892007, "learning_rate": 1.6747936489371552e-05, "loss": 0.4357, "step": 3614 }, { "epoch": 0.28639334521687465, "grad_norm": 1.5081876713250542, "learning_rate": 1.674604251924185e-05, "loss": 0.3652, "step": 3615 }, { "epoch": 0.28647256882551, "grad_norm": 1.591421911800057, "learning_rate": 1.6744148104917705e-05, "loss": 0.4056, "step": 3616 }, { "epoch": 0.28655179243414536, "grad_norm": 1.480660887258956, "learning_rate": 1.6742253246523856e-05, "loss": 0.3463, "step": 3617 }, { "epoch": 0.28663101604278074, "grad_norm": 1.3844072874530815, "learning_rate": 1.6740357944185074e-05, "loss": 0.3125, "step": 3618 }, { "epoch": 0.2867102396514161, "grad_norm": 1.180234939996569, "learning_rate": 1.6738462198026154e-05, "loss": 0.3364, "step": 3619 }, { "epoch": 0.2867894632600515, "grad_norm": 1.3599034163326482, "learning_rate": 1.6736566008171925e-05, "loss": 0.3164, "step": 3620 }, { "epoch": 0.2868686868686869, "grad_norm": 1.8749851622283111, "learning_rate": 1.6734669374747237e-05, "loss": 0.4001, "step": 3621 }, { "epoch": 0.28694791047732227, "grad_norm": 1.5930901287496209, "learning_rate": 1.6732772297876975e-05, "loss": 0.3607, "step": 3622 }, { "epoch": 0.2870271340859576, "grad_norm": 1.3820294439640533, "learning_rate": 1.6730874777686053e-05, "loss": 0.331, "step": 3623 }, { "epoch": 0.287106357694593, "grad_norm": 1.3583426516974253, "learning_rate": 1.6728976814299413e-05, "loss": 0.3344, "step": 3624 }, { "epoch": 0.28718558130322835, "grad_norm": 1.575334033532164, "learning_rate": 1.6727078407842028e-05, "loss": 0.3369, "step": 3625 }, { "epoch": 0.28726480491186374, "grad_norm": 1.9255241823776548, "learning_rate": 1.67251795584389e-05, "loss": 0.3792, "step": 3626 }, { "epoch": 0.2873440285204991, "grad_norm": 1.5721585979541202, "learning_rate": 1.6723280266215057e-05, "loss": 0.3417, "step": 3627 }, { "epoch": 0.2874232521291345, "grad_norm": 1.3321013125151693, "learning_rate": 1.672138053129556e-05, "loss": 0.336, "step": 3628 }, { "epoch": 0.2875024757377699, "grad_norm": 1.5977789410414887, "learning_rate": 1.6719480353805493e-05, "loss": 0.3966, "step": 3629 }, { "epoch": 0.2875816993464052, "grad_norm": 1.7722218042203264, "learning_rate": 1.671757973386998e-05, "loss": 0.3594, "step": 3630 }, { "epoch": 0.2876609229550406, "grad_norm": 2.4183955376847033, "learning_rate": 1.6715678671614162e-05, "loss": 0.4452, "step": 3631 }, { "epoch": 0.28774014656367597, "grad_norm": 1.544197375645035, "learning_rate": 1.6713777167163215e-05, "loss": 0.3337, "step": 3632 }, { "epoch": 0.28781937017231135, "grad_norm": 1.775270778435445, "learning_rate": 1.6711875220642352e-05, "loss": 0.4508, "step": 3633 }, { "epoch": 0.28789859378094673, "grad_norm": 1.4933435345664674, "learning_rate": 1.6709972832176797e-05, "loss": 0.322, "step": 3634 }, { "epoch": 0.2879778173895821, "grad_norm": 1.8384884621027973, "learning_rate": 1.670807000189182e-05, "loss": 0.3989, "step": 3635 }, { "epoch": 0.2880570409982175, "grad_norm": 1.6849671471603531, "learning_rate": 1.6706166729912712e-05, "loss": 0.3266, "step": 3636 }, { "epoch": 0.2881362646068528, "grad_norm": 1.833308292889901, "learning_rate": 1.670426301636479e-05, "loss": 0.3839, "step": 3637 }, { "epoch": 0.2882154882154882, "grad_norm": 1.5373957630120187, "learning_rate": 1.6702358861373408e-05, "loss": 0.3622, "step": 3638 }, { "epoch": 0.2882947118241236, "grad_norm": 1.552858478187155, "learning_rate": 1.6700454265063943e-05, "loss": 0.349, "step": 3639 }, { "epoch": 0.28837393543275897, "grad_norm": 1.6062859579383988, "learning_rate": 1.6698549227561805e-05, "loss": 0.3414, "step": 3640 }, { "epoch": 0.28845315904139435, "grad_norm": 1.7176547535061093, "learning_rate": 1.6696643748992434e-05, "loss": 0.3501, "step": 3641 }, { "epoch": 0.28853238265002973, "grad_norm": 1.5550389555711928, "learning_rate": 1.6694737829481292e-05, "loss": 0.3445, "step": 3642 }, { "epoch": 0.2886116062586651, "grad_norm": 1.7039094289641743, "learning_rate": 1.669283146915388e-05, "loss": 0.3559, "step": 3643 }, { "epoch": 0.28869082986730044, "grad_norm": 1.5778669974174928, "learning_rate": 1.6690924668135718e-05, "loss": 0.3288, "step": 3644 }, { "epoch": 0.2887700534759358, "grad_norm": 1.462204256319542, "learning_rate": 1.668901742655236e-05, "loss": 0.3299, "step": 3645 }, { "epoch": 0.2888492770845712, "grad_norm": 1.9797578307548085, "learning_rate": 1.6687109744529394e-05, "loss": 0.3228, "step": 3646 }, { "epoch": 0.2889285006932066, "grad_norm": 1.8619979227611663, "learning_rate": 1.6685201622192422e-05, "loss": 0.3885, "step": 3647 }, { "epoch": 0.28900772430184196, "grad_norm": 1.7633755501190216, "learning_rate": 1.6683293059667096e-05, "loss": 0.3463, "step": 3648 }, { "epoch": 0.28908694791047734, "grad_norm": 1.7407588148043072, "learning_rate": 1.6681384057079076e-05, "loss": 0.365, "step": 3649 }, { "epoch": 0.28916617151911267, "grad_norm": 1.63931320851029, "learning_rate": 1.6679474614554066e-05, "loss": 0.3673, "step": 3650 }, { "epoch": 0.28924539512774805, "grad_norm": 1.701622710902603, "learning_rate": 1.667756473221779e-05, "loss": 0.3408, "step": 3651 }, { "epoch": 0.28932461873638343, "grad_norm": 1.7940237286487946, "learning_rate": 1.667565441019601e-05, "loss": 0.4036, "step": 3652 }, { "epoch": 0.2894038423450188, "grad_norm": 1.3365666457609597, "learning_rate": 1.6673743648614507e-05, "loss": 0.2774, "step": 3653 }, { "epoch": 0.2894830659536542, "grad_norm": 1.617247333049898, "learning_rate": 1.66718324475991e-05, "loss": 0.3726, "step": 3654 }, { "epoch": 0.2895622895622896, "grad_norm": 1.801136603704768, "learning_rate": 1.6669920807275622e-05, "loss": 0.4702, "step": 3655 }, { "epoch": 0.28964151317092496, "grad_norm": 1.6496845076227367, "learning_rate": 1.666800872776996e-05, "loss": 0.4065, "step": 3656 }, { "epoch": 0.2897207367795603, "grad_norm": 1.6073287286430564, "learning_rate": 1.6666096209208e-05, "loss": 0.3445, "step": 3657 }, { "epoch": 0.28979996038819567, "grad_norm": 2.0635826256773475, "learning_rate": 1.6664183251715687e-05, "loss": 0.4878, "step": 3658 }, { "epoch": 0.28987918399683105, "grad_norm": 1.5853944059926224, "learning_rate": 1.666226985541897e-05, "loss": 0.3283, "step": 3659 }, { "epoch": 0.28995840760546643, "grad_norm": 1.5294033148153279, "learning_rate": 1.666035602044384e-05, "loss": 0.2541, "step": 3660 }, { "epoch": 0.2900376312141018, "grad_norm": 1.553102308427671, "learning_rate": 1.665844174691631e-05, "loss": 0.3476, "step": 3661 }, { "epoch": 0.2901168548227372, "grad_norm": 1.599079528408568, "learning_rate": 1.6656527034962433e-05, "loss": 0.4552, "step": 3662 }, { "epoch": 0.2901960784313726, "grad_norm": 1.3978944067426902, "learning_rate": 1.665461188470828e-05, "loss": 0.2751, "step": 3663 }, { "epoch": 0.2902753020400079, "grad_norm": 1.7886649165700803, "learning_rate": 1.6652696296279954e-05, "loss": 0.3276, "step": 3664 }, { "epoch": 0.2903545256486433, "grad_norm": 1.4567574127010015, "learning_rate": 1.6650780269803587e-05, "loss": 0.3033, "step": 3665 }, { "epoch": 0.29043374925727866, "grad_norm": 1.7960727770474372, "learning_rate": 1.664886380540534e-05, "loss": 0.3951, "step": 3666 }, { "epoch": 0.29051297286591404, "grad_norm": 1.4746401531627715, "learning_rate": 1.664694690321141e-05, "loss": 0.3797, "step": 3667 }, { "epoch": 0.2905921964745494, "grad_norm": 1.8651030781180145, "learning_rate": 1.6645029563348e-05, "loss": 0.3675, "step": 3668 }, { "epoch": 0.2906714200831848, "grad_norm": 1.544899839721725, "learning_rate": 1.6643111785941374e-05, "loss": 0.3284, "step": 3669 }, { "epoch": 0.2907506436918202, "grad_norm": 1.6870378337498664, "learning_rate": 1.66411935711178e-05, "loss": 0.3742, "step": 3670 }, { "epoch": 0.2908298673004555, "grad_norm": 1.921389935370315, "learning_rate": 1.6639274919003582e-05, "loss": 0.3077, "step": 3671 }, { "epoch": 0.2909090909090909, "grad_norm": 1.5554839911492166, "learning_rate": 1.6637355829725057e-05, "loss": 0.3984, "step": 3672 }, { "epoch": 0.2909883145177263, "grad_norm": 1.462193690870757, "learning_rate": 1.663543630340859e-05, "loss": 0.2794, "step": 3673 }, { "epoch": 0.29106753812636166, "grad_norm": 1.7819231651448524, "learning_rate": 1.6633516340180568e-05, "loss": 0.4415, "step": 3674 }, { "epoch": 0.29114676173499704, "grad_norm": 1.477082577320874, "learning_rate": 1.6631595940167416e-05, "loss": 0.3798, "step": 3675 }, { "epoch": 0.2912259853436324, "grad_norm": 1.7948912984018348, "learning_rate": 1.662967510349558e-05, "loss": 0.4033, "step": 3676 }, { "epoch": 0.2913052089522678, "grad_norm": 1.622229483769328, "learning_rate": 1.6627753830291536e-05, "loss": 0.3754, "step": 3677 }, { "epoch": 0.29138443256090313, "grad_norm": 1.475226981239045, "learning_rate": 1.6625832120681795e-05, "loss": 0.389, "step": 3678 }, { "epoch": 0.2914636561695385, "grad_norm": 1.4448973930137625, "learning_rate": 1.6623909974792888e-05, "loss": 0.3615, "step": 3679 }, { "epoch": 0.2915428797781739, "grad_norm": 1.6242515508177402, "learning_rate": 1.6621987392751385e-05, "loss": 0.37, "step": 3680 }, { "epoch": 0.2916221033868093, "grad_norm": 1.4842416611925109, "learning_rate": 1.6620064374683874e-05, "loss": 0.2904, "step": 3681 }, { "epoch": 0.29170132699544465, "grad_norm": 1.8226877878692371, "learning_rate": 1.6618140920716976e-05, "loss": 0.3205, "step": 3682 }, { "epoch": 0.29178055060408004, "grad_norm": 1.5582042564604095, "learning_rate": 1.6616217030977345e-05, "loss": 0.3537, "step": 3683 }, { "epoch": 0.29185977421271536, "grad_norm": 1.8693609962015734, "learning_rate": 1.6614292705591658e-05, "loss": 0.3738, "step": 3684 }, { "epoch": 0.29193899782135074, "grad_norm": 1.4929929643184192, "learning_rate": 1.6612367944686617e-05, "loss": 0.2502, "step": 3685 }, { "epoch": 0.2920182214299861, "grad_norm": 1.5231909340028893, "learning_rate": 1.6610442748388972e-05, "loss": 0.3516, "step": 3686 }, { "epoch": 0.2920974450386215, "grad_norm": 1.807927774894062, "learning_rate": 1.6608517116825473e-05, "loss": 0.305, "step": 3687 }, { "epoch": 0.2921766686472569, "grad_norm": 1.7156559061161423, "learning_rate": 1.6606591050122924e-05, "loss": 0.3707, "step": 3688 }, { "epoch": 0.29225589225589227, "grad_norm": 1.7756116117316123, "learning_rate": 1.660466454840814e-05, "loss": 0.3398, "step": 3689 }, { "epoch": 0.29233511586452765, "grad_norm": 1.4715483555346898, "learning_rate": 1.6602737611807975e-05, "loss": 0.2963, "step": 3690 }, { "epoch": 0.292414339473163, "grad_norm": 1.3492264844913355, "learning_rate": 1.660081024044931e-05, "loss": 0.3042, "step": 3691 }, { "epoch": 0.29249356308179836, "grad_norm": 1.5876136880426355, "learning_rate": 1.659888243445905e-05, "loss": 0.4111, "step": 3692 }, { "epoch": 0.29257278669043374, "grad_norm": 1.6843865515874483, "learning_rate": 1.6596954193964136e-05, "loss": 0.347, "step": 3693 }, { "epoch": 0.2926520102990691, "grad_norm": 1.6159047178595056, "learning_rate": 1.659502551909153e-05, "loss": 0.2566, "step": 3694 }, { "epoch": 0.2927312339077045, "grad_norm": 1.6326633787297915, "learning_rate": 1.6593096409968227e-05, "loss": 0.4207, "step": 3695 }, { "epoch": 0.2928104575163399, "grad_norm": 1.6312351788466415, "learning_rate": 1.6591166866721247e-05, "loss": 0.2902, "step": 3696 }, { "epoch": 0.29288968112497527, "grad_norm": 1.6743139956406583, "learning_rate": 1.658923688947765e-05, "loss": 0.4262, "step": 3697 }, { "epoch": 0.2929689047336106, "grad_norm": 1.594665167211578, "learning_rate": 1.6587306478364502e-05, "loss": 0.3031, "step": 3698 }, { "epoch": 0.293048128342246, "grad_norm": 1.8226401098855778, "learning_rate": 1.658537563350892e-05, "loss": 0.3209, "step": 3699 }, { "epoch": 0.29312735195088135, "grad_norm": 1.9183583484565196, "learning_rate": 1.6583444355038042e-05, "loss": 0.4386, "step": 3700 }, { "epoch": 0.29320657555951674, "grad_norm": 1.2217138579713387, "learning_rate": 1.6581512643079028e-05, "loss": 0.2286, "step": 3701 }, { "epoch": 0.2932857991681521, "grad_norm": 1.7245372312858804, "learning_rate": 1.657958049775908e-05, "loss": 0.2874, "step": 3702 }, { "epoch": 0.2933650227767875, "grad_norm": 1.5545081582187148, "learning_rate": 1.6577647919205407e-05, "loss": 0.2811, "step": 3703 }, { "epoch": 0.2934442463854229, "grad_norm": 1.7646696527010508, "learning_rate": 1.6575714907545272e-05, "loss": 0.3848, "step": 3704 }, { "epoch": 0.2935234699940582, "grad_norm": 1.4763703725637947, "learning_rate": 1.6573781462905954e-05, "loss": 0.269, "step": 3705 }, { "epoch": 0.2936026936026936, "grad_norm": 1.466020055182049, "learning_rate": 1.6571847585414754e-05, "loss": 0.3398, "step": 3706 }, { "epoch": 0.29368191721132897, "grad_norm": 1.6648461105635022, "learning_rate": 1.6569913275199013e-05, "loss": 0.4153, "step": 3707 }, { "epoch": 0.29376114081996435, "grad_norm": 1.6762880173918289, "learning_rate": 1.6567978532386094e-05, "loss": 0.3389, "step": 3708 }, { "epoch": 0.29384036442859973, "grad_norm": 1.4084573794364987, "learning_rate": 1.6566043357103393e-05, "loss": 0.3095, "step": 3709 }, { "epoch": 0.2939195880372351, "grad_norm": 1.5264587159337366, "learning_rate": 1.656410774947833e-05, "loss": 0.3541, "step": 3710 }, { "epoch": 0.2939988116458705, "grad_norm": 1.7712955241935733, "learning_rate": 1.6562171709638355e-05, "loss": 0.4035, "step": 3711 }, { "epoch": 0.2940780352545058, "grad_norm": 1.650402565242953, "learning_rate": 1.656023523771095e-05, "loss": 0.3859, "step": 3712 }, { "epoch": 0.2941572588631412, "grad_norm": 1.457668188975639, "learning_rate": 1.655829833382362e-05, "loss": 0.3065, "step": 3713 }, { "epoch": 0.2942364824717766, "grad_norm": 1.7683403141845395, "learning_rate": 1.6556360998103903e-05, "loss": 0.4861, "step": 3714 }, { "epoch": 0.29431570608041197, "grad_norm": 1.3387389380927102, "learning_rate": 1.655442323067936e-05, "loss": 0.3494, "step": 3715 }, { "epoch": 0.29439492968904735, "grad_norm": 1.5460035560143075, "learning_rate": 1.6552485031677586e-05, "loss": 0.3607, "step": 3716 }, { "epoch": 0.29447415329768273, "grad_norm": 1.5644628630902204, "learning_rate": 1.65505464012262e-05, "loss": 0.3323, "step": 3717 }, { "epoch": 0.2945533769063181, "grad_norm": 1.5263992181145956, "learning_rate": 1.6548607339452853e-05, "loss": 0.2947, "step": 3718 }, { "epoch": 0.29463260051495344, "grad_norm": 1.680623589400432, "learning_rate": 1.6546667846485224e-05, "loss": 0.3836, "step": 3719 }, { "epoch": 0.2947118241235888, "grad_norm": 1.5645426107179712, "learning_rate": 1.6544727922451014e-05, "loss": 0.3296, "step": 3720 }, { "epoch": 0.2947910477322242, "grad_norm": 1.5798023138428012, "learning_rate": 1.654278756747796e-05, "loss": 0.3759, "step": 3721 }, { "epoch": 0.2948702713408596, "grad_norm": 1.6448449681708475, "learning_rate": 1.6540846781693837e-05, "loss": 0.3258, "step": 3722 }, { "epoch": 0.29494949494949496, "grad_norm": 1.692579158191838, "learning_rate": 1.6538905565226416e-05, "loss": 0.3276, "step": 3723 }, { "epoch": 0.29502871855813034, "grad_norm": 1.157165029471748, "learning_rate": 1.6536963918203532e-05, "loss": 0.3096, "step": 3724 }, { "epoch": 0.29510794216676567, "grad_norm": 1.5351515561844389, "learning_rate": 1.6535021840753026e-05, "loss": 0.3423, "step": 3725 }, { "epoch": 0.29518716577540105, "grad_norm": 1.626572147130463, "learning_rate": 1.6533079333002775e-05, "loss": 0.45, "step": 3726 }, { "epoch": 0.29526638938403643, "grad_norm": 1.4759940209148366, "learning_rate": 1.6531136395080687e-05, "loss": 0.3755, "step": 3727 }, { "epoch": 0.2953456129926718, "grad_norm": 1.6047735388780962, "learning_rate": 1.6529193027114692e-05, "loss": 0.4001, "step": 3728 }, { "epoch": 0.2954248366013072, "grad_norm": 1.6936689428996885, "learning_rate": 1.6527249229232754e-05, "loss": 0.4437, "step": 3729 }, { "epoch": 0.2955040602099426, "grad_norm": 1.5483223620704214, "learning_rate": 1.652530500156286e-05, "loss": 0.2854, "step": 3730 }, { "epoch": 0.29558328381857796, "grad_norm": 1.9279758485809468, "learning_rate": 1.652336034423303e-05, "loss": 0.4761, "step": 3731 }, { "epoch": 0.2956625074272133, "grad_norm": 1.3885258750564502, "learning_rate": 1.6521415257371312e-05, "loss": 0.2676, "step": 3732 }, { "epoch": 0.29574173103584867, "grad_norm": 2.0173393527273866, "learning_rate": 1.6519469741105777e-05, "loss": 0.4428, "step": 3733 }, { "epoch": 0.29582095464448405, "grad_norm": 1.2769367437849652, "learning_rate": 1.6517523795564527e-05, "loss": 0.3903, "step": 3734 }, { "epoch": 0.29590017825311943, "grad_norm": 1.4168831246146063, "learning_rate": 1.6515577420875698e-05, "loss": 0.2817, "step": 3735 }, { "epoch": 0.2959794018617548, "grad_norm": 1.4590632301807038, "learning_rate": 1.6513630617167446e-05, "loss": 0.4161, "step": 3736 }, { "epoch": 0.2960586254703902, "grad_norm": 1.4957658708651385, "learning_rate": 1.6511683384567957e-05, "loss": 0.3314, "step": 3737 }, { "epoch": 0.2961378490790256, "grad_norm": 1.6931108922356175, "learning_rate": 1.6509735723205453e-05, "loss": 0.3407, "step": 3738 }, { "epoch": 0.2962170726876609, "grad_norm": 1.9257459743957412, "learning_rate": 1.6507787633208173e-05, "loss": 0.4885, "step": 3739 }, { "epoch": 0.2962962962962963, "grad_norm": 1.4618563336714918, "learning_rate": 1.650583911470439e-05, "loss": 0.2855, "step": 3740 }, { "epoch": 0.29637551990493166, "grad_norm": 1.578070839901341, "learning_rate": 1.6503890167822406e-05, "loss": 0.4354, "step": 3741 }, { "epoch": 0.29645474351356704, "grad_norm": 1.625889368351407, "learning_rate": 1.6501940792690547e-05, "loss": 0.3298, "step": 3742 }, { "epoch": 0.2965339671222024, "grad_norm": 2.0722239035668673, "learning_rate": 1.6499990989437177e-05, "loss": 0.4087, "step": 3743 }, { "epoch": 0.2966131907308378, "grad_norm": 1.7203826844700671, "learning_rate": 1.6498040758190673e-05, "loss": 0.3445, "step": 3744 }, { "epoch": 0.2966924143394732, "grad_norm": 1.5562219947285774, "learning_rate": 1.6496090099079452e-05, "loss": 0.3462, "step": 3745 }, { "epoch": 0.2967716379481085, "grad_norm": 1.5974700535982191, "learning_rate": 1.6494139012231954e-05, "loss": 0.2918, "step": 3746 }, { "epoch": 0.2968508615567439, "grad_norm": 1.8451398968677453, "learning_rate": 1.6492187497776654e-05, "loss": 0.4844, "step": 3747 }, { "epoch": 0.2969300851653793, "grad_norm": 1.3668940341916167, "learning_rate": 1.6490235555842044e-05, "loss": 0.286, "step": 3748 }, { "epoch": 0.29700930877401466, "grad_norm": 1.6436202528195933, "learning_rate": 1.6488283186556648e-05, "loss": 0.3584, "step": 3749 }, { "epoch": 0.29708853238265004, "grad_norm": 1.4993676939339717, "learning_rate": 1.6486330390049027e-05, "loss": 0.465, "step": 3750 }, { "epoch": 0.2971677559912854, "grad_norm": 1.7093675941475246, "learning_rate": 1.648437716644776e-05, "loss": 0.3096, "step": 3751 }, { "epoch": 0.2972469795999208, "grad_norm": 2.0648495925544057, "learning_rate": 1.6482423515881455e-05, "loss": 0.4456, "step": 3752 }, { "epoch": 0.29732620320855613, "grad_norm": 1.4552570422854636, "learning_rate": 1.6480469438478756e-05, "loss": 0.2939, "step": 3753 }, { "epoch": 0.2974054268171915, "grad_norm": 1.6662426434991227, "learning_rate": 1.6478514934368326e-05, "loss": 0.3757, "step": 3754 }, { "epoch": 0.2974846504258269, "grad_norm": 1.5962851467094807, "learning_rate": 1.647656000367886e-05, "loss": 0.3251, "step": 3755 }, { "epoch": 0.2975638740344623, "grad_norm": 1.5103269275543294, "learning_rate": 1.647460464653908e-05, "loss": 0.3744, "step": 3756 }, { "epoch": 0.29764309764309765, "grad_norm": 1.5483971219441284, "learning_rate": 1.6472648863077737e-05, "loss": 0.3312, "step": 3757 }, { "epoch": 0.29772232125173304, "grad_norm": 1.4794407460995247, "learning_rate": 1.6470692653423614e-05, "loss": 0.3563, "step": 3758 }, { "epoch": 0.2978015448603684, "grad_norm": 1.5927313826335552, "learning_rate": 1.6468736017705515e-05, "loss": 0.4335, "step": 3759 }, { "epoch": 0.29788076846900374, "grad_norm": 1.5609038027387239, "learning_rate": 1.646677895605227e-05, "loss": 0.3471, "step": 3760 }, { "epoch": 0.2979599920776391, "grad_norm": 1.5743584159085167, "learning_rate": 1.6464821468592748e-05, "loss": 0.4019, "step": 3761 }, { "epoch": 0.2980392156862745, "grad_norm": 1.5769263913700893, "learning_rate": 1.646286355545584e-05, "loss": 0.3243, "step": 3762 }, { "epoch": 0.2981184392949099, "grad_norm": 1.4067949846430399, "learning_rate": 1.6460905216770467e-05, "loss": 0.3049, "step": 3763 }, { "epoch": 0.29819766290354527, "grad_norm": 1.6794696717920552, "learning_rate": 1.6458946452665573e-05, "loss": 0.4214, "step": 3764 }, { "epoch": 0.29827688651218065, "grad_norm": 1.7329479945034956, "learning_rate": 1.6456987263270132e-05, "loss": 0.3726, "step": 3765 }, { "epoch": 0.298356110120816, "grad_norm": 1.714100066221399, "learning_rate": 1.645502764871315e-05, "loss": 0.4985, "step": 3766 }, { "epoch": 0.29843533372945136, "grad_norm": 1.6324421847987027, "learning_rate": 1.6453067609123656e-05, "loss": 0.4205, "step": 3767 }, { "epoch": 0.29851455733808674, "grad_norm": 1.7559885452665687, "learning_rate": 1.6451107144630708e-05, "loss": 0.457, "step": 3768 }, { "epoch": 0.2985937809467221, "grad_norm": 1.4335785231140599, "learning_rate": 1.6449146255363395e-05, "loss": 0.3186, "step": 3769 }, { "epoch": 0.2986730045553575, "grad_norm": 1.436299620477261, "learning_rate": 1.6447184941450833e-05, "loss": 0.2621, "step": 3770 }, { "epoch": 0.2987522281639929, "grad_norm": 1.5070547075831144, "learning_rate": 1.644522320302217e-05, "loss": 0.4093, "step": 3771 }, { "epoch": 0.29883145177262826, "grad_norm": 1.6426450608368386, "learning_rate": 1.6443261040206566e-05, "loss": 0.346, "step": 3772 }, { "epoch": 0.2989106753812636, "grad_norm": 1.753915088153408, "learning_rate": 1.6441298453133224e-05, "loss": 0.4704, "step": 3773 }, { "epoch": 0.298989898989899, "grad_norm": 1.624887100688437, "learning_rate": 1.6439335441931376e-05, "loss": 0.3534, "step": 3774 }, { "epoch": 0.29906912259853435, "grad_norm": 1.335493481898599, "learning_rate": 1.6437372006730276e-05, "loss": 0.2843, "step": 3775 }, { "epoch": 0.29914834620716974, "grad_norm": 1.6182677105252745, "learning_rate": 1.64354081476592e-05, "loss": 0.4009, "step": 3776 }, { "epoch": 0.2992275698158051, "grad_norm": 1.6379374535945792, "learning_rate": 1.643344386484746e-05, "loss": 0.4189, "step": 3777 }, { "epoch": 0.2993067934244405, "grad_norm": 1.4489817796737543, "learning_rate": 1.64314791584244e-05, "loss": 0.3679, "step": 3778 }, { "epoch": 0.2993860170330759, "grad_norm": 1.4954248788830213, "learning_rate": 1.6429514028519383e-05, "loss": 0.302, "step": 3779 }, { "epoch": 0.2994652406417112, "grad_norm": 1.3727648316046877, "learning_rate": 1.6427548475261807e-05, "loss": 0.3157, "step": 3780 }, { "epoch": 0.2995444642503466, "grad_norm": 1.6889827463718825, "learning_rate": 1.642558249878109e-05, "loss": 0.3195, "step": 3781 }, { "epoch": 0.29962368785898197, "grad_norm": 2.0113385117015583, "learning_rate": 1.642361609920668e-05, "loss": 0.346, "step": 3782 }, { "epoch": 0.29970291146761735, "grad_norm": 1.3005111421180062, "learning_rate": 1.6421649276668065e-05, "loss": 0.236, "step": 3783 }, { "epoch": 0.29978213507625273, "grad_norm": 1.814189959560917, "learning_rate": 1.641968203129474e-05, "loss": 0.3674, "step": 3784 }, { "epoch": 0.2998613586848881, "grad_norm": 1.3463800810596662, "learning_rate": 1.641771436321624e-05, "loss": 0.2878, "step": 3785 }, { "epoch": 0.2999405822935235, "grad_norm": 1.5277775792857364, "learning_rate": 1.6415746272562133e-05, "loss": 0.3004, "step": 3786 }, { "epoch": 0.3000198059021588, "grad_norm": 1.7202398641078458, "learning_rate": 1.6413777759462005e-05, "loss": 0.3964, "step": 3787 }, { "epoch": 0.3000990295107942, "grad_norm": 1.5780142294976065, "learning_rate": 1.6411808824045472e-05, "loss": 0.4101, "step": 3788 }, { "epoch": 0.3001782531194296, "grad_norm": 1.8318551528148257, "learning_rate": 1.640983946644218e-05, "loss": 0.4305, "step": 3789 }, { "epoch": 0.30025747672806496, "grad_norm": 1.5673051254852584, "learning_rate": 1.64078696867818e-05, "loss": 0.2752, "step": 3790 }, { "epoch": 0.30033670033670035, "grad_norm": 1.514300134962165, "learning_rate": 1.6405899485194034e-05, "loss": 0.2852, "step": 3791 }, { "epoch": 0.3004159239453357, "grad_norm": 1.7127679297603502, "learning_rate": 1.640392886180861e-05, "loss": 0.413, "step": 3792 }, { "epoch": 0.3004951475539711, "grad_norm": 1.608088617741294, "learning_rate": 1.6401957816755286e-05, "loss": 0.3283, "step": 3793 }, { "epoch": 0.30057437116260644, "grad_norm": 1.519048287661634, "learning_rate": 1.6399986350163844e-05, "loss": 0.357, "step": 3794 }, { "epoch": 0.3006535947712418, "grad_norm": 1.7846920775902686, "learning_rate": 1.6398014462164093e-05, "loss": 0.3559, "step": 3795 }, { "epoch": 0.3007328183798772, "grad_norm": 1.515741033757868, "learning_rate": 1.6396042152885874e-05, "loss": 0.303, "step": 3796 }, { "epoch": 0.3008120419885126, "grad_norm": 1.5575963897544893, "learning_rate": 1.639406942245906e-05, "loss": 0.361, "step": 3797 }, { "epoch": 0.30089126559714796, "grad_norm": 1.5934799767005252, "learning_rate": 1.639209627101354e-05, "loss": 0.4425, "step": 3798 }, { "epoch": 0.30097048920578334, "grad_norm": 1.9555295092338374, "learning_rate": 1.6390122698679234e-05, "loss": 0.31, "step": 3799 }, { "epoch": 0.3010497128144187, "grad_norm": 1.4861878242769602, "learning_rate": 1.6388148705586097e-05, "loss": 0.3422, "step": 3800 }, { "epoch": 0.30112893642305405, "grad_norm": 1.6141246381221648, "learning_rate": 1.6386174291864106e-05, "loss": 0.3316, "step": 3801 }, { "epoch": 0.30120816003168943, "grad_norm": 1.6918582588963165, "learning_rate": 1.6384199457643264e-05, "loss": 0.3889, "step": 3802 }, { "epoch": 0.3012873836403248, "grad_norm": 1.961692310085184, "learning_rate": 1.6382224203053607e-05, "loss": 0.32, "step": 3803 }, { "epoch": 0.3013666072489602, "grad_norm": 1.3305776959827942, "learning_rate": 1.6380248528225197e-05, "loss": 0.2861, "step": 3804 }, { "epoch": 0.3014458308575956, "grad_norm": 1.487523496772002, "learning_rate": 1.6378272433288122e-05, "loss": 0.3223, "step": 3805 }, { "epoch": 0.30152505446623096, "grad_norm": 1.8278446964295654, "learning_rate": 1.6376295918372495e-05, "loss": 0.4469, "step": 3806 }, { "epoch": 0.3016042780748663, "grad_norm": 1.6868377234102636, "learning_rate": 1.6374318983608464e-05, "loss": 0.5389, "step": 3807 }, { "epoch": 0.30168350168350166, "grad_norm": 2.084304915031796, "learning_rate": 1.63723416291262e-05, "loss": 0.392, "step": 3808 }, { "epoch": 0.30176272529213705, "grad_norm": 1.7967013467244641, "learning_rate": 1.63703638550559e-05, "loss": 0.3478, "step": 3809 }, { "epoch": 0.3018419489007724, "grad_norm": 1.8273655052356867, "learning_rate": 1.6368385661527795e-05, "loss": 0.3582, "step": 3810 }, { "epoch": 0.3019211725094078, "grad_norm": 1.6828164947296698, "learning_rate": 1.6366407048672135e-05, "loss": 0.2921, "step": 3811 }, { "epoch": 0.3020003961180432, "grad_norm": 1.5472148067232565, "learning_rate": 1.6364428016619202e-05, "loss": 0.3374, "step": 3812 }, { "epoch": 0.30207961972667857, "grad_norm": 1.5730655180342485, "learning_rate": 1.636244856549931e-05, "loss": 0.3068, "step": 3813 }, { "epoch": 0.3021588433353139, "grad_norm": 1.4756547347443372, "learning_rate": 1.6360468695442797e-05, "loss": 0.359, "step": 3814 }, { "epoch": 0.3022380669439493, "grad_norm": 1.7130972831106233, "learning_rate": 1.6358488406580023e-05, "loss": 0.4333, "step": 3815 }, { "epoch": 0.30231729055258466, "grad_norm": 1.6734596236717552, "learning_rate": 1.635650769904138e-05, "loss": 0.4587, "step": 3816 }, { "epoch": 0.30239651416122004, "grad_norm": 1.729405056866127, "learning_rate": 1.6354526572957292e-05, "loss": 0.4987, "step": 3817 }, { "epoch": 0.3024757377698554, "grad_norm": 1.6373355510413856, "learning_rate": 1.6352545028458206e-05, "loss": 0.4399, "step": 3818 }, { "epoch": 0.3025549613784908, "grad_norm": 1.4211971196948625, "learning_rate": 1.6350563065674596e-05, "loss": 0.2919, "step": 3819 }, { "epoch": 0.3026341849871262, "grad_norm": 1.6523017112193026, "learning_rate": 1.6348580684736962e-05, "loss": 0.4078, "step": 3820 }, { "epoch": 0.3027134085957615, "grad_norm": 1.3075951775593628, "learning_rate": 1.6346597885775843e-05, "loss": 0.2245, "step": 3821 }, { "epoch": 0.3027926322043969, "grad_norm": 1.6879622665776723, "learning_rate": 1.6344614668921787e-05, "loss": 0.4017, "step": 3822 }, { "epoch": 0.3028718558130323, "grad_norm": 1.6495169799144414, "learning_rate": 1.6342631034305386e-05, "loss": 0.4104, "step": 3823 }, { "epoch": 0.30295107942166766, "grad_norm": 1.55359658127163, "learning_rate": 1.634064698205725e-05, "loss": 0.3575, "step": 3824 }, { "epoch": 0.30303030303030304, "grad_norm": 1.3333725357229693, "learning_rate": 1.6338662512308013e-05, "loss": 0.3173, "step": 3825 }, { "epoch": 0.3031095266389384, "grad_norm": 1.4457097277733202, "learning_rate": 1.6336677625188357e-05, "loss": 0.3363, "step": 3826 }, { "epoch": 0.3031887502475738, "grad_norm": 1.555676829281651, "learning_rate": 1.6334692320828968e-05, "loss": 0.3219, "step": 3827 }, { "epoch": 0.3032679738562091, "grad_norm": 1.3530281421596255, "learning_rate": 1.6332706599360568e-05, "loss": 0.306, "step": 3828 }, { "epoch": 0.3033471974648445, "grad_norm": 1.6454117978423115, "learning_rate": 1.633072046091391e-05, "loss": 0.3432, "step": 3829 }, { "epoch": 0.3034264210734799, "grad_norm": 1.3930626696608348, "learning_rate": 1.6328733905619775e-05, "loss": 0.289, "step": 3830 }, { "epoch": 0.30350564468211527, "grad_norm": 1.7497257245471574, "learning_rate": 1.632674693360896e-05, "loss": 0.3326, "step": 3831 }, { "epoch": 0.30358486829075065, "grad_norm": 1.6349530365308782, "learning_rate": 1.6324759545012306e-05, "loss": 0.3893, "step": 3832 }, { "epoch": 0.30366409189938603, "grad_norm": 1.4708960442420949, "learning_rate": 1.6322771739960664e-05, "loss": 0.3058, "step": 3833 }, { "epoch": 0.3037433155080214, "grad_norm": 1.4778862415043623, "learning_rate": 1.6320783518584926e-05, "loss": 0.3498, "step": 3834 }, { "epoch": 0.30382253911665674, "grad_norm": 1.5604297876082858, "learning_rate": 1.631879488101601e-05, "loss": 0.4417, "step": 3835 }, { "epoch": 0.3039017627252921, "grad_norm": 1.4588366078489061, "learning_rate": 1.6316805827384856e-05, "loss": 0.3357, "step": 3836 }, { "epoch": 0.3039809863339275, "grad_norm": 1.8178459538402403, "learning_rate": 1.631481635782243e-05, "loss": 0.4265, "step": 3837 }, { "epoch": 0.3040602099425629, "grad_norm": 1.5781182190565286, "learning_rate": 1.631282647245973e-05, "loss": 0.352, "step": 3838 }, { "epoch": 0.30413943355119827, "grad_norm": 1.6039945513928018, "learning_rate": 1.6310836171427788e-05, "loss": 0.3298, "step": 3839 }, { "epoch": 0.30421865715983365, "grad_norm": 1.846700518571357, "learning_rate": 1.6308845454857647e-05, "loss": 0.387, "step": 3840 }, { "epoch": 0.30429788076846903, "grad_norm": 1.2115120647603959, "learning_rate": 1.6306854322880386e-05, "loss": 0.2871, "step": 3841 }, { "epoch": 0.30437710437710436, "grad_norm": 3.2410836256660036, "learning_rate": 1.630486277562712e-05, "loss": 0.4369, "step": 3842 }, { "epoch": 0.30445632798573974, "grad_norm": 1.671704239532948, "learning_rate": 1.6302870813228974e-05, "loss": 0.4362, "step": 3843 }, { "epoch": 0.3045355515943751, "grad_norm": 1.6085542460584332, "learning_rate": 1.6300878435817115e-05, "loss": 0.3678, "step": 3844 }, { "epoch": 0.3046147752030105, "grad_norm": 1.6325921389417042, "learning_rate": 1.6298885643522724e-05, "loss": 0.3531, "step": 3845 }, { "epoch": 0.3046939988116459, "grad_norm": 1.3342545808697894, "learning_rate": 1.6296892436477024e-05, "loss": 0.2439, "step": 3846 }, { "epoch": 0.30477322242028126, "grad_norm": 1.7166473609594897, "learning_rate": 1.6294898814811258e-05, "loss": 0.3329, "step": 3847 }, { "epoch": 0.3048524460289166, "grad_norm": 1.873728421446356, "learning_rate": 1.629290477865669e-05, "loss": 0.3875, "step": 3848 }, { "epoch": 0.30493166963755197, "grad_norm": 1.8667987810816882, "learning_rate": 1.6290910328144627e-05, "loss": 0.3824, "step": 3849 }, { "epoch": 0.30501089324618735, "grad_norm": 1.6637950329647817, "learning_rate": 1.6288915463406386e-05, "loss": 0.3281, "step": 3850 }, { "epoch": 0.30509011685482273, "grad_norm": 1.8548886263458098, "learning_rate": 1.6286920184573324e-05, "loss": 0.4248, "step": 3851 }, { "epoch": 0.3051693404634581, "grad_norm": 1.489059185016581, "learning_rate": 1.6284924491776815e-05, "loss": 0.3011, "step": 3852 }, { "epoch": 0.3052485640720935, "grad_norm": 1.8782566040331756, "learning_rate": 1.6282928385148273e-05, "loss": 0.4654, "step": 3853 }, { "epoch": 0.3053277876807289, "grad_norm": 1.6943009082616145, "learning_rate": 1.6280931864819125e-05, "loss": 0.3491, "step": 3854 }, { "epoch": 0.3054070112893642, "grad_norm": 1.8388427081702123, "learning_rate": 1.6278934930920834e-05, "loss": 0.535, "step": 3855 }, { "epoch": 0.3054862348979996, "grad_norm": 1.570905121726635, "learning_rate": 1.6276937583584895e-05, "loss": 0.3378, "step": 3856 }, { "epoch": 0.30556545850663497, "grad_norm": 1.624265042521842, "learning_rate": 1.6274939822942818e-05, "loss": 0.3788, "step": 3857 }, { "epoch": 0.30564468211527035, "grad_norm": 1.6533235915875253, "learning_rate": 1.6272941649126146e-05, "loss": 0.3481, "step": 3858 }, { "epoch": 0.30572390572390573, "grad_norm": 1.8394126429612057, "learning_rate": 1.627094306226645e-05, "loss": 0.4744, "step": 3859 }, { "epoch": 0.3058031293325411, "grad_norm": 1.6354535733095188, "learning_rate": 1.6268944062495324e-05, "loss": 0.4803, "step": 3860 }, { "epoch": 0.3058823529411765, "grad_norm": 1.800699104887716, "learning_rate": 1.62669446499444e-05, "loss": 0.3522, "step": 3861 }, { "epoch": 0.3059615765498118, "grad_norm": 1.4575471666967872, "learning_rate": 1.6264944824745326e-05, "loss": 0.3918, "step": 3862 }, { "epoch": 0.3060408001584472, "grad_norm": 1.518255624908039, "learning_rate": 1.6262944587029777e-05, "loss": 0.3738, "step": 3863 }, { "epoch": 0.3061200237670826, "grad_norm": 1.6706320996404647, "learning_rate": 1.6260943936929462e-05, "loss": 0.3605, "step": 3864 }, { "epoch": 0.30619924737571796, "grad_norm": 1.9328751141669256, "learning_rate": 1.6258942874576117e-05, "loss": 0.405, "step": 3865 }, { "epoch": 0.30627847098435335, "grad_norm": 1.9661923984908691, "learning_rate": 1.62569414001015e-05, "loss": 0.438, "step": 3866 }, { "epoch": 0.3063576945929887, "grad_norm": 1.4170629236575045, "learning_rate": 1.6254939513637397e-05, "loss": 0.2903, "step": 3867 }, { "epoch": 0.3064369182016241, "grad_norm": 1.159523129000082, "learning_rate": 1.6252937215315622e-05, "loss": 0.2969, "step": 3868 }, { "epoch": 0.30651614181025943, "grad_norm": 1.8377983854490623, "learning_rate": 1.6250934505268025e-05, "loss": 0.3265, "step": 3869 }, { "epoch": 0.3065953654188948, "grad_norm": 1.4498893510292903, "learning_rate": 1.6248931383626464e-05, "loss": 0.3737, "step": 3870 }, { "epoch": 0.3066745890275302, "grad_norm": 1.7024558999697288, "learning_rate": 1.6246927850522837e-05, "loss": 0.3777, "step": 3871 }, { "epoch": 0.3067538126361656, "grad_norm": 2.3036091533260517, "learning_rate": 1.624492390608907e-05, "loss": 0.3056, "step": 3872 }, { "epoch": 0.30683303624480096, "grad_norm": 1.5073818451059444, "learning_rate": 1.6242919550457116e-05, "loss": 0.3244, "step": 3873 }, { "epoch": 0.30691225985343634, "grad_norm": 1.4083047702812181, "learning_rate": 1.6240914783758946e-05, "loss": 0.31, "step": 3874 }, { "epoch": 0.3069914834620717, "grad_norm": 2.0002940997862138, "learning_rate": 1.6238909606126568e-05, "loss": 0.4826, "step": 3875 }, { "epoch": 0.30707070707070705, "grad_norm": 1.3655340967693788, "learning_rate": 1.6236904017692016e-05, "loss": 0.3025, "step": 3876 }, { "epoch": 0.30714993067934243, "grad_norm": 1.307926462504353, "learning_rate": 1.6234898018587336e-05, "loss": 0.2097, "step": 3877 }, { "epoch": 0.3072291542879778, "grad_norm": 1.597160466642504, "learning_rate": 1.6232891608944627e-05, "loss": 0.5153, "step": 3878 }, { "epoch": 0.3073083778966132, "grad_norm": 1.6280797024562519, "learning_rate": 1.6230884788895998e-05, "loss": 0.3942, "step": 3879 }, { "epoch": 0.3073876015052486, "grad_norm": 1.8013277125103866, "learning_rate": 1.622887755857358e-05, "loss": 0.4869, "step": 3880 }, { "epoch": 0.30746682511388396, "grad_norm": 1.8721027773073764, "learning_rate": 1.6226869918109553e-05, "loss": 0.4217, "step": 3881 }, { "epoch": 0.30754604872251934, "grad_norm": 1.4823194796731884, "learning_rate": 1.62248618676361e-05, "loss": 0.3684, "step": 3882 }, { "epoch": 0.30762527233115466, "grad_norm": 1.7362565226495277, "learning_rate": 1.6222853407285447e-05, "loss": 0.3745, "step": 3883 }, { "epoch": 0.30770449593979005, "grad_norm": 1.5033862134684643, "learning_rate": 1.622084453718984e-05, "loss": 0.3089, "step": 3884 }, { "epoch": 0.3077837195484254, "grad_norm": 1.7048923056998178, "learning_rate": 1.621883525748155e-05, "loss": 0.4797, "step": 3885 }, { "epoch": 0.3078629431570608, "grad_norm": 1.3406782417977685, "learning_rate": 1.6216825568292885e-05, "loss": 0.2417, "step": 3886 }, { "epoch": 0.3079421667656962, "grad_norm": 2.097270834261255, "learning_rate": 1.6214815469756165e-05, "loss": 0.4651, "step": 3887 }, { "epoch": 0.30802139037433157, "grad_norm": 1.5855389651706662, "learning_rate": 1.6212804962003757e-05, "loss": 0.3393, "step": 3888 }, { "epoch": 0.3081006139829669, "grad_norm": 1.7502683201692009, "learning_rate": 1.6210794045168033e-05, "loss": 0.4894, "step": 3889 }, { "epoch": 0.3081798375916023, "grad_norm": 1.7314268439339218, "learning_rate": 1.6208782719381403e-05, "loss": 0.4039, "step": 3890 }, { "epoch": 0.30825906120023766, "grad_norm": 1.7203426537643356, "learning_rate": 1.6206770984776307e-05, "loss": 0.3743, "step": 3891 }, { "epoch": 0.30833828480887304, "grad_norm": 1.7270705470709433, "learning_rate": 1.620475884148521e-05, "loss": 0.3745, "step": 3892 }, { "epoch": 0.3084175084175084, "grad_norm": 1.7493818191080077, "learning_rate": 1.6202746289640594e-05, "loss": 0.3071, "step": 3893 }, { "epoch": 0.3084967320261438, "grad_norm": 1.559848287006385, "learning_rate": 1.620073332937498e-05, "loss": 0.4243, "step": 3894 }, { "epoch": 0.3085759556347792, "grad_norm": 1.7019075087887992, "learning_rate": 1.6198719960820917e-05, "loss": 0.2881, "step": 3895 }, { "epoch": 0.3086551792434145, "grad_norm": 1.481829633295651, "learning_rate": 1.619670618411097e-05, "loss": 0.4019, "step": 3896 }, { "epoch": 0.3087344028520499, "grad_norm": 1.5701358815567066, "learning_rate": 1.6194691999377736e-05, "loss": 0.3249, "step": 3897 }, { "epoch": 0.3088136264606853, "grad_norm": 1.6538956280133923, "learning_rate": 1.619267740675384e-05, "loss": 0.3555, "step": 3898 }, { "epoch": 0.30889285006932066, "grad_norm": 1.2012242832602773, "learning_rate": 1.6190662406371937e-05, "loss": 0.2477, "step": 3899 }, { "epoch": 0.30897207367795604, "grad_norm": 1.6532369613693252, "learning_rate": 1.6188646998364703e-05, "loss": 0.3867, "step": 3900 }, { "epoch": 0.3090512972865914, "grad_norm": 1.5200424322822466, "learning_rate": 1.6186631182864835e-05, "loss": 0.3138, "step": 3901 }, { "epoch": 0.3091305208952268, "grad_norm": 1.4371225729579056, "learning_rate": 1.6184614960005078e-05, "loss": 0.2851, "step": 3902 }, { "epoch": 0.3092097445038621, "grad_norm": 1.7941207407615067, "learning_rate": 1.6182598329918185e-05, "loss": 0.3511, "step": 3903 }, { "epoch": 0.3092889681124975, "grad_norm": 1.4622290904890745, "learning_rate": 1.6180581292736938e-05, "loss": 0.2585, "step": 3904 }, { "epoch": 0.3093681917211329, "grad_norm": 1.458089183011868, "learning_rate": 1.617856384859415e-05, "loss": 0.3436, "step": 3905 }, { "epoch": 0.30944741532976827, "grad_norm": 1.4393034561998665, "learning_rate": 1.6176545997622662e-05, "loss": 0.2405, "step": 3906 }, { "epoch": 0.30952663893840365, "grad_norm": 1.5693974162032396, "learning_rate": 1.6174527739955345e-05, "loss": 0.3897, "step": 3907 }, { "epoch": 0.30960586254703903, "grad_norm": 1.4801643184762672, "learning_rate": 1.6172509075725084e-05, "loss": 0.3065, "step": 3908 }, { "epoch": 0.3096850861556744, "grad_norm": 1.8318505679163875, "learning_rate": 1.61704900050648e-05, "loss": 0.3175, "step": 3909 }, { "epoch": 0.30976430976430974, "grad_norm": 1.769591033144959, "learning_rate": 1.616847052810744e-05, "loss": 0.4635, "step": 3910 }, { "epoch": 0.3098435333729451, "grad_norm": 1.5028956758737584, "learning_rate": 1.6166450644985975e-05, "loss": 0.3215, "step": 3911 }, { "epoch": 0.3099227569815805, "grad_norm": 1.5081839139914694, "learning_rate": 1.6164430355833407e-05, "loss": 0.3, "step": 3912 }, { "epoch": 0.3100019805902159, "grad_norm": 1.4043088547051357, "learning_rate": 1.616240966078276e-05, "loss": 0.3397, "step": 3913 }, { "epoch": 0.31008120419885127, "grad_norm": 1.452618257096568, "learning_rate": 1.616038855996709e-05, "loss": 0.2589, "step": 3914 }, { "epoch": 0.31016042780748665, "grad_norm": 2.615123288208856, "learning_rate": 1.6158367053519476e-05, "loss": 0.3309, "step": 3915 }, { "epoch": 0.31023965141612203, "grad_norm": 1.697277988130609, "learning_rate": 1.6156345141573022e-05, "loss": 0.3964, "step": 3916 }, { "epoch": 0.31031887502475736, "grad_norm": 1.8478504351393588, "learning_rate": 1.6154322824260865e-05, "loss": 0.4758, "step": 3917 }, { "epoch": 0.31039809863339274, "grad_norm": 1.7877943177008, "learning_rate": 1.615230010171616e-05, "loss": 0.3514, "step": 3918 }, { "epoch": 0.3104773222420281, "grad_norm": 1.5023807468932024, "learning_rate": 1.61502769740721e-05, "loss": 0.3596, "step": 3919 }, { "epoch": 0.3105565458506635, "grad_norm": 1.429157569810737, "learning_rate": 1.6148253441461887e-05, "loss": 0.2953, "step": 3920 }, { "epoch": 0.3106357694592989, "grad_norm": 1.550399195990936, "learning_rate": 1.6146229504018777e-05, "loss": 0.4026, "step": 3921 }, { "epoch": 0.31071499306793426, "grad_norm": 1.77126908240208, "learning_rate": 1.6144205161876023e-05, "loss": 0.4375, "step": 3922 }, { "epoch": 0.3107942166765696, "grad_norm": 1.4480220999658833, "learning_rate": 1.6142180415166926e-05, "loss": 0.2819, "step": 3923 }, { "epoch": 0.31087344028520497, "grad_norm": 1.9031412330115325, "learning_rate": 1.61401552640248e-05, "loss": 0.4306, "step": 3924 }, { "epoch": 0.31095266389384035, "grad_norm": 1.5539700436960762, "learning_rate": 1.6138129708582996e-05, "loss": 0.2819, "step": 3925 }, { "epoch": 0.31103188750247573, "grad_norm": 1.3850176489443191, "learning_rate": 1.6136103748974885e-05, "loss": 0.2509, "step": 3926 }, { "epoch": 0.3111111111111111, "grad_norm": 1.8014291452920574, "learning_rate": 1.6134077385333867e-05, "loss": 0.3507, "step": 3927 }, { "epoch": 0.3111903347197465, "grad_norm": 1.5178962006744077, "learning_rate": 1.613205061779337e-05, "loss": 0.3414, "step": 3928 }, { "epoch": 0.3112695583283819, "grad_norm": 1.434360336941996, "learning_rate": 1.6130023446486844e-05, "loss": 0.3486, "step": 3929 }, { "epoch": 0.3113487819370172, "grad_norm": 1.8797197326748423, "learning_rate": 1.612799587154777e-05, "loss": 0.4426, "step": 3930 }, { "epoch": 0.3114280055456526, "grad_norm": 1.4977091745133497, "learning_rate": 1.6125967893109657e-05, "loss": 0.2946, "step": 3931 }, { "epoch": 0.31150722915428797, "grad_norm": 1.577594247482412, "learning_rate": 1.6123939511306028e-05, "loss": 0.3169, "step": 3932 }, { "epoch": 0.31158645276292335, "grad_norm": 1.6050750522062, "learning_rate": 1.6121910726270453e-05, "loss": 0.3593, "step": 3933 }, { "epoch": 0.31166567637155873, "grad_norm": 1.7068248027418338, "learning_rate": 1.6119881538136514e-05, "loss": 0.3789, "step": 3934 }, { "epoch": 0.3117448999801941, "grad_norm": 1.5085712753038054, "learning_rate": 1.611785194703782e-05, "loss": 0.3435, "step": 3935 }, { "epoch": 0.3118241235888295, "grad_norm": 1.4848287162898604, "learning_rate": 1.6115821953108015e-05, "loss": 0.2968, "step": 3936 }, { "epoch": 0.3119033471974648, "grad_norm": 1.7332486526812132, "learning_rate": 1.611379155648076e-05, "loss": 0.3817, "step": 3937 }, { "epoch": 0.3119825708061002, "grad_norm": 1.4199092339714177, "learning_rate": 1.611176075728975e-05, "loss": 0.2963, "step": 3938 }, { "epoch": 0.3120617944147356, "grad_norm": 1.5233312762977607, "learning_rate": 1.61097295556687e-05, "loss": 0.3474, "step": 3939 }, { "epoch": 0.31214101802337096, "grad_norm": 1.6254257284513742, "learning_rate": 1.610769795175136e-05, "loss": 0.3002, "step": 3940 }, { "epoch": 0.31222024163200635, "grad_norm": 1.3463635393826112, "learning_rate": 1.6105665945671497e-05, "loss": 0.2301, "step": 3941 }, { "epoch": 0.3122994652406417, "grad_norm": 1.7120530973401842, "learning_rate": 1.610363353756291e-05, "loss": 0.374, "step": 3942 }, { "epoch": 0.3123786888492771, "grad_norm": 1.2062836633348841, "learning_rate": 1.6101600727559423e-05, "loss": 0.2285, "step": 3943 }, { "epoch": 0.31245791245791243, "grad_norm": 1.654651560809471, "learning_rate": 1.6099567515794886e-05, "loss": 0.3142, "step": 3944 }, { "epoch": 0.3125371360665478, "grad_norm": 1.35080593020383, "learning_rate": 1.609753390240318e-05, "loss": 0.2227, "step": 3945 }, { "epoch": 0.3126163596751832, "grad_norm": 1.5811421030776875, "learning_rate": 1.6095499887518204e-05, "loss": 0.2919, "step": 3946 }, { "epoch": 0.3126955832838186, "grad_norm": 1.6351038035110637, "learning_rate": 1.6093465471273894e-05, "loss": 0.4352, "step": 3947 }, { "epoch": 0.31277480689245396, "grad_norm": 1.6594707654159093, "learning_rate": 1.60914306538042e-05, "loss": 0.3711, "step": 3948 }, { "epoch": 0.31285403050108934, "grad_norm": 1.9952025371852224, "learning_rate": 1.6089395435243105e-05, "loss": 0.4503, "step": 3949 }, { "epoch": 0.3129332541097247, "grad_norm": 1.5302430177619408, "learning_rate": 1.6087359815724623e-05, "loss": 0.3939, "step": 3950 }, { "epoch": 0.31301247771836005, "grad_norm": 1.5069625663961839, "learning_rate": 1.6085323795382785e-05, "loss": 0.3061, "step": 3951 }, { "epoch": 0.31309170132699543, "grad_norm": 1.663772035447464, "learning_rate": 1.608328737435166e-05, "loss": 0.3456, "step": 3952 }, { "epoch": 0.3131709249356308, "grad_norm": 1.3213608162427135, "learning_rate": 1.608125055276533e-05, "loss": 0.2756, "step": 3953 }, { "epoch": 0.3132501485442662, "grad_norm": 1.672841048395237, "learning_rate": 1.607921333075791e-05, "loss": 0.3514, "step": 3954 }, { "epoch": 0.3133293721529016, "grad_norm": 1.8972876988121614, "learning_rate": 1.607717570846355e-05, "loss": 0.4897, "step": 3955 }, { "epoch": 0.31340859576153696, "grad_norm": 1.6312047293334224, "learning_rate": 1.6075137686016408e-05, "loss": 0.3069, "step": 3956 }, { "epoch": 0.31348781937017234, "grad_norm": 1.7777307357017949, "learning_rate": 1.6073099263550677e-05, "loss": 0.3673, "step": 3957 }, { "epoch": 0.31356704297880766, "grad_norm": 1.5043702946374968, "learning_rate": 1.6071060441200587e-05, "loss": 0.2619, "step": 3958 }, { "epoch": 0.31364626658744305, "grad_norm": 1.6649082876196961, "learning_rate": 1.6069021219100375e-05, "loss": 0.4012, "step": 3959 }, { "epoch": 0.3137254901960784, "grad_norm": 1.4555198485720622, "learning_rate": 1.606698159738432e-05, "loss": 0.2845, "step": 3960 }, { "epoch": 0.3138047138047138, "grad_norm": 1.5385885315447563, "learning_rate": 1.606494157618672e-05, "loss": 0.3841, "step": 3961 }, { "epoch": 0.3138839374133492, "grad_norm": 1.370743562057957, "learning_rate": 1.60629011556419e-05, "loss": 0.329, "step": 3962 }, { "epoch": 0.31396316102198457, "grad_norm": 1.3083362360324118, "learning_rate": 1.6060860335884208e-05, "loss": 0.2658, "step": 3963 }, { "epoch": 0.3140423846306199, "grad_norm": 1.6555569799844825, "learning_rate": 1.605881911704803e-05, "loss": 0.3806, "step": 3964 }, { "epoch": 0.3141216082392553, "grad_norm": 1.922946414992996, "learning_rate": 1.6056777499267764e-05, "loss": 0.4366, "step": 3965 }, { "epoch": 0.31420083184789066, "grad_norm": 1.5994553184309726, "learning_rate": 1.6054735482677842e-05, "loss": 0.3834, "step": 3966 }, { "epoch": 0.31428005545652604, "grad_norm": 1.5442959135058916, "learning_rate": 1.6052693067412724e-05, "loss": 0.2872, "step": 3967 }, { "epoch": 0.3143592790651614, "grad_norm": 1.3506474309480183, "learning_rate": 1.605065025360689e-05, "loss": 0.3089, "step": 3968 }, { "epoch": 0.3144385026737968, "grad_norm": 1.4269200021768844, "learning_rate": 1.6048607041394856e-05, "loss": 0.2912, "step": 3969 }, { "epoch": 0.3145177262824322, "grad_norm": 1.499493156096059, "learning_rate": 1.6046563430911148e-05, "loss": 0.3126, "step": 3970 }, { "epoch": 0.3145969498910675, "grad_norm": 1.5649153072445345, "learning_rate": 1.6044519422290333e-05, "loss": 0.3493, "step": 3971 }, { "epoch": 0.3146761734997029, "grad_norm": 1.3721638257787552, "learning_rate": 1.6042475015666995e-05, "loss": 0.3073, "step": 3972 }, { "epoch": 0.3147553971083383, "grad_norm": 1.3633820418866238, "learning_rate": 1.604043021117575e-05, "loss": 0.3121, "step": 3973 }, { "epoch": 0.31483462071697366, "grad_norm": 1.4707268718651163, "learning_rate": 1.603838500895125e-05, "loss": 0.3345, "step": 3974 }, { "epoch": 0.31491384432560904, "grad_norm": 1.7142632728060705, "learning_rate": 1.6036339409128146e-05, "loss": 0.4247, "step": 3975 }, { "epoch": 0.3149930679342444, "grad_norm": 1.461935275688194, "learning_rate": 1.603429341184114e-05, "loss": 0.3061, "step": 3976 }, { "epoch": 0.3150722915428798, "grad_norm": 1.4903185708386406, "learning_rate": 1.6032247017224944e-05, "loss": 0.35, "step": 3977 }, { "epoch": 0.3151515151515151, "grad_norm": 1.5914286040921042, "learning_rate": 1.603020022541431e-05, "loss": 0.2862, "step": 3978 }, { "epoch": 0.3152307387601505, "grad_norm": 1.5253054198857707, "learning_rate": 1.6028153036544005e-05, "loss": 0.3879, "step": 3979 }, { "epoch": 0.3153099623687859, "grad_norm": 1.6728327456698378, "learning_rate": 1.6026105450748826e-05, "loss": 0.4079, "step": 3980 }, { "epoch": 0.31538918597742127, "grad_norm": 1.2396194247137853, "learning_rate": 1.6024057468163604e-05, "loss": 0.2308, "step": 3981 }, { "epoch": 0.31546840958605665, "grad_norm": 1.385846772012705, "learning_rate": 1.602200908892318e-05, "loss": 0.3088, "step": 3982 }, { "epoch": 0.31554763319469203, "grad_norm": 1.7970845809909852, "learning_rate": 1.6019960313162436e-05, "loss": 0.2787, "step": 3983 }, { "epoch": 0.3156268568033274, "grad_norm": 1.4107491670450274, "learning_rate": 1.601791114101627e-05, "loss": 0.3101, "step": 3984 }, { "epoch": 0.31570608041196274, "grad_norm": 1.7846985614927462, "learning_rate": 1.6015861572619612e-05, "loss": 0.3924, "step": 3985 }, { "epoch": 0.3157853040205981, "grad_norm": 1.5642993595642487, "learning_rate": 1.6013811608107415e-05, "loss": 0.3673, "step": 3986 }, { "epoch": 0.3158645276292335, "grad_norm": 1.6853474268140274, "learning_rate": 1.6011761247614664e-05, "loss": 0.3455, "step": 3987 }, { "epoch": 0.3159437512378689, "grad_norm": 1.394722694232811, "learning_rate": 1.600971049127636e-05, "loss": 0.2826, "step": 3988 }, { "epoch": 0.31602297484650427, "grad_norm": 1.4182497128014389, "learning_rate": 1.6007659339227534e-05, "loss": 0.2925, "step": 3989 }, { "epoch": 0.31610219845513965, "grad_norm": 1.6343899804812616, "learning_rate": 1.6005607791603247e-05, "loss": 0.2502, "step": 3990 }, { "epoch": 0.31618142206377503, "grad_norm": 1.679181719415134, "learning_rate": 1.6003555848538586e-05, "loss": 0.3994, "step": 3991 }, { "epoch": 0.31626064567241036, "grad_norm": 1.6381870325344796, "learning_rate": 1.600150351016866e-05, "loss": 0.3061, "step": 3992 }, { "epoch": 0.31633986928104574, "grad_norm": 1.7226596229115656, "learning_rate": 1.5999450776628607e-05, "loss": 0.4597, "step": 3993 }, { "epoch": 0.3164190928896811, "grad_norm": 1.7248008028923216, "learning_rate": 1.5997397648053587e-05, "loss": 0.4571, "step": 3994 }, { "epoch": 0.3164983164983165, "grad_norm": 1.4863097883307261, "learning_rate": 1.599534412457879e-05, "loss": 0.2978, "step": 3995 }, { "epoch": 0.3165775401069519, "grad_norm": 1.8978919659293474, "learning_rate": 1.5993290206339426e-05, "loss": 0.3633, "step": 3996 }, { "epoch": 0.31665676371558726, "grad_norm": 2.0684281114131244, "learning_rate": 1.5991235893470745e-05, "loss": 0.4515, "step": 3997 }, { "epoch": 0.31673598732422265, "grad_norm": 1.45627374081648, "learning_rate": 1.5989181186108003e-05, "loss": 0.3664, "step": 3998 }, { "epoch": 0.31681521093285797, "grad_norm": 1.6866353176737428, "learning_rate": 1.59871260843865e-05, "loss": 0.3215, "step": 3999 }, { "epoch": 0.31689443454149335, "grad_norm": 1.4502791134570638, "learning_rate": 1.5985070588441556e-05, "loss": 0.3696, "step": 4000 }, { "epoch": 0.31697365815012873, "grad_norm": 1.8553075739887375, "learning_rate": 1.598301469840851e-05, "loss": 0.3829, "step": 4001 }, { "epoch": 0.3170528817587641, "grad_norm": 1.5343213790540935, "learning_rate": 1.598095841442273e-05, "loss": 0.3064, "step": 4002 }, { "epoch": 0.3171321053673995, "grad_norm": 1.453456987076398, "learning_rate": 1.5978901736619624e-05, "loss": 0.3491, "step": 4003 }, { "epoch": 0.3172113289760349, "grad_norm": 1.6749931422482662, "learning_rate": 1.5976844665134607e-05, "loss": 0.3952, "step": 4004 }, { "epoch": 0.3172905525846702, "grad_norm": 1.5701432097954482, "learning_rate": 1.5974787200103124e-05, "loss": 0.3798, "step": 4005 }, { "epoch": 0.3173697761933056, "grad_norm": 1.9168291490562859, "learning_rate": 1.5972729341660653e-05, "loss": 0.4636, "step": 4006 }, { "epoch": 0.31744899980194097, "grad_norm": 1.5108320652242277, "learning_rate": 1.597067108994269e-05, "loss": 0.3008, "step": 4007 }, { "epoch": 0.31752822341057635, "grad_norm": 1.3485295180730346, "learning_rate": 1.5968612445084773e-05, "loss": 0.2682, "step": 4008 }, { "epoch": 0.31760744701921173, "grad_norm": 1.9265023874522214, "learning_rate": 1.596655340722244e-05, "loss": 0.4162, "step": 4009 }, { "epoch": 0.3176866706278471, "grad_norm": 2.026081117900468, "learning_rate": 1.5964493976491278e-05, "loss": 0.3426, "step": 4010 }, { "epoch": 0.3177658942364825, "grad_norm": 1.5092003847971471, "learning_rate": 1.5962434153026884e-05, "loss": 0.3542, "step": 4011 }, { "epoch": 0.3178451178451178, "grad_norm": 2.05854167333894, "learning_rate": 1.596037393696489e-05, "loss": 0.3611, "step": 4012 }, { "epoch": 0.3179243414537532, "grad_norm": 1.873707046514757, "learning_rate": 1.5958313328440954e-05, "loss": 0.2826, "step": 4013 }, { "epoch": 0.3180035650623886, "grad_norm": 2.1294997035567325, "learning_rate": 1.595625232759076e-05, "loss": 0.6559, "step": 4014 }, { "epoch": 0.31808278867102396, "grad_norm": 1.4349506552742113, "learning_rate": 1.595419093455e-05, "loss": 0.2362, "step": 4015 }, { "epoch": 0.31816201227965935, "grad_norm": 1.8846758055636847, "learning_rate": 1.5952129149454422e-05, "loss": 0.4909, "step": 4016 }, { "epoch": 0.3182412358882947, "grad_norm": 1.5005705266160903, "learning_rate": 1.595006697243978e-05, "loss": 0.3242, "step": 4017 }, { "epoch": 0.3183204594969301, "grad_norm": 1.4075272590450865, "learning_rate": 1.5948004403641853e-05, "loss": 0.35, "step": 4018 }, { "epoch": 0.31839968310556543, "grad_norm": 1.6490155039931167, "learning_rate": 1.594594144319646e-05, "loss": 0.3933, "step": 4019 }, { "epoch": 0.3184789067142008, "grad_norm": 1.8022323247782437, "learning_rate": 1.594387809123943e-05, "loss": 0.3335, "step": 4020 }, { "epoch": 0.3185581303228362, "grad_norm": 1.5495894082566937, "learning_rate": 1.594181434790663e-05, "loss": 0.3128, "step": 4021 }, { "epoch": 0.3186373539314716, "grad_norm": 1.5184128628676445, "learning_rate": 1.5939750213333948e-05, "loss": 0.3471, "step": 4022 }, { "epoch": 0.31871657754010696, "grad_norm": 1.629351105383533, "learning_rate": 1.593768568765729e-05, "loss": 0.315, "step": 4023 }, { "epoch": 0.31879580114874234, "grad_norm": 1.517339322393902, "learning_rate": 1.5935620771012603e-05, "loss": 0.3716, "step": 4024 }, { "epoch": 0.3188750247573777, "grad_norm": 1.4881773229861002, "learning_rate": 1.5933555463535846e-05, "loss": 0.2914, "step": 4025 }, { "epoch": 0.31895424836601305, "grad_norm": 1.5178618159021746, "learning_rate": 1.5931489765363014e-05, "loss": 0.2857, "step": 4026 }, { "epoch": 0.31903347197464843, "grad_norm": 1.9089184698038, "learning_rate": 1.592942367663012e-05, "loss": 0.4619, "step": 4027 }, { "epoch": 0.3191126955832838, "grad_norm": 2.111897087423868, "learning_rate": 1.5927357197473207e-05, "loss": 0.4302, "step": 4028 }, { "epoch": 0.3191919191919192, "grad_norm": 1.9595311271334965, "learning_rate": 1.5925290328028346e-05, "loss": 0.2981, "step": 4029 }, { "epoch": 0.3192711428005546, "grad_norm": 2.0274748403299516, "learning_rate": 1.5923223068431626e-05, "loss": 0.4058, "step": 4030 }, { "epoch": 0.31935036640918996, "grad_norm": 1.7810799202678977, "learning_rate": 1.592115541881917e-05, "loss": 0.3805, "step": 4031 }, { "epoch": 0.31942959001782534, "grad_norm": 1.5939194886532888, "learning_rate": 1.5919087379327116e-05, "loss": 0.3815, "step": 4032 }, { "epoch": 0.31950881362646066, "grad_norm": 1.5290876885449083, "learning_rate": 1.5917018950091642e-05, "loss": 0.3322, "step": 4033 }, { "epoch": 0.31958803723509605, "grad_norm": 1.8130967687643362, "learning_rate": 1.591495013124894e-05, "loss": 0.3481, "step": 4034 }, { "epoch": 0.3196672608437314, "grad_norm": 1.7781086584506063, "learning_rate": 1.591288092293523e-05, "loss": 0.4384, "step": 4035 }, { "epoch": 0.3197464844523668, "grad_norm": 1.7810641738759658, "learning_rate": 1.5910811325286768e-05, "loss": 0.5053, "step": 4036 }, { "epoch": 0.3198257080610022, "grad_norm": 1.4811799838435034, "learning_rate": 1.5908741338439818e-05, "loss": 0.2877, "step": 4037 }, { "epoch": 0.31990493166963757, "grad_norm": 1.75385673902383, "learning_rate": 1.5906670962530683e-05, "loss": 0.3939, "step": 4038 }, { "epoch": 0.31998415527827295, "grad_norm": 1.4463272747486247, "learning_rate": 1.5904600197695684e-05, "loss": 0.3113, "step": 4039 }, { "epoch": 0.3200633788869083, "grad_norm": 1.7662807921687593, "learning_rate": 1.5902529044071173e-05, "loss": 0.4202, "step": 4040 }, { "epoch": 0.32014260249554366, "grad_norm": 1.804553606907039, "learning_rate": 1.590045750179353e-05, "loss": 0.44, "step": 4041 }, { "epoch": 0.32022182610417904, "grad_norm": 1.2430074461173346, "learning_rate": 1.5898385570999146e-05, "loss": 0.2654, "step": 4042 }, { "epoch": 0.3203010497128144, "grad_norm": 1.671144080773817, "learning_rate": 1.589631325182446e-05, "loss": 0.3594, "step": 4043 }, { "epoch": 0.3203802733214498, "grad_norm": 1.534521781173824, "learning_rate": 1.589424054440591e-05, "loss": 0.3198, "step": 4044 }, { "epoch": 0.3204594969300852, "grad_norm": 1.4216959107307843, "learning_rate": 1.5892167448879984e-05, "loss": 0.292, "step": 4045 }, { "epoch": 0.3205387205387205, "grad_norm": 1.6928449619548298, "learning_rate": 1.5890093965383186e-05, "loss": 0.4057, "step": 4046 }, { "epoch": 0.3206179441473559, "grad_norm": 1.6996355327671042, "learning_rate": 1.588802009405204e-05, "loss": 0.3946, "step": 4047 }, { "epoch": 0.3206971677559913, "grad_norm": 1.41447755486852, "learning_rate": 1.5885945835023104e-05, "loss": 0.4286, "step": 4048 }, { "epoch": 0.32077639136462666, "grad_norm": 1.750307274779682, "learning_rate": 1.5883871188432955e-05, "loss": 0.4238, "step": 4049 }, { "epoch": 0.32085561497326204, "grad_norm": 1.4609287919409428, "learning_rate": 1.5881796154418196e-05, "loss": 0.2922, "step": 4050 }, { "epoch": 0.3209348385818974, "grad_norm": 1.4769480986269932, "learning_rate": 1.5879720733115464e-05, "loss": 0.2954, "step": 4051 }, { "epoch": 0.3210140621905328, "grad_norm": 1.410933015347825, "learning_rate": 1.5877644924661412e-05, "loss": 0.3133, "step": 4052 }, { "epoch": 0.3210932857991681, "grad_norm": 1.6978830498046613, "learning_rate": 1.5875568729192728e-05, "loss": 0.2862, "step": 4053 }, { "epoch": 0.3211725094078035, "grad_norm": 1.246344728350576, "learning_rate": 1.587349214684611e-05, "loss": 0.2976, "step": 4054 }, { "epoch": 0.3212517330164389, "grad_norm": 1.4697024731348896, "learning_rate": 1.5871415177758297e-05, "loss": 0.3432, "step": 4055 }, { "epoch": 0.32133095662507427, "grad_norm": 1.6731277566867855, "learning_rate": 1.5869337822066043e-05, "loss": 0.3906, "step": 4056 }, { "epoch": 0.32141018023370965, "grad_norm": 1.5432068633480194, "learning_rate": 1.586726007990614e-05, "loss": 0.3477, "step": 4057 }, { "epoch": 0.32148940384234503, "grad_norm": 1.6744279487938063, "learning_rate": 1.586518195141539e-05, "loss": 0.2965, "step": 4058 }, { "epoch": 0.3215686274509804, "grad_norm": 1.4676313194712571, "learning_rate": 1.5863103436730627e-05, "loss": 0.3082, "step": 4059 }, { "epoch": 0.32164785105961574, "grad_norm": 1.5485100068176951, "learning_rate": 1.586102453598872e-05, "loss": 0.313, "step": 4060 }, { "epoch": 0.3217270746682511, "grad_norm": 1.7758709047419037, "learning_rate": 1.5858945249326545e-05, "loss": 0.4067, "step": 4061 }, { "epoch": 0.3218062982768865, "grad_norm": 2.059127526753576, "learning_rate": 1.5856865576881016e-05, "loss": 0.3675, "step": 4062 }, { "epoch": 0.3218855218855219, "grad_norm": 1.6719954529409657, "learning_rate": 1.5854785518789074e-05, "loss": 0.3794, "step": 4063 }, { "epoch": 0.32196474549415727, "grad_norm": 1.4205656804115565, "learning_rate": 1.5852705075187674e-05, "loss": 0.3279, "step": 4064 }, { "epoch": 0.32204396910279265, "grad_norm": 1.7520572289418674, "learning_rate": 1.5850624246213805e-05, "loss": 0.3618, "step": 4065 }, { "epoch": 0.32212319271142803, "grad_norm": 1.5448127649430832, "learning_rate": 1.5848543032004483e-05, "loss": 0.4441, "step": 4066 }, { "epoch": 0.32220241632006336, "grad_norm": 1.327499650582626, "learning_rate": 1.5846461432696744e-05, "loss": 0.2979, "step": 4067 }, { "epoch": 0.32228163992869874, "grad_norm": 1.870242632491099, "learning_rate": 1.5844379448427648e-05, "loss": 0.5472, "step": 4068 }, { "epoch": 0.3223608635373341, "grad_norm": 1.1038237678112879, "learning_rate": 1.5842297079334293e-05, "loss": 0.207, "step": 4069 }, { "epoch": 0.3224400871459695, "grad_norm": 1.4843928080800322, "learning_rate": 1.5840214325553782e-05, "loss": 0.3386, "step": 4070 }, { "epoch": 0.3225193107546049, "grad_norm": 1.610494537503993, "learning_rate": 1.583813118722326e-05, "loss": 0.413, "step": 4071 }, { "epoch": 0.32259853436324026, "grad_norm": 1.9873367423345312, "learning_rate": 1.583604766447989e-05, "loss": 0.4168, "step": 4072 }, { "epoch": 0.32267775797187565, "grad_norm": 1.6020008517869455, "learning_rate": 1.5833963757460863e-05, "loss": 0.3319, "step": 4073 }, { "epoch": 0.32275698158051097, "grad_norm": 1.7035260271569024, "learning_rate": 1.5831879466303393e-05, "loss": 0.3486, "step": 4074 }, { "epoch": 0.32283620518914635, "grad_norm": 1.8486188396204775, "learning_rate": 1.5829794791144723e-05, "loss": 0.4305, "step": 4075 }, { "epoch": 0.32291542879778173, "grad_norm": 1.706390053357653, "learning_rate": 1.5827709732122115e-05, "loss": 0.4006, "step": 4076 }, { "epoch": 0.3229946524064171, "grad_norm": 1.6359573319287772, "learning_rate": 1.5825624289372864e-05, "loss": 0.4537, "step": 4077 }, { "epoch": 0.3230738760150525, "grad_norm": 1.8092075318079248, "learning_rate": 1.5823538463034283e-05, "loss": 0.2648, "step": 4078 }, { "epoch": 0.3231530996236879, "grad_norm": 1.8139323968687953, "learning_rate": 1.5821452253243718e-05, "loss": 0.3806, "step": 4079 }, { "epoch": 0.32323232323232326, "grad_norm": 1.4462998240253209, "learning_rate": 1.581936566013853e-05, "loss": 0.4712, "step": 4080 }, { "epoch": 0.3233115468409586, "grad_norm": 1.2640091811161325, "learning_rate": 1.5817278683856117e-05, "loss": 0.2678, "step": 4081 }, { "epoch": 0.32339077044959397, "grad_norm": 1.8705992247017151, "learning_rate": 1.5815191324533893e-05, "loss": 0.3709, "step": 4082 }, { "epoch": 0.32346999405822935, "grad_norm": 2.209763043359014, "learning_rate": 1.58131035823093e-05, "loss": 0.3603, "step": 4083 }, { "epoch": 0.32354921766686473, "grad_norm": 1.945451480736648, "learning_rate": 1.581101545731981e-05, "loss": 0.3403, "step": 4084 }, { "epoch": 0.3236284412755001, "grad_norm": 1.627318957001608, "learning_rate": 1.580892694970291e-05, "loss": 0.3793, "step": 4085 }, { "epoch": 0.3237076648841355, "grad_norm": 1.3674463815413995, "learning_rate": 1.580683805959612e-05, "loss": 0.2761, "step": 4086 }, { "epoch": 0.3237868884927708, "grad_norm": 1.3374556326086156, "learning_rate": 1.5804748787136987e-05, "loss": 0.2826, "step": 4087 }, { "epoch": 0.3238661121014062, "grad_norm": 1.5059445921408614, "learning_rate": 1.5802659132463076e-05, "loss": 0.2897, "step": 4088 }, { "epoch": 0.3239453357100416, "grad_norm": 1.7116220057117026, "learning_rate": 1.5800569095711983e-05, "loss": 0.247, "step": 4089 }, { "epoch": 0.32402455931867696, "grad_norm": 1.7416405539798918, "learning_rate": 1.5798478677021327e-05, "loss": 0.4265, "step": 4090 }, { "epoch": 0.32410378292731234, "grad_norm": 1.8017931813708092, "learning_rate": 1.5796387876528746e-05, "loss": 0.4326, "step": 4091 }, { "epoch": 0.3241830065359477, "grad_norm": 1.613586066070733, "learning_rate": 1.579429669437192e-05, "loss": 0.3269, "step": 4092 }, { "epoch": 0.3242622301445831, "grad_norm": 2.0851014721491206, "learning_rate": 1.579220513068853e-05, "loss": 0.4173, "step": 4093 }, { "epoch": 0.32434145375321843, "grad_norm": 1.6153451371930847, "learning_rate": 1.5790113185616305e-05, "loss": 0.3289, "step": 4094 }, { "epoch": 0.3244206773618538, "grad_norm": 1.502787218227778, "learning_rate": 1.5788020859292987e-05, "loss": 0.2889, "step": 4095 }, { "epoch": 0.3244999009704892, "grad_norm": 1.4847709663047504, "learning_rate": 1.5785928151856345e-05, "loss": 0.3386, "step": 4096 }, { "epoch": 0.3245791245791246, "grad_norm": 1.2691896968235292, "learning_rate": 1.5783835063444176e-05, "loss": 0.2441, "step": 4097 }, { "epoch": 0.32465834818775996, "grad_norm": 1.6602498908243948, "learning_rate": 1.57817415941943e-05, "loss": 0.3856, "step": 4098 }, { "epoch": 0.32473757179639534, "grad_norm": 1.6273179574346803, "learning_rate": 1.5779647744244556e-05, "loss": 0.3513, "step": 4099 }, { "epoch": 0.3248167954050307, "grad_norm": 1.7727630279084312, "learning_rate": 1.577755351373282e-05, "loss": 0.4255, "step": 4100 }, { "epoch": 0.32489601901366605, "grad_norm": 1.3928566774257036, "learning_rate": 1.5775458902796982e-05, "loss": 0.3182, "step": 4101 }, { "epoch": 0.32497524262230143, "grad_norm": 1.4425146167237544, "learning_rate": 1.577336391157497e-05, "loss": 0.3976, "step": 4102 }, { "epoch": 0.3250544662309368, "grad_norm": 1.6250876670082313, "learning_rate": 1.5771268540204724e-05, "loss": 0.3798, "step": 4103 }, { "epoch": 0.3251336898395722, "grad_norm": 1.2782787809667149, "learning_rate": 1.576917278882421e-05, "loss": 0.2678, "step": 4104 }, { "epoch": 0.3252129134482076, "grad_norm": 1.4836702415372003, "learning_rate": 1.576707665757143e-05, "loss": 0.3413, "step": 4105 }, { "epoch": 0.32529213705684296, "grad_norm": 1.5669319128376393, "learning_rate": 1.5764980146584402e-05, "loss": 0.3167, "step": 4106 }, { "epoch": 0.32537136066547834, "grad_norm": 1.5963128604568757, "learning_rate": 1.5762883256001168e-05, "loss": 0.3122, "step": 4107 }, { "epoch": 0.32545058427411366, "grad_norm": 1.6443793513002702, "learning_rate": 1.57607859859598e-05, "loss": 0.3464, "step": 4108 }, { "epoch": 0.32552980788274904, "grad_norm": 1.6174343628805843, "learning_rate": 1.5758688336598397e-05, "loss": 0.345, "step": 4109 }, { "epoch": 0.3256090314913844, "grad_norm": 1.7371858495014527, "learning_rate": 1.5756590308055075e-05, "loss": 0.3202, "step": 4110 }, { "epoch": 0.3256882551000198, "grad_norm": 1.7184145267005262, "learning_rate": 1.5754491900467982e-05, "loss": 0.414, "step": 4111 }, { "epoch": 0.3257674787086552, "grad_norm": 1.8180579342518757, "learning_rate": 1.5752393113975282e-05, "loss": 0.4003, "step": 4112 }, { "epoch": 0.32584670231729057, "grad_norm": 1.6564691017210889, "learning_rate": 1.5750293948715178e-05, "loss": 0.2914, "step": 4113 }, { "epoch": 0.32592592592592595, "grad_norm": 1.553898807504377, "learning_rate": 1.5748194404825885e-05, "loss": 0.4081, "step": 4114 }, { "epoch": 0.3260051495345613, "grad_norm": 1.529549987459568, "learning_rate": 1.574609448244565e-05, "loss": 0.3162, "step": 4115 }, { "epoch": 0.32608437314319666, "grad_norm": 1.4274879881115572, "learning_rate": 1.574399418171274e-05, "loss": 0.3161, "step": 4116 }, { "epoch": 0.32616359675183204, "grad_norm": 2.261364998445019, "learning_rate": 1.5741893502765452e-05, "loss": 0.3985, "step": 4117 }, { "epoch": 0.3262428203604674, "grad_norm": 1.4180349264104986, "learning_rate": 1.5739792445742103e-05, "loss": 0.2931, "step": 4118 }, { "epoch": 0.3263220439691028, "grad_norm": 1.7227585494610789, "learning_rate": 1.573769101078104e-05, "loss": 0.3643, "step": 4119 }, { "epoch": 0.3264012675777382, "grad_norm": 1.6862048978441324, "learning_rate": 1.573558919802064e-05, "loss": 0.3093, "step": 4120 }, { "epoch": 0.32648049118637357, "grad_norm": 1.4288303772723825, "learning_rate": 1.573348700759928e-05, "loss": 0.334, "step": 4121 }, { "epoch": 0.3265597147950089, "grad_norm": 2.0285719530841613, "learning_rate": 1.573138443965539e-05, "loss": 0.3629, "step": 4122 }, { "epoch": 0.3266389384036443, "grad_norm": 1.6836914910003749, "learning_rate": 1.572928149432741e-05, "loss": 0.4494, "step": 4123 }, { "epoch": 0.32671816201227966, "grad_norm": 1.3770018885568436, "learning_rate": 1.5727178171753817e-05, "loss": 0.3225, "step": 4124 }, { "epoch": 0.32679738562091504, "grad_norm": 1.401373513311934, "learning_rate": 1.57250744720731e-05, "loss": 0.2494, "step": 4125 }, { "epoch": 0.3268766092295504, "grad_norm": 1.882570135082064, "learning_rate": 1.572297039542377e-05, "loss": 0.4127, "step": 4126 }, { "epoch": 0.3269558328381858, "grad_norm": 1.7825013364947273, "learning_rate": 1.572086594194438e-05, "loss": 0.3768, "step": 4127 }, { "epoch": 0.3270350564468211, "grad_norm": 1.5325148967272597, "learning_rate": 1.571876111177349e-05, "loss": 0.3614, "step": 4128 }, { "epoch": 0.3271142800554565, "grad_norm": 1.2353497548099759, "learning_rate": 1.571665590504971e-05, "loss": 0.2602, "step": 4129 }, { "epoch": 0.3271935036640919, "grad_norm": 1.615171667142959, "learning_rate": 1.5714550321911636e-05, "loss": 0.3544, "step": 4130 }, { "epoch": 0.32727272727272727, "grad_norm": 1.5249794703643902, "learning_rate": 1.5712444362497917e-05, "loss": 0.2479, "step": 4131 }, { "epoch": 0.32735195088136265, "grad_norm": 1.5709063522900069, "learning_rate": 1.5710338026947227e-05, "loss": 0.4325, "step": 4132 }, { "epoch": 0.32743117448999803, "grad_norm": 1.5583515025518357, "learning_rate": 1.5708231315398255e-05, "loss": 0.3221, "step": 4133 }, { "epoch": 0.3275103980986334, "grad_norm": 1.7328213074876821, "learning_rate": 1.570612422798972e-05, "loss": 0.3227, "step": 4134 }, { "epoch": 0.32758962170726874, "grad_norm": 1.5401999941443987, "learning_rate": 1.5704016764860358e-05, "loss": 0.3203, "step": 4135 }, { "epoch": 0.3276688453159041, "grad_norm": 1.2254292537735298, "learning_rate": 1.5701908926148933e-05, "loss": 0.242, "step": 4136 }, { "epoch": 0.3277480689245395, "grad_norm": 1.6097644217399403, "learning_rate": 1.5699800711994247e-05, "loss": 0.333, "step": 4137 }, { "epoch": 0.3278272925331749, "grad_norm": 1.4990357654879476, "learning_rate": 1.569769212253511e-05, "loss": 0.3431, "step": 4138 }, { "epoch": 0.32790651614181027, "grad_norm": 1.3240800771279073, "learning_rate": 1.569558315791036e-05, "loss": 0.3622, "step": 4139 }, { "epoch": 0.32798573975044565, "grad_norm": 1.4019169636326965, "learning_rate": 1.5693473818258866e-05, "loss": 0.3479, "step": 4140 }, { "epoch": 0.32806496335908103, "grad_norm": 1.7774105201084667, "learning_rate": 1.5691364103719515e-05, "loss": 0.3919, "step": 4141 }, { "epoch": 0.32814418696771636, "grad_norm": 1.7163373701070068, "learning_rate": 1.5689254014431225e-05, "loss": 0.4127, "step": 4142 }, { "epoch": 0.32822341057635174, "grad_norm": 1.4924339893483967, "learning_rate": 1.5687143550532932e-05, "loss": 0.3459, "step": 4143 }, { "epoch": 0.3283026341849871, "grad_norm": 1.6899935775801103, "learning_rate": 1.56850327121636e-05, "loss": 0.281, "step": 4144 }, { "epoch": 0.3283818577936225, "grad_norm": 1.6642851490573314, "learning_rate": 1.568292149946222e-05, "loss": 0.3914, "step": 4145 }, { "epoch": 0.3284610814022579, "grad_norm": 1.6254158194715103, "learning_rate": 1.56808099125678e-05, "loss": 0.2676, "step": 4146 }, { "epoch": 0.32854030501089326, "grad_norm": 1.6626302004344038, "learning_rate": 1.5678697951619386e-05, "loss": 0.3349, "step": 4147 }, { "epoch": 0.32861952861952864, "grad_norm": 1.8090398065346878, "learning_rate": 1.5676585616756037e-05, "loss": 0.4564, "step": 4148 }, { "epoch": 0.32869875222816397, "grad_norm": 1.4981315587877324, "learning_rate": 1.5674472908116834e-05, "loss": 0.3388, "step": 4149 }, { "epoch": 0.32877797583679935, "grad_norm": 1.4669306873360455, "learning_rate": 1.5672359825840895e-05, "loss": 0.311, "step": 4150 }, { "epoch": 0.32885719944543473, "grad_norm": 1.4272680620902973, "learning_rate": 1.567024637006736e-05, "loss": 0.2956, "step": 4151 }, { "epoch": 0.3289364230540701, "grad_norm": 1.7122151858693686, "learning_rate": 1.566813254093538e-05, "loss": 0.3896, "step": 4152 }, { "epoch": 0.3290156466627055, "grad_norm": 1.4796597676585008, "learning_rate": 1.566601833858415e-05, "loss": 0.3327, "step": 4153 }, { "epoch": 0.3290948702713409, "grad_norm": 1.4905342032042195, "learning_rate": 1.566390376315287e-05, "loss": 0.3081, "step": 4154 }, { "epoch": 0.32917409387997626, "grad_norm": 1.4919596154341799, "learning_rate": 1.5661788814780782e-05, "loss": 0.3682, "step": 4155 }, { "epoch": 0.3292533174886116, "grad_norm": 1.2962051191888033, "learning_rate": 1.5659673493607144e-05, "loss": 0.2799, "step": 4156 }, { "epoch": 0.32933254109724697, "grad_norm": 1.7381654459870541, "learning_rate": 1.565755779977124e-05, "loss": 0.3794, "step": 4157 }, { "epoch": 0.32941176470588235, "grad_norm": 1.3163164837949308, "learning_rate": 1.5655441733412376e-05, "loss": 0.2999, "step": 4158 }, { "epoch": 0.32949098831451773, "grad_norm": 1.2673184430786948, "learning_rate": 1.5653325294669884e-05, "loss": 0.2392, "step": 4159 }, { "epoch": 0.3295702119231531, "grad_norm": 1.6551438060361676, "learning_rate": 1.565120848368313e-05, "loss": 0.2603, "step": 4160 }, { "epoch": 0.3296494355317885, "grad_norm": 1.5798922580777348, "learning_rate": 1.5649091300591482e-05, "loss": 0.2619, "step": 4161 }, { "epoch": 0.3297286591404238, "grad_norm": 1.2103007555802698, "learning_rate": 1.564697374553436e-05, "loss": 0.2288, "step": 4162 }, { "epoch": 0.3298078827490592, "grad_norm": 1.6451624840053714, "learning_rate": 1.5644855818651184e-05, "loss": 0.3791, "step": 4163 }, { "epoch": 0.3298871063576946, "grad_norm": 1.9896398890381832, "learning_rate": 1.564273752008141e-05, "loss": 0.5585, "step": 4164 }, { "epoch": 0.32996632996632996, "grad_norm": 1.3460781880914419, "learning_rate": 1.5640618849964528e-05, "loss": 0.2559, "step": 4165 }, { "epoch": 0.33004555357496534, "grad_norm": 1.7591293010318814, "learning_rate": 1.5638499808440036e-05, "loss": 0.3463, "step": 4166 }, { "epoch": 0.3301247771836007, "grad_norm": 1.4119593458458475, "learning_rate": 1.563638039564746e-05, "loss": 0.2451, "step": 4167 }, { "epoch": 0.3302040007922361, "grad_norm": 1.452667841868237, "learning_rate": 1.5634260611726355e-05, "loss": 0.2668, "step": 4168 }, { "epoch": 0.33028322440087143, "grad_norm": 1.6212523425673047, "learning_rate": 1.5632140456816302e-05, "loss": 0.3893, "step": 4169 }, { "epoch": 0.3303624480095068, "grad_norm": 1.4613501771283923, "learning_rate": 1.5630019931056894e-05, "loss": 0.4171, "step": 4170 }, { "epoch": 0.3304416716181422, "grad_norm": 1.4347976063611856, "learning_rate": 1.5627899034587768e-05, "loss": 0.2875, "step": 4171 }, { "epoch": 0.3305208952267776, "grad_norm": 1.7536311018935635, "learning_rate": 1.562577776754857e-05, "loss": 0.3626, "step": 4172 }, { "epoch": 0.33060011883541296, "grad_norm": 1.527150246117706, "learning_rate": 1.5623656130078976e-05, "loss": 0.3404, "step": 4173 }, { "epoch": 0.33067934244404834, "grad_norm": 1.6467553163637536, "learning_rate": 1.5621534122318682e-05, "loss": 0.4612, "step": 4174 }, { "epoch": 0.3307585660526837, "grad_norm": 1.7998619202822257, "learning_rate": 1.5619411744407416e-05, "loss": 0.3646, "step": 4175 }, { "epoch": 0.33083778966131905, "grad_norm": 1.7394536808594858, "learning_rate": 1.561728899648493e-05, "loss": 0.4653, "step": 4176 }, { "epoch": 0.33091701326995443, "grad_norm": 1.5112658734549012, "learning_rate": 1.561516587869099e-05, "loss": 0.2866, "step": 4177 }, { "epoch": 0.3309962368785898, "grad_norm": 2.0090412743310573, "learning_rate": 1.5613042391165395e-05, "loss": 0.5817, "step": 4178 }, { "epoch": 0.3310754604872252, "grad_norm": 1.5983017011813054, "learning_rate": 1.5610918534047964e-05, "loss": 0.3532, "step": 4179 }, { "epoch": 0.3311546840958606, "grad_norm": 1.9844196291748513, "learning_rate": 1.5608794307478546e-05, "loss": 0.3935, "step": 4180 }, { "epoch": 0.33123390770449596, "grad_norm": 1.6575759981422147, "learning_rate": 1.5606669711597017e-05, "loss": 0.3351, "step": 4181 }, { "epoch": 0.33131313131313134, "grad_norm": 1.5268078413425163, "learning_rate": 1.560454474654326e-05, "loss": 0.3276, "step": 4182 }, { "epoch": 0.33139235492176666, "grad_norm": 1.5697133967894934, "learning_rate": 1.56024194124572e-05, "loss": 0.3133, "step": 4183 }, { "epoch": 0.33147157853040204, "grad_norm": 1.859752771998104, "learning_rate": 1.5600293709478776e-05, "loss": 0.3101, "step": 4184 }, { "epoch": 0.3315508021390374, "grad_norm": 1.7134807583348657, "learning_rate": 1.559816763774796e-05, "loss": 0.3019, "step": 4185 }, { "epoch": 0.3316300257476728, "grad_norm": 1.1808077793661966, "learning_rate": 1.559604119740474e-05, "loss": 0.1679, "step": 4186 }, { "epoch": 0.3317092493563082, "grad_norm": 1.7001383949072224, "learning_rate": 1.5593914388589136e-05, "loss": 0.3528, "step": 4187 }, { "epoch": 0.33178847296494357, "grad_norm": 1.6909479202040436, "learning_rate": 1.559178721144119e-05, "loss": 0.3719, "step": 4188 }, { "epoch": 0.33186769657357895, "grad_norm": 2.028373876921339, "learning_rate": 1.5589659666100952e-05, "loss": 0.3597, "step": 4189 }, { "epoch": 0.3319469201822143, "grad_norm": 1.3368820284378782, "learning_rate": 1.5587531752708528e-05, "loss": 0.2873, "step": 4190 }, { "epoch": 0.33202614379084966, "grad_norm": 1.6234417288229521, "learning_rate": 1.558540347140402e-05, "loss": 0.3933, "step": 4191 }, { "epoch": 0.33210536739948504, "grad_norm": 1.287891724882461, "learning_rate": 1.558327482232757e-05, "loss": 0.2745, "step": 4192 }, { "epoch": 0.3321845910081204, "grad_norm": 1.3739640387928436, "learning_rate": 1.558114580561934e-05, "loss": 0.3256, "step": 4193 }, { "epoch": 0.3322638146167558, "grad_norm": 1.450618259859973, "learning_rate": 1.557901642141951e-05, "loss": 0.3188, "step": 4194 }, { "epoch": 0.3323430382253912, "grad_norm": 1.6499599765805935, "learning_rate": 1.5576886669868297e-05, "loss": 0.3621, "step": 4195 }, { "epoch": 0.33242226183402657, "grad_norm": 1.6677216955280363, "learning_rate": 1.5574756551105926e-05, "loss": 0.3915, "step": 4196 }, { "epoch": 0.3325014854426619, "grad_norm": 1.5018442221321981, "learning_rate": 1.5572626065272666e-05, "loss": 0.2439, "step": 4197 }, { "epoch": 0.3325807090512973, "grad_norm": 1.3107643462273924, "learning_rate": 1.557049521250879e-05, "loss": 0.2991, "step": 4198 }, { "epoch": 0.33265993265993266, "grad_norm": 1.7764472928849313, "learning_rate": 1.5568363992954607e-05, "loss": 0.413, "step": 4199 }, { "epoch": 0.33273915626856804, "grad_norm": 1.6423729997991683, "learning_rate": 1.556623240675045e-05, "loss": 0.2733, "step": 4200 }, { "epoch": 0.3328183798772034, "grad_norm": 1.3458073229408152, "learning_rate": 1.556410045403667e-05, "loss": 0.3462, "step": 4201 }, { "epoch": 0.3328976034858388, "grad_norm": 1.6789851897838282, "learning_rate": 1.556196813495365e-05, "loss": 0.3986, "step": 4202 }, { "epoch": 0.3329768270944741, "grad_norm": 1.4348840388865662, "learning_rate": 1.555983544964179e-05, "loss": 0.3226, "step": 4203 }, { "epoch": 0.3330560507031095, "grad_norm": 1.3347616946655005, "learning_rate": 1.555770239824152e-05, "loss": 0.2601, "step": 4204 }, { "epoch": 0.3331352743117449, "grad_norm": 1.7984596767267915, "learning_rate": 1.5555568980893284e-05, "loss": 0.3886, "step": 4205 }, { "epoch": 0.33321449792038027, "grad_norm": 1.66669208163045, "learning_rate": 1.5553435197737566e-05, "loss": 0.3713, "step": 4206 }, { "epoch": 0.33329372152901565, "grad_norm": 1.6769922685466212, "learning_rate": 1.5551301048914863e-05, "loss": 0.4031, "step": 4207 }, { "epoch": 0.33337294513765103, "grad_norm": 1.978035134151725, "learning_rate": 1.5549166534565695e-05, "loss": 0.3462, "step": 4208 }, { "epoch": 0.3334521687462864, "grad_norm": 1.4320454369412885, "learning_rate": 1.554703165483061e-05, "loss": 0.3039, "step": 4209 }, { "epoch": 0.33353139235492174, "grad_norm": 1.3254232489993087, "learning_rate": 1.5544896409850183e-05, "loss": 0.2935, "step": 4210 }, { "epoch": 0.3336106159635571, "grad_norm": 1.729154708646171, "learning_rate": 1.554276079976501e-05, "loss": 0.3526, "step": 4211 }, { "epoch": 0.3336898395721925, "grad_norm": 1.6735937491248896, "learning_rate": 1.5540624824715703e-05, "loss": 0.3103, "step": 4212 }, { "epoch": 0.3337690631808279, "grad_norm": 1.3218157014036311, "learning_rate": 1.5538488484842914e-05, "loss": 0.2603, "step": 4213 }, { "epoch": 0.33384828678946327, "grad_norm": 1.6083059408034546, "learning_rate": 1.553635178028731e-05, "loss": 0.3101, "step": 4214 }, { "epoch": 0.33392751039809865, "grad_norm": 1.7063443063241774, "learning_rate": 1.5534214711189574e-05, "loss": 0.3679, "step": 4215 }, { "epoch": 0.33400673400673403, "grad_norm": 1.8270500236637786, "learning_rate": 1.5532077277690435e-05, "loss": 0.3964, "step": 4216 }, { "epoch": 0.33408595761536936, "grad_norm": 1.44024601717267, "learning_rate": 1.552993947993062e-05, "loss": 0.3317, "step": 4217 }, { "epoch": 0.33416518122400474, "grad_norm": 1.4523795416061784, "learning_rate": 1.5527801318050904e-05, "loss": 0.276, "step": 4218 }, { "epoch": 0.3342444048326401, "grad_norm": 1.3648882674836615, "learning_rate": 1.5525662792192066e-05, "loss": 0.2354, "step": 4219 }, { "epoch": 0.3343236284412755, "grad_norm": 1.4510706605866899, "learning_rate": 1.5523523902494927e-05, "loss": 0.3797, "step": 4220 }, { "epoch": 0.3344028520499109, "grad_norm": 1.5174268932484072, "learning_rate": 1.552138464910031e-05, "loss": 0.3118, "step": 4221 }, { "epoch": 0.33448207565854626, "grad_norm": 1.6627671348472728, "learning_rate": 1.5519245032149083e-05, "loss": 0.4178, "step": 4222 }, { "epoch": 0.33456129926718164, "grad_norm": 1.5801064944691237, "learning_rate": 1.5517105051782127e-05, "loss": 0.2069, "step": 4223 }, { "epoch": 0.33464052287581697, "grad_norm": 1.530721118780897, "learning_rate": 1.551496470814035e-05, "loss": 0.3517, "step": 4224 }, { "epoch": 0.33471974648445235, "grad_norm": 1.3414293890139244, "learning_rate": 1.5512824001364686e-05, "loss": 0.2681, "step": 4225 }, { "epoch": 0.33479897009308773, "grad_norm": 1.8468493679474305, "learning_rate": 1.5510682931596083e-05, "loss": 0.3363, "step": 4226 }, { "epoch": 0.3348781937017231, "grad_norm": 1.7365712626099319, "learning_rate": 1.550854149897553e-05, "loss": 0.4225, "step": 4227 }, { "epoch": 0.3349574173103585, "grad_norm": 1.4519458616213432, "learning_rate": 1.5506399703644017e-05, "loss": 0.326, "step": 4228 }, { "epoch": 0.3350366409189939, "grad_norm": 1.6297718465849158, "learning_rate": 1.5504257545742585e-05, "loss": 0.4093, "step": 4229 }, { "epoch": 0.33511586452762926, "grad_norm": 1.5059403987915492, "learning_rate": 1.5502115025412275e-05, "loss": 0.3955, "step": 4230 }, { "epoch": 0.3351950881362646, "grad_norm": 1.816439505863056, "learning_rate": 1.5499972142794167e-05, "loss": 0.3876, "step": 4231 }, { "epoch": 0.33527431174489997, "grad_norm": 1.2961933036118685, "learning_rate": 1.5497828898029358e-05, "loss": 0.2704, "step": 4232 }, { "epoch": 0.33535353535353535, "grad_norm": 1.4132729334249627, "learning_rate": 1.5495685291258967e-05, "loss": 0.3239, "step": 4233 }, { "epoch": 0.33543275896217073, "grad_norm": 1.3782417523010273, "learning_rate": 1.5493541322624145e-05, "loss": 0.2855, "step": 4234 }, { "epoch": 0.3355119825708061, "grad_norm": 1.4803242882284275, "learning_rate": 1.5491396992266065e-05, "loss": 0.3304, "step": 4235 }, { "epoch": 0.3355912061794415, "grad_norm": 1.4570404690494962, "learning_rate": 1.548925230032591e-05, "loss": 0.392, "step": 4236 }, { "epoch": 0.3356704297880769, "grad_norm": 1.5458789724182995, "learning_rate": 1.5487107246944902e-05, "loss": 0.3548, "step": 4237 }, { "epoch": 0.3357496533967122, "grad_norm": 1.6131480344783478, "learning_rate": 1.548496183226429e-05, "loss": 0.4586, "step": 4238 }, { "epoch": 0.3358288770053476, "grad_norm": 1.4893714858939047, "learning_rate": 1.548281605642533e-05, "loss": 0.3258, "step": 4239 }, { "epoch": 0.33590810061398296, "grad_norm": 1.6938789435238866, "learning_rate": 1.5480669919569313e-05, "loss": 0.2813, "step": 4240 }, { "epoch": 0.33598732422261834, "grad_norm": 1.6769872447502734, "learning_rate": 1.5478523421837553e-05, "loss": 0.338, "step": 4241 }, { "epoch": 0.3360665478312537, "grad_norm": 1.6003984862783687, "learning_rate": 1.5476376563371392e-05, "loss": 0.2741, "step": 4242 }, { "epoch": 0.3361457714398891, "grad_norm": 1.2689194951385718, "learning_rate": 1.547422934431218e-05, "loss": 0.2666, "step": 4243 }, { "epoch": 0.33622499504852443, "grad_norm": 1.7351447988751467, "learning_rate": 1.5472081764801307e-05, "loss": 0.4686, "step": 4244 }, { "epoch": 0.3363042186571598, "grad_norm": 1.18197016411766, "learning_rate": 1.546993382498018e-05, "loss": 0.2585, "step": 4245 }, { "epoch": 0.3363834422657952, "grad_norm": 1.6057280458348795, "learning_rate": 1.546778552499023e-05, "loss": 0.3443, "step": 4246 }, { "epoch": 0.3364626658744306, "grad_norm": 1.6002838812438793, "learning_rate": 1.5465636864972914e-05, "loss": 0.3427, "step": 4247 }, { "epoch": 0.33654188948306596, "grad_norm": 1.9739040391493348, "learning_rate": 1.5463487845069708e-05, "loss": 0.462, "step": 4248 }, { "epoch": 0.33662111309170134, "grad_norm": 1.473318208191478, "learning_rate": 1.546133846542212e-05, "loss": 0.4105, "step": 4249 }, { "epoch": 0.3367003367003367, "grad_norm": 1.3699943693317282, "learning_rate": 1.5459188726171666e-05, "loss": 0.2352, "step": 4250 }, { "epoch": 0.33677956030897205, "grad_norm": 1.5106420361915884, "learning_rate": 1.5457038627459905e-05, "loss": 0.3859, "step": 4251 }, { "epoch": 0.33685878391760743, "grad_norm": 1.2315617954454943, "learning_rate": 1.545488816942841e-05, "loss": 0.2315, "step": 4252 }, { "epoch": 0.3369380075262428, "grad_norm": 1.753153443022401, "learning_rate": 1.5452737352218773e-05, "loss": 0.408, "step": 4253 }, { "epoch": 0.3370172311348782, "grad_norm": 1.712294398397039, "learning_rate": 1.545058617597262e-05, "loss": 0.4053, "step": 4254 }, { "epoch": 0.3370964547435136, "grad_norm": 1.528931331202446, "learning_rate": 1.544843464083159e-05, "loss": 0.3351, "step": 4255 }, { "epoch": 0.33717567835214896, "grad_norm": 1.5315737332191974, "learning_rate": 1.544628274693736e-05, "loss": 0.3406, "step": 4256 }, { "epoch": 0.33725490196078434, "grad_norm": 1.4872515032479605, "learning_rate": 1.5444130494431612e-05, "loss": 0.2454, "step": 4257 }, { "epoch": 0.33733412556941966, "grad_norm": 1.41015186609565, "learning_rate": 1.544197788345607e-05, "loss": 0.3242, "step": 4258 }, { "epoch": 0.33741334917805504, "grad_norm": 1.6792958175936996, "learning_rate": 1.543982491415247e-05, "loss": 0.4017, "step": 4259 }, { "epoch": 0.3374925727866904, "grad_norm": 1.4778059162223025, "learning_rate": 1.5437671586662575e-05, "loss": 0.3251, "step": 4260 }, { "epoch": 0.3375717963953258, "grad_norm": 1.5340082557646046, "learning_rate": 1.543551790112817e-05, "loss": 0.3924, "step": 4261 }, { "epoch": 0.3376510200039612, "grad_norm": 1.0817474947175367, "learning_rate": 1.5433363857691067e-05, "loss": 0.2129, "step": 4262 }, { "epoch": 0.33773024361259657, "grad_norm": 1.485660562195963, "learning_rate": 1.5431209456493093e-05, "loss": 0.3111, "step": 4263 }, { "epoch": 0.33780946722123195, "grad_norm": 1.5150243558972971, "learning_rate": 1.542905469767611e-05, "loss": 0.3381, "step": 4264 }, { "epoch": 0.3378886908298673, "grad_norm": 1.3946392536404986, "learning_rate": 1.5426899581382e-05, "loss": 0.2383, "step": 4265 }, { "epoch": 0.33796791443850266, "grad_norm": 1.7879841887093437, "learning_rate": 1.5424744107752666e-05, "loss": 0.3504, "step": 4266 }, { "epoch": 0.33804713804713804, "grad_norm": 1.6185442471378342, "learning_rate": 1.542258827693003e-05, "loss": 0.3185, "step": 4267 }, { "epoch": 0.3381263616557734, "grad_norm": 1.2925602464150183, "learning_rate": 1.542043208905605e-05, "loss": 0.2116, "step": 4268 }, { "epoch": 0.3382055852644088, "grad_norm": 1.4679548988166635, "learning_rate": 1.5418275544272702e-05, "loss": 0.2978, "step": 4269 }, { "epoch": 0.3382848088730442, "grad_norm": 1.8014859356018105, "learning_rate": 1.541611864272198e-05, "loss": 0.3802, "step": 4270 }, { "epoch": 0.33836403248167957, "grad_norm": 1.5208028695603413, "learning_rate": 1.5413961384545902e-05, "loss": 0.3596, "step": 4271 }, { "epoch": 0.3384432560903149, "grad_norm": 1.7531835715555473, "learning_rate": 1.541180376988652e-05, "loss": 0.3586, "step": 4272 }, { "epoch": 0.3385224796989503, "grad_norm": 1.7763373404347726, "learning_rate": 1.54096457988859e-05, "loss": 0.4851, "step": 4273 }, { "epoch": 0.33860170330758566, "grad_norm": 1.3927237931969634, "learning_rate": 1.540748747168613e-05, "loss": 0.3053, "step": 4274 }, { "epoch": 0.33868092691622104, "grad_norm": 1.4228184773640276, "learning_rate": 1.5405328788429333e-05, "loss": 0.2541, "step": 4275 }, { "epoch": 0.3387601505248564, "grad_norm": 1.791523874147248, "learning_rate": 1.5403169749257644e-05, "loss": 0.4753, "step": 4276 }, { "epoch": 0.3388393741334918, "grad_norm": 1.6019892733694858, "learning_rate": 1.5401010354313222e-05, "loss": 0.3366, "step": 4277 }, { "epoch": 0.3389185977421272, "grad_norm": 1.6801031722330884, "learning_rate": 1.539885060373826e-05, "loss": 0.4062, "step": 4278 }, { "epoch": 0.3389978213507625, "grad_norm": 1.3315412388592751, "learning_rate": 1.539669049767496e-05, "loss": 0.3292, "step": 4279 }, { "epoch": 0.3390770449593979, "grad_norm": 1.7270116239306605, "learning_rate": 1.539453003626556e-05, "loss": 0.3136, "step": 4280 }, { "epoch": 0.33915626856803327, "grad_norm": 1.3809647603967554, "learning_rate": 1.5392369219652313e-05, "loss": 0.3639, "step": 4281 }, { "epoch": 0.33923549217666865, "grad_norm": 1.2838504386342768, "learning_rate": 1.53902080479775e-05, "loss": 0.2924, "step": 4282 }, { "epoch": 0.33931471578530403, "grad_norm": 1.6397241444664687, "learning_rate": 1.5388046521383424e-05, "loss": 0.364, "step": 4283 }, { "epoch": 0.3393939393939394, "grad_norm": 1.327571724092451, "learning_rate": 1.538588464001241e-05, "loss": 0.2866, "step": 4284 }, { "epoch": 0.33947316300257474, "grad_norm": 1.9759979050593923, "learning_rate": 1.5383722404006808e-05, "loss": 0.4839, "step": 4285 }, { "epoch": 0.3395523866112101, "grad_norm": 1.9619536335273284, "learning_rate": 1.5381559813508986e-05, "loss": 0.5092, "step": 4286 }, { "epoch": 0.3396316102198455, "grad_norm": 1.2248605518738174, "learning_rate": 1.537939686866135e-05, "loss": 0.2458, "step": 4287 }, { "epoch": 0.3397108338284809, "grad_norm": 1.491407375995511, "learning_rate": 1.5377233569606312e-05, "loss": 0.3168, "step": 4288 }, { "epoch": 0.33979005743711627, "grad_norm": 1.301030974814455, "learning_rate": 1.5375069916486318e-05, "loss": 0.3654, "step": 4289 }, { "epoch": 0.33986928104575165, "grad_norm": 1.3992107496371073, "learning_rate": 1.5372905909443833e-05, "loss": 0.2918, "step": 4290 }, { "epoch": 0.33994850465438703, "grad_norm": 1.3277011072489449, "learning_rate": 1.5370741548621343e-05, "loss": 0.3824, "step": 4291 }, { "epoch": 0.34002772826302236, "grad_norm": 1.8291009541297036, "learning_rate": 1.5368576834161372e-05, "loss": 0.3127, "step": 4292 }, { "epoch": 0.34010695187165774, "grad_norm": 1.5514373837331348, "learning_rate": 1.536641176620644e-05, "loss": 0.4226, "step": 4293 }, { "epoch": 0.3401861754802931, "grad_norm": 1.6067565336160639, "learning_rate": 1.536424634489912e-05, "loss": 0.3232, "step": 4294 }, { "epoch": 0.3402653990889285, "grad_norm": 1.787977032883429, "learning_rate": 1.536208057038199e-05, "loss": 0.4794, "step": 4295 }, { "epoch": 0.3403446226975639, "grad_norm": 1.3505269470739947, "learning_rate": 1.535991444279765e-05, "loss": 0.2428, "step": 4296 }, { "epoch": 0.34042384630619926, "grad_norm": 1.2430003736676773, "learning_rate": 1.535774796228874e-05, "loss": 0.2724, "step": 4297 }, { "epoch": 0.34050306991483464, "grad_norm": 1.6069657055059665, "learning_rate": 1.5355581128997904e-05, "loss": 0.3173, "step": 4298 }, { "epoch": 0.34058229352346997, "grad_norm": 1.7780656520457074, "learning_rate": 1.5353413943067818e-05, "loss": 0.3433, "step": 4299 }, { "epoch": 0.34066151713210535, "grad_norm": 1.452929952719961, "learning_rate": 1.5351246404641183e-05, "loss": 0.209, "step": 4300 }, { "epoch": 0.34074074074074073, "grad_norm": 1.3784527823708006, "learning_rate": 1.5349078513860728e-05, "loss": 0.2718, "step": 4301 }, { "epoch": 0.3408199643493761, "grad_norm": 1.6734288257932508, "learning_rate": 1.534691027086918e-05, "loss": 0.4111, "step": 4302 }, { "epoch": 0.3408991879580115, "grad_norm": 1.6486669550154163, "learning_rate": 1.5344741675809328e-05, "loss": 0.3261, "step": 4303 }, { "epoch": 0.3409784115666469, "grad_norm": 1.4206449357019255, "learning_rate": 1.534257272882395e-05, "loss": 0.2882, "step": 4304 }, { "epoch": 0.34105763517528226, "grad_norm": 1.3727628650653578, "learning_rate": 1.5340403430055864e-05, "loss": 0.2751, "step": 4305 }, { "epoch": 0.3411368587839176, "grad_norm": 1.3140316381191146, "learning_rate": 1.533823377964791e-05, "loss": 0.3591, "step": 4306 }, { "epoch": 0.34121608239255297, "grad_norm": 1.578538650184226, "learning_rate": 1.5336063777742944e-05, "loss": 0.2873, "step": 4307 }, { "epoch": 0.34129530600118835, "grad_norm": 1.6610921135103072, "learning_rate": 1.5333893424483856e-05, "loss": 0.3532, "step": 4308 }, { "epoch": 0.34137452960982373, "grad_norm": 1.3148128237179069, "learning_rate": 1.5331722720013555e-05, "loss": 0.3027, "step": 4309 }, { "epoch": 0.3414537532184591, "grad_norm": 1.4313257866471976, "learning_rate": 1.532955166447496e-05, "loss": 0.3684, "step": 4310 }, { "epoch": 0.3415329768270945, "grad_norm": 1.7925574512419842, "learning_rate": 1.5327380258011037e-05, "loss": 0.4109, "step": 4311 }, { "epoch": 0.3416122004357299, "grad_norm": 1.3467899204058864, "learning_rate": 1.5325208500764756e-05, "loss": 0.2851, "step": 4312 }, { "epoch": 0.3416914240443652, "grad_norm": 1.7847515151425684, "learning_rate": 1.532303639287912e-05, "loss": 0.3556, "step": 4313 }, { "epoch": 0.3417706476530006, "grad_norm": 1.535328652407067, "learning_rate": 1.532086393449715e-05, "loss": 0.4066, "step": 4314 }, { "epoch": 0.34184987126163596, "grad_norm": 1.4220884972750913, "learning_rate": 1.531869112576189e-05, "loss": 0.3079, "step": 4315 }, { "epoch": 0.34192909487027134, "grad_norm": 1.639160206200713, "learning_rate": 1.5316517966816414e-05, "loss": 0.3443, "step": 4316 }, { "epoch": 0.3420083184789067, "grad_norm": 1.3840596508342897, "learning_rate": 1.5314344457803812e-05, "loss": 0.265, "step": 4317 }, { "epoch": 0.3420875420875421, "grad_norm": 1.4090757924369377, "learning_rate": 1.5312170598867195e-05, "loss": 0.2959, "step": 4318 }, { "epoch": 0.3421667656961775, "grad_norm": 1.2592572989778839, "learning_rate": 1.5309996390149708e-05, "loss": 0.2769, "step": 4319 }, { "epoch": 0.3422459893048128, "grad_norm": 1.7499644082596897, "learning_rate": 1.5307821831794506e-05, "loss": 0.4392, "step": 4320 }, { "epoch": 0.3423252129134482, "grad_norm": 1.628349571539565, "learning_rate": 1.5305646923944776e-05, "loss": 0.3193, "step": 4321 }, { "epoch": 0.3424044365220836, "grad_norm": 1.6307441971665753, "learning_rate": 1.5303471666743727e-05, "loss": 0.3005, "step": 4322 }, { "epoch": 0.34248366013071896, "grad_norm": 1.28757335105128, "learning_rate": 1.5301296060334588e-05, "loss": 0.2759, "step": 4323 }, { "epoch": 0.34256288373935434, "grad_norm": 1.825664837017531, "learning_rate": 1.529912010486061e-05, "loss": 0.3556, "step": 4324 }, { "epoch": 0.3426421073479897, "grad_norm": 1.6498042243213094, "learning_rate": 1.5296943800465068e-05, "loss": 0.3651, "step": 4325 }, { "epoch": 0.34272133095662505, "grad_norm": 1.7490629847383423, "learning_rate": 1.529476714729127e-05, "loss": 0.3812, "step": 4326 }, { "epoch": 0.34280055456526043, "grad_norm": 1.5570358947811893, "learning_rate": 1.529259014548253e-05, "loss": 0.3066, "step": 4327 }, { "epoch": 0.3428797781738958, "grad_norm": 1.311637351378174, "learning_rate": 1.5290412795182193e-05, "loss": 0.2954, "step": 4328 }, { "epoch": 0.3429590017825312, "grad_norm": 1.6279309235631882, "learning_rate": 1.528823509653363e-05, "loss": 0.5063, "step": 4329 }, { "epoch": 0.3430382253911666, "grad_norm": 1.4566379968697696, "learning_rate": 1.5286057049680236e-05, "loss": 0.3533, "step": 4330 }, { "epoch": 0.34311744899980196, "grad_norm": 1.578603220100308, "learning_rate": 1.5283878654765414e-05, "loss": 0.2953, "step": 4331 }, { "epoch": 0.34319667260843734, "grad_norm": 1.2967332993666802, "learning_rate": 1.5281699911932612e-05, "loss": 0.2228, "step": 4332 }, { "epoch": 0.34327589621707266, "grad_norm": 1.4011110065202876, "learning_rate": 1.527952082132528e-05, "loss": 0.4005, "step": 4333 }, { "epoch": 0.34335511982570804, "grad_norm": 1.491828389161139, "learning_rate": 1.5277341383086906e-05, "loss": 0.3721, "step": 4334 }, { "epoch": 0.3434343434343434, "grad_norm": 1.33962654281021, "learning_rate": 1.5275161597360996e-05, "loss": 0.3272, "step": 4335 }, { "epoch": 0.3435135670429788, "grad_norm": 1.982517730652105, "learning_rate": 1.5272981464291077e-05, "loss": 0.3913, "step": 4336 }, { "epoch": 0.3435927906516142, "grad_norm": 1.6422612412762339, "learning_rate": 1.5270800984020705e-05, "loss": 0.332, "step": 4337 }, { "epoch": 0.34367201426024957, "grad_norm": 1.504029812139617, "learning_rate": 1.5268620156693444e-05, "loss": 0.3541, "step": 4338 }, { "epoch": 0.34375123786888495, "grad_norm": 1.4455796329437427, "learning_rate": 1.52664389824529e-05, "loss": 0.3472, "step": 4339 }, { "epoch": 0.3438304614775203, "grad_norm": 1.1758245169354014, "learning_rate": 1.5264257461442687e-05, "loss": 0.2044, "step": 4340 }, { "epoch": 0.34390968508615566, "grad_norm": 1.7121470670997576, "learning_rate": 1.526207559380645e-05, "loss": 0.4498, "step": 4341 }, { "epoch": 0.34398890869479104, "grad_norm": 1.765751378306275, "learning_rate": 1.5259893379687855e-05, "loss": 0.3202, "step": 4342 }, { "epoch": 0.3440681323034264, "grad_norm": 1.706753772383736, "learning_rate": 1.525771081923059e-05, "loss": 0.4135, "step": 4343 }, { "epoch": 0.3441473559120618, "grad_norm": 1.9877885681461496, "learning_rate": 1.525552791257837e-05, "loss": 0.5112, "step": 4344 }, { "epoch": 0.3442265795206972, "grad_norm": 1.5323652363248574, "learning_rate": 1.525334465987492e-05, "loss": 0.2778, "step": 4345 }, { "epoch": 0.34430580312933257, "grad_norm": 1.7597796408885797, "learning_rate": 1.5251161061264003e-05, "loss": 0.3523, "step": 4346 }, { "epoch": 0.3443850267379679, "grad_norm": 1.4685050322338264, "learning_rate": 1.5248977116889396e-05, "loss": 0.3431, "step": 4347 }, { "epoch": 0.3444642503466033, "grad_norm": 1.519332106378515, "learning_rate": 1.5246792826894906e-05, "loss": 0.409, "step": 4348 }, { "epoch": 0.34454347395523865, "grad_norm": 1.6500545113632907, "learning_rate": 1.5244608191424352e-05, "loss": 0.3717, "step": 4349 }, { "epoch": 0.34462269756387404, "grad_norm": 1.2276225784417312, "learning_rate": 1.5242423210621584e-05, "loss": 0.2226, "step": 4350 }, { "epoch": 0.3447019211725094, "grad_norm": 1.6276868001027185, "learning_rate": 1.5240237884630471e-05, "loss": 0.4932, "step": 4351 }, { "epoch": 0.3447811447811448, "grad_norm": 1.624805862201677, "learning_rate": 1.5238052213594912e-05, "loss": 0.395, "step": 4352 }, { "epoch": 0.3448603683897802, "grad_norm": 1.4405626794068653, "learning_rate": 1.5235866197658812e-05, "loss": 0.3353, "step": 4353 }, { "epoch": 0.3449395919984155, "grad_norm": 1.5529442881464233, "learning_rate": 1.5233679836966122e-05, "loss": 0.3625, "step": 4354 }, { "epoch": 0.3450188156070509, "grad_norm": 1.5839005912555417, "learning_rate": 1.5231493131660794e-05, "loss": 0.3269, "step": 4355 }, { "epoch": 0.34509803921568627, "grad_norm": 1.5303994692743137, "learning_rate": 1.5229306081886818e-05, "loss": 0.3016, "step": 4356 }, { "epoch": 0.34517726282432165, "grad_norm": 1.6262291505700162, "learning_rate": 1.5227118687788198e-05, "loss": 0.2979, "step": 4357 }, { "epoch": 0.34525648643295703, "grad_norm": 1.3914514338299404, "learning_rate": 1.5224930949508964e-05, "loss": 0.2786, "step": 4358 }, { "epoch": 0.3453357100415924, "grad_norm": 1.3962291682052854, "learning_rate": 1.5222742867193167e-05, "loss": 0.2577, "step": 4359 }, { "epoch": 0.3454149336502278, "grad_norm": 1.5197331743671314, "learning_rate": 1.5220554440984882e-05, "loss": 0.3455, "step": 4360 }, { "epoch": 0.3454941572588631, "grad_norm": 1.5608143396327747, "learning_rate": 1.5218365671028207e-05, "loss": 0.3136, "step": 4361 }, { "epoch": 0.3455733808674985, "grad_norm": 1.8207944893834378, "learning_rate": 1.5216176557467265e-05, "loss": 0.4917, "step": 4362 }, { "epoch": 0.3456526044761339, "grad_norm": 1.798454423625633, "learning_rate": 1.521398710044619e-05, "loss": 0.333, "step": 4363 }, { "epoch": 0.34573182808476927, "grad_norm": 1.396815439910078, "learning_rate": 1.5211797300109154e-05, "loss": 0.2323, "step": 4364 }, { "epoch": 0.34581105169340465, "grad_norm": 1.7094250651362186, "learning_rate": 1.5209607156600346e-05, "loss": 0.3047, "step": 4365 }, { "epoch": 0.34589027530204003, "grad_norm": 1.7605442349632685, "learning_rate": 1.520741667006397e-05, "loss": 0.348, "step": 4366 }, { "epoch": 0.34596949891067535, "grad_norm": 1.3742067234633975, "learning_rate": 1.5205225840644264e-05, "loss": 0.274, "step": 4367 }, { "epoch": 0.34604872251931074, "grad_norm": 1.2534730114084343, "learning_rate": 1.5203034668485486e-05, "loss": 0.2806, "step": 4368 }, { "epoch": 0.3461279461279461, "grad_norm": 1.643414874391239, "learning_rate": 1.5200843153731905e-05, "loss": 0.3499, "step": 4369 }, { "epoch": 0.3462071697365815, "grad_norm": 1.698514264158814, "learning_rate": 1.519865129652783e-05, "loss": 0.2825, "step": 4370 }, { "epoch": 0.3462863933452169, "grad_norm": 1.5027770164358123, "learning_rate": 1.5196459097017582e-05, "loss": 0.3035, "step": 4371 }, { "epoch": 0.34636561695385226, "grad_norm": 1.6487790488678877, "learning_rate": 1.5194266555345505e-05, "loss": 0.3143, "step": 4372 }, { "epoch": 0.34644484056248764, "grad_norm": 1.8461492062324416, "learning_rate": 1.5192073671655969e-05, "loss": 0.4029, "step": 4373 }, { "epoch": 0.34652406417112297, "grad_norm": 1.4250018414205798, "learning_rate": 1.5189880446093366e-05, "loss": 0.3339, "step": 4374 }, { "epoch": 0.34660328777975835, "grad_norm": 1.5471037998190196, "learning_rate": 1.5187686878802108e-05, "loss": 0.2736, "step": 4375 }, { "epoch": 0.34668251138839373, "grad_norm": 1.7239491407254735, "learning_rate": 1.5185492969926627e-05, "loss": 0.4099, "step": 4376 }, { "epoch": 0.3467617349970291, "grad_norm": 1.4172153097053735, "learning_rate": 1.5183298719611388e-05, "loss": 0.4184, "step": 4377 }, { "epoch": 0.3468409586056645, "grad_norm": 1.4244286831445356, "learning_rate": 1.5181104128000868e-05, "loss": 0.2639, "step": 4378 }, { "epoch": 0.3469201822142999, "grad_norm": 1.4903485767289089, "learning_rate": 1.517890919523957e-05, "loss": 0.3343, "step": 4379 }, { "epoch": 0.34699940582293526, "grad_norm": 1.5706996701714635, "learning_rate": 1.517671392147202e-05, "loss": 0.3917, "step": 4380 }, { "epoch": 0.3470786294315706, "grad_norm": 2.148426472123439, "learning_rate": 1.517451830684277e-05, "loss": 0.4647, "step": 4381 }, { "epoch": 0.34715785304020597, "grad_norm": 1.6660329561456046, "learning_rate": 1.5172322351496385e-05, "loss": 0.3693, "step": 4382 }, { "epoch": 0.34723707664884135, "grad_norm": 1.6528446971522985, "learning_rate": 1.517012605557746e-05, "loss": 0.3442, "step": 4383 }, { "epoch": 0.34731630025747673, "grad_norm": 1.4524525817125662, "learning_rate": 1.5167929419230616e-05, "loss": 0.3673, "step": 4384 }, { "epoch": 0.3473955238661121, "grad_norm": 1.4646704157508048, "learning_rate": 1.516573244260048e-05, "loss": 0.3315, "step": 4385 }, { "epoch": 0.3474747474747475, "grad_norm": 1.4932676472668638, "learning_rate": 1.5163535125831724e-05, "loss": 0.3442, "step": 4386 }, { "epoch": 0.3475539710833829, "grad_norm": 1.5519892449063468, "learning_rate": 1.5161337469069024e-05, "loss": 0.3868, "step": 4387 }, { "epoch": 0.3476331946920182, "grad_norm": 1.5261023445669821, "learning_rate": 1.5159139472457086e-05, "loss": 0.3886, "step": 4388 }, { "epoch": 0.3477124183006536, "grad_norm": 1.3546685315882694, "learning_rate": 1.5156941136140637e-05, "loss": 0.3373, "step": 4389 }, { "epoch": 0.34779164190928896, "grad_norm": 1.5646257798222758, "learning_rate": 1.5154742460264426e-05, "loss": 0.3123, "step": 4390 }, { "epoch": 0.34787086551792434, "grad_norm": 1.303390509789042, "learning_rate": 1.515254344497323e-05, "loss": 0.2792, "step": 4391 }, { "epoch": 0.3479500891265597, "grad_norm": 1.7601413012596392, "learning_rate": 1.5150344090411841e-05, "loss": 0.356, "step": 4392 }, { "epoch": 0.3480293127351951, "grad_norm": 1.2650959078949433, "learning_rate": 1.5148144396725072e-05, "loss": 0.2288, "step": 4393 }, { "epoch": 0.3481085363438305, "grad_norm": 1.4466704973799784, "learning_rate": 1.514594436405777e-05, "loss": 0.2492, "step": 4394 }, { "epoch": 0.3481877599524658, "grad_norm": 2.2227170816834954, "learning_rate": 1.5143743992554791e-05, "loss": 0.3858, "step": 4395 }, { "epoch": 0.3482669835611012, "grad_norm": 1.309722667859268, "learning_rate": 1.514154328236102e-05, "loss": 0.3023, "step": 4396 }, { "epoch": 0.3483462071697366, "grad_norm": 1.3576272291406275, "learning_rate": 1.5139342233621364e-05, "loss": 0.2487, "step": 4397 }, { "epoch": 0.34842543077837196, "grad_norm": 1.3382486838155585, "learning_rate": 1.5137140846480752e-05, "loss": 0.2242, "step": 4398 }, { "epoch": 0.34850465438700734, "grad_norm": 1.4955289585838192, "learning_rate": 1.5134939121084129e-05, "loss": 0.3344, "step": 4399 }, { "epoch": 0.3485838779956427, "grad_norm": 1.7993186062169424, "learning_rate": 1.5132737057576476e-05, "loss": 0.4859, "step": 4400 }, { "epoch": 0.34866310160427805, "grad_norm": 1.520159946231801, "learning_rate": 1.5130534656102783e-05, "loss": 0.3211, "step": 4401 }, { "epoch": 0.34874232521291343, "grad_norm": 1.444083381085495, "learning_rate": 1.512833191680807e-05, "loss": 0.3783, "step": 4402 }, { "epoch": 0.3488215488215488, "grad_norm": 1.4996070574512057, "learning_rate": 1.5126128839837378e-05, "loss": 0.281, "step": 4403 }, { "epoch": 0.3489007724301842, "grad_norm": 1.3668318440224068, "learning_rate": 1.5123925425335766e-05, "loss": 0.2849, "step": 4404 }, { "epoch": 0.3489799960388196, "grad_norm": 1.3987864995830923, "learning_rate": 1.5121721673448319e-05, "loss": 0.3283, "step": 4405 }, { "epoch": 0.34905921964745495, "grad_norm": 1.505740064476565, "learning_rate": 1.5119517584320146e-05, "loss": 0.3958, "step": 4406 }, { "epoch": 0.34913844325609034, "grad_norm": 1.2862179808102558, "learning_rate": 1.5117313158096371e-05, "loss": 0.3162, "step": 4407 }, { "epoch": 0.34921766686472566, "grad_norm": 1.3674620729882478, "learning_rate": 1.511510839492215e-05, "loss": 0.4187, "step": 4408 }, { "epoch": 0.34929689047336104, "grad_norm": 1.198812844397246, "learning_rate": 1.5112903294942651e-05, "loss": 0.2396, "step": 4409 }, { "epoch": 0.3493761140819964, "grad_norm": 1.3356613497560836, "learning_rate": 1.5110697858303072e-05, "loss": 0.3435, "step": 4410 }, { "epoch": 0.3494553376906318, "grad_norm": 1.6636511396414906, "learning_rate": 1.5108492085148632e-05, "loss": 0.3838, "step": 4411 }, { "epoch": 0.3495345612992672, "grad_norm": 1.6133531445790348, "learning_rate": 1.5106285975624568e-05, "loss": 0.433, "step": 4412 }, { "epoch": 0.34961378490790257, "grad_norm": 1.4336497029329514, "learning_rate": 1.5104079529876143e-05, "loss": 0.251, "step": 4413 }, { "epoch": 0.34969300851653795, "grad_norm": 1.3231363770107274, "learning_rate": 1.510187274804864e-05, "loss": 0.3091, "step": 4414 }, { "epoch": 0.3497722321251733, "grad_norm": 1.4911702060595606, "learning_rate": 1.5099665630287365e-05, "loss": 0.3731, "step": 4415 }, { "epoch": 0.34985145573380866, "grad_norm": 1.917045337477769, "learning_rate": 1.5097458176737647e-05, "loss": 0.3195, "step": 4416 }, { "epoch": 0.34993067934244404, "grad_norm": 1.4294612713605452, "learning_rate": 1.5095250387544833e-05, "loss": 0.2989, "step": 4417 }, { "epoch": 0.3500099029510794, "grad_norm": 1.3293686826300255, "learning_rate": 1.5093042262854297e-05, "loss": 0.252, "step": 4418 }, { "epoch": 0.3500891265597148, "grad_norm": 1.5827571695390332, "learning_rate": 1.509083380281144e-05, "loss": 0.2892, "step": 4419 }, { "epoch": 0.3501683501683502, "grad_norm": 1.386141660887367, "learning_rate": 1.5088625007561668e-05, "loss": 0.3653, "step": 4420 }, { "epoch": 0.35024757377698557, "grad_norm": 1.8323940532231806, "learning_rate": 1.5086415877250424e-05, "loss": 0.405, "step": 4421 }, { "epoch": 0.3503267973856209, "grad_norm": 1.3317656299159266, "learning_rate": 1.5084206412023172e-05, "loss": 0.3019, "step": 4422 }, { "epoch": 0.3504060209942563, "grad_norm": 1.398782207111146, "learning_rate": 1.5081996612025387e-05, "loss": 0.3361, "step": 4423 }, { "epoch": 0.35048524460289165, "grad_norm": 1.345468328411188, "learning_rate": 1.5079786477402581e-05, "loss": 0.306, "step": 4424 }, { "epoch": 0.35056446821152704, "grad_norm": 1.5370407033705449, "learning_rate": 1.5077576008300278e-05, "loss": 0.4559, "step": 4425 }, { "epoch": 0.3506436918201624, "grad_norm": 1.4600757339417962, "learning_rate": 1.5075365204864025e-05, "loss": 0.3518, "step": 4426 }, { "epoch": 0.3507229154287978, "grad_norm": 1.6341718820482494, "learning_rate": 1.5073154067239396e-05, "loss": 0.4857, "step": 4427 }, { "epoch": 0.3508021390374332, "grad_norm": 1.826444450110843, "learning_rate": 1.507094259557198e-05, "loss": 0.3229, "step": 4428 }, { "epoch": 0.3508813626460685, "grad_norm": 1.5491382745889493, "learning_rate": 1.5068730790007395e-05, "loss": 0.3467, "step": 4429 }, { "epoch": 0.3509605862547039, "grad_norm": 1.4093133322471183, "learning_rate": 1.5066518650691277e-05, "loss": 0.3193, "step": 4430 }, { "epoch": 0.35103980986333927, "grad_norm": 1.65832062798544, "learning_rate": 1.5064306177769284e-05, "loss": 0.4091, "step": 4431 }, { "epoch": 0.35111903347197465, "grad_norm": 1.475689115708618, "learning_rate": 1.5062093371387097e-05, "loss": 0.4302, "step": 4432 }, { "epoch": 0.35119825708061003, "grad_norm": 1.3314872460149323, "learning_rate": 1.5059880231690418e-05, "loss": 0.2184, "step": 4433 }, { "epoch": 0.3512774806892454, "grad_norm": 1.6020910146437128, "learning_rate": 1.5057666758824974e-05, "loss": 0.3545, "step": 4434 }, { "epoch": 0.3513567042978808, "grad_norm": 1.837153136178669, "learning_rate": 1.5055452952936512e-05, "loss": 0.3466, "step": 4435 }, { "epoch": 0.3514359279065161, "grad_norm": 1.6805451390680544, "learning_rate": 1.5053238814170792e-05, "loss": 0.3289, "step": 4436 }, { "epoch": 0.3515151515151515, "grad_norm": 1.3378784333481688, "learning_rate": 1.5051024342673614e-05, "loss": 0.2403, "step": 4437 }, { "epoch": 0.3515943751237869, "grad_norm": 2.087666893707564, "learning_rate": 1.5048809538590789e-05, "loss": 0.4036, "step": 4438 }, { "epoch": 0.35167359873242227, "grad_norm": 1.7825672859552832, "learning_rate": 1.5046594402068147e-05, "loss": 0.3707, "step": 4439 }, { "epoch": 0.35175282234105765, "grad_norm": 1.791977573660528, "learning_rate": 1.5044378933251546e-05, "loss": 0.3653, "step": 4440 }, { "epoch": 0.35183204594969303, "grad_norm": 1.5204998704240598, "learning_rate": 1.5042163132286867e-05, "loss": 0.3088, "step": 4441 }, { "epoch": 0.35191126955832835, "grad_norm": 1.6927276293706366, "learning_rate": 1.5039946999320004e-05, "loss": 0.3159, "step": 4442 }, { "epoch": 0.35199049316696374, "grad_norm": 1.304804718934339, "learning_rate": 1.5037730534496882e-05, "loss": 0.2718, "step": 4443 }, { "epoch": 0.3520697167755991, "grad_norm": 1.5480500253590626, "learning_rate": 1.5035513737963445e-05, "loss": 0.3296, "step": 4444 }, { "epoch": 0.3521489403842345, "grad_norm": 1.4327400935705397, "learning_rate": 1.5033296609865658e-05, "loss": 0.2291, "step": 4445 }, { "epoch": 0.3522281639928699, "grad_norm": 1.3544891280119487, "learning_rate": 1.503107915034951e-05, "loss": 0.2646, "step": 4446 }, { "epoch": 0.35230738760150526, "grad_norm": 1.4381103031255773, "learning_rate": 1.5028861359561005e-05, "loss": 0.3491, "step": 4447 }, { "epoch": 0.35238661121014064, "grad_norm": 1.2842815403805183, "learning_rate": 1.5026643237646176e-05, "loss": 0.2931, "step": 4448 }, { "epoch": 0.35246583481877597, "grad_norm": 1.556770739822654, "learning_rate": 1.5024424784751079e-05, "loss": 0.3883, "step": 4449 }, { "epoch": 0.35254505842741135, "grad_norm": 1.6071392135468507, "learning_rate": 1.5022206001021784e-05, "loss": 0.2604, "step": 4450 }, { "epoch": 0.35262428203604673, "grad_norm": 1.6581467899133189, "learning_rate": 1.501998688660439e-05, "loss": 0.3594, "step": 4451 }, { "epoch": 0.3527035056446821, "grad_norm": 1.4280202943505607, "learning_rate": 1.5017767441645015e-05, "loss": 0.3022, "step": 4452 }, { "epoch": 0.3527827292533175, "grad_norm": 1.8696357359056301, "learning_rate": 1.5015547666289798e-05, "loss": 0.312, "step": 4453 }, { "epoch": 0.3528619528619529, "grad_norm": 2.2254388203939968, "learning_rate": 1.50133275606849e-05, "loss": 0.5323, "step": 4454 }, { "epoch": 0.35294117647058826, "grad_norm": 1.9794528530004738, "learning_rate": 1.5011107124976505e-05, "loss": 0.3146, "step": 4455 }, { "epoch": 0.3530204000792236, "grad_norm": 1.5637467311851438, "learning_rate": 1.5008886359310815e-05, "loss": 0.3533, "step": 4456 }, { "epoch": 0.35309962368785897, "grad_norm": 1.5582523654418898, "learning_rate": 1.5006665263834062e-05, "loss": 0.238, "step": 4457 }, { "epoch": 0.35317884729649435, "grad_norm": 1.4982906644412797, "learning_rate": 1.5004443838692492e-05, "loss": 0.4334, "step": 4458 }, { "epoch": 0.35325807090512973, "grad_norm": 1.2772645189554344, "learning_rate": 1.5002222084032374e-05, "loss": 0.269, "step": 4459 }, { "epoch": 0.3533372945137651, "grad_norm": 1.611174493711392, "learning_rate": 1.5000000000000002e-05, "loss": 0.3572, "step": 4460 }, { "epoch": 0.3534165181224005, "grad_norm": 1.7140872029478584, "learning_rate": 1.4997777586741689e-05, "loss": 0.3621, "step": 4461 }, { "epoch": 0.3534957417310359, "grad_norm": 1.8482216213669391, "learning_rate": 1.4995554844403767e-05, "loss": 0.4988, "step": 4462 }, { "epoch": 0.3535749653396712, "grad_norm": 1.5782597846093604, "learning_rate": 1.4993331773132598e-05, "loss": 0.3621, "step": 4463 }, { "epoch": 0.3536541889483066, "grad_norm": 1.619754703359135, "learning_rate": 1.4991108373074557e-05, "loss": 0.3121, "step": 4464 }, { "epoch": 0.35373341255694196, "grad_norm": 1.251015099245574, "learning_rate": 1.4988884644376045e-05, "loss": 0.2942, "step": 4465 }, { "epoch": 0.35381263616557734, "grad_norm": 1.1962443859476162, "learning_rate": 1.4986660587183485e-05, "loss": 0.2695, "step": 4466 }, { "epoch": 0.3538918597742127, "grad_norm": 1.3050067250239221, "learning_rate": 1.498443620164332e-05, "loss": 0.3431, "step": 4467 }, { "epoch": 0.3539710833828481, "grad_norm": 1.9169317517790057, "learning_rate": 1.4982211487902015e-05, "loss": 0.4503, "step": 4468 }, { "epoch": 0.3540503069914835, "grad_norm": 1.5769739490151566, "learning_rate": 1.4979986446106054e-05, "loss": 0.3866, "step": 4469 }, { "epoch": 0.3541295306001188, "grad_norm": 1.9632989273453332, "learning_rate": 1.4977761076401949e-05, "loss": 0.3616, "step": 4470 }, { "epoch": 0.3542087542087542, "grad_norm": 1.4735528900290882, "learning_rate": 1.4975535378936228e-05, "loss": 0.3158, "step": 4471 }, { "epoch": 0.3542879778173896, "grad_norm": 1.334976016328683, "learning_rate": 1.4973309353855443e-05, "loss": 0.2648, "step": 4472 }, { "epoch": 0.35436720142602496, "grad_norm": 1.46997527587073, "learning_rate": 1.497108300130617e-05, "loss": 0.3375, "step": 4473 }, { "epoch": 0.35444642503466034, "grad_norm": 1.6751360026779696, "learning_rate": 1.4968856321434997e-05, "loss": 0.4085, "step": 4474 }, { "epoch": 0.3545256486432957, "grad_norm": 1.3147699829778916, "learning_rate": 1.4966629314388548e-05, "loss": 0.1788, "step": 4475 }, { "epoch": 0.3546048722519311, "grad_norm": 1.7380815407654258, "learning_rate": 1.4964401980313452e-05, "loss": 0.4178, "step": 4476 }, { "epoch": 0.35468409586056643, "grad_norm": 1.856948594190862, "learning_rate": 1.4962174319356372e-05, "loss": 0.4212, "step": 4477 }, { "epoch": 0.3547633194692018, "grad_norm": 1.6701692103438248, "learning_rate": 1.4959946331663995e-05, "loss": 0.3934, "step": 4478 }, { "epoch": 0.3548425430778372, "grad_norm": 1.715807350019634, "learning_rate": 1.4957718017383013e-05, "loss": 0.3259, "step": 4479 }, { "epoch": 0.3549217666864726, "grad_norm": 1.6353226514714108, "learning_rate": 1.4955489376660157e-05, "loss": 0.3653, "step": 4480 }, { "epoch": 0.35500099029510795, "grad_norm": 1.5986840524259371, "learning_rate": 1.4953260409642172e-05, "loss": 0.4304, "step": 4481 }, { "epoch": 0.35508021390374334, "grad_norm": 1.4980375062464257, "learning_rate": 1.4951031116475819e-05, "loss": 0.2783, "step": 4482 }, { "epoch": 0.35515943751237866, "grad_norm": 1.3735723890656084, "learning_rate": 1.4948801497307893e-05, "loss": 0.2921, "step": 4483 }, { "epoch": 0.35523866112101404, "grad_norm": 1.476441144812472, "learning_rate": 1.4946571552285196e-05, "loss": 0.3835, "step": 4484 }, { "epoch": 0.3553178847296494, "grad_norm": 1.3532321731038421, "learning_rate": 1.4944341281554566e-05, "loss": 0.3163, "step": 4485 }, { "epoch": 0.3553971083382848, "grad_norm": 1.8412669825428365, "learning_rate": 1.4942110685262854e-05, "loss": 0.391, "step": 4486 }, { "epoch": 0.3554763319469202, "grad_norm": 1.6406533550846139, "learning_rate": 1.493987976355693e-05, "loss": 0.4115, "step": 4487 }, { "epoch": 0.35555555555555557, "grad_norm": 1.3490218542487145, "learning_rate": 1.4937648516583696e-05, "loss": 0.2903, "step": 4488 }, { "epoch": 0.35563477916419095, "grad_norm": 1.7250046190266342, "learning_rate": 1.4935416944490066e-05, "loss": 0.4626, "step": 4489 }, { "epoch": 0.3557140027728263, "grad_norm": 1.484861652232995, "learning_rate": 1.4933185047422976e-05, "loss": 0.3283, "step": 4490 }, { "epoch": 0.35579322638146166, "grad_norm": 1.8375718812275208, "learning_rate": 1.493095282552939e-05, "loss": 0.4697, "step": 4491 }, { "epoch": 0.35587244999009704, "grad_norm": 1.6458998248467758, "learning_rate": 1.4928720278956284e-05, "loss": 0.3502, "step": 4492 }, { "epoch": 0.3559516735987324, "grad_norm": 1.4786427530122839, "learning_rate": 1.4926487407850667e-05, "loss": 0.3209, "step": 4493 }, { "epoch": 0.3560308972073678, "grad_norm": 1.345736220657658, "learning_rate": 1.4924254212359557e-05, "loss": 0.3029, "step": 4494 }, { "epoch": 0.3561101208160032, "grad_norm": 1.510996617616948, "learning_rate": 1.492202069263e-05, "loss": 0.2476, "step": 4495 }, { "epoch": 0.35618934442463857, "grad_norm": 1.3485013445136644, "learning_rate": 1.4919786848809061e-05, "loss": 0.3037, "step": 4496 }, { "epoch": 0.3562685680332739, "grad_norm": 1.3957058135768283, "learning_rate": 1.4917552681043837e-05, "loss": 0.277, "step": 4497 }, { "epoch": 0.3563477916419093, "grad_norm": 1.6312078445070226, "learning_rate": 1.4915318189481425e-05, "loss": 0.4123, "step": 4498 }, { "epoch": 0.35642701525054465, "grad_norm": 1.7028201265352148, "learning_rate": 1.4913083374268965e-05, "loss": 0.3283, "step": 4499 }, { "epoch": 0.35650623885918004, "grad_norm": 1.5076321211463664, "learning_rate": 1.4910848235553604e-05, "loss": 0.366, "step": 4500 }, { "epoch": 0.3565854624678154, "grad_norm": 1.288441048246071, "learning_rate": 1.4908612773482514e-05, "loss": 0.2596, "step": 4501 }, { "epoch": 0.3566646860764508, "grad_norm": 1.3854643136389788, "learning_rate": 1.4906376988202893e-05, "loss": 0.304, "step": 4502 }, { "epoch": 0.3567439096850862, "grad_norm": 1.5390867295985706, "learning_rate": 1.4904140879861957e-05, "loss": 0.3079, "step": 4503 }, { "epoch": 0.3568231332937215, "grad_norm": 1.3772107961348354, "learning_rate": 1.490190444860694e-05, "loss": 0.247, "step": 4504 }, { "epoch": 0.3569023569023569, "grad_norm": 1.4760415313792743, "learning_rate": 1.48996676945851e-05, "loss": 0.2896, "step": 4505 }, { "epoch": 0.35698158051099227, "grad_norm": 1.7818994425986037, "learning_rate": 1.4897430617943718e-05, "loss": 0.4124, "step": 4506 }, { "epoch": 0.35706080411962765, "grad_norm": 1.3538385468125493, "learning_rate": 1.4895193218830098e-05, "loss": 0.1953, "step": 4507 }, { "epoch": 0.35714002772826303, "grad_norm": 1.5564099795895754, "learning_rate": 1.4892955497391556e-05, "loss": 0.3041, "step": 4508 }, { "epoch": 0.3572192513368984, "grad_norm": 1.6640628194064107, "learning_rate": 1.4890717453775438e-05, "loss": 0.3135, "step": 4509 }, { "epoch": 0.3572984749455338, "grad_norm": 1.2438496743608995, "learning_rate": 1.488847908812911e-05, "loss": 0.196, "step": 4510 }, { "epoch": 0.3573776985541691, "grad_norm": 1.4194316506577334, "learning_rate": 1.4886240400599954e-05, "loss": 0.388, "step": 4511 }, { "epoch": 0.3574569221628045, "grad_norm": 1.4845696393316856, "learning_rate": 1.488400139133538e-05, "loss": 0.2699, "step": 4512 }, { "epoch": 0.3575361457714399, "grad_norm": 1.5306197110435527, "learning_rate": 1.4881762060482814e-05, "loss": 0.37, "step": 4513 }, { "epoch": 0.35761536938007527, "grad_norm": 1.6089342845179366, "learning_rate": 1.4879522408189706e-05, "loss": 0.2954, "step": 4514 }, { "epoch": 0.35769459298871065, "grad_norm": 1.4665943204645229, "learning_rate": 1.4877282434603527e-05, "loss": 0.268, "step": 4515 }, { "epoch": 0.35777381659734603, "grad_norm": 1.6858722654032046, "learning_rate": 1.4875042139871768e-05, "loss": 0.373, "step": 4516 }, { "epoch": 0.3578530402059814, "grad_norm": 1.687337844143792, "learning_rate": 1.487280152414194e-05, "loss": 0.3711, "step": 4517 }, { "epoch": 0.35793226381461674, "grad_norm": 1.4986581913333943, "learning_rate": 1.4870560587561578e-05, "loss": 0.2921, "step": 4518 }, { "epoch": 0.3580114874232521, "grad_norm": 1.8314682438077046, "learning_rate": 1.4868319330278236e-05, "loss": 0.4039, "step": 4519 }, { "epoch": 0.3580907110318875, "grad_norm": 1.284968879816148, "learning_rate": 1.4866077752439495e-05, "loss": 0.2224, "step": 4520 }, { "epoch": 0.3581699346405229, "grad_norm": 1.6388947973357595, "learning_rate": 1.4863835854192945e-05, "loss": 0.3199, "step": 4521 }, { "epoch": 0.35824915824915826, "grad_norm": 1.3627534689367056, "learning_rate": 1.4861593635686207e-05, "loss": 0.3353, "step": 4522 }, { "epoch": 0.35832838185779364, "grad_norm": 1.2997421795965136, "learning_rate": 1.485935109706692e-05, "loss": 0.333, "step": 4523 }, { "epoch": 0.35840760546642897, "grad_norm": 1.7399908236843789, "learning_rate": 1.4857108238482747e-05, "loss": 0.3396, "step": 4524 }, { "epoch": 0.35848682907506435, "grad_norm": 1.6751163458280312, "learning_rate": 1.4854865060081367e-05, "loss": 0.4047, "step": 4525 }, { "epoch": 0.35856605268369973, "grad_norm": 1.6910081328032733, "learning_rate": 1.4852621562010484e-05, "loss": 0.3618, "step": 4526 }, { "epoch": 0.3586452762923351, "grad_norm": 1.452417946872166, "learning_rate": 1.4850377744417816e-05, "loss": 0.3222, "step": 4527 }, { "epoch": 0.3587244999009705, "grad_norm": 1.465134751993534, "learning_rate": 1.4848133607451116e-05, "loss": 0.3361, "step": 4528 }, { "epoch": 0.3588037235096059, "grad_norm": 1.2937664713246881, "learning_rate": 1.4845889151258144e-05, "loss": 0.3048, "step": 4529 }, { "epoch": 0.35888294711824126, "grad_norm": 1.4103158941759597, "learning_rate": 1.484364437598669e-05, "loss": 0.3097, "step": 4530 }, { "epoch": 0.3589621707268766, "grad_norm": 1.6533614097152505, "learning_rate": 1.4841399281784558e-05, "loss": 0.3116, "step": 4531 }, { "epoch": 0.35904139433551197, "grad_norm": 1.5836413070257982, "learning_rate": 1.4839153868799583e-05, "loss": 0.3021, "step": 4532 }, { "epoch": 0.35912061794414735, "grad_norm": 1.5399833806009788, "learning_rate": 1.4836908137179607e-05, "loss": 0.3208, "step": 4533 }, { "epoch": 0.35919984155278273, "grad_norm": 1.655999273451648, "learning_rate": 1.4834662087072502e-05, "loss": 0.2872, "step": 4534 }, { "epoch": 0.3592790651614181, "grad_norm": 2.7594771713068877, "learning_rate": 1.4832415718626166e-05, "loss": 0.2787, "step": 4535 }, { "epoch": 0.3593582887700535, "grad_norm": 1.6256369045786114, "learning_rate": 1.4830169031988502e-05, "loss": 0.3501, "step": 4536 }, { "epoch": 0.3594375123786889, "grad_norm": 1.5312947221998576, "learning_rate": 1.482792202730745e-05, "loss": 0.3442, "step": 4537 }, { "epoch": 0.3595167359873242, "grad_norm": 1.5979025065222114, "learning_rate": 1.4825674704730966e-05, "loss": 0.34, "step": 4538 }, { "epoch": 0.3595959595959596, "grad_norm": 1.7289384151629619, "learning_rate": 1.4823427064407018e-05, "loss": 0.2654, "step": 4539 }, { "epoch": 0.35967518320459496, "grad_norm": 1.5554422933856926, "learning_rate": 1.4821179106483609e-05, "loss": 0.2729, "step": 4540 }, { "epoch": 0.35975440681323034, "grad_norm": 1.5807387121266765, "learning_rate": 1.4818930831108755e-05, "loss": 0.2966, "step": 4541 }, { "epoch": 0.3598336304218657, "grad_norm": 1.8050893381280435, "learning_rate": 1.481668223843049e-05, "loss": 0.4225, "step": 4542 }, { "epoch": 0.3599128540305011, "grad_norm": 1.6303951038746831, "learning_rate": 1.481443332859688e-05, "loss": 0.3015, "step": 4543 }, { "epoch": 0.3599920776391365, "grad_norm": 1.6421873815220196, "learning_rate": 1.4812184101755997e-05, "loss": 0.4425, "step": 4544 }, { "epoch": 0.3600713012477718, "grad_norm": 2.4365109990585463, "learning_rate": 1.480993455805595e-05, "loss": 0.4427, "step": 4545 }, { "epoch": 0.3601505248564072, "grad_norm": 1.510087439192847, "learning_rate": 1.480768469764485e-05, "loss": 0.4234, "step": 4546 }, { "epoch": 0.3602297484650426, "grad_norm": 1.7261180209256146, "learning_rate": 1.480543452067085e-05, "loss": 0.4369, "step": 4547 }, { "epoch": 0.36030897207367796, "grad_norm": 1.4108114173008615, "learning_rate": 1.480318402728211e-05, "loss": 0.2853, "step": 4548 }, { "epoch": 0.36038819568231334, "grad_norm": 1.868035887298413, "learning_rate": 1.480093321762681e-05, "loss": 0.4193, "step": 4549 }, { "epoch": 0.3604674192909487, "grad_norm": 1.436531796439108, "learning_rate": 1.4798682091853161e-05, "loss": 0.2232, "step": 4550 }, { "epoch": 0.3605466428995841, "grad_norm": 1.7665740118174211, "learning_rate": 1.4796430650109383e-05, "loss": 0.4041, "step": 4551 }, { "epoch": 0.36062586650821943, "grad_norm": 1.3592856372708426, "learning_rate": 1.4794178892543727e-05, "loss": 0.3048, "step": 4552 }, { "epoch": 0.3607050901168548, "grad_norm": 1.5242254138421896, "learning_rate": 1.4791926819304462e-05, "loss": 0.3198, "step": 4553 }, { "epoch": 0.3607843137254902, "grad_norm": 1.5391450520791332, "learning_rate": 1.4789674430539868e-05, "loss": 0.3438, "step": 4554 }, { "epoch": 0.3608635373341256, "grad_norm": 1.5332789880882494, "learning_rate": 1.4787421726398263e-05, "loss": 0.3789, "step": 4555 }, { "epoch": 0.36094276094276095, "grad_norm": 1.7049827322244504, "learning_rate": 1.4785168707027972e-05, "loss": 0.4588, "step": 4556 }, { "epoch": 0.36102198455139634, "grad_norm": 1.597298101899049, "learning_rate": 1.4782915372577347e-05, "loss": 0.3143, "step": 4557 }, { "epoch": 0.3611012081600317, "grad_norm": 1.5429539572518831, "learning_rate": 1.4780661723194757e-05, "loss": 0.3254, "step": 4558 }, { "epoch": 0.36118043176866704, "grad_norm": 1.608424375766527, "learning_rate": 1.4778407759028599e-05, "loss": 0.3012, "step": 4559 }, { "epoch": 0.3612596553773024, "grad_norm": 1.8023334335875967, "learning_rate": 1.4776153480227278e-05, "loss": 0.3551, "step": 4560 }, { "epoch": 0.3613388789859378, "grad_norm": 1.5202885322557214, "learning_rate": 1.4773898886939235e-05, "loss": 0.3201, "step": 4561 }, { "epoch": 0.3614181025945732, "grad_norm": 1.6416522809960745, "learning_rate": 1.4771643979312917e-05, "loss": 0.3522, "step": 4562 }, { "epoch": 0.36149732620320857, "grad_norm": 1.3573962324661244, "learning_rate": 1.4769388757496806e-05, "loss": 0.2134, "step": 4563 }, { "epoch": 0.36157654981184395, "grad_norm": 1.6109318342005996, "learning_rate": 1.4767133221639394e-05, "loss": 0.382, "step": 4564 }, { "epoch": 0.3616557734204793, "grad_norm": 1.8541905848657845, "learning_rate": 1.4764877371889194e-05, "loss": 0.3619, "step": 4565 }, { "epoch": 0.36173499702911466, "grad_norm": 1.3441381077715713, "learning_rate": 1.476262120839475e-05, "loss": 0.2837, "step": 4566 }, { "epoch": 0.36181422063775004, "grad_norm": 1.5064008339755857, "learning_rate": 1.4760364731304614e-05, "loss": 0.3249, "step": 4567 }, { "epoch": 0.3618934442463854, "grad_norm": 1.487066228871815, "learning_rate": 1.4758107940767368e-05, "loss": 0.3311, "step": 4568 }, { "epoch": 0.3619726678550208, "grad_norm": 1.4046031145447673, "learning_rate": 1.4755850836931607e-05, "loss": 0.3257, "step": 4569 }, { "epoch": 0.3620518914636562, "grad_norm": 1.4320568252244823, "learning_rate": 1.475359341994595e-05, "loss": 0.3715, "step": 4570 }, { "epoch": 0.36213111507229157, "grad_norm": 1.612723330184229, "learning_rate": 1.4751335689959044e-05, "loss": 0.309, "step": 4571 }, { "epoch": 0.3622103386809269, "grad_norm": 1.6610382710189933, "learning_rate": 1.4749077647119542e-05, "loss": 0.2608, "step": 4572 }, { "epoch": 0.3622895622895623, "grad_norm": 1.6224368272366407, "learning_rate": 1.474681929157613e-05, "loss": 0.4, "step": 4573 }, { "epoch": 0.36236878589819765, "grad_norm": 1.5121286737390047, "learning_rate": 1.4744560623477502e-05, "loss": 0.3288, "step": 4574 }, { "epoch": 0.36244800950683304, "grad_norm": 1.5583457044684619, "learning_rate": 1.4742301642972392e-05, "loss": 0.3732, "step": 4575 }, { "epoch": 0.3625272331154684, "grad_norm": 1.8003479962902789, "learning_rate": 1.4740042350209536e-05, "loss": 0.3285, "step": 4576 }, { "epoch": 0.3626064567241038, "grad_norm": 1.830572240757174, "learning_rate": 1.4737782745337696e-05, "loss": 0.4015, "step": 4577 }, { "epoch": 0.3626856803327392, "grad_norm": 1.7514932272298862, "learning_rate": 1.4735522828505663e-05, "loss": 0.3968, "step": 4578 }, { "epoch": 0.3627649039413745, "grad_norm": 1.5725393508885004, "learning_rate": 1.4733262599862234e-05, "loss": 0.3644, "step": 4579 }, { "epoch": 0.3628441275500099, "grad_norm": 1.3220174749720226, "learning_rate": 1.4731002059556242e-05, "loss": 0.2512, "step": 4580 }, { "epoch": 0.36292335115864527, "grad_norm": 1.4223548280156784, "learning_rate": 1.4728741207736525e-05, "loss": 0.4286, "step": 4581 }, { "epoch": 0.36300257476728065, "grad_norm": 1.3931753407842224, "learning_rate": 1.4726480044551953e-05, "loss": 0.2845, "step": 4582 }, { "epoch": 0.36308179837591603, "grad_norm": 1.706869850751769, "learning_rate": 1.4724218570151415e-05, "loss": 0.3968, "step": 4583 }, { "epoch": 0.3631610219845514, "grad_norm": 1.6241919060888654, "learning_rate": 1.4721956784683813e-05, "loss": 0.4633, "step": 4584 }, { "epoch": 0.3632402455931868, "grad_norm": 1.7289697936270059, "learning_rate": 1.4719694688298078e-05, "loss": 0.3716, "step": 4585 }, { "epoch": 0.3633194692018221, "grad_norm": 1.3912620664242132, "learning_rate": 1.4717432281143161e-05, "loss": 0.2715, "step": 4586 }, { "epoch": 0.3633986928104575, "grad_norm": 1.5715488781666351, "learning_rate": 1.4715169563368021e-05, "loss": 0.3328, "step": 4587 }, { "epoch": 0.3634779164190929, "grad_norm": 1.204138937434893, "learning_rate": 1.4712906535121658e-05, "loss": 0.2294, "step": 4588 }, { "epoch": 0.36355714002772826, "grad_norm": 1.6339354137993924, "learning_rate": 1.4710643196553074e-05, "loss": 0.4242, "step": 4589 }, { "epoch": 0.36363636363636365, "grad_norm": 1.451752472936718, "learning_rate": 1.4708379547811302e-05, "loss": 0.2134, "step": 4590 }, { "epoch": 0.36371558724499903, "grad_norm": 1.755564857595638, "learning_rate": 1.4706115589045396e-05, "loss": 0.3582, "step": 4591 }, { "epoch": 0.3637948108536344, "grad_norm": 1.9643347303696277, "learning_rate": 1.4703851320404416e-05, "loss": 0.4131, "step": 4592 }, { "epoch": 0.36387403446226974, "grad_norm": 1.5386793167121273, "learning_rate": 1.4701586742037464e-05, "loss": 0.2697, "step": 4593 }, { "epoch": 0.3639532580709051, "grad_norm": 1.6750472898086326, "learning_rate": 1.4699321854093649e-05, "loss": 0.3799, "step": 4594 }, { "epoch": 0.3640324816795405, "grad_norm": 1.5779240540787023, "learning_rate": 1.46970566567221e-05, "loss": 0.3196, "step": 4595 }, { "epoch": 0.3641117052881759, "grad_norm": 1.6008195093628788, "learning_rate": 1.469479115007197e-05, "loss": 0.3869, "step": 4596 }, { "epoch": 0.36419092889681126, "grad_norm": 1.8066764655593655, "learning_rate": 1.4692525334292434e-05, "loss": 0.237, "step": 4597 }, { "epoch": 0.36427015250544664, "grad_norm": 1.538849625563868, "learning_rate": 1.4690259209532682e-05, "loss": 0.2831, "step": 4598 }, { "epoch": 0.364349376114082, "grad_norm": 1.8674362509436677, "learning_rate": 1.468799277594193e-05, "loss": 0.3825, "step": 4599 }, { "epoch": 0.36442859972271735, "grad_norm": 1.683190879844929, "learning_rate": 1.4685726033669412e-05, "loss": 0.4067, "step": 4600 }, { "epoch": 0.36450782333135273, "grad_norm": 1.5257501778760814, "learning_rate": 1.468345898286438e-05, "loss": 0.2996, "step": 4601 }, { "epoch": 0.3645870469399881, "grad_norm": 1.3137061305129647, "learning_rate": 1.468119162367611e-05, "loss": 0.2582, "step": 4602 }, { "epoch": 0.3646662705486235, "grad_norm": 1.5493471197544215, "learning_rate": 1.4678923956253894e-05, "loss": 0.4196, "step": 4603 }, { "epoch": 0.3647454941572589, "grad_norm": 1.7518071942723026, "learning_rate": 1.4676655980747052e-05, "loss": 0.3467, "step": 4604 }, { "epoch": 0.36482471776589426, "grad_norm": 1.2114731631575513, "learning_rate": 1.4674387697304914e-05, "loss": 0.2344, "step": 4605 }, { "epoch": 0.3649039413745296, "grad_norm": 1.5387893475895256, "learning_rate": 1.4672119106076838e-05, "loss": 0.4891, "step": 4606 }, { "epoch": 0.36498316498316496, "grad_norm": 1.8681054215404638, "learning_rate": 1.4669850207212202e-05, "loss": 0.4069, "step": 4607 }, { "epoch": 0.36506238859180035, "grad_norm": 1.4750912297774346, "learning_rate": 1.4667581000860395e-05, "loss": 0.2821, "step": 4608 }, { "epoch": 0.36514161220043573, "grad_norm": 2.534997991900976, "learning_rate": 1.4665311487170844e-05, "loss": 0.6095, "step": 4609 }, { "epoch": 0.3652208358090711, "grad_norm": 1.6675856075933126, "learning_rate": 1.4663041666292978e-05, "loss": 0.4356, "step": 4610 }, { "epoch": 0.3653000594177065, "grad_norm": 1.6688530198532512, "learning_rate": 1.4660771538376253e-05, "loss": 0.3418, "step": 4611 }, { "epoch": 0.3653792830263419, "grad_norm": 1.270817909880478, "learning_rate": 1.4658501103570149e-05, "loss": 0.2908, "step": 4612 }, { "epoch": 0.3654585066349772, "grad_norm": 1.2741529689072915, "learning_rate": 1.4656230362024166e-05, "loss": 0.2625, "step": 4613 }, { "epoch": 0.3655377302436126, "grad_norm": 1.4525899294237308, "learning_rate": 1.4653959313887813e-05, "loss": 0.358, "step": 4614 }, { "epoch": 0.36561695385224796, "grad_norm": 1.3306823595862238, "learning_rate": 1.4651687959310636e-05, "loss": 0.2651, "step": 4615 }, { "epoch": 0.36569617746088334, "grad_norm": 1.5112091541997872, "learning_rate": 1.4649416298442187e-05, "loss": 0.3741, "step": 4616 }, { "epoch": 0.3657754010695187, "grad_norm": 1.6039927324986167, "learning_rate": 1.4647144331432049e-05, "loss": 0.3978, "step": 4617 }, { "epoch": 0.3658546246781541, "grad_norm": 1.3158075578605177, "learning_rate": 1.4644872058429816e-05, "loss": 0.2901, "step": 4618 }, { "epoch": 0.3659338482867895, "grad_norm": 1.5183594736862347, "learning_rate": 1.4642599479585106e-05, "loss": 0.3766, "step": 4619 }, { "epoch": 0.3660130718954248, "grad_norm": 1.7834692884590755, "learning_rate": 1.4640326595047561e-05, "loss": 0.3997, "step": 4620 }, { "epoch": 0.3660922955040602, "grad_norm": 1.6458240372043382, "learning_rate": 1.4638053404966836e-05, "loss": 0.3972, "step": 4621 }, { "epoch": 0.3661715191126956, "grad_norm": 1.5626009523690787, "learning_rate": 1.4635779909492614e-05, "loss": 0.3104, "step": 4622 }, { "epoch": 0.36625074272133096, "grad_norm": 1.605839133960405, "learning_rate": 1.4633506108774588e-05, "loss": 0.3325, "step": 4623 }, { "epoch": 0.36632996632996634, "grad_norm": 1.65605612094927, "learning_rate": 1.4631232002962481e-05, "loss": 0.3319, "step": 4624 }, { "epoch": 0.3664091899386017, "grad_norm": 1.194760596178059, "learning_rate": 1.462895759220603e-05, "loss": 0.2863, "step": 4625 }, { "epoch": 0.3664884135472371, "grad_norm": 1.4897267524134923, "learning_rate": 1.4626682876654998e-05, "loss": 0.3196, "step": 4626 }, { "epoch": 0.36656763715587243, "grad_norm": 1.6338069843979595, "learning_rate": 1.4624407856459154e-05, "loss": 0.4129, "step": 4627 }, { "epoch": 0.3666468607645078, "grad_norm": 1.658035888298133, "learning_rate": 1.4622132531768309e-05, "loss": 0.373, "step": 4628 }, { "epoch": 0.3667260843731432, "grad_norm": 1.4983164704356673, "learning_rate": 1.4619856902732279e-05, "loss": 0.3318, "step": 4629 }, { "epoch": 0.36680530798177857, "grad_norm": 1.4205575374264885, "learning_rate": 1.4617580969500895e-05, "loss": 0.2856, "step": 4630 }, { "epoch": 0.36688453159041395, "grad_norm": 1.39351778991826, "learning_rate": 1.461530473222403e-05, "loss": 0.2913, "step": 4631 }, { "epoch": 0.36696375519904934, "grad_norm": 1.3460895648737004, "learning_rate": 1.4613028191051548e-05, "loss": 0.2886, "step": 4632 }, { "epoch": 0.3670429788076847, "grad_norm": 1.743681810869301, "learning_rate": 1.4610751346133361e-05, "loss": 0.3681, "step": 4633 }, { "epoch": 0.36712220241632004, "grad_norm": 1.5960034110784955, "learning_rate": 1.4608474197619383e-05, "loss": 0.3737, "step": 4634 }, { "epoch": 0.3672014260249554, "grad_norm": 1.4543851667449015, "learning_rate": 1.4606196745659551e-05, "loss": 0.2573, "step": 4635 }, { "epoch": 0.3672806496335908, "grad_norm": 1.3399534873314962, "learning_rate": 1.460391899040383e-05, "loss": 0.326, "step": 4636 }, { "epoch": 0.3673598732422262, "grad_norm": 1.6446976758229053, "learning_rate": 1.4601640932002194e-05, "loss": 0.3493, "step": 4637 }, { "epoch": 0.36743909685086157, "grad_norm": 1.6301308603854625, "learning_rate": 1.4599362570604645e-05, "loss": 0.31, "step": 4638 }, { "epoch": 0.36751832045949695, "grad_norm": 1.3638954249354742, "learning_rate": 1.4597083906361203e-05, "loss": 0.4205, "step": 4639 }, { "epoch": 0.3675975440681323, "grad_norm": 1.7531860483348178, "learning_rate": 1.4594804939421903e-05, "loss": 0.3755, "step": 4640 }, { "epoch": 0.36767676767676766, "grad_norm": 1.4043519443126165, "learning_rate": 1.4592525669936808e-05, "loss": 0.2748, "step": 4641 }, { "epoch": 0.36775599128540304, "grad_norm": 1.8413621118434689, "learning_rate": 1.4590246098055995e-05, "loss": 0.454, "step": 4642 }, { "epoch": 0.3678352148940384, "grad_norm": 1.5235331398057443, "learning_rate": 1.4587966223929562e-05, "loss": 0.3505, "step": 4643 }, { "epoch": 0.3679144385026738, "grad_norm": 1.4567379310302107, "learning_rate": 1.458568604770763e-05, "loss": 0.3569, "step": 4644 }, { "epoch": 0.3679936621113092, "grad_norm": 1.3856834684257866, "learning_rate": 1.458340556954034e-05, "loss": 0.3214, "step": 4645 }, { "epoch": 0.36807288571994456, "grad_norm": 1.5632748588157244, "learning_rate": 1.4581124789577841e-05, "loss": 0.3491, "step": 4646 }, { "epoch": 0.3681521093285799, "grad_norm": 1.645248312939299, "learning_rate": 1.4578843707970323e-05, "loss": 0.341, "step": 4647 }, { "epoch": 0.36823133293721527, "grad_norm": 1.6531855869579501, "learning_rate": 1.4576562324867975e-05, "loss": 0.3198, "step": 4648 }, { "epoch": 0.36831055654585065, "grad_norm": 1.5837364881527218, "learning_rate": 1.457428064042102e-05, "loss": 0.3833, "step": 4649 }, { "epoch": 0.36838978015448604, "grad_norm": 1.1834622362998237, "learning_rate": 1.45719986547797e-05, "loss": 0.1984, "step": 4650 }, { "epoch": 0.3684690037631214, "grad_norm": 1.4053115244075074, "learning_rate": 1.4569716368094262e-05, "loss": 0.2948, "step": 4651 }, { "epoch": 0.3685482273717568, "grad_norm": 1.5462441867586394, "learning_rate": 1.456743378051499e-05, "loss": 0.3294, "step": 4652 }, { "epoch": 0.3686274509803922, "grad_norm": 1.3494062448999087, "learning_rate": 1.456515089219218e-05, "loss": 0.2445, "step": 4653 }, { "epoch": 0.3687066745890275, "grad_norm": 1.9853310086543616, "learning_rate": 1.456286770327615e-05, "loss": 0.4928, "step": 4654 }, { "epoch": 0.3687858981976629, "grad_norm": 1.4320607522285995, "learning_rate": 1.456058421391724e-05, "loss": 0.232, "step": 4655 }, { "epoch": 0.36886512180629827, "grad_norm": 1.598467427100357, "learning_rate": 1.45583004242658e-05, "loss": 0.3815, "step": 4656 }, { "epoch": 0.36894434541493365, "grad_norm": 1.6540532138961344, "learning_rate": 1.4556016334472211e-05, "loss": 0.3427, "step": 4657 }, { "epoch": 0.36902356902356903, "grad_norm": 1.342372202072488, "learning_rate": 1.455373194468687e-05, "loss": 0.2886, "step": 4658 }, { "epoch": 0.3691027926322044, "grad_norm": 1.5284179077281435, "learning_rate": 1.4551447255060192e-05, "loss": 0.3183, "step": 4659 }, { "epoch": 0.3691820162408398, "grad_norm": 1.1655615528525876, "learning_rate": 1.4549162265742608e-05, "loss": 0.2578, "step": 4660 }, { "epoch": 0.3692612398494751, "grad_norm": 1.6941544491756817, "learning_rate": 1.4546876976884583e-05, "loss": 0.3439, "step": 4661 }, { "epoch": 0.3693404634581105, "grad_norm": 1.4269738452637428, "learning_rate": 1.4544591388636584e-05, "loss": 0.3112, "step": 4662 }, { "epoch": 0.3694196870667459, "grad_norm": 1.103473132298058, "learning_rate": 1.454230550114911e-05, "loss": 0.1941, "step": 4663 }, { "epoch": 0.36949891067538126, "grad_norm": 1.6713762821841982, "learning_rate": 1.4540019314572678e-05, "loss": 0.3523, "step": 4664 }, { "epoch": 0.36957813428401665, "grad_norm": 1.3958377347130868, "learning_rate": 1.4537732829057816e-05, "loss": 0.3582, "step": 4665 }, { "epoch": 0.369657357892652, "grad_norm": 1.8921948303906428, "learning_rate": 1.4535446044755082e-05, "loss": 0.4173, "step": 4666 }, { "epoch": 0.3697365815012874, "grad_norm": 1.8616661308957294, "learning_rate": 1.4533158961815048e-05, "loss": 0.4406, "step": 4667 }, { "epoch": 0.36981580510992274, "grad_norm": 1.217528633217943, "learning_rate": 1.4530871580388311e-05, "loss": 0.1821, "step": 4668 }, { "epoch": 0.3698950287185581, "grad_norm": 1.5306705153935265, "learning_rate": 1.4528583900625481e-05, "loss": 0.3402, "step": 4669 }, { "epoch": 0.3699742523271935, "grad_norm": 1.414553172656487, "learning_rate": 1.4526295922677189e-05, "loss": 0.4246, "step": 4670 }, { "epoch": 0.3700534759358289, "grad_norm": 1.823181112464695, "learning_rate": 1.4524007646694091e-05, "loss": 0.4012, "step": 4671 }, { "epoch": 0.37013269954446426, "grad_norm": 1.3108502235097963, "learning_rate": 1.4521719072826858e-05, "loss": 0.2361, "step": 4672 }, { "epoch": 0.37021192315309964, "grad_norm": 1.1105335455527836, "learning_rate": 1.451943020122618e-05, "loss": 0.2221, "step": 4673 }, { "epoch": 0.370291146761735, "grad_norm": 1.3107517950227892, "learning_rate": 1.4517141032042773e-05, "loss": 0.2718, "step": 4674 }, { "epoch": 0.37037037037037035, "grad_norm": 1.3845994655311256, "learning_rate": 1.4514851565427362e-05, "loss": 0.3194, "step": 4675 }, { "epoch": 0.37044959397900573, "grad_norm": 1.5857491671616526, "learning_rate": 1.4512561801530699e-05, "loss": 0.311, "step": 4676 }, { "epoch": 0.3705288175876411, "grad_norm": 2.014009019609301, "learning_rate": 1.4510271740503555e-05, "loss": 0.3856, "step": 4677 }, { "epoch": 0.3706080411962765, "grad_norm": 1.124160729962944, "learning_rate": 1.4507981382496716e-05, "loss": 0.2459, "step": 4678 }, { "epoch": 0.3706872648049119, "grad_norm": 2.0185225727502383, "learning_rate": 1.4505690727660997e-05, "loss": 0.4145, "step": 4679 }, { "epoch": 0.37076648841354726, "grad_norm": 1.5753645357511594, "learning_rate": 1.4503399776147223e-05, "loss": 0.3028, "step": 4680 }, { "epoch": 0.3708457120221826, "grad_norm": 1.7773431005163407, "learning_rate": 1.4501108528106243e-05, "loss": 0.4036, "step": 4681 }, { "epoch": 0.37092493563081796, "grad_norm": 1.4956789205301773, "learning_rate": 1.4498816983688926e-05, "loss": 0.313, "step": 4682 }, { "epoch": 0.37100415923945335, "grad_norm": 1.5415317871335148, "learning_rate": 1.4496525143046154e-05, "loss": 0.3778, "step": 4683 }, { "epoch": 0.3710833828480887, "grad_norm": 1.4161775432365453, "learning_rate": 1.4494233006328837e-05, "loss": 0.3218, "step": 4684 }, { "epoch": 0.3711626064567241, "grad_norm": 1.4029137319291618, "learning_rate": 1.4491940573687906e-05, "loss": 0.2896, "step": 4685 }, { "epoch": 0.3712418300653595, "grad_norm": 1.433596580212024, "learning_rate": 1.44896478452743e-05, "loss": 0.2989, "step": 4686 }, { "epoch": 0.37132105367399487, "grad_norm": 1.5365685859727027, "learning_rate": 1.4487354821238983e-05, "loss": 0.3105, "step": 4687 }, { "epoch": 0.3714002772826302, "grad_norm": 1.4626767706974417, "learning_rate": 1.4485061501732949e-05, "loss": 0.315, "step": 4688 }, { "epoch": 0.3714795008912656, "grad_norm": 1.4747343396470216, "learning_rate": 1.448276788690719e-05, "loss": 0.3042, "step": 4689 }, { "epoch": 0.37155872449990096, "grad_norm": 1.3942598195028861, "learning_rate": 1.4480473976912737e-05, "loss": 0.2464, "step": 4690 }, { "epoch": 0.37163794810853634, "grad_norm": 1.5027014180904217, "learning_rate": 1.4478179771900634e-05, "loss": 0.3039, "step": 4691 }, { "epoch": 0.3717171717171717, "grad_norm": 1.6378230445114412, "learning_rate": 1.4475885272021936e-05, "loss": 0.392, "step": 4692 }, { "epoch": 0.3717963953258071, "grad_norm": 1.5117124897317267, "learning_rate": 1.4473590477427735e-05, "loss": 0.3651, "step": 4693 }, { "epoch": 0.3718756189344425, "grad_norm": 1.555778726648578, "learning_rate": 1.4471295388269121e-05, "loss": 0.368, "step": 4694 }, { "epoch": 0.3719548425430778, "grad_norm": 1.4679560826210025, "learning_rate": 1.4469000004697224e-05, "loss": 0.3114, "step": 4695 }, { "epoch": 0.3720340661517132, "grad_norm": 1.534323434907577, "learning_rate": 1.446670432686318e-05, "loss": 0.3793, "step": 4696 }, { "epoch": 0.3721132897603486, "grad_norm": 1.6423730313725133, "learning_rate": 1.4464408354918145e-05, "loss": 0.3179, "step": 4697 }, { "epoch": 0.37219251336898396, "grad_norm": 1.2273350592842331, "learning_rate": 1.4462112089013304e-05, "loss": 0.2809, "step": 4698 }, { "epoch": 0.37227173697761934, "grad_norm": 1.5561479541583605, "learning_rate": 1.4459815529299851e-05, "loss": 0.2943, "step": 4699 }, { "epoch": 0.3723509605862547, "grad_norm": 1.5081278645188412, "learning_rate": 1.4457518675929008e-05, "loss": 0.3366, "step": 4700 }, { "epoch": 0.3724301841948901, "grad_norm": 1.360163798910226, "learning_rate": 1.4455221529052006e-05, "loss": 0.2897, "step": 4701 }, { "epoch": 0.3725094078035254, "grad_norm": 1.58515219573682, "learning_rate": 1.4452924088820101e-05, "loss": 0.353, "step": 4702 }, { "epoch": 0.3725886314121608, "grad_norm": 1.4131259366145987, "learning_rate": 1.4450626355384573e-05, "loss": 0.3453, "step": 4703 }, { "epoch": 0.3726678550207962, "grad_norm": 1.559073530996899, "learning_rate": 1.4448328328896717e-05, "loss": 0.3109, "step": 4704 }, { "epoch": 0.37274707862943157, "grad_norm": 1.5864034495512063, "learning_rate": 1.444603000950784e-05, "loss": 0.2887, "step": 4705 }, { "epoch": 0.37282630223806695, "grad_norm": 1.4532642364597348, "learning_rate": 1.4443731397369283e-05, "loss": 0.3134, "step": 4706 }, { "epoch": 0.37290552584670233, "grad_norm": 1.6722820926762596, "learning_rate": 1.4441432492632395e-05, "loss": 0.4005, "step": 4707 }, { "epoch": 0.3729847494553377, "grad_norm": 1.7373699492904362, "learning_rate": 1.4439133295448547e-05, "loss": 0.3444, "step": 4708 }, { "epoch": 0.37306397306397304, "grad_norm": 1.2511997154092365, "learning_rate": 1.4436833805969133e-05, "loss": 0.2703, "step": 4709 }, { "epoch": 0.3731431966726084, "grad_norm": 1.146360282693015, "learning_rate": 1.4434534024345558e-05, "loss": 0.2558, "step": 4710 }, { "epoch": 0.3732224202812438, "grad_norm": 1.4858804313854288, "learning_rate": 1.4432233950729257e-05, "loss": 0.2908, "step": 4711 }, { "epoch": 0.3733016438898792, "grad_norm": 1.4921898449790627, "learning_rate": 1.442993358527168e-05, "loss": 0.3013, "step": 4712 }, { "epoch": 0.37338086749851457, "grad_norm": 1.4069100772099172, "learning_rate": 1.4427632928124288e-05, "loss": 0.2674, "step": 4713 }, { "epoch": 0.37346009110714995, "grad_norm": 1.3734893985782217, "learning_rate": 1.4425331979438573e-05, "loss": 0.2721, "step": 4714 }, { "epoch": 0.37353931471578533, "grad_norm": 1.1963372111462343, "learning_rate": 1.4423030739366042e-05, "loss": 0.2386, "step": 4715 }, { "epoch": 0.37361853832442066, "grad_norm": 1.5444197340935044, "learning_rate": 1.4420729208058217e-05, "loss": 0.3274, "step": 4716 }, { "epoch": 0.37369776193305604, "grad_norm": 1.863440753569575, "learning_rate": 1.4418427385666647e-05, "loss": 0.4475, "step": 4717 }, { "epoch": 0.3737769855416914, "grad_norm": 1.5226158303161417, "learning_rate": 1.4416125272342891e-05, "loss": 0.3624, "step": 4718 }, { "epoch": 0.3738562091503268, "grad_norm": 1.465057170296472, "learning_rate": 1.4413822868238537e-05, "loss": 0.3059, "step": 4719 }, { "epoch": 0.3739354327589622, "grad_norm": 1.7880199929989178, "learning_rate": 1.4411520173505184e-05, "loss": 0.3379, "step": 4720 }, { "epoch": 0.37401465636759756, "grad_norm": 1.5360629781643058, "learning_rate": 1.4409217188294456e-05, "loss": 0.2871, "step": 4721 }, { "epoch": 0.3740938799762329, "grad_norm": 1.564431604919678, "learning_rate": 1.440691391275799e-05, "loss": 0.3338, "step": 4722 }, { "epoch": 0.37417310358486827, "grad_norm": 1.588692736369417, "learning_rate": 1.440461034704745e-05, "loss": 0.3241, "step": 4723 }, { "epoch": 0.37425232719350365, "grad_norm": 1.700732090206522, "learning_rate": 1.4402306491314508e-05, "loss": 0.4238, "step": 4724 }, { "epoch": 0.37433155080213903, "grad_norm": 1.332147796700369, "learning_rate": 1.4400002345710871e-05, "loss": 0.2356, "step": 4725 }, { "epoch": 0.3744107744107744, "grad_norm": 1.3139373866413449, "learning_rate": 1.4397697910388248e-05, "loss": 0.2486, "step": 4726 }, { "epoch": 0.3744899980194098, "grad_norm": 1.6136158357733488, "learning_rate": 1.4395393185498381e-05, "loss": 0.3633, "step": 4727 }, { "epoch": 0.3745692216280452, "grad_norm": 1.740817657205945, "learning_rate": 1.4393088171193021e-05, "loss": 0.4123, "step": 4728 }, { "epoch": 0.3746484452366805, "grad_norm": 1.7611602299747777, "learning_rate": 1.439078286762394e-05, "loss": 0.3676, "step": 4729 }, { "epoch": 0.3747276688453159, "grad_norm": 1.752622200562547, "learning_rate": 1.4388477274942936e-05, "loss": 0.3968, "step": 4730 }, { "epoch": 0.37480689245395127, "grad_norm": 1.7450319718219336, "learning_rate": 1.438617139330182e-05, "loss": 0.3634, "step": 4731 }, { "epoch": 0.37488611606258665, "grad_norm": 1.6150278691106186, "learning_rate": 1.4383865222852423e-05, "loss": 0.4263, "step": 4732 }, { "epoch": 0.37496533967122203, "grad_norm": 1.4561309174099997, "learning_rate": 1.4381558763746593e-05, "loss": 0.3018, "step": 4733 }, { "epoch": 0.3750445632798574, "grad_norm": 1.5183279123368738, "learning_rate": 1.4379252016136203e-05, "loss": 0.3077, "step": 4734 }, { "epoch": 0.3751237868884928, "grad_norm": 1.693966480917254, "learning_rate": 1.4376944980173138e-05, "loss": 0.3354, "step": 4735 }, { "epoch": 0.3752030104971281, "grad_norm": 1.3470739014671065, "learning_rate": 1.4374637656009309e-05, "loss": 0.3069, "step": 4736 }, { "epoch": 0.3752822341057635, "grad_norm": 1.4664709557930569, "learning_rate": 1.4372330043796636e-05, "loss": 0.292, "step": 4737 }, { "epoch": 0.3753614577143989, "grad_norm": 1.6288033815681167, "learning_rate": 1.437002214368707e-05, "loss": 0.4213, "step": 4738 }, { "epoch": 0.37544068132303426, "grad_norm": 1.6169427484861356, "learning_rate": 1.4367713955832575e-05, "loss": 0.3659, "step": 4739 }, { "epoch": 0.37551990493166965, "grad_norm": 1.4286273559504765, "learning_rate": 1.4365405480385129e-05, "loss": 0.2908, "step": 4740 }, { "epoch": 0.375599128540305, "grad_norm": 1.6600317436593115, "learning_rate": 1.4363096717496738e-05, "loss": 0.3743, "step": 4741 }, { "epoch": 0.3756783521489404, "grad_norm": 1.5638075988765359, "learning_rate": 1.4360787667319423e-05, "loss": 0.322, "step": 4742 }, { "epoch": 0.37575757575757573, "grad_norm": 1.4730870053945573, "learning_rate": 1.4358478330005222e-05, "loss": 0.3008, "step": 4743 }, { "epoch": 0.3758367993662111, "grad_norm": 1.6658587344631532, "learning_rate": 1.4356168705706195e-05, "loss": 0.4229, "step": 4744 }, { "epoch": 0.3759160229748465, "grad_norm": 1.1808276796449346, "learning_rate": 1.4353858794574418e-05, "loss": 0.2283, "step": 4745 }, { "epoch": 0.3759952465834819, "grad_norm": 1.4503386906435112, "learning_rate": 1.435154859676199e-05, "loss": 0.218, "step": 4746 }, { "epoch": 0.37607447019211726, "grad_norm": 1.6026729841908476, "learning_rate": 1.4349238112421025e-05, "loss": 0.3528, "step": 4747 }, { "epoch": 0.37615369380075264, "grad_norm": 1.5594781413395669, "learning_rate": 1.4346927341703659e-05, "loss": 0.2479, "step": 4748 }, { "epoch": 0.376232917409388, "grad_norm": 1.7522496954621372, "learning_rate": 1.4344616284762038e-05, "loss": 0.305, "step": 4749 }, { "epoch": 0.37631214101802335, "grad_norm": 1.5081692518287162, "learning_rate": 1.4342304941748347e-05, "loss": 0.3058, "step": 4750 }, { "epoch": 0.37639136462665873, "grad_norm": 1.3274860445854602, "learning_rate": 1.4339993312814765e-05, "loss": 0.2843, "step": 4751 }, { "epoch": 0.3764705882352941, "grad_norm": 1.7588223459918972, "learning_rate": 1.4337681398113508e-05, "loss": 0.3943, "step": 4752 }, { "epoch": 0.3765498118439295, "grad_norm": 1.539578007692588, "learning_rate": 1.4335369197796803e-05, "loss": 0.3581, "step": 4753 }, { "epoch": 0.3766290354525649, "grad_norm": 1.4499875464029557, "learning_rate": 1.4333056712016893e-05, "loss": 0.3362, "step": 4754 }, { "epoch": 0.37670825906120026, "grad_norm": 1.506353803068302, "learning_rate": 1.4330743940926052e-05, "loss": 0.4008, "step": 4755 }, { "epoch": 0.37678748266983564, "grad_norm": 1.5167911728871935, "learning_rate": 1.4328430884676559e-05, "loss": 0.4025, "step": 4756 }, { "epoch": 0.37686670627847096, "grad_norm": 1.5081114452206938, "learning_rate": 1.432611754342072e-05, "loss": 0.2339, "step": 4757 }, { "epoch": 0.37694592988710635, "grad_norm": 1.6514876173222937, "learning_rate": 1.4323803917310857e-05, "loss": 0.3226, "step": 4758 }, { "epoch": 0.3770251534957417, "grad_norm": 1.3272690395731235, "learning_rate": 1.4321490006499309e-05, "loss": 0.3652, "step": 4759 }, { "epoch": 0.3771043771043771, "grad_norm": 1.728946837203164, "learning_rate": 1.4319175811138439e-05, "loss": 0.4434, "step": 4760 }, { "epoch": 0.3771836007130125, "grad_norm": 1.2655002648705844, "learning_rate": 1.4316861331380624e-05, "loss": 0.2848, "step": 4761 }, { "epoch": 0.37726282432164787, "grad_norm": 1.5918721024132505, "learning_rate": 1.431454656737826e-05, "loss": 0.297, "step": 4762 }, { "epoch": 0.3773420479302832, "grad_norm": 1.6932390692880819, "learning_rate": 1.4312231519283768e-05, "loss": 0.4457, "step": 4763 }, { "epoch": 0.3774212715389186, "grad_norm": 1.55078416855098, "learning_rate": 1.4309916187249578e-05, "loss": 0.3203, "step": 4764 }, { "epoch": 0.37750049514755396, "grad_norm": 1.4521369653671037, "learning_rate": 1.4307600571428143e-05, "loss": 0.2671, "step": 4765 }, { "epoch": 0.37757971875618934, "grad_norm": 1.825500781525087, "learning_rate": 1.4305284671971943e-05, "loss": 0.3236, "step": 4766 }, { "epoch": 0.3776589423648247, "grad_norm": 1.5303295024891377, "learning_rate": 1.4302968489033462e-05, "loss": 0.3466, "step": 4767 }, { "epoch": 0.3777381659734601, "grad_norm": 1.2917772443555833, "learning_rate": 1.4300652022765207e-05, "loss": 0.2203, "step": 4768 }, { "epoch": 0.3778173895820955, "grad_norm": 1.3576850745825737, "learning_rate": 1.429833527331971e-05, "loss": 0.2941, "step": 4769 }, { "epoch": 0.3778966131907308, "grad_norm": 1.4598007330713265, "learning_rate": 1.4296018240849518e-05, "loss": 0.2954, "step": 4770 }, { "epoch": 0.3779758367993662, "grad_norm": 1.7004278676561306, "learning_rate": 1.4293700925507199e-05, "loss": 0.4102, "step": 4771 }, { "epoch": 0.3780550604080016, "grad_norm": 1.3716560256662682, "learning_rate": 1.429138332744533e-05, "loss": 0.2995, "step": 4772 }, { "epoch": 0.37813428401663696, "grad_norm": 1.8978984802341372, "learning_rate": 1.428906544681652e-05, "loss": 0.5102, "step": 4773 }, { "epoch": 0.37821350762527234, "grad_norm": 1.480435219751354, "learning_rate": 1.4286747283773388e-05, "loss": 0.3326, "step": 4774 }, { "epoch": 0.3782927312339077, "grad_norm": 1.6170666697721474, "learning_rate": 1.4284428838468572e-05, "loss": 0.4041, "step": 4775 }, { "epoch": 0.3783719548425431, "grad_norm": 1.5603998015216134, "learning_rate": 1.4282110111054733e-05, "loss": 0.3073, "step": 4776 }, { "epoch": 0.3784511784511784, "grad_norm": 1.2834935868281077, "learning_rate": 1.4279791101684547e-05, "loss": 0.3091, "step": 4777 }, { "epoch": 0.3785304020598138, "grad_norm": 1.664788512525511, "learning_rate": 1.427747181051071e-05, "loss": 0.3906, "step": 4778 }, { "epoch": 0.3786096256684492, "grad_norm": 1.388924260721447, "learning_rate": 1.4275152237685938e-05, "loss": 0.2765, "step": 4779 }, { "epoch": 0.37868884927708457, "grad_norm": 1.504079229141585, "learning_rate": 1.4272832383362962e-05, "loss": 0.3215, "step": 4780 }, { "epoch": 0.37876807288571995, "grad_norm": 1.7578236816960764, "learning_rate": 1.427051224769453e-05, "loss": 0.293, "step": 4781 }, { "epoch": 0.37884729649435533, "grad_norm": 1.6795768761384273, "learning_rate": 1.4268191830833417e-05, "loss": 0.3963, "step": 4782 }, { "epoch": 0.3789265201029907, "grad_norm": 1.394179790188331, "learning_rate": 1.426587113293241e-05, "loss": 0.305, "step": 4783 }, { "epoch": 0.37900574371162604, "grad_norm": 1.532906346205471, "learning_rate": 1.4263550154144313e-05, "loss": 0.3036, "step": 4784 }, { "epoch": 0.3790849673202614, "grad_norm": 1.2060321000261924, "learning_rate": 1.4261228894621955e-05, "loss": 0.2612, "step": 4785 }, { "epoch": 0.3791641909288968, "grad_norm": 1.2643894269638989, "learning_rate": 1.4258907354518177e-05, "loss": 0.3271, "step": 4786 }, { "epoch": 0.3792434145375322, "grad_norm": 1.4460847572072517, "learning_rate": 1.4256585533985842e-05, "loss": 0.2806, "step": 4787 }, { "epoch": 0.37932263814616757, "grad_norm": 1.433822254279183, "learning_rate": 1.425426343317783e-05, "loss": 0.2994, "step": 4788 }, { "epoch": 0.37940186175480295, "grad_norm": 1.5820851739982629, "learning_rate": 1.4251941052247044e-05, "loss": 0.3346, "step": 4789 }, { "epoch": 0.37948108536343833, "grad_norm": 1.2061245598050252, "learning_rate": 1.4249618391346399e-05, "loss": 0.2543, "step": 4790 }, { "epoch": 0.37956030897207366, "grad_norm": 1.4766982300934812, "learning_rate": 1.4247295450628826e-05, "loss": 0.3901, "step": 4791 }, { "epoch": 0.37963953258070904, "grad_norm": 1.2927766879477545, "learning_rate": 1.4244972230247287e-05, "loss": 0.2867, "step": 4792 }, { "epoch": 0.3797187561893444, "grad_norm": 1.5140968593992927, "learning_rate": 1.4242648730354756e-05, "loss": 0.2814, "step": 4793 }, { "epoch": 0.3797979797979798, "grad_norm": 2.0780975926321226, "learning_rate": 1.4240324951104213e-05, "loss": 0.3576, "step": 4794 }, { "epoch": 0.3798772034066152, "grad_norm": 1.4711231963119753, "learning_rate": 1.4238000892648682e-05, "loss": 0.375, "step": 4795 }, { "epoch": 0.37995642701525056, "grad_norm": 1.402121695072646, "learning_rate": 1.423567655514118e-05, "loss": 0.332, "step": 4796 }, { "epoch": 0.38003565062388595, "grad_norm": 1.5551963104852167, "learning_rate": 1.4233351938734758e-05, "loss": 0.3524, "step": 4797 }, { "epoch": 0.38011487423252127, "grad_norm": 1.7685458571080634, "learning_rate": 1.4231027043582483e-05, "loss": 0.3524, "step": 4798 }, { "epoch": 0.38019409784115665, "grad_norm": 1.4910700471238236, "learning_rate": 1.4228701869837433e-05, "loss": 0.2447, "step": 4799 }, { "epoch": 0.38027332144979203, "grad_norm": 1.3840322448951945, "learning_rate": 1.4226376417652713e-05, "loss": 0.2976, "step": 4800 }, { "epoch": 0.3803525450584274, "grad_norm": 1.992655736226578, "learning_rate": 1.4224050687181442e-05, "loss": 0.514, "step": 4801 }, { "epoch": 0.3804317686670628, "grad_norm": 1.3722806032954438, "learning_rate": 1.4221724678576756e-05, "loss": 0.32, "step": 4802 }, { "epoch": 0.3805109922756982, "grad_norm": 1.5161612358815824, "learning_rate": 1.421939839199182e-05, "loss": 0.3037, "step": 4803 }, { "epoch": 0.3805902158843335, "grad_norm": 1.722906661712149, "learning_rate": 1.4217071827579796e-05, "loss": 0.4407, "step": 4804 }, { "epoch": 0.3806694394929689, "grad_norm": 1.4325706364116861, "learning_rate": 1.4214744985493884e-05, "loss": 0.3028, "step": 4805 }, { "epoch": 0.38074866310160427, "grad_norm": 1.4871762269384778, "learning_rate": 1.4212417865887299e-05, "loss": 0.3223, "step": 4806 }, { "epoch": 0.38082788671023965, "grad_norm": 1.5846116636037701, "learning_rate": 1.4210090468913263e-05, "loss": 0.3463, "step": 4807 }, { "epoch": 0.38090711031887503, "grad_norm": 1.4911394889462763, "learning_rate": 1.4207762794725026e-05, "loss": 0.2443, "step": 4808 }, { "epoch": 0.3809863339275104, "grad_norm": 1.71450505054649, "learning_rate": 1.4205434843475859e-05, "loss": 0.4016, "step": 4809 }, { "epoch": 0.3810655575361458, "grad_norm": 1.2468708300234312, "learning_rate": 1.420310661531904e-05, "loss": 0.3156, "step": 4810 }, { "epoch": 0.3811447811447811, "grad_norm": 1.4510412626556506, "learning_rate": 1.4200778110407873e-05, "loss": 0.3015, "step": 4811 }, { "epoch": 0.3812240047534165, "grad_norm": 1.2815638980910853, "learning_rate": 1.4198449328895685e-05, "loss": 0.2661, "step": 4812 }, { "epoch": 0.3813032283620519, "grad_norm": 1.3894997499992157, "learning_rate": 1.4196120270935807e-05, "loss": 0.3037, "step": 4813 }, { "epoch": 0.38138245197068726, "grad_norm": 1.9720604861663078, "learning_rate": 1.4193790936681602e-05, "loss": 0.4662, "step": 4814 }, { "epoch": 0.38146167557932265, "grad_norm": 1.3074617738028005, "learning_rate": 1.4191461326286442e-05, "loss": 0.2884, "step": 4815 }, { "epoch": 0.381540899187958, "grad_norm": 1.6905048906543017, "learning_rate": 1.4189131439903721e-05, "loss": 0.454, "step": 4816 }, { "epoch": 0.3816201227965934, "grad_norm": 1.5671984538408312, "learning_rate": 1.4186801277686852e-05, "loss": 0.3987, "step": 4817 }, { "epoch": 0.38169934640522873, "grad_norm": 1.2480114164130005, "learning_rate": 1.4184470839789265e-05, "loss": 0.1935, "step": 4818 }, { "epoch": 0.3817785700138641, "grad_norm": 1.5023859298809623, "learning_rate": 1.4182140126364404e-05, "loss": 0.2729, "step": 4819 }, { "epoch": 0.3818577936224995, "grad_norm": 1.456270837781397, "learning_rate": 1.4179809137565742e-05, "loss": 0.333, "step": 4820 }, { "epoch": 0.3819370172311349, "grad_norm": 1.4132678249526858, "learning_rate": 1.417747787354676e-05, "loss": 0.3856, "step": 4821 }, { "epoch": 0.38201624083977026, "grad_norm": 1.8741654642532457, "learning_rate": 1.4175146334460963e-05, "loss": 0.3839, "step": 4822 }, { "epoch": 0.38209546444840564, "grad_norm": 1.437968325797009, "learning_rate": 1.4172814520461867e-05, "loss": 0.3176, "step": 4823 }, { "epoch": 0.382174688057041, "grad_norm": 1.5045774004637167, "learning_rate": 1.4170482431703012e-05, "loss": 0.3417, "step": 4824 }, { "epoch": 0.38225391166567635, "grad_norm": 1.6932079063719387, "learning_rate": 1.4168150068337958e-05, "loss": 0.2693, "step": 4825 }, { "epoch": 0.38233313527431173, "grad_norm": 1.5961538588669137, "learning_rate": 1.4165817430520276e-05, "loss": 0.3366, "step": 4826 }, { "epoch": 0.3824123588829471, "grad_norm": 1.4955891414265299, "learning_rate": 1.4163484518403561e-05, "loss": 0.3621, "step": 4827 }, { "epoch": 0.3824915824915825, "grad_norm": 1.8994905647227531, "learning_rate": 1.4161151332141426e-05, "loss": 0.339, "step": 4828 }, { "epoch": 0.3825708061002179, "grad_norm": 1.4277600710447218, "learning_rate": 1.4158817871887497e-05, "loss": 0.3345, "step": 4829 }, { "epoch": 0.38265002970885326, "grad_norm": 1.1820947784878053, "learning_rate": 1.4156484137795424e-05, "loss": 0.2685, "step": 4830 }, { "epoch": 0.38272925331748864, "grad_norm": 1.464863891146817, "learning_rate": 1.4154150130018867e-05, "loss": 0.3885, "step": 4831 }, { "epoch": 0.38280847692612396, "grad_norm": 1.6709840314896864, "learning_rate": 1.4151815848711512e-05, "loss": 0.3355, "step": 4832 }, { "epoch": 0.38288770053475935, "grad_norm": 1.4371088335493813, "learning_rate": 1.4149481294027063e-05, "loss": 0.3113, "step": 4833 }, { "epoch": 0.3829669241433947, "grad_norm": 1.4629809768142543, "learning_rate": 1.4147146466119235e-05, "loss": 0.3118, "step": 4834 }, { "epoch": 0.3830461477520301, "grad_norm": 1.2887370470691075, "learning_rate": 1.4144811365141769e-05, "loss": 0.2397, "step": 4835 }, { "epoch": 0.3831253713606655, "grad_norm": 1.526638232785953, "learning_rate": 1.4142475991248417e-05, "loss": 0.3646, "step": 4836 }, { "epoch": 0.38320459496930087, "grad_norm": 1.8151728744650315, "learning_rate": 1.4140140344592952e-05, "loss": 0.4331, "step": 4837 }, { "epoch": 0.3832838185779362, "grad_norm": 1.5869284743567247, "learning_rate": 1.413780442532917e-05, "loss": 0.3556, "step": 4838 }, { "epoch": 0.3833630421865716, "grad_norm": 1.3999090768521412, "learning_rate": 1.4135468233610872e-05, "loss": 0.3369, "step": 4839 }, { "epoch": 0.38344226579520696, "grad_norm": 1.534627600910808, "learning_rate": 1.4133131769591893e-05, "loss": 0.2688, "step": 4840 }, { "epoch": 0.38352148940384234, "grad_norm": 1.3799826397724837, "learning_rate": 1.4130795033426073e-05, "loss": 0.2866, "step": 4841 }, { "epoch": 0.3836007130124777, "grad_norm": 1.5145454712738629, "learning_rate": 1.4128458025267276e-05, "loss": 0.3713, "step": 4842 }, { "epoch": 0.3836799366211131, "grad_norm": 1.4029868938789205, "learning_rate": 1.4126120745269382e-05, "loss": 0.3325, "step": 4843 }, { "epoch": 0.3837591602297485, "grad_norm": 1.3859297427363313, "learning_rate": 1.4123783193586294e-05, "loss": 0.3033, "step": 4844 }, { "epoch": 0.3838383838383838, "grad_norm": 1.4367793385527374, "learning_rate": 1.4121445370371922e-05, "loss": 0.2948, "step": 4845 }, { "epoch": 0.3839176074470192, "grad_norm": 1.3009985454714486, "learning_rate": 1.4119107275780203e-05, "loss": 0.2924, "step": 4846 }, { "epoch": 0.3839968310556546, "grad_norm": 1.3214844254389435, "learning_rate": 1.4116768909965092e-05, "loss": 0.2372, "step": 4847 }, { "epoch": 0.38407605466428996, "grad_norm": 1.2892725320025957, "learning_rate": 1.4114430273080558e-05, "loss": 0.2241, "step": 4848 }, { "epoch": 0.38415527827292534, "grad_norm": 1.4742226328357293, "learning_rate": 1.4112091365280585e-05, "loss": 0.384, "step": 4849 }, { "epoch": 0.3842345018815607, "grad_norm": 1.5086970159345738, "learning_rate": 1.4109752186719181e-05, "loss": 0.3531, "step": 4850 }, { "epoch": 0.3843137254901961, "grad_norm": 1.3932404244457717, "learning_rate": 1.4107412737550372e-05, "loss": 0.3144, "step": 4851 }, { "epoch": 0.3843929490988314, "grad_norm": 1.6274932328954217, "learning_rate": 1.4105073017928199e-05, "loss": 0.343, "step": 4852 }, { "epoch": 0.3844721727074668, "grad_norm": 1.5895181306760369, "learning_rate": 1.4102733028006719e-05, "loss": 0.3992, "step": 4853 }, { "epoch": 0.3845513963161022, "grad_norm": 1.4167651297118389, "learning_rate": 1.410039276794001e-05, "loss": 0.2739, "step": 4854 }, { "epoch": 0.38463061992473757, "grad_norm": 1.3318687998724372, "learning_rate": 1.4098052237882168e-05, "loss": 0.2687, "step": 4855 }, { "epoch": 0.38470984353337295, "grad_norm": 1.4652998253881695, "learning_rate": 1.4095711437987303e-05, "loss": 0.3214, "step": 4856 }, { "epoch": 0.38478906714200833, "grad_norm": 1.3185510641697167, "learning_rate": 1.4093370368409546e-05, "loss": 0.216, "step": 4857 }, { "epoch": 0.3848682907506437, "grad_norm": 1.7416634357536502, "learning_rate": 1.409102902930305e-05, "loss": 0.2907, "step": 4858 }, { "epoch": 0.38494751435927904, "grad_norm": 1.6139151540670138, "learning_rate": 1.4088687420821974e-05, "loss": 0.3849, "step": 4859 }, { "epoch": 0.3850267379679144, "grad_norm": 1.59243920936772, "learning_rate": 1.4086345543120508e-05, "loss": 0.3523, "step": 4860 }, { "epoch": 0.3851059615765498, "grad_norm": 1.7621266320877087, "learning_rate": 1.4084003396352848e-05, "loss": 0.3887, "step": 4861 }, { "epoch": 0.3851851851851852, "grad_norm": 1.4568306356231104, "learning_rate": 1.4081660980673215e-05, "loss": 0.3004, "step": 4862 }, { "epoch": 0.38526440879382057, "grad_norm": 1.499868370735634, "learning_rate": 1.4079318296235846e-05, "loss": 0.263, "step": 4863 }, { "epoch": 0.38534363240245595, "grad_norm": 1.389251130343504, "learning_rate": 1.4076975343194996e-05, "loss": 0.2393, "step": 4864 }, { "epoch": 0.38542285601109133, "grad_norm": 1.8785391613569955, "learning_rate": 1.4074632121704941e-05, "loss": 0.4887, "step": 4865 }, { "epoch": 0.38550207961972666, "grad_norm": 1.7127361770676095, "learning_rate": 1.4072288631919962e-05, "loss": 0.3281, "step": 4866 }, { "epoch": 0.38558130322836204, "grad_norm": 1.4570751925136936, "learning_rate": 1.406994487399437e-05, "loss": 0.3089, "step": 4867 }, { "epoch": 0.3856605268369974, "grad_norm": 1.4250013415436769, "learning_rate": 1.4067600848082496e-05, "loss": 0.2918, "step": 4868 }, { "epoch": 0.3857397504456328, "grad_norm": 1.3092557186054714, "learning_rate": 1.4065256554338675e-05, "loss": 0.2441, "step": 4869 }, { "epoch": 0.3858189740542682, "grad_norm": 1.6026662951991348, "learning_rate": 1.406291199291727e-05, "loss": 0.3461, "step": 4870 }, { "epoch": 0.38589819766290356, "grad_norm": 1.7070252621318853, "learning_rate": 1.4060567163972663e-05, "loss": 0.4226, "step": 4871 }, { "epoch": 0.38597742127153895, "grad_norm": 1.392679573232567, "learning_rate": 1.4058222067659244e-05, "loss": 0.3104, "step": 4872 }, { "epoch": 0.38605664488017427, "grad_norm": 1.3057904428747868, "learning_rate": 1.405587670413143e-05, "loss": 0.2831, "step": 4873 }, { "epoch": 0.38613586848880965, "grad_norm": 1.693204687443313, "learning_rate": 1.405353107354365e-05, "loss": 0.3544, "step": 4874 }, { "epoch": 0.38621509209744503, "grad_norm": 1.6760805100532747, "learning_rate": 1.4051185176050353e-05, "loss": 0.3324, "step": 4875 }, { "epoch": 0.3862943157060804, "grad_norm": 1.453011845161088, "learning_rate": 1.4048839011806006e-05, "loss": 0.2512, "step": 4876 }, { "epoch": 0.3863735393147158, "grad_norm": 1.5671310369401856, "learning_rate": 1.404649258096509e-05, "loss": 0.4056, "step": 4877 }, { "epoch": 0.3864527629233512, "grad_norm": 1.4378890463135665, "learning_rate": 1.4044145883682108e-05, "loss": 0.2719, "step": 4878 }, { "epoch": 0.3865319865319865, "grad_norm": 1.656400400040357, "learning_rate": 1.4041798920111582e-05, "loss": 0.2845, "step": 4879 }, { "epoch": 0.3866112101406219, "grad_norm": 1.4339987754579901, "learning_rate": 1.4039451690408042e-05, "loss": 0.3309, "step": 4880 }, { "epoch": 0.38669043374925727, "grad_norm": 1.3487398994118345, "learning_rate": 1.4037104194726048e-05, "loss": 0.269, "step": 4881 }, { "epoch": 0.38676965735789265, "grad_norm": 1.6421955042997727, "learning_rate": 1.4034756433220164e-05, "loss": 0.3485, "step": 4882 }, { "epoch": 0.38684888096652803, "grad_norm": 1.4465869016485433, "learning_rate": 1.4032408406044986e-05, "loss": 0.2558, "step": 4883 }, { "epoch": 0.3869281045751634, "grad_norm": 1.3221438980799047, "learning_rate": 1.4030060113355118e-05, "loss": 0.2674, "step": 4884 }, { "epoch": 0.3870073281837988, "grad_norm": 1.4552553725512243, "learning_rate": 1.402771155530518e-05, "loss": 0.3098, "step": 4885 }, { "epoch": 0.3870865517924341, "grad_norm": 1.7244149169211545, "learning_rate": 1.4025362732049816e-05, "loss": 0.324, "step": 4886 }, { "epoch": 0.3871657754010695, "grad_norm": 1.6021578566570707, "learning_rate": 1.4023013643743688e-05, "loss": 0.301, "step": 4887 }, { "epoch": 0.3872449990097049, "grad_norm": 1.245091050052912, "learning_rate": 1.4020664290541465e-05, "loss": 0.223, "step": 4888 }, { "epoch": 0.38732422261834026, "grad_norm": 1.5142043254638706, "learning_rate": 1.4018314672597848e-05, "loss": 0.3521, "step": 4889 }, { "epoch": 0.38740344622697565, "grad_norm": 1.605573363303424, "learning_rate": 1.4015964790067545e-05, "loss": 0.3046, "step": 4890 }, { "epoch": 0.387482669835611, "grad_norm": 1.3940049026532149, "learning_rate": 1.401361464310528e-05, "loss": 0.2442, "step": 4891 }, { "epoch": 0.3875618934442464, "grad_norm": 1.8692209407690585, "learning_rate": 1.4011264231865807e-05, "loss": 0.3858, "step": 4892 }, { "epoch": 0.38764111705288173, "grad_norm": 1.5836848128189307, "learning_rate": 1.4008913556503885e-05, "loss": 0.3213, "step": 4893 }, { "epoch": 0.3877203406615171, "grad_norm": 1.5546143594560407, "learning_rate": 1.4006562617174292e-05, "loss": 0.2907, "step": 4894 }, { "epoch": 0.3877995642701525, "grad_norm": 1.3973791623022782, "learning_rate": 1.4004211414031831e-05, "loss": 0.2564, "step": 4895 }, { "epoch": 0.3878787878787879, "grad_norm": 1.4495212236430446, "learning_rate": 1.4001859947231316e-05, "loss": 0.2934, "step": 4896 }, { "epoch": 0.38795801148742326, "grad_norm": 1.550855352746961, "learning_rate": 1.3999508216927578e-05, "loss": 0.2918, "step": 4897 }, { "epoch": 0.38803723509605864, "grad_norm": 1.6357448051825703, "learning_rate": 1.399715622327547e-05, "loss": 0.3758, "step": 4898 }, { "epoch": 0.388116458704694, "grad_norm": 1.4102534696996243, "learning_rate": 1.3994803966429854e-05, "loss": 0.3107, "step": 4899 }, { "epoch": 0.38819568231332935, "grad_norm": 1.4405333401650178, "learning_rate": 1.3992451446545624e-05, "loss": 0.2864, "step": 4900 }, { "epoch": 0.38827490592196473, "grad_norm": 1.3815074972136017, "learning_rate": 1.3990098663777674e-05, "loss": 0.2869, "step": 4901 }, { "epoch": 0.3883541295306001, "grad_norm": 1.2807015578899121, "learning_rate": 1.3987745618280925e-05, "loss": 0.2803, "step": 4902 }, { "epoch": 0.3884333531392355, "grad_norm": 1.7737699226786035, "learning_rate": 1.3985392310210318e-05, "loss": 0.3655, "step": 4903 }, { "epoch": 0.3885125767478709, "grad_norm": 1.5189345534784644, "learning_rate": 1.39830387397208e-05, "loss": 0.3009, "step": 4904 }, { "epoch": 0.38859180035650626, "grad_norm": 1.4081276248404047, "learning_rate": 1.3980684906967348e-05, "loss": 0.2972, "step": 4905 }, { "epoch": 0.38867102396514164, "grad_norm": 1.5252911547919124, "learning_rate": 1.3978330812104947e-05, "loss": 0.4237, "step": 4906 }, { "epoch": 0.38875024757377696, "grad_norm": 1.857577057367717, "learning_rate": 1.3975976455288607e-05, "loss": 0.4442, "step": 4907 }, { "epoch": 0.38882947118241235, "grad_norm": 1.5517775074502338, "learning_rate": 1.397362183667335e-05, "loss": 0.299, "step": 4908 }, { "epoch": 0.3889086947910477, "grad_norm": 1.7384555905688281, "learning_rate": 1.3971266956414211e-05, "loss": 0.3328, "step": 4909 }, { "epoch": 0.3889879183996831, "grad_norm": 1.5151401049783881, "learning_rate": 1.3968911814666252e-05, "loss": 0.2712, "step": 4910 }, { "epoch": 0.3890671420083185, "grad_norm": 1.4591876739845957, "learning_rate": 1.3966556411584548e-05, "loss": 0.2688, "step": 4911 }, { "epoch": 0.38914636561695387, "grad_norm": 1.197145569434945, "learning_rate": 1.396420074732419e-05, "loss": 0.2645, "step": 4912 }, { "epoch": 0.38922558922558925, "grad_norm": 1.4483910374396045, "learning_rate": 1.396184482204029e-05, "loss": 0.3459, "step": 4913 }, { "epoch": 0.3893048128342246, "grad_norm": 1.6798923908578112, "learning_rate": 1.3959488635887967e-05, "loss": 0.3377, "step": 4914 }, { "epoch": 0.38938403644285996, "grad_norm": 1.8372114205527845, "learning_rate": 1.3957132189022373e-05, "loss": 0.3953, "step": 4915 }, { "epoch": 0.38946326005149534, "grad_norm": 1.5875159009324193, "learning_rate": 1.3954775481598665e-05, "loss": 0.3627, "step": 4916 }, { "epoch": 0.3895424836601307, "grad_norm": 1.536919695369658, "learning_rate": 1.3952418513772016e-05, "loss": 0.3839, "step": 4917 }, { "epoch": 0.3896217072687661, "grad_norm": 1.5192670346215063, "learning_rate": 1.3950061285697629e-05, "loss": 0.3168, "step": 4918 }, { "epoch": 0.3897009308774015, "grad_norm": 1.3322279353201638, "learning_rate": 1.3947703797530716e-05, "loss": 0.2695, "step": 4919 }, { "epoch": 0.3897801544860368, "grad_norm": 1.900708452974259, "learning_rate": 1.3945346049426498e-05, "loss": 0.4402, "step": 4920 }, { "epoch": 0.3898593780946722, "grad_norm": 1.3076043739971344, "learning_rate": 1.3942988041540226e-05, "loss": 0.2402, "step": 4921 }, { "epoch": 0.3899386017033076, "grad_norm": 1.4605614608888793, "learning_rate": 1.394062977402717e-05, "loss": 0.2432, "step": 4922 }, { "epoch": 0.39001782531194296, "grad_norm": 1.498653246339802, "learning_rate": 1.3938271247042601e-05, "loss": 0.3179, "step": 4923 }, { "epoch": 0.39009704892057834, "grad_norm": 1.3603336566482187, "learning_rate": 1.3935912460741818e-05, "loss": 0.2831, "step": 4924 }, { "epoch": 0.3901762725292137, "grad_norm": 1.595824176692529, "learning_rate": 1.3933553415280142e-05, "loss": 0.3657, "step": 4925 }, { "epoch": 0.3902554961378491, "grad_norm": 1.5565791931987947, "learning_rate": 1.3931194110812896e-05, "loss": 0.4068, "step": 4926 }, { "epoch": 0.3903347197464844, "grad_norm": 1.5866391152435144, "learning_rate": 1.3928834547495438e-05, "loss": 0.3923, "step": 4927 }, { "epoch": 0.3904139433551198, "grad_norm": 1.2514747270586963, "learning_rate": 1.3926474725483125e-05, "loss": 0.3238, "step": 4928 }, { "epoch": 0.3904931669637552, "grad_norm": 1.2428996148590872, "learning_rate": 1.3924114644931346e-05, "loss": 0.2322, "step": 4929 }, { "epoch": 0.39057239057239057, "grad_norm": 1.5089946511845411, "learning_rate": 1.3921754305995501e-05, "loss": 0.3293, "step": 4930 }, { "epoch": 0.39065161418102595, "grad_norm": 1.4372606111853505, "learning_rate": 1.3919393708831004e-05, "loss": 0.3526, "step": 4931 }, { "epoch": 0.39073083778966133, "grad_norm": 2.350554482680579, "learning_rate": 1.3917032853593289e-05, "loss": 0.4767, "step": 4932 }, { "epoch": 0.3908100613982967, "grad_norm": 1.386996836373447, "learning_rate": 1.3914671740437811e-05, "loss": 0.2811, "step": 4933 }, { "epoch": 0.39088928500693204, "grad_norm": 1.480487325372816, "learning_rate": 1.3912310369520032e-05, "loss": 0.3345, "step": 4934 }, { "epoch": 0.3909685086155674, "grad_norm": 1.774852860448413, "learning_rate": 1.3909948740995442e-05, "loss": 0.3905, "step": 4935 }, { "epoch": 0.3910477322242028, "grad_norm": 1.5142521263545279, "learning_rate": 1.3907586855019538e-05, "loss": 0.4321, "step": 4936 }, { "epoch": 0.3911269558328382, "grad_norm": 1.5950034703760658, "learning_rate": 1.3905224711747844e-05, "loss": 0.3508, "step": 4937 }, { "epoch": 0.39120617944147357, "grad_norm": 1.3624766687345164, "learning_rate": 1.3902862311335896e-05, "loss": 0.3258, "step": 4938 }, { "epoch": 0.39128540305010895, "grad_norm": 1.1130591754136652, "learning_rate": 1.390049965393924e-05, "loss": 0.2118, "step": 4939 }, { "epoch": 0.39136462665874433, "grad_norm": 1.3571908795904069, "learning_rate": 1.3898136739713451e-05, "loss": 0.3062, "step": 4940 }, { "epoch": 0.39144385026737966, "grad_norm": 1.1739721243409775, "learning_rate": 1.3895773568814118e-05, "loss": 0.2056, "step": 4941 }, { "epoch": 0.39152307387601504, "grad_norm": 1.5190804103607454, "learning_rate": 1.3893410141396835e-05, "loss": 0.3981, "step": 4942 }, { "epoch": 0.3916022974846504, "grad_norm": 1.8777037940433605, "learning_rate": 1.3891046457617233e-05, "loss": 0.3201, "step": 4943 }, { "epoch": 0.3916815210932858, "grad_norm": 1.457636938366171, "learning_rate": 1.388868251763094e-05, "loss": 0.3054, "step": 4944 }, { "epoch": 0.3917607447019212, "grad_norm": 1.4995620941905698, "learning_rate": 1.3886318321593614e-05, "loss": 0.3182, "step": 4945 }, { "epoch": 0.39183996831055656, "grad_norm": 1.5102016269779819, "learning_rate": 1.388395386966093e-05, "loss": 0.3344, "step": 4946 }, { "epoch": 0.39191919191919194, "grad_norm": 1.619362491291635, "learning_rate": 1.388158916198857e-05, "loss": 0.3207, "step": 4947 }, { "epoch": 0.39199841552782727, "grad_norm": 1.6366579897083362, "learning_rate": 1.3879224198732239e-05, "loss": 0.2877, "step": 4948 }, { "epoch": 0.39207763913646265, "grad_norm": 1.3143741865211294, "learning_rate": 1.3876858980047665e-05, "loss": 0.2965, "step": 4949 }, { "epoch": 0.39215686274509803, "grad_norm": 1.307834048146454, "learning_rate": 1.3874493506090578e-05, "loss": 0.2229, "step": 4950 }, { "epoch": 0.3922360863537334, "grad_norm": 1.3970734522709591, "learning_rate": 1.3872127777016739e-05, "loss": 0.3171, "step": 4951 }, { "epoch": 0.3923153099623688, "grad_norm": 1.523061243712294, "learning_rate": 1.3869761792981915e-05, "loss": 0.2829, "step": 4952 }, { "epoch": 0.3923945335710042, "grad_norm": 1.72991733701946, "learning_rate": 1.3867395554141899e-05, "loss": 0.4397, "step": 4953 }, { "epoch": 0.39247375717963956, "grad_norm": 1.5156389702467246, "learning_rate": 1.3865029060652493e-05, "loss": 0.2811, "step": 4954 }, { "epoch": 0.3925529807882749, "grad_norm": 1.544392007745256, "learning_rate": 1.3862662312669518e-05, "loss": 0.2781, "step": 4955 }, { "epoch": 0.39263220439691027, "grad_norm": 1.1763181963539993, "learning_rate": 1.386029531034882e-05, "loss": 0.1858, "step": 4956 }, { "epoch": 0.39271142800554565, "grad_norm": 1.436715364023107, "learning_rate": 1.385792805384625e-05, "loss": 0.2848, "step": 4957 }, { "epoch": 0.39279065161418103, "grad_norm": 1.6864050374375252, "learning_rate": 1.3855560543317679e-05, "loss": 0.431, "step": 4958 }, { "epoch": 0.3928698752228164, "grad_norm": 1.2407580960938767, "learning_rate": 1.3853192778919e-05, "loss": 0.1618, "step": 4959 }, { "epoch": 0.3929490988314518, "grad_norm": 1.5653234500224156, "learning_rate": 1.3850824760806115e-05, "loss": 0.391, "step": 4960 }, { "epoch": 0.3930283224400871, "grad_norm": 2.001064179629869, "learning_rate": 1.384845648913495e-05, "loss": 0.4012, "step": 4961 }, { "epoch": 0.3931075460487225, "grad_norm": 1.475863294319895, "learning_rate": 1.3846087964061442e-05, "loss": 0.3193, "step": 4962 }, { "epoch": 0.3931867696573579, "grad_norm": 1.4870112306973544, "learning_rate": 1.3843719185741548e-05, "loss": 0.3913, "step": 4963 }, { "epoch": 0.39326599326599326, "grad_norm": 1.7489630039622186, "learning_rate": 1.3841350154331239e-05, "loss": 0.379, "step": 4964 }, { "epoch": 0.39334521687462864, "grad_norm": 1.285459337194145, "learning_rate": 1.383898086998651e-05, "loss": 0.2464, "step": 4965 }, { "epoch": 0.393424440483264, "grad_norm": 1.3767443739787066, "learning_rate": 1.3836611332863356e-05, "loss": 0.2907, "step": 4966 }, { "epoch": 0.3935036640918994, "grad_norm": 1.6696218537345195, "learning_rate": 1.383424154311781e-05, "loss": 0.2769, "step": 4967 }, { "epoch": 0.39358288770053473, "grad_norm": 1.5706534290720284, "learning_rate": 1.383187150090591e-05, "loss": 0.3987, "step": 4968 }, { "epoch": 0.3936621113091701, "grad_norm": 1.6106067471922265, "learning_rate": 1.3829501206383704e-05, "loss": 0.3045, "step": 4969 }, { "epoch": 0.3937413349178055, "grad_norm": 1.4170378107587347, "learning_rate": 1.3827130659707275e-05, "loss": 0.2872, "step": 4970 }, { "epoch": 0.3938205585264409, "grad_norm": 1.7369049510017955, "learning_rate": 1.3824759861032704e-05, "loss": 0.4081, "step": 4971 }, { "epoch": 0.39389978213507626, "grad_norm": 1.5627150123986702, "learning_rate": 1.38223888105161e-05, "loss": 0.2873, "step": 4972 }, { "epoch": 0.39397900574371164, "grad_norm": 1.5990469821045645, "learning_rate": 1.3820017508313587e-05, "loss": 0.285, "step": 4973 }, { "epoch": 0.394058229352347, "grad_norm": 1.574830293032543, "learning_rate": 1.3817645954581301e-05, "loss": 0.287, "step": 4974 }, { "epoch": 0.39413745296098235, "grad_norm": 1.922110188389245, "learning_rate": 1.3815274149475395e-05, "loss": 0.404, "step": 4975 }, { "epoch": 0.39421667656961773, "grad_norm": 1.3959745443810705, "learning_rate": 1.3812902093152047e-05, "loss": 0.3102, "step": 4976 }, { "epoch": 0.3942959001782531, "grad_norm": 1.3148567603923778, "learning_rate": 1.3810529785767444e-05, "loss": 0.2902, "step": 4977 }, { "epoch": 0.3943751237868885, "grad_norm": 1.562263005375727, "learning_rate": 1.3808157227477788e-05, "loss": 0.3135, "step": 4978 }, { "epoch": 0.3944543473955239, "grad_norm": 1.445019603491408, "learning_rate": 1.3805784418439303e-05, "loss": 0.359, "step": 4979 }, { "epoch": 0.39453357100415926, "grad_norm": 1.4375606960140948, "learning_rate": 1.3803411358808222e-05, "loss": 0.3641, "step": 4980 }, { "epoch": 0.39461279461279464, "grad_norm": 1.699124449073503, "learning_rate": 1.3801038048740811e-05, "loss": 0.3997, "step": 4981 }, { "epoch": 0.39469201822142996, "grad_norm": 1.5823737147711643, "learning_rate": 1.379866448839333e-05, "loss": 0.3482, "step": 4982 }, { "epoch": 0.39477124183006534, "grad_norm": 1.3914423040345811, "learning_rate": 1.379629067792207e-05, "loss": 0.2589, "step": 4983 }, { "epoch": 0.3948504654387007, "grad_norm": 1.7521132149510126, "learning_rate": 1.3793916617483338e-05, "loss": 0.3454, "step": 4984 }, { "epoch": 0.3949296890473361, "grad_norm": 1.5304703868662894, "learning_rate": 1.379154230723345e-05, "loss": 0.3699, "step": 4985 }, { "epoch": 0.3950089126559715, "grad_norm": 1.4582817106605426, "learning_rate": 1.3789167747328746e-05, "loss": 0.3478, "step": 4986 }, { "epoch": 0.39508813626460687, "grad_norm": 1.6094975130019122, "learning_rate": 1.3786792937925576e-05, "loss": 0.2726, "step": 4987 }, { "epoch": 0.39516735987324225, "grad_norm": 1.4944047635188087, "learning_rate": 1.3784417879180314e-05, "loss": 0.3977, "step": 4988 }, { "epoch": 0.3952465834818776, "grad_norm": 1.171463862044395, "learning_rate": 1.3782042571249343e-05, "loss": 0.245, "step": 4989 }, { "epoch": 0.39532580709051296, "grad_norm": 1.4369194403357708, "learning_rate": 1.3779667014289067e-05, "loss": 0.341, "step": 4990 }, { "epoch": 0.39540503069914834, "grad_norm": 1.867628309453861, "learning_rate": 1.3777291208455902e-05, "loss": 0.3443, "step": 4991 }, { "epoch": 0.3954842543077837, "grad_norm": 1.4653879965908314, "learning_rate": 1.3774915153906292e-05, "loss": 0.3819, "step": 4992 }, { "epoch": 0.3955634779164191, "grad_norm": 1.7858215676104223, "learning_rate": 1.377253885079668e-05, "loss": 0.3648, "step": 4993 }, { "epoch": 0.3956427015250545, "grad_norm": 1.5388153043970205, "learning_rate": 1.3770162299283535e-05, "loss": 0.344, "step": 4994 }, { "epoch": 0.39572192513368987, "grad_norm": 1.3894138430179441, "learning_rate": 1.3767785499523347e-05, "loss": 0.3939, "step": 4995 }, { "epoch": 0.3958011487423252, "grad_norm": 1.29479156077149, "learning_rate": 1.376540845167261e-05, "loss": 0.3124, "step": 4996 }, { "epoch": 0.3958803723509606, "grad_norm": 1.2249471037141981, "learning_rate": 1.3763031155887847e-05, "loss": 0.2343, "step": 4997 }, { "epoch": 0.39595959595959596, "grad_norm": 1.2296091385926569, "learning_rate": 1.3760653612325588e-05, "loss": 0.1757, "step": 4998 }, { "epoch": 0.39603881956823134, "grad_norm": 1.7019888682035174, "learning_rate": 1.3758275821142382e-05, "loss": 0.4034, "step": 4999 }, { "epoch": 0.3961180431768667, "grad_norm": 1.5817909189265358, "learning_rate": 1.3755897782494803e-05, "loss": 0.2973, "step": 5000 }, { "epoch": 0.3961972667855021, "grad_norm": 1.464106757563948, "learning_rate": 1.375351949653942e-05, "loss": 0.3074, "step": 5001 }, { "epoch": 0.3962764903941374, "grad_norm": 1.5710960764287611, "learning_rate": 1.375114096343284e-05, "loss": 0.3292, "step": 5002 }, { "epoch": 0.3963557140027728, "grad_norm": 1.3406466771326344, "learning_rate": 1.3748762183331681e-05, "loss": 0.2521, "step": 5003 }, { "epoch": 0.3964349376114082, "grad_norm": 1.4091705003260702, "learning_rate": 1.3746383156392566e-05, "loss": 0.3176, "step": 5004 }, { "epoch": 0.39651416122004357, "grad_norm": 1.7205745709410996, "learning_rate": 1.374400388277215e-05, "loss": 0.2981, "step": 5005 }, { "epoch": 0.39659338482867895, "grad_norm": 1.3665911662947505, "learning_rate": 1.3741624362627091e-05, "loss": 0.2548, "step": 5006 }, { "epoch": 0.39667260843731433, "grad_norm": 1.1696985243092033, "learning_rate": 1.373924459611407e-05, "loss": 0.2767, "step": 5007 }, { "epoch": 0.3967518320459497, "grad_norm": 1.492781908167231, "learning_rate": 1.3736864583389789e-05, "loss": 0.3531, "step": 5008 }, { "epoch": 0.39683105565458504, "grad_norm": 1.7228978934711823, "learning_rate": 1.373448432461095e-05, "loss": 0.3194, "step": 5009 }, { "epoch": 0.3969102792632204, "grad_norm": 1.4416531593543607, "learning_rate": 1.373210381993429e-05, "loss": 0.3286, "step": 5010 }, { "epoch": 0.3969895028718558, "grad_norm": 1.2285054962792505, "learning_rate": 1.3729723069516554e-05, "loss": 0.2804, "step": 5011 }, { "epoch": 0.3970687264804912, "grad_norm": 1.1976794926937506, "learning_rate": 1.3727342073514497e-05, "loss": 0.2344, "step": 5012 }, { "epoch": 0.39714795008912657, "grad_norm": 1.5596682285242176, "learning_rate": 1.3724960832084902e-05, "loss": 0.3788, "step": 5013 }, { "epoch": 0.39722717369776195, "grad_norm": 1.4818707493373284, "learning_rate": 1.3722579345384558e-05, "loss": 0.2911, "step": 5014 }, { "epoch": 0.39730639730639733, "grad_norm": 1.4985081443216186, "learning_rate": 1.3720197613570272e-05, "loss": 0.3241, "step": 5015 }, { "epoch": 0.39738562091503266, "grad_norm": 1.495735575665788, "learning_rate": 1.3717815636798879e-05, "loss": 0.2311, "step": 5016 }, { "epoch": 0.39746484452366804, "grad_norm": 1.3790531256193288, "learning_rate": 1.3715433415227212e-05, "loss": 0.299, "step": 5017 }, { "epoch": 0.3975440681323034, "grad_norm": 1.6006274992818033, "learning_rate": 1.3713050949012134e-05, "loss": 0.2669, "step": 5018 }, { "epoch": 0.3976232917409388, "grad_norm": 1.5820256933793853, "learning_rate": 1.3710668238310519e-05, "loss": 0.3796, "step": 5019 }, { "epoch": 0.3977025153495742, "grad_norm": 1.7423856715804857, "learning_rate": 1.3708285283279252e-05, "loss": 0.3166, "step": 5020 }, { "epoch": 0.39778173895820956, "grad_norm": 1.7575159874833988, "learning_rate": 1.3705902084075244e-05, "loss": 0.3899, "step": 5021 }, { "epoch": 0.39786096256684494, "grad_norm": 2.0061270150199446, "learning_rate": 1.3703518640855414e-05, "loss": 0.428, "step": 5022 }, { "epoch": 0.39794018617548027, "grad_norm": 1.3674633621831715, "learning_rate": 1.37011349537767e-05, "loss": 0.3994, "step": 5023 }, { "epoch": 0.39801940978411565, "grad_norm": 1.6041104223436808, "learning_rate": 1.3698751022996061e-05, "loss": 0.378, "step": 5024 }, { "epoch": 0.39809863339275103, "grad_norm": 1.6551307112366629, "learning_rate": 1.3696366848670464e-05, "loss": 0.3998, "step": 5025 }, { "epoch": 0.3981778570013864, "grad_norm": 1.412588586855754, "learning_rate": 1.3693982430956896e-05, "loss": 0.2962, "step": 5026 }, { "epoch": 0.3982570806100218, "grad_norm": 1.330918437743754, "learning_rate": 1.369159777001236e-05, "loss": 0.2318, "step": 5027 }, { "epoch": 0.3983363042186572, "grad_norm": 1.4553267716806277, "learning_rate": 1.368921286599387e-05, "loss": 0.3721, "step": 5028 }, { "epoch": 0.39841552782729256, "grad_norm": 1.6678420955887867, "learning_rate": 1.368682771905847e-05, "loss": 0.3242, "step": 5029 }, { "epoch": 0.3984947514359279, "grad_norm": 1.4436114898942265, "learning_rate": 1.3684442329363199e-05, "loss": 0.3532, "step": 5030 }, { "epoch": 0.39857397504456327, "grad_norm": 1.485021287915516, "learning_rate": 1.368205669706513e-05, "loss": 0.3453, "step": 5031 }, { "epoch": 0.39865319865319865, "grad_norm": 1.7388753727727932, "learning_rate": 1.3679670822321347e-05, "loss": 0.4663, "step": 5032 }, { "epoch": 0.39873242226183403, "grad_norm": 1.3623754471291856, "learning_rate": 1.3677284705288943e-05, "loss": 0.3294, "step": 5033 }, { "epoch": 0.3988116458704694, "grad_norm": 1.5637861204893544, "learning_rate": 1.3674898346125036e-05, "loss": 0.292, "step": 5034 }, { "epoch": 0.3988908694791048, "grad_norm": 1.8638612822671077, "learning_rate": 1.3672511744986756e-05, "loss": 0.4539, "step": 5035 }, { "epoch": 0.3989700930877402, "grad_norm": 1.5244759697815742, "learning_rate": 1.3670124902031248e-05, "loss": 0.3595, "step": 5036 }, { "epoch": 0.3990493166963755, "grad_norm": 1.2151913711526459, "learning_rate": 1.3667737817415679e-05, "loss": 0.2267, "step": 5037 }, { "epoch": 0.3991285403050109, "grad_norm": 1.2503137345771052, "learning_rate": 1.3665350491297215e-05, "loss": 0.2677, "step": 5038 }, { "epoch": 0.39920776391364626, "grad_norm": 1.3803298061296474, "learning_rate": 1.3662962923833063e-05, "loss": 0.3013, "step": 5039 }, { "epoch": 0.39928698752228164, "grad_norm": 1.5910524223780635, "learning_rate": 1.3660575115180427e-05, "loss": 0.336, "step": 5040 }, { "epoch": 0.399366211130917, "grad_norm": 1.332126057527755, "learning_rate": 1.3658187065496533e-05, "loss": 0.3224, "step": 5041 }, { "epoch": 0.3994454347395524, "grad_norm": 1.3842123919676765, "learning_rate": 1.365579877493862e-05, "loss": 0.3962, "step": 5042 }, { "epoch": 0.39952465834818773, "grad_norm": 1.1278405464818457, "learning_rate": 1.3653410243663953e-05, "loss": 0.2196, "step": 5043 }, { "epoch": 0.3996038819568231, "grad_norm": 2.004902343504536, "learning_rate": 1.3651021471829797e-05, "loss": 0.365, "step": 5044 }, { "epoch": 0.3996831055654585, "grad_norm": 1.3966098423418447, "learning_rate": 1.3648632459593444e-05, "loss": 0.4062, "step": 5045 }, { "epoch": 0.3997623291740939, "grad_norm": 1.4737089172015694, "learning_rate": 1.3646243207112204e-05, "loss": 0.336, "step": 5046 }, { "epoch": 0.39984155278272926, "grad_norm": 1.325129287968291, "learning_rate": 1.3643853714543389e-05, "loss": 0.2506, "step": 5047 }, { "epoch": 0.39992077639136464, "grad_norm": 1.4796352957074, "learning_rate": 1.3641463982044343e-05, "loss": 0.3205, "step": 5048 }, { "epoch": 0.4, "grad_norm": 1.2825025572461901, "learning_rate": 1.3639074009772412e-05, "loss": 0.3262, "step": 5049 }, { "epoch": 0.40007922360863535, "grad_norm": 1.5490918508835396, "learning_rate": 1.3636683797884971e-05, "loss": 0.297, "step": 5050 }, { "epoch": 0.40015844721727073, "grad_norm": 1.5654343330054767, "learning_rate": 1.36342933465394e-05, "loss": 0.3065, "step": 5051 }, { "epoch": 0.4002376708259061, "grad_norm": 1.6738514056112284, "learning_rate": 1.3631902655893096e-05, "loss": 0.3508, "step": 5052 }, { "epoch": 0.4003168944345415, "grad_norm": 1.6050114072452457, "learning_rate": 1.3629511726103482e-05, "loss": 0.4524, "step": 5053 }, { "epoch": 0.4003961180431769, "grad_norm": 1.5027399407005393, "learning_rate": 1.3627120557327982e-05, "loss": 0.3557, "step": 5054 }, { "epoch": 0.40047534165181226, "grad_norm": 1.690092647585083, "learning_rate": 1.3624729149724047e-05, "loss": 0.4404, "step": 5055 }, { "epoch": 0.40055456526044764, "grad_norm": 1.4935298015860123, "learning_rate": 1.362233750344914e-05, "loss": 0.2976, "step": 5056 }, { "epoch": 0.40063378886908296, "grad_norm": 1.4219310251649206, "learning_rate": 1.3619945618660735e-05, "loss": 0.2338, "step": 5057 }, { "epoch": 0.40071301247771834, "grad_norm": 1.4265664738658819, "learning_rate": 1.3617553495516332e-05, "loss": 0.3311, "step": 5058 }, { "epoch": 0.4007922360863537, "grad_norm": 1.5142283373178527, "learning_rate": 1.3615161134173435e-05, "loss": 0.2608, "step": 5059 }, { "epoch": 0.4008714596949891, "grad_norm": 1.395747327231729, "learning_rate": 1.3612768534789573e-05, "loss": 0.3121, "step": 5060 }, { "epoch": 0.4009506833036245, "grad_norm": 1.569119538801247, "learning_rate": 1.3610375697522287e-05, "loss": 0.2611, "step": 5061 }, { "epoch": 0.40102990691225987, "grad_norm": 1.5475282295528205, "learning_rate": 1.3607982622529135e-05, "loss": 0.3369, "step": 5062 }, { "epoch": 0.40110913052089525, "grad_norm": 1.3391420756406291, "learning_rate": 1.3605589309967686e-05, "loss": 0.2263, "step": 5063 }, { "epoch": 0.4011883541295306, "grad_norm": 1.616637222299692, "learning_rate": 1.3603195759995531e-05, "loss": 0.2968, "step": 5064 }, { "epoch": 0.40126757773816596, "grad_norm": 1.310848605966884, "learning_rate": 1.3600801972770272e-05, "loss": 0.2965, "step": 5065 }, { "epoch": 0.40134680134680134, "grad_norm": 1.317397509847661, "learning_rate": 1.3598407948449528e-05, "loss": 0.2648, "step": 5066 }, { "epoch": 0.4014260249554367, "grad_norm": 1.8128827078877061, "learning_rate": 1.3596013687190936e-05, "loss": 0.3586, "step": 5067 }, { "epoch": 0.4015052485640721, "grad_norm": 1.5154107649780881, "learning_rate": 1.3593619189152146e-05, "loss": 0.2863, "step": 5068 }, { "epoch": 0.4015844721727075, "grad_norm": 2.2402790821417855, "learning_rate": 1.3591224454490824e-05, "loss": 0.5488, "step": 5069 }, { "epoch": 0.40166369578134287, "grad_norm": 1.3407774317326246, "learning_rate": 1.3588829483364652e-05, "loss": 0.2998, "step": 5070 }, { "epoch": 0.4017429193899782, "grad_norm": 1.543450009768594, "learning_rate": 1.3586434275931324e-05, "loss": 0.3311, "step": 5071 }, { "epoch": 0.4018221429986136, "grad_norm": 1.1472374992200711, "learning_rate": 1.358403883234856e-05, "loss": 0.2274, "step": 5072 }, { "epoch": 0.40190136660724896, "grad_norm": 1.4854762026695074, "learning_rate": 1.358164315277408e-05, "loss": 0.352, "step": 5073 }, { "epoch": 0.40198059021588434, "grad_norm": 1.7722309855181337, "learning_rate": 1.3579247237365634e-05, "loss": 0.346, "step": 5074 }, { "epoch": 0.4020598138245197, "grad_norm": 1.6302962553982612, "learning_rate": 1.357685108628098e-05, "loss": 0.2874, "step": 5075 }, { "epoch": 0.4021390374331551, "grad_norm": 1.4946590974993992, "learning_rate": 1.3574454699677893e-05, "loss": 0.3132, "step": 5076 }, { "epoch": 0.4022182610417904, "grad_norm": 1.4606986865603948, "learning_rate": 1.357205807771416e-05, "loss": 0.3539, "step": 5077 }, { "epoch": 0.4022974846504258, "grad_norm": 1.3887744348364097, "learning_rate": 1.3569661220547596e-05, "loss": 0.2618, "step": 5078 }, { "epoch": 0.4023767082590612, "grad_norm": 1.5906813703947276, "learning_rate": 1.3567264128336013e-05, "loss": 0.2608, "step": 5079 }, { "epoch": 0.40245593186769657, "grad_norm": 1.3139415967821995, "learning_rate": 1.3564866801237254e-05, "loss": 0.3073, "step": 5080 }, { "epoch": 0.40253515547633195, "grad_norm": 1.2613576760617309, "learning_rate": 1.3562469239409166e-05, "loss": 0.1679, "step": 5081 }, { "epoch": 0.40261437908496733, "grad_norm": 1.3019899148352339, "learning_rate": 1.3560071443009622e-05, "loss": 0.2993, "step": 5082 }, { "epoch": 0.4026936026936027, "grad_norm": 1.3826184365597471, "learning_rate": 1.3557673412196504e-05, "loss": 0.3148, "step": 5083 }, { "epoch": 0.40277282630223804, "grad_norm": 1.356399970669234, "learning_rate": 1.3555275147127709e-05, "loss": 0.2798, "step": 5084 }, { "epoch": 0.4028520499108734, "grad_norm": 1.4328015520163058, "learning_rate": 1.3552876647961151e-05, "loss": 0.2851, "step": 5085 }, { "epoch": 0.4029312735195088, "grad_norm": 1.7211357868465713, "learning_rate": 1.3550477914854766e-05, "loss": 0.3722, "step": 5086 }, { "epoch": 0.4030104971281442, "grad_norm": 1.3134018607662379, "learning_rate": 1.3548078947966487e-05, "loss": 0.2279, "step": 5087 }, { "epoch": 0.40308972073677957, "grad_norm": 1.215497608930495, "learning_rate": 1.3545679747454286e-05, "loss": 0.2297, "step": 5088 }, { "epoch": 0.40316894434541495, "grad_norm": 1.4186684839296035, "learning_rate": 1.3543280313476135e-05, "loss": 0.284, "step": 5089 }, { "epoch": 0.40324816795405033, "grad_norm": 1.4254813838474238, "learning_rate": 1.3540880646190022e-05, "loss": 0.3482, "step": 5090 }, { "epoch": 0.40332739156268566, "grad_norm": 1.4018365750149362, "learning_rate": 1.353848074575396e-05, "loss": 0.378, "step": 5091 }, { "epoch": 0.40340661517132104, "grad_norm": 1.2700896602992604, "learning_rate": 1.3536080612325963e-05, "loss": 0.2575, "step": 5092 }, { "epoch": 0.4034858387799564, "grad_norm": 1.7202241636971003, "learning_rate": 1.3533680246064073e-05, "loss": 0.3945, "step": 5093 }, { "epoch": 0.4035650623885918, "grad_norm": 1.4888000830307015, "learning_rate": 1.3531279647126342e-05, "loss": 0.3157, "step": 5094 }, { "epoch": 0.4036442859972272, "grad_norm": 1.3437683614397191, "learning_rate": 1.352887881567084e-05, "loss": 0.2713, "step": 5095 }, { "epoch": 0.40372350960586256, "grad_norm": 1.5688630700524515, "learning_rate": 1.3526477751855645e-05, "loss": 0.3719, "step": 5096 }, { "epoch": 0.40380273321449794, "grad_norm": 1.142016810373895, "learning_rate": 1.3524076455838859e-05, "loss": 0.2025, "step": 5097 }, { "epoch": 0.40388195682313327, "grad_norm": 1.8625953729769273, "learning_rate": 1.3521674927778594e-05, "loss": 0.2729, "step": 5098 }, { "epoch": 0.40396118043176865, "grad_norm": 1.7706211373366825, "learning_rate": 1.3519273167832982e-05, "loss": 0.4145, "step": 5099 }, { "epoch": 0.40404040404040403, "grad_norm": 1.5767148359739922, "learning_rate": 1.3516871176160166e-05, "loss": 0.3634, "step": 5100 }, { "epoch": 0.4041196276490394, "grad_norm": 1.4709106099560698, "learning_rate": 1.3514468952918303e-05, "loss": 0.3076, "step": 5101 }, { "epoch": 0.4041988512576748, "grad_norm": 1.5046873589838186, "learning_rate": 1.3512066498265572e-05, "loss": 0.3747, "step": 5102 }, { "epoch": 0.4042780748663102, "grad_norm": 1.5105863620024775, "learning_rate": 1.3509663812360161e-05, "loss": 0.2927, "step": 5103 }, { "epoch": 0.40435729847494556, "grad_norm": 1.5985591007068733, "learning_rate": 1.3507260895360274e-05, "loss": 0.3275, "step": 5104 }, { "epoch": 0.4044365220835809, "grad_norm": 1.4536441352260983, "learning_rate": 1.3504857747424133e-05, "loss": 0.2837, "step": 5105 }, { "epoch": 0.40451574569221627, "grad_norm": 1.3922656809888372, "learning_rate": 1.3502454368709973e-05, "loss": 0.314, "step": 5106 }, { "epoch": 0.40459496930085165, "grad_norm": 1.5213369663753875, "learning_rate": 1.3500050759376052e-05, "loss": 0.3539, "step": 5107 }, { "epoch": 0.40467419290948703, "grad_norm": 1.3112523212882794, "learning_rate": 1.3497646919580623e-05, "loss": 0.3005, "step": 5108 }, { "epoch": 0.4047534165181224, "grad_norm": 1.5476483359070967, "learning_rate": 1.3495242849481973e-05, "loss": 0.313, "step": 5109 }, { "epoch": 0.4048326401267578, "grad_norm": 1.862245994646032, "learning_rate": 1.3492838549238406e-05, "loss": 0.3588, "step": 5110 }, { "epoch": 0.4049118637353932, "grad_norm": 1.3476418031706454, "learning_rate": 1.349043401900822e-05, "loss": 0.3708, "step": 5111 }, { "epoch": 0.4049910873440285, "grad_norm": 1.5224215662980156, "learning_rate": 1.348802925894975e-05, "loss": 0.3304, "step": 5112 }, { "epoch": 0.4050703109526639, "grad_norm": 1.4243258848457265, "learning_rate": 1.348562426922134e-05, "loss": 0.3618, "step": 5113 }, { "epoch": 0.40514953456129926, "grad_norm": 1.4068209927098394, "learning_rate": 1.3483219049981343e-05, "loss": 0.2777, "step": 5114 }, { "epoch": 0.40522875816993464, "grad_norm": 1.6424624219085664, "learning_rate": 1.348081360138813e-05, "loss": 0.3122, "step": 5115 }, { "epoch": 0.40530798177857, "grad_norm": 1.3833716206915943, "learning_rate": 1.347840792360009e-05, "loss": 0.2839, "step": 5116 }, { "epoch": 0.4053872053872054, "grad_norm": 1.516729453968915, "learning_rate": 1.3476002016775626e-05, "loss": 0.3656, "step": 5117 }, { "epoch": 0.40546642899584073, "grad_norm": 1.289522675820099, "learning_rate": 1.3473595881073154e-05, "loss": 0.2556, "step": 5118 }, { "epoch": 0.4055456526044761, "grad_norm": 1.4400693404044367, "learning_rate": 1.3471189516651108e-05, "loss": 0.3242, "step": 5119 }, { "epoch": 0.4056248762131115, "grad_norm": 1.5127858934548162, "learning_rate": 1.3468782923667936e-05, "loss": 0.3343, "step": 5120 }, { "epoch": 0.4057040998217469, "grad_norm": 1.5143332512948404, "learning_rate": 1.3466376102282098e-05, "loss": 0.34, "step": 5121 }, { "epoch": 0.40578332343038226, "grad_norm": 1.5384258865514473, "learning_rate": 1.3463969052652073e-05, "loss": 0.2999, "step": 5122 }, { "epoch": 0.40586254703901764, "grad_norm": 1.6343927203275055, "learning_rate": 1.3461561774936352e-05, "loss": 0.2824, "step": 5123 }, { "epoch": 0.405941770647653, "grad_norm": 1.546340158040832, "learning_rate": 1.3459154269293443e-05, "loss": 0.3168, "step": 5124 }, { "epoch": 0.40602099425628835, "grad_norm": 1.7077793404017203, "learning_rate": 1.3456746535881872e-05, "loss": 0.4051, "step": 5125 }, { "epoch": 0.40610021786492373, "grad_norm": 2.377896012436734, "learning_rate": 1.3454338574860175e-05, "loss": 0.3813, "step": 5126 }, { "epoch": 0.4061794414735591, "grad_norm": 1.432030837873591, "learning_rate": 1.3451930386386902e-05, "loss": 0.2947, "step": 5127 }, { "epoch": 0.4062586650821945, "grad_norm": 1.7630241646227092, "learning_rate": 1.3449521970620624e-05, "loss": 0.327, "step": 5128 }, { "epoch": 0.4063378886908299, "grad_norm": 1.495139451041156, "learning_rate": 1.3447113327719923e-05, "loss": 0.2847, "step": 5129 }, { "epoch": 0.40641711229946526, "grad_norm": 1.5502926540053648, "learning_rate": 1.3444704457843393e-05, "loss": 0.3223, "step": 5130 }, { "epoch": 0.40649633590810064, "grad_norm": 1.4049206586767469, "learning_rate": 1.3442295361149651e-05, "loss": 0.3057, "step": 5131 }, { "epoch": 0.40657555951673596, "grad_norm": 1.2889872241005051, "learning_rate": 1.3439886037797326e-05, "loss": 0.2343, "step": 5132 }, { "epoch": 0.40665478312537134, "grad_norm": 1.6026088694358938, "learning_rate": 1.3437476487945051e-05, "loss": 0.3364, "step": 5133 }, { "epoch": 0.4067340067340067, "grad_norm": 1.5080351343819522, "learning_rate": 1.3435066711751494e-05, "loss": 0.2883, "step": 5134 }, { "epoch": 0.4068132303426421, "grad_norm": 1.618867149979652, "learning_rate": 1.343265670937532e-05, "loss": 0.2965, "step": 5135 }, { "epoch": 0.4068924539512775, "grad_norm": 1.442556926185253, "learning_rate": 1.3430246480975218e-05, "loss": 0.3287, "step": 5136 }, { "epoch": 0.40697167755991287, "grad_norm": 1.8739691331582184, "learning_rate": 1.3427836026709892e-05, "loss": 0.4267, "step": 5137 }, { "epoch": 0.40705090116854825, "grad_norm": 1.345967067735683, "learning_rate": 1.3425425346738057e-05, "loss": 0.2856, "step": 5138 }, { "epoch": 0.4071301247771836, "grad_norm": 1.6215449566175117, "learning_rate": 1.3423014441218444e-05, "loss": 0.3122, "step": 5139 }, { "epoch": 0.40720934838581896, "grad_norm": 1.4485375277499877, "learning_rate": 1.3420603310309805e-05, "loss": 0.3134, "step": 5140 }, { "epoch": 0.40728857199445434, "grad_norm": 1.0513686120861234, "learning_rate": 1.3418191954170892e-05, "loss": 0.2185, "step": 5141 }, { "epoch": 0.4073677956030897, "grad_norm": 1.2751831024389073, "learning_rate": 1.341578037296049e-05, "loss": 0.2254, "step": 5142 }, { "epoch": 0.4074470192117251, "grad_norm": 1.6091232455224238, "learning_rate": 1.3413368566837384e-05, "loss": 0.3887, "step": 5143 }, { "epoch": 0.4075262428203605, "grad_norm": 1.3567058867828161, "learning_rate": 1.341095653596038e-05, "loss": 0.2665, "step": 5144 }, { "epoch": 0.40760546642899587, "grad_norm": 1.602471551287939, "learning_rate": 1.3408544280488305e-05, "loss": 0.3403, "step": 5145 }, { "epoch": 0.4076846900376312, "grad_norm": 1.6804636754184283, "learning_rate": 1.3406131800579985e-05, "loss": 0.2691, "step": 5146 }, { "epoch": 0.4077639136462666, "grad_norm": 1.5258151815672445, "learning_rate": 1.3403719096394276e-05, "loss": 0.3424, "step": 5147 }, { "epoch": 0.40784313725490196, "grad_norm": 1.4092434435781458, "learning_rate": 1.3401306168090047e-05, "loss": 0.2989, "step": 5148 }, { "epoch": 0.40792236086353734, "grad_norm": 2.0043761547433965, "learning_rate": 1.3398893015826166e-05, "loss": 0.425, "step": 5149 }, { "epoch": 0.4080015844721727, "grad_norm": 1.8712062050794998, "learning_rate": 1.3396479639761541e-05, "loss": 0.3765, "step": 5150 }, { "epoch": 0.4080808080808081, "grad_norm": 1.5376564389726946, "learning_rate": 1.3394066040055071e-05, "loss": 0.4119, "step": 5151 }, { "epoch": 0.4081600316894435, "grad_norm": 1.6465483678555928, "learning_rate": 1.3391652216865682e-05, "loss": 0.4312, "step": 5152 }, { "epoch": 0.4082392552980788, "grad_norm": 1.386572013284321, "learning_rate": 1.3389238170352318e-05, "loss": 0.3636, "step": 5153 }, { "epoch": 0.4083184789067142, "grad_norm": 1.3145816603053615, "learning_rate": 1.3386823900673926e-05, "loss": 0.2432, "step": 5154 }, { "epoch": 0.40839770251534957, "grad_norm": 1.5352418769531913, "learning_rate": 1.3384409407989475e-05, "loss": 0.3208, "step": 5155 }, { "epoch": 0.40847692612398495, "grad_norm": 1.4478943760331868, "learning_rate": 1.3381994692457956e-05, "loss": 0.2085, "step": 5156 }, { "epoch": 0.40855614973262033, "grad_norm": 1.3582057956841456, "learning_rate": 1.3379579754238354e-05, "loss": 0.2672, "step": 5157 }, { "epoch": 0.4086353733412557, "grad_norm": 1.595702451101812, "learning_rate": 1.3377164593489687e-05, "loss": 0.325, "step": 5158 }, { "epoch": 0.40871459694989104, "grad_norm": 1.4022522124207797, "learning_rate": 1.3374749210370983e-05, "loss": 0.2432, "step": 5159 }, { "epoch": 0.4087938205585264, "grad_norm": 1.3480377781733934, "learning_rate": 1.3372333605041282e-05, "loss": 0.2984, "step": 5160 }, { "epoch": 0.4088730441671618, "grad_norm": 1.6983224458064181, "learning_rate": 1.3369917777659638e-05, "loss": 0.3731, "step": 5161 }, { "epoch": 0.4089522677757972, "grad_norm": 1.12434265947815, "learning_rate": 1.3367501728385124e-05, "loss": 0.2076, "step": 5162 }, { "epoch": 0.40903149138443257, "grad_norm": 1.2190963677722704, "learning_rate": 1.3365085457376823e-05, "loss": 0.2465, "step": 5163 }, { "epoch": 0.40911071499306795, "grad_norm": 1.459942902902082, "learning_rate": 1.336266896479384e-05, "loss": 0.3297, "step": 5164 }, { "epoch": 0.40918993860170333, "grad_norm": 1.6100273207675837, "learning_rate": 1.3360252250795282e-05, "loss": 0.3469, "step": 5165 }, { "epoch": 0.40926916221033866, "grad_norm": 1.4067367717715846, "learning_rate": 1.3357835315540281e-05, "loss": 0.3151, "step": 5166 }, { "epoch": 0.40934838581897404, "grad_norm": 1.419665094192966, "learning_rate": 1.3355418159187988e-05, "loss": 0.2393, "step": 5167 }, { "epoch": 0.4094276094276094, "grad_norm": 1.7599246830886042, "learning_rate": 1.335300078189755e-05, "loss": 0.3986, "step": 5168 }, { "epoch": 0.4095068330362448, "grad_norm": 1.2602699938197612, "learning_rate": 1.3350583183828143e-05, "loss": 0.2041, "step": 5169 }, { "epoch": 0.4095860566448802, "grad_norm": 1.3599322010416612, "learning_rate": 1.3348165365138956e-05, "loss": 0.2781, "step": 5170 }, { "epoch": 0.40966528025351556, "grad_norm": 1.3036849355719582, "learning_rate": 1.3345747325989188e-05, "loss": 0.2763, "step": 5171 }, { "epoch": 0.40974450386215094, "grad_norm": 1.5881215934325086, "learning_rate": 1.3343329066538064e-05, "loss": 0.3535, "step": 5172 }, { "epoch": 0.40982372747078627, "grad_norm": 1.6258469513641862, "learning_rate": 1.3340910586944805e-05, "loss": 0.2677, "step": 5173 }, { "epoch": 0.40990295107942165, "grad_norm": 1.4849707647922123, "learning_rate": 1.3338491887368656e-05, "loss": 0.2681, "step": 5174 }, { "epoch": 0.40998217468805703, "grad_norm": 1.8341736007917508, "learning_rate": 1.3336072967968882e-05, "loss": 0.3178, "step": 5175 }, { "epoch": 0.4100613982966924, "grad_norm": 1.3491635824228365, "learning_rate": 1.3333653828904755e-05, "loss": 0.2803, "step": 5176 }, { "epoch": 0.4101406219053278, "grad_norm": 1.474192346727405, "learning_rate": 1.3331234470335566e-05, "loss": 0.382, "step": 5177 }, { "epoch": 0.4102198455139632, "grad_norm": 1.191256819233283, "learning_rate": 1.3328814892420613e-05, "loss": 0.2027, "step": 5178 }, { "epoch": 0.41029906912259856, "grad_norm": 1.6130990837673658, "learning_rate": 1.3326395095319218e-05, "loss": 0.3884, "step": 5179 }, { "epoch": 0.4103782927312339, "grad_norm": 1.4273983030998831, "learning_rate": 1.3323975079190713e-05, "loss": 0.3146, "step": 5180 }, { "epoch": 0.41045751633986927, "grad_norm": 1.7431163848350197, "learning_rate": 1.332155484419444e-05, "loss": 0.3889, "step": 5181 }, { "epoch": 0.41053673994850465, "grad_norm": 1.4615864282767486, "learning_rate": 1.3319134390489765e-05, "loss": 0.339, "step": 5182 }, { "epoch": 0.41061596355714003, "grad_norm": 1.6573437351140943, "learning_rate": 1.3316713718236061e-05, "loss": 0.4215, "step": 5183 }, { "epoch": 0.4106951871657754, "grad_norm": 1.6556506961283273, "learning_rate": 1.3314292827592716e-05, "loss": 0.3992, "step": 5184 }, { "epoch": 0.4107744107744108, "grad_norm": 1.4842305462675711, "learning_rate": 1.3311871718719137e-05, "loss": 0.2754, "step": 5185 }, { "epoch": 0.4108536343830462, "grad_norm": 1.336339609771554, "learning_rate": 1.330945039177474e-05, "loss": 0.2304, "step": 5186 }, { "epoch": 0.4109328579916815, "grad_norm": 1.2231715882263132, "learning_rate": 1.3307028846918958e-05, "loss": 0.2473, "step": 5187 }, { "epoch": 0.4110120816003169, "grad_norm": 1.5235206339847889, "learning_rate": 1.3304607084311246e-05, "loss": 0.3613, "step": 5188 }, { "epoch": 0.41109130520895226, "grad_norm": 1.609900385807517, "learning_rate": 1.3302185104111049e-05, "loss": 0.4396, "step": 5189 }, { "epoch": 0.41117052881758764, "grad_norm": 2.1327215250766947, "learning_rate": 1.3299762906477855e-05, "loss": 0.4076, "step": 5190 }, { "epoch": 0.411249752426223, "grad_norm": 1.2616874660468511, "learning_rate": 1.3297340491571153e-05, "loss": 0.3311, "step": 5191 }, { "epoch": 0.4113289760348584, "grad_norm": 1.6064882997861947, "learning_rate": 1.3294917859550444e-05, "loss": 0.316, "step": 5192 }, { "epoch": 0.4114081996434938, "grad_norm": 1.465321134095432, "learning_rate": 1.3292495010575249e-05, "loss": 0.2429, "step": 5193 }, { "epoch": 0.4114874232521291, "grad_norm": 1.2818824094209373, "learning_rate": 1.3290071944805099e-05, "loss": 0.231, "step": 5194 }, { "epoch": 0.4115666468607645, "grad_norm": 1.6035677355084266, "learning_rate": 1.3287648662399544e-05, "loss": 0.3876, "step": 5195 }, { "epoch": 0.4116458704693999, "grad_norm": 1.406564577989663, "learning_rate": 1.3285225163518141e-05, "loss": 0.3723, "step": 5196 }, { "epoch": 0.41172509407803526, "grad_norm": 1.6169480290303706, "learning_rate": 1.328280144832047e-05, "loss": 0.3449, "step": 5197 }, { "epoch": 0.41180431768667064, "grad_norm": 1.7619077768442422, "learning_rate": 1.3280377516966118e-05, "loss": 0.2563, "step": 5198 }, { "epoch": 0.411883541295306, "grad_norm": 1.5295708545348026, "learning_rate": 1.3277953369614696e-05, "loss": 0.1938, "step": 5199 }, { "epoch": 0.41196276490394135, "grad_norm": 1.3755280072067326, "learning_rate": 1.3275529006425808e-05, "loss": 0.2395, "step": 5200 }, { "epoch": 0.41204198851257673, "grad_norm": 1.2322555994151438, "learning_rate": 1.3273104427559102e-05, "loss": 0.178, "step": 5201 }, { "epoch": 0.4121212121212121, "grad_norm": 1.5426966514076645, "learning_rate": 1.3270679633174219e-05, "loss": 0.3087, "step": 5202 }, { "epoch": 0.4122004357298475, "grad_norm": 1.2069032494845653, "learning_rate": 1.3268254623430817e-05, "loss": 0.1516, "step": 5203 }, { "epoch": 0.4122796593384829, "grad_norm": 1.4279471998876085, "learning_rate": 1.3265829398488576e-05, "loss": 0.2815, "step": 5204 }, { "epoch": 0.41235888294711825, "grad_norm": 1.3322301509141183, "learning_rate": 1.3263403958507181e-05, "loss": 0.2021, "step": 5205 }, { "epoch": 0.41243810655575364, "grad_norm": 1.1180406077512783, "learning_rate": 1.326097830364634e-05, "loss": 0.2739, "step": 5206 }, { "epoch": 0.41251733016438896, "grad_norm": 1.804246926901076, "learning_rate": 1.3258552434065768e-05, "loss": 0.3799, "step": 5207 }, { "epoch": 0.41259655377302434, "grad_norm": 1.558428723551447, "learning_rate": 1.3256126349925195e-05, "loss": 0.3173, "step": 5208 }, { "epoch": 0.4126757773816597, "grad_norm": 1.4241058954975696, "learning_rate": 1.3253700051384371e-05, "loss": 0.3708, "step": 5209 }, { "epoch": 0.4127550009902951, "grad_norm": 1.5097313655449034, "learning_rate": 1.3251273538603056e-05, "loss": 0.2931, "step": 5210 }, { "epoch": 0.4128342245989305, "grad_norm": 1.64220889074521, "learning_rate": 1.3248846811741021e-05, "loss": 0.3565, "step": 5211 }, { "epoch": 0.41291344820756587, "grad_norm": 1.5268543740728318, "learning_rate": 1.3246419870958056e-05, "loss": 0.2583, "step": 5212 }, { "epoch": 0.41299267181620125, "grad_norm": 1.7112226464572504, "learning_rate": 1.3243992716413962e-05, "loss": 0.3525, "step": 5213 }, { "epoch": 0.4130718954248366, "grad_norm": 1.6035105814364647, "learning_rate": 1.324156534826856e-05, "loss": 0.3236, "step": 5214 }, { "epoch": 0.41315111903347196, "grad_norm": 1.6213796882474591, "learning_rate": 1.3239137766681675e-05, "loss": 0.2291, "step": 5215 }, { "epoch": 0.41323034264210734, "grad_norm": 1.69758930605804, "learning_rate": 1.3236709971813153e-05, "loss": 0.3082, "step": 5216 }, { "epoch": 0.4133095662507427, "grad_norm": 1.2929577016598028, "learning_rate": 1.3234281963822856e-05, "loss": 0.289, "step": 5217 }, { "epoch": 0.4133887898593781, "grad_norm": 1.5934629181630984, "learning_rate": 1.3231853742870652e-05, "loss": 0.3386, "step": 5218 }, { "epoch": 0.4134680134680135, "grad_norm": 1.4110547869130234, "learning_rate": 1.322942530911643e-05, "loss": 0.3154, "step": 5219 }, { "epoch": 0.41354723707664887, "grad_norm": 1.137906778375266, "learning_rate": 1.3226996662720094e-05, "loss": 0.2315, "step": 5220 }, { "epoch": 0.4136264606852842, "grad_norm": 1.594671550607869, "learning_rate": 1.322456780384155e-05, "loss": 0.3952, "step": 5221 }, { "epoch": 0.4137056842939196, "grad_norm": 1.412784278604694, "learning_rate": 1.3222138732640732e-05, "loss": 0.2651, "step": 5222 }, { "epoch": 0.41378490790255495, "grad_norm": 1.4133644066446025, "learning_rate": 1.3219709449277584e-05, "loss": 0.2917, "step": 5223 }, { "epoch": 0.41386413151119034, "grad_norm": 1.6966961337699802, "learning_rate": 1.3217279953912061e-05, "loss": 0.3627, "step": 5224 }, { "epoch": 0.4139433551198257, "grad_norm": 1.4802565286774627, "learning_rate": 1.3214850246704134e-05, "loss": 0.3051, "step": 5225 }, { "epoch": 0.4140225787284611, "grad_norm": 1.5168767869460236, "learning_rate": 1.3212420327813789e-05, "loss": 0.2811, "step": 5226 }, { "epoch": 0.4141018023370965, "grad_norm": 1.759120769981136, "learning_rate": 1.3209990197401016e-05, "loss": 0.3254, "step": 5227 }, { "epoch": 0.4141810259457318, "grad_norm": 1.379197883649193, "learning_rate": 1.3207559855625842e-05, "loss": 0.3107, "step": 5228 }, { "epoch": 0.4142602495543672, "grad_norm": 1.539297375233917, "learning_rate": 1.3205129302648282e-05, "loss": 0.3664, "step": 5229 }, { "epoch": 0.41433947316300257, "grad_norm": 1.6106230234218317, "learning_rate": 1.3202698538628376e-05, "loss": 0.3357, "step": 5230 }, { "epoch": 0.41441869677163795, "grad_norm": 1.8049845672565505, "learning_rate": 1.3200267563726187e-05, "loss": 0.3857, "step": 5231 }, { "epoch": 0.41449792038027333, "grad_norm": 2.007341944420089, "learning_rate": 1.3197836378101773e-05, "loss": 0.4556, "step": 5232 }, { "epoch": 0.4145771439889087, "grad_norm": 1.8360264118700633, "learning_rate": 1.3195404981915223e-05, "loss": 0.3389, "step": 5233 }, { "epoch": 0.4146563675975441, "grad_norm": 1.523470834230146, "learning_rate": 1.3192973375326635e-05, "loss": 0.374, "step": 5234 }, { "epoch": 0.4147355912061794, "grad_norm": 1.5842085704016085, "learning_rate": 1.3190541558496106e-05, "loss": 0.3783, "step": 5235 }, { "epoch": 0.4148148148148148, "grad_norm": 1.4553986164021036, "learning_rate": 1.318810953158377e-05, "loss": 0.3548, "step": 5236 }, { "epoch": 0.4148940384234502, "grad_norm": 1.4820043409827646, "learning_rate": 1.3185677294749763e-05, "loss": 0.348, "step": 5237 }, { "epoch": 0.41497326203208557, "grad_norm": 1.372925911489726, "learning_rate": 1.3183244848154232e-05, "loss": 0.3695, "step": 5238 }, { "epoch": 0.41505248564072095, "grad_norm": 1.5564712000250092, "learning_rate": 1.3180812191957346e-05, "loss": 0.403, "step": 5239 }, { "epoch": 0.41513170924935633, "grad_norm": 1.2778426771006124, "learning_rate": 1.3178379326319284e-05, "loss": 0.3558, "step": 5240 }, { "epoch": 0.41521093285799165, "grad_norm": 1.5609390757786532, "learning_rate": 1.3175946251400234e-05, "loss": 0.3478, "step": 5241 }, { "epoch": 0.41529015646662704, "grad_norm": 1.141077493648826, "learning_rate": 1.3173512967360406e-05, "loss": 0.2096, "step": 5242 }, { "epoch": 0.4153693800752624, "grad_norm": 1.453801804037298, "learning_rate": 1.317107947436002e-05, "loss": 0.4164, "step": 5243 }, { "epoch": 0.4154486036838978, "grad_norm": 1.4209188508433634, "learning_rate": 1.3168645772559308e-05, "loss": 0.3488, "step": 5244 }, { "epoch": 0.4155278272925332, "grad_norm": 1.4395965238873254, "learning_rate": 1.3166211862118519e-05, "loss": 0.2656, "step": 5245 }, { "epoch": 0.41560705090116856, "grad_norm": 1.4795990756278954, "learning_rate": 1.3163777743197912e-05, "loss": 0.265, "step": 5246 }, { "epoch": 0.41568627450980394, "grad_norm": 1.344745241180794, "learning_rate": 1.3161343415957767e-05, "loss": 0.296, "step": 5247 }, { "epoch": 0.41576549811843927, "grad_norm": 1.1695065089717933, "learning_rate": 1.3158908880558366e-05, "loss": 0.1817, "step": 5248 }, { "epoch": 0.41584472172707465, "grad_norm": 1.3687761894189339, "learning_rate": 1.3156474137160015e-05, "loss": 0.3404, "step": 5249 }, { "epoch": 0.41592394533571003, "grad_norm": 1.9831913212191044, "learning_rate": 1.3154039185923034e-05, "loss": 0.4215, "step": 5250 }, { "epoch": 0.4160031689443454, "grad_norm": 1.6383419899854181, "learning_rate": 1.3151604027007744e-05, "loss": 0.4112, "step": 5251 }, { "epoch": 0.4160823925529808, "grad_norm": 1.1759785023432148, "learning_rate": 1.3149168660574495e-05, "loss": 0.2422, "step": 5252 }, { "epoch": 0.4161616161616162, "grad_norm": 1.7063921866718448, "learning_rate": 1.3146733086783646e-05, "loss": 0.3722, "step": 5253 }, { "epoch": 0.41624083977025156, "grad_norm": 1.5826022937986772, "learning_rate": 1.3144297305795559e-05, "loss": 0.2803, "step": 5254 }, { "epoch": 0.4163200633788869, "grad_norm": 1.6130969620528077, "learning_rate": 1.3141861317770628e-05, "loss": 0.3097, "step": 5255 }, { "epoch": 0.41639928698752227, "grad_norm": 1.4575279513781108, "learning_rate": 1.3139425122869244e-05, "loss": 0.3608, "step": 5256 }, { "epoch": 0.41647851059615765, "grad_norm": 1.2969171877071193, "learning_rate": 1.3136988721251823e-05, "loss": 0.3114, "step": 5257 }, { "epoch": 0.41655773420479303, "grad_norm": 1.7477472012265047, "learning_rate": 1.3134552113078788e-05, "loss": 0.3671, "step": 5258 }, { "epoch": 0.4166369578134284, "grad_norm": 1.5858059611899067, "learning_rate": 1.3132115298510579e-05, "loss": 0.347, "step": 5259 }, { "epoch": 0.4167161814220638, "grad_norm": 1.466729421542365, "learning_rate": 1.312967827770765e-05, "loss": 0.3881, "step": 5260 }, { "epoch": 0.4167954050306992, "grad_norm": 1.7151680532077944, "learning_rate": 1.3127241050830463e-05, "loss": 0.4049, "step": 5261 }, { "epoch": 0.4168746286393345, "grad_norm": 1.5098726250968266, "learning_rate": 1.3124803618039501e-05, "loss": 0.2808, "step": 5262 }, { "epoch": 0.4169538522479699, "grad_norm": 1.3477863666910488, "learning_rate": 1.3122365979495259e-05, "loss": 0.2731, "step": 5263 }, { "epoch": 0.41703307585660526, "grad_norm": 1.2699422428056057, "learning_rate": 1.3119928135358238e-05, "loss": 0.2585, "step": 5264 }, { "epoch": 0.41711229946524064, "grad_norm": 1.4970157185764927, "learning_rate": 1.3117490085788963e-05, "loss": 0.3108, "step": 5265 }, { "epoch": 0.417191523073876, "grad_norm": 1.6236409562923515, "learning_rate": 1.3115051830947966e-05, "loss": 0.4194, "step": 5266 }, { "epoch": 0.4172707466825114, "grad_norm": 1.283106924272449, "learning_rate": 1.3112613370995792e-05, "loss": 0.3093, "step": 5267 }, { "epoch": 0.4173499702911468, "grad_norm": 1.4710991651014518, "learning_rate": 1.3110174706093007e-05, "loss": 0.3513, "step": 5268 }, { "epoch": 0.4174291938997821, "grad_norm": 1.5367612462046478, "learning_rate": 1.3107735836400184e-05, "loss": 0.3399, "step": 5269 }, { "epoch": 0.4175084175084175, "grad_norm": 1.5998705516764091, "learning_rate": 1.3105296762077906e-05, "loss": 0.3249, "step": 5270 }, { "epoch": 0.4175876411170529, "grad_norm": 1.1361656401124913, "learning_rate": 1.3102857483286781e-05, "loss": 0.216, "step": 5271 }, { "epoch": 0.41766686472568826, "grad_norm": 1.7112437910284877, "learning_rate": 1.310041800018742e-05, "loss": 0.3659, "step": 5272 }, { "epoch": 0.41774608833432364, "grad_norm": 1.4668141398726744, "learning_rate": 1.3097978312940453e-05, "loss": 0.3346, "step": 5273 }, { "epoch": 0.417825311942959, "grad_norm": 1.7919395749676035, "learning_rate": 1.309553842170652e-05, "loss": 0.2925, "step": 5274 }, { "epoch": 0.4179045355515944, "grad_norm": 1.3011750576725445, "learning_rate": 1.3093098326646277e-05, "loss": 0.25, "step": 5275 }, { "epoch": 0.41798375916022973, "grad_norm": 1.2571443241609823, "learning_rate": 1.3090658027920391e-05, "loss": 0.2126, "step": 5276 }, { "epoch": 0.4180629827688651, "grad_norm": 1.6601330619247827, "learning_rate": 1.3088217525689546e-05, "loss": 0.4192, "step": 5277 }, { "epoch": 0.4181422063775005, "grad_norm": 1.3512499179836552, "learning_rate": 1.3085776820114435e-05, "loss": 0.2721, "step": 5278 }, { "epoch": 0.4182214299861359, "grad_norm": 1.6193294689001956, "learning_rate": 1.3083335911355768e-05, "loss": 0.3656, "step": 5279 }, { "epoch": 0.41830065359477125, "grad_norm": 1.4947321244634302, "learning_rate": 1.3080894799574271e-05, "loss": 0.3214, "step": 5280 }, { "epoch": 0.41837987720340664, "grad_norm": 1.6548284031422333, "learning_rate": 1.3078453484930674e-05, "loss": 0.332, "step": 5281 }, { "epoch": 0.41845910081204196, "grad_norm": 1.2878359525580316, "learning_rate": 1.3076011967585727e-05, "loss": 0.2698, "step": 5282 }, { "epoch": 0.41853832442067734, "grad_norm": 1.5570065632798822, "learning_rate": 1.3073570247700192e-05, "loss": 0.3878, "step": 5283 }, { "epoch": 0.4186175480293127, "grad_norm": 1.6400304463041429, "learning_rate": 1.3071128325434845e-05, "loss": 0.3951, "step": 5284 }, { "epoch": 0.4186967716379481, "grad_norm": 1.3379521115779618, "learning_rate": 1.3068686200950475e-05, "loss": 0.2309, "step": 5285 }, { "epoch": 0.4187759952465835, "grad_norm": 1.2955472859032604, "learning_rate": 1.3066243874407886e-05, "loss": 0.2528, "step": 5286 }, { "epoch": 0.41885521885521887, "grad_norm": 1.429669254020428, "learning_rate": 1.306380134596789e-05, "loss": 0.237, "step": 5287 }, { "epoch": 0.41893444246385425, "grad_norm": 1.4364730899683567, "learning_rate": 1.306135861579132e-05, "loss": 0.3382, "step": 5288 }, { "epoch": 0.4190136660724896, "grad_norm": 1.3358004572151094, "learning_rate": 1.3058915684039013e-05, "loss": 0.2657, "step": 5289 }, { "epoch": 0.41909288968112496, "grad_norm": 1.2371880112514608, "learning_rate": 1.3056472550871829e-05, "loss": 0.2399, "step": 5290 }, { "epoch": 0.41917211328976034, "grad_norm": 1.4502287582672058, "learning_rate": 1.3054029216450632e-05, "loss": 0.3935, "step": 5291 }, { "epoch": 0.4192513368983957, "grad_norm": 1.6608036733376232, "learning_rate": 1.3051585680936305e-05, "loss": 0.3826, "step": 5292 }, { "epoch": 0.4193305605070311, "grad_norm": 1.3671963038191222, "learning_rate": 1.304914194448975e-05, "loss": 0.2501, "step": 5293 }, { "epoch": 0.4194097841156665, "grad_norm": 1.7349716703237739, "learning_rate": 1.3046698007271864e-05, "loss": 0.3201, "step": 5294 }, { "epoch": 0.41948900772430187, "grad_norm": 1.501184102283557, "learning_rate": 1.3044253869443575e-05, "loss": 0.3276, "step": 5295 }, { "epoch": 0.4195682313329372, "grad_norm": 1.3467820580860035, "learning_rate": 1.3041809531165819e-05, "loss": 0.2406, "step": 5296 }, { "epoch": 0.4196474549415726, "grad_norm": 1.974548855605805, "learning_rate": 1.3039364992599538e-05, "loss": 0.4517, "step": 5297 }, { "epoch": 0.41972667855020795, "grad_norm": 1.3849372161870959, "learning_rate": 1.30369202539057e-05, "loss": 0.345, "step": 5298 }, { "epoch": 0.41980590215884334, "grad_norm": 1.305057581540092, "learning_rate": 1.3034475315245273e-05, "loss": 0.338, "step": 5299 }, { "epoch": 0.4198851257674787, "grad_norm": 1.654283897611442, "learning_rate": 1.303203017677925e-05, "loss": 0.2951, "step": 5300 }, { "epoch": 0.4199643493761141, "grad_norm": 1.524409840224814, "learning_rate": 1.302958483866863e-05, "loss": 0.396, "step": 5301 }, { "epoch": 0.4200435729847495, "grad_norm": 1.292551888311837, "learning_rate": 1.3027139301074423e-05, "loss": 0.3093, "step": 5302 }, { "epoch": 0.4201227965933848, "grad_norm": 1.1337498032506503, "learning_rate": 1.3024693564157658e-05, "loss": 0.2558, "step": 5303 }, { "epoch": 0.4202020202020202, "grad_norm": 1.5746279119087407, "learning_rate": 1.3022247628079381e-05, "loss": 0.4044, "step": 5304 }, { "epoch": 0.42028124381065557, "grad_norm": 1.6976577336693552, "learning_rate": 1.3019801493000634e-05, "loss": 0.2844, "step": 5305 }, { "epoch": 0.42036046741929095, "grad_norm": 1.2878362062039947, "learning_rate": 1.3017355159082495e-05, "loss": 0.2351, "step": 5306 }, { "epoch": 0.42043969102792633, "grad_norm": 1.2969092940145905, "learning_rate": 1.3014908626486032e-05, "loss": 0.295, "step": 5307 }, { "epoch": 0.4205189146365617, "grad_norm": 1.2338148705584753, "learning_rate": 1.3012461895372343e-05, "loss": 0.2373, "step": 5308 }, { "epoch": 0.4205981382451971, "grad_norm": 1.449628486215075, "learning_rate": 1.3010014965902535e-05, "loss": 0.2644, "step": 5309 }, { "epoch": 0.4206773618538324, "grad_norm": 1.5930273494525637, "learning_rate": 1.3007567838237725e-05, "loss": 0.2788, "step": 5310 }, { "epoch": 0.4207565854624678, "grad_norm": 1.753344167154168, "learning_rate": 1.3005120512539042e-05, "loss": 0.3698, "step": 5311 }, { "epoch": 0.4208358090711032, "grad_norm": 1.2824667671437335, "learning_rate": 1.300267298896764e-05, "loss": 0.2971, "step": 5312 }, { "epoch": 0.42091503267973857, "grad_norm": 1.6780009824844848, "learning_rate": 1.3000225267684663e-05, "loss": 0.3351, "step": 5313 }, { "epoch": 0.42099425628837395, "grad_norm": 1.7324769919343599, "learning_rate": 1.2997777348851288e-05, "loss": 0.3984, "step": 5314 }, { "epoch": 0.42107347989700933, "grad_norm": 1.7636825950419772, "learning_rate": 1.2995329232628702e-05, "loss": 0.3157, "step": 5315 }, { "epoch": 0.42115270350564465, "grad_norm": 1.4770237736369367, "learning_rate": 1.2992880919178097e-05, "loss": 0.3159, "step": 5316 }, { "epoch": 0.42123192711428004, "grad_norm": 1.3724093028210702, "learning_rate": 1.2990432408660682e-05, "loss": 0.2361, "step": 5317 }, { "epoch": 0.4213111507229154, "grad_norm": 1.424386207497933, "learning_rate": 1.2987983701237688e-05, "loss": 0.3355, "step": 5318 }, { "epoch": 0.4213903743315508, "grad_norm": 1.8857244313966501, "learning_rate": 1.298553479707034e-05, "loss": 0.4112, "step": 5319 }, { "epoch": 0.4214695979401862, "grad_norm": 1.505656407223853, "learning_rate": 1.2983085696319892e-05, "loss": 0.3332, "step": 5320 }, { "epoch": 0.42154882154882156, "grad_norm": 1.1562259124849663, "learning_rate": 1.2980636399147606e-05, "loss": 0.2403, "step": 5321 }, { "epoch": 0.42162804515745694, "grad_norm": 1.4254104253632018, "learning_rate": 1.2978186905714752e-05, "loss": 0.2889, "step": 5322 }, { "epoch": 0.42170726876609227, "grad_norm": 1.5178854444436547, "learning_rate": 1.2975737216182625e-05, "loss": 0.3153, "step": 5323 }, { "epoch": 0.42178649237472765, "grad_norm": 1.2456095005863437, "learning_rate": 1.2973287330712516e-05, "loss": 0.2159, "step": 5324 }, { "epoch": 0.42186571598336303, "grad_norm": 1.273418659889706, "learning_rate": 1.2970837249465746e-05, "loss": 0.3266, "step": 5325 }, { "epoch": 0.4219449395919984, "grad_norm": 1.6878548988442128, "learning_rate": 1.2968386972603635e-05, "loss": 0.3588, "step": 5326 }, { "epoch": 0.4220241632006338, "grad_norm": 1.3721699907131348, "learning_rate": 1.2965936500287526e-05, "loss": 0.2709, "step": 5327 }, { "epoch": 0.4221033868092692, "grad_norm": 1.2987767090340112, "learning_rate": 1.2963485832678772e-05, "loss": 0.2547, "step": 5328 }, { "epoch": 0.42218261041790456, "grad_norm": 1.2379056712049545, "learning_rate": 1.2961034969938732e-05, "loss": 0.2219, "step": 5329 }, { "epoch": 0.4222618340265399, "grad_norm": 1.6189478079826551, "learning_rate": 1.2958583912228785e-05, "loss": 0.381, "step": 5330 }, { "epoch": 0.42234105763517527, "grad_norm": 1.3733530317994096, "learning_rate": 1.295613265971033e-05, "loss": 0.2324, "step": 5331 }, { "epoch": 0.42242028124381065, "grad_norm": 1.5952434646516784, "learning_rate": 1.2953681212544757e-05, "loss": 0.3252, "step": 5332 }, { "epoch": 0.42249950485244603, "grad_norm": 1.6695805122142962, "learning_rate": 1.2951229570893493e-05, "loss": 0.292, "step": 5333 }, { "epoch": 0.4225787284610814, "grad_norm": 1.5465545260038185, "learning_rate": 1.2948777734917961e-05, "loss": 0.3228, "step": 5334 }, { "epoch": 0.4226579520697168, "grad_norm": 1.742619333597175, "learning_rate": 1.2946325704779602e-05, "loss": 0.375, "step": 5335 }, { "epoch": 0.4227371756783522, "grad_norm": 1.608051129156309, "learning_rate": 1.2943873480639875e-05, "loss": 0.3429, "step": 5336 }, { "epoch": 0.4228163992869875, "grad_norm": 1.4473720798249001, "learning_rate": 1.294142106266024e-05, "loss": 0.391, "step": 5337 }, { "epoch": 0.4228956228956229, "grad_norm": 1.817950454772865, "learning_rate": 1.2938968451002183e-05, "loss": 0.3997, "step": 5338 }, { "epoch": 0.42297484650425826, "grad_norm": 1.6406967514579258, "learning_rate": 1.2936515645827198e-05, "loss": 0.4287, "step": 5339 }, { "epoch": 0.42305407011289364, "grad_norm": 1.2732923627053705, "learning_rate": 1.2934062647296783e-05, "loss": 0.2551, "step": 5340 }, { "epoch": 0.423133293721529, "grad_norm": 1.457510554627103, "learning_rate": 1.2931609455572462e-05, "loss": 0.3093, "step": 5341 }, { "epoch": 0.4232125173301644, "grad_norm": 1.6648995220716367, "learning_rate": 1.2929156070815765e-05, "loss": 0.2919, "step": 5342 }, { "epoch": 0.4232917409387998, "grad_norm": 1.5657625603190437, "learning_rate": 1.2926702493188235e-05, "loss": 0.3578, "step": 5343 }, { "epoch": 0.4233709645474351, "grad_norm": 1.2598454697284138, "learning_rate": 1.292424872285143e-05, "loss": 0.26, "step": 5344 }, { "epoch": 0.4234501881560705, "grad_norm": 1.5104531454803312, "learning_rate": 1.2921794759966913e-05, "loss": 0.2762, "step": 5345 }, { "epoch": 0.4235294117647059, "grad_norm": 1.290372724031699, "learning_rate": 1.2919340604696272e-05, "loss": 0.2844, "step": 5346 }, { "epoch": 0.42360863537334126, "grad_norm": 1.3172941870763695, "learning_rate": 1.29168862572011e-05, "loss": 0.348, "step": 5347 }, { "epoch": 0.42368785898197664, "grad_norm": 1.206274560758607, "learning_rate": 1.2914431717643e-05, "loss": 0.3658, "step": 5348 }, { "epoch": 0.423767082590612, "grad_norm": 1.4670086427942317, "learning_rate": 1.2911976986183598e-05, "loss": 0.3358, "step": 5349 }, { "epoch": 0.4238463061992474, "grad_norm": 1.289335118878106, "learning_rate": 1.2909522062984524e-05, "loss": 0.287, "step": 5350 }, { "epoch": 0.42392552980788273, "grad_norm": 1.3051643071168066, "learning_rate": 1.290706694820742e-05, "loss": 0.2865, "step": 5351 }, { "epoch": 0.4240047534165181, "grad_norm": 1.2088286681138052, "learning_rate": 1.2904611642013945e-05, "loss": 0.2114, "step": 5352 }, { "epoch": 0.4240839770251535, "grad_norm": 1.4230314253859666, "learning_rate": 1.2902156144565769e-05, "loss": 0.3252, "step": 5353 }, { "epoch": 0.4241632006337889, "grad_norm": 1.1818993567321534, "learning_rate": 1.2899700456024576e-05, "loss": 0.2558, "step": 5354 }, { "epoch": 0.42424242424242425, "grad_norm": 1.6018433101059462, "learning_rate": 1.2897244576552062e-05, "loss": 0.3696, "step": 5355 }, { "epoch": 0.42432164785105964, "grad_norm": 1.3115637245824774, "learning_rate": 1.289478850630993e-05, "loss": 0.2671, "step": 5356 }, { "epoch": 0.42440087145969496, "grad_norm": 1.4131241566130903, "learning_rate": 1.2892332245459904e-05, "loss": 0.338, "step": 5357 }, { "epoch": 0.42448009506833034, "grad_norm": 1.3185846469905353, "learning_rate": 1.288987579416372e-05, "loss": 0.2558, "step": 5358 }, { "epoch": 0.4245593186769657, "grad_norm": 1.1992209755196688, "learning_rate": 1.2887419152583117e-05, "loss": 0.2746, "step": 5359 }, { "epoch": 0.4246385422856011, "grad_norm": 1.4760978780529286, "learning_rate": 1.2884962320879857e-05, "loss": 0.3418, "step": 5360 }, { "epoch": 0.4247177658942365, "grad_norm": 1.8056725131778988, "learning_rate": 1.2882505299215711e-05, "loss": 0.388, "step": 5361 }, { "epoch": 0.42479698950287187, "grad_norm": 1.321283591645511, "learning_rate": 1.288004808775246e-05, "loss": 0.2465, "step": 5362 }, { "epoch": 0.42487621311150725, "grad_norm": 1.2013034977072816, "learning_rate": 1.28775906866519e-05, "loss": 0.1646, "step": 5363 }, { "epoch": 0.4249554367201426, "grad_norm": 1.7386095757802484, "learning_rate": 1.2875133096075839e-05, "loss": 0.3628, "step": 5364 }, { "epoch": 0.42503466032877796, "grad_norm": 1.8435032525717856, "learning_rate": 1.2872675316186096e-05, "loss": 0.3344, "step": 5365 }, { "epoch": 0.42511388393741334, "grad_norm": 1.6258185510807717, "learning_rate": 1.2870217347144511e-05, "loss": 0.3605, "step": 5366 }, { "epoch": 0.4251931075460487, "grad_norm": 1.4388781415344534, "learning_rate": 1.2867759189112921e-05, "loss": 0.3127, "step": 5367 }, { "epoch": 0.4252723311546841, "grad_norm": 1.1994881040489052, "learning_rate": 1.2865300842253188e-05, "loss": 0.2033, "step": 5368 }, { "epoch": 0.4253515547633195, "grad_norm": 1.2516484779524133, "learning_rate": 1.2862842306727181e-05, "loss": 0.2895, "step": 5369 }, { "epoch": 0.42543077837195487, "grad_norm": 1.434242516030503, "learning_rate": 1.2860383582696783e-05, "loss": 0.3022, "step": 5370 }, { "epoch": 0.4255100019805902, "grad_norm": 1.3519392898244098, "learning_rate": 1.2857924670323892e-05, "loss": 0.3092, "step": 5371 }, { "epoch": 0.4255892255892256, "grad_norm": 1.3830544966052511, "learning_rate": 1.2855465569770407e-05, "loss": 0.2783, "step": 5372 }, { "epoch": 0.42566844919786095, "grad_norm": 1.3483132715233455, "learning_rate": 1.2853006281198257e-05, "loss": 0.2636, "step": 5373 }, { "epoch": 0.42574767280649634, "grad_norm": 1.869963143411993, "learning_rate": 1.2850546804769372e-05, "loss": 0.4697, "step": 5374 }, { "epoch": 0.4258268964151317, "grad_norm": 1.5025478573839384, "learning_rate": 1.2848087140645695e-05, "loss": 0.3994, "step": 5375 }, { "epoch": 0.4259061200237671, "grad_norm": 1.1380436569733503, "learning_rate": 1.2845627288989186e-05, "loss": 0.2087, "step": 5376 }, { "epoch": 0.4259853436324025, "grad_norm": 1.4749206660396121, "learning_rate": 1.284316724996181e-05, "loss": 0.2968, "step": 5377 }, { "epoch": 0.4260645672410378, "grad_norm": 1.4431868450966434, "learning_rate": 1.2840707023725552e-05, "loss": 0.2731, "step": 5378 }, { "epoch": 0.4261437908496732, "grad_norm": 1.2836643684982045, "learning_rate": 1.2838246610442406e-05, "loss": 0.2603, "step": 5379 }, { "epoch": 0.42622301445830857, "grad_norm": 1.532947971592485, "learning_rate": 1.2835786010274376e-05, "loss": 0.2896, "step": 5380 }, { "epoch": 0.42630223806694395, "grad_norm": 1.2125962378133146, "learning_rate": 1.283332522338348e-05, "loss": 0.2203, "step": 5381 }, { "epoch": 0.42638146167557933, "grad_norm": 1.4430830714804839, "learning_rate": 1.2830864249931756e-05, "loss": 0.3699, "step": 5382 }, { "epoch": 0.4264606852842147, "grad_norm": 1.4376294651538295, "learning_rate": 1.2828403090081238e-05, "loss": 0.3045, "step": 5383 }, { "epoch": 0.4265399088928501, "grad_norm": 1.5464936587077387, "learning_rate": 1.282594174399399e-05, "loss": 0.3466, "step": 5384 }, { "epoch": 0.4266191325014854, "grad_norm": 1.1663907507366942, "learning_rate": 1.2823480211832073e-05, "loss": 0.2369, "step": 5385 }, { "epoch": 0.4266983561101208, "grad_norm": 1.381108449801392, "learning_rate": 1.2821018493757569e-05, "loss": 0.2858, "step": 5386 }, { "epoch": 0.4267775797187562, "grad_norm": 1.4900068052255124, "learning_rate": 1.2818556589932575e-05, "loss": 0.3727, "step": 5387 }, { "epoch": 0.42685680332739157, "grad_norm": 1.4090394852503019, "learning_rate": 1.2816094500519188e-05, "loss": 0.3098, "step": 5388 }, { "epoch": 0.42693602693602695, "grad_norm": 1.6669878269675182, "learning_rate": 1.2813632225679528e-05, "loss": 0.3567, "step": 5389 }, { "epoch": 0.42701525054466233, "grad_norm": 1.2099463386222742, "learning_rate": 1.281116976557573e-05, "loss": 0.2898, "step": 5390 }, { "epoch": 0.4270944741532977, "grad_norm": 1.331961260766164, "learning_rate": 1.2808707120369923e-05, "loss": 0.2705, "step": 5391 }, { "epoch": 0.42717369776193304, "grad_norm": 1.1794589464847258, "learning_rate": 1.280624429022427e-05, "loss": 0.1987, "step": 5392 }, { "epoch": 0.4272529213705684, "grad_norm": 1.8481513262049374, "learning_rate": 1.2803781275300933e-05, "loss": 0.4254, "step": 5393 }, { "epoch": 0.4273321449792038, "grad_norm": 1.3145203662750549, "learning_rate": 1.2801318075762088e-05, "loss": 0.3131, "step": 5394 }, { "epoch": 0.4274113685878392, "grad_norm": 1.239401965793781, "learning_rate": 1.2798854691769927e-05, "loss": 0.277, "step": 5395 }, { "epoch": 0.42749059219647456, "grad_norm": 1.3683252291993193, "learning_rate": 1.2796391123486654e-05, "loss": 0.2328, "step": 5396 }, { "epoch": 0.42756981580510994, "grad_norm": 1.4362893318216192, "learning_rate": 1.2793927371074477e-05, "loss": 0.3275, "step": 5397 }, { "epoch": 0.42764903941374527, "grad_norm": 1.1718506240670885, "learning_rate": 1.279146343469563e-05, "loss": 0.2528, "step": 5398 }, { "epoch": 0.42772826302238065, "grad_norm": 1.3127499603795205, "learning_rate": 1.2788999314512347e-05, "loss": 0.1929, "step": 5399 }, { "epoch": 0.42780748663101603, "grad_norm": 1.2428444141788408, "learning_rate": 1.2786535010686879e-05, "loss": 0.2068, "step": 5400 }, { "epoch": 0.4278867102396514, "grad_norm": 1.285329046696986, "learning_rate": 1.2784070523381487e-05, "loss": 0.2293, "step": 5401 }, { "epoch": 0.4279659338482868, "grad_norm": 1.4271090747242758, "learning_rate": 1.2781605852758448e-05, "loss": 0.1902, "step": 5402 }, { "epoch": 0.4280451574569222, "grad_norm": 1.5853355287172861, "learning_rate": 1.2779140998980048e-05, "loss": 0.3348, "step": 5403 }, { "epoch": 0.42812438106555756, "grad_norm": 1.7229634546243293, "learning_rate": 1.2776675962208585e-05, "loss": 0.3129, "step": 5404 }, { "epoch": 0.4282036046741929, "grad_norm": 1.4588992325607515, "learning_rate": 1.2774210742606368e-05, "loss": 0.2887, "step": 5405 }, { "epoch": 0.42828282828282827, "grad_norm": 1.4014347468319028, "learning_rate": 1.2771745340335726e-05, "loss": 0.3475, "step": 5406 }, { "epoch": 0.42836205189146365, "grad_norm": 1.2241257501542995, "learning_rate": 1.276927975555899e-05, "loss": 0.2724, "step": 5407 }, { "epoch": 0.42844127550009903, "grad_norm": 1.6433282330507506, "learning_rate": 1.2766813988438505e-05, "loss": 0.3178, "step": 5408 }, { "epoch": 0.4285204991087344, "grad_norm": 1.4666289676114357, "learning_rate": 1.2764348039136634e-05, "loss": 0.3014, "step": 5409 }, { "epoch": 0.4285997227173698, "grad_norm": 1.6626758567941506, "learning_rate": 1.2761881907815744e-05, "loss": 0.4482, "step": 5410 }, { "epoch": 0.4286789463260052, "grad_norm": 1.3915045752123325, "learning_rate": 1.275941559463822e-05, "loss": 0.3292, "step": 5411 }, { "epoch": 0.4287581699346405, "grad_norm": 1.3488420178464307, "learning_rate": 1.2756949099766458e-05, "loss": 0.3079, "step": 5412 }, { "epoch": 0.4288373935432759, "grad_norm": 1.2761855978713215, "learning_rate": 1.2754482423362861e-05, "loss": 0.2771, "step": 5413 }, { "epoch": 0.42891661715191126, "grad_norm": 1.4813392383177097, "learning_rate": 1.2752015565589852e-05, "loss": 0.3155, "step": 5414 }, { "epoch": 0.42899584076054664, "grad_norm": 1.4125235626782122, "learning_rate": 1.2749548526609858e-05, "loss": 0.3407, "step": 5415 }, { "epoch": 0.429075064369182, "grad_norm": 1.3543089874507992, "learning_rate": 1.2747081306585325e-05, "loss": 0.2857, "step": 5416 }, { "epoch": 0.4291542879778174, "grad_norm": 1.388327874850087, "learning_rate": 1.2744613905678707e-05, "loss": 0.352, "step": 5417 }, { "epoch": 0.4292335115864528, "grad_norm": 1.3170784812768765, "learning_rate": 1.2742146324052466e-05, "loss": 0.2004, "step": 5418 }, { "epoch": 0.4293127351950881, "grad_norm": 1.7098624732253722, "learning_rate": 1.273967856186909e-05, "loss": 0.389, "step": 5419 }, { "epoch": 0.4293919588037235, "grad_norm": 1.5161389576590416, "learning_rate": 1.2737210619291058e-05, "loss": 0.2684, "step": 5420 }, { "epoch": 0.4294711824123589, "grad_norm": 1.4483997912609599, "learning_rate": 1.2734742496480878e-05, "loss": 0.3581, "step": 5421 }, { "epoch": 0.42955040602099426, "grad_norm": 1.7315986979764633, "learning_rate": 1.2732274193601066e-05, "loss": 0.3944, "step": 5422 }, { "epoch": 0.42962962962962964, "grad_norm": 1.70707699695665, "learning_rate": 1.2729805710814142e-05, "loss": 0.3951, "step": 5423 }, { "epoch": 0.429708853238265, "grad_norm": 1.229800085616609, "learning_rate": 1.2727337048282649e-05, "loss": 0.3034, "step": 5424 }, { "epoch": 0.4297880768469004, "grad_norm": 1.327922796851444, "learning_rate": 1.2724868206169134e-05, "loss": 0.2503, "step": 5425 }, { "epoch": 0.42986730045553573, "grad_norm": 1.4223452121407245, "learning_rate": 1.2722399184636158e-05, "loss": 0.3023, "step": 5426 }, { "epoch": 0.4299465240641711, "grad_norm": 1.5392242912135163, "learning_rate": 1.2719929983846298e-05, "loss": 0.2474, "step": 5427 }, { "epoch": 0.4300257476728065, "grad_norm": 1.286712388834556, "learning_rate": 1.2717460603962132e-05, "loss": 0.3497, "step": 5428 }, { "epoch": 0.4301049712814419, "grad_norm": 1.620424515101876, "learning_rate": 1.2714991045146265e-05, "loss": 0.3743, "step": 5429 }, { "epoch": 0.43018419489007725, "grad_norm": 1.7969404020852162, "learning_rate": 1.2712521307561298e-05, "loss": 0.28, "step": 5430 }, { "epoch": 0.43026341849871264, "grad_norm": 1.1727169216814828, "learning_rate": 1.2710051391369857e-05, "loss": 0.264, "step": 5431 }, { "epoch": 0.430342642107348, "grad_norm": 1.363950459841746, "learning_rate": 1.270758129673457e-05, "loss": 0.3174, "step": 5432 }, { "epoch": 0.43042186571598334, "grad_norm": 1.5429165350200493, "learning_rate": 1.2705111023818083e-05, "loss": 0.3709, "step": 5433 }, { "epoch": 0.4305010893246187, "grad_norm": 1.3033745895471434, "learning_rate": 1.2702640572783051e-05, "loss": 0.224, "step": 5434 }, { "epoch": 0.4305803129332541, "grad_norm": 1.8829247102128346, "learning_rate": 1.2700169943792143e-05, "loss": 0.366, "step": 5435 }, { "epoch": 0.4306595365418895, "grad_norm": 1.1961494435101305, "learning_rate": 1.2697699137008038e-05, "loss": 0.2143, "step": 5436 }, { "epoch": 0.43073876015052487, "grad_norm": 1.2330006922081789, "learning_rate": 1.2695228152593419e-05, "loss": 0.2745, "step": 5437 }, { "epoch": 0.43081798375916025, "grad_norm": 1.3238045984361115, "learning_rate": 1.2692756990710998e-05, "loss": 0.3038, "step": 5438 }, { "epoch": 0.4308972073677956, "grad_norm": 1.7456698958864234, "learning_rate": 1.269028565152349e-05, "loss": 0.2782, "step": 5439 }, { "epoch": 0.43097643097643096, "grad_norm": 1.2910717810119143, "learning_rate": 1.2687814135193613e-05, "loss": 0.2525, "step": 5440 }, { "epoch": 0.43105565458506634, "grad_norm": 1.2644365952577783, "learning_rate": 1.2685342441884107e-05, "loss": 0.2642, "step": 5441 }, { "epoch": 0.4311348781937017, "grad_norm": 1.972364586257956, "learning_rate": 1.2682870571757724e-05, "loss": 0.4723, "step": 5442 }, { "epoch": 0.4312141018023371, "grad_norm": 1.7539274480494045, "learning_rate": 1.2680398524977222e-05, "loss": 0.3905, "step": 5443 }, { "epoch": 0.4312933254109725, "grad_norm": 1.8552369469881138, "learning_rate": 1.2677926301705376e-05, "loss": 0.3314, "step": 5444 }, { "epoch": 0.43137254901960786, "grad_norm": 1.6734085615154801, "learning_rate": 1.2675453902104967e-05, "loss": 0.358, "step": 5445 }, { "epoch": 0.4314517726282432, "grad_norm": 1.5373536220584754, "learning_rate": 1.2672981326338793e-05, "loss": 0.327, "step": 5446 }, { "epoch": 0.4315309962368786, "grad_norm": 1.4209827127714774, "learning_rate": 1.267050857456966e-05, "loss": 0.3408, "step": 5447 }, { "epoch": 0.43161021984551395, "grad_norm": 1.3832346716169261, "learning_rate": 1.2668035646960384e-05, "loss": 0.2813, "step": 5448 }, { "epoch": 0.43168944345414934, "grad_norm": 1.4619540174563779, "learning_rate": 1.2665562543673803e-05, "loss": 0.3294, "step": 5449 }, { "epoch": 0.4317686670627847, "grad_norm": 1.5011239471859592, "learning_rate": 1.2663089264872751e-05, "loss": 0.2726, "step": 5450 }, { "epoch": 0.4318478906714201, "grad_norm": 1.054699979891497, "learning_rate": 1.2660615810720087e-05, "loss": 0.2814, "step": 5451 }, { "epoch": 0.4319271142800555, "grad_norm": 1.3282021130922372, "learning_rate": 1.2658142181378675e-05, "loss": 0.3067, "step": 5452 }, { "epoch": 0.4320063378886908, "grad_norm": 1.2653790624505565, "learning_rate": 1.2655668377011387e-05, "loss": 0.2886, "step": 5453 }, { "epoch": 0.4320855614973262, "grad_norm": 1.5848208966044717, "learning_rate": 1.2653194397781117e-05, "loss": 0.3624, "step": 5454 }, { "epoch": 0.43216478510596157, "grad_norm": 1.2103689062823213, "learning_rate": 1.2650720243850762e-05, "loss": 0.3343, "step": 5455 }, { "epoch": 0.43224400871459695, "grad_norm": 1.695500876910567, "learning_rate": 1.2648245915383233e-05, "loss": 0.4319, "step": 5456 }, { "epoch": 0.43232323232323233, "grad_norm": 1.641318706300649, "learning_rate": 1.2645771412541455e-05, "loss": 0.3233, "step": 5457 }, { "epoch": 0.4324024559318677, "grad_norm": 1.4264901037907947, "learning_rate": 1.2643296735488355e-05, "loss": 0.2835, "step": 5458 }, { "epoch": 0.4324816795405031, "grad_norm": 1.7114592953176366, "learning_rate": 1.2640821884386887e-05, "loss": 0.4391, "step": 5459 }, { "epoch": 0.4325609031491384, "grad_norm": 1.6222246082244827, "learning_rate": 1.2638346859400006e-05, "loss": 0.3495, "step": 5460 }, { "epoch": 0.4326401267577738, "grad_norm": 1.3545315424505424, "learning_rate": 1.2635871660690677e-05, "loss": 0.3564, "step": 5461 }, { "epoch": 0.4327193503664092, "grad_norm": 1.7432114165988388, "learning_rate": 1.2633396288421884e-05, "loss": 0.4184, "step": 5462 }, { "epoch": 0.43279857397504456, "grad_norm": 1.581166045709697, "learning_rate": 1.2630920742756616e-05, "loss": 0.3156, "step": 5463 }, { "epoch": 0.43287779758367995, "grad_norm": 1.3949629214009491, "learning_rate": 1.2628445023857875e-05, "loss": 0.3493, "step": 5464 }, { "epoch": 0.43295702119231533, "grad_norm": 1.285804365685932, "learning_rate": 1.2625969131888677e-05, "loss": 0.3022, "step": 5465 }, { "epoch": 0.4330362448009507, "grad_norm": 1.4283416742607622, "learning_rate": 1.2623493067012047e-05, "loss": 0.3524, "step": 5466 }, { "epoch": 0.43311546840958604, "grad_norm": 0.9835140281730688, "learning_rate": 1.2621016829391022e-05, "loss": 0.182, "step": 5467 }, { "epoch": 0.4331946920182214, "grad_norm": 1.1641937817399692, "learning_rate": 1.2618540419188654e-05, "loss": 0.2765, "step": 5468 }, { "epoch": 0.4332739156268568, "grad_norm": 1.5427922786986064, "learning_rate": 1.2616063836567994e-05, "loss": 0.2981, "step": 5469 }, { "epoch": 0.4333531392354922, "grad_norm": 1.4924127486511443, "learning_rate": 1.2613587081692118e-05, "loss": 0.328, "step": 5470 }, { "epoch": 0.43343236284412756, "grad_norm": 1.7285763192172479, "learning_rate": 1.2611110154724113e-05, "loss": 0.4709, "step": 5471 }, { "epoch": 0.43351158645276294, "grad_norm": 1.5157293613758454, "learning_rate": 1.2608633055827064e-05, "loss": 0.3337, "step": 5472 }, { "epoch": 0.4335908100613983, "grad_norm": 1.2172657654412193, "learning_rate": 1.260615578516408e-05, "loss": 0.2544, "step": 5473 }, { "epoch": 0.43367003367003365, "grad_norm": 1.5409390052442435, "learning_rate": 1.260367834289828e-05, "loss": 0.3411, "step": 5474 }, { "epoch": 0.43374925727866903, "grad_norm": 1.4382072378306487, "learning_rate": 1.2601200729192789e-05, "loss": 0.3718, "step": 5475 }, { "epoch": 0.4338284808873044, "grad_norm": 1.179465992343742, "learning_rate": 1.2598722944210746e-05, "loss": 0.2762, "step": 5476 }, { "epoch": 0.4339077044959398, "grad_norm": 1.59375203852541, "learning_rate": 1.25962449881153e-05, "loss": 0.4207, "step": 5477 }, { "epoch": 0.4339869281045752, "grad_norm": 1.388563111484153, "learning_rate": 1.2593766861069615e-05, "loss": 0.2863, "step": 5478 }, { "epoch": 0.43406615171321056, "grad_norm": 1.4750851146914306, "learning_rate": 1.2591288563236864e-05, "loss": 0.3282, "step": 5479 }, { "epoch": 0.4341453753218459, "grad_norm": 1.743719289196784, "learning_rate": 1.2588810094780227e-05, "loss": 0.3748, "step": 5480 }, { "epoch": 0.43422459893048126, "grad_norm": 1.4148853753485535, "learning_rate": 1.2586331455862902e-05, "loss": 0.2627, "step": 5481 }, { "epoch": 0.43430382253911665, "grad_norm": 1.4276579213531981, "learning_rate": 1.2583852646648097e-05, "loss": 0.328, "step": 5482 }, { "epoch": 0.434383046147752, "grad_norm": 1.3557638130504426, "learning_rate": 1.2581373667299026e-05, "loss": 0.337, "step": 5483 }, { "epoch": 0.4344622697563874, "grad_norm": 1.649902237559774, "learning_rate": 1.257889451797892e-05, "loss": 0.322, "step": 5484 }, { "epoch": 0.4345414933650228, "grad_norm": 1.452788465442924, "learning_rate": 1.257641519885102e-05, "loss": 0.2976, "step": 5485 }, { "epoch": 0.43462071697365817, "grad_norm": 1.5559965749267017, "learning_rate": 1.2573935710078576e-05, "loss": 0.3137, "step": 5486 }, { "epoch": 0.4346999405822935, "grad_norm": 1.4697167871168915, "learning_rate": 1.2571456051824851e-05, "loss": 0.3126, "step": 5487 }, { "epoch": 0.4347791641909289, "grad_norm": 1.3844928772019096, "learning_rate": 1.2568976224253115e-05, "loss": 0.2262, "step": 5488 }, { "epoch": 0.43485838779956426, "grad_norm": 1.498134018477766, "learning_rate": 1.256649622752666e-05, "loss": 0.3776, "step": 5489 }, { "epoch": 0.43493761140819964, "grad_norm": 1.245475668813004, "learning_rate": 1.2564016061808774e-05, "loss": 0.2297, "step": 5490 }, { "epoch": 0.435016835016835, "grad_norm": 1.589198936945319, "learning_rate": 1.2561535727262769e-05, "loss": 0.3655, "step": 5491 }, { "epoch": 0.4350960586254704, "grad_norm": 1.6472914681148645, "learning_rate": 1.2559055224051963e-05, "loss": 0.3296, "step": 5492 }, { "epoch": 0.4351752822341058, "grad_norm": 1.8696894172833403, "learning_rate": 1.2556574552339682e-05, "loss": 0.3698, "step": 5493 }, { "epoch": 0.4352545058427411, "grad_norm": 1.9858729806649456, "learning_rate": 1.2554093712289267e-05, "loss": 0.5243, "step": 5494 }, { "epoch": 0.4353337294513765, "grad_norm": 1.6033441160765232, "learning_rate": 1.2551612704064074e-05, "loss": 0.3302, "step": 5495 }, { "epoch": 0.4354129530600119, "grad_norm": 1.5319594864143755, "learning_rate": 1.2549131527827458e-05, "loss": 0.4081, "step": 5496 }, { "epoch": 0.43549217666864726, "grad_norm": 1.504913530009671, "learning_rate": 1.2546650183742801e-05, "loss": 0.3112, "step": 5497 }, { "epoch": 0.43557140027728264, "grad_norm": 1.2767031132609141, "learning_rate": 1.254416867197348e-05, "loss": 0.3113, "step": 5498 }, { "epoch": 0.435650623885918, "grad_norm": 1.7117570025406732, "learning_rate": 1.2541686992682896e-05, "loss": 0.4046, "step": 5499 }, { "epoch": 0.4357298474945534, "grad_norm": 1.5293320244132367, "learning_rate": 1.2539205146034452e-05, "loss": 0.3628, "step": 5500 }, { "epoch": 0.4358090711031887, "grad_norm": 1.3677255643301798, "learning_rate": 1.2536723132191566e-05, "loss": 0.3357, "step": 5501 }, { "epoch": 0.4358882947118241, "grad_norm": 1.6184853659040477, "learning_rate": 1.2534240951317669e-05, "loss": 0.3632, "step": 5502 }, { "epoch": 0.4359675183204595, "grad_norm": 1.490242335936876, "learning_rate": 1.25317586035762e-05, "loss": 0.3372, "step": 5503 }, { "epoch": 0.43604674192909487, "grad_norm": 1.3434680182456935, "learning_rate": 1.2529276089130607e-05, "loss": 0.3482, "step": 5504 }, { "epoch": 0.43612596553773025, "grad_norm": 1.2352051693493493, "learning_rate": 1.2526793408144355e-05, "loss": 0.2803, "step": 5505 }, { "epoch": 0.43620518914636564, "grad_norm": 1.6694804540272143, "learning_rate": 1.2524310560780914e-05, "loss": 0.3759, "step": 5506 }, { "epoch": 0.436284412755001, "grad_norm": 1.231372253748743, "learning_rate": 1.2521827547203773e-05, "loss": 0.2659, "step": 5507 }, { "epoch": 0.43636363636363634, "grad_norm": 1.432983039414379, "learning_rate": 1.2519344367576418e-05, "loss": 0.2474, "step": 5508 }, { "epoch": 0.4364428599722717, "grad_norm": 1.4892999601753967, "learning_rate": 1.2516861022062361e-05, "loss": 0.3025, "step": 5509 }, { "epoch": 0.4365220835809071, "grad_norm": 1.345678917014592, "learning_rate": 1.2514377510825113e-05, "loss": 0.2936, "step": 5510 }, { "epoch": 0.4366013071895425, "grad_norm": 1.5770487553466572, "learning_rate": 1.2511893834028209e-05, "loss": 0.4329, "step": 5511 }, { "epoch": 0.43668053079817787, "grad_norm": 1.4870329623371363, "learning_rate": 1.2509409991835178e-05, "loss": 0.3456, "step": 5512 }, { "epoch": 0.43675975440681325, "grad_norm": 1.301633174327458, "learning_rate": 1.2506925984409574e-05, "loss": 0.2842, "step": 5513 }, { "epoch": 0.43683897801544863, "grad_norm": 1.4075368831718562, "learning_rate": 1.250444181191496e-05, "loss": 0.268, "step": 5514 }, { "epoch": 0.43691820162408396, "grad_norm": 1.451309717991726, "learning_rate": 1.2501957474514898e-05, "loss": 0.282, "step": 5515 }, { "epoch": 0.43699742523271934, "grad_norm": 1.3603439783311486, "learning_rate": 1.249947297237298e-05, "loss": 0.2704, "step": 5516 }, { "epoch": 0.4370766488413547, "grad_norm": 1.4368503013002034, "learning_rate": 1.249698830565279e-05, "loss": 0.3424, "step": 5517 }, { "epoch": 0.4371558724499901, "grad_norm": 1.332731038219995, "learning_rate": 1.2494503474517935e-05, "loss": 0.2255, "step": 5518 }, { "epoch": 0.4372350960586255, "grad_norm": 1.4420217355323868, "learning_rate": 1.2492018479132033e-05, "loss": 0.2916, "step": 5519 }, { "epoch": 0.43731431966726086, "grad_norm": 1.3211587642497646, "learning_rate": 1.2489533319658703e-05, "loss": 0.2476, "step": 5520 }, { "epoch": 0.4373935432758962, "grad_norm": 1.8274973083374102, "learning_rate": 1.2487047996261578e-05, "loss": 0.394, "step": 5521 }, { "epoch": 0.43747276688453157, "grad_norm": 1.6968891552047662, "learning_rate": 1.2484562509104316e-05, "loss": 0.3865, "step": 5522 }, { "epoch": 0.43755199049316695, "grad_norm": 1.3754032116517625, "learning_rate": 1.2482076858350564e-05, "loss": 0.2733, "step": 5523 }, { "epoch": 0.43763121410180233, "grad_norm": 1.8364161689702525, "learning_rate": 1.2479591044163997e-05, "loss": 0.4175, "step": 5524 }, { "epoch": 0.4377104377104377, "grad_norm": 1.4297693803153317, "learning_rate": 1.2477105066708286e-05, "loss": 0.2563, "step": 5525 }, { "epoch": 0.4377896613190731, "grad_norm": 1.71100922622832, "learning_rate": 1.2474618926147129e-05, "loss": 0.3214, "step": 5526 }, { "epoch": 0.4378688849277085, "grad_norm": 1.352624504305475, "learning_rate": 1.2472132622644222e-05, "loss": 0.2543, "step": 5527 }, { "epoch": 0.4379481085363438, "grad_norm": 1.9597103256622024, "learning_rate": 1.2469646156363276e-05, "loss": 0.4333, "step": 5528 }, { "epoch": 0.4380273321449792, "grad_norm": 1.6459129158020713, "learning_rate": 1.2467159527468014e-05, "loss": 0.412, "step": 5529 }, { "epoch": 0.43810655575361457, "grad_norm": 1.6321473589199504, "learning_rate": 1.246467273612217e-05, "loss": 0.3876, "step": 5530 }, { "epoch": 0.43818577936224995, "grad_norm": 2.306533984815719, "learning_rate": 1.2462185782489484e-05, "loss": 0.3353, "step": 5531 }, { "epoch": 0.43826500297088533, "grad_norm": 1.6634065664543873, "learning_rate": 1.2459698666733712e-05, "loss": 0.381, "step": 5532 }, { "epoch": 0.4383442265795207, "grad_norm": 1.2720165084356065, "learning_rate": 1.2457211389018619e-05, "loss": 0.27, "step": 5533 }, { "epoch": 0.4384234501881561, "grad_norm": 1.3331215022361775, "learning_rate": 1.2454723949507978e-05, "loss": 0.3085, "step": 5534 }, { "epoch": 0.4385026737967914, "grad_norm": 1.4469390883879574, "learning_rate": 1.2452236348365579e-05, "loss": 0.3212, "step": 5535 }, { "epoch": 0.4385818974054268, "grad_norm": 1.3066558085486175, "learning_rate": 1.244974858575521e-05, "loss": 0.2593, "step": 5536 }, { "epoch": 0.4386611210140622, "grad_norm": 1.3556845452823256, "learning_rate": 1.2447260661840688e-05, "loss": 0.3022, "step": 5537 }, { "epoch": 0.43874034462269756, "grad_norm": 1.412813446296498, "learning_rate": 1.2444772576785828e-05, "loss": 0.3349, "step": 5538 }, { "epoch": 0.43881956823133295, "grad_norm": 1.4987757106315132, "learning_rate": 1.2442284330754456e-05, "loss": 0.3447, "step": 5539 }, { "epoch": 0.4388987918399683, "grad_norm": 1.2809113579222797, "learning_rate": 1.2439795923910413e-05, "loss": 0.2656, "step": 5540 }, { "epoch": 0.4389780154486037, "grad_norm": 1.6354779293121176, "learning_rate": 1.2437307356417547e-05, "loss": 0.3831, "step": 5541 }, { "epoch": 0.43905723905723903, "grad_norm": 1.564709595851338, "learning_rate": 1.2434818628439718e-05, "loss": 0.3179, "step": 5542 }, { "epoch": 0.4391364626658744, "grad_norm": 1.4931713730660812, "learning_rate": 1.24323297401408e-05, "loss": 0.333, "step": 5543 }, { "epoch": 0.4392156862745098, "grad_norm": 1.509970048928823, "learning_rate": 1.2429840691684672e-05, "loss": 0.3947, "step": 5544 }, { "epoch": 0.4392949098831452, "grad_norm": 1.3668560912590544, "learning_rate": 1.2427351483235224e-05, "loss": 0.3083, "step": 5545 }, { "epoch": 0.43937413349178056, "grad_norm": 1.9266141282935643, "learning_rate": 1.2424862114956367e-05, "loss": 0.3891, "step": 5546 }, { "epoch": 0.43945335710041594, "grad_norm": 1.7498146035725137, "learning_rate": 1.2422372587012001e-05, "loss": 0.3772, "step": 5547 }, { "epoch": 0.4395325807090513, "grad_norm": 1.786747893173884, "learning_rate": 1.2419882899566056e-05, "loss": 0.3258, "step": 5548 }, { "epoch": 0.43961180431768665, "grad_norm": 1.467619205649559, "learning_rate": 1.241739305278247e-05, "loss": 0.3966, "step": 5549 }, { "epoch": 0.43969102792632203, "grad_norm": 1.4787014616012117, "learning_rate": 1.2414903046825178e-05, "loss": 0.2968, "step": 5550 }, { "epoch": 0.4397702515349574, "grad_norm": 1.3699368777074505, "learning_rate": 1.2412412881858142e-05, "loss": 0.265, "step": 5551 }, { "epoch": 0.4398494751435928, "grad_norm": 1.222884656815162, "learning_rate": 1.240992255804533e-05, "loss": 0.209, "step": 5552 }, { "epoch": 0.4399286987522282, "grad_norm": 1.4643575849466588, "learning_rate": 1.2407432075550707e-05, "loss": 0.3106, "step": 5553 }, { "epoch": 0.44000792236086356, "grad_norm": 1.0793841234333779, "learning_rate": 1.2404941434538269e-05, "loss": 0.1828, "step": 5554 }, { "epoch": 0.4400871459694989, "grad_norm": 1.8614665961933032, "learning_rate": 1.2402450635172008e-05, "loss": 0.4639, "step": 5555 }, { "epoch": 0.44016636957813426, "grad_norm": 1.2472298582682795, "learning_rate": 1.2399959677615932e-05, "loss": 0.2303, "step": 5556 }, { "epoch": 0.44024559318676965, "grad_norm": 1.8496281759865363, "learning_rate": 1.239746856203406e-05, "loss": 0.307, "step": 5557 }, { "epoch": 0.440324816795405, "grad_norm": 1.943752343790704, "learning_rate": 1.239497728859042e-05, "loss": 0.364, "step": 5558 }, { "epoch": 0.4404040404040404, "grad_norm": 1.4816592777741944, "learning_rate": 1.2392485857449048e-05, "loss": 0.3342, "step": 5559 }, { "epoch": 0.4404832640126758, "grad_norm": 1.6050090471273073, "learning_rate": 1.2389994268773995e-05, "loss": 0.3299, "step": 5560 }, { "epoch": 0.44056248762131117, "grad_norm": 1.6392367962249623, "learning_rate": 1.238750252272932e-05, "loss": 0.3404, "step": 5561 }, { "epoch": 0.4406417112299465, "grad_norm": 1.4964988983324858, "learning_rate": 1.2385010619479093e-05, "loss": 0.3285, "step": 5562 }, { "epoch": 0.4407209348385819, "grad_norm": 1.492709753864469, "learning_rate": 1.2382518559187389e-05, "loss": 0.3792, "step": 5563 }, { "epoch": 0.44080015844721726, "grad_norm": 1.321878569212735, "learning_rate": 1.23800263420183e-05, "loss": 0.2791, "step": 5564 }, { "epoch": 0.44087938205585264, "grad_norm": 1.3989104011025209, "learning_rate": 1.2377533968135934e-05, "loss": 0.3068, "step": 5565 }, { "epoch": 0.440958605664488, "grad_norm": 1.4729596066716062, "learning_rate": 1.2375041437704394e-05, "loss": 0.2183, "step": 5566 }, { "epoch": 0.4410378292731234, "grad_norm": 1.1626634673552876, "learning_rate": 1.2372548750887805e-05, "loss": 0.2825, "step": 5567 }, { "epoch": 0.4411170528817588, "grad_norm": 1.4178911378023322, "learning_rate": 1.2370055907850293e-05, "loss": 0.3214, "step": 5568 }, { "epoch": 0.4411962764903941, "grad_norm": 1.7536119491841367, "learning_rate": 1.2367562908756005e-05, "loss": 0.2716, "step": 5569 }, { "epoch": 0.4412755000990295, "grad_norm": 1.3612077929654602, "learning_rate": 1.2365069753769092e-05, "loss": 0.2477, "step": 5570 }, { "epoch": 0.4413547237076649, "grad_norm": 1.3665235754423977, "learning_rate": 1.2362576443053716e-05, "loss": 0.3248, "step": 5571 }, { "epoch": 0.44143394731630026, "grad_norm": 1.6563006850044917, "learning_rate": 1.2360082976774049e-05, "loss": 0.3998, "step": 5572 }, { "epoch": 0.44151317092493564, "grad_norm": 1.1556465611201692, "learning_rate": 1.2357589355094275e-05, "loss": 0.1872, "step": 5573 }, { "epoch": 0.441592394533571, "grad_norm": 1.1516815317847333, "learning_rate": 1.2355095578178582e-05, "loss": 0.3242, "step": 5574 }, { "epoch": 0.4416716181422064, "grad_norm": 1.6032025281087987, "learning_rate": 1.2352601646191182e-05, "loss": 0.3119, "step": 5575 }, { "epoch": 0.4417508417508417, "grad_norm": 1.4502267987187858, "learning_rate": 1.235010755929628e-05, "loss": 0.2695, "step": 5576 }, { "epoch": 0.4418300653594771, "grad_norm": 1.5242226017208766, "learning_rate": 1.2347613317658105e-05, "loss": 0.3038, "step": 5577 }, { "epoch": 0.4419092889681125, "grad_norm": 1.607811135328703, "learning_rate": 1.234511892144089e-05, "loss": 0.3656, "step": 5578 }, { "epoch": 0.44198851257674787, "grad_norm": 1.5822780144113513, "learning_rate": 1.2342624370808876e-05, "loss": 0.4001, "step": 5579 }, { "epoch": 0.44206773618538325, "grad_norm": 1.445624606232555, "learning_rate": 1.2340129665926319e-05, "loss": 0.2533, "step": 5580 }, { "epoch": 0.44214695979401863, "grad_norm": 1.461841083046526, "learning_rate": 1.2337634806957486e-05, "loss": 0.2538, "step": 5581 }, { "epoch": 0.442226183402654, "grad_norm": 1.4950823860907971, "learning_rate": 1.2335139794066645e-05, "loss": 0.2502, "step": 5582 }, { "epoch": 0.44230540701128934, "grad_norm": 1.278136773901328, "learning_rate": 1.2332644627418088e-05, "loss": 0.251, "step": 5583 }, { "epoch": 0.4423846306199247, "grad_norm": 1.219042104528466, "learning_rate": 1.2330149307176105e-05, "loss": 0.298, "step": 5584 }, { "epoch": 0.4424638542285601, "grad_norm": 1.5508062254365524, "learning_rate": 1.2327653833505005e-05, "loss": 0.3416, "step": 5585 }, { "epoch": 0.4425430778371955, "grad_norm": 1.539181482091282, "learning_rate": 1.2325158206569095e-05, "loss": 0.4106, "step": 5586 }, { "epoch": 0.44262230144583087, "grad_norm": 1.7262858546380457, "learning_rate": 1.232266242653271e-05, "loss": 0.337, "step": 5587 }, { "epoch": 0.44270152505446625, "grad_norm": 1.4712714271943768, "learning_rate": 1.2320166493560176e-05, "loss": 0.3356, "step": 5588 }, { "epoch": 0.44278074866310163, "grad_norm": 1.7458166022993233, "learning_rate": 1.2317670407815844e-05, "loss": 0.4123, "step": 5589 }, { "epoch": 0.44285997227173696, "grad_norm": 1.8016174858002976, "learning_rate": 1.2315174169464068e-05, "loss": 0.2755, "step": 5590 }, { "epoch": 0.44293919588037234, "grad_norm": 1.4030103348673175, "learning_rate": 1.2312677778669211e-05, "loss": 0.2906, "step": 5591 }, { "epoch": 0.4430184194890077, "grad_norm": 1.487165116297807, "learning_rate": 1.2310181235595652e-05, "loss": 0.2786, "step": 5592 }, { "epoch": 0.4430976430976431, "grad_norm": 1.3117181667343012, "learning_rate": 1.2307684540407775e-05, "loss": 0.2367, "step": 5593 }, { "epoch": 0.4431768667062785, "grad_norm": 1.338151070222116, "learning_rate": 1.230518769326997e-05, "loss": 0.2512, "step": 5594 }, { "epoch": 0.44325609031491386, "grad_norm": 1.1997029433516588, "learning_rate": 1.2302690694346654e-05, "loss": 0.1748, "step": 5595 }, { "epoch": 0.4433353139235492, "grad_norm": 1.2224114046177161, "learning_rate": 1.230019354380223e-05, "loss": 0.2466, "step": 5596 }, { "epoch": 0.44341453753218457, "grad_norm": 1.2624692573410068, "learning_rate": 1.2297696241801133e-05, "loss": 0.2706, "step": 5597 }, { "epoch": 0.44349376114081995, "grad_norm": 1.447914568671778, "learning_rate": 1.2295198788507794e-05, "loss": 0.2923, "step": 5598 }, { "epoch": 0.44357298474945533, "grad_norm": 1.3210649850300396, "learning_rate": 1.2292701184086656e-05, "loss": 0.2898, "step": 5599 }, { "epoch": 0.4436522083580907, "grad_norm": 1.2037640375666785, "learning_rate": 1.2290203428702178e-05, "loss": 0.2773, "step": 5600 }, { "epoch": 0.4437314319667261, "grad_norm": 1.415959523648096, "learning_rate": 1.2287705522518824e-05, "loss": 0.2772, "step": 5601 }, { "epoch": 0.4438106555753615, "grad_norm": 1.466433524688798, "learning_rate": 1.228520746570107e-05, "loss": 0.2679, "step": 5602 }, { "epoch": 0.4438898791839968, "grad_norm": 1.462996885356959, "learning_rate": 1.22827092584134e-05, "loss": 0.2987, "step": 5603 }, { "epoch": 0.4439691027926322, "grad_norm": 1.4615390316373624, "learning_rate": 1.2280210900820309e-05, "loss": 0.2994, "step": 5604 }, { "epoch": 0.44404832640126757, "grad_norm": 1.263746322122163, "learning_rate": 1.22777123930863e-05, "loss": 0.2362, "step": 5605 }, { "epoch": 0.44412755000990295, "grad_norm": 1.3538626145891963, "learning_rate": 1.227521373537589e-05, "loss": 0.2549, "step": 5606 }, { "epoch": 0.44420677361853833, "grad_norm": 1.5081078101355778, "learning_rate": 1.2272714927853604e-05, "loss": 0.3, "step": 5607 }, { "epoch": 0.4442859972271737, "grad_norm": 1.6211686624569182, "learning_rate": 1.2270215970683977e-05, "loss": 0.3341, "step": 5608 }, { "epoch": 0.4443652208358091, "grad_norm": 1.8599278662623036, "learning_rate": 1.226771686403155e-05, "loss": 0.3583, "step": 5609 }, { "epoch": 0.4444444444444444, "grad_norm": 1.4266100934874566, "learning_rate": 1.2265217608060879e-05, "loss": 0.2859, "step": 5610 }, { "epoch": 0.4445236680530798, "grad_norm": 1.4027887297951571, "learning_rate": 1.226271820293653e-05, "loss": 0.3042, "step": 5611 }, { "epoch": 0.4446028916617152, "grad_norm": 1.4179418474260197, "learning_rate": 1.2260218648823073e-05, "loss": 0.3431, "step": 5612 }, { "epoch": 0.44468211527035056, "grad_norm": 1.2174693240267271, "learning_rate": 1.2257718945885096e-05, "loss": 0.2571, "step": 5613 }, { "epoch": 0.44476133887898595, "grad_norm": 1.397708506841354, "learning_rate": 1.2255219094287186e-05, "loss": 0.363, "step": 5614 }, { "epoch": 0.4448405624876213, "grad_norm": 1.4284158117727428, "learning_rate": 1.225271909419395e-05, "loss": 0.2899, "step": 5615 }, { "epoch": 0.4449197860962567, "grad_norm": 1.5232388442487834, "learning_rate": 1.2250218945770005e-05, "loss": 0.2806, "step": 5616 }, { "epoch": 0.44499900970489203, "grad_norm": 1.5501966759126469, "learning_rate": 1.2247718649179966e-05, "loss": 0.4024, "step": 5617 }, { "epoch": 0.4450782333135274, "grad_norm": 1.4514936474695521, "learning_rate": 1.2245218204588474e-05, "loss": 0.3021, "step": 5618 }, { "epoch": 0.4451574569221628, "grad_norm": 1.4863064472001586, "learning_rate": 1.2242717612160163e-05, "loss": 0.3389, "step": 5619 }, { "epoch": 0.4452366805307982, "grad_norm": 1.7453594524603433, "learning_rate": 1.2240216872059687e-05, "loss": 0.4303, "step": 5620 }, { "epoch": 0.44531590413943356, "grad_norm": 1.3268436436055626, "learning_rate": 1.2237715984451713e-05, "loss": 0.3191, "step": 5621 }, { "epoch": 0.44539512774806894, "grad_norm": 2.1214515781245535, "learning_rate": 1.2235214949500906e-05, "loss": 0.2806, "step": 5622 }, { "epoch": 0.4454743513567043, "grad_norm": 1.3528561737401772, "learning_rate": 1.223271376737195e-05, "loss": 0.3608, "step": 5623 }, { "epoch": 0.44555357496533965, "grad_norm": 1.5317753621719823, "learning_rate": 1.2230212438229539e-05, "loss": 0.301, "step": 5624 }, { "epoch": 0.44563279857397503, "grad_norm": 1.261222751812893, "learning_rate": 1.2227710962238367e-05, "loss": 0.2864, "step": 5625 }, { "epoch": 0.4457120221826104, "grad_norm": 1.5698112652160263, "learning_rate": 1.2225209339563144e-05, "loss": 0.2741, "step": 5626 }, { "epoch": 0.4457912457912458, "grad_norm": 1.3806836766393442, "learning_rate": 1.22227075703686e-05, "loss": 0.3045, "step": 5627 }, { "epoch": 0.4458704693998812, "grad_norm": 1.2311180337479266, "learning_rate": 1.2220205654819453e-05, "loss": 0.2273, "step": 5628 }, { "epoch": 0.44594969300851656, "grad_norm": 1.7031789584364545, "learning_rate": 1.2217703593080445e-05, "loss": 0.3879, "step": 5629 }, { "epoch": 0.44602891661715194, "grad_norm": 1.2619583877073286, "learning_rate": 1.221520138531633e-05, "loss": 0.326, "step": 5630 }, { "epoch": 0.44610814022578726, "grad_norm": 1.2582322548717744, "learning_rate": 1.2212699031691861e-05, "loss": 0.2206, "step": 5631 }, { "epoch": 0.44618736383442265, "grad_norm": 1.5115284585892574, "learning_rate": 1.221019653237181e-05, "loss": 0.334, "step": 5632 }, { "epoch": 0.446266587443058, "grad_norm": 1.491945475518214, "learning_rate": 1.2207693887520949e-05, "loss": 0.3372, "step": 5633 }, { "epoch": 0.4463458110516934, "grad_norm": 1.4588603187953577, "learning_rate": 1.2205191097304067e-05, "loss": 0.2208, "step": 5634 }, { "epoch": 0.4464250346603288, "grad_norm": 1.7213235676092462, "learning_rate": 1.2202688161885967e-05, "loss": 0.3477, "step": 5635 }, { "epoch": 0.44650425826896417, "grad_norm": 1.6365514417234186, "learning_rate": 1.2200185081431446e-05, "loss": 0.3598, "step": 5636 }, { "epoch": 0.4465834818775995, "grad_norm": 1.8158169757899714, "learning_rate": 1.2197681856105326e-05, "loss": 0.3387, "step": 5637 }, { "epoch": 0.4466627054862349, "grad_norm": 1.4116078861533325, "learning_rate": 1.219517848607243e-05, "loss": 0.3064, "step": 5638 }, { "epoch": 0.44674192909487026, "grad_norm": 1.395902534077429, "learning_rate": 1.2192674971497593e-05, "loss": 0.2931, "step": 5639 }, { "epoch": 0.44682115270350564, "grad_norm": 1.299377546562893, "learning_rate": 1.219017131254566e-05, "loss": 0.2398, "step": 5640 }, { "epoch": 0.446900376312141, "grad_norm": 1.3244621349745926, "learning_rate": 1.2187667509381484e-05, "loss": 0.2814, "step": 5641 }, { "epoch": 0.4469795999207764, "grad_norm": 1.3004202124217101, "learning_rate": 1.2185163562169928e-05, "loss": 0.2245, "step": 5642 }, { "epoch": 0.4470588235294118, "grad_norm": 1.493554717170338, "learning_rate": 1.2182659471075868e-05, "loss": 0.2889, "step": 5643 }, { "epoch": 0.4471380471380471, "grad_norm": 1.2714060205955164, "learning_rate": 1.2180155236264182e-05, "loss": 0.2444, "step": 5644 }, { "epoch": 0.4472172707466825, "grad_norm": 1.3094269976849267, "learning_rate": 1.2177650857899767e-05, "loss": 0.2621, "step": 5645 }, { "epoch": 0.4472964943553179, "grad_norm": 1.3355498116081759, "learning_rate": 1.217514633614752e-05, "loss": 0.3199, "step": 5646 }, { "epoch": 0.44737571796395326, "grad_norm": 1.5933756911115966, "learning_rate": 1.217264167117235e-05, "loss": 0.3589, "step": 5647 }, { "epoch": 0.44745494157258864, "grad_norm": 1.8591505173394445, "learning_rate": 1.2170136863139183e-05, "loss": 0.2954, "step": 5648 }, { "epoch": 0.447534165181224, "grad_norm": 1.5608999881471082, "learning_rate": 1.2167631912212942e-05, "loss": 0.4124, "step": 5649 }, { "epoch": 0.4476133887898594, "grad_norm": 1.3476633916095164, "learning_rate": 1.2165126818558572e-05, "loss": 0.2599, "step": 5650 }, { "epoch": 0.4476926123984947, "grad_norm": 1.550843676779218, "learning_rate": 1.2162621582341021e-05, "loss": 0.327, "step": 5651 }, { "epoch": 0.4477718360071301, "grad_norm": 1.567997106644771, "learning_rate": 1.2160116203725243e-05, "loss": 0.2653, "step": 5652 }, { "epoch": 0.4478510596157655, "grad_norm": 1.4763842217872796, "learning_rate": 1.2157610682876206e-05, "loss": 0.3003, "step": 5653 }, { "epoch": 0.44793028322440087, "grad_norm": 1.4265145401670776, "learning_rate": 1.2155105019958888e-05, "loss": 0.2474, "step": 5654 }, { "epoch": 0.44800950683303625, "grad_norm": 1.4110597688766937, "learning_rate": 1.2152599215138274e-05, "loss": 0.2818, "step": 5655 }, { "epoch": 0.44808873044167163, "grad_norm": 1.3134178866048638, "learning_rate": 1.215009326857936e-05, "loss": 0.242, "step": 5656 }, { "epoch": 0.448167954050307, "grad_norm": 1.6632629264082104, "learning_rate": 1.2147587180447149e-05, "loss": 0.3499, "step": 5657 }, { "epoch": 0.44824717765894234, "grad_norm": 1.3206271939147298, "learning_rate": 1.2145080950906656e-05, "loss": 0.2558, "step": 5658 }, { "epoch": 0.4483264012675777, "grad_norm": 1.4042377194422089, "learning_rate": 1.2142574580122903e-05, "loss": 0.2455, "step": 5659 }, { "epoch": 0.4484056248762131, "grad_norm": 1.3950783621012701, "learning_rate": 1.2140068068260923e-05, "loss": 0.2697, "step": 5660 }, { "epoch": 0.4484848484848485, "grad_norm": 1.4310829922275936, "learning_rate": 1.2137561415485761e-05, "loss": 0.2836, "step": 5661 }, { "epoch": 0.44856407209348387, "grad_norm": 1.5322757277538652, "learning_rate": 1.2135054621962464e-05, "loss": 0.2942, "step": 5662 }, { "epoch": 0.44864329570211925, "grad_norm": 1.6048773198584134, "learning_rate": 1.2132547687856093e-05, "loss": 0.282, "step": 5663 }, { "epoch": 0.44872251931075463, "grad_norm": 1.5719289004698593, "learning_rate": 1.2130040613331717e-05, "loss": 0.3725, "step": 5664 }, { "epoch": 0.44880174291938996, "grad_norm": 1.2912692388470648, "learning_rate": 1.2127533398554417e-05, "loss": 0.2314, "step": 5665 }, { "epoch": 0.44888096652802534, "grad_norm": 1.1860242115550932, "learning_rate": 1.2125026043689278e-05, "loss": 0.2297, "step": 5666 }, { "epoch": 0.4489601901366607, "grad_norm": 1.3353291468649866, "learning_rate": 1.2122518548901401e-05, "loss": 0.229, "step": 5667 }, { "epoch": 0.4490394137452961, "grad_norm": 1.680231970648821, "learning_rate": 1.2120010914355888e-05, "loss": 0.2709, "step": 5668 }, { "epoch": 0.4491186373539315, "grad_norm": 1.6520964488403986, "learning_rate": 1.2117503140217858e-05, "loss": 0.3672, "step": 5669 }, { "epoch": 0.44919786096256686, "grad_norm": 1.491990800763491, "learning_rate": 1.2114995226652437e-05, "loss": 0.2796, "step": 5670 }, { "epoch": 0.44927708457120225, "grad_norm": 1.4033688481664495, "learning_rate": 1.2112487173824755e-05, "loss": 0.2872, "step": 5671 }, { "epoch": 0.44935630817983757, "grad_norm": 1.2349682316523733, "learning_rate": 1.2109978981899956e-05, "loss": 0.2952, "step": 5672 }, { "epoch": 0.44943553178847295, "grad_norm": 1.5665694976544338, "learning_rate": 1.2107470651043198e-05, "loss": 0.3462, "step": 5673 }, { "epoch": 0.44951475539710833, "grad_norm": 1.9025505633444724, "learning_rate": 1.2104962181419635e-05, "loss": 0.3696, "step": 5674 }, { "epoch": 0.4495939790057437, "grad_norm": 1.3223970239672982, "learning_rate": 1.2102453573194442e-05, "loss": 0.3072, "step": 5675 }, { "epoch": 0.4496732026143791, "grad_norm": 1.2478443093308735, "learning_rate": 1.2099944826532796e-05, "loss": 0.2699, "step": 5676 }, { "epoch": 0.4497524262230145, "grad_norm": 1.321721375854688, "learning_rate": 1.2097435941599886e-05, "loss": 0.2597, "step": 5677 }, { "epoch": 0.4498316498316498, "grad_norm": 1.4070221763126811, "learning_rate": 1.2094926918560917e-05, "loss": 0.2411, "step": 5678 }, { "epoch": 0.4499108734402852, "grad_norm": 1.5447655077174007, "learning_rate": 1.2092417757581085e-05, "loss": 0.4654, "step": 5679 }, { "epoch": 0.44999009704892057, "grad_norm": 1.2587234604786222, "learning_rate": 1.2089908458825614e-05, "loss": 0.2615, "step": 5680 }, { "epoch": 0.45006932065755595, "grad_norm": 1.4747757819587082, "learning_rate": 1.2087399022459729e-05, "loss": 0.2272, "step": 5681 }, { "epoch": 0.45014854426619133, "grad_norm": 1.4322719388003904, "learning_rate": 1.208488944864866e-05, "loss": 0.2248, "step": 5682 }, { "epoch": 0.4502277678748267, "grad_norm": 1.6743515165257654, "learning_rate": 1.2082379737557655e-05, "loss": 0.4, "step": 5683 }, { "epoch": 0.4503069914834621, "grad_norm": 1.3977804548400712, "learning_rate": 1.2079869889351961e-05, "loss": 0.2876, "step": 5684 }, { "epoch": 0.4503862150920974, "grad_norm": 1.4194994349903556, "learning_rate": 1.2077359904196841e-05, "loss": 0.2952, "step": 5685 }, { "epoch": 0.4504654387007328, "grad_norm": 1.2085239542353055, "learning_rate": 1.2074849782257572e-05, "loss": 0.2729, "step": 5686 }, { "epoch": 0.4505446623093682, "grad_norm": 1.662042138796701, "learning_rate": 1.2072339523699426e-05, "loss": 0.3808, "step": 5687 }, { "epoch": 0.45062388591800356, "grad_norm": 1.6334799971591731, "learning_rate": 1.2069829128687693e-05, "loss": 0.3934, "step": 5688 }, { "epoch": 0.45070310952663895, "grad_norm": 1.5110405515963639, "learning_rate": 1.2067318597387672e-05, "loss": 0.342, "step": 5689 }, { "epoch": 0.4507823331352743, "grad_norm": 1.7289544962085541, "learning_rate": 1.2064807929964668e-05, "loss": 0.3268, "step": 5690 }, { "epoch": 0.4508615567439097, "grad_norm": 1.3999189308063729, "learning_rate": 1.2062297126584e-05, "loss": 0.2473, "step": 5691 }, { "epoch": 0.45094078035254503, "grad_norm": 1.6340043213683135, "learning_rate": 1.2059786187410984e-05, "loss": 0.3313, "step": 5692 }, { "epoch": 0.4510200039611804, "grad_norm": 1.4999903860809731, "learning_rate": 1.2057275112610962e-05, "loss": 0.292, "step": 5693 }, { "epoch": 0.4510992275698158, "grad_norm": 1.79067462279451, "learning_rate": 1.2054763902349273e-05, "loss": 0.3804, "step": 5694 }, { "epoch": 0.4511784511784512, "grad_norm": 1.4983228693808994, "learning_rate": 1.2052252556791267e-05, "loss": 0.3344, "step": 5695 }, { "epoch": 0.45125767478708656, "grad_norm": 1.4351855962247557, "learning_rate": 1.2049741076102307e-05, "loss": 0.3592, "step": 5696 }, { "epoch": 0.45133689839572194, "grad_norm": 1.5138150060860103, "learning_rate": 1.2047229460447759e-05, "loss": 0.3605, "step": 5697 }, { "epoch": 0.4514161220043573, "grad_norm": 1.350547225842883, "learning_rate": 1.2044717709993e-05, "loss": 0.2811, "step": 5698 }, { "epoch": 0.45149534561299265, "grad_norm": 1.4699234739128908, "learning_rate": 1.2042205824903419e-05, "loss": 0.3613, "step": 5699 }, { "epoch": 0.45157456922162803, "grad_norm": 1.365828196254392, "learning_rate": 1.203969380534441e-05, "loss": 0.2788, "step": 5700 }, { "epoch": 0.4516537928302634, "grad_norm": 1.2272319374468275, "learning_rate": 1.2037181651481378e-05, "loss": 0.2243, "step": 5701 }, { "epoch": 0.4517330164388988, "grad_norm": 1.117004464847774, "learning_rate": 1.2034669363479741e-05, "loss": 0.2283, "step": 5702 }, { "epoch": 0.4518122400475342, "grad_norm": 1.7623755314442646, "learning_rate": 1.2032156941504913e-05, "loss": 0.3985, "step": 5703 }, { "epoch": 0.45189146365616956, "grad_norm": 2.17191804917099, "learning_rate": 1.2029644385722327e-05, "loss": 0.4895, "step": 5704 }, { "epoch": 0.45197068726480494, "grad_norm": 1.3829735457567776, "learning_rate": 1.2027131696297429e-05, "loss": 0.246, "step": 5705 }, { "epoch": 0.45204991087344026, "grad_norm": 1.480793656550592, "learning_rate": 1.202461887339566e-05, "loss": 0.361, "step": 5706 }, { "epoch": 0.45212913448207565, "grad_norm": 1.0810474546126068, "learning_rate": 1.2022105917182478e-05, "loss": 0.2011, "step": 5707 }, { "epoch": 0.452208358090711, "grad_norm": 1.3395626871411956, "learning_rate": 1.2019592827823354e-05, "loss": 0.2786, "step": 5708 }, { "epoch": 0.4522875816993464, "grad_norm": 1.797745758637623, "learning_rate": 1.2017079605483758e-05, "loss": 0.3497, "step": 5709 }, { "epoch": 0.4523668053079818, "grad_norm": 1.2294759186821698, "learning_rate": 1.201456625032918e-05, "loss": 0.1891, "step": 5710 }, { "epoch": 0.45244602891661717, "grad_norm": 1.4325641156224465, "learning_rate": 1.2012052762525104e-05, "loss": 0.319, "step": 5711 }, { "epoch": 0.45252525252525255, "grad_norm": 1.4960332139200154, "learning_rate": 1.2009539142237034e-05, "loss": 0.3187, "step": 5712 }, { "epoch": 0.4526044761338879, "grad_norm": 1.4592232893191308, "learning_rate": 1.2007025389630484e-05, "loss": 0.307, "step": 5713 }, { "epoch": 0.45268369974252326, "grad_norm": 1.3591185586590093, "learning_rate": 1.2004511504870966e-05, "loss": 0.3745, "step": 5714 }, { "epoch": 0.45276292335115864, "grad_norm": 1.4897727322545176, "learning_rate": 1.2001997488124011e-05, "loss": 0.2485, "step": 5715 }, { "epoch": 0.452842146959794, "grad_norm": 1.483043569721881, "learning_rate": 1.1999483339555159e-05, "loss": 0.274, "step": 5716 }, { "epoch": 0.4529213705684294, "grad_norm": 1.5536590422542322, "learning_rate": 1.1996969059329944e-05, "loss": 0.2812, "step": 5717 }, { "epoch": 0.4530005941770648, "grad_norm": 1.8383420363434644, "learning_rate": 1.1994454647613928e-05, "loss": 0.3539, "step": 5718 }, { "epoch": 0.4530798177857001, "grad_norm": 1.409749698569539, "learning_rate": 1.199194010457267e-05, "loss": 0.3041, "step": 5719 }, { "epoch": 0.4531590413943355, "grad_norm": 1.4454408871043587, "learning_rate": 1.1989425430371739e-05, "loss": 0.2869, "step": 5720 }, { "epoch": 0.4532382650029709, "grad_norm": 1.4106434422151606, "learning_rate": 1.198691062517672e-05, "loss": 0.3308, "step": 5721 }, { "epoch": 0.45331748861160626, "grad_norm": 1.3976137445032315, "learning_rate": 1.1984395689153195e-05, "loss": 0.2172, "step": 5722 }, { "epoch": 0.45339671222024164, "grad_norm": 1.2540457976088886, "learning_rate": 1.1981880622466759e-05, "loss": 0.2578, "step": 5723 }, { "epoch": 0.453475935828877, "grad_norm": 1.597901950492082, "learning_rate": 1.1979365425283022e-05, "loss": 0.3333, "step": 5724 }, { "epoch": 0.4535551594375124, "grad_norm": 1.6601591706430912, "learning_rate": 1.1976850097767598e-05, "loss": 0.3309, "step": 5725 }, { "epoch": 0.4536343830461477, "grad_norm": 1.470213884409713, "learning_rate": 1.1974334640086104e-05, "loss": 0.3141, "step": 5726 }, { "epoch": 0.4537136066547831, "grad_norm": 1.308721113495105, "learning_rate": 1.1971819052404177e-05, "loss": 0.2414, "step": 5727 }, { "epoch": 0.4537928302634185, "grad_norm": 1.2961204700262785, "learning_rate": 1.196930333488745e-05, "loss": 0.2337, "step": 5728 }, { "epoch": 0.45387205387205387, "grad_norm": 1.6198537127432755, "learning_rate": 1.1966787487701577e-05, "loss": 0.3709, "step": 5729 }, { "epoch": 0.45395127748068925, "grad_norm": 1.4217143114476545, "learning_rate": 1.1964271511012208e-05, "loss": 0.23, "step": 5730 }, { "epoch": 0.45403050108932463, "grad_norm": 1.7570629663085537, "learning_rate": 1.1961755404985015e-05, "loss": 0.4046, "step": 5731 }, { "epoch": 0.45410972469796, "grad_norm": 1.5291335670041004, "learning_rate": 1.1959239169785668e-05, "loss": 0.4338, "step": 5732 }, { "epoch": 0.45418894830659534, "grad_norm": 1.1781689159578843, "learning_rate": 1.1956722805579846e-05, "loss": 0.2519, "step": 5733 }, { "epoch": 0.4542681719152307, "grad_norm": 1.3892331256496078, "learning_rate": 1.1954206312533246e-05, "loss": 0.2791, "step": 5734 }, { "epoch": 0.4543473955238661, "grad_norm": 1.4392557216515836, "learning_rate": 1.1951689690811558e-05, "loss": 0.3333, "step": 5735 }, { "epoch": 0.4544266191325015, "grad_norm": 1.5172430300275368, "learning_rate": 1.1949172940580498e-05, "loss": 0.357, "step": 5736 }, { "epoch": 0.45450584274113687, "grad_norm": 1.339423960795033, "learning_rate": 1.1946656062005781e-05, "loss": 0.2698, "step": 5737 }, { "epoch": 0.45458506634977225, "grad_norm": 1.3538659210414938, "learning_rate": 1.1944139055253126e-05, "loss": 0.3092, "step": 5738 }, { "epoch": 0.45466428995840763, "grad_norm": 1.3601226664085724, "learning_rate": 1.1941621920488271e-05, "loss": 0.3536, "step": 5739 }, { "epoch": 0.45474351356704296, "grad_norm": 1.5244300446272274, "learning_rate": 1.1939104657876953e-05, "loss": 0.3995, "step": 5740 }, { "epoch": 0.45482273717567834, "grad_norm": 1.4251734219174097, "learning_rate": 1.1936587267584924e-05, "loss": 0.2687, "step": 5741 }, { "epoch": 0.4549019607843137, "grad_norm": 1.3092030846523643, "learning_rate": 1.193406974977794e-05, "loss": 0.2706, "step": 5742 }, { "epoch": 0.4549811843929491, "grad_norm": 1.3208564538139058, "learning_rate": 1.1931552104621776e-05, "loss": 0.31, "step": 5743 }, { "epoch": 0.4550604080015845, "grad_norm": 1.5727958859080353, "learning_rate": 1.1929034332282192e-05, "loss": 0.2961, "step": 5744 }, { "epoch": 0.45513963161021986, "grad_norm": 1.6031212043223757, "learning_rate": 1.1926516432924984e-05, "loss": 0.3371, "step": 5745 }, { "epoch": 0.45521885521885525, "grad_norm": 1.2869036772558198, "learning_rate": 1.1923998406715937e-05, "loss": 0.2306, "step": 5746 }, { "epoch": 0.45529807882749057, "grad_norm": 1.4517582998730816, "learning_rate": 1.1921480253820852e-05, "loss": 0.3081, "step": 5747 }, { "epoch": 0.45537730243612595, "grad_norm": 1.6206205154294324, "learning_rate": 1.1918961974405539e-05, "loss": 0.3239, "step": 5748 }, { "epoch": 0.45545652604476133, "grad_norm": 1.2887167604453937, "learning_rate": 1.1916443568635812e-05, "loss": 0.2097, "step": 5749 }, { "epoch": 0.4555357496533967, "grad_norm": 1.514621119634706, "learning_rate": 1.1913925036677497e-05, "loss": 0.301, "step": 5750 }, { "epoch": 0.4556149732620321, "grad_norm": 1.2683254306705893, "learning_rate": 1.191140637869643e-05, "loss": 0.2318, "step": 5751 }, { "epoch": 0.4556941968706675, "grad_norm": 1.5431935493771602, "learning_rate": 1.1908887594858447e-05, "loss": 0.3735, "step": 5752 }, { "epoch": 0.45577342047930286, "grad_norm": 1.7857798091214419, "learning_rate": 1.1906368685329403e-05, "loss": 0.3739, "step": 5753 }, { "epoch": 0.4558526440879382, "grad_norm": 1.4597646732032068, "learning_rate": 1.1903849650275154e-05, "loss": 0.3089, "step": 5754 }, { "epoch": 0.45593186769657357, "grad_norm": 1.5590509118365934, "learning_rate": 1.1901330489861564e-05, "loss": 0.3693, "step": 5755 }, { "epoch": 0.45601109130520895, "grad_norm": 1.5386134339392226, "learning_rate": 1.1898811204254515e-05, "loss": 0.3234, "step": 5756 }, { "epoch": 0.45609031491384433, "grad_norm": 1.252584357863649, "learning_rate": 1.189629179361988e-05, "loss": 0.2621, "step": 5757 }, { "epoch": 0.4561695385224797, "grad_norm": 1.5034891076727033, "learning_rate": 1.1893772258123554e-05, "loss": 0.2951, "step": 5758 }, { "epoch": 0.4562487621311151, "grad_norm": 1.2413364254139216, "learning_rate": 1.1891252597931441e-05, "loss": 0.2453, "step": 5759 }, { "epoch": 0.4563279857397504, "grad_norm": 1.6617547901197451, "learning_rate": 1.1888732813209442e-05, "loss": 0.3789, "step": 5760 }, { "epoch": 0.4564072093483858, "grad_norm": 1.4517932555882869, "learning_rate": 1.1886212904123477e-05, "loss": 0.3268, "step": 5761 }, { "epoch": 0.4564864329570212, "grad_norm": 1.2400286812980494, "learning_rate": 1.1883692870839466e-05, "loss": 0.2754, "step": 5762 }, { "epoch": 0.45656565656565656, "grad_norm": 1.3262068219975502, "learning_rate": 1.1881172713523346e-05, "loss": 0.1943, "step": 5763 }, { "epoch": 0.45664488017429194, "grad_norm": 1.48672114506166, "learning_rate": 1.1878652432341053e-05, "loss": 0.3167, "step": 5764 }, { "epoch": 0.4567241037829273, "grad_norm": 1.5008902543237528, "learning_rate": 1.1876132027458535e-05, "loss": 0.323, "step": 5765 }, { "epoch": 0.4568033273915627, "grad_norm": 1.3969924935786768, "learning_rate": 1.1873611499041752e-05, "loss": 0.2427, "step": 5766 }, { "epoch": 0.45688255100019803, "grad_norm": 1.5149441443533935, "learning_rate": 1.1871090847256667e-05, "loss": 0.2943, "step": 5767 }, { "epoch": 0.4569617746088334, "grad_norm": 1.45923579524261, "learning_rate": 1.1868570072269252e-05, "loss": 0.2675, "step": 5768 }, { "epoch": 0.4570409982174688, "grad_norm": 1.429117756291073, "learning_rate": 1.186604917424549e-05, "loss": 0.3155, "step": 5769 }, { "epoch": 0.4571202218261042, "grad_norm": 1.3350056860898667, "learning_rate": 1.1863528153351369e-05, "loss": 0.2388, "step": 5770 }, { "epoch": 0.45719944543473956, "grad_norm": 1.5321491947718167, "learning_rate": 1.1861007009752884e-05, "loss": 0.3431, "step": 5771 }, { "epoch": 0.45727866904337494, "grad_norm": 1.2085103238833457, "learning_rate": 1.1858485743616044e-05, "loss": 0.269, "step": 5772 }, { "epoch": 0.4573578926520103, "grad_norm": 1.5498835246711031, "learning_rate": 1.185596435510686e-05, "loss": 0.2705, "step": 5773 }, { "epoch": 0.45743711626064565, "grad_norm": 1.3797000368243975, "learning_rate": 1.1853442844391354e-05, "loss": 0.2681, "step": 5774 }, { "epoch": 0.45751633986928103, "grad_norm": 1.9592103269698289, "learning_rate": 1.1850921211635554e-05, "loss": 0.3459, "step": 5775 }, { "epoch": 0.4575955634779164, "grad_norm": 1.2909897101579606, "learning_rate": 1.1848399457005496e-05, "loss": 0.2894, "step": 5776 }, { "epoch": 0.4576747870865518, "grad_norm": 1.4453921883376117, "learning_rate": 1.1845877580667232e-05, "loss": 0.2199, "step": 5777 }, { "epoch": 0.4577540106951872, "grad_norm": 1.5272811631257404, "learning_rate": 1.1843355582786806e-05, "loss": 0.274, "step": 5778 }, { "epoch": 0.45783323430382256, "grad_norm": 1.6125278654822204, "learning_rate": 1.1840833463530289e-05, "loss": 0.3534, "step": 5779 }, { "epoch": 0.45791245791245794, "grad_norm": 1.395935209993027, "learning_rate": 1.1838311223063745e-05, "loss": 0.3339, "step": 5780 }, { "epoch": 0.45799168152109326, "grad_norm": 1.55512571670022, "learning_rate": 1.1835788861553252e-05, "loss": 0.3615, "step": 5781 }, { "epoch": 0.45807090512972864, "grad_norm": 1.569535485537372, "learning_rate": 1.1833266379164894e-05, "loss": 0.2826, "step": 5782 }, { "epoch": 0.458150128738364, "grad_norm": 1.30058408106229, "learning_rate": 1.183074377606477e-05, "loss": 0.2461, "step": 5783 }, { "epoch": 0.4582293523469994, "grad_norm": 1.630604386877381, "learning_rate": 1.1828221052418973e-05, "loss": 0.3472, "step": 5784 }, { "epoch": 0.4583085759556348, "grad_norm": 1.269280938566071, "learning_rate": 1.182569820839362e-05, "loss": 0.2378, "step": 5785 }, { "epoch": 0.45838779956427017, "grad_norm": 1.7220948284689819, "learning_rate": 1.1823175244154823e-05, "loss": 0.3285, "step": 5786 }, { "epoch": 0.45846702317290555, "grad_norm": 1.2080600454938273, "learning_rate": 1.1820652159868706e-05, "loss": 0.2531, "step": 5787 }, { "epoch": 0.4585462467815409, "grad_norm": 1.3942546131497715, "learning_rate": 1.1818128955701409e-05, "loss": 0.2282, "step": 5788 }, { "epoch": 0.45862547039017626, "grad_norm": 1.1519972686446, "learning_rate": 1.1815605631819066e-05, "loss": 0.2448, "step": 5789 }, { "epoch": 0.45870469399881164, "grad_norm": 1.4903720459379082, "learning_rate": 1.181308218838783e-05, "loss": 0.3902, "step": 5790 }, { "epoch": 0.458783917607447, "grad_norm": 1.2604366536057166, "learning_rate": 1.1810558625573856e-05, "loss": 0.2314, "step": 5791 }, { "epoch": 0.4588631412160824, "grad_norm": 1.4650561507737663, "learning_rate": 1.1808034943543308e-05, "loss": 0.3038, "step": 5792 }, { "epoch": 0.4589423648247178, "grad_norm": 1.2026375208177977, "learning_rate": 1.1805511142462355e-05, "loss": 0.21, "step": 5793 }, { "epoch": 0.4590215884333531, "grad_norm": 1.3351964738289657, "learning_rate": 1.1802987222497186e-05, "loss": 0.2694, "step": 5794 }, { "epoch": 0.4591008120419885, "grad_norm": 1.494899818043445, "learning_rate": 1.1800463183813982e-05, "loss": 0.336, "step": 5795 }, { "epoch": 0.4591800356506239, "grad_norm": 1.6648417341917845, "learning_rate": 1.1797939026578941e-05, "loss": 0.3255, "step": 5796 }, { "epoch": 0.45925925925925926, "grad_norm": 1.674855659653074, "learning_rate": 1.1795414750958265e-05, "loss": 0.3838, "step": 5797 }, { "epoch": 0.45933848286789464, "grad_norm": 1.5051472501295597, "learning_rate": 1.1792890357118165e-05, "loss": 0.3576, "step": 5798 }, { "epoch": 0.45941770647653, "grad_norm": 1.3496815061636451, "learning_rate": 1.1790365845224866e-05, "loss": 0.2512, "step": 5799 }, { "epoch": 0.4594969300851654, "grad_norm": 1.5108426684795782, "learning_rate": 1.1787841215444588e-05, "loss": 0.4088, "step": 5800 }, { "epoch": 0.4595761536938007, "grad_norm": 1.5901979752085984, "learning_rate": 1.1785316467943568e-05, "loss": 0.3537, "step": 5801 }, { "epoch": 0.4596553773024361, "grad_norm": 1.7061098616246408, "learning_rate": 1.1782791602888052e-05, "loss": 0.3571, "step": 5802 }, { "epoch": 0.4597346009110715, "grad_norm": 1.5201164721215994, "learning_rate": 1.1780266620444285e-05, "loss": 0.242, "step": 5803 }, { "epoch": 0.45981382451970687, "grad_norm": 1.2207544637052976, "learning_rate": 1.1777741520778529e-05, "loss": 0.2173, "step": 5804 }, { "epoch": 0.45989304812834225, "grad_norm": 1.4754438542813102, "learning_rate": 1.1775216304057046e-05, "loss": 0.435, "step": 5805 }, { "epoch": 0.45997227173697763, "grad_norm": 1.1743709684450185, "learning_rate": 1.1772690970446113e-05, "loss": 0.2146, "step": 5806 }, { "epoch": 0.460051495345613, "grad_norm": 1.237303037004652, "learning_rate": 1.177016552011201e-05, "loss": 0.2859, "step": 5807 }, { "epoch": 0.46013071895424834, "grad_norm": 1.2029282991803614, "learning_rate": 1.176763995322102e-05, "loss": 0.234, "step": 5808 }, { "epoch": 0.4602099425628837, "grad_norm": 1.2952291340166198, "learning_rate": 1.1765114269939448e-05, "loss": 0.2942, "step": 5809 }, { "epoch": 0.4602891661715191, "grad_norm": 1.3861406224676602, "learning_rate": 1.1762588470433593e-05, "loss": 0.3288, "step": 5810 }, { "epoch": 0.4603683897801545, "grad_norm": 1.4694750471892293, "learning_rate": 1.176006255486977e-05, "loss": 0.352, "step": 5811 }, { "epoch": 0.46044761338878987, "grad_norm": 1.6924848631070186, "learning_rate": 1.1757536523414297e-05, "loss": 0.3088, "step": 5812 }, { "epoch": 0.46052683699742525, "grad_norm": 1.4901303566245971, "learning_rate": 1.1755010376233498e-05, "loss": 0.3659, "step": 5813 }, { "epoch": 0.46060606060606063, "grad_norm": 1.3640039024549215, "learning_rate": 1.175248411349371e-05, "loss": 0.2377, "step": 5814 }, { "epoch": 0.46068528421469596, "grad_norm": 1.3703779517395518, "learning_rate": 1.1749957735361279e-05, "loss": 0.3209, "step": 5815 }, { "epoch": 0.46076450782333134, "grad_norm": 1.439294206824867, "learning_rate": 1.174743124200255e-05, "loss": 0.3338, "step": 5816 }, { "epoch": 0.4608437314319667, "grad_norm": 1.3179582780701287, "learning_rate": 1.1744904633583883e-05, "loss": 0.2568, "step": 5817 }, { "epoch": 0.4609229550406021, "grad_norm": 1.3738466216599348, "learning_rate": 1.1742377910271638e-05, "loss": 0.3188, "step": 5818 }, { "epoch": 0.4610021786492375, "grad_norm": 1.2526366875212827, "learning_rate": 1.1739851072232195e-05, "loss": 0.1994, "step": 5819 }, { "epoch": 0.46108140225787286, "grad_norm": 1.353160274388425, "learning_rate": 1.1737324119631927e-05, "loss": 0.2055, "step": 5820 }, { "epoch": 0.46116062586650824, "grad_norm": 1.7081065976857985, "learning_rate": 1.173479705263723e-05, "loss": 0.4134, "step": 5821 }, { "epoch": 0.46123984947514357, "grad_norm": 1.2886692529671708, "learning_rate": 1.1732269871414492e-05, "loss": 0.2103, "step": 5822 }, { "epoch": 0.46131907308377895, "grad_norm": 1.325797169787858, "learning_rate": 1.1729742576130119e-05, "loss": 0.3467, "step": 5823 }, { "epoch": 0.46139829669241433, "grad_norm": 1.5460490773505353, "learning_rate": 1.1727215166950519e-05, "loss": 0.3146, "step": 5824 }, { "epoch": 0.4614775203010497, "grad_norm": 1.3872533892595444, "learning_rate": 1.172468764404211e-05, "loss": 0.2733, "step": 5825 }, { "epoch": 0.4615567439096851, "grad_norm": 1.4970445979959512, "learning_rate": 1.172216000757132e-05, "loss": 0.2673, "step": 5826 }, { "epoch": 0.4616359675183205, "grad_norm": 1.2613006210776547, "learning_rate": 1.1719632257704581e-05, "loss": 0.2661, "step": 5827 }, { "epoch": 0.46171519112695586, "grad_norm": 1.3411424333430852, "learning_rate": 1.171710439460833e-05, "loss": 0.2484, "step": 5828 }, { "epoch": 0.4617944147355912, "grad_norm": 1.3555948879770965, "learning_rate": 1.1714576418449017e-05, "loss": 0.2856, "step": 5829 }, { "epoch": 0.46187363834422657, "grad_norm": 1.5903874552063866, "learning_rate": 1.1712048329393097e-05, "loss": 0.4079, "step": 5830 }, { "epoch": 0.46195286195286195, "grad_norm": 1.1493036164470645, "learning_rate": 1.1709520127607035e-05, "loss": 0.1884, "step": 5831 }, { "epoch": 0.46203208556149733, "grad_norm": 1.3868844470942308, "learning_rate": 1.1706991813257295e-05, "loss": 0.2199, "step": 5832 }, { "epoch": 0.4621113091701327, "grad_norm": 1.3331341065218882, "learning_rate": 1.1704463386510358e-05, "loss": 0.2838, "step": 5833 }, { "epoch": 0.4621905327787681, "grad_norm": 1.1197279688201989, "learning_rate": 1.170193484753271e-05, "loss": 0.1892, "step": 5834 }, { "epoch": 0.4622697563874034, "grad_norm": 1.583624950072256, "learning_rate": 1.169940619649084e-05, "loss": 0.3658, "step": 5835 }, { "epoch": 0.4623489799960388, "grad_norm": 1.7080872713857547, "learning_rate": 1.1696877433551248e-05, "loss": 0.3536, "step": 5836 }, { "epoch": 0.4624282036046742, "grad_norm": 1.069071998411596, "learning_rate": 1.1694348558880447e-05, "loss": 0.2347, "step": 5837 }, { "epoch": 0.46250742721330956, "grad_norm": 1.4593226024461419, "learning_rate": 1.1691819572644941e-05, "loss": 0.3536, "step": 5838 }, { "epoch": 0.46258665082194494, "grad_norm": 1.4287101868376675, "learning_rate": 1.1689290475011258e-05, "loss": 0.3557, "step": 5839 }, { "epoch": 0.4626658744305803, "grad_norm": 1.2918833237530993, "learning_rate": 1.1686761266145926e-05, "loss": 0.2853, "step": 5840 }, { "epoch": 0.4627450980392157, "grad_norm": 1.4059293127245893, "learning_rate": 1.1684231946215478e-05, "loss": 0.233, "step": 5841 }, { "epoch": 0.46282432164785103, "grad_norm": 1.3019925088270827, "learning_rate": 1.1681702515386466e-05, "loss": 0.2923, "step": 5842 }, { "epoch": 0.4629035452564864, "grad_norm": 1.2342383719973151, "learning_rate": 1.167917297382543e-05, "loss": 0.2533, "step": 5843 }, { "epoch": 0.4629827688651218, "grad_norm": 1.526115511376604, "learning_rate": 1.1676643321698934e-05, "loss": 0.2866, "step": 5844 }, { "epoch": 0.4630619924737572, "grad_norm": 1.34552946388799, "learning_rate": 1.1674113559173548e-05, "loss": 0.3095, "step": 5845 }, { "epoch": 0.46314121608239256, "grad_norm": 1.4549767257678594, "learning_rate": 1.1671583686415833e-05, "loss": 0.3155, "step": 5846 }, { "epoch": 0.46322043969102794, "grad_norm": 1.990093310569591, "learning_rate": 1.1669053703592381e-05, "loss": 0.3812, "step": 5847 }, { "epoch": 0.4632996632996633, "grad_norm": 1.4509912979858135, "learning_rate": 1.1666523610869769e-05, "loss": 0.3414, "step": 5848 }, { "epoch": 0.46337888690829865, "grad_norm": 1.6202417294566522, "learning_rate": 1.1663993408414597e-05, "loss": 0.3421, "step": 5849 }, { "epoch": 0.46345811051693403, "grad_norm": 1.0987634947033051, "learning_rate": 1.1661463096393468e-05, "loss": 0.1903, "step": 5850 }, { "epoch": 0.4635373341255694, "grad_norm": 1.334617601431492, "learning_rate": 1.1658932674972985e-05, "loss": 0.3234, "step": 5851 }, { "epoch": 0.4636165577342048, "grad_norm": 1.2463186147500964, "learning_rate": 1.1656402144319772e-05, "loss": 0.2276, "step": 5852 }, { "epoch": 0.4636957813428402, "grad_norm": 1.298234601286431, "learning_rate": 1.1653871504600445e-05, "loss": 0.249, "step": 5853 }, { "epoch": 0.46377500495147556, "grad_norm": 1.5794023520799612, "learning_rate": 1.1651340755981634e-05, "loss": 0.3443, "step": 5854 }, { "epoch": 0.46385422856011094, "grad_norm": 1.2784290013496649, "learning_rate": 1.1648809898629987e-05, "loss": 0.3174, "step": 5855 }, { "epoch": 0.46393345216874626, "grad_norm": 1.452076026122277, "learning_rate": 1.1646278932712138e-05, "loss": 0.3894, "step": 5856 }, { "epoch": 0.46401267577738164, "grad_norm": 1.3344227301528433, "learning_rate": 1.1643747858394743e-05, "loss": 0.2706, "step": 5857 }, { "epoch": 0.464091899386017, "grad_norm": 0.9562097964729624, "learning_rate": 1.1641216675844461e-05, "loss": 0.1585, "step": 5858 }, { "epoch": 0.4641711229946524, "grad_norm": 1.5174806803779837, "learning_rate": 1.1638685385227958e-05, "loss": 0.2979, "step": 5859 }, { "epoch": 0.4642503466032878, "grad_norm": 1.6830460786407306, "learning_rate": 1.1636153986711906e-05, "loss": 0.403, "step": 5860 }, { "epoch": 0.46432957021192317, "grad_norm": 1.3076463841194843, "learning_rate": 1.163362248046299e-05, "loss": 0.2632, "step": 5861 }, { "epoch": 0.46440879382055855, "grad_norm": 1.4283172494652356, "learning_rate": 1.1631090866647891e-05, "loss": 0.2957, "step": 5862 }, { "epoch": 0.4644880174291939, "grad_norm": 1.181185972061132, "learning_rate": 1.1628559145433308e-05, "loss": 0.2359, "step": 5863 }, { "epoch": 0.46456724103782926, "grad_norm": 1.4970012536710695, "learning_rate": 1.1626027316985942e-05, "loss": 0.2791, "step": 5864 }, { "epoch": 0.46464646464646464, "grad_norm": 1.559380902925295, "learning_rate": 1.1623495381472499e-05, "loss": 0.3301, "step": 5865 }, { "epoch": 0.4647256882551, "grad_norm": 1.09724164281304, "learning_rate": 1.16209633390597e-05, "loss": 0.1572, "step": 5866 }, { "epoch": 0.4648049118637354, "grad_norm": 1.2164473046723352, "learning_rate": 1.161843118991426e-05, "loss": 0.2158, "step": 5867 }, { "epoch": 0.4648841354723708, "grad_norm": 1.6507502862357475, "learning_rate": 1.1615898934202917e-05, "loss": 0.3585, "step": 5868 }, { "epoch": 0.46496335908100617, "grad_norm": 1.5215294171972211, "learning_rate": 1.1613366572092404e-05, "loss": 0.3672, "step": 5869 }, { "epoch": 0.4650425826896415, "grad_norm": 1.449691723728734, "learning_rate": 1.1610834103749465e-05, "loss": 0.2966, "step": 5870 }, { "epoch": 0.4651218062982769, "grad_norm": 1.6031097741537381, "learning_rate": 1.1608301529340848e-05, "loss": 0.338, "step": 5871 }, { "epoch": 0.46520102990691226, "grad_norm": 1.3388235624601201, "learning_rate": 1.1605768849033318e-05, "loss": 0.2733, "step": 5872 }, { "epoch": 0.46528025351554764, "grad_norm": 1.3082532811308232, "learning_rate": 1.1603236062993635e-05, "loss": 0.2612, "step": 5873 }, { "epoch": 0.465359477124183, "grad_norm": 1.3401811728108182, "learning_rate": 1.1600703171388572e-05, "loss": 0.3436, "step": 5874 }, { "epoch": 0.4654387007328184, "grad_norm": 1.4250120946522484, "learning_rate": 1.1598170174384907e-05, "loss": 0.2687, "step": 5875 }, { "epoch": 0.4655179243414537, "grad_norm": 1.7167241785795126, "learning_rate": 1.1595637072149424e-05, "loss": 0.4221, "step": 5876 }, { "epoch": 0.4655971479500891, "grad_norm": 1.3247607893483335, "learning_rate": 1.159310386484892e-05, "loss": 0.2892, "step": 5877 }, { "epoch": 0.4656763715587245, "grad_norm": 1.4817050289616598, "learning_rate": 1.159057055265019e-05, "loss": 0.277, "step": 5878 }, { "epoch": 0.46575559516735987, "grad_norm": 1.8994949105587406, "learning_rate": 1.1588037135720043e-05, "loss": 0.3407, "step": 5879 }, { "epoch": 0.46583481877599525, "grad_norm": 1.3109245555235354, "learning_rate": 1.1585503614225292e-05, "loss": 0.272, "step": 5880 }, { "epoch": 0.46591404238463063, "grad_norm": 1.3529965601333391, "learning_rate": 1.1582969988332757e-05, "loss": 0.2307, "step": 5881 }, { "epoch": 0.465993265993266, "grad_norm": 1.3791916813137433, "learning_rate": 1.1580436258209266e-05, "loss": 0.3558, "step": 5882 }, { "epoch": 0.46607248960190134, "grad_norm": 1.402465504951365, "learning_rate": 1.1577902424021653e-05, "loss": 0.2872, "step": 5883 }, { "epoch": 0.4661517132105367, "grad_norm": 1.5555047853239525, "learning_rate": 1.1575368485936752e-05, "loss": 0.3506, "step": 5884 }, { "epoch": 0.4662309368191721, "grad_norm": 1.4793861360882956, "learning_rate": 1.1572834444121424e-05, "loss": 0.3755, "step": 5885 }, { "epoch": 0.4663101604278075, "grad_norm": 1.3379543298678456, "learning_rate": 1.157030029874251e-05, "loss": 0.2952, "step": 5886 }, { "epoch": 0.46638938403644287, "grad_norm": 1.5695977445171625, "learning_rate": 1.1567766049966882e-05, "loss": 0.3603, "step": 5887 }, { "epoch": 0.46646860764507825, "grad_norm": 1.2398785096113882, "learning_rate": 1.1565231697961398e-05, "loss": 0.2797, "step": 5888 }, { "epoch": 0.46654783125371363, "grad_norm": 1.2241398310527707, "learning_rate": 1.1562697242892939e-05, "loss": 0.2543, "step": 5889 }, { "epoch": 0.46662705486234896, "grad_norm": 1.383505644409573, "learning_rate": 1.156016268492839e-05, "loss": 0.2983, "step": 5890 }, { "epoch": 0.46670627847098434, "grad_norm": 1.2846054374557785, "learning_rate": 1.155762802423463e-05, "loss": 0.3342, "step": 5891 }, { "epoch": 0.4667855020796197, "grad_norm": 1.1907580795481385, "learning_rate": 1.1555093260978562e-05, "loss": 0.1971, "step": 5892 }, { "epoch": 0.4668647256882551, "grad_norm": 1.407457724818261, "learning_rate": 1.1552558395327087e-05, "loss": 0.2772, "step": 5893 }, { "epoch": 0.4669439492968905, "grad_norm": 1.2549333894460415, "learning_rate": 1.155002342744711e-05, "loss": 0.2265, "step": 5894 }, { "epoch": 0.46702317290552586, "grad_norm": 1.5459277325212406, "learning_rate": 1.1547488357505549e-05, "loss": 0.3602, "step": 5895 }, { "epoch": 0.46710239651416124, "grad_norm": 1.5646472437035468, "learning_rate": 1.1544953185669327e-05, "loss": 0.2949, "step": 5896 }, { "epoch": 0.46718162012279657, "grad_norm": 1.356841730982432, "learning_rate": 1.154241791210537e-05, "loss": 0.235, "step": 5897 }, { "epoch": 0.46726084373143195, "grad_norm": 1.2008078649428995, "learning_rate": 1.1539882536980616e-05, "loss": 0.2086, "step": 5898 }, { "epoch": 0.46734006734006733, "grad_norm": 1.6774727721437268, "learning_rate": 1.1537347060462007e-05, "loss": 0.3408, "step": 5899 }, { "epoch": 0.4674192909487027, "grad_norm": 1.5095871506198835, "learning_rate": 1.1534811482716487e-05, "loss": 0.2959, "step": 5900 }, { "epoch": 0.4674985145573381, "grad_norm": 1.479597507570882, "learning_rate": 1.1532275803911021e-05, "loss": 0.3246, "step": 5901 }, { "epoch": 0.4675777381659735, "grad_norm": 1.4314014571486418, "learning_rate": 1.1529740024212566e-05, "loss": 0.2955, "step": 5902 }, { "epoch": 0.46765696177460886, "grad_norm": 1.4497805663916152, "learning_rate": 1.1527204143788086e-05, "loss": 0.3409, "step": 5903 }, { "epoch": 0.4677361853832442, "grad_norm": 1.7079909826979645, "learning_rate": 1.1524668162804566e-05, "loss": 0.3229, "step": 5904 }, { "epoch": 0.46781540899187957, "grad_norm": 1.4018138618292848, "learning_rate": 1.1522132081428982e-05, "loss": 0.2869, "step": 5905 }, { "epoch": 0.46789463260051495, "grad_norm": 1.5362083590279747, "learning_rate": 1.1519595899828325e-05, "loss": 0.384, "step": 5906 }, { "epoch": 0.46797385620915033, "grad_norm": 1.279129152749894, "learning_rate": 1.151705961816959e-05, "loss": 0.388, "step": 5907 }, { "epoch": 0.4680530798177857, "grad_norm": 1.491971794167894, "learning_rate": 1.151452323661978e-05, "loss": 0.3205, "step": 5908 }, { "epoch": 0.4681323034264211, "grad_norm": 1.4883387354921294, "learning_rate": 1.15119867553459e-05, "loss": 0.3121, "step": 5909 }, { "epoch": 0.4682115270350565, "grad_norm": 1.3052414195923816, "learning_rate": 1.150945017451497e-05, "loss": 0.2389, "step": 5910 }, { "epoch": 0.4682907506436918, "grad_norm": 1.2167432251666415, "learning_rate": 1.1506913494294005e-05, "loss": 0.2623, "step": 5911 }, { "epoch": 0.4683699742523272, "grad_norm": 1.3460327688366835, "learning_rate": 1.1504376714850041e-05, "loss": 0.3318, "step": 5912 }, { "epoch": 0.46844919786096256, "grad_norm": 1.4621456928169723, "learning_rate": 1.1501839836350106e-05, "loss": 0.31, "step": 5913 }, { "epoch": 0.46852842146959794, "grad_norm": 1.2594011445357711, "learning_rate": 1.1499302858961245e-05, "loss": 0.3236, "step": 5914 }, { "epoch": 0.4686076450782333, "grad_norm": 1.1108403646694895, "learning_rate": 1.1496765782850507e-05, "loss": 0.2505, "step": 5915 }, { "epoch": 0.4686868686868687, "grad_norm": 1.496173446371289, "learning_rate": 1.149422860818494e-05, "loss": 0.3351, "step": 5916 }, { "epoch": 0.46876609229550403, "grad_norm": 1.6609630208205832, "learning_rate": 1.1491691335131614e-05, "loss": 0.3534, "step": 5917 }, { "epoch": 0.4688453159041394, "grad_norm": 1.242232844219903, "learning_rate": 1.148915396385759e-05, "loss": 0.2511, "step": 5918 }, { "epoch": 0.4689245395127748, "grad_norm": 1.4020675928915163, "learning_rate": 1.1486616494529939e-05, "loss": 0.2571, "step": 5919 }, { "epoch": 0.4690037631214102, "grad_norm": 1.3758723864915638, "learning_rate": 1.1484078927315749e-05, "loss": 0.2942, "step": 5920 }, { "epoch": 0.46908298673004556, "grad_norm": 1.4871185537974108, "learning_rate": 1.1481541262382102e-05, "loss": 0.2218, "step": 5921 }, { "epoch": 0.46916221033868094, "grad_norm": 1.195507971647317, "learning_rate": 1.1479003499896089e-05, "loss": 0.2192, "step": 5922 }, { "epoch": 0.4692414339473163, "grad_norm": 1.316782426701206, "learning_rate": 1.1476465640024814e-05, "loss": 0.2656, "step": 5923 }, { "epoch": 0.46932065755595165, "grad_norm": 1.477730636338676, "learning_rate": 1.147392768293538e-05, "loss": 0.2566, "step": 5924 }, { "epoch": 0.46939988116458703, "grad_norm": 1.5364536641776358, "learning_rate": 1.1471389628794902e-05, "loss": 0.3449, "step": 5925 }, { "epoch": 0.4694791047732224, "grad_norm": 1.5516792838355584, "learning_rate": 1.1468851477770495e-05, "loss": 0.3398, "step": 5926 }, { "epoch": 0.4695583283818578, "grad_norm": 1.3872521058879526, "learning_rate": 1.1466313230029284e-05, "loss": 0.3252, "step": 5927 }, { "epoch": 0.4696375519904932, "grad_norm": 1.7100901729966975, "learning_rate": 1.1463774885738408e-05, "loss": 0.3753, "step": 5928 }, { "epoch": 0.46971677559912856, "grad_norm": 1.6736545382458248, "learning_rate": 1.1461236445064993e-05, "loss": 0.2758, "step": 5929 }, { "epoch": 0.46979599920776394, "grad_norm": 1.2981479966578386, "learning_rate": 1.1458697908176194e-05, "loss": 0.308, "step": 5930 }, { "epoch": 0.46987522281639926, "grad_norm": 1.3274911620652021, "learning_rate": 1.1456159275239153e-05, "loss": 0.2804, "step": 5931 }, { "epoch": 0.46995444642503464, "grad_norm": 1.5261867594043812, "learning_rate": 1.1453620546421032e-05, "loss": 0.2517, "step": 5932 }, { "epoch": 0.47003367003367, "grad_norm": 1.4038590648956586, "learning_rate": 1.1451081721888992e-05, "loss": 0.267, "step": 5933 }, { "epoch": 0.4701128936423054, "grad_norm": 1.499493483332517, "learning_rate": 1.1448542801810203e-05, "loss": 0.3292, "step": 5934 }, { "epoch": 0.4701921172509408, "grad_norm": 1.6769715140374728, "learning_rate": 1.144600378635184e-05, "loss": 0.2732, "step": 5935 }, { "epoch": 0.47027134085957617, "grad_norm": 1.5400471128934965, "learning_rate": 1.1443464675681089e-05, "loss": 0.3277, "step": 5936 }, { "epoch": 0.47035056446821155, "grad_norm": 1.376606461102735, "learning_rate": 1.1440925469965129e-05, "loss": 0.284, "step": 5937 }, { "epoch": 0.4704297880768469, "grad_norm": 1.6647905708565027, "learning_rate": 1.1438386169371164e-05, "loss": 0.37, "step": 5938 }, { "epoch": 0.47050901168548226, "grad_norm": 1.8576555372334442, "learning_rate": 1.143584677406639e-05, "loss": 0.3821, "step": 5939 }, { "epoch": 0.47058823529411764, "grad_norm": 1.6176689734431222, "learning_rate": 1.1433307284218014e-05, "loss": 0.3724, "step": 5940 }, { "epoch": 0.470667458902753, "grad_norm": 1.154470501836486, "learning_rate": 1.1430767699993247e-05, "loss": 0.236, "step": 5941 }, { "epoch": 0.4707466825113884, "grad_norm": 1.4834278086407116, "learning_rate": 1.1428228021559316e-05, "loss": 0.2748, "step": 5942 }, { "epoch": 0.4708259061200238, "grad_norm": 1.4147886401587715, "learning_rate": 1.142568824908344e-05, "loss": 0.2135, "step": 5943 }, { "epoch": 0.47090512972865917, "grad_norm": 1.2925225013373358, "learning_rate": 1.1423148382732854e-05, "loss": 0.2721, "step": 5944 }, { "epoch": 0.4709843533372945, "grad_norm": 1.218865879245756, "learning_rate": 1.1420608422674793e-05, "loss": 0.205, "step": 5945 }, { "epoch": 0.4710635769459299, "grad_norm": 1.2818951399811043, "learning_rate": 1.1418068369076503e-05, "loss": 0.2212, "step": 5946 }, { "epoch": 0.47114280055456526, "grad_norm": 1.5054391030063439, "learning_rate": 1.1415528222105237e-05, "loss": 0.3062, "step": 5947 }, { "epoch": 0.47122202416320064, "grad_norm": 1.4852779923486854, "learning_rate": 1.1412987981928245e-05, "loss": 0.3026, "step": 5948 }, { "epoch": 0.471301247771836, "grad_norm": 1.770618919107372, "learning_rate": 1.1410447648712795e-05, "loss": 0.3651, "step": 5949 }, { "epoch": 0.4713804713804714, "grad_norm": 1.3022645623025109, "learning_rate": 1.1407907222626156e-05, "loss": 0.2436, "step": 5950 }, { "epoch": 0.4714596949891068, "grad_norm": 1.8189120329705482, "learning_rate": 1.1405366703835596e-05, "loss": 0.4105, "step": 5951 }, { "epoch": 0.4715389185977421, "grad_norm": 1.4035210034995014, "learning_rate": 1.1402826092508405e-05, "loss": 0.317, "step": 5952 }, { "epoch": 0.4716181422063775, "grad_norm": 1.317219889470921, "learning_rate": 1.1400285388811862e-05, "loss": 0.2371, "step": 5953 }, { "epoch": 0.47169736581501287, "grad_norm": 1.5607656846624318, "learning_rate": 1.1397744592913268e-05, "loss": 0.492, "step": 5954 }, { "epoch": 0.47177658942364825, "grad_norm": 1.4225192075784638, "learning_rate": 1.1395203704979915e-05, "loss": 0.2534, "step": 5955 }, { "epoch": 0.47185581303228363, "grad_norm": 1.239232030738451, "learning_rate": 1.1392662725179114e-05, "loss": 0.2385, "step": 5956 }, { "epoch": 0.471935036640919, "grad_norm": 1.3459192602928847, "learning_rate": 1.139012165367817e-05, "loss": 0.3007, "step": 5957 }, { "epoch": 0.47201426024955434, "grad_norm": 1.5164196766937215, "learning_rate": 1.1387580490644408e-05, "loss": 0.2717, "step": 5958 }, { "epoch": 0.4720934838581897, "grad_norm": 1.3116721827093707, "learning_rate": 1.1385039236245143e-05, "loss": 0.2823, "step": 5959 }, { "epoch": 0.4721727074668251, "grad_norm": 1.404417349529674, "learning_rate": 1.1382497890647712e-05, "loss": 0.3753, "step": 5960 }, { "epoch": 0.4722519310754605, "grad_norm": 1.3195655864414761, "learning_rate": 1.1379956454019445e-05, "loss": 0.2571, "step": 5961 }, { "epoch": 0.47233115468409587, "grad_norm": 1.2762592388574814, "learning_rate": 1.1377414926527688e-05, "loss": 0.2989, "step": 5962 }, { "epoch": 0.47241037829273125, "grad_norm": 1.3396648047037105, "learning_rate": 1.1374873308339784e-05, "loss": 0.3004, "step": 5963 }, { "epoch": 0.47248960190136663, "grad_norm": 1.6120739594418916, "learning_rate": 1.1372331599623088e-05, "loss": 0.3593, "step": 5964 }, { "epoch": 0.47256882551000196, "grad_norm": 1.8364815022000327, "learning_rate": 1.136978980054496e-05, "loss": 0.4019, "step": 5965 }, { "epoch": 0.47264804911863734, "grad_norm": 1.2683728517848438, "learning_rate": 1.1367247911272765e-05, "loss": 0.2819, "step": 5966 }, { "epoch": 0.4727272727272727, "grad_norm": 1.370140902306888, "learning_rate": 1.1364705931973872e-05, "loss": 0.281, "step": 5967 }, { "epoch": 0.4728064963359081, "grad_norm": 1.5152248178680985, "learning_rate": 1.1362163862815663e-05, "loss": 0.2529, "step": 5968 }, { "epoch": 0.4728857199445435, "grad_norm": 1.5128497787601511, "learning_rate": 1.1359621703965516e-05, "loss": 0.3365, "step": 5969 }, { "epoch": 0.47296494355317886, "grad_norm": 1.561562238164092, "learning_rate": 1.135707945559082e-05, "loss": 0.2592, "step": 5970 }, { "epoch": 0.47304416716181424, "grad_norm": 1.5266372534147483, "learning_rate": 1.1354537117858975e-05, "loss": 0.3673, "step": 5971 }, { "epoch": 0.47312339077044957, "grad_norm": 1.2184324700492684, "learning_rate": 1.1351994690937377e-05, "loss": 0.2603, "step": 5972 }, { "epoch": 0.47320261437908495, "grad_norm": 1.3089692427512045, "learning_rate": 1.1349452174993437e-05, "loss": 0.2772, "step": 5973 }, { "epoch": 0.47328183798772033, "grad_norm": 1.3054058114184117, "learning_rate": 1.1346909570194558e-05, "loss": 0.2523, "step": 5974 }, { "epoch": 0.4733610615963557, "grad_norm": 1.3652135593794648, "learning_rate": 1.134436687670817e-05, "loss": 0.2676, "step": 5975 }, { "epoch": 0.4734402852049911, "grad_norm": 1.4566075736221442, "learning_rate": 1.134182409470169e-05, "loss": 0.3784, "step": 5976 }, { "epoch": 0.4735195088136265, "grad_norm": 1.2841338732952017, "learning_rate": 1.133928122434255e-05, "loss": 0.2441, "step": 5977 }, { "epoch": 0.47359873242226186, "grad_norm": 1.3276317416263952, "learning_rate": 1.1336738265798187e-05, "loss": 0.2396, "step": 5978 }, { "epoch": 0.4736779560308972, "grad_norm": 1.2583401128047258, "learning_rate": 1.1334195219236039e-05, "loss": 0.243, "step": 5979 }, { "epoch": 0.47375717963953257, "grad_norm": 1.2683330238971544, "learning_rate": 1.1331652084823554e-05, "loss": 0.2722, "step": 5980 }, { "epoch": 0.47383640324816795, "grad_norm": 1.205500992216874, "learning_rate": 1.1329108862728192e-05, "loss": 0.2435, "step": 5981 }, { "epoch": 0.47391562685680333, "grad_norm": 1.760538755431643, "learning_rate": 1.1326565553117404e-05, "loss": 0.348, "step": 5982 }, { "epoch": 0.4739948504654387, "grad_norm": 1.27579246952634, "learning_rate": 1.1324022156158654e-05, "loss": 0.2266, "step": 5983 }, { "epoch": 0.4740740740740741, "grad_norm": 1.229733502088595, "learning_rate": 1.132147867201942e-05, "loss": 0.2496, "step": 5984 }, { "epoch": 0.4741532976827095, "grad_norm": 1.28064531087544, "learning_rate": 1.1318935100867172e-05, "loss": 0.274, "step": 5985 }, { "epoch": 0.4742325212913448, "grad_norm": 1.1309236981176227, "learning_rate": 1.1316391442869394e-05, "loss": 0.233, "step": 5986 }, { "epoch": 0.4743117448999802, "grad_norm": 1.5470727135678277, "learning_rate": 1.1313847698193577e-05, "loss": 0.3193, "step": 5987 }, { "epoch": 0.47439096850861556, "grad_norm": 1.457668273838197, "learning_rate": 1.1311303867007207e-05, "loss": 0.301, "step": 5988 }, { "epoch": 0.47447019211725094, "grad_norm": 1.4992119551980507, "learning_rate": 1.1308759949477786e-05, "loss": 0.3176, "step": 5989 }, { "epoch": 0.4745494157258863, "grad_norm": 1.5842216988059632, "learning_rate": 1.1306215945772823e-05, "loss": 0.3275, "step": 5990 }, { "epoch": 0.4746286393345217, "grad_norm": 1.493669317042276, "learning_rate": 1.1303671856059824e-05, "loss": 0.2543, "step": 5991 }, { "epoch": 0.4747078629431571, "grad_norm": 1.251839685129144, "learning_rate": 1.1301127680506305e-05, "loss": 0.2492, "step": 5992 }, { "epoch": 0.4747870865517924, "grad_norm": 1.633406133734667, "learning_rate": 1.1298583419279792e-05, "loss": 0.3561, "step": 5993 }, { "epoch": 0.4748663101604278, "grad_norm": 1.6011566996730517, "learning_rate": 1.1296039072547804e-05, "loss": 0.3474, "step": 5994 }, { "epoch": 0.4749455337690632, "grad_norm": 1.2264189686986706, "learning_rate": 1.1293494640477885e-05, "loss": 0.271, "step": 5995 }, { "epoch": 0.47502475737769856, "grad_norm": 1.2919760214549674, "learning_rate": 1.1290950123237564e-05, "loss": 0.2586, "step": 5996 }, { "epoch": 0.47510398098633394, "grad_norm": 1.6796117248621207, "learning_rate": 1.128840552099439e-05, "loss": 0.4553, "step": 5997 }, { "epoch": 0.4751832045949693, "grad_norm": 1.227128886322809, "learning_rate": 1.1285860833915914e-05, "loss": 0.1913, "step": 5998 }, { "epoch": 0.47526242820360465, "grad_norm": 1.2250094891337917, "learning_rate": 1.1283316062169685e-05, "loss": 0.2115, "step": 5999 }, { "epoch": 0.47534165181224003, "grad_norm": 1.7108502469572162, "learning_rate": 1.1280771205923269e-05, "loss": 0.345, "step": 6000 }, { "epoch": 0.4754208754208754, "grad_norm": 1.7599864752159344, "learning_rate": 1.1278226265344234e-05, "loss": 0.3063, "step": 6001 }, { "epoch": 0.4755000990295108, "grad_norm": 2.2697419168620634, "learning_rate": 1.127568124060015e-05, "loss": 0.4012, "step": 6002 }, { "epoch": 0.4755793226381462, "grad_norm": 1.2980018632857855, "learning_rate": 1.1273136131858595e-05, "loss": 0.245, "step": 6003 }, { "epoch": 0.47565854624678156, "grad_norm": 1.3332070098050692, "learning_rate": 1.1270590939287149e-05, "loss": 0.3118, "step": 6004 }, { "epoch": 0.47573776985541694, "grad_norm": 1.5320629454573327, "learning_rate": 1.1268045663053404e-05, "loss": 0.3201, "step": 6005 }, { "epoch": 0.47581699346405226, "grad_norm": 1.1469372709242003, "learning_rate": 1.1265500303324954e-05, "loss": 0.1437, "step": 6006 }, { "epoch": 0.47589621707268764, "grad_norm": 1.5315649411517587, "learning_rate": 1.12629548602694e-05, "loss": 0.3112, "step": 6007 }, { "epoch": 0.475975440681323, "grad_norm": 1.5694279047961142, "learning_rate": 1.1260409334054342e-05, "loss": 0.4045, "step": 6008 }, { "epoch": 0.4760546642899584, "grad_norm": 1.3664205732274488, "learning_rate": 1.1257863724847398e-05, "loss": 0.2469, "step": 6009 }, { "epoch": 0.4761338878985938, "grad_norm": 1.5024692934757262, "learning_rate": 1.1255318032816175e-05, "loss": 0.3241, "step": 6010 }, { "epoch": 0.47621311150722917, "grad_norm": 1.1753045154524353, "learning_rate": 1.1252772258128303e-05, "loss": 0.2462, "step": 6011 }, { "epoch": 0.47629233511586455, "grad_norm": 1.1477444374949006, "learning_rate": 1.1250226400951408e-05, "loss": 0.2704, "step": 6012 }, { "epoch": 0.4763715587244999, "grad_norm": 3.3942339342087204, "learning_rate": 1.1247680461453114e-05, "loss": 0.2028, "step": 6013 }, { "epoch": 0.47645078233313526, "grad_norm": 1.329591636823522, "learning_rate": 1.1245134439801073e-05, "loss": 0.2634, "step": 6014 }, { "epoch": 0.47653000594177064, "grad_norm": 1.343285052504467, "learning_rate": 1.1242588336162916e-05, "loss": 0.2703, "step": 6015 }, { "epoch": 0.476609229550406, "grad_norm": 1.7792368334618545, "learning_rate": 1.1240042150706296e-05, "loss": 0.3568, "step": 6016 }, { "epoch": 0.4766884531590414, "grad_norm": 1.5280482148636647, "learning_rate": 1.1237495883598868e-05, "loss": 0.3661, "step": 6017 }, { "epoch": 0.4767676767676768, "grad_norm": 1.355486962129685, "learning_rate": 1.1234949535008289e-05, "loss": 0.2357, "step": 6018 }, { "epoch": 0.47684690037631217, "grad_norm": 1.6909830984396892, "learning_rate": 1.1232403105102226e-05, "loss": 0.3152, "step": 6019 }, { "epoch": 0.4769261239849475, "grad_norm": 1.5336670804054056, "learning_rate": 1.122985659404835e-05, "loss": 0.3227, "step": 6020 }, { "epoch": 0.4770053475935829, "grad_norm": 1.2197454151740297, "learning_rate": 1.1227310002014332e-05, "loss": 0.2194, "step": 6021 }, { "epoch": 0.47708457120221825, "grad_norm": 1.7414665018098963, "learning_rate": 1.1224763329167859e-05, "loss": 0.324, "step": 6022 }, { "epoch": 0.47716379481085364, "grad_norm": 1.6191861133208167, "learning_rate": 1.122221657567661e-05, "loss": 0.2768, "step": 6023 }, { "epoch": 0.477243018419489, "grad_norm": 1.374871305369388, "learning_rate": 1.1219669741708282e-05, "loss": 0.2296, "step": 6024 }, { "epoch": 0.4773222420281244, "grad_norm": 1.5133038029946626, "learning_rate": 1.121712282743057e-05, "loss": 0.2706, "step": 6025 }, { "epoch": 0.4774014656367598, "grad_norm": 1.7551595723008606, "learning_rate": 1.1214575833011178e-05, "loss": 0.3844, "step": 6026 }, { "epoch": 0.4774806892453951, "grad_norm": 1.7334702973960008, "learning_rate": 1.121202875861781e-05, "loss": 0.4162, "step": 6027 }, { "epoch": 0.4775599128540305, "grad_norm": 1.2567969880572345, "learning_rate": 1.1209481604418182e-05, "loss": 0.2887, "step": 6028 }, { "epoch": 0.47763913646266587, "grad_norm": 1.4518389879777716, "learning_rate": 1.1206934370580009e-05, "loss": 0.2228, "step": 6029 }, { "epoch": 0.47771836007130125, "grad_norm": 1.6290220968104137, "learning_rate": 1.1204387057271016e-05, "loss": 0.3764, "step": 6030 }, { "epoch": 0.47779758367993663, "grad_norm": 1.4868357406347164, "learning_rate": 1.1201839664658929e-05, "loss": 0.2691, "step": 6031 }, { "epoch": 0.477876807288572, "grad_norm": 1.7429748829828071, "learning_rate": 1.1199292192911482e-05, "loss": 0.359, "step": 6032 }, { "epoch": 0.47795603089720734, "grad_norm": 1.5935227691304323, "learning_rate": 1.1196744642196417e-05, "loss": 0.3865, "step": 6033 }, { "epoch": 0.4780352545058427, "grad_norm": 1.8330323041966783, "learning_rate": 1.1194197012681473e-05, "loss": 0.2916, "step": 6034 }, { "epoch": 0.4781144781144781, "grad_norm": 1.2639469625366093, "learning_rate": 1.1191649304534405e-05, "loss": 0.2989, "step": 6035 }, { "epoch": 0.4781937017231135, "grad_norm": 1.32882367930818, "learning_rate": 1.1189101517922961e-05, "loss": 0.2583, "step": 6036 }, { "epoch": 0.47827292533174887, "grad_norm": 1.6894754003309616, "learning_rate": 1.1186553653014906e-05, "loss": 0.255, "step": 6037 }, { "epoch": 0.47835214894038425, "grad_norm": 1.2684105548175266, "learning_rate": 1.1184005709978002e-05, "loss": 0.2497, "step": 6038 }, { "epoch": 0.47843137254901963, "grad_norm": 1.2440034613844855, "learning_rate": 1.118145768898002e-05, "loss": 0.2192, "step": 6039 }, { "epoch": 0.47851059615765495, "grad_norm": 2.242382660724379, "learning_rate": 1.1178909590188731e-05, "loss": 0.4079, "step": 6040 }, { "epoch": 0.47858981976629034, "grad_norm": 1.7001611684196702, "learning_rate": 1.117636141377192e-05, "loss": 0.3439, "step": 6041 }, { "epoch": 0.4786690433749257, "grad_norm": 1.3333455683643658, "learning_rate": 1.117381315989737e-05, "loss": 0.2382, "step": 6042 }, { "epoch": 0.4787482669835611, "grad_norm": 1.6264075179649133, "learning_rate": 1.117126482873287e-05, "loss": 0.3446, "step": 6043 }, { "epoch": 0.4788274905921965, "grad_norm": 1.4433284741071852, "learning_rate": 1.1168716420446219e-05, "loss": 0.278, "step": 6044 }, { "epoch": 0.47890671420083186, "grad_norm": 1.187198190464409, "learning_rate": 1.1166167935205214e-05, "loss": 0.3087, "step": 6045 }, { "epoch": 0.47898593780946724, "grad_norm": 1.4613588748416362, "learning_rate": 1.1163619373177663e-05, "loss": 0.3026, "step": 6046 }, { "epoch": 0.47906516141810257, "grad_norm": 1.5677270430045716, "learning_rate": 1.1161070734531375e-05, "loss": 0.3013, "step": 6047 }, { "epoch": 0.47914438502673795, "grad_norm": 1.4638961224695024, "learning_rate": 1.1158522019434163e-05, "loss": 0.3343, "step": 6048 }, { "epoch": 0.47922360863537333, "grad_norm": 1.4432812664285235, "learning_rate": 1.1155973228053854e-05, "loss": 0.3026, "step": 6049 }, { "epoch": 0.4793028322440087, "grad_norm": 1.7201391117878246, "learning_rate": 1.1153424360558268e-05, "loss": 0.3999, "step": 6050 }, { "epoch": 0.4793820558526441, "grad_norm": 1.2652796050206054, "learning_rate": 1.115087541711524e-05, "loss": 0.3032, "step": 6051 }, { "epoch": 0.4794612794612795, "grad_norm": 1.5879817665026315, "learning_rate": 1.1148326397892601e-05, "loss": 0.3051, "step": 6052 }, { "epoch": 0.47954050306991486, "grad_norm": 1.334309076025932, "learning_rate": 1.1145777303058197e-05, "loss": 0.2718, "step": 6053 }, { "epoch": 0.4796197266785502, "grad_norm": 1.1271325317580927, "learning_rate": 1.1143228132779867e-05, "loss": 0.1848, "step": 6054 }, { "epoch": 0.47969895028718557, "grad_norm": 1.5355220001918997, "learning_rate": 1.1140678887225468e-05, "loss": 0.3171, "step": 6055 }, { "epoch": 0.47977817389582095, "grad_norm": 1.351441638714954, "learning_rate": 1.1138129566562853e-05, "loss": 0.2739, "step": 6056 }, { "epoch": 0.47985739750445633, "grad_norm": 1.3387486077673278, "learning_rate": 1.1135580170959881e-05, "loss": 0.2767, "step": 6057 }, { "epoch": 0.4799366211130917, "grad_norm": 1.6932996645532263, "learning_rate": 1.1133030700584419e-05, "loss": 0.3285, "step": 6058 }, { "epoch": 0.4800158447217271, "grad_norm": 1.6059731099901198, "learning_rate": 1.1130481155604336e-05, "loss": 0.2816, "step": 6059 }, { "epoch": 0.4800950683303625, "grad_norm": 1.7226693778961917, "learning_rate": 1.1127931536187511e-05, "loss": 0.3246, "step": 6060 }, { "epoch": 0.4801742919389978, "grad_norm": 1.360874599177222, "learning_rate": 1.1125381842501819e-05, "loss": 0.3091, "step": 6061 }, { "epoch": 0.4802535155476332, "grad_norm": 1.6501154279475763, "learning_rate": 1.1122832074715149e-05, "loss": 0.2993, "step": 6062 }, { "epoch": 0.48033273915626856, "grad_norm": 1.6870640200987324, "learning_rate": 1.1120282232995389e-05, "loss": 0.3337, "step": 6063 }, { "epoch": 0.48041196276490394, "grad_norm": 1.6210418443737509, "learning_rate": 1.1117732317510437e-05, "loss": 0.3656, "step": 6064 }, { "epoch": 0.4804911863735393, "grad_norm": 1.5093452687261766, "learning_rate": 1.111518232842819e-05, "loss": 0.3052, "step": 6065 }, { "epoch": 0.4805704099821747, "grad_norm": 1.5609851300451982, "learning_rate": 1.1112632265916548e-05, "loss": 0.3695, "step": 6066 }, { "epoch": 0.4806496335908101, "grad_norm": 1.5088324998048293, "learning_rate": 1.1110082130143427e-05, "loss": 0.2535, "step": 6067 }, { "epoch": 0.4807288571994454, "grad_norm": 1.547690678961717, "learning_rate": 1.1107531921276742e-05, "loss": 0.2419, "step": 6068 }, { "epoch": 0.4808080808080808, "grad_norm": 1.3274831775373486, "learning_rate": 1.1104981639484404e-05, "loss": 0.2472, "step": 6069 }, { "epoch": 0.4808873044167162, "grad_norm": 1.4082663194143967, "learning_rate": 1.1102431284934345e-05, "loss": 0.2078, "step": 6070 }, { "epoch": 0.48096652802535156, "grad_norm": 1.671828606916373, "learning_rate": 1.1099880857794491e-05, "loss": 0.3965, "step": 6071 }, { "epoch": 0.48104575163398694, "grad_norm": 1.7348320874419227, "learning_rate": 1.1097330358232775e-05, "loss": 0.4225, "step": 6072 }, { "epoch": 0.4811249752426223, "grad_norm": 1.2078015512494769, "learning_rate": 1.1094779786417133e-05, "loss": 0.2257, "step": 6073 }, { "epoch": 0.48120419885125765, "grad_norm": 1.5601087144285826, "learning_rate": 1.1092229142515512e-05, "loss": 0.3128, "step": 6074 }, { "epoch": 0.48128342245989303, "grad_norm": 1.5348638556364782, "learning_rate": 1.1089678426695854e-05, "loss": 0.2811, "step": 6075 }, { "epoch": 0.4813626460685284, "grad_norm": 1.144681120404384, "learning_rate": 1.1087127639126118e-05, "loss": 0.2154, "step": 6076 }, { "epoch": 0.4814418696771638, "grad_norm": 1.0768422041387595, "learning_rate": 1.1084576779974257e-05, "loss": 0.2095, "step": 6077 }, { "epoch": 0.4815210932857992, "grad_norm": 1.294588525842855, "learning_rate": 1.1082025849408231e-05, "loss": 0.2461, "step": 6078 }, { "epoch": 0.48160031689443455, "grad_norm": 1.6261272786961174, "learning_rate": 1.1079474847596014e-05, "loss": 0.3868, "step": 6079 }, { "epoch": 0.48167954050306994, "grad_norm": 1.5340116649883544, "learning_rate": 1.1076923774705568e-05, "loss": 0.2396, "step": 6080 }, { "epoch": 0.48175876411170526, "grad_norm": 1.390338022518972, "learning_rate": 1.1074372630904878e-05, "loss": 0.3044, "step": 6081 }, { "epoch": 0.48183798772034064, "grad_norm": 1.2863309913843124, "learning_rate": 1.1071821416361917e-05, "loss": 0.2391, "step": 6082 }, { "epoch": 0.481917211328976, "grad_norm": 1.1726082141951204, "learning_rate": 1.106927013124467e-05, "loss": 0.203, "step": 6083 }, { "epoch": 0.4819964349376114, "grad_norm": 1.4276651602226915, "learning_rate": 1.1066718775721135e-05, "loss": 0.2449, "step": 6084 }, { "epoch": 0.4820756585462468, "grad_norm": 1.3158419117919298, "learning_rate": 1.1064167349959299e-05, "loss": 0.2616, "step": 6085 }, { "epoch": 0.48215488215488217, "grad_norm": 1.3031351146360042, "learning_rate": 1.1061615854127165e-05, "loss": 0.2501, "step": 6086 }, { "epoch": 0.48223410576351755, "grad_norm": 1.47023789758246, "learning_rate": 1.1059064288392733e-05, "loss": 0.3226, "step": 6087 }, { "epoch": 0.4823133293721529, "grad_norm": 1.411754131252147, "learning_rate": 1.1056512652924014e-05, "loss": 0.2434, "step": 6088 }, { "epoch": 0.48239255298078826, "grad_norm": 1.347650328191249, "learning_rate": 1.1053960947889021e-05, "loss": 0.2648, "step": 6089 }, { "epoch": 0.48247177658942364, "grad_norm": 1.2983872738821516, "learning_rate": 1.1051409173455771e-05, "loss": 0.2545, "step": 6090 }, { "epoch": 0.482551000198059, "grad_norm": 1.365900899385686, "learning_rate": 1.1048857329792284e-05, "loss": 0.1888, "step": 6091 }, { "epoch": 0.4826302238066944, "grad_norm": 1.9224272628383734, "learning_rate": 1.1046305417066594e-05, "loss": 0.3606, "step": 6092 }, { "epoch": 0.4827094474153298, "grad_norm": 1.499434046670531, "learning_rate": 1.1043753435446722e-05, "loss": 0.4271, "step": 6093 }, { "epoch": 0.48278867102396517, "grad_norm": 1.1968634204507425, "learning_rate": 1.104120138510071e-05, "loss": 0.2532, "step": 6094 }, { "epoch": 0.4828678946326005, "grad_norm": 1.5236227774319238, "learning_rate": 1.1038649266196597e-05, "loss": 0.3163, "step": 6095 }, { "epoch": 0.4829471182412359, "grad_norm": 1.4116947999482448, "learning_rate": 1.1036097078902428e-05, "loss": 0.3145, "step": 6096 }, { "epoch": 0.48302634184987125, "grad_norm": 1.2596153432837602, "learning_rate": 1.1033544823386248e-05, "loss": 0.2994, "step": 6097 }, { "epoch": 0.48310556545850664, "grad_norm": 1.289307072363218, "learning_rate": 1.103099249981612e-05, "loss": 0.2409, "step": 6098 }, { "epoch": 0.483184789067142, "grad_norm": 1.544297408445771, "learning_rate": 1.1028440108360092e-05, "loss": 0.3131, "step": 6099 }, { "epoch": 0.4832640126757774, "grad_norm": 1.557601485409439, "learning_rate": 1.1025887649186236e-05, "loss": 0.2814, "step": 6100 }, { "epoch": 0.4833432362844128, "grad_norm": 1.4540441143302076, "learning_rate": 1.1023335122462611e-05, "loss": 0.2815, "step": 6101 }, { "epoch": 0.4834224598930481, "grad_norm": 1.317005895710935, "learning_rate": 1.102078252835729e-05, "loss": 0.2596, "step": 6102 }, { "epoch": 0.4835016835016835, "grad_norm": 1.535044273860009, "learning_rate": 1.1018229867038358e-05, "loss": 0.3207, "step": 6103 }, { "epoch": 0.48358090711031887, "grad_norm": 1.4982955697882976, "learning_rate": 1.1015677138673882e-05, "loss": 0.2412, "step": 6104 }, { "epoch": 0.48366013071895425, "grad_norm": 1.4052336110341974, "learning_rate": 1.1013124343431955e-05, "loss": 0.318, "step": 6105 }, { "epoch": 0.48373935432758963, "grad_norm": 1.4741090632617713, "learning_rate": 1.1010571481480668e-05, "loss": 0.3629, "step": 6106 }, { "epoch": 0.483818577936225, "grad_norm": 1.3490346128218675, "learning_rate": 1.1008018552988109e-05, "loss": 0.2351, "step": 6107 }, { "epoch": 0.4838978015448604, "grad_norm": 1.36436398062383, "learning_rate": 1.1005465558122382e-05, "loss": 0.2847, "step": 6108 }, { "epoch": 0.4839770251534957, "grad_norm": 1.272969101307285, "learning_rate": 1.1002912497051582e-05, "loss": 0.1808, "step": 6109 }, { "epoch": 0.4840562487621311, "grad_norm": 1.3055478096477444, "learning_rate": 1.1000359369943818e-05, "loss": 0.2161, "step": 6110 }, { "epoch": 0.4841354723707665, "grad_norm": 1.5306078048121168, "learning_rate": 1.099780617696721e-05, "loss": 0.3376, "step": 6111 }, { "epoch": 0.48421469597940187, "grad_norm": 1.3730613627599653, "learning_rate": 1.099525291828986e-05, "loss": 0.2235, "step": 6112 }, { "epoch": 0.48429391958803725, "grad_norm": 1.3100682501879188, "learning_rate": 1.0992699594079896e-05, "loss": 0.3177, "step": 6113 }, { "epoch": 0.48437314319667263, "grad_norm": 1.7476161209058387, "learning_rate": 1.0990146204505444e-05, "loss": 0.3261, "step": 6114 }, { "epoch": 0.48445236680530795, "grad_norm": 1.697437545080915, "learning_rate": 1.0987592749734624e-05, "loss": 0.2964, "step": 6115 }, { "epoch": 0.48453159041394334, "grad_norm": 2.054339072774018, "learning_rate": 1.0985039229935575e-05, "loss": 0.3761, "step": 6116 }, { "epoch": 0.4846108140225787, "grad_norm": 1.3524976967062285, "learning_rate": 1.098248564527643e-05, "loss": 0.2775, "step": 6117 }, { "epoch": 0.4846900376312141, "grad_norm": 1.3286666958139592, "learning_rate": 1.0979931995925335e-05, "loss": 0.2147, "step": 6118 }, { "epoch": 0.4847692612398495, "grad_norm": 1.629975469966293, "learning_rate": 1.0977378282050436e-05, "loss": 0.2756, "step": 6119 }, { "epoch": 0.48484848484848486, "grad_norm": 1.5810534047348923, "learning_rate": 1.0974824503819877e-05, "loss": 0.3123, "step": 6120 }, { "epoch": 0.48492770845712024, "grad_norm": 1.4893143668380495, "learning_rate": 1.0972270661401812e-05, "loss": 0.3211, "step": 6121 }, { "epoch": 0.48500693206575557, "grad_norm": 1.7121285060124665, "learning_rate": 1.0969716754964408e-05, "loss": 0.2966, "step": 6122 }, { "epoch": 0.48508615567439095, "grad_norm": 1.7242540541528943, "learning_rate": 1.0967162784675818e-05, "loss": 0.2644, "step": 6123 }, { "epoch": 0.48516537928302633, "grad_norm": 1.9237001576761963, "learning_rate": 1.0964608750704215e-05, "loss": 0.2968, "step": 6124 }, { "epoch": 0.4852446028916617, "grad_norm": 1.2791805749682155, "learning_rate": 1.0962054653217764e-05, "loss": 0.2776, "step": 6125 }, { "epoch": 0.4853238265002971, "grad_norm": 1.3204379381013476, "learning_rate": 1.0959500492384646e-05, "loss": 0.2396, "step": 6126 }, { "epoch": 0.4854030501089325, "grad_norm": 1.2136200820565095, "learning_rate": 1.0956946268373034e-05, "loss": 0.2379, "step": 6127 }, { "epoch": 0.48548227371756786, "grad_norm": 1.1866725081229594, "learning_rate": 1.0954391981351117e-05, "loss": 0.2514, "step": 6128 }, { "epoch": 0.4855614973262032, "grad_norm": 1.3051367107904754, "learning_rate": 1.0951837631487081e-05, "loss": 0.1947, "step": 6129 }, { "epoch": 0.48564072093483857, "grad_norm": 1.4428799347708885, "learning_rate": 1.0949283218949117e-05, "loss": 0.4157, "step": 6130 }, { "epoch": 0.48571994454347395, "grad_norm": 1.3938253661732678, "learning_rate": 1.094672874390542e-05, "loss": 0.3459, "step": 6131 }, { "epoch": 0.48579916815210933, "grad_norm": 1.6269717457578443, "learning_rate": 1.094417420652419e-05, "loss": 0.3469, "step": 6132 }, { "epoch": 0.4858783917607447, "grad_norm": 1.4581714390539897, "learning_rate": 1.0941619606973633e-05, "loss": 0.2911, "step": 6133 }, { "epoch": 0.4859576153693801, "grad_norm": 1.1782084152370544, "learning_rate": 1.0939064945421953e-05, "loss": 0.263, "step": 6134 }, { "epoch": 0.4860368389780155, "grad_norm": 1.4019701514320915, "learning_rate": 1.0936510222037368e-05, "loss": 0.2849, "step": 6135 }, { "epoch": 0.4861160625866508, "grad_norm": 1.1700490384584799, "learning_rate": 1.0933955436988088e-05, "loss": 0.2313, "step": 6136 }, { "epoch": 0.4861952861952862, "grad_norm": 1.7318887895354502, "learning_rate": 1.0931400590442337e-05, "loss": 0.32, "step": 6137 }, { "epoch": 0.48627450980392156, "grad_norm": 1.609251339289543, "learning_rate": 1.0928845682568344e-05, "loss": 0.3662, "step": 6138 }, { "epoch": 0.48635373341255694, "grad_norm": 1.361670973189962, "learning_rate": 1.0926290713534324e-05, "loss": 0.3193, "step": 6139 }, { "epoch": 0.4864329570211923, "grad_norm": 1.8012673427868948, "learning_rate": 1.0923735683508521e-05, "loss": 0.2807, "step": 6140 }, { "epoch": 0.4865121806298277, "grad_norm": 1.6193749438186245, "learning_rate": 1.092118059265917e-05, "loss": 0.3244, "step": 6141 }, { "epoch": 0.4865914042384631, "grad_norm": 1.2210525094443065, "learning_rate": 1.0918625441154508e-05, "loss": 0.2621, "step": 6142 }, { "epoch": 0.4866706278470984, "grad_norm": 1.6117566094763687, "learning_rate": 1.091607022916278e-05, "loss": 0.3951, "step": 6143 }, { "epoch": 0.4867498514557338, "grad_norm": 1.1755785169079471, "learning_rate": 1.0913514956852236e-05, "loss": 0.2441, "step": 6144 }, { "epoch": 0.4868290750643692, "grad_norm": 1.405229639012253, "learning_rate": 1.0910959624391127e-05, "loss": 0.2962, "step": 6145 }, { "epoch": 0.48690829867300456, "grad_norm": 1.5968607903656973, "learning_rate": 1.090840423194771e-05, "loss": 0.3532, "step": 6146 }, { "epoch": 0.48698752228163994, "grad_norm": 1.4864717981694215, "learning_rate": 1.0905848779690246e-05, "loss": 0.3387, "step": 6147 }, { "epoch": 0.4870667458902753, "grad_norm": 1.4055737743461898, "learning_rate": 1.0903293267786998e-05, "loss": 0.2189, "step": 6148 }, { "epoch": 0.4871459694989107, "grad_norm": 1.52242155947934, "learning_rate": 1.0900737696406235e-05, "loss": 0.2739, "step": 6149 }, { "epoch": 0.48722519310754603, "grad_norm": 1.4062902494315441, "learning_rate": 1.0898182065716227e-05, "loss": 0.2452, "step": 6150 }, { "epoch": 0.4873044167161814, "grad_norm": 1.467042791784936, "learning_rate": 1.0895626375885255e-05, "loss": 0.3041, "step": 6151 }, { "epoch": 0.4873836403248168, "grad_norm": 1.2393769660727878, "learning_rate": 1.0893070627081595e-05, "loss": 0.1865, "step": 6152 }, { "epoch": 0.4874628639334522, "grad_norm": 1.580462881695226, "learning_rate": 1.089051481947353e-05, "loss": 0.3253, "step": 6153 }, { "epoch": 0.48754208754208755, "grad_norm": 1.4570043502367767, "learning_rate": 1.0887958953229349e-05, "loss": 0.2974, "step": 6154 }, { "epoch": 0.48762131115072294, "grad_norm": 1.1194671289501286, "learning_rate": 1.0885403028517345e-05, "loss": 0.1863, "step": 6155 }, { "epoch": 0.48770053475935826, "grad_norm": 1.53081016103416, "learning_rate": 1.0882847045505809e-05, "loss": 0.3968, "step": 6156 }, { "epoch": 0.48777975836799364, "grad_norm": 1.491539542669247, "learning_rate": 1.0880291004363047e-05, "loss": 0.3016, "step": 6157 }, { "epoch": 0.487858981976629, "grad_norm": 1.2611339462723674, "learning_rate": 1.0877734905257354e-05, "loss": 0.254, "step": 6158 }, { "epoch": 0.4879382055852644, "grad_norm": 1.1961698704125379, "learning_rate": 1.0875178748357045e-05, "loss": 0.1887, "step": 6159 }, { "epoch": 0.4880174291938998, "grad_norm": 1.3721810304073363, "learning_rate": 1.0872622533830423e-05, "loss": 0.285, "step": 6160 }, { "epoch": 0.48809665280253517, "grad_norm": 1.567837652185398, "learning_rate": 1.0870066261845807e-05, "loss": 0.2954, "step": 6161 }, { "epoch": 0.48817587641117055, "grad_norm": 1.233238588712617, "learning_rate": 1.0867509932571517e-05, "loss": 0.2553, "step": 6162 }, { "epoch": 0.4882551000198059, "grad_norm": 1.397418530126188, "learning_rate": 1.0864953546175867e-05, "loss": 0.2703, "step": 6163 }, { "epoch": 0.48833432362844126, "grad_norm": 1.5610715288104426, "learning_rate": 1.0862397102827189e-05, "loss": 0.384, "step": 6164 }, { "epoch": 0.48841354723707664, "grad_norm": 1.245660498200154, "learning_rate": 1.0859840602693813e-05, "loss": 0.2671, "step": 6165 }, { "epoch": 0.488492770845712, "grad_norm": 1.3947092697329637, "learning_rate": 1.0857284045944071e-05, "loss": 0.2203, "step": 6166 }, { "epoch": 0.4885719944543474, "grad_norm": 1.218362053557024, "learning_rate": 1.0854727432746302e-05, "loss": 0.2852, "step": 6167 }, { "epoch": 0.4886512180629828, "grad_norm": 1.341351718036167, "learning_rate": 1.0852170763268838e-05, "loss": 0.189, "step": 6168 }, { "epoch": 0.48873044167161817, "grad_norm": 1.5884808936970678, "learning_rate": 1.0849614037680032e-05, "loss": 0.3538, "step": 6169 }, { "epoch": 0.4888096652802535, "grad_norm": 1.3849156013508366, "learning_rate": 1.0847057256148234e-05, "loss": 0.2517, "step": 6170 }, { "epoch": 0.4888888888888889, "grad_norm": 1.335160074346344, "learning_rate": 1.0844500418841788e-05, "loss": 0.2567, "step": 6171 }, { "epoch": 0.48896811249752425, "grad_norm": 1.9884280331886677, "learning_rate": 1.0841943525929053e-05, "loss": 0.387, "step": 6172 }, { "epoch": 0.48904733610615964, "grad_norm": 1.4552717666439374, "learning_rate": 1.0839386577578389e-05, "loss": 0.3008, "step": 6173 }, { "epoch": 0.489126559714795, "grad_norm": 1.3772706742532324, "learning_rate": 1.0836829573958155e-05, "loss": 0.2517, "step": 6174 }, { "epoch": 0.4892057833234304, "grad_norm": 1.4315596710202125, "learning_rate": 1.083427251523672e-05, "loss": 0.3322, "step": 6175 }, { "epoch": 0.4892850069320658, "grad_norm": 1.2146382197625711, "learning_rate": 1.0831715401582458e-05, "loss": 0.2601, "step": 6176 }, { "epoch": 0.4893642305407011, "grad_norm": 1.4693611008861538, "learning_rate": 1.0829158233163737e-05, "loss": 0.2508, "step": 6177 }, { "epoch": 0.4894434541493365, "grad_norm": 1.501286271071185, "learning_rate": 1.0826601010148935e-05, "loss": 0.4254, "step": 6178 }, { "epoch": 0.48952267775797187, "grad_norm": 1.5815926457007046, "learning_rate": 1.0824043732706435e-05, "loss": 0.347, "step": 6179 }, { "epoch": 0.48960190136660725, "grad_norm": 1.5415088564225938, "learning_rate": 1.0821486401004618e-05, "loss": 0.3386, "step": 6180 }, { "epoch": 0.48968112497524263, "grad_norm": 1.4474455234323833, "learning_rate": 1.0818929015211877e-05, "loss": 0.2656, "step": 6181 }, { "epoch": 0.489760348583878, "grad_norm": 1.4060165907615751, "learning_rate": 1.0816371575496598e-05, "loss": 0.2271, "step": 6182 }, { "epoch": 0.4898395721925134, "grad_norm": 1.3963858824595528, "learning_rate": 1.081381408202718e-05, "loss": 0.2914, "step": 6183 }, { "epoch": 0.4899187958011487, "grad_norm": 1.627029400041405, "learning_rate": 1.0811256534972024e-05, "loss": 0.3751, "step": 6184 }, { "epoch": 0.4899980194097841, "grad_norm": 1.2250895246335802, "learning_rate": 1.0808698934499524e-05, "loss": 0.2247, "step": 6185 }, { "epoch": 0.4900772430184195, "grad_norm": 1.3257314242280365, "learning_rate": 1.0806141280778093e-05, "loss": 0.2859, "step": 6186 }, { "epoch": 0.49015646662705487, "grad_norm": 1.1850919805751492, "learning_rate": 1.0803583573976137e-05, "loss": 0.2401, "step": 6187 }, { "epoch": 0.49023569023569025, "grad_norm": 1.1107332607084066, "learning_rate": 1.0801025814262068e-05, "loss": 0.2204, "step": 6188 }, { "epoch": 0.49031491384432563, "grad_norm": 1.5225029620487145, "learning_rate": 1.0798468001804305e-05, "loss": 0.3076, "step": 6189 }, { "epoch": 0.490394137452961, "grad_norm": 1.4702612479048018, "learning_rate": 1.0795910136771266e-05, "loss": 0.3108, "step": 6190 }, { "epoch": 0.49047336106159634, "grad_norm": 1.4772785731823188, "learning_rate": 1.0793352219331371e-05, "loss": 0.3196, "step": 6191 }, { "epoch": 0.4905525846702317, "grad_norm": 1.536878832032189, "learning_rate": 1.0790794249653056e-05, "loss": 0.3255, "step": 6192 }, { "epoch": 0.4906318082788671, "grad_norm": 1.73651753305478, "learning_rate": 1.0788236227904738e-05, "loss": 0.2982, "step": 6193 }, { "epoch": 0.4907110318875025, "grad_norm": 1.1649184716370518, "learning_rate": 1.0785678154254865e-05, "loss": 0.2053, "step": 6194 }, { "epoch": 0.49079025549613786, "grad_norm": 1.188098789055214, "learning_rate": 1.0783120028871858e-05, "loss": 0.2314, "step": 6195 }, { "epoch": 0.49086947910477324, "grad_norm": 1.3450706436396398, "learning_rate": 1.0780561851924168e-05, "loss": 0.2233, "step": 6196 }, { "epoch": 0.49094870271340857, "grad_norm": 1.784045083633948, "learning_rate": 1.0778003623580237e-05, "loss": 0.3568, "step": 6197 }, { "epoch": 0.49102792632204395, "grad_norm": 1.554802606764818, "learning_rate": 1.077544534400851e-05, "loss": 0.3411, "step": 6198 }, { "epoch": 0.49110714993067933, "grad_norm": 1.6058798273090809, "learning_rate": 1.0772887013377438e-05, "loss": 0.416, "step": 6199 }, { "epoch": 0.4911863735393147, "grad_norm": 1.5391679701428511, "learning_rate": 1.0770328631855476e-05, "loss": 0.3855, "step": 6200 }, { "epoch": 0.4912655971479501, "grad_norm": 1.5378938980102377, "learning_rate": 1.0767770199611078e-05, "loss": 0.2856, "step": 6201 }, { "epoch": 0.4913448207565855, "grad_norm": 1.166805596897456, "learning_rate": 1.076521171681271e-05, "loss": 0.2466, "step": 6202 }, { "epoch": 0.49142404436522086, "grad_norm": 1.536547216923099, "learning_rate": 1.0762653183628831e-05, "loss": 0.2851, "step": 6203 }, { "epoch": 0.4915032679738562, "grad_norm": 1.4223236654328237, "learning_rate": 1.0760094600227908e-05, "loss": 0.3708, "step": 6204 }, { "epoch": 0.49158249158249157, "grad_norm": 1.3718094513279078, "learning_rate": 1.0757535966778416e-05, "loss": 0.2244, "step": 6205 }, { "epoch": 0.49166171519112695, "grad_norm": 1.5640241098868874, "learning_rate": 1.0754977283448824e-05, "loss": 0.258, "step": 6206 }, { "epoch": 0.49174093879976233, "grad_norm": 1.6900784168741398, "learning_rate": 1.0752418550407611e-05, "loss": 0.4154, "step": 6207 }, { "epoch": 0.4918201624083977, "grad_norm": 1.7994718856508958, "learning_rate": 1.0749859767823256e-05, "loss": 0.384, "step": 6208 }, { "epoch": 0.4918993860170331, "grad_norm": 1.4717917761484856, "learning_rate": 1.0747300935864245e-05, "loss": 0.3591, "step": 6209 }, { "epoch": 0.4919786096256685, "grad_norm": 1.3383497822924593, "learning_rate": 1.074474205469906e-05, "loss": 0.2885, "step": 6210 }, { "epoch": 0.4920578332343038, "grad_norm": 1.6496577144297149, "learning_rate": 1.0742183124496197e-05, "loss": 0.387, "step": 6211 }, { "epoch": 0.4921370568429392, "grad_norm": 1.3466125167078744, "learning_rate": 1.0739624145424146e-05, "loss": 0.3039, "step": 6212 }, { "epoch": 0.49221628045157456, "grad_norm": 1.5160308645798655, "learning_rate": 1.0737065117651404e-05, "loss": 0.2912, "step": 6213 }, { "epoch": 0.49229550406020994, "grad_norm": 1.526127365587252, "learning_rate": 1.0734506041346468e-05, "loss": 0.294, "step": 6214 }, { "epoch": 0.4923747276688453, "grad_norm": 1.5005855609234349, "learning_rate": 1.0731946916677847e-05, "loss": 0.3837, "step": 6215 }, { "epoch": 0.4924539512774807, "grad_norm": 3.7392082045902666, "learning_rate": 1.0729387743814041e-05, "loss": 0.2066, "step": 6216 }, { "epoch": 0.4925331748861161, "grad_norm": 1.372508750294774, "learning_rate": 1.0726828522923563e-05, "loss": 0.2848, "step": 6217 }, { "epoch": 0.4926123984947514, "grad_norm": 1.3999552651359242, "learning_rate": 1.0724269254174921e-05, "loss": 0.2922, "step": 6218 }, { "epoch": 0.4926916221033868, "grad_norm": 1.4554709891905275, "learning_rate": 1.0721709937736638e-05, "loss": 0.357, "step": 6219 }, { "epoch": 0.4927708457120222, "grad_norm": 1.9487352301395504, "learning_rate": 1.0719150573777226e-05, "loss": 0.2293, "step": 6220 }, { "epoch": 0.49285006932065756, "grad_norm": 1.613084113343599, "learning_rate": 1.071659116246521e-05, "loss": 0.2746, "step": 6221 }, { "epoch": 0.49292929292929294, "grad_norm": 1.6120754882680997, "learning_rate": 1.0714031703969112e-05, "loss": 0.2751, "step": 6222 }, { "epoch": 0.4930085165379283, "grad_norm": 1.4454114067831845, "learning_rate": 1.0711472198457462e-05, "loss": 0.3271, "step": 6223 }, { "epoch": 0.4930877401465637, "grad_norm": 2.0995044285519193, "learning_rate": 1.0708912646098795e-05, "loss": 0.3394, "step": 6224 }, { "epoch": 0.49316696375519903, "grad_norm": 1.4956951299165366, "learning_rate": 1.0706353047061638e-05, "loss": 0.2947, "step": 6225 }, { "epoch": 0.4932461873638344, "grad_norm": 1.3049652912090355, "learning_rate": 1.070379340151453e-05, "loss": 0.2738, "step": 6226 }, { "epoch": 0.4933254109724698, "grad_norm": 1.1146566156178852, "learning_rate": 1.0701233709626018e-05, "loss": 0.2278, "step": 6227 }, { "epoch": 0.4934046345811052, "grad_norm": 1.3275771711305728, "learning_rate": 1.0698673971564637e-05, "loss": 0.2079, "step": 6228 }, { "epoch": 0.49348385818974055, "grad_norm": 1.5243329371612238, "learning_rate": 1.0696114187498938e-05, "loss": 0.3093, "step": 6229 }, { "epoch": 0.49356308179837594, "grad_norm": 1.1577371866744388, "learning_rate": 1.0693554357597469e-05, "loss": 0.244, "step": 6230 }, { "epoch": 0.4936423054070113, "grad_norm": 1.5597814402026162, "learning_rate": 1.069099448202878e-05, "loss": 0.2221, "step": 6231 }, { "epoch": 0.49372152901564664, "grad_norm": 1.7503557147364344, "learning_rate": 1.0688434560961434e-05, "loss": 0.3088, "step": 6232 }, { "epoch": 0.493800752624282, "grad_norm": 1.4601903226428612, "learning_rate": 1.068587459456398e-05, "loss": 0.2771, "step": 6233 }, { "epoch": 0.4938799762329174, "grad_norm": 1.1382601182910943, "learning_rate": 1.0683314583004986e-05, "loss": 0.2491, "step": 6234 }, { "epoch": 0.4939591998415528, "grad_norm": 1.5727566425849564, "learning_rate": 1.0680754526453017e-05, "loss": 0.2989, "step": 6235 }, { "epoch": 0.49403842345018817, "grad_norm": 1.762614677251262, "learning_rate": 1.0678194425076633e-05, "loss": 0.4067, "step": 6236 }, { "epoch": 0.49411764705882355, "grad_norm": 1.2959135276725782, "learning_rate": 1.0675634279044416e-05, "loss": 0.2455, "step": 6237 }, { "epoch": 0.4941968706674589, "grad_norm": 1.4073370915886836, "learning_rate": 1.0673074088524926e-05, "loss": 0.2874, "step": 6238 }, { "epoch": 0.49427609427609426, "grad_norm": 1.3388927040782768, "learning_rate": 1.067051385368675e-05, "loss": 0.256, "step": 6239 }, { "epoch": 0.49435531788472964, "grad_norm": 1.818760707827175, "learning_rate": 1.0667953574698461e-05, "loss": 0.2888, "step": 6240 }, { "epoch": 0.494434541493365, "grad_norm": 1.1949986080402983, "learning_rate": 1.0665393251728645e-05, "loss": 0.1809, "step": 6241 }, { "epoch": 0.4945137651020004, "grad_norm": 1.0965629093802949, "learning_rate": 1.0662832884945884e-05, "loss": 0.2424, "step": 6242 }, { "epoch": 0.4945929887106358, "grad_norm": 1.2472935835416856, "learning_rate": 1.0660272474518767e-05, "loss": 0.2297, "step": 6243 }, { "epoch": 0.49467221231927117, "grad_norm": 1.5464503337392086, "learning_rate": 1.0657712020615885e-05, "loss": 0.2786, "step": 6244 }, { "epoch": 0.4947514359279065, "grad_norm": 1.4992610845825791, "learning_rate": 1.0655151523405831e-05, "loss": 0.3562, "step": 6245 }, { "epoch": 0.4948306595365419, "grad_norm": 1.245430373819781, "learning_rate": 1.06525909830572e-05, "loss": 0.2233, "step": 6246 }, { "epoch": 0.49490988314517725, "grad_norm": 1.4250430094631756, "learning_rate": 1.0650030399738594e-05, "loss": 0.2508, "step": 6247 }, { "epoch": 0.49498910675381264, "grad_norm": 1.4143829630506732, "learning_rate": 1.0647469773618617e-05, "loss": 0.2001, "step": 6248 }, { "epoch": 0.495068330362448, "grad_norm": 1.3857901342762828, "learning_rate": 1.0644909104865869e-05, "loss": 0.2782, "step": 6249 }, { "epoch": 0.4951475539710834, "grad_norm": 1.616204388193114, "learning_rate": 1.0642348393648956e-05, "loss": 0.3346, "step": 6250 }, { "epoch": 0.4952267775797188, "grad_norm": 1.359732965277956, "learning_rate": 1.0639787640136497e-05, "loss": 0.2634, "step": 6251 }, { "epoch": 0.4953060011883541, "grad_norm": 1.4073442282474358, "learning_rate": 1.0637226844497096e-05, "loss": 0.2656, "step": 6252 }, { "epoch": 0.4953852247969895, "grad_norm": 1.435274302556513, "learning_rate": 1.0634666006899375e-05, "loss": 0.2603, "step": 6253 }, { "epoch": 0.49546444840562487, "grad_norm": 1.3454932940119206, "learning_rate": 1.0632105127511952e-05, "loss": 0.2368, "step": 6254 }, { "epoch": 0.49554367201426025, "grad_norm": 1.4679275124837228, "learning_rate": 1.0629544206503445e-05, "loss": 0.3462, "step": 6255 }, { "epoch": 0.49562289562289563, "grad_norm": 1.2267471935575582, "learning_rate": 1.0626983244042486e-05, "loss": 0.2273, "step": 6256 }, { "epoch": 0.495702119231531, "grad_norm": 1.541427155453795, "learning_rate": 1.0624422240297694e-05, "loss": 0.3725, "step": 6257 }, { "epoch": 0.4957813428401664, "grad_norm": 1.7333655301296014, "learning_rate": 1.0621861195437703e-05, "loss": 0.304, "step": 6258 }, { "epoch": 0.4958605664488017, "grad_norm": 1.3265310992758543, "learning_rate": 1.0619300109631146e-05, "loss": 0.2628, "step": 6259 }, { "epoch": 0.4959397900574371, "grad_norm": 1.6433740813037165, "learning_rate": 1.0616738983046652e-05, "loss": 0.3621, "step": 6260 }, { "epoch": 0.4960190136660725, "grad_norm": 1.5750802900338863, "learning_rate": 1.0614177815852866e-05, "loss": 0.3248, "step": 6261 }, { "epoch": 0.49609823727470787, "grad_norm": 1.5121960093776905, "learning_rate": 1.0611616608218429e-05, "loss": 0.3265, "step": 6262 }, { "epoch": 0.49617746088334325, "grad_norm": 1.7768865384174732, "learning_rate": 1.0609055360311978e-05, "loss": 0.3581, "step": 6263 }, { "epoch": 0.49625668449197863, "grad_norm": 1.2474729755742784, "learning_rate": 1.0606494072302164e-05, "loss": 0.2448, "step": 6264 }, { "epoch": 0.496335908100614, "grad_norm": 1.2985944960321019, "learning_rate": 1.0603932744357632e-05, "loss": 0.3004, "step": 6265 }, { "epoch": 0.49641513170924934, "grad_norm": 1.2504828116047193, "learning_rate": 1.0601371376647034e-05, "loss": 0.3334, "step": 6266 }, { "epoch": 0.4964943553178847, "grad_norm": 1.7673045482375467, "learning_rate": 1.0598809969339028e-05, "loss": 0.4251, "step": 6267 }, { "epoch": 0.4965735789265201, "grad_norm": 1.4300543141766955, "learning_rate": 1.0596248522602264e-05, "loss": 0.3234, "step": 6268 }, { "epoch": 0.4966528025351555, "grad_norm": 1.168006961050211, "learning_rate": 1.0593687036605402e-05, "loss": 0.2139, "step": 6269 }, { "epoch": 0.49673202614379086, "grad_norm": 1.3531644655552078, "learning_rate": 1.0591125511517108e-05, "loss": 0.2416, "step": 6270 }, { "epoch": 0.49681124975242624, "grad_norm": 1.4595553564355188, "learning_rate": 1.0588563947506043e-05, "loss": 0.3178, "step": 6271 }, { "epoch": 0.49689047336106157, "grad_norm": 1.2664669002686002, "learning_rate": 1.0586002344740875e-05, "loss": 0.2647, "step": 6272 }, { "epoch": 0.49696969696969695, "grad_norm": 1.544365897153881, "learning_rate": 1.0583440703390271e-05, "loss": 0.3532, "step": 6273 }, { "epoch": 0.49704892057833233, "grad_norm": 1.600690620995663, "learning_rate": 1.0580879023622903e-05, "loss": 0.2683, "step": 6274 }, { "epoch": 0.4971281441869677, "grad_norm": 1.2714748179550441, "learning_rate": 1.0578317305607451e-05, "loss": 0.2867, "step": 6275 }, { "epoch": 0.4972073677956031, "grad_norm": 1.4301270003448296, "learning_rate": 1.057575554951258e-05, "loss": 0.2947, "step": 6276 }, { "epoch": 0.4972865914042385, "grad_norm": 1.3371345246476254, "learning_rate": 1.0573193755506982e-05, "loss": 0.2499, "step": 6277 }, { "epoch": 0.49736581501287386, "grad_norm": 1.2749612202368028, "learning_rate": 1.0570631923759331e-05, "loss": 0.2905, "step": 6278 }, { "epoch": 0.4974450386215092, "grad_norm": 1.5034597442355828, "learning_rate": 1.0568070054438314e-05, "loss": 0.2907, "step": 6279 }, { "epoch": 0.49752426223014456, "grad_norm": 1.3295244689756287, "learning_rate": 1.0565508147712618e-05, "loss": 0.3003, "step": 6280 }, { "epoch": 0.49760348583877995, "grad_norm": 1.518507654758298, "learning_rate": 1.056294620375093e-05, "loss": 0.3952, "step": 6281 }, { "epoch": 0.49768270944741533, "grad_norm": 1.9016861623652315, "learning_rate": 1.0560384222721943e-05, "loss": 0.3065, "step": 6282 }, { "epoch": 0.4977619330560507, "grad_norm": 1.510930296790475, "learning_rate": 1.0557822204794353e-05, "loss": 0.3301, "step": 6283 }, { "epoch": 0.4978411566646861, "grad_norm": 1.2444624449144386, "learning_rate": 1.0555260150136852e-05, "loss": 0.2394, "step": 6284 }, { "epoch": 0.4979203802733215, "grad_norm": 1.2715390656264163, "learning_rate": 1.0552698058918146e-05, "loss": 0.1841, "step": 6285 }, { "epoch": 0.4979996038819568, "grad_norm": 1.527953083438856, "learning_rate": 1.055013593130693e-05, "loss": 0.2792, "step": 6286 }, { "epoch": 0.4980788274905922, "grad_norm": 1.5205960747348843, "learning_rate": 1.0547573767471913e-05, "loss": 0.278, "step": 6287 }, { "epoch": 0.49815805109922756, "grad_norm": 1.1759475177062586, "learning_rate": 1.0545011567581794e-05, "loss": 0.2069, "step": 6288 }, { "epoch": 0.49823727470786294, "grad_norm": 1.5542197874672103, "learning_rate": 1.0542449331805287e-05, "loss": 0.2755, "step": 6289 }, { "epoch": 0.4983164983164983, "grad_norm": 1.6735682879426799, "learning_rate": 1.05398870603111e-05, "loss": 0.3069, "step": 6290 }, { "epoch": 0.4983957219251337, "grad_norm": 1.1616981710431828, "learning_rate": 1.0537324753267952e-05, "loss": 0.2068, "step": 6291 }, { "epoch": 0.4984749455337691, "grad_norm": 1.3631482937227253, "learning_rate": 1.053476241084455e-05, "loss": 0.2849, "step": 6292 }, { "epoch": 0.4985541691424044, "grad_norm": 1.2351585466231974, "learning_rate": 1.0532200033209618e-05, "loss": 0.2237, "step": 6293 }, { "epoch": 0.4986333927510398, "grad_norm": 1.2898599967755477, "learning_rate": 1.0529637620531876e-05, "loss": 0.2411, "step": 6294 }, { "epoch": 0.4987126163596752, "grad_norm": 1.5160490050774054, "learning_rate": 1.0527075172980043e-05, "loss": 0.292, "step": 6295 }, { "epoch": 0.49879183996831056, "grad_norm": 1.664701938006669, "learning_rate": 1.0524512690722848e-05, "loss": 0.4192, "step": 6296 }, { "epoch": 0.49887106357694594, "grad_norm": 1.7257417530026737, "learning_rate": 1.0521950173929017e-05, "loss": 0.4208, "step": 6297 }, { "epoch": 0.4989502871855813, "grad_norm": 1.3613911180602976, "learning_rate": 1.0519387622767274e-05, "loss": 0.2872, "step": 6298 }, { "epoch": 0.4990295107942167, "grad_norm": 1.2873052402710918, "learning_rate": 1.051682503740636e-05, "loss": 0.2803, "step": 6299 }, { "epoch": 0.49910873440285203, "grad_norm": 1.4843627688896437, "learning_rate": 1.0514262418015e-05, "loss": 0.3811, "step": 6300 }, { "epoch": 0.4991879580114874, "grad_norm": 1.3470545590179122, "learning_rate": 1.0511699764761935e-05, "loss": 0.2975, "step": 6301 }, { "epoch": 0.4992671816201228, "grad_norm": 1.6528728456851278, "learning_rate": 1.0509137077815906e-05, "loss": 0.3831, "step": 6302 }, { "epoch": 0.4993464052287582, "grad_norm": 1.4427697117228835, "learning_rate": 1.0506574357345647e-05, "loss": 0.3384, "step": 6303 }, { "epoch": 0.49942562883739355, "grad_norm": 0.9572009528886708, "learning_rate": 1.0504011603519904e-05, "loss": 0.1484, "step": 6304 }, { "epoch": 0.49950485244602894, "grad_norm": 1.784759294359079, "learning_rate": 1.0501448816507425e-05, "loss": 0.2918, "step": 6305 }, { "epoch": 0.4995840760546643, "grad_norm": 1.315874661285899, "learning_rate": 1.0498885996476952e-05, "loss": 0.2748, "step": 6306 }, { "epoch": 0.49966329966329964, "grad_norm": 1.803346168407661, "learning_rate": 1.0496323143597237e-05, "loss": 0.3221, "step": 6307 }, { "epoch": 0.499742523271935, "grad_norm": 1.3579686590364919, "learning_rate": 1.049376025803703e-05, "loss": 0.2781, "step": 6308 }, { "epoch": 0.4998217468805704, "grad_norm": 1.3139149388195164, "learning_rate": 1.0491197339965087e-05, "loss": 0.2832, "step": 6309 }, { "epoch": 0.4999009704892058, "grad_norm": 1.2048091184539846, "learning_rate": 1.0488634389550166e-05, "loss": 0.2217, "step": 6310 }, { "epoch": 0.49998019409784117, "grad_norm": 1.3287662211919806, "learning_rate": 1.0486071406961017e-05, "loss": 0.238, "step": 6311 }, { "epoch": 0.5000594177064765, "grad_norm": 1.4266026307722997, "learning_rate": 1.0483508392366404e-05, "loss": 0.3412, "step": 6312 }, { "epoch": 0.5001386413151119, "grad_norm": 1.5612220547135882, "learning_rate": 1.0480945345935094e-05, "loss": 0.3407, "step": 6313 }, { "epoch": 0.5002178649237473, "grad_norm": 1.5972355706250965, "learning_rate": 1.0478382267835843e-05, "loss": 0.3087, "step": 6314 }, { "epoch": 0.5002970885323826, "grad_norm": 1.4969821150311746, "learning_rate": 1.0475819158237426e-05, "loss": 0.3352, "step": 6315 }, { "epoch": 0.5003763121410181, "grad_norm": 1.5942406862924126, "learning_rate": 1.0473256017308601e-05, "loss": 0.3134, "step": 6316 }, { "epoch": 0.5004555357496534, "grad_norm": 1.3503875305968052, "learning_rate": 1.047069284521815e-05, "loss": 0.264, "step": 6317 }, { "epoch": 0.5005347593582887, "grad_norm": 1.3902456508809122, "learning_rate": 1.0468129642134837e-05, "loss": 0.3021, "step": 6318 }, { "epoch": 0.5006139829669242, "grad_norm": 1.74825180207307, "learning_rate": 1.046556640822744e-05, "loss": 0.3069, "step": 6319 }, { "epoch": 0.5006932065755595, "grad_norm": 1.1880302661403606, "learning_rate": 1.0463003143664734e-05, "loss": 0.2653, "step": 6320 }, { "epoch": 0.5007724301841949, "grad_norm": 1.476139350780901, "learning_rate": 1.0460439848615502e-05, "loss": 0.2875, "step": 6321 }, { "epoch": 0.5008516537928303, "grad_norm": 1.5435811135936066, "learning_rate": 1.0457876523248518e-05, "loss": 0.1652, "step": 6322 }, { "epoch": 0.5009308774014657, "grad_norm": 1.4391808932957582, "learning_rate": 1.0455313167732573e-05, "loss": 0.3389, "step": 6323 }, { "epoch": 0.501010101010101, "grad_norm": 1.2931238020920421, "learning_rate": 1.0452749782236443e-05, "loss": 0.2588, "step": 6324 }, { "epoch": 0.5010893246187363, "grad_norm": 1.3202406215263744, "learning_rate": 1.0450186366928917e-05, "loss": 0.2223, "step": 6325 }, { "epoch": 0.5011685482273718, "grad_norm": 1.2655135659324483, "learning_rate": 1.044762292197879e-05, "loss": 0.2495, "step": 6326 }, { "epoch": 0.5012477718360071, "grad_norm": 1.4301511529458228, "learning_rate": 1.0445059447554844e-05, "loss": 0.3412, "step": 6327 }, { "epoch": 0.5013269954446425, "grad_norm": 1.4949349713764832, "learning_rate": 1.0442495943825874e-05, "loss": 0.3055, "step": 6328 }, { "epoch": 0.5014062190532779, "grad_norm": 1.287338881075157, "learning_rate": 1.0439932410960678e-05, "loss": 0.3185, "step": 6329 }, { "epoch": 0.5014854426619133, "grad_norm": 1.3154378514895597, "learning_rate": 1.0437368849128046e-05, "loss": 0.2072, "step": 6330 }, { "epoch": 0.5015646662705486, "grad_norm": 1.4571953480781072, "learning_rate": 1.043480525849678e-05, "loss": 0.2244, "step": 6331 }, { "epoch": 0.501643889879184, "grad_norm": 1.3076757025975771, "learning_rate": 1.0432241639235686e-05, "loss": 0.2991, "step": 6332 }, { "epoch": 0.5017231134878194, "grad_norm": 1.324348010807515, "learning_rate": 1.0429677991513554e-05, "loss": 0.2188, "step": 6333 }, { "epoch": 0.5018023370964547, "grad_norm": 1.3863302514469567, "learning_rate": 1.0427114315499196e-05, "loss": 0.3068, "step": 6334 }, { "epoch": 0.5018815607050902, "grad_norm": 1.8921203092021055, "learning_rate": 1.0424550611361412e-05, "loss": 0.3052, "step": 6335 }, { "epoch": 0.5019607843137255, "grad_norm": 1.490879549646653, "learning_rate": 1.0421986879269017e-05, "loss": 0.3738, "step": 6336 }, { "epoch": 0.5020400079223608, "grad_norm": 1.5907067481251906, "learning_rate": 1.0419423119390815e-05, "loss": 0.3113, "step": 6337 }, { "epoch": 0.5021192315309962, "grad_norm": 1.278589378102372, "learning_rate": 1.041685933189562e-05, "loss": 0.1976, "step": 6338 }, { "epoch": 0.5021984551396316, "grad_norm": 1.4468349983015867, "learning_rate": 1.041429551695224e-05, "loss": 0.2964, "step": 6339 }, { "epoch": 0.502277678748267, "grad_norm": 1.500715182540103, "learning_rate": 1.0411731674729497e-05, "loss": 0.2871, "step": 6340 }, { "epoch": 0.5023569023569023, "grad_norm": 1.0764066568969828, "learning_rate": 1.0409167805396202e-05, "loss": 0.1743, "step": 6341 }, { "epoch": 0.5024361259655378, "grad_norm": 1.4355219117678222, "learning_rate": 1.040660390912118e-05, "loss": 0.414, "step": 6342 }, { "epoch": 0.5025153495741731, "grad_norm": 1.564899334878142, "learning_rate": 1.0404039986073244e-05, "loss": 0.327, "step": 6343 }, { "epoch": 0.5025945731828084, "grad_norm": 1.5037304857000693, "learning_rate": 1.0401476036421219e-05, "loss": 0.3019, "step": 6344 }, { "epoch": 0.5026737967914439, "grad_norm": 1.3928120752202564, "learning_rate": 1.039891206033393e-05, "loss": 0.2134, "step": 6345 }, { "epoch": 0.5027530204000792, "grad_norm": 1.4469178046637121, "learning_rate": 1.0396348057980202e-05, "loss": 0.2972, "step": 6346 }, { "epoch": 0.5028322440087146, "grad_norm": 1.4359649333357054, "learning_rate": 1.0393784029528858e-05, "loss": 0.2773, "step": 6347 }, { "epoch": 0.50291146761735, "grad_norm": 1.4255707160412432, "learning_rate": 1.0391219975148734e-05, "loss": 0.3843, "step": 6348 }, { "epoch": 0.5029906912259854, "grad_norm": 1.3612589872962728, "learning_rate": 1.0388655895008654e-05, "loss": 0.2539, "step": 6349 }, { "epoch": 0.5030699148346207, "grad_norm": 1.6285956201381435, "learning_rate": 1.0386091789277458e-05, "loss": 0.3218, "step": 6350 }, { "epoch": 0.503149138443256, "grad_norm": 1.2423368370314876, "learning_rate": 1.038352765812397e-05, "loss": 0.3027, "step": 6351 }, { "epoch": 0.5032283620518915, "grad_norm": 1.2518129229801136, "learning_rate": 1.0380963501717034e-05, "loss": 0.3468, "step": 6352 }, { "epoch": 0.5033075856605268, "grad_norm": 1.459526927392641, "learning_rate": 1.0378399320225486e-05, "loss": 0.2918, "step": 6353 }, { "epoch": 0.5033868092691622, "grad_norm": 1.183591304700173, "learning_rate": 1.037583511381816e-05, "loss": 0.2244, "step": 6354 }, { "epoch": 0.5034660328777976, "grad_norm": 1.4313192692663943, "learning_rate": 1.0373270882663899e-05, "loss": 0.3969, "step": 6355 }, { "epoch": 0.503545256486433, "grad_norm": 1.5780992396789457, "learning_rate": 1.0370706626931553e-05, "loss": 0.3588, "step": 6356 }, { "epoch": 0.5036244800950683, "grad_norm": 1.1625966134437107, "learning_rate": 1.0368142346789954e-05, "loss": 0.2655, "step": 6357 }, { "epoch": 0.5037037037037037, "grad_norm": 1.437105815844461, "learning_rate": 1.0365578042407956e-05, "loss": 0.319, "step": 6358 }, { "epoch": 0.5037829273123391, "grad_norm": 1.2733907531753645, "learning_rate": 1.03630137139544e-05, "loss": 0.284, "step": 6359 }, { "epoch": 0.5038621509209744, "grad_norm": 1.435809079003575, "learning_rate": 1.0360449361598137e-05, "loss": 0.2839, "step": 6360 }, { "epoch": 0.5039413745296099, "grad_norm": 1.6249705527188003, "learning_rate": 1.0357884985508022e-05, "loss": 0.2991, "step": 6361 }, { "epoch": 0.5040205981382452, "grad_norm": 1.704940301500535, "learning_rate": 1.03553205858529e-05, "loss": 0.4343, "step": 6362 }, { "epoch": 0.5040998217468806, "grad_norm": 1.6877267088032806, "learning_rate": 1.0352756162801626e-05, "loss": 0.3925, "step": 6363 }, { "epoch": 0.5041790453555159, "grad_norm": 1.3248634560321138, "learning_rate": 1.035019171652306e-05, "loss": 0.2881, "step": 6364 }, { "epoch": 0.5042582689641513, "grad_norm": 1.1378056290900431, "learning_rate": 1.0347627247186053e-05, "loss": 0.221, "step": 6365 }, { "epoch": 0.5043374925727867, "grad_norm": 1.5673366086376939, "learning_rate": 1.0345062754959463e-05, "loss": 0.393, "step": 6366 }, { "epoch": 0.504416716181422, "grad_norm": 1.414995363440549, "learning_rate": 1.0342498240012153e-05, "loss": 0.301, "step": 6367 }, { "epoch": 0.5044959397900575, "grad_norm": 1.2063677710576597, "learning_rate": 1.0339933702512978e-05, "loss": 0.2292, "step": 6368 }, { "epoch": 0.5045751633986928, "grad_norm": 1.1707263613291337, "learning_rate": 1.0337369142630808e-05, "loss": 0.2108, "step": 6369 }, { "epoch": 0.5046543870073282, "grad_norm": 1.1985221189545539, "learning_rate": 1.0334804560534504e-05, "loss": 0.2547, "step": 6370 }, { "epoch": 0.5047336106159636, "grad_norm": 1.278989342246064, "learning_rate": 1.0332239956392926e-05, "loss": 0.2883, "step": 6371 }, { "epoch": 0.5048128342245989, "grad_norm": 1.4100396056115252, "learning_rate": 1.032967533037495e-05, "loss": 0.3149, "step": 6372 }, { "epoch": 0.5048920578332343, "grad_norm": 1.2774621508238386, "learning_rate": 1.0327110682649436e-05, "loss": 0.2699, "step": 6373 }, { "epoch": 0.5049712814418696, "grad_norm": 1.3135613067580887, "learning_rate": 1.0324546013385258e-05, "loss": 0.2478, "step": 6374 }, { "epoch": 0.5050505050505051, "grad_norm": 1.5670962180579926, "learning_rate": 1.0321981322751291e-05, "loss": 0.343, "step": 6375 }, { "epoch": 0.5051297286591404, "grad_norm": 1.5408071416404392, "learning_rate": 1.03194166109164e-05, "loss": 0.3523, "step": 6376 }, { "epoch": 0.5052089522677758, "grad_norm": 1.4833899120386194, "learning_rate": 1.0316851878049465e-05, "loss": 0.2376, "step": 6377 }, { "epoch": 0.5052881758764112, "grad_norm": 1.487260079556074, "learning_rate": 1.0314287124319353e-05, "loss": 0.3652, "step": 6378 }, { "epoch": 0.5053673994850465, "grad_norm": 1.3703523160461057, "learning_rate": 1.031172234989495e-05, "loss": 0.2771, "step": 6379 }, { "epoch": 0.5054466230936819, "grad_norm": 1.6207356903244037, "learning_rate": 1.030915755494513e-05, "loss": 0.4147, "step": 6380 }, { "epoch": 0.5055258467023173, "grad_norm": 1.5215774082448033, "learning_rate": 1.030659273963877e-05, "loss": 0.3735, "step": 6381 }, { "epoch": 0.5056050703109527, "grad_norm": 1.3647215287614936, "learning_rate": 1.0304027904144756e-05, "loss": 0.3171, "step": 6382 }, { "epoch": 0.505684293919588, "grad_norm": 1.5227745167813815, "learning_rate": 1.0301463048631968e-05, "loss": 0.3718, "step": 6383 }, { "epoch": 0.5057635175282235, "grad_norm": 1.2902648478382346, "learning_rate": 1.0298898173269285e-05, "loss": 0.2381, "step": 6384 }, { "epoch": 0.5058427411368588, "grad_norm": 1.364612823540458, "learning_rate": 1.0296333278225599e-05, "loss": 0.2405, "step": 6385 }, { "epoch": 0.5059219647454941, "grad_norm": 1.2743178880217614, "learning_rate": 1.0293768363669791e-05, "loss": 0.2172, "step": 6386 }, { "epoch": 0.5060011883541295, "grad_norm": 1.5114480877915564, "learning_rate": 1.0291203429770749e-05, "loss": 0.3032, "step": 6387 }, { "epoch": 0.5060804119627649, "grad_norm": 1.7953323070021674, "learning_rate": 1.0288638476697365e-05, "loss": 0.3065, "step": 6388 }, { "epoch": 0.5061596355714003, "grad_norm": 1.4854213913587258, "learning_rate": 1.0286073504618524e-05, "loss": 0.314, "step": 6389 }, { "epoch": 0.5062388591800356, "grad_norm": 1.28258545390892, "learning_rate": 1.0283508513703118e-05, "loss": 0.317, "step": 6390 }, { "epoch": 0.5063180827886711, "grad_norm": 1.664153732903344, "learning_rate": 1.0280943504120045e-05, "loss": 0.3049, "step": 6391 }, { "epoch": 0.5063973063973064, "grad_norm": 1.3287969335967227, "learning_rate": 1.027837847603819e-05, "loss": 0.2937, "step": 6392 }, { "epoch": 0.5064765300059417, "grad_norm": 1.3513776619080435, "learning_rate": 1.0275813429626456e-05, "loss": 0.2436, "step": 6393 }, { "epoch": 0.5065557536145772, "grad_norm": 1.5177674479376446, "learning_rate": 1.027324836505373e-05, "loss": 0.3859, "step": 6394 }, { "epoch": 0.5066349772232125, "grad_norm": 1.4038432760608448, "learning_rate": 1.0270683282488913e-05, "loss": 0.2824, "step": 6395 }, { "epoch": 0.5067142008318479, "grad_norm": 1.3341102595463405, "learning_rate": 1.026811818210091e-05, "loss": 0.1889, "step": 6396 }, { "epoch": 0.5067934244404833, "grad_norm": 1.1399310245436207, "learning_rate": 1.0265553064058612e-05, "loss": 0.1709, "step": 6397 }, { "epoch": 0.5068726480491187, "grad_norm": 1.1846882525796747, "learning_rate": 1.0262987928530921e-05, "loss": 0.2669, "step": 6398 }, { "epoch": 0.506951871657754, "grad_norm": 1.5292716481835442, "learning_rate": 1.0260422775686743e-05, "loss": 0.4058, "step": 6399 }, { "epoch": 0.5070310952663893, "grad_norm": 1.2776054425290138, "learning_rate": 1.0257857605694976e-05, "loss": 0.211, "step": 6400 }, { "epoch": 0.5071103188750248, "grad_norm": 1.3643267209202141, "learning_rate": 1.025529241872453e-05, "loss": 0.2445, "step": 6401 }, { "epoch": 0.5071895424836601, "grad_norm": 1.1899612143691012, "learning_rate": 1.0252727214944302e-05, "loss": 0.2256, "step": 6402 }, { "epoch": 0.5072687660922955, "grad_norm": 1.2377676761096175, "learning_rate": 1.0250161994523205e-05, "loss": 0.2539, "step": 6403 }, { "epoch": 0.5073479897009309, "grad_norm": 1.1867164070738405, "learning_rate": 1.0247596757630147e-05, "loss": 0.2179, "step": 6404 }, { "epoch": 0.5074272133095663, "grad_norm": 1.584171660603048, "learning_rate": 1.0245031504434032e-05, "loss": 0.2577, "step": 6405 }, { "epoch": 0.5075064369182016, "grad_norm": 1.687711130361364, "learning_rate": 1.024246623510377e-05, "loss": 0.3636, "step": 6406 }, { "epoch": 0.507585660526837, "grad_norm": 1.8105725693244594, "learning_rate": 1.0239900949808274e-05, "loss": 0.3856, "step": 6407 }, { "epoch": 0.5076648841354724, "grad_norm": 1.3081729232220904, "learning_rate": 1.0237335648716456e-05, "loss": 0.2592, "step": 6408 }, { "epoch": 0.5077441077441077, "grad_norm": 2.158287753452955, "learning_rate": 1.0234770331997224e-05, "loss": 0.3329, "step": 6409 }, { "epoch": 0.5078233313527432, "grad_norm": 1.2658435504708871, "learning_rate": 1.02322049998195e-05, "loss": 0.3249, "step": 6410 }, { "epoch": 0.5079025549613785, "grad_norm": 1.1497160756670872, "learning_rate": 1.022963965235219e-05, "loss": 0.2197, "step": 6411 }, { "epoch": 0.5079817785700138, "grad_norm": 2.030858136092228, "learning_rate": 1.0227074289764216e-05, "loss": 0.3183, "step": 6412 }, { "epoch": 0.5080610021786492, "grad_norm": 1.5608515501929952, "learning_rate": 1.0224508912224491e-05, "loss": 0.296, "step": 6413 }, { "epoch": 0.5081402257872846, "grad_norm": 1.4509740689687938, "learning_rate": 1.0221943519901935e-05, "loss": 0.2715, "step": 6414 }, { "epoch": 0.50821944939592, "grad_norm": 1.4275975461074069, "learning_rate": 1.0219378112965468e-05, "loss": 0.3238, "step": 6415 }, { "epoch": 0.5082986730045553, "grad_norm": 1.1466407148088318, "learning_rate": 1.0216812691584005e-05, "loss": 0.2091, "step": 6416 }, { "epoch": 0.5083778966131908, "grad_norm": 1.4011407964265867, "learning_rate": 1.021424725592647e-05, "loss": 0.2278, "step": 6417 }, { "epoch": 0.5084571202218261, "grad_norm": 1.415432571144129, "learning_rate": 1.0211681806161787e-05, "loss": 0.268, "step": 6418 }, { "epoch": 0.5085363438304614, "grad_norm": 1.5985332967556114, "learning_rate": 1.0209116342458872e-05, "loss": 0.3466, "step": 6419 }, { "epoch": 0.5086155674390969, "grad_norm": 2.9851529078817767, "learning_rate": 1.0206550864986656e-05, "loss": 0.2604, "step": 6420 }, { "epoch": 0.5086947910477322, "grad_norm": 1.3778038263067551, "learning_rate": 1.0203985373914056e-05, "loss": 0.3411, "step": 6421 }, { "epoch": 0.5087740146563676, "grad_norm": 1.5037392162532608, "learning_rate": 1.0201419869410001e-05, "loss": 0.312, "step": 6422 }, { "epoch": 0.508853238265003, "grad_norm": 1.5963366301949247, "learning_rate": 1.0198854351643416e-05, "loss": 0.2743, "step": 6423 }, { "epoch": 0.5089324618736384, "grad_norm": 1.7420541310395408, "learning_rate": 1.0196288820783232e-05, "loss": 0.3617, "step": 6424 }, { "epoch": 0.5090116854822737, "grad_norm": 1.3688084086079422, "learning_rate": 1.0193723276998371e-05, "loss": 0.2392, "step": 6425 }, { "epoch": 0.509090909090909, "grad_norm": 1.629014141711662, "learning_rate": 1.0191157720457765e-05, "loss": 0.3443, "step": 6426 }, { "epoch": 0.5091701326995445, "grad_norm": 1.2938522024794588, "learning_rate": 1.0188592151330343e-05, "loss": 0.2253, "step": 6427 }, { "epoch": 0.5092493563081798, "grad_norm": 1.4008045975761283, "learning_rate": 1.0186026569785037e-05, "loss": 0.2422, "step": 6428 }, { "epoch": 0.5093285799168152, "grad_norm": 1.5345817467949727, "learning_rate": 1.0183460975990773e-05, "loss": 0.422, "step": 6429 }, { "epoch": 0.5094078035254506, "grad_norm": 2.109083287768109, "learning_rate": 1.0180895370116488e-05, "loss": 0.2911, "step": 6430 }, { "epoch": 0.509487027134086, "grad_norm": 1.310915569201543, "learning_rate": 1.0178329752331116e-05, "loss": 0.2519, "step": 6431 }, { "epoch": 0.5095662507427213, "grad_norm": 1.3821282776105328, "learning_rate": 1.0175764122803584e-05, "loss": 0.2736, "step": 6432 }, { "epoch": 0.5096454743513567, "grad_norm": 1.181921339592671, "learning_rate": 1.017319848170283e-05, "loss": 0.2547, "step": 6433 }, { "epoch": 0.5097246979599921, "grad_norm": 1.4343224517362072, "learning_rate": 1.0170632829197792e-05, "loss": 0.3133, "step": 6434 }, { "epoch": 0.5098039215686274, "grad_norm": 1.2119076748344708, "learning_rate": 1.0168067165457403e-05, "loss": 0.2558, "step": 6435 }, { "epoch": 0.5098831451772629, "grad_norm": 1.4787283873775698, "learning_rate": 1.01655014906506e-05, "loss": 0.3147, "step": 6436 }, { "epoch": 0.5099623687858982, "grad_norm": 1.438045038420549, "learning_rate": 1.016293580494632e-05, "loss": 0.2881, "step": 6437 }, { "epoch": 0.5100415923945336, "grad_norm": 1.3427077703758772, "learning_rate": 1.0160370108513497e-05, "loss": 0.3295, "step": 6438 }, { "epoch": 0.5101208160031689, "grad_norm": 1.4109312458553513, "learning_rate": 1.015780440152108e-05, "loss": 0.2572, "step": 6439 }, { "epoch": 0.5102000396118043, "grad_norm": 1.3785757613042255, "learning_rate": 1.0155238684138e-05, "loss": 0.2714, "step": 6440 }, { "epoch": 0.5102792632204397, "grad_norm": 1.1241131454385036, "learning_rate": 1.0152672956533198e-05, "loss": 0.1971, "step": 6441 }, { "epoch": 0.510358486829075, "grad_norm": 1.3099059174885048, "learning_rate": 1.015010721887562e-05, "loss": 0.2611, "step": 6442 }, { "epoch": 0.5104377104377105, "grad_norm": 1.369051604179246, "learning_rate": 1.0147541471334204e-05, "loss": 0.2416, "step": 6443 }, { "epoch": 0.5105169340463458, "grad_norm": 1.1581257484655514, "learning_rate": 1.0144975714077889e-05, "loss": 0.2493, "step": 6444 }, { "epoch": 0.5105961576549812, "grad_norm": 1.6191615342743022, "learning_rate": 1.0142409947275621e-05, "loss": 0.3286, "step": 6445 }, { "epoch": 0.5106753812636166, "grad_norm": 1.6108592678209457, "learning_rate": 1.0139844171096345e-05, "loss": 0.3333, "step": 6446 }, { "epoch": 0.5107546048722519, "grad_norm": 1.51305686161588, "learning_rate": 1.0137278385709004e-05, "loss": 0.2652, "step": 6447 }, { "epoch": 0.5108338284808873, "grad_norm": 1.2447413467014472, "learning_rate": 1.0134712591282539e-05, "loss": 0.2715, "step": 6448 }, { "epoch": 0.5109130520895226, "grad_norm": 1.399517704221, "learning_rate": 1.0132146787985898e-05, "loss": 0.2783, "step": 6449 }, { "epoch": 0.5109922756981581, "grad_norm": 1.3441536692328138, "learning_rate": 1.0129580975988029e-05, "loss": 0.2627, "step": 6450 }, { "epoch": 0.5110714993067934, "grad_norm": 1.3401255907442646, "learning_rate": 1.0127015155457875e-05, "loss": 0.2593, "step": 6451 }, { "epoch": 0.5111507229154288, "grad_norm": 1.6690276283876362, "learning_rate": 1.0124449326564383e-05, "loss": 0.2879, "step": 6452 }, { "epoch": 0.5112299465240642, "grad_norm": 1.319519900289313, "learning_rate": 1.0121883489476505e-05, "loss": 0.2709, "step": 6453 }, { "epoch": 0.5113091701326995, "grad_norm": 1.5736805615527079, "learning_rate": 1.0119317644363182e-05, "loss": 0.3142, "step": 6454 }, { "epoch": 0.5113883937413349, "grad_norm": 1.3801318979259982, "learning_rate": 1.0116751791393371e-05, "loss": 0.2494, "step": 6455 }, { "epoch": 0.5114676173499703, "grad_norm": 1.815866072065313, "learning_rate": 1.011418593073601e-05, "loss": 0.3711, "step": 6456 }, { "epoch": 0.5115468409586057, "grad_norm": 1.3481706241405615, "learning_rate": 1.0111620062560059e-05, "loss": 0.2271, "step": 6457 }, { "epoch": 0.511626064567241, "grad_norm": 1.3327789776784724, "learning_rate": 1.0109054187034463e-05, "loss": 0.3119, "step": 6458 }, { "epoch": 0.5117052881758765, "grad_norm": 1.4790039403829909, "learning_rate": 1.0106488304328175e-05, "loss": 0.2823, "step": 6459 }, { "epoch": 0.5117845117845118, "grad_norm": 1.3280615405659113, "learning_rate": 1.010392241461014e-05, "loss": 0.2116, "step": 6460 }, { "epoch": 0.5118637353931471, "grad_norm": 1.7079129517671097, "learning_rate": 1.010135651804932e-05, "loss": 0.3061, "step": 6461 }, { "epoch": 0.5119429590017825, "grad_norm": 1.401544507139228, "learning_rate": 1.0098790614814658e-05, "loss": 0.3032, "step": 6462 }, { "epoch": 0.5120221826104179, "grad_norm": 1.4997603792331273, "learning_rate": 1.009622470507511e-05, "loss": 0.3821, "step": 6463 }, { "epoch": 0.5121014062190533, "grad_norm": 1.3993050920884416, "learning_rate": 1.0093658788999628e-05, "loss": 0.335, "step": 6464 }, { "epoch": 0.5121806298276886, "grad_norm": 1.5181080898013404, "learning_rate": 1.0091092866757164e-05, "loss": 0.2908, "step": 6465 }, { "epoch": 0.5122598534363241, "grad_norm": 1.1064918374837822, "learning_rate": 1.0088526938516676e-05, "loss": 0.2043, "step": 6466 }, { "epoch": 0.5123390770449594, "grad_norm": 1.1616319607176038, "learning_rate": 1.0085961004447114e-05, "loss": 0.2695, "step": 6467 }, { "epoch": 0.5124183006535947, "grad_norm": 1.4197303822570853, "learning_rate": 1.0083395064717429e-05, "loss": 0.2873, "step": 6468 }, { "epoch": 0.5124975242622302, "grad_norm": 1.1476501638597234, "learning_rate": 1.0080829119496587e-05, "loss": 0.2547, "step": 6469 }, { "epoch": 0.5125767478708655, "grad_norm": 1.3036752913469873, "learning_rate": 1.0078263168953532e-05, "loss": 0.2903, "step": 6470 }, { "epoch": 0.5126559714795009, "grad_norm": 1.3727559772529956, "learning_rate": 1.0075697213257227e-05, "loss": 0.2488, "step": 6471 }, { "epoch": 0.5127351950881363, "grad_norm": 1.3520458464295788, "learning_rate": 1.0073131252576622e-05, "loss": 0.2675, "step": 6472 }, { "epoch": 0.5128144186967717, "grad_norm": 1.3819109703755623, "learning_rate": 1.0070565287080676e-05, "loss": 0.3302, "step": 6473 }, { "epoch": 0.512893642305407, "grad_norm": 1.4470920899326982, "learning_rate": 1.0067999316938348e-05, "loss": 0.4019, "step": 6474 }, { "epoch": 0.5129728659140423, "grad_norm": 1.4741553189409777, "learning_rate": 1.006543334231859e-05, "loss": 0.3144, "step": 6475 }, { "epoch": 0.5130520895226778, "grad_norm": 1.583537386558902, "learning_rate": 1.0062867363390361e-05, "loss": 0.2579, "step": 6476 }, { "epoch": 0.5131313131313131, "grad_norm": 1.4580070357913764, "learning_rate": 1.0060301380322622e-05, "loss": 0.3546, "step": 6477 }, { "epoch": 0.5132105367399485, "grad_norm": 1.5032710064645964, "learning_rate": 1.0057735393284322e-05, "loss": 0.2987, "step": 6478 }, { "epoch": 0.5132897603485839, "grad_norm": 1.7188084790954914, "learning_rate": 1.0055169402444429e-05, "loss": 0.373, "step": 6479 }, { "epoch": 0.5133689839572193, "grad_norm": 1.4069100480277812, "learning_rate": 1.0052603407971892e-05, "loss": 0.2905, "step": 6480 }, { "epoch": 0.5134482075658546, "grad_norm": 1.2741138455825607, "learning_rate": 1.0050037410035676e-05, "loss": 0.2403, "step": 6481 }, { "epoch": 0.51352743117449, "grad_norm": 1.5121683741613499, "learning_rate": 1.004747140880474e-05, "loss": 0.325, "step": 6482 }, { "epoch": 0.5136066547831254, "grad_norm": 1.5632399529557859, "learning_rate": 1.0044905404448037e-05, "loss": 0.3244, "step": 6483 }, { "epoch": 0.5136858783917607, "grad_norm": 1.2694155225637005, "learning_rate": 1.0042339397134528e-05, "loss": 0.2687, "step": 6484 }, { "epoch": 0.5137651020003962, "grad_norm": 1.229779412965252, "learning_rate": 1.0039773387033178e-05, "loss": 0.2719, "step": 6485 }, { "epoch": 0.5138443256090315, "grad_norm": 1.4716763089929525, "learning_rate": 1.0037207374312936e-05, "loss": 0.2882, "step": 6486 }, { "epoch": 0.5139235492176669, "grad_norm": 1.7992606118391934, "learning_rate": 1.003464135914277e-05, "loss": 0.3414, "step": 6487 }, { "epoch": 0.5140027728263022, "grad_norm": 1.5190185780278507, "learning_rate": 1.0032075341691639e-05, "loss": 0.3217, "step": 6488 }, { "epoch": 0.5140819964349376, "grad_norm": 1.0155358847355467, "learning_rate": 1.0029509322128499e-05, "loss": 0.2007, "step": 6489 }, { "epoch": 0.514161220043573, "grad_norm": 1.322341655238348, "learning_rate": 1.0026943300622313e-05, "loss": 0.2863, "step": 6490 }, { "epoch": 0.5142404436522083, "grad_norm": 1.109876830347495, "learning_rate": 1.0024377277342038e-05, "loss": 0.2151, "step": 6491 }, { "epoch": 0.5143196672608438, "grad_norm": 1.609858442176041, "learning_rate": 1.002181125245664e-05, "loss": 0.3997, "step": 6492 }, { "epoch": 0.5143988908694791, "grad_norm": 1.6931970810371015, "learning_rate": 1.0019245226135075e-05, "loss": 0.4202, "step": 6493 }, { "epoch": 0.5144781144781144, "grad_norm": 1.4407113823007818, "learning_rate": 1.0016679198546304e-05, "loss": 0.3357, "step": 6494 }, { "epoch": 0.5145573380867499, "grad_norm": 1.3950102431679114, "learning_rate": 1.0014113169859285e-05, "loss": 0.3026, "step": 6495 }, { "epoch": 0.5146365616953852, "grad_norm": 1.4614967907400644, "learning_rate": 1.0011547140242987e-05, "loss": 0.2687, "step": 6496 }, { "epoch": 0.5147157853040206, "grad_norm": 1.3616784523328638, "learning_rate": 1.0008981109866363e-05, "loss": 0.2079, "step": 6497 }, { "epoch": 0.514795008912656, "grad_norm": 1.0006941442171828, "learning_rate": 1.0006415078898377e-05, "loss": 0.1817, "step": 6498 }, { "epoch": 0.5148742325212914, "grad_norm": 1.3198203608938563, "learning_rate": 1.0003849047507987e-05, "loss": 0.2822, "step": 6499 }, { "epoch": 0.5149534561299267, "grad_norm": 1.245552027001811, "learning_rate": 1.0001283015864157e-05, "loss": 0.2904, "step": 6500 }, { "epoch": 0.515032679738562, "grad_norm": 1.6016362034165224, "learning_rate": 9.998716984135847e-06, "loss": 0.4507, "step": 6501 }, { "epoch": 0.5151119033471975, "grad_norm": 1.4845955851877697, "learning_rate": 9.996150952492018e-06, "loss": 0.3836, "step": 6502 }, { "epoch": 0.5151911269558328, "grad_norm": 1.338854729799261, "learning_rate": 9.993584921101628e-06, "loss": 0.3241, "step": 6503 }, { "epoch": 0.5152703505644682, "grad_norm": 1.2501295576155476, "learning_rate": 9.991018890133642e-06, "loss": 0.2338, "step": 6504 }, { "epoch": 0.5153495741731036, "grad_norm": 1.367821724046173, "learning_rate": 9.988452859757017e-06, "loss": 0.3418, "step": 6505 }, { "epoch": 0.515428797781739, "grad_norm": 1.1045534234665813, "learning_rate": 9.985886830140717e-06, "loss": 0.1483, "step": 6506 }, { "epoch": 0.5155080213903743, "grad_norm": 1.6183770616315782, "learning_rate": 9.983320801453702e-06, "loss": 0.2698, "step": 6507 }, { "epoch": 0.5155872449990097, "grad_norm": 1.432076627891755, "learning_rate": 9.98075477386493e-06, "loss": 0.2755, "step": 6508 }, { "epoch": 0.5156664686076451, "grad_norm": 1.3340431764865326, "learning_rate": 9.978188747543364e-06, "loss": 0.2588, "step": 6509 }, { "epoch": 0.5157456922162804, "grad_norm": 1.581196240868058, "learning_rate": 9.975622722657965e-06, "loss": 0.3198, "step": 6510 }, { "epoch": 0.5158249158249159, "grad_norm": 1.2679945471071485, "learning_rate": 9.973056699377692e-06, "loss": 0.2705, "step": 6511 }, { "epoch": 0.5159041394335512, "grad_norm": 1.2261073996585534, "learning_rate": 9.970490677871506e-06, "loss": 0.2446, "step": 6512 }, { "epoch": 0.5159833630421866, "grad_norm": 1.891342414662232, "learning_rate": 9.967924658308366e-06, "loss": 0.3654, "step": 6513 }, { "epoch": 0.5160625866508219, "grad_norm": 1.2628602047310538, "learning_rate": 9.965358640857231e-06, "loss": 0.1947, "step": 6514 }, { "epoch": 0.5161418102594573, "grad_norm": 1.4322366128247757, "learning_rate": 9.962792625687067e-06, "loss": 0.3222, "step": 6515 }, { "epoch": 0.5162210338680927, "grad_norm": 1.336805850325114, "learning_rate": 9.960226612966828e-06, "loss": 0.3178, "step": 6516 }, { "epoch": 0.516300257476728, "grad_norm": 1.3832001781071752, "learning_rate": 9.957660602865477e-06, "loss": 0.2969, "step": 6517 }, { "epoch": 0.5163794810853635, "grad_norm": 1.2823249710214122, "learning_rate": 9.955094595551968e-06, "loss": 0.2846, "step": 6518 }, { "epoch": 0.5164587046939988, "grad_norm": 1.4275951606358972, "learning_rate": 9.952528591195265e-06, "loss": 0.237, "step": 6519 }, { "epoch": 0.5165379283026342, "grad_norm": 1.2665494870212624, "learning_rate": 9.949962589964327e-06, "loss": 0.265, "step": 6520 }, { "epoch": 0.5166171519112696, "grad_norm": 1.427985504438624, "learning_rate": 9.94739659202811e-06, "loss": 0.2594, "step": 6521 }, { "epoch": 0.5166963755199049, "grad_norm": 1.3770262666103272, "learning_rate": 9.944830597555573e-06, "loss": 0.2566, "step": 6522 }, { "epoch": 0.5167755991285403, "grad_norm": 1.1104599415081753, "learning_rate": 9.94226460671568e-06, "loss": 0.1907, "step": 6523 }, { "epoch": 0.5168548227371756, "grad_norm": 1.4519819827409062, "learning_rate": 9.939698619677383e-06, "loss": 0.3019, "step": 6524 }, { "epoch": 0.5169340463458111, "grad_norm": 1.2591534360748693, "learning_rate": 9.937132636609642e-06, "loss": 0.2439, "step": 6525 }, { "epoch": 0.5170132699544464, "grad_norm": 1.3530875061296936, "learning_rate": 9.934566657681412e-06, "loss": 0.2697, "step": 6526 }, { "epoch": 0.5170924935630818, "grad_norm": 1.2949836348716584, "learning_rate": 9.932000683061654e-06, "loss": 0.336, "step": 6527 }, { "epoch": 0.5171717171717172, "grad_norm": 1.5396352721555138, "learning_rate": 9.929434712919327e-06, "loss": 0.3644, "step": 6528 }, { "epoch": 0.5172509407803525, "grad_norm": 1.4718538655183226, "learning_rate": 9.926868747423381e-06, "loss": 0.3595, "step": 6529 }, { "epoch": 0.5173301643889879, "grad_norm": 1.3030805697611099, "learning_rate": 9.924302786742775e-06, "loss": 0.2359, "step": 6530 }, { "epoch": 0.5174093879976233, "grad_norm": 1.6778384483392856, "learning_rate": 9.92173683104647e-06, "loss": 0.333, "step": 6531 }, { "epoch": 0.5174886116062587, "grad_norm": 1.2860284615745243, "learning_rate": 9.919170880503416e-06, "loss": 0.1844, "step": 6532 }, { "epoch": 0.517567835214894, "grad_norm": 1.3396535564695895, "learning_rate": 9.916604935282573e-06, "loss": 0.3616, "step": 6533 }, { "epoch": 0.5176470588235295, "grad_norm": 1.5839666192931645, "learning_rate": 9.914038995552891e-06, "loss": 0.3028, "step": 6534 }, { "epoch": 0.5177262824321648, "grad_norm": 1.2158117892305256, "learning_rate": 9.911473061483326e-06, "loss": 0.2278, "step": 6535 }, { "epoch": 0.5178055060408001, "grad_norm": 1.1860243331133984, "learning_rate": 9.908907133242838e-06, "loss": 0.217, "step": 6536 }, { "epoch": 0.5178847296494355, "grad_norm": 1.571307756562921, "learning_rate": 9.906341211000375e-06, "loss": 0.3241, "step": 6537 }, { "epoch": 0.5179639532580709, "grad_norm": 1.352736758711321, "learning_rate": 9.903775294924892e-06, "loss": 0.2471, "step": 6538 }, { "epoch": 0.5180431768667063, "grad_norm": 1.1912345151225152, "learning_rate": 9.901209385185345e-06, "loss": 0.2186, "step": 6539 }, { "epoch": 0.5181224004753416, "grad_norm": 1.6722376099949459, "learning_rate": 9.898643481950683e-06, "loss": 0.3487, "step": 6540 }, { "epoch": 0.5182016240839771, "grad_norm": 1.34099016289351, "learning_rate": 9.89607758538986e-06, "loss": 0.273, "step": 6541 }, { "epoch": 0.5182808476926124, "grad_norm": 1.337450027613429, "learning_rate": 9.893511695671828e-06, "loss": 0.2893, "step": 6542 }, { "epoch": 0.5183600713012477, "grad_norm": 1.3192789259086608, "learning_rate": 9.890945812965538e-06, "loss": 0.2486, "step": 6543 }, { "epoch": 0.5184392949098832, "grad_norm": 1.5135675585355135, "learning_rate": 9.888379937439944e-06, "loss": 0.2595, "step": 6544 }, { "epoch": 0.5185185185185185, "grad_norm": 1.1337436770634528, "learning_rate": 9.885814069263991e-06, "loss": 0.1995, "step": 6545 }, { "epoch": 0.5185977421271539, "grad_norm": 1.5365587138434578, "learning_rate": 9.883248208606632e-06, "loss": 0.3141, "step": 6546 }, { "epoch": 0.5186769657357893, "grad_norm": 1.422701746607019, "learning_rate": 9.880682355636821e-06, "loss": 0.2765, "step": 6547 }, { "epoch": 0.5187561893444247, "grad_norm": 1.529237671876428, "learning_rate": 9.878116510523498e-06, "loss": 0.3059, "step": 6548 }, { "epoch": 0.51883541295306, "grad_norm": 1.2404879896734196, "learning_rate": 9.87555067343562e-06, "loss": 0.2403, "step": 6549 }, { "epoch": 0.5189146365616953, "grad_norm": 1.6405916649431471, "learning_rate": 9.872984844542128e-06, "loss": 0.2724, "step": 6550 }, { "epoch": 0.5189938601703308, "grad_norm": 1.6431578600880408, "learning_rate": 9.870419024011973e-06, "loss": 0.3528, "step": 6551 }, { "epoch": 0.5190730837789661, "grad_norm": 1.4649941004090359, "learning_rate": 9.867853212014104e-06, "loss": 0.2248, "step": 6552 }, { "epoch": 0.5191523073876015, "grad_norm": 1.4467380048203815, "learning_rate": 9.865287408717464e-06, "loss": 0.2586, "step": 6553 }, { "epoch": 0.5192315309962369, "grad_norm": 1.3687632080923724, "learning_rate": 9.862721614291e-06, "loss": 0.2637, "step": 6554 }, { "epoch": 0.5193107546048723, "grad_norm": 1.5348495927913135, "learning_rate": 9.860155828903658e-06, "loss": 0.282, "step": 6555 }, { "epoch": 0.5193899782135076, "grad_norm": 1.3104011170070438, "learning_rate": 9.85759005272438e-06, "loss": 0.2535, "step": 6556 }, { "epoch": 0.519469201822143, "grad_norm": 1.3679464498838536, "learning_rate": 9.855024285922114e-06, "loss": 0.3364, "step": 6557 }, { "epoch": 0.5195484254307784, "grad_norm": 1.3888039704372102, "learning_rate": 9.8524585286658e-06, "loss": 0.3388, "step": 6558 }, { "epoch": 0.5196276490394137, "grad_norm": 1.2903769663712974, "learning_rate": 9.84989278112438e-06, "loss": 0.2027, "step": 6559 }, { "epoch": 0.5197068726480492, "grad_norm": 1.5477846072334485, "learning_rate": 9.847327043466802e-06, "loss": 0.2101, "step": 6560 }, { "epoch": 0.5197860962566845, "grad_norm": 1.6518751885131344, "learning_rate": 9.844761315862002e-06, "loss": 0.2742, "step": 6561 }, { "epoch": 0.5198653198653199, "grad_norm": 1.4493261938639335, "learning_rate": 9.842195598478922e-06, "loss": 0.2671, "step": 6562 }, { "epoch": 0.5199445434739552, "grad_norm": 1.3792945002120043, "learning_rate": 9.839629891486503e-06, "loss": 0.226, "step": 6563 }, { "epoch": 0.5200237670825906, "grad_norm": 1.2222546090097701, "learning_rate": 9.83706419505368e-06, "loss": 0.306, "step": 6564 }, { "epoch": 0.520102990691226, "grad_norm": 1.4426633279180927, "learning_rate": 9.834498509349402e-06, "loss": 0.2855, "step": 6565 }, { "epoch": 0.5201822142998613, "grad_norm": 1.209579870158141, "learning_rate": 9.831932834542598e-06, "loss": 0.3127, "step": 6566 }, { "epoch": 0.5202614379084968, "grad_norm": 1.8676181339001057, "learning_rate": 9.829367170802208e-06, "loss": 0.3819, "step": 6567 }, { "epoch": 0.5203406615171321, "grad_norm": 1.3864076170216155, "learning_rate": 9.82680151829717e-06, "loss": 0.3071, "step": 6568 }, { "epoch": 0.5204198851257675, "grad_norm": 1.651451752149105, "learning_rate": 9.824235877196418e-06, "loss": 0.2857, "step": 6569 }, { "epoch": 0.5204991087344029, "grad_norm": 1.2776725528422195, "learning_rate": 9.821670247668887e-06, "loss": 0.282, "step": 6570 }, { "epoch": 0.5205783323430382, "grad_norm": 1.318574722385798, "learning_rate": 9.819104629883513e-06, "loss": 0.2155, "step": 6571 }, { "epoch": 0.5206575559516736, "grad_norm": 1.312429714055526, "learning_rate": 9.816539024009227e-06, "loss": 0.3126, "step": 6572 }, { "epoch": 0.520736779560309, "grad_norm": 1.4528507004088553, "learning_rate": 9.813973430214965e-06, "loss": 0.2408, "step": 6573 }, { "epoch": 0.5208160031689444, "grad_norm": 1.5859479582591554, "learning_rate": 9.811407848669657e-06, "loss": 0.4102, "step": 6574 }, { "epoch": 0.5208952267775797, "grad_norm": 1.5629767648893005, "learning_rate": 9.808842279542235e-06, "loss": 0.2933, "step": 6575 }, { "epoch": 0.520974450386215, "grad_norm": 1.2234931913568277, "learning_rate": 9.80627672300163e-06, "loss": 0.2267, "step": 6576 }, { "epoch": 0.5210536739948505, "grad_norm": 1.5754881580614246, "learning_rate": 9.80371117921677e-06, "loss": 0.3487, "step": 6577 }, { "epoch": 0.5211328976034858, "grad_norm": 1.130792574641297, "learning_rate": 9.801145648356585e-06, "loss": 0.2175, "step": 6578 }, { "epoch": 0.5212121212121212, "grad_norm": 1.2418300256654666, "learning_rate": 9.798580130590004e-06, "loss": 0.2637, "step": 6579 }, { "epoch": 0.5212913448207566, "grad_norm": 1.5691711928683074, "learning_rate": 9.79601462608595e-06, "loss": 0.3014, "step": 6580 }, { "epoch": 0.521370568429392, "grad_norm": 1.374551434834054, "learning_rate": 9.79344913501335e-06, "loss": 0.3059, "step": 6581 }, { "epoch": 0.5214497920380273, "grad_norm": 1.3503790526494002, "learning_rate": 9.790883657541133e-06, "loss": 0.2988, "step": 6582 }, { "epoch": 0.5215290156466627, "grad_norm": 1.7853621542948634, "learning_rate": 9.788318193838218e-06, "loss": 0.3092, "step": 6583 }, { "epoch": 0.5216082392552981, "grad_norm": 1.1868051561832857, "learning_rate": 9.785752744073534e-06, "loss": 0.1813, "step": 6584 }, { "epoch": 0.5216874628639334, "grad_norm": 1.7817175877858948, "learning_rate": 9.783187308416e-06, "loss": 0.4138, "step": 6585 }, { "epoch": 0.5217666864725689, "grad_norm": 1.2703527499319964, "learning_rate": 9.780621887034537e-06, "loss": 0.2491, "step": 6586 }, { "epoch": 0.5218459100812042, "grad_norm": 1.3236493636259552, "learning_rate": 9.778056480098068e-06, "loss": 0.2776, "step": 6587 }, { "epoch": 0.5219251336898396, "grad_norm": 1.3979214929633739, "learning_rate": 9.775491087775514e-06, "loss": 0.3045, "step": 6588 }, { "epoch": 0.5220043572984749, "grad_norm": 1.405423445149963, "learning_rate": 9.772925710235789e-06, "loss": 0.306, "step": 6589 }, { "epoch": 0.5220835809071103, "grad_norm": 1.681826081372409, "learning_rate": 9.770360347647817e-06, "loss": 0.3646, "step": 6590 }, { "epoch": 0.5221628045157457, "grad_norm": 1.098210308335219, "learning_rate": 9.767795000180507e-06, "loss": 0.1607, "step": 6591 }, { "epoch": 0.522242028124381, "grad_norm": 1.4567167802857144, "learning_rate": 9.76522966800278e-06, "loss": 0.2895, "step": 6592 }, { "epoch": 0.5223212517330165, "grad_norm": 1.7222077801597855, "learning_rate": 9.76266435128355e-06, "loss": 0.4386, "step": 6593 }, { "epoch": 0.5224004753416518, "grad_norm": 1.4542394656245397, "learning_rate": 9.76009905019173e-06, "loss": 0.3062, "step": 6594 }, { "epoch": 0.5224796989502872, "grad_norm": 1.3319879514273967, "learning_rate": 9.757533764896235e-06, "loss": 0.234, "step": 6595 }, { "epoch": 0.5225589225589226, "grad_norm": 1.102652204594049, "learning_rate": 9.754968495565973e-06, "loss": 0.2139, "step": 6596 }, { "epoch": 0.5226381461675579, "grad_norm": 1.5008788814630107, "learning_rate": 9.752403242369857e-06, "loss": 0.2938, "step": 6597 }, { "epoch": 0.5227173697761933, "grad_norm": 1.3042299229602294, "learning_rate": 9.749838005476798e-06, "loss": 0.2318, "step": 6598 }, { "epoch": 0.5227965933848286, "grad_norm": 1.4384754707650653, "learning_rate": 9.7472727850557e-06, "loss": 0.2833, "step": 6599 }, { "epoch": 0.5228758169934641, "grad_norm": 1.5659159084691918, "learning_rate": 9.744707581275473e-06, "loss": 0.3028, "step": 6600 }, { "epoch": 0.5229550406020994, "grad_norm": 1.499965713046398, "learning_rate": 9.742142394305026e-06, "loss": 0.3074, "step": 6601 }, { "epoch": 0.5230342642107348, "grad_norm": 1.5575060580220301, "learning_rate": 9.739577224313258e-06, "loss": 0.2783, "step": 6602 }, { "epoch": 0.5231134878193702, "grad_norm": 1.1197961427045093, "learning_rate": 9.737012071469082e-06, "loss": 0.2314, "step": 6603 }, { "epoch": 0.5231927114280055, "grad_norm": 1.2926415876245017, "learning_rate": 9.734446935941392e-06, "loss": 0.2241, "step": 6604 }, { "epoch": 0.5232719350366409, "grad_norm": 1.2255991018938845, "learning_rate": 9.731881817899092e-06, "loss": 0.2239, "step": 6605 }, { "epoch": 0.5233511586452763, "grad_norm": 1.3570637346160763, "learning_rate": 9.729316717511088e-06, "loss": 0.2831, "step": 6606 }, { "epoch": 0.5234303822539117, "grad_norm": 1.6452875735783177, "learning_rate": 9.726751634946272e-06, "loss": 0.2834, "step": 6607 }, { "epoch": 0.523509605862547, "grad_norm": 1.4815357622229162, "learning_rate": 9.724186570373548e-06, "loss": 0.3417, "step": 6608 }, { "epoch": 0.5235888294711825, "grad_norm": 1.1741083309154061, "learning_rate": 9.721621523961812e-06, "loss": 0.2388, "step": 6609 }, { "epoch": 0.5236680530798178, "grad_norm": 1.364266079115737, "learning_rate": 9.719056495879958e-06, "loss": 0.2487, "step": 6610 }, { "epoch": 0.5237472766884531, "grad_norm": 1.4003965569316312, "learning_rate": 9.716491486296883e-06, "loss": 0.3278, "step": 6611 }, { "epoch": 0.5238265002970885, "grad_norm": 1.2783877658279088, "learning_rate": 9.71392649538148e-06, "loss": 0.2696, "step": 6612 }, { "epoch": 0.5239057239057239, "grad_norm": 1.4169665245215126, "learning_rate": 9.711361523302638e-06, "loss": 0.2434, "step": 6613 }, { "epoch": 0.5239849475143593, "grad_norm": 1.300044334645034, "learning_rate": 9.708796570229253e-06, "loss": 0.2377, "step": 6614 }, { "epoch": 0.5240641711229946, "grad_norm": 1.0962981986510534, "learning_rate": 9.706231636330212e-06, "loss": 0.2179, "step": 6615 }, { "epoch": 0.5241433947316301, "grad_norm": 1.4706952055186984, "learning_rate": 9.703666721774403e-06, "loss": 0.2953, "step": 6616 }, { "epoch": 0.5242226183402654, "grad_norm": 1.2421938929607341, "learning_rate": 9.701101826730718e-06, "loss": 0.2397, "step": 6617 }, { "epoch": 0.5243018419489007, "grad_norm": 1.3832978359905337, "learning_rate": 9.698536951368035e-06, "loss": 0.283, "step": 6618 }, { "epoch": 0.5243810655575362, "grad_norm": 1.1344165727794133, "learning_rate": 9.695972095855248e-06, "loss": 0.2076, "step": 6619 }, { "epoch": 0.5244602891661715, "grad_norm": 1.5036956250666393, "learning_rate": 9.693407260361231e-06, "loss": 0.2379, "step": 6620 }, { "epoch": 0.5245395127748069, "grad_norm": 1.55834823894501, "learning_rate": 9.690842445054873e-06, "loss": 0.3146, "step": 6621 }, { "epoch": 0.5246187363834423, "grad_norm": 1.4085930128086739, "learning_rate": 9.688277650105053e-06, "loss": 0.3089, "step": 6622 }, { "epoch": 0.5246979599920777, "grad_norm": 1.3070274339398167, "learning_rate": 9.685712875680649e-06, "loss": 0.2517, "step": 6623 }, { "epoch": 0.524777183600713, "grad_norm": 1.4591815620715252, "learning_rate": 9.683148121950539e-06, "loss": 0.2746, "step": 6624 }, { "epoch": 0.5248564072093483, "grad_norm": 1.0482277801340272, "learning_rate": 9.680583389083602e-06, "loss": 0.2525, "step": 6625 }, { "epoch": 0.5249356308179838, "grad_norm": 1.4777370681230158, "learning_rate": 9.67801867724871e-06, "loss": 0.3394, "step": 6626 }, { "epoch": 0.5250148544266191, "grad_norm": 1.276657979632384, "learning_rate": 9.675453986614743e-06, "loss": 0.2875, "step": 6627 }, { "epoch": 0.5250940780352545, "grad_norm": 1.36954365882781, "learning_rate": 9.672889317350565e-06, "loss": 0.2801, "step": 6628 }, { "epoch": 0.5251733016438899, "grad_norm": 1.2029572516345086, "learning_rate": 9.670324669625053e-06, "loss": 0.2057, "step": 6629 }, { "epoch": 0.5252525252525253, "grad_norm": 1.3923040479544802, "learning_rate": 9.667760043607077e-06, "loss": 0.2774, "step": 6630 }, { "epoch": 0.5253317488611606, "grad_norm": 1.1785338348851206, "learning_rate": 9.6651954394655e-06, "loss": 0.2519, "step": 6631 }, { "epoch": 0.525410972469796, "grad_norm": 1.6707379241602642, "learning_rate": 9.662630857369194e-06, "loss": 0.3525, "step": 6632 }, { "epoch": 0.5254901960784314, "grad_norm": 1.296803773395515, "learning_rate": 9.660066297487024e-06, "loss": 0.2062, "step": 6633 }, { "epoch": 0.5255694196870667, "grad_norm": 1.1430938427299344, "learning_rate": 9.65750175998785e-06, "loss": 0.2355, "step": 6634 }, { "epoch": 0.5256486432957022, "grad_norm": 1.497283095255509, "learning_rate": 9.65493724504054e-06, "loss": 0.2755, "step": 6635 }, { "epoch": 0.5257278669043375, "grad_norm": 1.4440990083113452, "learning_rate": 9.65237275281395e-06, "loss": 0.2768, "step": 6636 }, { "epoch": 0.5258070905129729, "grad_norm": 1.4136335895172514, "learning_rate": 9.64980828347694e-06, "loss": 0.299, "step": 6637 }, { "epoch": 0.5258863141216082, "grad_norm": 1.3887299551900778, "learning_rate": 9.647243837198375e-06, "loss": 0.3214, "step": 6638 }, { "epoch": 0.5259655377302436, "grad_norm": 1.4807982323286617, "learning_rate": 9.644679414147102e-06, "loss": 0.2779, "step": 6639 }, { "epoch": 0.526044761338879, "grad_norm": 1.5786787197774497, "learning_rate": 9.64211501449198e-06, "loss": 0.2313, "step": 6640 }, { "epoch": 0.5261239849475143, "grad_norm": 1.6330978321519143, "learning_rate": 9.639550638401863e-06, "loss": 0.3444, "step": 6641 }, { "epoch": 0.5262032085561498, "grad_norm": 1.5450908475369736, "learning_rate": 9.6369862860456e-06, "loss": 0.3393, "step": 6642 }, { "epoch": 0.5262824321647851, "grad_norm": 1.2307514643855018, "learning_rate": 9.634421957592048e-06, "loss": 0.2625, "step": 6643 }, { "epoch": 0.5263616557734205, "grad_norm": 1.6330040943187798, "learning_rate": 9.631857653210048e-06, "loss": 0.2997, "step": 6644 }, { "epoch": 0.5264408793820559, "grad_norm": 1.377117676519559, "learning_rate": 9.629293373068449e-06, "loss": 0.3138, "step": 6645 }, { "epoch": 0.5265201029906912, "grad_norm": 1.3755934024613092, "learning_rate": 9.626729117336101e-06, "loss": 0.3118, "step": 6646 }, { "epoch": 0.5265993265993266, "grad_norm": 1.1537747893257073, "learning_rate": 9.624164886181841e-06, "loss": 0.2376, "step": 6647 }, { "epoch": 0.526678550207962, "grad_norm": 1.1797568424380762, "learning_rate": 9.621600679774516e-06, "loss": 0.2726, "step": 6648 }, { "epoch": 0.5267577738165974, "grad_norm": 1.6083939902514486, "learning_rate": 9.619036498282968e-06, "loss": 0.3965, "step": 6649 }, { "epoch": 0.5268369974252327, "grad_norm": 1.195518192767305, "learning_rate": 9.61647234187603e-06, "loss": 0.2494, "step": 6650 }, { "epoch": 0.526916221033868, "grad_norm": 1.4300413343079406, "learning_rate": 9.613908210722546e-06, "loss": 0.2458, "step": 6651 }, { "epoch": 0.5269954446425035, "grad_norm": 1.3963632808219681, "learning_rate": 9.611344104991346e-06, "loss": 0.2901, "step": 6652 }, { "epoch": 0.5270746682511388, "grad_norm": 1.2955989774072305, "learning_rate": 9.608780024851266e-06, "loss": 0.254, "step": 6653 }, { "epoch": 0.5271538918597742, "grad_norm": 1.1469459577731393, "learning_rate": 9.606215970471142e-06, "loss": 0.1615, "step": 6654 }, { "epoch": 0.5272331154684096, "grad_norm": 1.3524400286821538, "learning_rate": 9.6036519420198e-06, "loss": 0.2531, "step": 6655 }, { "epoch": 0.527312339077045, "grad_norm": 1.288490618518072, "learning_rate": 9.601087939666071e-06, "loss": 0.2012, "step": 6656 }, { "epoch": 0.5273915626856803, "grad_norm": 1.4052405040406672, "learning_rate": 9.598523963578785e-06, "loss": 0.326, "step": 6657 }, { "epoch": 0.5274707862943157, "grad_norm": 1.3148652952852977, "learning_rate": 9.595960013926761e-06, "loss": 0.2566, "step": 6658 }, { "epoch": 0.5275500099029511, "grad_norm": 1.3082786870586396, "learning_rate": 9.593396090878823e-06, "loss": 0.2181, "step": 6659 }, { "epoch": 0.5276292335115864, "grad_norm": 1.291568255789929, "learning_rate": 9.590832194603801e-06, "loss": 0.2321, "step": 6660 }, { "epoch": 0.5277084571202219, "grad_norm": 2.031491509892924, "learning_rate": 9.588268325270506e-06, "loss": 0.4026, "step": 6661 }, { "epoch": 0.5277876807288572, "grad_norm": 1.2305936223155234, "learning_rate": 9.585704483047761e-06, "loss": 0.1739, "step": 6662 }, { "epoch": 0.5278669043374926, "grad_norm": 1.2636170185223636, "learning_rate": 9.583140668104387e-06, "loss": 0.2811, "step": 6663 }, { "epoch": 0.5279461279461279, "grad_norm": 1.521316684136875, "learning_rate": 9.58057688060919e-06, "loss": 0.3333, "step": 6664 }, { "epoch": 0.5280253515547633, "grad_norm": 1.541617709181252, "learning_rate": 9.578013120730987e-06, "loss": 0.3052, "step": 6665 }, { "epoch": 0.5281045751633987, "grad_norm": 1.1608552202052271, "learning_rate": 9.575449388638592e-06, "loss": 0.2429, "step": 6666 }, { "epoch": 0.528183798772034, "grad_norm": 1.8317600087279482, "learning_rate": 9.57288568450081e-06, "loss": 0.3713, "step": 6667 }, { "epoch": 0.5282630223806695, "grad_norm": 1.6563670838843565, "learning_rate": 9.570322008486453e-06, "loss": 0.3075, "step": 6668 }, { "epoch": 0.5283422459893048, "grad_norm": 1.4928566141724477, "learning_rate": 9.567758360764321e-06, "loss": 0.2808, "step": 6669 }, { "epoch": 0.5284214695979402, "grad_norm": 1.5001467356875635, "learning_rate": 9.565194741503221e-06, "loss": 0.3164, "step": 6670 }, { "epoch": 0.5285006932065756, "grad_norm": 1.3537649281897015, "learning_rate": 9.562631150871959e-06, "loss": 0.3156, "step": 6671 }, { "epoch": 0.5285799168152109, "grad_norm": 1.2675423099131446, "learning_rate": 9.560067589039327e-06, "loss": 0.2407, "step": 6672 }, { "epoch": 0.5286591404238463, "grad_norm": 1.2573851830312253, "learning_rate": 9.55750405617413e-06, "loss": 0.2175, "step": 6673 }, { "epoch": 0.5287383640324816, "grad_norm": 1.5588520506002632, "learning_rate": 9.554940552445161e-06, "loss": 0.2615, "step": 6674 }, { "epoch": 0.5288175876411171, "grad_norm": 1.4332324400791656, "learning_rate": 9.552377078021215e-06, "loss": 0.3435, "step": 6675 }, { "epoch": 0.5288968112497524, "grad_norm": 1.1730884861729056, "learning_rate": 9.549813633071085e-06, "loss": 0.2783, "step": 6676 }, { "epoch": 0.5289760348583878, "grad_norm": 1.4439941691093812, "learning_rate": 9.54725021776356e-06, "loss": 0.3119, "step": 6677 }, { "epoch": 0.5290552584670232, "grad_norm": 1.7370101088080452, "learning_rate": 9.54468683226743e-06, "loss": 0.3228, "step": 6678 }, { "epoch": 0.5291344820756585, "grad_norm": 1.117600230125303, "learning_rate": 9.542123476751484e-06, "loss": 0.1909, "step": 6679 }, { "epoch": 0.5292137056842939, "grad_norm": 1.1515903249132564, "learning_rate": 9.5395601513845e-06, "loss": 0.2222, "step": 6680 }, { "epoch": 0.5292929292929293, "grad_norm": 1.8867139243750468, "learning_rate": 9.536996856335269e-06, "loss": 0.3558, "step": 6681 }, { "epoch": 0.5293721529015647, "grad_norm": 1.6752764695045361, "learning_rate": 9.534433591772562e-06, "loss": 0.3092, "step": 6682 }, { "epoch": 0.5294513765102, "grad_norm": 1.6033925630978305, "learning_rate": 9.531870357865165e-06, "loss": 0.2985, "step": 6683 }, { "epoch": 0.5295306001188355, "grad_norm": 1.2077827297526118, "learning_rate": 9.529307154781855e-06, "loss": 0.2444, "step": 6684 }, { "epoch": 0.5296098237274708, "grad_norm": 1.194335800866445, "learning_rate": 9.5267439826914e-06, "loss": 0.2148, "step": 6685 }, { "epoch": 0.5296890473361061, "grad_norm": 1.4376889664760628, "learning_rate": 9.524180841762577e-06, "loss": 0.289, "step": 6686 }, { "epoch": 0.5297682709447415, "grad_norm": 1.8093120720204678, "learning_rate": 9.52161773216416e-06, "loss": 0.3367, "step": 6687 }, { "epoch": 0.5298474945533769, "grad_norm": 1.333271514764325, "learning_rate": 9.519054654064909e-06, "loss": 0.2511, "step": 6688 }, { "epoch": 0.5299267181620123, "grad_norm": 1.3692322595912834, "learning_rate": 9.5164916076336e-06, "loss": 0.2392, "step": 6689 }, { "epoch": 0.5300059417706476, "grad_norm": 1.43380209821574, "learning_rate": 9.513928593038987e-06, "loss": 0.3539, "step": 6690 }, { "epoch": 0.5300851653792831, "grad_norm": 1.5233257978255093, "learning_rate": 9.51136561044984e-06, "loss": 0.2776, "step": 6691 }, { "epoch": 0.5301643889879184, "grad_norm": 1.4027923408473233, "learning_rate": 9.508802660034915e-06, "loss": 0.2725, "step": 6692 }, { "epoch": 0.5302436125965537, "grad_norm": 1.7700973357251542, "learning_rate": 9.506239741962971e-06, "loss": 0.4102, "step": 6693 }, { "epoch": 0.5303228362051892, "grad_norm": 1.7352410949755996, "learning_rate": 9.503676856402764e-06, "loss": 0.3326, "step": 6694 }, { "epoch": 0.5304020598138245, "grad_norm": 1.145217628127054, "learning_rate": 9.50111400352305e-06, "loss": 0.2194, "step": 6695 }, { "epoch": 0.5304812834224599, "grad_norm": 1.4212117018478085, "learning_rate": 9.498551183492578e-06, "loss": 0.2609, "step": 6696 }, { "epoch": 0.5305605070310953, "grad_norm": 1.3006076346746003, "learning_rate": 9.495988396480097e-06, "loss": 0.2996, "step": 6697 }, { "epoch": 0.5306397306397307, "grad_norm": 1.5917593329743487, "learning_rate": 9.493425642654356e-06, "loss": 0.3809, "step": 6698 }, { "epoch": 0.530718954248366, "grad_norm": 1.653836267235538, "learning_rate": 9.490862922184096e-06, "loss": 0.3099, "step": 6699 }, { "epoch": 0.5307981778570013, "grad_norm": 1.6133510340217554, "learning_rate": 9.488300235238067e-06, "loss": 0.3062, "step": 6700 }, { "epoch": 0.5308774014656368, "grad_norm": 1.302576952429008, "learning_rate": 9.485737581985002e-06, "loss": 0.2653, "step": 6701 }, { "epoch": 0.5309566250742721, "grad_norm": 1.0055575979666276, "learning_rate": 9.483174962593644e-06, "loss": 0.1029, "step": 6702 }, { "epoch": 0.5310358486829075, "grad_norm": 1.1682445847740925, "learning_rate": 9.480612377232728e-06, "loss": 0.227, "step": 6703 }, { "epoch": 0.5311150722915429, "grad_norm": 1.1652027436505905, "learning_rate": 9.478049826070988e-06, "loss": 0.2405, "step": 6704 }, { "epoch": 0.5311942959001783, "grad_norm": 1.4147847379924263, "learning_rate": 9.475487309277156e-06, "loss": 0.2778, "step": 6705 }, { "epoch": 0.5312735195088136, "grad_norm": 1.1377388074760229, "learning_rate": 9.472924827019959e-06, "loss": 0.2499, "step": 6706 }, { "epoch": 0.531352743117449, "grad_norm": 1.1330378433656962, "learning_rate": 9.470362379468125e-06, "loss": 0.238, "step": 6707 }, { "epoch": 0.5314319667260844, "grad_norm": 1.5014673745498222, "learning_rate": 9.467799966790384e-06, "loss": 0.2448, "step": 6708 }, { "epoch": 0.5315111903347197, "grad_norm": 1.3099047654609604, "learning_rate": 9.465237589155452e-06, "loss": 0.2557, "step": 6709 }, { "epoch": 0.5315904139433552, "grad_norm": 1.3190927619450001, "learning_rate": 9.462675246732051e-06, "loss": 0.2355, "step": 6710 }, { "epoch": 0.5316696375519905, "grad_norm": 1.379249246333291, "learning_rate": 9.460112939688901e-06, "loss": 0.3392, "step": 6711 }, { "epoch": 0.5317488611606259, "grad_norm": 1.345709745976777, "learning_rate": 9.457550668194714e-06, "loss": 0.2665, "step": 6712 }, { "epoch": 0.5318280847692612, "grad_norm": 1.4863562892142994, "learning_rate": 9.45498843241821e-06, "loss": 0.3, "step": 6713 }, { "epoch": 0.5319073083778966, "grad_norm": 1.3036253857552202, "learning_rate": 9.452426232528092e-06, "loss": 0.2613, "step": 6714 }, { "epoch": 0.531986531986532, "grad_norm": 1.2974735397661186, "learning_rate": 9.449864068693072e-06, "loss": 0.2025, "step": 6715 }, { "epoch": 0.5320657555951673, "grad_norm": 1.4594821946395855, "learning_rate": 9.447301941081856e-06, "loss": 0.3271, "step": 6716 }, { "epoch": 0.5321449792038028, "grad_norm": 1.4438487748657014, "learning_rate": 9.444739849863146e-06, "loss": 0.3451, "step": 6717 }, { "epoch": 0.5322242028124381, "grad_norm": 1.1641043139213456, "learning_rate": 9.442177795205647e-06, "loss": 0.2383, "step": 6718 }, { "epoch": 0.5323034264210735, "grad_norm": 1.3098938833785447, "learning_rate": 9.439615777278059e-06, "loss": 0.2169, "step": 6719 }, { "epoch": 0.5323826500297089, "grad_norm": 1.1825511914267253, "learning_rate": 9.437053796249071e-06, "loss": 0.2354, "step": 6720 }, { "epoch": 0.5324618736383442, "grad_norm": 1.28364833820355, "learning_rate": 9.434491852287385e-06, "loss": 0.2251, "step": 6721 }, { "epoch": 0.5325410972469796, "grad_norm": 1.4253957329378735, "learning_rate": 9.431929945561688e-06, "loss": 0.2498, "step": 6722 }, { "epoch": 0.532620320855615, "grad_norm": 1.426455864989217, "learning_rate": 9.429368076240669e-06, "loss": 0.2302, "step": 6723 }, { "epoch": 0.5326995444642504, "grad_norm": 1.679541425187411, "learning_rate": 9.42680624449302e-06, "loss": 0.3232, "step": 6724 }, { "epoch": 0.5327787680728857, "grad_norm": 1.7291603177588906, "learning_rate": 9.42424445048742e-06, "loss": 0.3309, "step": 6725 }, { "epoch": 0.5328579916815211, "grad_norm": 1.4696512636751686, "learning_rate": 9.42168269439255e-06, "loss": 0.2258, "step": 6726 }, { "epoch": 0.5329372152901565, "grad_norm": 1.259514963648564, "learning_rate": 9.419120976377098e-06, "loss": 0.237, "step": 6727 }, { "epoch": 0.5330164388987918, "grad_norm": 1.4603238518593502, "learning_rate": 9.41655929660973e-06, "loss": 0.3005, "step": 6728 }, { "epoch": 0.5330956625074272, "grad_norm": 1.2798535373106266, "learning_rate": 9.413997655259126e-06, "loss": 0.277, "step": 6729 }, { "epoch": 0.5331748861160626, "grad_norm": 1.390691093683762, "learning_rate": 9.411436052493957e-06, "loss": 0.283, "step": 6730 }, { "epoch": 0.533254109724698, "grad_norm": 1.4693044352301712, "learning_rate": 9.40887448848289e-06, "loss": 0.2664, "step": 6731 }, { "epoch": 0.5333333333333333, "grad_norm": 1.2935168541655746, "learning_rate": 9.406312963394598e-06, "loss": 0.2298, "step": 6732 }, { "epoch": 0.5334125569419687, "grad_norm": 1.568669002572422, "learning_rate": 9.403751477397738e-06, "loss": 0.387, "step": 6733 }, { "epoch": 0.5334917805506041, "grad_norm": 1.4895470401983604, "learning_rate": 9.401190030660975e-06, "loss": 0.317, "step": 6734 }, { "epoch": 0.5335710041592394, "grad_norm": 1.2190220465897612, "learning_rate": 9.398628623352969e-06, "loss": 0.1942, "step": 6735 }, { "epoch": 0.5336502277678749, "grad_norm": 1.1690231128980542, "learning_rate": 9.396067255642373e-06, "loss": 0.2146, "step": 6736 }, { "epoch": 0.5337294513765102, "grad_norm": 1.3757233061086984, "learning_rate": 9.39350592769784e-06, "loss": 0.306, "step": 6737 }, { "epoch": 0.5338086749851456, "grad_norm": 0.9724917279421184, "learning_rate": 9.390944639688027e-06, "loss": 0.2174, "step": 6738 }, { "epoch": 0.5338878985937809, "grad_norm": 1.272320542768353, "learning_rate": 9.388383391781576e-06, "loss": 0.2596, "step": 6739 }, { "epoch": 0.5339671222024163, "grad_norm": 1.411424576761223, "learning_rate": 9.385822184147136e-06, "loss": 0.2908, "step": 6740 }, { "epoch": 0.5340463458110517, "grad_norm": 1.6186305961905039, "learning_rate": 9.383261016953351e-06, "loss": 0.2893, "step": 6741 }, { "epoch": 0.534125569419687, "grad_norm": 1.5198495930305322, "learning_rate": 9.38069989036886e-06, "loss": 0.3601, "step": 6742 }, { "epoch": 0.5342047930283225, "grad_norm": 1.1213343399907656, "learning_rate": 9.3781388045623e-06, "loss": 0.1846, "step": 6743 }, { "epoch": 0.5342840166369578, "grad_norm": 1.5926127864248008, "learning_rate": 9.37557775970231e-06, "loss": 0.3638, "step": 6744 }, { "epoch": 0.5343632402455932, "grad_norm": 1.582921130618739, "learning_rate": 9.373016755957519e-06, "loss": 0.3654, "step": 6745 }, { "epoch": 0.5344424638542286, "grad_norm": 1.7729412170555228, "learning_rate": 9.370455793496558e-06, "loss": 0.348, "step": 6746 }, { "epoch": 0.5345216874628639, "grad_norm": 1.5847045242356583, "learning_rate": 9.367894872488053e-06, "loss": 0.3971, "step": 6747 }, { "epoch": 0.5346009110714993, "grad_norm": 1.3307159193732085, "learning_rate": 9.365333993100628e-06, "loss": 0.2686, "step": 6748 }, { "epoch": 0.5346801346801346, "grad_norm": 1.5361109802613226, "learning_rate": 9.362773155502909e-06, "loss": 0.3109, "step": 6749 }, { "epoch": 0.5347593582887701, "grad_norm": 1.8976565026585643, "learning_rate": 9.360212359863508e-06, "loss": 0.3811, "step": 6750 }, { "epoch": 0.5348385818974054, "grad_norm": 1.0991247901775503, "learning_rate": 9.357651606351047e-06, "loss": 0.1772, "step": 6751 }, { "epoch": 0.5349178055060408, "grad_norm": 1.233848250402271, "learning_rate": 9.355090895134138e-06, "loss": 0.2636, "step": 6752 }, { "epoch": 0.5349970291146762, "grad_norm": 1.4341967560828675, "learning_rate": 9.352530226381388e-06, "loss": 0.3195, "step": 6753 }, { "epoch": 0.5350762527233115, "grad_norm": 1.2225326727225299, "learning_rate": 9.349969600261408e-06, "loss": 0.2383, "step": 6754 }, { "epoch": 0.5351554763319469, "grad_norm": 1.324038486267805, "learning_rate": 9.347409016942803e-06, "loss": 0.3049, "step": 6755 }, { "epoch": 0.5352346999405823, "grad_norm": 1.7870197223119775, "learning_rate": 9.344848476594172e-06, "loss": 0.3703, "step": 6756 }, { "epoch": 0.5353139235492177, "grad_norm": 1.253002325033724, "learning_rate": 9.342287979384118e-06, "loss": 0.2114, "step": 6757 }, { "epoch": 0.535393147157853, "grad_norm": 1.340280887441604, "learning_rate": 9.339727525481234e-06, "loss": 0.319, "step": 6758 }, { "epoch": 0.5354723707664885, "grad_norm": 1.5554091223712008, "learning_rate": 9.33716711505412e-06, "loss": 0.2711, "step": 6759 }, { "epoch": 0.5355515943751238, "grad_norm": 1.4827656168956844, "learning_rate": 9.334606748271357e-06, "loss": 0.248, "step": 6760 }, { "epoch": 0.5356308179837591, "grad_norm": 1.6205721083917508, "learning_rate": 9.33204642530154e-06, "loss": 0.2556, "step": 6761 }, { "epoch": 0.5357100415923945, "grad_norm": 1.186600864266174, "learning_rate": 9.329486146313254e-06, "loss": 0.2268, "step": 6762 }, { "epoch": 0.5357892652010299, "grad_norm": 1.5137481023407515, "learning_rate": 9.326925911475075e-06, "loss": 0.2565, "step": 6763 }, { "epoch": 0.5358684888096653, "grad_norm": 1.5513750237024808, "learning_rate": 9.324365720955589e-06, "loss": 0.3307, "step": 6764 }, { "epoch": 0.5359477124183006, "grad_norm": 1.7423238094637552, "learning_rate": 9.321805574923369e-06, "loss": 0.2504, "step": 6765 }, { "epoch": 0.5360269360269361, "grad_norm": 1.1898912719658348, "learning_rate": 9.319245473546987e-06, "loss": 0.1922, "step": 6766 }, { "epoch": 0.5361061596355714, "grad_norm": 1.2772269424767866, "learning_rate": 9.316685416995017e-06, "loss": 0.2592, "step": 6767 }, { "epoch": 0.5361853832442067, "grad_norm": 1.2448787430388477, "learning_rate": 9.314125405436023e-06, "loss": 0.2139, "step": 6768 }, { "epoch": 0.5362646068528422, "grad_norm": 1.3890908283455774, "learning_rate": 9.311565439038571e-06, "loss": 0.2366, "step": 6769 }, { "epoch": 0.5363438304614775, "grad_norm": 1.317728037985118, "learning_rate": 9.309005517971222e-06, "loss": 0.3225, "step": 6770 }, { "epoch": 0.5364230540701129, "grad_norm": 1.6431510979972728, "learning_rate": 9.306445642402534e-06, "loss": 0.2214, "step": 6771 }, { "epoch": 0.5365022776787483, "grad_norm": 1.5371766928566697, "learning_rate": 9.303885812501064e-06, "loss": 0.2985, "step": 6772 }, { "epoch": 0.5365815012873837, "grad_norm": 1.1854091490886594, "learning_rate": 9.301326028435367e-06, "loss": 0.2215, "step": 6773 }, { "epoch": 0.536660724896019, "grad_norm": 1.6841690095207198, "learning_rate": 9.298766290373986e-06, "loss": 0.2578, "step": 6774 }, { "epoch": 0.5367399485046543, "grad_norm": 1.3203426464633086, "learning_rate": 9.296206598485471e-06, "loss": 0.304, "step": 6775 }, { "epoch": 0.5368191721132898, "grad_norm": 1.5355814729158108, "learning_rate": 9.293646952938365e-06, "loss": 0.3116, "step": 6776 }, { "epoch": 0.5368983957219251, "grad_norm": 1.2267660463598158, "learning_rate": 9.291087353901208e-06, "loss": 0.2722, "step": 6777 }, { "epoch": 0.5369776193305605, "grad_norm": 1.237091427193348, "learning_rate": 9.28852780154254e-06, "loss": 0.2164, "step": 6778 }, { "epoch": 0.5370568429391959, "grad_norm": 1.363068516064216, "learning_rate": 9.285968296030891e-06, "loss": 0.2532, "step": 6779 }, { "epoch": 0.5371360665478313, "grad_norm": 1.5448556293682676, "learning_rate": 9.283408837534793e-06, "loss": 0.2658, "step": 6780 }, { "epoch": 0.5372152901564666, "grad_norm": 1.273167080040899, "learning_rate": 9.280849426222778e-06, "loss": 0.2862, "step": 6781 }, { "epoch": 0.537294513765102, "grad_norm": 1.251335087647091, "learning_rate": 9.278290062263364e-06, "loss": 0.2154, "step": 6782 }, { "epoch": 0.5373737373737374, "grad_norm": 1.2054285202032164, "learning_rate": 9.27573074582508e-06, "loss": 0.2095, "step": 6783 }, { "epoch": 0.5374529609823727, "grad_norm": 1.1427525509151666, "learning_rate": 9.27317147707644e-06, "loss": 0.1762, "step": 6784 }, { "epoch": 0.5375321845910082, "grad_norm": 1.2848903398800302, "learning_rate": 9.270612256185962e-06, "loss": 0.2461, "step": 6785 }, { "epoch": 0.5376114081996435, "grad_norm": 1.5186389469273869, "learning_rate": 9.268053083322157e-06, "loss": 0.3463, "step": 6786 }, { "epoch": 0.5376906318082789, "grad_norm": 1.2931255254029264, "learning_rate": 9.265493958653533e-06, "loss": 0.2371, "step": 6787 }, { "epoch": 0.5377698554169142, "grad_norm": 1.518304574791514, "learning_rate": 9.262934882348599e-06, "loss": 0.2528, "step": 6788 }, { "epoch": 0.5378490790255496, "grad_norm": 1.2644482639293273, "learning_rate": 9.260375854575857e-06, "loss": 0.2101, "step": 6789 }, { "epoch": 0.537928302634185, "grad_norm": 1.1753587189304724, "learning_rate": 9.257816875503805e-06, "loss": 0.2433, "step": 6790 }, { "epoch": 0.5380075262428203, "grad_norm": 1.4517862240575137, "learning_rate": 9.255257945300941e-06, "loss": 0.2491, "step": 6791 }, { "epoch": 0.5380867498514558, "grad_norm": 1.2343677282261236, "learning_rate": 9.252699064135759e-06, "loss": 0.1576, "step": 6792 }, { "epoch": 0.5381659734600911, "grad_norm": 1.3733817026100017, "learning_rate": 9.250140232176746e-06, "loss": 0.2631, "step": 6793 }, { "epoch": 0.5382451970687265, "grad_norm": 1.303021594187281, "learning_rate": 9.247581449592392e-06, "loss": 0.2661, "step": 6794 }, { "epoch": 0.5383244206773619, "grad_norm": 1.4113178441734109, "learning_rate": 9.245022716551178e-06, "loss": 0.3083, "step": 6795 }, { "epoch": 0.5384036442859972, "grad_norm": 1.2252960972557936, "learning_rate": 9.242464033221584e-06, "loss": 0.2834, "step": 6796 }, { "epoch": 0.5384828678946326, "grad_norm": 1.6811551424151405, "learning_rate": 9.239905399772092e-06, "loss": 0.3737, "step": 6797 }, { "epoch": 0.538562091503268, "grad_norm": 1.668810809116935, "learning_rate": 9.237346816371169e-06, "loss": 0.3503, "step": 6798 }, { "epoch": 0.5386413151119034, "grad_norm": 1.7557551341166222, "learning_rate": 9.234788283187291e-06, "loss": 0.3591, "step": 6799 }, { "epoch": 0.5387205387205387, "grad_norm": 1.2810845849598966, "learning_rate": 9.23222980038892e-06, "loss": 0.2647, "step": 6800 }, { "epoch": 0.5387997623291741, "grad_norm": 1.6291312883364715, "learning_rate": 9.229671368144524e-06, "loss": 0.3946, "step": 6801 }, { "epoch": 0.5388789859378095, "grad_norm": 1.5480873934523631, "learning_rate": 9.227112986622562e-06, "loss": 0.3245, "step": 6802 }, { "epoch": 0.5389582095464448, "grad_norm": 1.2804502791430201, "learning_rate": 9.224554655991492e-06, "loss": 0.2534, "step": 6803 }, { "epoch": 0.5390374331550802, "grad_norm": 1.7414359878820775, "learning_rate": 9.221996376419763e-06, "loss": 0.3345, "step": 6804 }, { "epoch": 0.5391166567637156, "grad_norm": 1.3947994486707425, "learning_rate": 9.219438148075834e-06, "loss": 0.2538, "step": 6805 }, { "epoch": 0.539195880372351, "grad_norm": 1.9177309540266438, "learning_rate": 9.216879971128142e-06, "loss": 0.3725, "step": 6806 }, { "epoch": 0.5392751039809863, "grad_norm": 1.3845811015587115, "learning_rate": 9.21432184574514e-06, "loss": 0.2476, "step": 6807 }, { "epoch": 0.5393543275896218, "grad_norm": 1.469212269016225, "learning_rate": 9.21176377209526e-06, "loss": 0.2908, "step": 6808 }, { "epoch": 0.5394335511982571, "grad_norm": 1.4807062791577446, "learning_rate": 9.209205750346945e-06, "loss": 0.3041, "step": 6809 }, { "epoch": 0.5395127748068924, "grad_norm": 1.563196760277186, "learning_rate": 9.206647780668629e-06, "loss": 0.3051, "step": 6810 }, { "epoch": 0.5395919984155279, "grad_norm": 1.2496748411728456, "learning_rate": 9.204089863228736e-06, "loss": 0.2645, "step": 6811 }, { "epoch": 0.5396712220241632, "grad_norm": 1.209543218466997, "learning_rate": 9.201531998195697e-06, "loss": 0.2135, "step": 6812 }, { "epoch": 0.5397504456327986, "grad_norm": 1.1895299618052124, "learning_rate": 9.198974185737934e-06, "loss": 0.2273, "step": 6813 }, { "epoch": 0.5398296692414339, "grad_norm": 1.3328687189871387, "learning_rate": 9.196416426023868e-06, "loss": 0.2711, "step": 6814 }, { "epoch": 0.5399088928500693, "grad_norm": 1.3232598161038789, "learning_rate": 9.193858719221912e-06, "loss": 0.2928, "step": 6815 }, { "epoch": 0.5399881164587047, "grad_norm": 1.4064068710974462, "learning_rate": 9.19130106550048e-06, "loss": 0.2485, "step": 6816 }, { "epoch": 0.54006734006734, "grad_norm": 1.0767575670440195, "learning_rate": 9.188743465027981e-06, "loss": 0.1788, "step": 6817 }, { "epoch": 0.5401465636759755, "grad_norm": 1.2195140124731305, "learning_rate": 9.186185917972821e-06, "loss": 0.2083, "step": 6818 }, { "epoch": 0.5402257872846108, "grad_norm": 1.4999438641182867, "learning_rate": 9.183628424503405e-06, "loss": 0.2912, "step": 6819 }, { "epoch": 0.5403050108932462, "grad_norm": 1.2406112321295946, "learning_rate": 9.181070984788127e-06, "loss": 0.2556, "step": 6820 }, { "epoch": 0.5403842345018816, "grad_norm": 1.4732235368618307, "learning_rate": 9.178513598995384e-06, "loss": 0.2402, "step": 6821 }, { "epoch": 0.5404634581105169, "grad_norm": 1.2827621289604894, "learning_rate": 9.17595626729357e-06, "loss": 0.2637, "step": 6822 }, { "epoch": 0.5405426817191523, "grad_norm": 1.3969848391487683, "learning_rate": 9.17339898985107e-06, "loss": 0.2759, "step": 6823 }, { "epoch": 0.5406219053277876, "grad_norm": 1.5036426892098076, "learning_rate": 9.170841766836268e-06, "loss": 0.3368, "step": 6824 }, { "epoch": 0.5407011289364231, "grad_norm": 1.7783090548963019, "learning_rate": 9.168284598417547e-06, "loss": 0.3611, "step": 6825 }, { "epoch": 0.5407803525450584, "grad_norm": 1.144402790331783, "learning_rate": 9.165727484763283e-06, "loss": 0.2365, "step": 6826 }, { "epoch": 0.5408595761536938, "grad_norm": 1.6916031090588752, "learning_rate": 9.16317042604185e-06, "loss": 0.3724, "step": 6827 }, { "epoch": 0.5409387997623292, "grad_norm": 1.3850273636397326, "learning_rate": 9.160613422421616e-06, "loss": 0.275, "step": 6828 }, { "epoch": 0.5410180233709645, "grad_norm": 1.3759927025814456, "learning_rate": 9.158056474070952e-06, "loss": 0.235, "step": 6829 }, { "epoch": 0.5410972469795999, "grad_norm": 1.1254229791677905, "learning_rate": 9.155499581158217e-06, "loss": 0.2024, "step": 6830 }, { "epoch": 0.5411764705882353, "grad_norm": 1.0948319037781131, "learning_rate": 9.152942743851771e-06, "loss": 0.1867, "step": 6831 }, { "epoch": 0.5412556941968707, "grad_norm": 1.3607300192864529, "learning_rate": 9.15038596231997e-06, "loss": 0.2031, "step": 6832 }, { "epoch": 0.541334917805506, "grad_norm": 1.3352107419750603, "learning_rate": 9.147829236731164e-06, "loss": 0.2802, "step": 6833 }, { "epoch": 0.5414141414141415, "grad_norm": 1.6159081358685348, "learning_rate": 9.145272567253703e-06, "loss": 0.304, "step": 6834 }, { "epoch": 0.5414933650227768, "grad_norm": 1.3659771126025533, "learning_rate": 9.142715954055932e-06, "loss": 0.2851, "step": 6835 }, { "epoch": 0.5415725886314121, "grad_norm": 1.2891282544941869, "learning_rate": 9.140159397306188e-06, "loss": 0.1974, "step": 6836 }, { "epoch": 0.5416518122400475, "grad_norm": 1.1340869499362822, "learning_rate": 9.137602897172814e-06, "loss": 0.1817, "step": 6837 }, { "epoch": 0.5417310358486829, "grad_norm": 1.1750921698952466, "learning_rate": 9.135046453824136e-06, "loss": 0.2026, "step": 6838 }, { "epoch": 0.5418102594573183, "grad_norm": 1.411146416343359, "learning_rate": 9.132490067428488e-06, "loss": 0.3022, "step": 6839 }, { "epoch": 0.5418894830659536, "grad_norm": 1.5103095912909645, "learning_rate": 9.129933738154196e-06, "loss": 0.2528, "step": 6840 }, { "epoch": 0.5419687066745891, "grad_norm": 1.5544842207029321, "learning_rate": 9.12737746616958e-06, "loss": 0.3342, "step": 6841 }, { "epoch": 0.5420479302832244, "grad_norm": 1.302758909001281, "learning_rate": 9.124821251642959e-06, "loss": 0.2255, "step": 6842 }, { "epoch": 0.5421271538918597, "grad_norm": 1.4149142708560833, "learning_rate": 9.122265094742648e-06, "loss": 0.2909, "step": 6843 }, { "epoch": 0.5422063775004952, "grad_norm": 1.2476464510760097, "learning_rate": 9.119708995636957e-06, "loss": 0.2584, "step": 6844 }, { "epoch": 0.5422856011091305, "grad_norm": 1.4439780239021418, "learning_rate": 9.117152954494195e-06, "loss": 0.2923, "step": 6845 }, { "epoch": 0.5423648247177659, "grad_norm": 1.1680376514327657, "learning_rate": 9.114596971482658e-06, "loss": 0.2736, "step": 6846 }, { "epoch": 0.5424440483264013, "grad_norm": 1.4547670965966417, "learning_rate": 9.112041046770653e-06, "loss": 0.2747, "step": 6847 }, { "epoch": 0.5425232719350367, "grad_norm": 1.3594226843958468, "learning_rate": 9.109485180526474e-06, "loss": 0.2747, "step": 6848 }, { "epoch": 0.542602495543672, "grad_norm": 1.4465127403555254, "learning_rate": 9.106929372918408e-06, "loss": 0.305, "step": 6849 }, { "epoch": 0.5426817191523073, "grad_norm": 1.48632715850047, "learning_rate": 9.104373624114746e-06, "loss": 0.2501, "step": 6850 }, { "epoch": 0.5427609427609428, "grad_norm": 1.2631158319662426, "learning_rate": 9.101817934283775e-06, "loss": 0.2577, "step": 6851 }, { "epoch": 0.5428401663695781, "grad_norm": 1.567934137488143, "learning_rate": 9.099262303593768e-06, "loss": 0.3261, "step": 6852 }, { "epoch": 0.5429193899782135, "grad_norm": 1.4867969867070314, "learning_rate": 9.096706732213005e-06, "loss": 0.3252, "step": 6853 }, { "epoch": 0.5429986135868489, "grad_norm": 1.339329468674581, "learning_rate": 9.094151220309757e-06, "loss": 0.3003, "step": 6854 }, { "epoch": 0.5430778371954843, "grad_norm": 1.681644925970197, "learning_rate": 9.091595768052291e-06, "loss": 0.437, "step": 6855 }, { "epoch": 0.5431570608041196, "grad_norm": 1.1846300182515337, "learning_rate": 9.089040375608876e-06, "loss": 0.2448, "step": 6856 }, { "epoch": 0.543236284412755, "grad_norm": 1.7565882770308017, "learning_rate": 9.086485043147768e-06, "loss": 0.4709, "step": 6857 }, { "epoch": 0.5433155080213904, "grad_norm": 1.3354437476459637, "learning_rate": 9.083929770837222e-06, "loss": 0.26, "step": 6858 }, { "epoch": 0.5433947316300257, "grad_norm": 1.159526254455517, "learning_rate": 9.081374558845496e-06, "loss": 0.2099, "step": 6859 }, { "epoch": 0.5434739552386612, "grad_norm": 1.0995538827632811, "learning_rate": 9.078819407340833e-06, "loss": 0.2467, "step": 6860 }, { "epoch": 0.5435531788472965, "grad_norm": 1.4389166382750294, "learning_rate": 9.07626431649148e-06, "loss": 0.3022, "step": 6861 }, { "epoch": 0.5436324024559319, "grad_norm": 1.476952265784712, "learning_rate": 9.073709286465678e-06, "loss": 0.3213, "step": 6862 }, { "epoch": 0.5437116260645672, "grad_norm": 1.6442495749699753, "learning_rate": 9.071154317431661e-06, "loss": 0.2802, "step": 6863 }, { "epoch": 0.5437908496732026, "grad_norm": 1.408720907713713, "learning_rate": 9.068599409557664e-06, "loss": 0.2776, "step": 6864 }, { "epoch": 0.543870073281838, "grad_norm": 1.153009535378826, "learning_rate": 9.066044563011914e-06, "loss": 0.2973, "step": 6865 }, { "epoch": 0.5439492968904733, "grad_norm": 1.5159670829023433, "learning_rate": 9.063489777962634e-06, "loss": 0.3822, "step": 6866 }, { "epoch": 0.5440285204991088, "grad_norm": 1.06561274879749, "learning_rate": 9.06093505457805e-06, "loss": 0.1977, "step": 6867 }, { "epoch": 0.5441077441077441, "grad_norm": 1.3917428484823267, "learning_rate": 9.058380393026369e-06, "loss": 0.2851, "step": 6868 }, { "epoch": 0.5441869677163795, "grad_norm": 1.6141982997119226, "learning_rate": 9.055825793475814e-06, "loss": 0.3156, "step": 6869 }, { "epoch": 0.5442661913250149, "grad_norm": 1.4909132465592059, "learning_rate": 9.053271256094582e-06, "loss": 0.301, "step": 6870 }, { "epoch": 0.5443454149336502, "grad_norm": 1.6480605354528746, "learning_rate": 9.050716781050885e-06, "loss": 0.3054, "step": 6871 }, { "epoch": 0.5444246385422856, "grad_norm": 1.1320509743061042, "learning_rate": 9.04816236851292e-06, "loss": 0.253, "step": 6872 }, { "epoch": 0.544503862150921, "grad_norm": 1.3349196762230269, "learning_rate": 9.045608018648884e-06, "loss": 0.2674, "step": 6873 }, { "epoch": 0.5445830857595564, "grad_norm": 1.2001703711965126, "learning_rate": 9.043053731626964e-06, "loss": 0.22, "step": 6874 }, { "epoch": 0.5446623093681917, "grad_norm": 1.482132402507571, "learning_rate": 9.040499507615356e-06, "loss": 0.305, "step": 6875 }, { "epoch": 0.5447415329768271, "grad_norm": 1.4862296537832334, "learning_rate": 9.037945346782236e-06, "loss": 0.222, "step": 6876 }, { "epoch": 0.5448207565854625, "grad_norm": 1.3081674056000887, "learning_rate": 9.035391249295788e-06, "loss": 0.2588, "step": 6877 }, { "epoch": 0.5448999801940978, "grad_norm": 1.5066008134959405, "learning_rate": 9.032837215324183e-06, "loss": 0.279, "step": 6878 }, { "epoch": 0.5449792038027332, "grad_norm": 1.4267615465247832, "learning_rate": 9.030283245035594e-06, "loss": 0.3288, "step": 6879 }, { "epoch": 0.5450584274113686, "grad_norm": 1.3099286772753327, "learning_rate": 9.027729338598188e-06, "loss": 0.2788, "step": 6880 }, { "epoch": 0.545137651020004, "grad_norm": 1.0935041609545688, "learning_rate": 9.025175496180125e-06, "loss": 0.1947, "step": 6881 }, { "epoch": 0.5452168746286393, "grad_norm": 1.195419440790593, "learning_rate": 9.022621717949566e-06, "loss": 0.2279, "step": 6882 }, { "epoch": 0.5452960982372748, "grad_norm": 1.3532331676248126, "learning_rate": 9.020068004074665e-06, "loss": 0.2427, "step": 6883 }, { "epoch": 0.5453753218459101, "grad_norm": 1.6185665690603852, "learning_rate": 9.01751435472357e-06, "loss": 0.3573, "step": 6884 }, { "epoch": 0.5454545454545454, "grad_norm": 1.5977971488027076, "learning_rate": 9.014960770064429e-06, "loss": 0.3424, "step": 6885 }, { "epoch": 0.5455337690631809, "grad_norm": 1.5083925112010113, "learning_rate": 9.012407250265377e-06, "loss": 0.2995, "step": 6886 }, { "epoch": 0.5456129926718162, "grad_norm": 1.623482216406467, "learning_rate": 9.009853795494558e-06, "loss": 0.2871, "step": 6887 }, { "epoch": 0.5456922162804516, "grad_norm": 1.1919516117091917, "learning_rate": 9.007300405920105e-06, "loss": 0.2815, "step": 6888 }, { "epoch": 0.5457714398890869, "grad_norm": 1.4866016171699366, "learning_rate": 9.00474708171014e-06, "loss": 0.2689, "step": 6889 }, { "epoch": 0.5458506634977223, "grad_norm": 1.3101945873144103, "learning_rate": 9.002193823032791e-06, "loss": 0.2582, "step": 6890 }, { "epoch": 0.5459298871063577, "grad_norm": 1.3014135712972907, "learning_rate": 8.999640630056183e-06, "loss": 0.2487, "step": 6891 }, { "epoch": 0.546009110714993, "grad_norm": 1.2856734768354838, "learning_rate": 8.997087502948423e-06, "loss": 0.2811, "step": 6892 }, { "epoch": 0.5460883343236285, "grad_norm": 1.240251168182242, "learning_rate": 8.994534441877625e-06, "loss": 0.2886, "step": 6893 }, { "epoch": 0.5461675579322638, "grad_norm": 1.5874360894265425, "learning_rate": 8.991981447011896e-06, "loss": 0.3011, "step": 6894 }, { "epoch": 0.5462467815408992, "grad_norm": 1.4112277136789018, "learning_rate": 8.989428518519336e-06, "loss": 0.285, "step": 6895 }, { "epoch": 0.5463260051495346, "grad_norm": 1.4425651170508136, "learning_rate": 8.986875656568047e-06, "loss": 0.3021, "step": 6896 }, { "epoch": 0.5464052287581699, "grad_norm": 1.4728773843795968, "learning_rate": 8.984322861326122e-06, "loss": 0.3289, "step": 6897 }, { "epoch": 0.5464844523668053, "grad_norm": 1.263702375281725, "learning_rate": 8.981770132961649e-06, "loss": 0.2609, "step": 6898 }, { "epoch": 0.5465636759754406, "grad_norm": 1.2971754602789756, "learning_rate": 8.979217471642712e-06, "loss": 0.29, "step": 6899 }, { "epoch": 0.5466428995840761, "grad_norm": 1.3128668393061134, "learning_rate": 8.976664877537395e-06, "loss": 0.2386, "step": 6900 }, { "epoch": 0.5467221231927114, "grad_norm": 1.5249679057038013, "learning_rate": 8.974112350813771e-06, "loss": 0.3373, "step": 6901 }, { "epoch": 0.5468013468013468, "grad_norm": 1.2538269437771639, "learning_rate": 8.971559891639913e-06, "loss": 0.2874, "step": 6902 }, { "epoch": 0.5468805704099822, "grad_norm": 1.1265054139187154, "learning_rate": 8.969007500183886e-06, "loss": 0.1964, "step": 6903 }, { "epoch": 0.5469597940186175, "grad_norm": 1.8250436705146966, "learning_rate": 8.966455176613754e-06, "loss": 0.3165, "step": 6904 }, { "epoch": 0.5470390176272529, "grad_norm": 1.1358280277227444, "learning_rate": 8.963902921097579e-06, "loss": 0.3061, "step": 6905 }, { "epoch": 0.5471182412358883, "grad_norm": 1.422213027748279, "learning_rate": 8.961350733803406e-06, "loss": 0.2477, "step": 6906 }, { "epoch": 0.5471974648445237, "grad_norm": 1.3136773494120368, "learning_rate": 8.958798614899291e-06, "loss": 0.2944, "step": 6907 }, { "epoch": 0.547276688453159, "grad_norm": 1.2763049861047902, "learning_rate": 8.956246564553282e-06, "loss": 0.2564, "step": 6908 }, { "epoch": 0.5473559120617945, "grad_norm": 1.4557728586838161, "learning_rate": 8.95369458293341e-06, "loss": 0.1645, "step": 6909 }, { "epoch": 0.5474351356704298, "grad_norm": 1.5100263834540892, "learning_rate": 8.951142670207718e-06, "loss": 0.2934, "step": 6910 }, { "epoch": 0.5475143592790651, "grad_norm": 1.5802000441624515, "learning_rate": 8.948590826544232e-06, "loss": 0.3578, "step": 6911 }, { "epoch": 0.5475935828877005, "grad_norm": 1.3998515076163713, "learning_rate": 8.94603905211098e-06, "loss": 0.2114, "step": 6912 }, { "epoch": 0.5476728064963359, "grad_norm": 1.7569387517582804, "learning_rate": 8.943487347075988e-06, "loss": 0.3745, "step": 6913 }, { "epoch": 0.5477520301049713, "grad_norm": 1.3209345165525932, "learning_rate": 8.94093571160727e-06, "loss": 0.2668, "step": 6914 }, { "epoch": 0.5478312537136066, "grad_norm": 1.2811728368418742, "learning_rate": 8.938384145872838e-06, "loss": 0.2379, "step": 6915 }, { "epoch": 0.5479104773222421, "grad_norm": 1.2708678179518775, "learning_rate": 8.935832650040703e-06, "loss": 0.2316, "step": 6916 }, { "epoch": 0.5479897009308774, "grad_norm": 1.4508650297979424, "learning_rate": 8.933281224278867e-06, "loss": 0.2966, "step": 6917 }, { "epoch": 0.5480689245395127, "grad_norm": 1.4211166759044775, "learning_rate": 8.930729868755333e-06, "loss": 0.2614, "step": 6918 }, { "epoch": 0.5481481481481482, "grad_norm": 1.2284427625263326, "learning_rate": 8.928178583638088e-06, "loss": 0.2387, "step": 6919 }, { "epoch": 0.5482273717567835, "grad_norm": 1.504665184186934, "learning_rate": 8.925627369095125e-06, "loss": 0.2822, "step": 6920 }, { "epoch": 0.5483065953654189, "grad_norm": 1.473054408741151, "learning_rate": 8.923076225294434e-06, "loss": 0.2943, "step": 6921 }, { "epoch": 0.5483858189740543, "grad_norm": 1.2828874915525819, "learning_rate": 8.920525152403989e-06, "loss": 0.212, "step": 6922 }, { "epoch": 0.5484650425826897, "grad_norm": 1.8004652671248182, "learning_rate": 8.917974150591772e-06, "loss": 0.3856, "step": 6923 }, { "epoch": 0.548544266191325, "grad_norm": 1.2126109056910321, "learning_rate": 8.915423220025747e-06, "loss": 0.1608, "step": 6924 }, { "epoch": 0.5486234897999603, "grad_norm": 1.3678070054734122, "learning_rate": 8.912872360873885e-06, "loss": 0.2497, "step": 6925 }, { "epoch": 0.5487027134085958, "grad_norm": 1.1358820890931334, "learning_rate": 8.91032157330415e-06, "loss": 0.2253, "step": 6926 }, { "epoch": 0.5487819370172311, "grad_norm": 1.4626639327172861, "learning_rate": 8.907770857484493e-06, "loss": 0.2999, "step": 6927 }, { "epoch": 0.5488611606258665, "grad_norm": 1.4087092746931544, "learning_rate": 8.90522021358287e-06, "loss": 0.2805, "step": 6928 }, { "epoch": 0.5489403842345019, "grad_norm": 1.7609525402754054, "learning_rate": 8.90266964176723e-06, "loss": 0.3395, "step": 6929 }, { "epoch": 0.5490196078431373, "grad_norm": 1.1523568283379255, "learning_rate": 8.90011914220551e-06, "loss": 0.2809, "step": 6930 }, { "epoch": 0.5490988314517726, "grad_norm": 1.2190853828788084, "learning_rate": 8.897568715065658e-06, "loss": 0.1968, "step": 6931 }, { "epoch": 0.549178055060408, "grad_norm": 1.6419662613464567, "learning_rate": 8.895018360515597e-06, "loss": 0.3171, "step": 6932 }, { "epoch": 0.5492572786690434, "grad_norm": 1.448724775478952, "learning_rate": 8.892468078723262e-06, "loss": 0.2071, "step": 6933 }, { "epoch": 0.5493365022776787, "grad_norm": 1.8891208577997054, "learning_rate": 8.889917869856576e-06, "loss": 0.3859, "step": 6934 }, { "epoch": 0.5494157258863142, "grad_norm": 1.040836904582798, "learning_rate": 8.887367734083454e-06, "loss": 0.1659, "step": 6935 }, { "epoch": 0.5494949494949495, "grad_norm": 1.258803127289834, "learning_rate": 8.884817671571815e-06, "loss": 0.2696, "step": 6936 }, { "epoch": 0.5495741731035849, "grad_norm": 1.2084857038180408, "learning_rate": 8.882267682489566e-06, "loss": 0.249, "step": 6937 }, { "epoch": 0.5496533967122202, "grad_norm": 1.2568175200065401, "learning_rate": 8.879717767004613e-06, "loss": 0.2094, "step": 6938 }, { "epoch": 0.5497326203208556, "grad_norm": 1.23275359100953, "learning_rate": 8.877167925284855e-06, "loss": 0.2281, "step": 6939 }, { "epoch": 0.549811843929491, "grad_norm": 1.263428836065982, "learning_rate": 8.874618157498183e-06, "loss": 0.2671, "step": 6940 }, { "epoch": 0.5498910675381263, "grad_norm": 1.3501202071721725, "learning_rate": 8.872068463812492e-06, "loss": 0.288, "step": 6941 }, { "epoch": 0.5499702911467618, "grad_norm": 1.3474008197919445, "learning_rate": 8.869518844395667e-06, "loss": 0.28, "step": 6942 }, { "epoch": 0.5500495147553971, "grad_norm": 1.411981560808872, "learning_rate": 8.866969299415585e-06, "loss": 0.3008, "step": 6943 }, { "epoch": 0.5501287383640325, "grad_norm": 1.204544116518048, "learning_rate": 8.864419829040122e-06, "loss": 0.207, "step": 6944 }, { "epoch": 0.5502079619726679, "grad_norm": 1.0562696657413404, "learning_rate": 8.86187043343715e-06, "loss": 0.1735, "step": 6945 }, { "epoch": 0.5502871855813032, "grad_norm": 1.5935942836737471, "learning_rate": 8.859321112774535e-06, "loss": 0.2379, "step": 6946 }, { "epoch": 0.5503664091899386, "grad_norm": 1.5108345229367142, "learning_rate": 8.856771867220135e-06, "loss": 0.2932, "step": 6947 }, { "epoch": 0.550445632798574, "grad_norm": 1.2633926974575818, "learning_rate": 8.854222696941807e-06, "loss": 0.2405, "step": 6948 }, { "epoch": 0.5505248564072094, "grad_norm": 1.3106072137752853, "learning_rate": 8.8516736021074e-06, "loss": 0.2203, "step": 6949 }, { "epoch": 0.5506040800158447, "grad_norm": 1.526450625325032, "learning_rate": 8.849124582884762e-06, "loss": 0.2552, "step": 6950 }, { "epoch": 0.5506833036244801, "grad_norm": 1.5382908631266865, "learning_rate": 8.846575639441732e-06, "loss": 0.366, "step": 6951 }, { "epoch": 0.5507625272331155, "grad_norm": 1.7432571829080619, "learning_rate": 8.844026771946148e-06, "loss": 0.431, "step": 6952 }, { "epoch": 0.5508417508417508, "grad_norm": 1.199965665640729, "learning_rate": 8.841477980565838e-06, "loss": 0.2772, "step": 6953 }, { "epoch": 0.5509209744503862, "grad_norm": 1.9614952545636384, "learning_rate": 8.838929265468627e-06, "loss": 0.325, "step": 6954 }, { "epoch": 0.5510001980590216, "grad_norm": 1.6247552178977018, "learning_rate": 8.836380626822339e-06, "loss": 0.2707, "step": 6955 }, { "epoch": 0.551079421667657, "grad_norm": 1.756983936814514, "learning_rate": 8.833832064794787e-06, "loss": 0.4087, "step": 6956 }, { "epoch": 0.5511586452762923, "grad_norm": 1.1659653018660858, "learning_rate": 8.831283579553781e-06, "loss": 0.1787, "step": 6957 }, { "epoch": 0.5512378688849278, "grad_norm": 1.344745218583525, "learning_rate": 8.828735171267131e-06, "loss": 0.2938, "step": 6958 }, { "epoch": 0.5513170924935631, "grad_norm": 1.1213942986982695, "learning_rate": 8.82618684010263e-06, "loss": 0.2263, "step": 6959 }, { "epoch": 0.5513963161021984, "grad_norm": 1.456644804639871, "learning_rate": 8.823638586228081e-06, "loss": 0.3442, "step": 6960 }, { "epoch": 0.5514755397108339, "grad_norm": 1.443779846323453, "learning_rate": 8.82109040981127e-06, "loss": 0.2701, "step": 6961 }, { "epoch": 0.5515547633194692, "grad_norm": 1.3833112925058488, "learning_rate": 8.818542311019982e-06, "loss": 0.2554, "step": 6962 }, { "epoch": 0.5516339869281046, "grad_norm": 1.4962457347240226, "learning_rate": 8.815994290022e-06, "loss": 0.325, "step": 6963 }, { "epoch": 0.5517132105367399, "grad_norm": 1.2654105139461969, "learning_rate": 8.813446346985095e-06, "loss": 0.1987, "step": 6964 }, { "epoch": 0.5517924341453754, "grad_norm": 1.662239387594392, "learning_rate": 8.810898482077038e-06, "loss": 0.3357, "step": 6965 }, { "epoch": 0.5518716577540107, "grad_norm": 1.3287944208256632, "learning_rate": 8.808350695465597e-06, "loss": 0.2602, "step": 6966 }, { "epoch": 0.551950881362646, "grad_norm": 1.4833147334650705, "learning_rate": 8.805802987318527e-06, "loss": 0.293, "step": 6967 }, { "epoch": 0.5520301049712815, "grad_norm": 1.161244069502438, "learning_rate": 8.803255357803584e-06, "loss": 0.2576, "step": 6968 }, { "epoch": 0.5521093285799168, "grad_norm": 1.4312068122365944, "learning_rate": 8.800707807088521e-06, "loss": 0.2397, "step": 6969 }, { "epoch": 0.5521885521885522, "grad_norm": 1.5833591166841852, "learning_rate": 8.798160335341078e-06, "loss": 0.3399, "step": 6970 }, { "epoch": 0.5522677757971876, "grad_norm": 1.6048424508706616, "learning_rate": 8.795612942728989e-06, "loss": 0.3027, "step": 6971 }, { "epoch": 0.5523469994058229, "grad_norm": 1.286220152449889, "learning_rate": 8.793065629419996e-06, "loss": 0.2849, "step": 6972 }, { "epoch": 0.5524262230144583, "grad_norm": 1.212823327888148, "learning_rate": 8.790518395581823e-06, "loss": 0.2762, "step": 6973 }, { "epoch": 0.5525054466230936, "grad_norm": 1.431174705529821, "learning_rate": 8.787971241382193e-06, "loss": 0.3096, "step": 6974 }, { "epoch": 0.5525846702317291, "grad_norm": 1.3257544534779628, "learning_rate": 8.785424166988827e-06, "loss": 0.25, "step": 6975 }, { "epoch": 0.5526638938403644, "grad_norm": 1.151711056263223, "learning_rate": 8.782877172569433e-06, "loss": 0.2076, "step": 6976 }, { "epoch": 0.5527431174489998, "grad_norm": 1.3300492256841259, "learning_rate": 8.78033025829172e-06, "loss": 0.2822, "step": 6977 }, { "epoch": 0.5528223410576352, "grad_norm": 1.3911495408390682, "learning_rate": 8.777783424323396e-06, "loss": 0.253, "step": 6978 }, { "epoch": 0.5529015646662705, "grad_norm": 1.121397823657544, "learning_rate": 8.775236670832146e-06, "loss": 0.2275, "step": 6979 }, { "epoch": 0.5529807882749059, "grad_norm": 1.4714846754801767, "learning_rate": 8.772689997985674e-06, "loss": 0.2749, "step": 6980 }, { "epoch": 0.5530600118835413, "grad_norm": 1.6130036809663622, "learning_rate": 8.770143405951657e-06, "loss": 0.3361, "step": 6981 }, { "epoch": 0.5531392354921767, "grad_norm": 1.1722354047997954, "learning_rate": 8.76759689489778e-06, "loss": 0.2264, "step": 6982 }, { "epoch": 0.553218459100812, "grad_norm": 1.2197168007108004, "learning_rate": 8.765050464991716e-06, "loss": 0.2576, "step": 6983 }, { "epoch": 0.5532976827094475, "grad_norm": 1.363851834758067, "learning_rate": 8.762504116401137e-06, "loss": 0.2786, "step": 6984 }, { "epoch": 0.5533769063180828, "grad_norm": 1.3580455369340627, "learning_rate": 8.759957849293707e-06, "loss": 0.2213, "step": 6985 }, { "epoch": 0.5534561299267181, "grad_norm": 1.6290460759122434, "learning_rate": 8.75741166383709e-06, "loss": 0.3579, "step": 6986 }, { "epoch": 0.5535353535353535, "grad_norm": 1.4451655439647537, "learning_rate": 8.754865560198932e-06, "loss": 0.2448, "step": 6987 }, { "epoch": 0.5536145771439889, "grad_norm": 1.5497196325318428, "learning_rate": 8.752319538546888e-06, "loss": 0.3126, "step": 6988 }, { "epoch": 0.5536938007526243, "grad_norm": 1.630666797480555, "learning_rate": 8.749773599048597e-06, "loss": 0.3639, "step": 6989 }, { "epoch": 0.5537730243612596, "grad_norm": 1.0604334692389863, "learning_rate": 8.747227741871698e-06, "loss": 0.1621, "step": 6990 }, { "epoch": 0.5538522479698951, "grad_norm": 1.381439156081259, "learning_rate": 8.744681967183826e-06, "loss": 0.2841, "step": 6991 }, { "epoch": 0.5539314715785304, "grad_norm": 1.5024721739927875, "learning_rate": 8.742136275152606e-06, "loss": 0.3204, "step": 6992 }, { "epoch": 0.5540106951871657, "grad_norm": 1.4006014004183762, "learning_rate": 8.73959066594566e-06, "loss": 0.2688, "step": 6993 }, { "epoch": 0.5540899187958012, "grad_norm": 1.3557176773863153, "learning_rate": 8.737045139730605e-06, "loss": 0.2556, "step": 6994 }, { "epoch": 0.5541691424044365, "grad_norm": 1.0664299028816309, "learning_rate": 8.734499696675048e-06, "loss": 0.2105, "step": 6995 }, { "epoch": 0.5542483660130719, "grad_norm": 1.2018334964232642, "learning_rate": 8.731954336946599e-06, "loss": 0.1992, "step": 6996 }, { "epoch": 0.5543275896217073, "grad_norm": 1.3688962400292068, "learning_rate": 8.729409060712855e-06, "loss": 0.3049, "step": 6997 }, { "epoch": 0.5544068132303427, "grad_norm": 1.4765175368660743, "learning_rate": 8.726863868141408e-06, "loss": 0.2436, "step": 6998 }, { "epoch": 0.554486036838978, "grad_norm": 1.627207577853215, "learning_rate": 8.724318759399853e-06, "loss": 0.3119, "step": 6999 }, { "epoch": 0.5545652604476133, "grad_norm": 1.3067980713195708, "learning_rate": 8.721773734655768e-06, "loss": 0.2407, "step": 7000 }, { "epoch": 0.5546444840562488, "grad_norm": 1.2555764831978828, "learning_rate": 8.719228794076733e-06, "loss": 0.1954, "step": 7001 }, { "epoch": 0.5547237076648841, "grad_norm": 1.2926151986598575, "learning_rate": 8.716683937830318e-06, "loss": 0.2673, "step": 7002 }, { "epoch": 0.5548029312735195, "grad_norm": 1.3538085249155307, "learning_rate": 8.71413916608409e-06, "loss": 0.2841, "step": 7003 }, { "epoch": 0.5548821548821549, "grad_norm": 1.2621199109801466, "learning_rate": 8.711594479005614e-06, "loss": 0.2291, "step": 7004 }, { "epoch": 0.5549613784907903, "grad_norm": 1.1922708851372084, "learning_rate": 8.709049876762438e-06, "loss": 0.1892, "step": 7005 }, { "epoch": 0.5550406020994256, "grad_norm": 1.575841138124968, "learning_rate": 8.706505359522119e-06, "loss": 0.3524, "step": 7006 }, { "epoch": 0.555119825708061, "grad_norm": 1.6571406227612384, "learning_rate": 8.703960927452197e-06, "loss": 0.2215, "step": 7007 }, { "epoch": 0.5551990493166964, "grad_norm": 1.6755694622196637, "learning_rate": 8.701416580720212e-06, "loss": 0.3882, "step": 7008 }, { "epoch": 0.5552782729253317, "grad_norm": 1.5195066673599789, "learning_rate": 8.698872319493698e-06, "loss": 0.3143, "step": 7009 }, { "epoch": 0.5553574965339672, "grad_norm": 1.2318127103165126, "learning_rate": 8.69632814394018e-06, "loss": 0.2163, "step": 7010 }, { "epoch": 0.5554367201426025, "grad_norm": 1.1943752928933378, "learning_rate": 8.693784054227179e-06, "loss": 0.2625, "step": 7011 }, { "epoch": 0.5555159437512379, "grad_norm": 2.2911132376557894, "learning_rate": 8.691240050522215e-06, "loss": 0.4775, "step": 7012 }, { "epoch": 0.5555951673598732, "grad_norm": 1.496897603650919, "learning_rate": 8.688696132992797e-06, "loss": 0.2984, "step": 7013 }, { "epoch": 0.5556743909685086, "grad_norm": 1.4058859323284971, "learning_rate": 8.686152301806427e-06, "loss": 0.3652, "step": 7014 }, { "epoch": 0.555753614577144, "grad_norm": 1.2391362714854297, "learning_rate": 8.683608557130608e-06, "loss": 0.2354, "step": 7015 }, { "epoch": 0.5558328381857793, "grad_norm": 1.494118678644517, "learning_rate": 8.681064899132831e-06, "loss": 0.3046, "step": 7016 }, { "epoch": 0.5559120617944148, "grad_norm": 0.9862226319404613, "learning_rate": 8.678521327980585e-06, "loss": 0.2155, "step": 7017 }, { "epoch": 0.5559912854030501, "grad_norm": 1.1660430842016558, "learning_rate": 8.675977843841347e-06, "loss": 0.2464, "step": 7018 }, { "epoch": 0.5560705090116855, "grad_norm": 1.4253487223919135, "learning_rate": 8.673434446882601e-06, "loss": 0.2247, "step": 7019 }, { "epoch": 0.5561497326203209, "grad_norm": 1.2574712667766956, "learning_rate": 8.670891137271814e-06, "loss": 0.2092, "step": 7020 }, { "epoch": 0.5562289562289562, "grad_norm": 1.339563100725818, "learning_rate": 8.668347915176448e-06, "loss": 0.2391, "step": 7021 }, { "epoch": 0.5563081798375916, "grad_norm": 1.466115126696012, "learning_rate": 8.665804780763963e-06, "loss": 0.3085, "step": 7022 }, { "epoch": 0.556387403446227, "grad_norm": 1.128684611561647, "learning_rate": 8.663261734201818e-06, "loss": 0.2409, "step": 7023 }, { "epoch": 0.5564666270548624, "grad_norm": 1.3268103481091216, "learning_rate": 8.660718775657453e-06, "loss": 0.2964, "step": 7024 }, { "epoch": 0.5565458506634977, "grad_norm": 1.384930534251322, "learning_rate": 8.658175905298314e-06, "loss": 0.3002, "step": 7025 }, { "epoch": 0.5566250742721331, "grad_norm": 1.2477565063461078, "learning_rate": 8.655633123291833e-06, "loss": 0.2268, "step": 7026 }, { "epoch": 0.5567042978807685, "grad_norm": 1.317272146662216, "learning_rate": 8.653090429805442e-06, "loss": 0.2726, "step": 7027 }, { "epoch": 0.5567835214894038, "grad_norm": 1.1613292359285394, "learning_rate": 8.650547825006568e-06, "loss": 0.2131, "step": 7028 }, { "epoch": 0.5568627450980392, "grad_norm": 1.2324465875388209, "learning_rate": 8.648005309062623e-06, "loss": 0.2526, "step": 7029 }, { "epoch": 0.5569419687066746, "grad_norm": 1.1495201235057504, "learning_rate": 8.645462882141026e-06, "loss": 0.2579, "step": 7030 }, { "epoch": 0.55702119231531, "grad_norm": 1.3929608438990937, "learning_rate": 8.64292054440918e-06, "loss": 0.2303, "step": 7031 }, { "epoch": 0.5571004159239453, "grad_norm": 1.2622642989531188, "learning_rate": 8.640378296034486e-06, "loss": 0.1915, "step": 7032 }, { "epoch": 0.5571796395325808, "grad_norm": 1.5058902247650723, "learning_rate": 8.63783613718434e-06, "loss": 0.3413, "step": 7033 }, { "epoch": 0.5572588631412161, "grad_norm": 1.6088861569034487, "learning_rate": 8.63529406802613e-06, "loss": 0.2887, "step": 7034 }, { "epoch": 0.5573380867498514, "grad_norm": 1.5210899799877822, "learning_rate": 8.632752088727237e-06, "loss": 0.2557, "step": 7035 }, { "epoch": 0.5574173103584869, "grad_norm": 1.1025287746717471, "learning_rate": 8.63021019945504e-06, "loss": 0.2294, "step": 7036 }, { "epoch": 0.5574965339671222, "grad_norm": 1.3529702188346702, "learning_rate": 8.627668400376914e-06, "loss": 0.2395, "step": 7037 }, { "epoch": 0.5575757575757576, "grad_norm": 1.5041655223487767, "learning_rate": 8.625126691660216e-06, "loss": 0.3156, "step": 7038 }, { "epoch": 0.5576549811843929, "grad_norm": 1.502161549165598, "learning_rate": 8.622585073472314e-06, "loss": 0.2776, "step": 7039 }, { "epoch": 0.5577342047930284, "grad_norm": 1.578697230875441, "learning_rate": 8.620043545980554e-06, "loss": 0.3452, "step": 7040 }, { "epoch": 0.5578134284016637, "grad_norm": 1.5495976089780932, "learning_rate": 8.61750210935229e-06, "loss": 0.2586, "step": 7041 }, { "epoch": 0.557892652010299, "grad_norm": 1.5769415575724814, "learning_rate": 8.614960763754857e-06, "loss": 0.3021, "step": 7042 }, { "epoch": 0.5579718756189345, "grad_norm": 1.1811405929681862, "learning_rate": 8.612419509355593e-06, "loss": 0.2002, "step": 7043 }, { "epoch": 0.5580510992275698, "grad_norm": 1.3489136868126388, "learning_rate": 8.60987834632183e-06, "loss": 0.2774, "step": 7044 }, { "epoch": 0.5581303228362052, "grad_norm": 1.5170222762852672, "learning_rate": 8.607337274820888e-06, "loss": 0.3278, "step": 7045 }, { "epoch": 0.5582095464448406, "grad_norm": 1.3484185129395854, "learning_rate": 8.604796295020085e-06, "loss": 0.3032, "step": 7046 }, { "epoch": 0.558288770053476, "grad_norm": 1.492528151391354, "learning_rate": 8.602255407086736e-06, "loss": 0.2857, "step": 7047 }, { "epoch": 0.5583679936621113, "grad_norm": 1.5481470859715287, "learning_rate": 8.599714611188141e-06, "loss": 0.3363, "step": 7048 }, { "epoch": 0.5584472172707466, "grad_norm": 1.1986021562974838, "learning_rate": 8.5971739074916e-06, "loss": 0.215, "step": 7049 }, { "epoch": 0.5585264408793821, "grad_norm": 1.310784351988188, "learning_rate": 8.594633296164409e-06, "loss": 0.2689, "step": 7050 }, { "epoch": 0.5586056644880174, "grad_norm": 1.3452235131316717, "learning_rate": 8.59209277737385e-06, "loss": 0.2468, "step": 7051 }, { "epoch": 0.5586848880966528, "grad_norm": 1.4714512244632645, "learning_rate": 8.58955235128721e-06, "loss": 0.33, "step": 7052 }, { "epoch": 0.5587641117052882, "grad_norm": 1.5061138538914511, "learning_rate": 8.58701201807176e-06, "loss": 0.3156, "step": 7053 }, { "epoch": 0.5588433353139235, "grad_norm": 1.2522530177300015, "learning_rate": 8.584471777894768e-06, "loss": 0.1907, "step": 7054 }, { "epoch": 0.5589225589225589, "grad_norm": 1.3235660810518188, "learning_rate": 8.581931630923499e-06, "loss": 0.2377, "step": 7055 }, { "epoch": 0.5590017825311943, "grad_norm": 1.4326187422574033, "learning_rate": 8.57939157732521e-06, "loss": 0.2429, "step": 7056 }, { "epoch": 0.5590810061398297, "grad_norm": 2.109050067093894, "learning_rate": 8.576851617267151e-06, "loss": 0.2673, "step": 7057 }, { "epoch": 0.559160229748465, "grad_norm": 1.5605114478778377, "learning_rate": 8.574311750916565e-06, "loss": 0.2436, "step": 7058 }, { "epoch": 0.5592394533571005, "grad_norm": 1.584420623030401, "learning_rate": 8.571771978440689e-06, "loss": 0.3708, "step": 7059 }, { "epoch": 0.5593186769657358, "grad_norm": 1.2531242869135015, "learning_rate": 8.569232300006756e-06, "loss": 0.2125, "step": 7060 }, { "epoch": 0.5593979005743711, "grad_norm": 1.3675348283572362, "learning_rate": 8.566692715781992e-06, "loss": 0.2191, "step": 7061 }, { "epoch": 0.5594771241830065, "grad_norm": 1.4878703623488974, "learning_rate": 8.564153225933616e-06, "loss": 0.3401, "step": 7062 }, { "epoch": 0.5595563477916419, "grad_norm": 1.2066271452322694, "learning_rate": 8.56161383062884e-06, "loss": 0.2523, "step": 7063 }, { "epoch": 0.5596355714002773, "grad_norm": 1.1984780159012218, "learning_rate": 8.559074530034875e-06, "loss": 0.2059, "step": 7064 }, { "epoch": 0.5597147950089126, "grad_norm": 1.1447762422016563, "learning_rate": 8.556535324318916e-06, "loss": 0.1965, "step": 7065 }, { "epoch": 0.5597940186175481, "grad_norm": 1.3860454828574094, "learning_rate": 8.553996213648164e-06, "loss": 0.2709, "step": 7066 }, { "epoch": 0.5598732422261834, "grad_norm": 1.3432315884063428, "learning_rate": 8.551457198189799e-06, "loss": 0.2254, "step": 7067 }, { "epoch": 0.5599524658348187, "grad_norm": 1.0012160317613514, "learning_rate": 8.54891827811101e-06, "loss": 0.132, "step": 7068 }, { "epoch": 0.5600316894434542, "grad_norm": 2.055167057385489, "learning_rate": 8.546379453578972e-06, "loss": 0.2837, "step": 7069 }, { "epoch": 0.5601109130520895, "grad_norm": 1.7372481377277484, "learning_rate": 8.543840724760848e-06, "loss": 0.3502, "step": 7070 }, { "epoch": 0.5601901366607249, "grad_norm": 1.292778420881115, "learning_rate": 8.541302091823809e-06, "loss": 0.2589, "step": 7071 }, { "epoch": 0.5602693602693603, "grad_norm": 1.380709257864959, "learning_rate": 8.538763554935008e-06, "loss": 0.2285, "step": 7072 }, { "epoch": 0.5603485838779957, "grad_norm": 1.1594019575687378, "learning_rate": 8.536225114261597e-06, "loss": 0.2189, "step": 7073 }, { "epoch": 0.560427807486631, "grad_norm": 1.530342225639762, "learning_rate": 8.533686769970717e-06, "loss": 0.3002, "step": 7074 }, { "epoch": 0.5605070310952663, "grad_norm": 1.269648241788725, "learning_rate": 8.531148522229509e-06, "loss": 0.1908, "step": 7075 }, { "epoch": 0.5605862547039018, "grad_norm": 1.4729501156002238, "learning_rate": 8.528610371205102e-06, "loss": 0.2881, "step": 7076 }, { "epoch": 0.5606654783125371, "grad_norm": 1.719618071760003, "learning_rate": 8.526072317064623e-06, "loss": 0.3844, "step": 7077 }, { "epoch": 0.5607447019211725, "grad_norm": 1.3779970254440097, "learning_rate": 8.52353435997519e-06, "loss": 0.3204, "step": 7078 }, { "epoch": 0.5608239255298079, "grad_norm": 1.2054239120729657, "learning_rate": 8.520996500103915e-06, "loss": 0.2207, "step": 7079 }, { "epoch": 0.5609031491384433, "grad_norm": 1.671092167969114, "learning_rate": 8.518458737617903e-06, "loss": 0.3179, "step": 7080 }, { "epoch": 0.5609823727470786, "grad_norm": 1.3908866856142577, "learning_rate": 8.515921072684255e-06, "loss": 0.2189, "step": 7081 }, { "epoch": 0.561061596355714, "grad_norm": 1.236534212257595, "learning_rate": 8.513383505470065e-06, "loss": 0.2744, "step": 7082 }, { "epoch": 0.5611408199643494, "grad_norm": 1.4982742546327068, "learning_rate": 8.510846036142415e-06, "loss": 0.3714, "step": 7083 }, { "epoch": 0.5612200435729847, "grad_norm": 1.1582012330895561, "learning_rate": 8.50830866486839e-06, "loss": 0.2065, "step": 7084 }, { "epoch": 0.5612992671816202, "grad_norm": 1.4457075553700014, "learning_rate": 8.505771391815061e-06, "loss": 0.3379, "step": 7085 }, { "epoch": 0.5613784907902555, "grad_norm": 1.4555233570329484, "learning_rate": 8.503234217149496e-06, "loss": 0.2955, "step": 7086 }, { "epoch": 0.5614577143988909, "grad_norm": 1.3949290716945484, "learning_rate": 8.500697141038758e-06, "loss": 0.2615, "step": 7087 }, { "epoch": 0.5615369380075262, "grad_norm": 1.1355941681619464, "learning_rate": 8.498160163649896e-06, "loss": 0.2227, "step": 7088 }, { "epoch": 0.5616161616161616, "grad_norm": 1.2371204767851705, "learning_rate": 8.495623285149962e-06, "loss": 0.2551, "step": 7089 }, { "epoch": 0.561695385224797, "grad_norm": 1.187872217152119, "learning_rate": 8.493086505705998e-06, "loss": 0.2334, "step": 7090 }, { "epoch": 0.5617746088334323, "grad_norm": 1.441722024434417, "learning_rate": 8.490549825485036e-06, "loss": 0.3307, "step": 7091 }, { "epoch": 0.5618538324420678, "grad_norm": 1.4849448881758496, "learning_rate": 8.488013244654103e-06, "loss": 0.2316, "step": 7092 }, { "epoch": 0.5619330560507031, "grad_norm": 1.4975482818674597, "learning_rate": 8.485476763380224e-06, "loss": 0.3159, "step": 7093 }, { "epoch": 0.5620122796593385, "grad_norm": 1.2180345404673107, "learning_rate": 8.482940381830412e-06, "loss": 0.2262, "step": 7094 }, { "epoch": 0.5620915032679739, "grad_norm": 1.555724455160656, "learning_rate": 8.480404100171677e-06, "loss": 0.3019, "step": 7095 }, { "epoch": 0.5621707268766092, "grad_norm": 1.3002829453316416, "learning_rate": 8.47786791857102e-06, "loss": 0.2415, "step": 7096 }, { "epoch": 0.5622499504852446, "grad_norm": 1.4261023194091624, "learning_rate": 8.475331837195435e-06, "loss": 0.2855, "step": 7097 }, { "epoch": 0.56232917409388, "grad_norm": 1.9370986011708122, "learning_rate": 8.472795856211916e-06, "loss": 0.3187, "step": 7098 }, { "epoch": 0.5624083977025154, "grad_norm": 1.3045949246779922, "learning_rate": 8.470259975787438e-06, "loss": 0.2766, "step": 7099 }, { "epoch": 0.5624876213111507, "grad_norm": 1.4013713986669258, "learning_rate": 8.46772419608898e-06, "loss": 0.2362, "step": 7100 }, { "epoch": 0.5625668449197861, "grad_norm": 1.5170180070855386, "learning_rate": 8.465188517283514e-06, "loss": 0.2804, "step": 7101 }, { "epoch": 0.5626460685284215, "grad_norm": 1.4147934229144523, "learning_rate": 8.462652939537996e-06, "loss": 0.2835, "step": 7102 }, { "epoch": 0.5627252921370568, "grad_norm": 1.4312492575172449, "learning_rate": 8.460117463019387e-06, "loss": 0.2496, "step": 7103 }, { "epoch": 0.5628045157456922, "grad_norm": 1.2889102522545566, "learning_rate": 8.457582087894631e-06, "loss": 0.2368, "step": 7104 }, { "epoch": 0.5628837393543276, "grad_norm": 1.4258407587054147, "learning_rate": 8.455046814330674e-06, "loss": 0.336, "step": 7105 }, { "epoch": 0.562962962962963, "grad_norm": 1.3449280204438927, "learning_rate": 8.452511642494453e-06, "loss": 0.2886, "step": 7106 }, { "epoch": 0.5630421865715983, "grad_norm": 1.4418812595000428, "learning_rate": 8.449976572552891e-06, "loss": 0.3594, "step": 7107 }, { "epoch": 0.5631214101802338, "grad_norm": 1.1109709699757517, "learning_rate": 8.447441604672913e-06, "loss": 0.1827, "step": 7108 }, { "epoch": 0.5632006337888691, "grad_norm": 1.1345466575351015, "learning_rate": 8.444906739021438e-06, "loss": 0.242, "step": 7109 }, { "epoch": 0.5632798573975044, "grad_norm": 1.5383389449091234, "learning_rate": 8.442371975765368e-06, "loss": 0.2311, "step": 7110 }, { "epoch": 0.5633590810061399, "grad_norm": 1.2962654047524944, "learning_rate": 8.439837315071612e-06, "loss": 0.2936, "step": 7111 }, { "epoch": 0.5634383046147752, "grad_norm": 1.2769144612709133, "learning_rate": 8.43730275710706e-06, "loss": 0.3124, "step": 7112 }, { "epoch": 0.5635175282234106, "grad_norm": 1.2440169016945453, "learning_rate": 8.434768302038602e-06, "loss": 0.1866, "step": 7113 }, { "epoch": 0.5635967518320459, "grad_norm": 1.319345246272552, "learning_rate": 8.432233950033122e-06, "loss": 0.2753, "step": 7114 }, { "epoch": 0.5636759754406814, "grad_norm": 1.6019990992716826, "learning_rate": 8.42969970125749e-06, "loss": 0.3592, "step": 7115 }, { "epoch": 0.5637551990493167, "grad_norm": 1.2272841558527292, "learning_rate": 8.427165555878577e-06, "loss": 0.198, "step": 7116 }, { "epoch": 0.563834422657952, "grad_norm": 1.3109970490466731, "learning_rate": 8.424631514063247e-06, "loss": 0.2084, "step": 7117 }, { "epoch": 0.5639136462665875, "grad_norm": 1.482911107116727, "learning_rate": 8.422097575978349e-06, "loss": 0.3294, "step": 7118 }, { "epoch": 0.5639928698752228, "grad_norm": 1.2879923661507742, "learning_rate": 8.419563741790735e-06, "loss": 0.1595, "step": 7119 }, { "epoch": 0.5640720934838582, "grad_norm": 1.302712408204975, "learning_rate": 8.417030011667241e-06, "loss": 0.2373, "step": 7120 }, { "epoch": 0.5641513170924936, "grad_norm": 1.776325427910239, "learning_rate": 8.414496385774706e-06, "loss": 0.3503, "step": 7121 }, { "epoch": 0.564230540701129, "grad_norm": 1.1671649423199169, "learning_rate": 8.411962864279957e-06, "loss": 0.1674, "step": 7122 }, { "epoch": 0.5643097643097643, "grad_norm": 1.246239001050154, "learning_rate": 8.409429447349811e-06, "loss": 0.2581, "step": 7123 }, { "epoch": 0.5643889879183996, "grad_norm": 1.4502591271514103, "learning_rate": 8.406896135151081e-06, "loss": 0.2861, "step": 7124 }, { "epoch": 0.5644682115270351, "grad_norm": 1.3177987693624105, "learning_rate": 8.40436292785058e-06, "loss": 0.2516, "step": 7125 }, { "epoch": 0.5645474351356704, "grad_norm": 1.4517774883527377, "learning_rate": 8.401829825615098e-06, "loss": 0.305, "step": 7126 }, { "epoch": 0.5646266587443058, "grad_norm": 1.3020506327010801, "learning_rate": 8.399296828611433e-06, "loss": 0.2396, "step": 7127 }, { "epoch": 0.5647058823529412, "grad_norm": 1.147628002492846, "learning_rate": 8.396763937006369e-06, "loss": 0.2475, "step": 7128 }, { "epoch": 0.5647851059615765, "grad_norm": 1.4342675699159348, "learning_rate": 8.394231150966685e-06, "loss": 0.3366, "step": 7129 }, { "epoch": 0.5648643295702119, "grad_norm": 1.410060897074877, "learning_rate": 8.391698470659154e-06, "loss": 0.2977, "step": 7130 }, { "epoch": 0.5649435531788473, "grad_norm": 1.2769640257127037, "learning_rate": 8.38916589625054e-06, "loss": 0.2648, "step": 7131 }, { "epoch": 0.5650227767874827, "grad_norm": 1.4588174200269972, "learning_rate": 8.3866334279076e-06, "loss": 0.3581, "step": 7132 }, { "epoch": 0.565102000396118, "grad_norm": 1.5954168370806279, "learning_rate": 8.384101065797087e-06, "loss": 0.3426, "step": 7133 }, { "epoch": 0.5651812240047535, "grad_norm": 1.321265250035742, "learning_rate": 8.381568810085745e-06, "loss": 0.192, "step": 7134 }, { "epoch": 0.5652604476133888, "grad_norm": 1.5972391729437088, "learning_rate": 8.379036660940306e-06, "loss": 0.3712, "step": 7135 }, { "epoch": 0.5653396712220241, "grad_norm": 1.2563572724512664, "learning_rate": 8.376504618527505e-06, "loss": 0.2066, "step": 7136 }, { "epoch": 0.5654188948306595, "grad_norm": 1.4855726786431513, "learning_rate": 8.373972683014063e-06, "loss": 0.3124, "step": 7137 }, { "epoch": 0.5654981184392949, "grad_norm": 1.3577756788371746, "learning_rate": 8.371440854566696e-06, "loss": 0.2428, "step": 7138 }, { "epoch": 0.5655773420479303, "grad_norm": 1.2510886976573299, "learning_rate": 8.368909133352114e-06, "loss": 0.2725, "step": 7139 }, { "epoch": 0.5656565656565656, "grad_norm": 1.4216449113950804, "learning_rate": 8.366377519537015e-06, "loss": 0.2716, "step": 7140 }, { "epoch": 0.5657357892652011, "grad_norm": 1.4204817175435025, "learning_rate": 8.363846013288096e-06, "loss": 0.2205, "step": 7141 }, { "epoch": 0.5658150128738364, "grad_norm": 1.453708105307023, "learning_rate": 8.361314614772047e-06, "loss": 0.3083, "step": 7142 }, { "epoch": 0.5658942364824717, "grad_norm": 1.4690151780580532, "learning_rate": 8.358783324155542e-06, "loss": 0.2427, "step": 7143 }, { "epoch": 0.5659734600911072, "grad_norm": 1.6338805263042093, "learning_rate": 8.35625214160526e-06, "loss": 0.2832, "step": 7144 }, { "epoch": 0.5660526836997425, "grad_norm": 1.6266711394477922, "learning_rate": 8.353721067287865e-06, "loss": 0.244, "step": 7145 }, { "epoch": 0.5661319073083779, "grad_norm": 1.2933271179787933, "learning_rate": 8.351190101370016e-06, "loss": 0.2426, "step": 7146 }, { "epoch": 0.5662111309170133, "grad_norm": 1.414727881963241, "learning_rate": 8.348659244018367e-06, "loss": 0.2928, "step": 7147 }, { "epoch": 0.5662903545256487, "grad_norm": 1.71388820430358, "learning_rate": 8.34612849539956e-06, "loss": 0.3315, "step": 7148 }, { "epoch": 0.566369578134284, "grad_norm": 1.537900409905728, "learning_rate": 8.343597855680231e-06, "loss": 0.2701, "step": 7149 }, { "epoch": 0.5664488017429193, "grad_norm": 1.1662149283403267, "learning_rate": 8.341067325027017e-06, "loss": 0.2017, "step": 7150 }, { "epoch": 0.5665280253515548, "grad_norm": 1.293059542700422, "learning_rate": 8.338536903606535e-06, "loss": 0.224, "step": 7151 }, { "epoch": 0.5666072489601901, "grad_norm": 1.1617754791487092, "learning_rate": 8.336006591585406e-06, "loss": 0.1951, "step": 7152 }, { "epoch": 0.5666864725688255, "grad_norm": 1.2810911116043564, "learning_rate": 8.333476389130234e-06, "loss": 0.2979, "step": 7153 }, { "epoch": 0.5667656961774609, "grad_norm": 1.1212942231610585, "learning_rate": 8.330946296407622e-06, "loss": 0.2234, "step": 7154 }, { "epoch": 0.5668449197860963, "grad_norm": 1.063026537010679, "learning_rate": 8.328416313584169e-06, "loss": 0.1765, "step": 7155 }, { "epoch": 0.5669241433947316, "grad_norm": 1.3412520990949484, "learning_rate": 8.325886440826457e-06, "loss": 0.218, "step": 7156 }, { "epoch": 0.567003367003367, "grad_norm": 1.3402338232670137, "learning_rate": 8.323356678301067e-06, "loss": 0.2222, "step": 7157 }, { "epoch": 0.5670825906120024, "grad_norm": 1.6127387694985609, "learning_rate": 8.320827026174572e-06, "loss": 0.2746, "step": 7158 }, { "epoch": 0.5671618142206377, "grad_norm": 1.5460391460996852, "learning_rate": 8.318297484613538e-06, "loss": 0.2691, "step": 7159 }, { "epoch": 0.5672410378292732, "grad_norm": 1.418653938323995, "learning_rate": 8.315768053784524e-06, "loss": 0.2663, "step": 7160 }, { "epoch": 0.5673202614379085, "grad_norm": 1.444854532476563, "learning_rate": 8.313238733854076e-06, "loss": 0.326, "step": 7161 }, { "epoch": 0.5673994850465439, "grad_norm": 1.325639453001486, "learning_rate": 8.310709524988743e-06, "loss": 0.2246, "step": 7162 }, { "epoch": 0.5674787086551792, "grad_norm": 1.822703768895834, "learning_rate": 8.308180427355062e-06, "loss": 0.4237, "step": 7163 }, { "epoch": 0.5675579322638146, "grad_norm": 1.4074230301700068, "learning_rate": 8.305651441119558e-06, "loss": 0.2216, "step": 7164 }, { "epoch": 0.56763715587245, "grad_norm": 1.2876411145605942, "learning_rate": 8.303122566448754e-06, "loss": 0.2105, "step": 7165 }, { "epoch": 0.5677163794810853, "grad_norm": 1.4467519075522708, "learning_rate": 8.300593803509163e-06, "loss": 0.2845, "step": 7166 }, { "epoch": 0.5677956030897208, "grad_norm": 1.6318526228488663, "learning_rate": 8.298065152467293e-06, "loss": 0.3196, "step": 7167 }, { "epoch": 0.5678748266983561, "grad_norm": 1.2155098910124864, "learning_rate": 8.295536613489645e-06, "loss": 0.231, "step": 7168 }, { "epoch": 0.5679540503069915, "grad_norm": 1.241556015195051, "learning_rate": 8.293008186742708e-06, "loss": 0.2237, "step": 7169 }, { "epoch": 0.5680332739156269, "grad_norm": 1.196469630551233, "learning_rate": 8.290479872392969e-06, "loss": 0.2088, "step": 7170 }, { "epoch": 0.5681124975242622, "grad_norm": 1.4211634554594965, "learning_rate": 8.287951670606905e-06, "loss": 0.2663, "step": 7171 }, { "epoch": 0.5681917211328976, "grad_norm": 1.464335679270111, "learning_rate": 8.285423581550985e-06, "loss": 0.2598, "step": 7172 }, { "epoch": 0.568270944741533, "grad_norm": 1.1856924588774251, "learning_rate": 8.282895605391674e-06, "loss": 0.2235, "step": 7173 }, { "epoch": 0.5683501683501684, "grad_norm": 1.297086944415625, "learning_rate": 8.280367742295424e-06, "loss": 0.2838, "step": 7174 }, { "epoch": 0.5684293919588037, "grad_norm": 1.6851109871436516, "learning_rate": 8.277839992428683e-06, "loss": 0.3499, "step": 7175 }, { "epoch": 0.5685086155674391, "grad_norm": 1.326205667278145, "learning_rate": 8.275312355957893e-06, "loss": 0.2485, "step": 7176 }, { "epoch": 0.5685878391760745, "grad_norm": 1.5782902163343802, "learning_rate": 8.272784833049485e-06, "loss": 0.2873, "step": 7177 }, { "epoch": 0.5686670627847098, "grad_norm": 1.4622161714162363, "learning_rate": 8.270257423869885e-06, "loss": 0.2728, "step": 7178 }, { "epoch": 0.5687462863933452, "grad_norm": 1.247821175670185, "learning_rate": 8.267730128585511e-06, "loss": 0.2187, "step": 7179 }, { "epoch": 0.5688255100019806, "grad_norm": 1.5908534638055083, "learning_rate": 8.265202947362772e-06, "loss": 0.3639, "step": 7180 }, { "epoch": 0.568904733610616, "grad_norm": 1.4814417374129667, "learning_rate": 8.262675880368074e-06, "loss": 0.2928, "step": 7181 }, { "epoch": 0.5689839572192513, "grad_norm": 1.4497222458343686, "learning_rate": 8.260148927767807e-06, "loss": 0.3371, "step": 7182 }, { "epoch": 0.5690631808278868, "grad_norm": 1.559701533795049, "learning_rate": 8.257622089728362e-06, "loss": 0.3846, "step": 7183 }, { "epoch": 0.5691424044365221, "grad_norm": 1.6780123835529779, "learning_rate": 8.255095366416122e-06, "loss": 0.3641, "step": 7184 }, { "epoch": 0.5692216280451574, "grad_norm": 1.4272228181886968, "learning_rate": 8.25256875799745e-06, "loss": 0.2371, "step": 7185 }, { "epoch": 0.5693008516537928, "grad_norm": 1.3055538620491638, "learning_rate": 8.250042264638721e-06, "loss": 0.2882, "step": 7186 }, { "epoch": 0.5693800752624282, "grad_norm": 1.3411328009937957, "learning_rate": 8.24751588650629e-06, "loss": 0.3031, "step": 7187 }, { "epoch": 0.5694592988710636, "grad_norm": 1.208254297411466, "learning_rate": 8.244989623766502e-06, "loss": 0.2666, "step": 7188 }, { "epoch": 0.5695385224796989, "grad_norm": 1.6953329772989298, "learning_rate": 8.242463476585707e-06, "loss": 0.2908, "step": 7189 }, { "epoch": 0.5696177460883344, "grad_norm": 1.2408710405995895, "learning_rate": 8.239937445130232e-06, "loss": 0.2431, "step": 7190 }, { "epoch": 0.5696969696969697, "grad_norm": 1.3685469666742551, "learning_rate": 8.237411529566407e-06, "loss": 0.269, "step": 7191 }, { "epoch": 0.569776193305605, "grad_norm": 1.4202059129376712, "learning_rate": 8.234885730060554e-06, "loss": 0.2933, "step": 7192 }, { "epoch": 0.5698554169142405, "grad_norm": 1.2637095296332983, "learning_rate": 8.232360046778982e-06, "loss": 0.244, "step": 7193 }, { "epoch": 0.5699346405228758, "grad_norm": 1.3178977720511544, "learning_rate": 8.229834479887992e-06, "loss": 0.2497, "step": 7194 }, { "epoch": 0.5700138641315112, "grad_norm": 0.99659848784542, "learning_rate": 8.227309029553889e-06, "loss": 0.146, "step": 7195 }, { "epoch": 0.5700930877401466, "grad_norm": 1.443904702111047, "learning_rate": 8.224783695942954e-06, "loss": 0.2321, "step": 7196 }, { "epoch": 0.570172311348782, "grad_norm": 1.2633576251709218, "learning_rate": 8.222258479221473e-06, "loss": 0.2736, "step": 7197 }, { "epoch": 0.5702515349574173, "grad_norm": 1.4130574257940338, "learning_rate": 8.219733379555715e-06, "loss": 0.2393, "step": 7198 }, { "epoch": 0.5703307585660526, "grad_norm": 1.3170891729454701, "learning_rate": 8.217208397111948e-06, "loss": 0.2423, "step": 7199 }, { "epoch": 0.5704099821746881, "grad_norm": 1.4416706987205348, "learning_rate": 8.21468353205643e-06, "loss": 0.3085, "step": 7200 }, { "epoch": 0.5704892057833234, "grad_norm": 1.4266503880870656, "learning_rate": 8.212158784555412e-06, "loss": 0.3428, "step": 7201 }, { "epoch": 0.5705684293919588, "grad_norm": 1.3676641336902051, "learning_rate": 8.209634154775134e-06, "loss": 0.2048, "step": 7202 }, { "epoch": 0.5706476530005942, "grad_norm": 1.2522747428937528, "learning_rate": 8.207109642881836e-06, "loss": 0.2733, "step": 7203 }, { "epoch": 0.5707268766092296, "grad_norm": 1.5419887458613233, "learning_rate": 8.20458524904174e-06, "loss": 0.3115, "step": 7204 }, { "epoch": 0.5708061002178649, "grad_norm": 1.3122436431156401, "learning_rate": 8.202060973421064e-06, "loss": 0.242, "step": 7205 }, { "epoch": 0.5708853238265003, "grad_norm": 2.0007015289307954, "learning_rate": 8.199536816186025e-06, "loss": 0.2695, "step": 7206 }, { "epoch": 0.5709645474351357, "grad_norm": 1.6146837054048737, "learning_rate": 8.197012777502819e-06, "loss": 0.3655, "step": 7207 }, { "epoch": 0.571043771043771, "grad_norm": 1.4620019476941544, "learning_rate": 8.194488857537646e-06, "loss": 0.2243, "step": 7208 }, { "epoch": 0.5711229946524065, "grad_norm": 1.565684945572946, "learning_rate": 8.191965056456699e-06, "loss": 0.2865, "step": 7209 }, { "epoch": 0.5712022182610418, "grad_norm": 1.5097716303231314, "learning_rate": 8.18944137442615e-06, "loss": 0.3214, "step": 7210 }, { "epoch": 0.5712814418696771, "grad_norm": 1.6280593313768643, "learning_rate": 8.186917811612173e-06, "loss": 0.3614, "step": 7211 }, { "epoch": 0.5713606654783125, "grad_norm": 1.5799601981222253, "learning_rate": 8.184394368180937e-06, "loss": 0.2193, "step": 7212 }, { "epoch": 0.5714398890869479, "grad_norm": 1.2025324196971896, "learning_rate": 8.181871044298594e-06, "loss": 0.2427, "step": 7213 }, { "epoch": 0.5715191126955833, "grad_norm": 1.1725811005798168, "learning_rate": 8.179347840131297e-06, "loss": 0.1896, "step": 7214 }, { "epoch": 0.5715983363042186, "grad_norm": 1.3815789900598912, "learning_rate": 8.176824755845183e-06, "loss": 0.3027, "step": 7215 }, { "epoch": 0.5716775599128541, "grad_norm": 1.4111195877702958, "learning_rate": 8.174301791606384e-06, "loss": 0.2699, "step": 7216 }, { "epoch": 0.5717567835214894, "grad_norm": 1.4074945015660592, "learning_rate": 8.171778947581032e-06, "loss": 0.2813, "step": 7217 }, { "epoch": 0.5718360071301247, "grad_norm": 1.3231085750695826, "learning_rate": 8.169256223935236e-06, "loss": 0.2581, "step": 7218 }, { "epoch": 0.5719152307387602, "grad_norm": 1.3957289223898761, "learning_rate": 8.166733620835107e-06, "loss": 0.3323, "step": 7219 }, { "epoch": 0.5719944543473955, "grad_norm": 1.2571696708555646, "learning_rate": 8.164211138446753e-06, "loss": 0.3602, "step": 7220 }, { "epoch": 0.5720736779560309, "grad_norm": 1.4226328827209267, "learning_rate": 8.161688776936259e-06, "loss": 0.2949, "step": 7221 }, { "epoch": 0.5721529015646662, "grad_norm": 1.4221200719212066, "learning_rate": 8.159166536469717e-06, "loss": 0.2628, "step": 7222 }, { "epoch": 0.5722321251733017, "grad_norm": 1.530727459920596, "learning_rate": 8.156644417213196e-06, "loss": 0.284, "step": 7223 }, { "epoch": 0.572311348781937, "grad_norm": 1.0356838349467306, "learning_rate": 8.154122419332772e-06, "loss": 0.1774, "step": 7224 }, { "epoch": 0.5723905723905723, "grad_norm": 1.4099866280482791, "learning_rate": 8.151600542994506e-06, "loss": 0.2923, "step": 7225 }, { "epoch": 0.5724697959992078, "grad_norm": 1.2734690809589717, "learning_rate": 8.149078788364451e-06, "loss": 0.2387, "step": 7226 }, { "epoch": 0.5725490196078431, "grad_norm": 1.8625116504696313, "learning_rate": 8.14655715560865e-06, "loss": 0.2665, "step": 7227 }, { "epoch": 0.5726282432164785, "grad_norm": 1.3887418279282464, "learning_rate": 8.144035644893143e-06, "loss": 0.3241, "step": 7228 }, { "epoch": 0.5727074668251139, "grad_norm": 1.120719755707284, "learning_rate": 8.141514256383957e-06, "loss": 0.234, "step": 7229 }, { "epoch": 0.5727866904337493, "grad_norm": 1.2241228828981099, "learning_rate": 8.138992990247119e-06, "loss": 0.248, "step": 7230 }, { "epoch": 0.5728659140423846, "grad_norm": 1.5606421941372555, "learning_rate": 8.136471846648633e-06, "loss": 0.3318, "step": 7231 }, { "epoch": 0.57294513765102, "grad_norm": 1.3763982346812798, "learning_rate": 8.133950825754511e-06, "loss": 0.3385, "step": 7232 }, { "epoch": 0.5730243612596554, "grad_norm": 1.1966355672843383, "learning_rate": 8.13142992773075e-06, "loss": 0.263, "step": 7233 }, { "epoch": 0.5731035848682907, "grad_norm": 1.4915149852159055, "learning_rate": 8.128909152743334e-06, "loss": 0.336, "step": 7234 }, { "epoch": 0.5731828084769262, "grad_norm": 1.4870137648498836, "learning_rate": 8.12638850095825e-06, "loss": 0.2419, "step": 7235 }, { "epoch": 0.5732620320855615, "grad_norm": 1.4582273836264925, "learning_rate": 8.123867972541466e-06, "loss": 0.2685, "step": 7236 }, { "epoch": 0.5733412556941969, "grad_norm": 1.8251157920250018, "learning_rate": 8.12134756765895e-06, "loss": 0.3216, "step": 7237 }, { "epoch": 0.5734204793028322, "grad_norm": 1.320568846679935, "learning_rate": 8.118827286476658e-06, "loss": 0.3292, "step": 7238 }, { "epoch": 0.5734997029114676, "grad_norm": 1.574098794814502, "learning_rate": 8.116307129160535e-06, "loss": 0.3553, "step": 7239 }, { "epoch": 0.573578926520103, "grad_norm": 1.4276291328495054, "learning_rate": 8.113787095876525e-06, "loss": 0.272, "step": 7240 }, { "epoch": 0.5736581501287383, "grad_norm": 1.2988499104065252, "learning_rate": 8.11126718679056e-06, "loss": 0.2592, "step": 7241 }, { "epoch": 0.5737373737373738, "grad_norm": 1.374199117147126, "learning_rate": 8.10874740206856e-06, "loss": 0.3144, "step": 7242 }, { "epoch": 0.5738165973460091, "grad_norm": 1.5806091196897165, "learning_rate": 8.106227741876447e-06, "loss": 0.3672, "step": 7243 }, { "epoch": 0.5738958209546445, "grad_norm": 1.480670373717225, "learning_rate": 8.103708206380123e-06, "loss": 0.3067, "step": 7244 }, { "epoch": 0.5739750445632799, "grad_norm": 1.124948587926902, "learning_rate": 8.101188795745489e-06, "loss": 0.2215, "step": 7245 }, { "epoch": 0.5740542681719152, "grad_norm": 1.3452714486215582, "learning_rate": 8.098669510138438e-06, "loss": 0.2509, "step": 7246 }, { "epoch": 0.5741334917805506, "grad_norm": 1.3241386500206564, "learning_rate": 8.09615034972485e-06, "loss": 0.3039, "step": 7247 }, { "epoch": 0.574212715389186, "grad_norm": 1.6940291424289489, "learning_rate": 8.093631314670598e-06, "loss": 0.3379, "step": 7248 }, { "epoch": 0.5742919389978214, "grad_norm": 1.3042791043100272, "learning_rate": 8.091112405141555e-06, "loss": 0.2406, "step": 7249 }, { "epoch": 0.5743711626064567, "grad_norm": 1.4138797091392086, "learning_rate": 8.088593621303573e-06, "loss": 0.2819, "step": 7250 }, { "epoch": 0.5744503862150921, "grad_norm": 1.1609871664453206, "learning_rate": 8.086074963322505e-06, "loss": 0.2709, "step": 7251 }, { "epoch": 0.5745296098237275, "grad_norm": 1.417221535180172, "learning_rate": 8.083556431364191e-06, "loss": 0.3196, "step": 7252 }, { "epoch": 0.5746088334323628, "grad_norm": 1.2280668767312273, "learning_rate": 8.081038025594464e-06, "loss": 0.2553, "step": 7253 }, { "epoch": 0.5746880570409982, "grad_norm": 1.4324194840009161, "learning_rate": 8.078519746179153e-06, "loss": 0.2554, "step": 7254 }, { "epoch": 0.5747672806496336, "grad_norm": 1.206722546459733, "learning_rate": 8.076001593284066e-06, "loss": 0.2186, "step": 7255 }, { "epoch": 0.574846504258269, "grad_norm": 1.2200824157767334, "learning_rate": 8.073483567075018e-06, "loss": 0.2474, "step": 7256 }, { "epoch": 0.5749257278669043, "grad_norm": 1.1086685057097463, "learning_rate": 8.070965667717809e-06, "loss": 0.1861, "step": 7257 }, { "epoch": 0.5750049514755398, "grad_norm": 1.4588593881686898, "learning_rate": 8.06844789537823e-06, "loss": 0.3247, "step": 7258 }, { "epoch": 0.5750841750841751, "grad_norm": 1.6119223480152143, "learning_rate": 8.065930250222061e-06, "loss": 0.2647, "step": 7259 }, { "epoch": 0.5751633986928104, "grad_norm": 1.3166410465710123, "learning_rate": 8.063412732415077e-06, "loss": 0.2338, "step": 7260 }, { "epoch": 0.5752426223014458, "grad_norm": 1.6071503680308619, "learning_rate": 8.060895342123049e-06, "loss": 0.2933, "step": 7261 }, { "epoch": 0.5753218459100812, "grad_norm": 1.5054721075611828, "learning_rate": 8.058378079511732e-06, "loss": 0.3032, "step": 7262 }, { "epoch": 0.5754010695187166, "grad_norm": 1.2524042612027355, "learning_rate": 8.055860944746876e-06, "loss": 0.252, "step": 7263 }, { "epoch": 0.5754802931273519, "grad_norm": 1.4458466798033494, "learning_rate": 8.05334393799422e-06, "loss": 0.3023, "step": 7264 }, { "epoch": 0.5755595167359874, "grad_norm": 1.165159191844685, "learning_rate": 8.050827059419502e-06, "loss": 0.2117, "step": 7265 }, { "epoch": 0.5756387403446227, "grad_norm": 1.9643178697548813, "learning_rate": 8.04831030918844e-06, "loss": 0.4498, "step": 7266 }, { "epoch": 0.575717963953258, "grad_norm": 1.3179042212133283, "learning_rate": 8.045793687466757e-06, "loss": 0.2585, "step": 7267 }, { "epoch": 0.5757971875618935, "grad_norm": 1.216473730912966, "learning_rate": 8.043277194420155e-06, "loss": 0.2093, "step": 7268 }, { "epoch": 0.5758764111705288, "grad_norm": 1.497106052931288, "learning_rate": 8.040760830214334e-06, "loss": 0.272, "step": 7269 }, { "epoch": 0.5759556347791642, "grad_norm": 1.1778245759364987, "learning_rate": 8.038244595014986e-06, "loss": 0.2396, "step": 7270 }, { "epoch": 0.5760348583877996, "grad_norm": 1.183938231519052, "learning_rate": 8.03572848898779e-06, "loss": 0.2436, "step": 7271 }, { "epoch": 0.576114081996435, "grad_norm": 1.2965263685314983, "learning_rate": 8.033212512298422e-06, "loss": 0.2648, "step": 7272 }, { "epoch": 0.5761933056050703, "grad_norm": 1.5701755646348086, "learning_rate": 8.03069666511255e-06, "loss": 0.3472, "step": 7273 }, { "epoch": 0.5762725292137056, "grad_norm": 1.3982646831566523, "learning_rate": 8.028180947595823e-06, "loss": 0.2603, "step": 7274 }, { "epoch": 0.5763517528223411, "grad_norm": 1.2916567721489904, "learning_rate": 8.025665359913897e-06, "loss": 0.2129, "step": 7275 }, { "epoch": 0.5764309764309764, "grad_norm": 1.4876092167672121, "learning_rate": 8.023149902232404e-06, "loss": 0.3081, "step": 7276 }, { "epoch": 0.5765102000396118, "grad_norm": 1.2315801610526786, "learning_rate": 8.020634574716976e-06, "loss": 0.1866, "step": 7277 }, { "epoch": 0.5765894236482472, "grad_norm": 1.3507424955428773, "learning_rate": 8.018119377533243e-06, "loss": 0.2636, "step": 7278 }, { "epoch": 0.5766686472568826, "grad_norm": 1.2268210331017644, "learning_rate": 8.015604310846807e-06, "loss": 0.2522, "step": 7279 }, { "epoch": 0.5767478708655179, "grad_norm": 1.3586737812127547, "learning_rate": 8.013089374823281e-06, "loss": 0.2634, "step": 7280 }, { "epoch": 0.5768270944741533, "grad_norm": 1.4984117980761424, "learning_rate": 8.010574569628263e-06, "loss": 0.2553, "step": 7281 }, { "epoch": 0.5769063180827887, "grad_norm": 1.131431007506256, "learning_rate": 8.008059895427334e-06, "loss": 0.2147, "step": 7282 }, { "epoch": 0.576985541691424, "grad_norm": 1.3008946211435684, "learning_rate": 8.005545352386077e-06, "loss": 0.2217, "step": 7283 }, { "epoch": 0.5770647653000595, "grad_norm": 1.2098053682786098, "learning_rate": 8.003030940670061e-06, "loss": 0.1826, "step": 7284 }, { "epoch": 0.5771439889086948, "grad_norm": 1.6230686913290742, "learning_rate": 8.000516660444848e-06, "loss": 0.3414, "step": 7285 }, { "epoch": 0.5772232125173302, "grad_norm": 1.4830237338734515, "learning_rate": 7.99800251187599e-06, "loss": 0.2118, "step": 7286 }, { "epoch": 0.5773024361259655, "grad_norm": 1.5285111750596734, "learning_rate": 7.995488495129039e-06, "loss": 0.317, "step": 7287 }, { "epoch": 0.5773816597346009, "grad_norm": 1.6543401085003322, "learning_rate": 7.992974610369521e-06, "loss": 0.3133, "step": 7288 }, { "epoch": 0.5774608833432363, "grad_norm": 1.334644616991732, "learning_rate": 7.990460857762969e-06, "loss": 0.2308, "step": 7289 }, { "epoch": 0.5775401069518716, "grad_norm": 1.1066664730660816, "learning_rate": 7.987947237474903e-06, "loss": 0.2713, "step": 7290 }, { "epoch": 0.5776193305605071, "grad_norm": 1.4540309533179654, "learning_rate": 7.985433749670825e-06, "loss": 0.2909, "step": 7291 }, { "epoch": 0.5776985541691424, "grad_norm": 1.3043693364598066, "learning_rate": 7.982920394516247e-06, "loss": 0.2544, "step": 7292 }, { "epoch": 0.5777777777777777, "grad_norm": 1.501627106588649, "learning_rate": 7.98040717217665e-06, "loss": 0.3318, "step": 7293 }, { "epoch": 0.5778570013864132, "grad_norm": 1.3796591933032745, "learning_rate": 7.977894082817524e-06, "loss": 0.3004, "step": 7294 }, { "epoch": 0.5779362249950485, "grad_norm": 1.2097643865654226, "learning_rate": 7.975381126604346e-06, "loss": 0.2233, "step": 7295 }, { "epoch": 0.5780154486036839, "grad_norm": 1.224305721588464, "learning_rate": 7.972868303702576e-06, "loss": 0.2541, "step": 7296 }, { "epoch": 0.5780946722123192, "grad_norm": 1.365044953551066, "learning_rate": 7.970355614277674e-06, "loss": 0.2544, "step": 7297 }, { "epoch": 0.5781738958209547, "grad_norm": 1.7013580617683293, "learning_rate": 7.967843058495092e-06, "loss": 0.3387, "step": 7298 }, { "epoch": 0.57825311942959, "grad_norm": 1.3916278534074553, "learning_rate": 7.965330636520262e-06, "loss": 0.2722, "step": 7299 }, { "epoch": 0.5783323430382253, "grad_norm": 1.3446890157497295, "learning_rate": 7.962818348518623e-06, "loss": 0.3173, "step": 7300 }, { "epoch": 0.5784115666468608, "grad_norm": 1.3413083270373292, "learning_rate": 7.960306194655593e-06, "loss": 0.223, "step": 7301 }, { "epoch": 0.5784907902554961, "grad_norm": 1.3595201204389589, "learning_rate": 7.957794175096585e-06, "loss": 0.2963, "step": 7302 }, { "epoch": 0.5785700138641315, "grad_norm": 2.071475850875579, "learning_rate": 7.955282290007006e-06, "loss": 0.3452, "step": 7303 }, { "epoch": 0.5786492374727669, "grad_norm": 1.2413333177478025, "learning_rate": 7.952770539552246e-06, "loss": 0.2479, "step": 7304 }, { "epoch": 0.5787284610814023, "grad_norm": 1.2790926854684344, "learning_rate": 7.950258923897695e-06, "loss": 0.2207, "step": 7305 }, { "epoch": 0.5788076846900376, "grad_norm": 1.391275476848362, "learning_rate": 7.947747443208735e-06, "loss": 0.2766, "step": 7306 }, { "epoch": 0.578886908298673, "grad_norm": 1.1987126672239472, "learning_rate": 7.945236097650729e-06, "loss": 0.204, "step": 7307 }, { "epoch": 0.5789661319073084, "grad_norm": 1.4113514379631897, "learning_rate": 7.942724887389041e-06, "loss": 0.2143, "step": 7308 }, { "epoch": 0.5790453555159437, "grad_norm": 1.439208753432546, "learning_rate": 7.940213812589018e-06, "loss": 0.2548, "step": 7309 }, { "epoch": 0.5791245791245792, "grad_norm": 1.4574424181193621, "learning_rate": 7.937702873416005e-06, "loss": 0.2846, "step": 7310 }, { "epoch": 0.5792038027332145, "grad_norm": 1.2870828021658083, "learning_rate": 7.935192070035335e-06, "loss": 0.2058, "step": 7311 }, { "epoch": 0.5792830263418499, "grad_norm": 2.0664833564101626, "learning_rate": 7.932681402612332e-06, "loss": 0.5155, "step": 7312 }, { "epoch": 0.5793622499504852, "grad_norm": 1.2530629242543592, "learning_rate": 7.93017087131231e-06, "loss": 0.1958, "step": 7313 }, { "epoch": 0.5794414735591206, "grad_norm": 1.2947756905494958, "learning_rate": 7.927660476300578e-06, "loss": 0.2186, "step": 7314 }, { "epoch": 0.579520697167756, "grad_norm": 1.2896377557764966, "learning_rate": 7.925150217742431e-06, "loss": 0.2673, "step": 7315 }, { "epoch": 0.5795999207763913, "grad_norm": 1.4059878447564047, "learning_rate": 7.92264009580316e-06, "loss": 0.2946, "step": 7316 }, { "epoch": 0.5796791443850268, "grad_norm": 1.495302782265638, "learning_rate": 7.920130110648044e-06, "loss": 0.2782, "step": 7317 }, { "epoch": 0.5797583679936621, "grad_norm": 1.126325338876518, "learning_rate": 7.917620262442349e-06, "loss": 0.1944, "step": 7318 }, { "epoch": 0.5798375916022975, "grad_norm": 1.1030712262967364, "learning_rate": 7.915110551351344e-06, "loss": 0.1917, "step": 7319 }, { "epoch": 0.5799168152109329, "grad_norm": 1.4976207307270517, "learning_rate": 7.912600977540275e-06, "loss": 0.3116, "step": 7320 }, { "epoch": 0.5799960388195682, "grad_norm": 1.655132309868297, "learning_rate": 7.910091541174388e-06, "loss": 0.3465, "step": 7321 }, { "epoch": 0.5800752624282036, "grad_norm": 1.689741170290245, "learning_rate": 7.907582242418916e-06, "loss": 0.3135, "step": 7322 }, { "epoch": 0.580154486036839, "grad_norm": 1.5758540796190785, "learning_rate": 7.905073081439087e-06, "loss": 0.2983, "step": 7323 }, { "epoch": 0.5802337096454744, "grad_norm": 1.2937678167635196, "learning_rate": 7.902564058400116e-06, "loss": 0.2352, "step": 7324 }, { "epoch": 0.5803129332541097, "grad_norm": 1.3636209054955986, "learning_rate": 7.900055173467207e-06, "loss": 0.2677, "step": 7325 }, { "epoch": 0.5803921568627451, "grad_norm": 1.424779845363799, "learning_rate": 7.897546426805561e-06, "loss": 0.3104, "step": 7326 }, { "epoch": 0.5804713804713805, "grad_norm": 1.3146003884776032, "learning_rate": 7.89503781858037e-06, "loss": 0.2458, "step": 7327 }, { "epoch": 0.5805506040800158, "grad_norm": 1.4547270498172822, "learning_rate": 7.892529348956805e-06, "loss": 0.3257, "step": 7328 }, { "epoch": 0.5806298276886512, "grad_norm": 1.0780251596447976, "learning_rate": 7.890021018100045e-06, "loss": 0.1597, "step": 7329 }, { "epoch": 0.5807090512972866, "grad_norm": 1.325176772734977, "learning_rate": 7.887512826175247e-06, "loss": 0.2586, "step": 7330 }, { "epoch": 0.580788274905922, "grad_norm": 1.3332015474402934, "learning_rate": 7.885004773347565e-06, "loss": 0.228, "step": 7331 }, { "epoch": 0.5808674985145573, "grad_norm": 1.6020725920790337, "learning_rate": 7.882496859782145e-06, "loss": 0.2523, "step": 7332 }, { "epoch": 0.5809467221231928, "grad_norm": 1.161064535292935, "learning_rate": 7.879989085644114e-06, "loss": 0.1763, "step": 7333 }, { "epoch": 0.5810259457318281, "grad_norm": 1.4079571047952557, "learning_rate": 7.877481451098602e-06, "loss": 0.292, "step": 7334 }, { "epoch": 0.5811051693404634, "grad_norm": 1.129454161260187, "learning_rate": 7.874973956310726e-06, "loss": 0.2076, "step": 7335 }, { "epoch": 0.5811843929490988, "grad_norm": 1.5015655702099426, "learning_rate": 7.872466601445587e-06, "loss": 0.3853, "step": 7336 }, { "epoch": 0.5812636165577342, "grad_norm": 1.2535513976915917, "learning_rate": 7.869959386668286e-06, "loss": 0.2212, "step": 7337 }, { "epoch": 0.5813428401663696, "grad_norm": 1.4237868641733555, "learning_rate": 7.86745231214391e-06, "loss": 0.2518, "step": 7338 }, { "epoch": 0.5814220637750049, "grad_norm": 1.541267534677451, "learning_rate": 7.864945378037538e-06, "loss": 0.3293, "step": 7339 }, { "epoch": 0.5815012873836404, "grad_norm": 1.7285119828871987, "learning_rate": 7.862438584514242e-06, "loss": 0.2707, "step": 7340 }, { "epoch": 0.5815805109922757, "grad_norm": 1.25944636404642, "learning_rate": 7.859931931739077e-06, "loss": 0.2585, "step": 7341 }, { "epoch": 0.581659734600911, "grad_norm": 1.2411482866610528, "learning_rate": 7.857425419877097e-06, "loss": 0.2326, "step": 7342 }, { "epoch": 0.5817389582095465, "grad_norm": 1.203996177566958, "learning_rate": 7.854919049093345e-06, "loss": 0.1799, "step": 7343 }, { "epoch": 0.5818181818181818, "grad_norm": 1.2776306644459905, "learning_rate": 7.852412819552853e-06, "loss": 0.1868, "step": 7344 }, { "epoch": 0.5818974054268172, "grad_norm": 1.4108138126115257, "learning_rate": 7.849906731420642e-06, "loss": 0.2265, "step": 7345 }, { "epoch": 0.5819766290354526, "grad_norm": 1.013774624060726, "learning_rate": 7.847400784861727e-06, "loss": 0.1401, "step": 7346 }, { "epoch": 0.582055852644088, "grad_norm": 0.9857985470839145, "learning_rate": 7.844894980041112e-06, "loss": 0.1526, "step": 7347 }, { "epoch": 0.5821350762527233, "grad_norm": 1.1736385670994127, "learning_rate": 7.842389317123795e-06, "loss": 0.2057, "step": 7348 }, { "epoch": 0.5822142998613586, "grad_norm": 1.2513554917974397, "learning_rate": 7.839883796274758e-06, "loss": 0.218, "step": 7349 }, { "epoch": 0.5822935234699941, "grad_norm": 1.3033839660867366, "learning_rate": 7.83737841765898e-06, "loss": 0.3215, "step": 7350 }, { "epoch": 0.5823727470786294, "grad_norm": 1.5224865053561822, "learning_rate": 7.834873181441426e-06, "loss": 0.2809, "step": 7351 }, { "epoch": 0.5824519706872648, "grad_norm": 1.373630114415987, "learning_rate": 7.832368087787056e-06, "loss": 0.2496, "step": 7352 }, { "epoch": 0.5825311942959002, "grad_norm": 1.236517881219902, "learning_rate": 7.82986313686082e-06, "loss": 0.2146, "step": 7353 }, { "epoch": 0.5826104179045356, "grad_norm": 1.3183533586950056, "learning_rate": 7.82735832882765e-06, "loss": 0.2503, "step": 7354 }, { "epoch": 0.5826896415131709, "grad_norm": 1.3460888533703654, "learning_rate": 7.824853663852482e-06, "loss": 0.2735, "step": 7355 }, { "epoch": 0.5827688651218063, "grad_norm": 1.1530308237518005, "learning_rate": 7.822349142100236e-06, "loss": 0.2242, "step": 7356 }, { "epoch": 0.5828480887304417, "grad_norm": 1.1942893244455353, "learning_rate": 7.819844763735818e-06, "loss": 0.2023, "step": 7357 }, { "epoch": 0.582927312339077, "grad_norm": 1.3560379317248987, "learning_rate": 7.817340528924132e-06, "loss": 0.2989, "step": 7358 }, { "epoch": 0.5830065359477125, "grad_norm": 1.1286278903322118, "learning_rate": 7.814836437830074e-06, "loss": 0.2243, "step": 7359 }, { "epoch": 0.5830857595563478, "grad_norm": 1.2320040972737354, "learning_rate": 7.812332490618521e-06, "loss": 0.2617, "step": 7360 }, { "epoch": 0.5831649831649832, "grad_norm": 1.282660667798146, "learning_rate": 7.809828687454343e-06, "loss": 0.2566, "step": 7361 }, { "epoch": 0.5832442067736185, "grad_norm": 1.259016238667985, "learning_rate": 7.807325028502412e-06, "loss": 0.2135, "step": 7362 }, { "epoch": 0.5833234303822539, "grad_norm": 1.879730743599573, "learning_rate": 7.804821513927574e-06, "loss": 0.3322, "step": 7363 }, { "epoch": 0.5834026539908893, "grad_norm": 1.373046379468216, "learning_rate": 7.802318143894678e-06, "loss": 0.259, "step": 7364 }, { "epoch": 0.5834818775995246, "grad_norm": 1.1224787103862752, "learning_rate": 7.799814918568559e-06, "loss": 0.2102, "step": 7365 }, { "epoch": 0.5835611012081601, "grad_norm": 1.2441747236734526, "learning_rate": 7.797311838114038e-06, "loss": 0.2351, "step": 7366 }, { "epoch": 0.5836403248167954, "grad_norm": 1.2537666663779703, "learning_rate": 7.794808902695935e-06, "loss": 0.1883, "step": 7367 }, { "epoch": 0.5837195484254307, "grad_norm": 1.31655449917612, "learning_rate": 7.792306112479055e-06, "loss": 0.3199, "step": 7368 }, { "epoch": 0.5837987720340662, "grad_norm": 1.365052154501949, "learning_rate": 7.789803467628196e-06, "loss": 0.2828, "step": 7369 }, { "epoch": 0.5838779956427015, "grad_norm": 1.1945704099352312, "learning_rate": 7.787300968308144e-06, "loss": 0.2337, "step": 7370 }, { "epoch": 0.5839572192513369, "grad_norm": 1.4190714073996595, "learning_rate": 7.784798614683675e-06, "loss": 0.2485, "step": 7371 }, { "epoch": 0.5840364428599722, "grad_norm": 1.3220178221670316, "learning_rate": 7.782296406919557e-06, "loss": 0.221, "step": 7372 }, { "epoch": 0.5841156664686077, "grad_norm": 1.045638236908882, "learning_rate": 7.779794345180552e-06, "loss": 0.2106, "step": 7373 }, { "epoch": 0.584194890077243, "grad_norm": 1.3382782502913058, "learning_rate": 7.777292429631405e-06, "loss": 0.1874, "step": 7374 }, { "epoch": 0.5842741136858783, "grad_norm": 1.5283453665670712, "learning_rate": 7.774790660436857e-06, "loss": 0.3345, "step": 7375 }, { "epoch": 0.5843533372945138, "grad_norm": 1.3236706558559976, "learning_rate": 7.772289037761639e-06, "loss": 0.2906, "step": 7376 }, { "epoch": 0.5844325609031491, "grad_norm": 1.4051159634469799, "learning_rate": 7.769787561770466e-06, "loss": 0.2629, "step": 7377 }, { "epoch": 0.5845117845117845, "grad_norm": 1.7113763808153784, "learning_rate": 7.767286232628054e-06, "loss": 0.4199, "step": 7378 }, { "epoch": 0.5845910081204199, "grad_norm": 1.4889223924081287, "learning_rate": 7.764785050499098e-06, "loss": 0.2545, "step": 7379 }, { "epoch": 0.5846702317290553, "grad_norm": 1.2217819978861575, "learning_rate": 7.76228401554829e-06, "loss": 0.2117, "step": 7380 }, { "epoch": 0.5847494553376906, "grad_norm": 1.157869752827776, "learning_rate": 7.759783127940315e-06, "loss": 0.2249, "step": 7381 }, { "epoch": 0.584828678946326, "grad_norm": 1.369051172674697, "learning_rate": 7.757282387839842e-06, "loss": 0.3094, "step": 7382 }, { "epoch": 0.5849079025549614, "grad_norm": 1.3898218579430637, "learning_rate": 7.75478179541153e-06, "loss": 0.2696, "step": 7383 }, { "epoch": 0.5849871261635967, "grad_norm": 1.6501107646663669, "learning_rate": 7.752281350820037e-06, "loss": 0.3156, "step": 7384 }, { "epoch": 0.5850663497722322, "grad_norm": 1.647438200342468, "learning_rate": 7.749781054229998e-06, "loss": 0.3584, "step": 7385 }, { "epoch": 0.5851455733808675, "grad_norm": 1.4161007283074485, "learning_rate": 7.747280905806051e-06, "loss": 0.3003, "step": 7386 }, { "epoch": 0.5852247969895029, "grad_norm": 1.2375726643132374, "learning_rate": 7.744780905712818e-06, "loss": 0.2281, "step": 7387 }, { "epoch": 0.5853040205981382, "grad_norm": 1.3188851682562714, "learning_rate": 7.742281054114909e-06, "loss": 0.2709, "step": 7388 }, { "epoch": 0.5853832442067736, "grad_norm": 1.447484092421129, "learning_rate": 7.73978135117693e-06, "loss": 0.3015, "step": 7389 }, { "epoch": 0.585462467815409, "grad_norm": 1.2320822986661275, "learning_rate": 7.737281797063473e-06, "loss": 0.2331, "step": 7390 }, { "epoch": 0.5855416914240443, "grad_norm": 1.5933305415863923, "learning_rate": 7.734782391939123e-06, "loss": 0.263, "step": 7391 }, { "epoch": 0.5856209150326798, "grad_norm": 0.9928643675783463, "learning_rate": 7.732283135968452e-06, "loss": 0.1524, "step": 7392 }, { "epoch": 0.5857001386413151, "grad_norm": 1.309058063745589, "learning_rate": 7.729784029316025e-06, "loss": 0.2484, "step": 7393 }, { "epoch": 0.5857793622499505, "grad_norm": 1.316573126287946, "learning_rate": 7.7272850721464e-06, "loss": 0.3259, "step": 7394 }, { "epoch": 0.5858585858585859, "grad_norm": 1.8782328421029293, "learning_rate": 7.724786264624112e-06, "loss": 0.4171, "step": 7395 }, { "epoch": 0.5859378094672212, "grad_norm": 1.3319756342825644, "learning_rate": 7.722287606913703e-06, "loss": 0.2834, "step": 7396 }, { "epoch": 0.5860170330758566, "grad_norm": 1.1335744816448825, "learning_rate": 7.719789099179696e-06, "loss": 0.2151, "step": 7397 }, { "epoch": 0.586096256684492, "grad_norm": 1.2438505256062882, "learning_rate": 7.717290741586602e-06, "loss": 0.2575, "step": 7398 }, { "epoch": 0.5861754802931274, "grad_norm": 1.3637738624434186, "learning_rate": 7.714792534298934e-06, "loss": 0.294, "step": 7399 }, { "epoch": 0.5862547039017627, "grad_norm": 1.7378979538084858, "learning_rate": 7.712294477481177e-06, "loss": 0.4004, "step": 7400 }, { "epoch": 0.5863339275103981, "grad_norm": 1.4891553505268718, "learning_rate": 7.709796571297823e-06, "loss": 0.228, "step": 7401 }, { "epoch": 0.5864131511190335, "grad_norm": 1.4406014757288916, "learning_rate": 7.707298815913346e-06, "loss": 0.3156, "step": 7402 }, { "epoch": 0.5864923747276688, "grad_norm": 1.5959946042731206, "learning_rate": 7.70480121149221e-06, "loss": 0.3355, "step": 7403 }, { "epoch": 0.5865715983363042, "grad_norm": 1.0837926506559399, "learning_rate": 7.702303758198868e-06, "loss": 0.2148, "step": 7404 }, { "epoch": 0.5866508219449396, "grad_norm": 1.0914135582763453, "learning_rate": 7.699806456197771e-06, "loss": 0.1996, "step": 7405 }, { "epoch": 0.586730045553575, "grad_norm": 1.84398259220701, "learning_rate": 7.697309305653348e-06, "loss": 0.3525, "step": 7406 }, { "epoch": 0.5868092691622103, "grad_norm": 1.4811872452462012, "learning_rate": 7.694812306730031e-06, "loss": 0.2625, "step": 7407 }, { "epoch": 0.5868884927708458, "grad_norm": 1.4527479795667464, "learning_rate": 7.69231545959223e-06, "loss": 0.3628, "step": 7408 }, { "epoch": 0.5869677163794811, "grad_norm": 1.4336202808042589, "learning_rate": 7.689818764404351e-06, "loss": 0.3084, "step": 7409 }, { "epoch": 0.5870469399881164, "grad_norm": 1.078452761860379, "learning_rate": 7.687322221330794e-06, "loss": 0.1709, "step": 7410 }, { "epoch": 0.5871261635967518, "grad_norm": 1.306351426232991, "learning_rate": 7.684825830535935e-06, "loss": 0.2292, "step": 7411 }, { "epoch": 0.5872053872053872, "grad_norm": 1.3203699356716867, "learning_rate": 7.682329592184158e-06, "loss": 0.267, "step": 7412 }, { "epoch": 0.5872846108140226, "grad_norm": 1.4417812450917131, "learning_rate": 7.679833506439826e-06, "loss": 0.273, "step": 7413 }, { "epoch": 0.5873638344226579, "grad_norm": 1.3155191690084824, "learning_rate": 7.677337573467294e-06, "loss": 0.2662, "step": 7414 }, { "epoch": 0.5874430580312934, "grad_norm": 1.446292602686977, "learning_rate": 7.674841793430907e-06, "loss": 0.3361, "step": 7415 }, { "epoch": 0.5875222816399287, "grad_norm": 1.6835814421649804, "learning_rate": 7.672346166494999e-06, "loss": 0.3915, "step": 7416 }, { "epoch": 0.587601505248564, "grad_norm": 1.252841167850413, "learning_rate": 7.669850692823895e-06, "loss": 0.2524, "step": 7417 }, { "epoch": 0.5876807288571995, "grad_norm": 1.7175642424581115, "learning_rate": 7.667355372581913e-06, "loss": 0.2831, "step": 7418 }, { "epoch": 0.5877599524658348, "grad_norm": 1.4702081195470056, "learning_rate": 7.664860205933356e-06, "loss": 0.3117, "step": 7419 }, { "epoch": 0.5878391760744702, "grad_norm": 1.1536574519932967, "learning_rate": 7.662365193042516e-06, "loss": 0.2058, "step": 7420 }, { "epoch": 0.5879183996831056, "grad_norm": 1.3982714578969981, "learning_rate": 7.659870334073683e-06, "loss": 0.2512, "step": 7421 }, { "epoch": 0.587997623291741, "grad_norm": 1.1853779223638359, "learning_rate": 7.657375629191126e-06, "loss": 0.2034, "step": 7422 }, { "epoch": 0.5880768469003763, "grad_norm": 1.4861810625649794, "learning_rate": 7.654881078559112e-06, "loss": 0.2838, "step": 7423 }, { "epoch": 0.5881560705090116, "grad_norm": 1.385320203350565, "learning_rate": 7.652386682341895e-06, "loss": 0.2644, "step": 7424 }, { "epoch": 0.5882352941176471, "grad_norm": 1.5555083791737685, "learning_rate": 7.64989244070372e-06, "loss": 0.2881, "step": 7425 }, { "epoch": 0.5883145177262824, "grad_norm": 1.3076146519454994, "learning_rate": 7.647398353808822e-06, "loss": 0.2205, "step": 7426 }, { "epoch": 0.5883937413349178, "grad_norm": 1.316769571768223, "learning_rate": 7.644904421821418e-06, "loss": 0.2126, "step": 7427 }, { "epoch": 0.5884729649435532, "grad_norm": 1.3689127671483172, "learning_rate": 7.642410644905726e-06, "loss": 0.2671, "step": 7428 }, { "epoch": 0.5885521885521886, "grad_norm": 1.0943543827417543, "learning_rate": 7.639917023225953e-06, "loss": 0.1929, "step": 7429 }, { "epoch": 0.5886314121608239, "grad_norm": 1.6173908518413211, "learning_rate": 7.637423556946284e-06, "loss": 0.2325, "step": 7430 }, { "epoch": 0.5887106357694593, "grad_norm": 1.3764563617056509, "learning_rate": 7.63493024623091e-06, "loss": 0.2506, "step": 7431 }, { "epoch": 0.5887898593780947, "grad_norm": 1.284653619218265, "learning_rate": 7.632437091243996e-06, "loss": 0.254, "step": 7432 }, { "epoch": 0.58886908298673, "grad_norm": 1.3515770516345202, "learning_rate": 7.629944092149707e-06, "loss": 0.2726, "step": 7433 }, { "epoch": 0.5889483065953655, "grad_norm": 1.270990809515771, "learning_rate": 7.627451249112199e-06, "loss": 0.1839, "step": 7434 }, { "epoch": 0.5890275302040008, "grad_norm": 1.207246841455816, "learning_rate": 7.624958562295607e-06, "loss": 0.2001, "step": 7435 }, { "epoch": 0.5891067538126362, "grad_norm": 1.3501580352354754, "learning_rate": 7.622466031864066e-06, "loss": 0.1898, "step": 7436 }, { "epoch": 0.5891859774212715, "grad_norm": 1.0902484685565283, "learning_rate": 7.6199736579817005e-06, "loss": 0.2176, "step": 7437 }, { "epoch": 0.5892652010299069, "grad_norm": 1.5473770654886636, "learning_rate": 7.617481440812617e-06, "loss": 0.2818, "step": 7438 }, { "epoch": 0.5893444246385423, "grad_norm": 1.805836870565018, "learning_rate": 7.614989380520914e-06, "loss": 0.3491, "step": 7439 }, { "epoch": 0.5894236482471776, "grad_norm": 1.3799479293153125, "learning_rate": 7.612497477270686e-06, "loss": 0.2464, "step": 7440 }, { "epoch": 0.5895028718558131, "grad_norm": 1.3944719720163752, "learning_rate": 7.610005731226009e-06, "loss": 0.2698, "step": 7441 }, { "epoch": 0.5895820954644484, "grad_norm": 1.5419217738308628, "learning_rate": 7.607514142550955e-06, "loss": 0.3906, "step": 7442 }, { "epoch": 0.5896613190730838, "grad_norm": 1.337667194925046, "learning_rate": 7.605022711409585e-06, "loss": 0.2321, "step": 7443 }, { "epoch": 0.5897405426817192, "grad_norm": 1.2600107445664657, "learning_rate": 7.602531437965943e-06, "loss": 0.2493, "step": 7444 }, { "epoch": 0.5898197662903545, "grad_norm": 1.3604319618866716, "learning_rate": 7.6000403223840714e-06, "loss": 0.2597, "step": 7445 }, { "epoch": 0.5898989898989899, "grad_norm": 1.2765888641035044, "learning_rate": 7.597549364827997e-06, "loss": 0.242, "step": 7446 }, { "epoch": 0.5899782135076252, "grad_norm": 1.3756647494275633, "learning_rate": 7.595058565461736e-06, "loss": 0.2702, "step": 7447 }, { "epoch": 0.5900574371162607, "grad_norm": 1.5965004058795826, "learning_rate": 7.5925679244492985e-06, "loss": 0.3563, "step": 7448 }, { "epoch": 0.590136660724896, "grad_norm": 1.2983133756519158, "learning_rate": 7.5900774419546775e-06, "loss": 0.3042, "step": 7449 }, { "epoch": 0.5902158843335313, "grad_norm": 1.2219598943563936, "learning_rate": 7.58758711814186e-06, "loss": 0.173, "step": 7450 }, { "epoch": 0.5902951079421668, "grad_norm": 1.486462609571466, "learning_rate": 7.585096953174827e-06, "loss": 0.3226, "step": 7451 }, { "epoch": 0.5903743315508021, "grad_norm": 1.2695946551624102, "learning_rate": 7.582606947217537e-06, "loss": 0.2291, "step": 7452 }, { "epoch": 0.5904535551594375, "grad_norm": 1.4351904370970332, "learning_rate": 7.580117100433947e-06, "loss": 0.3575, "step": 7453 }, { "epoch": 0.5905327787680729, "grad_norm": 1.3127303331222588, "learning_rate": 7.577627412988005e-06, "loss": 0.2212, "step": 7454 }, { "epoch": 0.5906120023767083, "grad_norm": 1.2318312775641598, "learning_rate": 7.57513788504364e-06, "loss": 0.2877, "step": 7455 }, { "epoch": 0.5906912259853436, "grad_norm": 2.6279604482854646, "learning_rate": 7.572648516764778e-06, "loss": 0.2538, "step": 7456 }, { "epoch": 0.590770449593979, "grad_norm": 1.3803850522874588, "learning_rate": 7.570159308315331e-06, "loss": 0.2509, "step": 7457 }, { "epoch": 0.5908496732026144, "grad_norm": 1.2948777977724644, "learning_rate": 7.5676702598592025e-06, "loss": 0.2837, "step": 7458 }, { "epoch": 0.5909288968112497, "grad_norm": 1.576605562566521, "learning_rate": 7.5651813715602855e-06, "loss": 0.2639, "step": 7459 }, { "epoch": 0.5910081204198852, "grad_norm": 1.1023186007731323, "learning_rate": 7.562692643582456e-06, "loss": 0.2042, "step": 7460 }, { "epoch": 0.5910873440285205, "grad_norm": 1.2512309018769499, "learning_rate": 7.56020407608959e-06, "loss": 0.2917, "step": 7461 }, { "epoch": 0.5911665676371559, "grad_norm": 1.4407503336677954, "learning_rate": 7.557715669245547e-06, "loss": 0.3002, "step": 7462 }, { "epoch": 0.5912457912457912, "grad_norm": 1.489865731951598, "learning_rate": 7.555227423214174e-06, "loss": 0.2233, "step": 7463 }, { "epoch": 0.5913250148544266, "grad_norm": 1.2316773407667512, "learning_rate": 7.552739338159314e-06, "loss": 0.167, "step": 7464 }, { "epoch": 0.591404238463062, "grad_norm": 1.4010405204446073, "learning_rate": 7.550251414244791e-06, "loss": 0.236, "step": 7465 }, { "epoch": 0.5914834620716973, "grad_norm": 1.6825773584782147, "learning_rate": 7.5477636516344255e-06, "loss": 0.2709, "step": 7466 }, { "epoch": 0.5915626856803328, "grad_norm": 1.257420176577219, "learning_rate": 7.545276050492025e-06, "loss": 0.2572, "step": 7467 }, { "epoch": 0.5916419092889681, "grad_norm": 1.5538640611998669, "learning_rate": 7.542788610981384e-06, "loss": 0.3586, "step": 7468 }, { "epoch": 0.5917211328976035, "grad_norm": 1.473066563444876, "learning_rate": 7.540301333266289e-06, "loss": 0.3298, "step": 7469 }, { "epoch": 0.5918003565062389, "grad_norm": 1.157732814223413, "learning_rate": 7.537814217510518e-06, "loss": 0.2131, "step": 7470 }, { "epoch": 0.5918795801148742, "grad_norm": 1.2652375994673888, "learning_rate": 7.535327263877832e-06, "loss": 0.2705, "step": 7471 }, { "epoch": 0.5919588037235096, "grad_norm": 1.141362571719065, "learning_rate": 7.532840472531988e-06, "loss": 0.2423, "step": 7472 }, { "epoch": 0.592038027332145, "grad_norm": 1.7007286739688363, "learning_rate": 7.530353843636726e-06, "loss": 0.2879, "step": 7473 }, { "epoch": 0.5921172509407804, "grad_norm": 1.3018318599537513, "learning_rate": 7.52786737735578e-06, "loss": 0.2549, "step": 7474 }, { "epoch": 0.5921964745494157, "grad_norm": 1.1864689912220028, "learning_rate": 7.525381073852874e-06, "loss": 0.1985, "step": 7475 }, { "epoch": 0.5922756981580511, "grad_norm": 1.4190625680201512, "learning_rate": 7.522894933291715e-06, "loss": 0.3076, "step": 7476 }, { "epoch": 0.5923549217666865, "grad_norm": 1.2969648037633639, "learning_rate": 7.5204089558360076e-06, "loss": 0.2797, "step": 7477 }, { "epoch": 0.5924341453753218, "grad_norm": 1.3373739069866868, "learning_rate": 7.517923141649439e-06, "loss": 0.2766, "step": 7478 }, { "epoch": 0.5925133689839572, "grad_norm": 1.3512528231625365, "learning_rate": 7.515437490895688e-06, "loss": 0.36, "step": 7479 }, { "epoch": 0.5925925925925926, "grad_norm": 1.1546955065879365, "learning_rate": 7.5129520037384225e-06, "loss": 0.2742, "step": 7480 }, { "epoch": 0.592671816201228, "grad_norm": 1.6571432928098642, "learning_rate": 7.5104666803413015e-06, "loss": 0.2883, "step": 7481 }, { "epoch": 0.5927510398098633, "grad_norm": 1.0168811704328824, "learning_rate": 7.50798152086797e-06, "loss": 0.181, "step": 7482 }, { "epoch": 0.5928302634184988, "grad_norm": 1.0660797183668953, "learning_rate": 7.505496525482066e-06, "loss": 0.2247, "step": 7483 }, { "epoch": 0.5929094870271341, "grad_norm": 1.3884581243400174, "learning_rate": 7.503011694347212e-06, "loss": 0.2753, "step": 7484 }, { "epoch": 0.5929887106357694, "grad_norm": 1.6672136037078602, "learning_rate": 7.500527027627025e-06, "loss": 0.3018, "step": 7485 }, { "epoch": 0.5930679342444048, "grad_norm": 1.3825309258770417, "learning_rate": 7.4980425254851034e-06, "loss": 0.2729, "step": 7486 }, { "epoch": 0.5931471578530402, "grad_norm": 1.3220385655169353, "learning_rate": 7.495558188085044e-06, "loss": 0.224, "step": 7487 }, { "epoch": 0.5932263814616756, "grad_norm": 1.372592469822823, "learning_rate": 7.493074015590429e-06, "loss": 0.2625, "step": 7488 }, { "epoch": 0.5933056050703109, "grad_norm": 1.2115544832619078, "learning_rate": 7.490590008164824e-06, "loss": 0.2048, "step": 7489 }, { "epoch": 0.5933848286789464, "grad_norm": 1.237305794544412, "learning_rate": 7.488106165971795e-06, "loss": 0.2589, "step": 7490 }, { "epoch": 0.5934640522875817, "grad_norm": 1.3037533439806281, "learning_rate": 7.485622489174888e-06, "loss": 0.1868, "step": 7491 }, { "epoch": 0.593543275896217, "grad_norm": 1.3264577974516714, "learning_rate": 7.483138977937643e-06, "loss": 0.2209, "step": 7492 }, { "epoch": 0.5936224995048525, "grad_norm": 1.266194862230873, "learning_rate": 7.480655632423586e-06, "loss": 0.1685, "step": 7493 }, { "epoch": 0.5937017231134878, "grad_norm": 1.4688133553403262, "learning_rate": 7.478172452796231e-06, "loss": 0.281, "step": 7494 }, { "epoch": 0.5937809467221232, "grad_norm": 1.2678216071115498, "learning_rate": 7.475689439219085e-06, "loss": 0.2251, "step": 7495 }, { "epoch": 0.5938601703307586, "grad_norm": 1.2659312085566632, "learning_rate": 7.473206591855646e-06, "loss": 0.248, "step": 7496 }, { "epoch": 0.593939393939394, "grad_norm": 1.777273256780976, "learning_rate": 7.470723910869393e-06, "loss": 0.3508, "step": 7497 }, { "epoch": 0.5940186175480293, "grad_norm": 1.3234516144639468, "learning_rate": 7.468241396423801e-06, "loss": 0.2617, "step": 7498 }, { "epoch": 0.5940978411566646, "grad_norm": 1.2946505327690996, "learning_rate": 7.465759048682333e-06, "loss": 0.1795, "step": 7499 }, { "epoch": 0.5941770647653001, "grad_norm": 1.332227007812678, "learning_rate": 7.463276867808435e-06, "loss": 0.2254, "step": 7500 }, { "epoch": 0.5942562883739354, "grad_norm": 1.3911425480582986, "learning_rate": 7.46079485396555e-06, "loss": 0.2984, "step": 7501 }, { "epoch": 0.5943355119825708, "grad_norm": 1.461404613720531, "learning_rate": 7.458313007317106e-06, "loss": 0.3018, "step": 7502 }, { "epoch": 0.5944147355912062, "grad_norm": 1.3151399902166259, "learning_rate": 7.45583132802652e-06, "loss": 0.2576, "step": 7503 }, { "epoch": 0.5944939591998416, "grad_norm": 1.4257537833821978, "learning_rate": 7.4533498162572004e-06, "loss": 0.3103, "step": 7504 }, { "epoch": 0.5945731828084769, "grad_norm": 1.3807882136832523, "learning_rate": 7.450868472172541e-06, "loss": 0.2852, "step": 7505 }, { "epoch": 0.5946524064171123, "grad_norm": 1.5790774466013637, "learning_rate": 7.448387295935926e-06, "loss": 0.2449, "step": 7506 }, { "epoch": 0.5947316300257477, "grad_norm": 1.2052271497834661, "learning_rate": 7.445906287710733e-06, "loss": 0.2588, "step": 7507 }, { "epoch": 0.594810853634383, "grad_norm": 1.206588330644287, "learning_rate": 7.443425447660319e-06, "loss": 0.2394, "step": 7508 }, { "epoch": 0.5948900772430185, "grad_norm": 1.4398635310460473, "learning_rate": 7.4409447759480404e-06, "loss": 0.2689, "step": 7509 }, { "epoch": 0.5949693008516538, "grad_norm": 1.273649803856423, "learning_rate": 7.438464272737232e-06, "loss": 0.2313, "step": 7510 }, { "epoch": 0.5950485244602892, "grad_norm": 1.3196732345679216, "learning_rate": 7.435983938191227e-06, "loss": 0.2606, "step": 7511 }, { "epoch": 0.5951277480689245, "grad_norm": 1.41977868196511, "learning_rate": 7.433503772473343e-06, "loss": 0.3115, "step": 7512 }, { "epoch": 0.5952069716775599, "grad_norm": 1.5681720250193947, "learning_rate": 7.431023775746886e-06, "loss": 0.3535, "step": 7513 }, { "epoch": 0.5952861952861953, "grad_norm": 1.3059631707700323, "learning_rate": 7.428543948175151e-06, "loss": 0.2478, "step": 7514 }, { "epoch": 0.5953654188948306, "grad_norm": 1.4592618113563653, "learning_rate": 7.426064289921429e-06, "loss": 0.2491, "step": 7515 }, { "epoch": 0.5954446425034661, "grad_norm": 1.5934290539020544, "learning_rate": 7.423584801148985e-06, "loss": 0.3142, "step": 7516 }, { "epoch": 0.5955238661121014, "grad_norm": 1.2059085233451832, "learning_rate": 7.421105482021084e-06, "loss": 0.2405, "step": 7517 }, { "epoch": 0.5956030897207368, "grad_norm": 1.0503339324969476, "learning_rate": 7.41862633270098e-06, "loss": 0.1798, "step": 7518 }, { "epoch": 0.5956823133293722, "grad_norm": 1.179440912848171, "learning_rate": 7.416147353351909e-06, "loss": 0.2606, "step": 7519 }, { "epoch": 0.5957615369380075, "grad_norm": 1.7951957662791829, "learning_rate": 7.4136685441371025e-06, "loss": 0.3119, "step": 7520 }, { "epoch": 0.5958407605466429, "grad_norm": 1.2866725077806018, "learning_rate": 7.41118990521978e-06, "loss": 0.2682, "step": 7521 }, { "epoch": 0.5959199841552782, "grad_norm": 1.4937678006327972, "learning_rate": 7.408711436763143e-06, "loss": 0.3004, "step": 7522 }, { "epoch": 0.5959992077639137, "grad_norm": 1.2954424479938147, "learning_rate": 7.406233138930389e-06, "loss": 0.2728, "step": 7523 }, { "epoch": 0.596078431372549, "grad_norm": 1.051369170825376, "learning_rate": 7.4037550118847044e-06, "loss": 0.2164, "step": 7524 }, { "epoch": 0.5961576549811844, "grad_norm": 1.4811417945690901, "learning_rate": 7.401277055789259e-06, "loss": 0.291, "step": 7525 }, { "epoch": 0.5962368785898198, "grad_norm": 1.4524776438044589, "learning_rate": 7.398799270807217e-06, "loss": 0.268, "step": 7526 }, { "epoch": 0.5963161021984551, "grad_norm": 1.5893021749315575, "learning_rate": 7.3963216571017235e-06, "loss": 0.3047, "step": 7527 }, { "epoch": 0.5963953258070905, "grad_norm": 1.2217490114807055, "learning_rate": 7.3938442148359215e-06, "loss": 0.2327, "step": 7528 }, { "epoch": 0.5964745494157259, "grad_norm": 1.461490164975469, "learning_rate": 7.391366944172941e-06, "loss": 0.2637, "step": 7529 }, { "epoch": 0.5965537730243613, "grad_norm": 1.6686776838493305, "learning_rate": 7.388889845275893e-06, "loss": 0.3454, "step": 7530 }, { "epoch": 0.5966329966329966, "grad_norm": 1.6606977997013623, "learning_rate": 7.3864129183078835e-06, "loss": 0.3359, "step": 7531 }, { "epoch": 0.596712220241632, "grad_norm": 1.4439122612478255, "learning_rate": 7.38393616343201e-06, "loss": 0.2494, "step": 7532 }, { "epoch": 0.5967914438502674, "grad_norm": 1.1928585321778848, "learning_rate": 7.381459580811352e-06, "loss": 0.2281, "step": 7533 }, { "epoch": 0.5968706674589027, "grad_norm": 1.195981238174483, "learning_rate": 7.378983170608982e-06, "loss": 0.2145, "step": 7534 }, { "epoch": 0.5969498910675382, "grad_norm": 1.2098194020188708, "learning_rate": 7.376506932987956e-06, "loss": 0.2793, "step": 7535 }, { "epoch": 0.5970291146761735, "grad_norm": 1.162930319798335, "learning_rate": 7.374030868111326e-06, "loss": 0.2118, "step": 7536 }, { "epoch": 0.5971083382848089, "grad_norm": 1.3675315864210198, "learning_rate": 7.371554976142128e-06, "loss": 0.2394, "step": 7537 }, { "epoch": 0.5971875618934442, "grad_norm": 1.3157797402425673, "learning_rate": 7.369079257243388e-06, "loss": 0.262, "step": 7538 }, { "epoch": 0.5972667855020796, "grad_norm": 1.241262629533937, "learning_rate": 7.366603711578119e-06, "loss": 0.1741, "step": 7539 }, { "epoch": 0.597346009110715, "grad_norm": 1.0737004901610891, "learning_rate": 7.364128339309326e-06, "loss": 0.1747, "step": 7540 }, { "epoch": 0.5974252327193503, "grad_norm": 1.6515854999616628, "learning_rate": 7.361653140599997e-06, "loss": 0.2809, "step": 7541 }, { "epoch": 0.5975044563279858, "grad_norm": 1.214640346893805, "learning_rate": 7.359178115613116e-06, "loss": 0.2101, "step": 7542 }, { "epoch": 0.5975836799366211, "grad_norm": 1.5687227286702028, "learning_rate": 7.356703264511646e-06, "loss": 0.3295, "step": 7543 }, { "epoch": 0.5976629035452565, "grad_norm": 1.3371952710416235, "learning_rate": 7.354228587458549e-06, "loss": 0.2467, "step": 7544 }, { "epoch": 0.5977421271538919, "grad_norm": 1.6742897953652043, "learning_rate": 7.351754084616771e-06, "loss": 0.2696, "step": 7545 }, { "epoch": 0.5978213507625272, "grad_norm": 1.6202427255169483, "learning_rate": 7.349279756149241e-06, "loss": 0.294, "step": 7546 }, { "epoch": 0.5979005743711626, "grad_norm": 1.145232105313392, "learning_rate": 7.346805602218885e-06, "loss": 0.1856, "step": 7547 }, { "epoch": 0.597979797979798, "grad_norm": 1.2489985364064866, "learning_rate": 7.344331622988616e-06, "loss": 0.1871, "step": 7548 }, { "epoch": 0.5980590215884334, "grad_norm": 1.190098770449158, "learning_rate": 7.341857818621328e-06, "loss": 0.1944, "step": 7549 }, { "epoch": 0.5981382451970687, "grad_norm": 1.1875903422994931, "learning_rate": 7.339384189279917e-06, "loss": 0.1735, "step": 7550 }, { "epoch": 0.5982174688057041, "grad_norm": 1.1442166451653273, "learning_rate": 7.33691073512725e-06, "loss": 0.2234, "step": 7551 }, { "epoch": 0.5982966924143395, "grad_norm": 1.2501289177988377, "learning_rate": 7.3344374563262e-06, "loss": 0.2216, "step": 7552 }, { "epoch": 0.5983759160229748, "grad_norm": 1.2439122423072726, "learning_rate": 7.3319643530396175e-06, "loss": 0.2552, "step": 7553 }, { "epoch": 0.5984551396316102, "grad_norm": 1.3609535658775174, "learning_rate": 7.329491425430344e-06, "loss": 0.2249, "step": 7554 }, { "epoch": 0.5985343632402456, "grad_norm": 1.7554836451089235, "learning_rate": 7.327018673661209e-06, "loss": 0.3718, "step": 7555 }, { "epoch": 0.598613586848881, "grad_norm": 1.3091489250488109, "learning_rate": 7.324546097895036e-06, "loss": 0.2797, "step": 7556 }, { "epoch": 0.5986928104575163, "grad_norm": 1.2228300459839474, "learning_rate": 7.3220736982946275e-06, "loss": 0.2146, "step": 7557 }, { "epoch": 0.5987720340661518, "grad_norm": 1.4658117667642692, "learning_rate": 7.3196014750227815e-06, "loss": 0.3217, "step": 7558 }, { "epoch": 0.5988512576747871, "grad_norm": 1.0709370498880546, "learning_rate": 7.317129428242279e-06, "loss": 0.219, "step": 7559 }, { "epoch": 0.5989304812834224, "grad_norm": 1.5411691154734617, "learning_rate": 7.3146575581158945e-06, "loss": 0.2521, "step": 7560 }, { "epoch": 0.5990097048920578, "grad_norm": 1.507784814225209, "learning_rate": 7.312185864806391e-06, "loss": 0.284, "step": 7561 }, { "epoch": 0.5990889285006932, "grad_norm": 1.5397344550307526, "learning_rate": 7.309714348476513e-06, "loss": 0.2678, "step": 7562 }, { "epoch": 0.5991681521093286, "grad_norm": 1.2582193892350415, "learning_rate": 7.307243009289005e-06, "loss": 0.2144, "step": 7563 }, { "epoch": 0.5992473757179639, "grad_norm": 1.29034387370778, "learning_rate": 7.304771847406582e-06, "loss": 0.2591, "step": 7564 }, { "epoch": 0.5993265993265994, "grad_norm": 1.2722615946466806, "learning_rate": 7.3023008629919665e-06, "loss": 0.222, "step": 7565 }, { "epoch": 0.5994058229352347, "grad_norm": 1.185571388012682, "learning_rate": 7.299830056207861e-06, "loss": 0.1994, "step": 7566 }, { "epoch": 0.59948504654387, "grad_norm": 1.1144300415417774, "learning_rate": 7.29735942721695e-06, "loss": 0.2111, "step": 7567 }, { "epoch": 0.5995642701525055, "grad_norm": 1.6675233219565557, "learning_rate": 7.294888976181919e-06, "loss": 0.3327, "step": 7568 }, { "epoch": 0.5996434937611408, "grad_norm": 1.4716918778419212, "learning_rate": 7.2924187032654335e-06, "loss": 0.2673, "step": 7569 }, { "epoch": 0.5997227173697762, "grad_norm": 1.2864802650628842, "learning_rate": 7.289948608630146e-06, "loss": 0.2492, "step": 7570 }, { "epoch": 0.5998019409784116, "grad_norm": 1.9405470338841433, "learning_rate": 7.287478692438705e-06, "loss": 0.3413, "step": 7571 }, { "epoch": 0.599881164587047, "grad_norm": 1.3919885563331966, "learning_rate": 7.285008954853739e-06, "loss": 0.2946, "step": 7572 }, { "epoch": 0.5999603881956823, "grad_norm": 1.3527190620007843, "learning_rate": 7.282539396037868e-06, "loss": 0.25, "step": 7573 }, { "epoch": 0.6000396118043176, "grad_norm": 1.3575830560168818, "learning_rate": 7.280070016153706e-06, "loss": 0.247, "step": 7574 }, { "epoch": 0.6001188354129531, "grad_norm": 1.1074610957125472, "learning_rate": 7.277600815363842e-06, "loss": 0.1799, "step": 7575 }, { "epoch": 0.6001980590215884, "grad_norm": 1.4974324744478347, "learning_rate": 7.275131793830865e-06, "loss": 0.2427, "step": 7576 }, { "epoch": 0.6002772826302238, "grad_norm": 1.4819571534446438, "learning_rate": 7.272662951717352e-06, "loss": 0.285, "step": 7577 }, { "epoch": 0.6003565062388592, "grad_norm": 1.1200596221058652, "learning_rate": 7.270194289185858e-06, "loss": 0.1978, "step": 7578 }, { "epoch": 0.6004357298474946, "grad_norm": 1.4567852423629415, "learning_rate": 7.267725806398936e-06, "loss": 0.2565, "step": 7579 }, { "epoch": 0.6005149534561299, "grad_norm": 1.6309369064281796, "learning_rate": 7.265257503519122e-06, "loss": 0.2526, "step": 7580 }, { "epoch": 0.6005941770647653, "grad_norm": 1.4367210835566138, "learning_rate": 7.262789380708942e-06, "loss": 0.3525, "step": 7581 }, { "epoch": 0.6006734006734007, "grad_norm": 1.2838400073077956, "learning_rate": 7.260321438130913e-06, "loss": 0.2382, "step": 7582 }, { "epoch": 0.600752624282036, "grad_norm": 1.3781989739168623, "learning_rate": 7.257853675947533e-06, "loss": 0.2087, "step": 7583 }, { "epoch": 0.6008318478906715, "grad_norm": 1.3718476582603227, "learning_rate": 7.255386094321293e-06, "loss": 0.2938, "step": 7584 }, { "epoch": 0.6009110714993068, "grad_norm": 1.2773596554900355, "learning_rate": 7.2529186934146756e-06, "loss": 0.1827, "step": 7585 }, { "epoch": 0.6009902951079422, "grad_norm": 1.4263887915558608, "learning_rate": 7.250451473390141e-06, "loss": 0.2592, "step": 7586 }, { "epoch": 0.6010695187165775, "grad_norm": 1.2470390430746008, "learning_rate": 7.24798443441015e-06, "loss": 0.2346, "step": 7587 }, { "epoch": 0.6011487423252129, "grad_norm": 1.6550969227994585, "learning_rate": 7.24551757663714e-06, "loss": 0.2215, "step": 7588 }, { "epoch": 0.6012279659338483, "grad_norm": 1.2391121337914008, "learning_rate": 7.2430509002335434e-06, "loss": 0.2657, "step": 7589 }, { "epoch": 0.6013071895424836, "grad_norm": 1.4223756864133885, "learning_rate": 7.240584405361781e-06, "loss": 0.2925, "step": 7590 }, { "epoch": 0.6013864131511191, "grad_norm": 1.1830896443593542, "learning_rate": 7.238118092184256e-06, "loss": 0.2278, "step": 7591 }, { "epoch": 0.6014656367597544, "grad_norm": 1.31474535313264, "learning_rate": 7.2356519608633665e-06, "loss": 0.2805, "step": 7592 }, { "epoch": 0.6015448603683898, "grad_norm": 1.5988782456451411, "learning_rate": 7.233186011561498e-06, "loss": 0.4399, "step": 7593 }, { "epoch": 0.6016240839770252, "grad_norm": 1.237152187497868, "learning_rate": 7.230720244441016e-06, "loss": 0.2222, "step": 7594 }, { "epoch": 0.6017033075856605, "grad_norm": 1.4422977852783678, "learning_rate": 7.228254659664278e-06, "loss": 0.3135, "step": 7595 }, { "epoch": 0.6017825311942959, "grad_norm": 1.2867701459483227, "learning_rate": 7.225789257393636e-06, "loss": 0.2205, "step": 7596 }, { "epoch": 0.6018617548029312, "grad_norm": 1.358466347550248, "learning_rate": 7.223324037791421e-06, "loss": 0.288, "step": 7597 }, { "epoch": 0.6019409784115667, "grad_norm": 1.4813948425701016, "learning_rate": 7.220859001019957e-06, "loss": 0.2921, "step": 7598 }, { "epoch": 0.602020202020202, "grad_norm": 1.475011915392138, "learning_rate": 7.218394147241559e-06, "loss": 0.2789, "step": 7599 }, { "epoch": 0.6020994256288374, "grad_norm": 1.4199374867012704, "learning_rate": 7.2159294766185174e-06, "loss": 0.2562, "step": 7600 }, { "epoch": 0.6021786492374728, "grad_norm": 1.1078324594846334, "learning_rate": 7.213464989313126e-06, "loss": 0.2555, "step": 7601 }, { "epoch": 0.6022578728461081, "grad_norm": 1.205078372849288, "learning_rate": 7.211000685487658e-06, "loss": 0.257, "step": 7602 }, { "epoch": 0.6023370964547435, "grad_norm": 1.3474617361512962, "learning_rate": 7.208536565304374e-06, "loss": 0.3349, "step": 7603 }, { "epoch": 0.6024163200633789, "grad_norm": 1.296315277738826, "learning_rate": 7.206072628925526e-06, "loss": 0.2403, "step": 7604 }, { "epoch": 0.6024955436720143, "grad_norm": 1.2907027882147537, "learning_rate": 7.203608876513351e-06, "loss": 0.2365, "step": 7605 }, { "epoch": 0.6025747672806496, "grad_norm": 1.2602089604938873, "learning_rate": 7.201145308230075e-06, "loss": 0.1982, "step": 7606 }, { "epoch": 0.602653990889285, "grad_norm": 1.482015580486737, "learning_rate": 7.198681924237918e-06, "loss": 0.1965, "step": 7607 }, { "epoch": 0.6027332144979204, "grad_norm": 1.3410721016847686, "learning_rate": 7.196218724699072e-06, "loss": 0.2991, "step": 7608 }, { "epoch": 0.6028124381065557, "grad_norm": 1.3224091380902647, "learning_rate": 7.193755709775734e-06, "loss": 0.2715, "step": 7609 }, { "epoch": 0.6028916617151912, "grad_norm": 1.3255702135615999, "learning_rate": 7.191292879630081e-06, "loss": 0.3004, "step": 7610 }, { "epoch": 0.6029708853238265, "grad_norm": 1.0371932607578918, "learning_rate": 7.188830234424275e-06, "loss": 0.1657, "step": 7611 }, { "epoch": 0.6030501089324619, "grad_norm": 1.5098590101199332, "learning_rate": 7.186367774320474e-06, "loss": 0.3258, "step": 7612 }, { "epoch": 0.6031293325410972, "grad_norm": 1.5422270160827176, "learning_rate": 7.1839054994808145e-06, "loss": 0.3902, "step": 7613 }, { "epoch": 0.6032085561497326, "grad_norm": 1.312643343052515, "learning_rate": 7.181443410067428e-06, "loss": 0.1997, "step": 7614 }, { "epoch": 0.603287779758368, "grad_norm": 1.417290301611864, "learning_rate": 7.1789815062424325e-06, "loss": 0.2415, "step": 7615 }, { "epoch": 0.6033670033670033, "grad_norm": 1.4662280227092654, "learning_rate": 7.176519788167929e-06, "loss": 0.2777, "step": 7616 }, { "epoch": 0.6034462269756388, "grad_norm": 1.652959151890591, "learning_rate": 7.174058256006012e-06, "loss": 0.3207, "step": 7617 }, { "epoch": 0.6035254505842741, "grad_norm": 1.3832409994574812, "learning_rate": 7.171596909918763e-06, "loss": 0.2664, "step": 7618 }, { "epoch": 0.6036046741929095, "grad_norm": 1.4818966972735843, "learning_rate": 7.169135750068247e-06, "loss": 0.3078, "step": 7619 }, { "epoch": 0.6036838978015449, "grad_norm": 1.3124057107514822, "learning_rate": 7.1666747766165226e-06, "loss": 0.3103, "step": 7620 }, { "epoch": 0.6037631214101802, "grad_norm": 1.2642671973250477, "learning_rate": 7.164213989725628e-06, "loss": 0.229, "step": 7621 }, { "epoch": 0.6038423450188156, "grad_norm": 1.4475338696578857, "learning_rate": 7.1617533895575975e-06, "loss": 0.2366, "step": 7622 }, { "epoch": 0.6039215686274509, "grad_norm": 1.1362475466523527, "learning_rate": 7.1592929762744515e-06, "loss": 0.249, "step": 7623 }, { "epoch": 0.6040007922360864, "grad_norm": 1.4969631805560057, "learning_rate": 7.156832750038192e-06, "loss": 0.3206, "step": 7624 }, { "epoch": 0.6040800158447217, "grad_norm": 1.480775787039677, "learning_rate": 7.154372711010815e-06, "loss": 0.3075, "step": 7625 }, { "epoch": 0.6041592394533571, "grad_norm": 1.0438062446654606, "learning_rate": 7.1519128593543065e-06, "loss": 0.2199, "step": 7626 }, { "epoch": 0.6042384630619925, "grad_norm": 1.2425749351325481, "learning_rate": 7.149453195230629e-06, "loss": 0.2482, "step": 7627 }, { "epoch": 0.6043176866706278, "grad_norm": 1.5276301750159949, "learning_rate": 7.1469937188017444e-06, "loss": 0.3, "step": 7628 }, { "epoch": 0.6043969102792632, "grad_norm": 1.3645058575409816, "learning_rate": 7.144534430229595e-06, "loss": 0.2666, "step": 7629 }, { "epoch": 0.6044761338878986, "grad_norm": 1.3346557313714589, "learning_rate": 7.142075329676112e-06, "loss": 0.3015, "step": 7630 }, { "epoch": 0.604555357496534, "grad_norm": 1.5178466886404989, "learning_rate": 7.139616417303221e-06, "loss": 0.3404, "step": 7631 }, { "epoch": 0.6046345811051693, "grad_norm": 1.5751998276511372, "learning_rate": 7.137157693272822e-06, "loss": 0.319, "step": 7632 }, { "epoch": 0.6047138047138048, "grad_norm": 1.2243817083712045, "learning_rate": 7.1346991577468136e-06, "loss": 0.2456, "step": 7633 }, { "epoch": 0.6047930283224401, "grad_norm": 1.576997019983935, "learning_rate": 7.132240810887083e-06, "loss": 0.3, "step": 7634 }, { "epoch": 0.6048722519310754, "grad_norm": 1.276460742090886, "learning_rate": 7.129782652855492e-06, "loss": 0.2325, "step": 7635 }, { "epoch": 0.6049514755397108, "grad_norm": 1.2307566147077864, "learning_rate": 7.127324683813906e-06, "loss": 0.2649, "step": 7636 }, { "epoch": 0.6050306991483462, "grad_norm": 1.4509190114186126, "learning_rate": 7.124866903924164e-06, "loss": 0.2656, "step": 7637 }, { "epoch": 0.6051099227569816, "grad_norm": 1.1120443691618007, "learning_rate": 7.122409313348102e-06, "loss": 0.183, "step": 7638 }, { "epoch": 0.6051891463656169, "grad_norm": 1.5302864414371933, "learning_rate": 7.119951912247545e-06, "loss": 0.3274, "step": 7639 }, { "epoch": 0.6052683699742524, "grad_norm": 1.2440057244227303, "learning_rate": 7.117494700784292e-06, "loss": 0.2593, "step": 7640 }, { "epoch": 0.6053475935828877, "grad_norm": 1.2163018543505084, "learning_rate": 7.115037679120147e-06, "loss": 0.2439, "step": 7641 }, { "epoch": 0.605426817191523, "grad_norm": 1.3989399837038783, "learning_rate": 7.112580847416886e-06, "loss": 0.2719, "step": 7642 }, { "epoch": 0.6055060408001585, "grad_norm": 1.451482042129622, "learning_rate": 7.110124205836283e-06, "loss": 0.2781, "step": 7643 }, { "epoch": 0.6055852644087938, "grad_norm": 1.1850817091931767, "learning_rate": 7.107667754540097e-06, "loss": 0.1898, "step": 7644 }, { "epoch": 0.6056644880174292, "grad_norm": 1.3807420375681736, "learning_rate": 7.105211493690073e-06, "loss": 0.2276, "step": 7645 }, { "epoch": 0.6057437116260646, "grad_norm": 1.2778309598246989, "learning_rate": 7.102755423447941e-06, "loss": 0.3443, "step": 7646 }, { "epoch": 0.6058229352347, "grad_norm": 1.3612511716945264, "learning_rate": 7.100299543975426e-06, "loss": 0.2833, "step": 7647 }, { "epoch": 0.6059021588433353, "grad_norm": 1.3438388616097634, "learning_rate": 7.097843855434232e-06, "loss": 0.238, "step": 7648 }, { "epoch": 0.6059813824519706, "grad_norm": 1.06734096536117, "learning_rate": 7.09538835798606e-06, "loss": 0.2218, "step": 7649 }, { "epoch": 0.6060606060606061, "grad_norm": 1.7653414371877851, "learning_rate": 7.092933051792583e-06, "loss": 0.3245, "step": 7650 }, { "epoch": 0.6061398296692414, "grad_norm": 1.4484162132772, "learning_rate": 7.090477937015479e-06, "loss": 0.2901, "step": 7651 }, { "epoch": 0.6062190532778768, "grad_norm": 1.4631815501374397, "learning_rate": 7.088023013816403e-06, "loss": 0.2601, "step": 7652 }, { "epoch": 0.6062982768865122, "grad_norm": 1.302104715331595, "learning_rate": 7.085568282357e-06, "loss": 0.1908, "step": 7653 }, { "epoch": 0.6063775004951476, "grad_norm": 1.1477863929626102, "learning_rate": 7.083113742798901e-06, "loss": 0.2326, "step": 7654 }, { "epoch": 0.6064567241037829, "grad_norm": 1.3271057650593732, "learning_rate": 7.080659395303729e-06, "loss": 0.239, "step": 7655 }, { "epoch": 0.6065359477124183, "grad_norm": 1.4422446146305785, "learning_rate": 7.078205240033087e-06, "loss": 0.3386, "step": 7656 }, { "epoch": 0.6066151713210537, "grad_norm": 1.2683362562076268, "learning_rate": 7.075751277148574e-06, "loss": 0.2658, "step": 7657 }, { "epoch": 0.606694394929689, "grad_norm": 1.2103415811882574, "learning_rate": 7.073297506811766e-06, "loss": 0.2344, "step": 7658 }, { "epoch": 0.6067736185383245, "grad_norm": 1.7299228955644137, "learning_rate": 7.0708439291842345e-06, "loss": 0.3189, "step": 7659 }, { "epoch": 0.6068528421469598, "grad_norm": 1.436109037777133, "learning_rate": 7.068390544427539e-06, "loss": 0.3035, "step": 7660 }, { "epoch": 0.6069320657555952, "grad_norm": 1.1486437387618742, "learning_rate": 7.065937352703218e-06, "loss": 0.1783, "step": 7661 }, { "epoch": 0.6070112893642305, "grad_norm": 1.6571791017563637, "learning_rate": 7.063484354172804e-06, "loss": 0.2971, "step": 7662 }, { "epoch": 0.6070905129728659, "grad_norm": 1.2819092805765941, "learning_rate": 7.061031548997818e-06, "loss": 0.2742, "step": 7663 }, { "epoch": 0.6071697365815013, "grad_norm": 1.6164089896622902, "learning_rate": 7.058578937339759e-06, "loss": 0.2814, "step": 7664 }, { "epoch": 0.6072489601901366, "grad_norm": 1.1338041250321742, "learning_rate": 7.056126519360129e-06, "loss": 0.2087, "step": 7665 }, { "epoch": 0.6073281837987721, "grad_norm": 1.4153650065011405, "learning_rate": 7.053674295220399e-06, "loss": 0.2774, "step": 7666 }, { "epoch": 0.6074074074074074, "grad_norm": 1.0904547920568635, "learning_rate": 7.05122226508204e-06, "loss": 0.2073, "step": 7667 }, { "epoch": 0.6074866310160428, "grad_norm": 1.2735500978240153, "learning_rate": 7.048770429106509e-06, "loss": 0.2182, "step": 7668 }, { "epoch": 0.6075658546246782, "grad_norm": 1.2202602223211134, "learning_rate": 7.0463187874552415e-06, "loss": 0.1987, "step": 7669 }, { "epoch": 0.6076450782333135, "grad_norm": 1.6657327480832729, "learning_rate": 7.043867340289672e-06, "loss": 0.3109, "step": 7670 }, { "epoch": 0.6077243018419489, "grad_norm": 1.546529025544999, "learning_rate": 7.0414160877712155e-06, "loss": 0.3119, "step": 7671 }, { "epoch": 0.6078035254505842, "grad_norm": 1.5595210707850917, "learning_rate": 7.038965030061273e-06, "loss": 0.3367, "step": 7672 }, { "epoch": 0.6078827490592197, "grad_norm": 1.4489660816512373, "learning_rate": 7.0365141673212336e-06, "loss": 0.3363, "step": 7673 }, { "epoch": 0.607961972667855, "grad_norm": 1.3846116873889731, "learning_rate": 7.034063499712479e-06, "loss": 0.2654, "step": 7674 }, { "epoch": 0.6080411962764904, "grad_norm": 1.4727879496742948, "learning_rate": 7.031613027396369e-06, "loss": 0.2991, "step": 7675 }, { "epoch": 0.6081204198851258, "grad_norm": 1.093452263137143, "learning_rate": 7.029162750534259e-06, "loss": 0.1996, "step": 7676 }, { "epoch": 0.6081996434937611, "grad_norm": 1.3466974223453194, "learning_rate": 7.02671266928749e-06, "loss": 0.3031, "step": 7677 }, { "epoch": 0.6082788671023965, "grad_norm": 1.4547073963412491, "learning_rate": 7.024262783817382e-06, "loss": 0.2758, "step": 7678 }, { "epoch": 0.6083580907110319, "grad_norm": 1.0736126359896117, "learning_rate": 7.02181309428525e-06, "loss": 0.1938, "step": 7679 }, { "epoch": 0.6084373143196673, "grad_norm": 1.3088296906648964, "learning_rate": 7.0193636008524e-06, "loss": 0.1881, "step": 7680 }, { "epoch": 0.6085165379283026, "grad_norm": 1.3886848472259685, "learning_rate": 7.016914303680111e-06, "loss": 0.3084, "step": 7681 }, { "epoch": 0.6085957615369381, "grad_norm": 1.2613166203389428, "learning_rate": 7.014465202929665e-06, "loss": 0.2663, "step": 7682 }, { "epoch": 0.6086749851455734, "grad_norm": 1.6515958637953383, "learning_rate": 7.012016298762317e-06, "loss": 0.2547, "step": 7683 }, { "epoch": 0.6087542087542087, "grad_norm": 1.5680217077627265, "learning_rate": 7.009567591339319e-06, "loss": 0.267, "step": 7684 }, { "epoch": 0.6088334323628442, "grad_norm": 1.3609889060529607, "learning_rate": 7.007119080821908e-06, "loss": 0.2134, "step": 7685 }, { "epoch": 0.6089126559714795, "grad_norm": 1.2815317668378545, "learning_rate": 7.004670767371302e-06, "loss": 0.2573, "step": 7686 }, { "epoch": 0.6089918795801149, "grad_norm": 1.4211930606019993, "learning_rate": 7.002222651148714e-06, "loss": 0.26, "step": 7687 }, { "epoch": 0.6090711031887502, "grad_norm": 1.5759770865439906, "learning_rate": 6.999774732315343e-06, "loss": 0.228, "step": 7688 }, { "epoch": 0.6091503267973856, "grad_norm": 1.2649741365817415, "learning_rate": 6.9973270110323666e-06, "loss": 0.3179, "step": 7689 }, { "epoch": 0.609229550406021, "grad_norm": 1.4724172675170308, "learning_rate": 6.994879487460961e-06, "loss": 0.2535, "step": 7690 }, { "epoch": 0.6093087740146563, "grad_norm": 1.3633123981322597, "learning_rate": 6.992432161762278e-06, "loss": 0.2103, "step": 7691 }, { "epoch": 0.6093879976232918, "grad_norm": 1.1961069374163367, "learning_rate": 6.989985034097466e-06, "loss": 0.2661, "step": 7692 }, { "epoch": 0.6094672212319271, "grad_norm": 1.100296627858807, "learning_rate": 6.9875381046276605e-06, "loss": 0.2293, "step": 7693 }, { "epoch": 0.6095464448405625, "grad_norm": 1.3163332359817324, "learning_rate": 6.985091373513972e-06, "loss": 0.2662, "step": 7694 }, { "epoch": 0.6096256684491979, "grad_norm": 1.5701687072096584, "learning_rate": 6.982644840917509e-06, "loss": 0.2711, "step": 7695 }, { "epoch": 0.6097048920578332, "grad_norm": 1.5999833871568183, "learning_rate": 6.980198506999368e-06, "loss": 0.2725, "step": 7696 }, { "epoch": 0.6097841156664686, "grad_norm": 1.3046754328671022, "learning_rate": 6.977752371920623e-06, "loss": 0.2719, "step": 7697 }, { "epoch": 0.6098633392751039, "grad_norm": 1.166379536581183, "learning_rate": 6.975306435842344e-06, "loss": 0.239, "step": 7698 }, { "epoch": 0.6099425628837394, "grad_norm": 1.3532412927885529, "learning_rate": 6.97286069892558e-06, "loss": 0.3494, "step": 7699 }, { "epoch": 0.6100217864923747, "grad_norm": 1.8497233726219748, "learning_rate": 6.970415161331373e-06, "loss": 0.3164, "step": 7700 }, { "epoch": 0.6101010101010101, "grad_norm": 1.4577701930336793, "learning_rate": 6.967969823220752e-06, "loss": 0.3132, "step": 7701 }, { "epoch": 0.6101802337096455, "grad_norm": 1.2740756093705055, "learning_rate": 6.965524684754729e-06, "loss": 0.1977, "step": 7702 }, { "epoch": 0.6102594573182808, "grad_norm": 1.6928182834385006, "learning_rate": 6.963079746094302e-06, "loss": 0.3022, "step": 7703 }, { "epoch": 0.6103386809269162, "grad_norm": 1.1915882985252264, "learning_rate": 6.960635007400465e-06, "loss": 0.1831, "step": 7704 }, { "epoch": 0.6104179045355516, "grad_norm": 0.9612231998826694, "learning_rate": 6.9581904688341854e-06, "loss": 0.1583, "step": 7705 }, { "epoch": 0.610497128144187, "grad_norm": 1.2584465451269586, "learning_rate": 6.955746130556429e-06, "loss": 0.2703, "step": 7706 }, { "epoch": 0.6105763517528223, "grad_norm": 1.3272191941585112, "learning_rate": 6.95330199272814e-06, "loss": 0.256, "step": 7707 }, { "epoch": 0.6106555753614578, "grad_norm": 1.4735213475794482, "learning_rate": 6.950858055510254e-06, "loss": 0.2835, "step": 7708 }, { "epoch": 0.6107347989700931, "grad_norm": 1.5163448598653948, "learning_rate": 6.948414319063696e-06, "loss": 0.2501, "step": 7709 }, { "epoch": 0.6108140225787284, "grad_norm": 1.045999863935323, "learning_rate": 6.945970783549372e-06, "loss": 0.2008, "step": 7710 }, { "epoch": 0.6108932461873638, "grad_norm": 1.36572657805259, "learning_rate": 6.943527449128174e-06, "loss": 0.2031, "step": 7711 }, { "epoch": 0.6109724697959992, "grad_norm": 1.4901676569597133, "learning_rate": 6.9410843159609905e-06, "loss": 0.2991, "step": 7712 }, { "epoch": 0.6110516934046346, "grad_norm": 1.646298927811571, "learning_rate": 6.9386413842086845e-06, "loss": 0.355, "step": 7713 }, { "epoch": 0.6111309170132699, "grad_norm": 1.2423640550843618, "learning_rate": 6.936198654032114e-06, "loss": 0.2717, "step": 7714 }, { "epoch": 0.6112101406219054, "grad_norm": 1.2973883818946998, "learning_rate": 6.933756125592117e-06, "loss": 0.2152, "step": 7715 }, { "epoch": 0.6112893642305407, "grad_norm": 1.2275036350913804, "learning_rate": 6.931313799049526e-06, "loss": 0.2509, "step": 7716 }, { "epoch": 0.611368587839176, "grad_norm": 1.312081765391115, "learning_rate": 6.928871674565158e-06, "loss": 0.232, "step": 7717 }, { "epoch": 0.6114478114478115, "grad_norm": 1.3661461186965818, "learning_rate": 6.926429752299812e-06, "loss": 0.3271, "step": 7718 }, { "epoch": 0.6115270350564468, "grad_norm": 1.882720190549972, "learning_rate": 6.923988032414277e-06, "loss": 0.3696, "step": 7719 }, { "epoch": 0.6116062586650822, "grad_norm": 1.1984863845970999, "learning_rate": 6.9215465150693305e-06, "loss": 0.2027, "step": 7720 }, { "epoch": 0.6116854822737176, "grad_norm": 1.429597771750269, "learning_rate": 6.919105200425733e-06, "loss": 0.2919, "step": 7721 }, { "epoch": 0.611764705882353, "grad_norm": 1.5184198041344545, "learning_rate": 6.916664088644234e-06, "loss": 0.2871, "step": 7722 }, { "epoch": 0.6118439294909883, "grad_norm": 1.4086076423644345, "learning_rate": 6.914223179885567e-06, "loss": 0.2428, "step": 7723 }, { "epoch": 0.6119231530996236, "grad_norm": 1.2572304389546765, "learning_rate": 6.911782474310456e-06, "loss": 0.236, "step": 7724 }, { "epoch": 0.6120023767082591, "grad_norm": 1.6418119743764823, "learning_rate": 6.909341972079613e-06, "loss": 0.3402, "step": 7725 }, { "epoch": 0.6120816003168944, "grad_norm": 1.1670073821444726, "learning_rate": 6.9069016733537255e-06, "loss": 0.2307, "step": 7726 }, { "epoch": 0.6121608239255298, "grad_norm": 1.564452810238282, "learning_rate": 6.904461578293483e-06, "loss": 0.2565, "step": 7727 }, { "epoch": 0.6122400475341652, "grad_norm": 1.5527829802522377, "learning_rate": 6.902021687059549e-06, "loss": 0.3159, "step": 7728 }, { "epoch": 0.6123192711428006, "grad_norm": 1.6433272836333535, "learning_rate": 6.89958199981258e-06, "loss": 0.3423, "step": 7729 }, { "epoch": 0.6123984947514359, "grad_norm": 1.8738589396243654, "learning_rate": 6.89714251671322e-06, "loss": 0.3125, "step": 7730 }, { "epoch": 0.6124777183600713, "grad_norm": 1.1874115241160907, "learning_rate": 6.894703237922094e-06, "loss": 0.2464, "step": 7731 }, { "epoch": 0.6125569419687067, "grad_norm": 1.8512966006014047, "learning_rate": 6.892264163599817e-06, "loss": 0.3413, "step": 7732 }, { "epoch": 0.612636165577342, "grad_norm": 1.2406554799048644, "learning_rate": 6.889825293906993e-06, "loss": 0.2473, "step": 7733 }, { "epoch": 0.6127153891859775, "grad_norm": 1.4156787630362848, "learning_rate": 6.887386629004207e-06, "loss": 0.256, "step": 7734 }, { "epoch": 0.6127946127946128, "grad_norm": 1.5132544175692206, "learning_rate": 6.884948169052037e-06, "loss": 0.2709, "step": 7735 }, { "epoch": 0.6128738364032482, "grad_norm": 2.067735977357367, "learning_rate": 6.88250991421104e-06, "loss": 0.2776, "step": 7736 }, { "epoch": 0.6129530600118835, "grad_norm": 1.4397452115485452, "learning_rate": 6.880071864641762e-06, "loss": 0.2494, "step": 7737 }, { "epoch": 0.6130322836205189, "grad_norm": 1.2730386636922504, "learning_rate": 6.8776340205047446e-06, "loss": 0.2682, "step": 7738 }, { "epoch": 0.6131115072291543, "grad_norm": 1.3112896925886945, "learning_rate": 6.875196381960498e-06, "loss": 0.2414, "step": 7739 }, { "epoch": 0.6131907308377896, "grad_norm": 1.0521531302110294, "learning_rate": 6.872758949169536e-06, "loss": 0.2107, "step": 7740 }, { "epoch": 0.6132699544464251, "grad_norm": 1.1492510187805138, "learning_rate": 6.8703217222923525e-06, "loss": 0.2349, "step": 7741 }, { "epoch": 0.6133491780550604, "grad_norm": 1.022767235650454, "learning_rate": 6.867884701489421e-06, "loss": 0.1664, "step": 7742 }, { "epoch": 0.6134284016636958, "grad_norm": 1.246149490561892, "learning_rate": 6.865447886921215e-06, "loss": 0.1421, "step": 7743 }, { "epoch": 0.6135076252723312, "grad_norm": 1.6341138540698272, "learning_rate": 6.86301127874818e-06, "loss": 0.3103, "step": 7744 }, { "epoch": 0.6135868488809665, "grad_norm": 1.4571288520051886, "learning_rate": 6.860574877130757e-06, "loss": 0.282, "step": 7745 }, { "epoch": 0.6136660724896019, "grad_norm": 1.1698689438632988, "learning_rate": 6.8581386822293765e-06, "loss": 0.2366, "step": 7746 }, { "epoch": 0.6137452960982372, "grad_norm": 1.6744235509914767, "learning_rate": 6.8557026942044425e-06, "loss": 0.3847, "step": 7747 }, { "epoch": 0.6138245197068727, "grad_norm": 1.0594846265669022, "learning_rate": 6.853266913216357e-06, "loss": 0.1388, "step": 7748 }, { "epoch": 0.613903743315508, "grad_norm": 1.3872101678269653, "learning_rate": 6.850831339425508e-06, "loss": 0.2847, "step": 7749 }, { "epoch": 0.6139829669241434, "grad_norm": 1.4442779616701826, "learning_rate": 6.848395972992261e-06, "loss": 0.2721, "step": 7750 }, { "epoch": 0.6140621905327788, "grad_norm": 1.5233426138249149, "learning_rate": 6.845960814076973e-06, "loss": 0.3488, "step": 7751 }, { "epoch": 0.6141414141414141, "grad_norm": 1.382163340681494, "learning_rate": 6.8435258628399905e-06, "loss": 0.2964, "step": 7752 }, { "epoch": 0.6142206377500495, "grad_norm": 1.321844255477333, "learning_rate": 6.841091119441639e-06, "loss": 0.2742, "step": 7753 }, { "epoch": 0.6142998613586849, "grad_norm": 1.407107473806448, "learning_rate": 6.8386565840422385e-06, "loss": 0.2881, "step": 7754 }, { "epoch": 0.6143790849673203, "grad_norm": 1.2948420184462874, "learning_rate": 6.836222256802093e-06, "loss": 0.219, "step": 7755 }, { "epoch": 0.6144583085759556, "grad_norm": 1.3238417128741429, "learning_rate": 6.833788137881486e-06, "loss": 0.3172, "step": 7756 }, { "epoch": 0.6145375321845911, "grad_norm": 1.2385372749502246, "learning_rate": 6.8313542274406964e-06, "loss": 0.2717, "step": 7757 }, { "epoch": 0.6146167557932264, "grad_norm": 1.007569470426892, "learning_rate": 6.828920525639985e-06, "loss": 0.1992, "step": 7758 }, { "epoch": 0.6146959794018617, "grad_norm": 1.1787572754542879, "learning_rate": 6.826487032639597e-06, "loss": 0.2152, "step": 7759 }, { "epoch": 0.6147752030104972, "grad_norm": 1.3642322573829562, "learning_rate": 6.8240537485997704e-06, "loss": 0.2747, "step": 7760 }, { "epoch": 0.6148544266191325, "grad_norm": 1.5193715878449312, "learning_rate": 6.821620673680721e-06, "loss": 0.297, "step": 7761 }, { "epoch": 0.6149336502277679, "grad_norm": 1.39310703365241, "learning_rate": 6.819187808042656e-06, "loss": 0.2786, "step": 7762 }, { "epoch": 0.6150128738364032, "grad_norm": 1.6407176420538385, "learning_rate": 6.816755151845771e-06, "loss": 0.3333, "step": 7763 }, { "epoch": 0.6150920974450387, "grad_norm": 1.4982201325932958, "learning_rate": 6.814322705250241e-06, "loss": 0.3212, "step": 7764 }, { "epoch": 0.615171321053674, "grad_norm": 1.6815880092237951, "learning_rate": 6.8118904684162325e-06, "loss": 0.377, "step": 7765 }, { "epoch": 0.6152505446623093, "grad_norm": 1.5737255499392573, "learning_rate": 6.8094584415038975e-06, "loss": 0.3937, "step": 7766 }, { "epoch": 0.6153297682709448, "grad_norm": 1.278821402629601, "learning_rate": 6.807026624673372e-06, "loss": 0.2233, "step": 7767 }, { "epoch": 0.6154089918795801, "grad_norm": 1.1800119192224856, "learning_rate": 6.80459501808478e-06, "loss": 0.2195, "step": 7768 }, { "epoch": 0.6154882154882155, "grad_norm": 1.1942410774626977, "learning_rate": 6.8021636218982275e-06, "loss": 0.1952, "step": 7769 }, { "epoch": 0.6155674390968509, "grad_norm": 1.2058949477151222, "learning_rate": 6.799732436273816e-06, "loss": 0.2194, "step": 7770 }, { "epoch": 0.6156466627054862, "grad_norm": 1.1194869878543667, "learning_rate": 6.797301461371626e-06, "loss": 0.2013, "step": 7771 }, { "epoch": 0.6157258863141216, "grad_norm": 1.429570216790275, "learning_rate": 6.7948706973517235e-06, "loss": 0.2476, "step": 7772 }, { "epoch": 0.6158051099227569, "grad_norm": 1.3034841215222428, "learning_rate": 6.792440144374162e-06, "loss": 0.275, "step": 7773 }, { "epoch": 0.6158843335313924, "grad_norm": 1.152664346651917, "learning_rate": 6.790009802598984e-06, "loss": 0.21, "step": 7774 }, { "epoch": 0.6159635571400277, "grad_norm": 1.4544537788642788, "learning_rate": 6.787579672186215e-06, "loss": 0.3069, "step": 7775 }, { "epoch": 0.6160427807486631, "grad_norm": 0.9895825427148084, "learning_rate": 6.78514975329587e-06, "loss": 0.1693, "step": 7776 }, { "epoch": 0.6161220043572985, "grad_norm": 1.2583732512982744, "learning_rate": 6.78272004608794e-06, "loss": 0.2255, "step": 7777 }, { "epoch": 0.6162012279659338, "grad_norm": 1.5195780759360493, "learning_rate": 6.780290550722417e-06, "loss": 0.4331, "step": 7778 }, { "epoch": 0.6162804515745692, "grad_norm": 1.1795453733648757, "learning_rate": 6.777861267359272e-06, "loss": 0.2066, "step": 7779 }, { "epoch": 0.6163596751832046, "grad_norm": 1.5333298623501757, "learning_rate": 6.7754321961584535e-06, "loss": 0.2336, "step": 7780 }, { "epoch": 0.61643889879184, "grad_norm": 1.1174812412087562, "learning_rate": 6.773003337279911e-06, "loss": 0.1891, "step": 7781 }, { "epoch": 0.6165181224004753, "grad_norm": 1.223592985208414, "learning_rate": 6.7705746908835734e-06, "loss": 0.2797, "step": 7782 }, { "epoch": 0.6165973460091108, "grad_norm": 1.0826977734484358, "learning_rate": 6.768146257129351e-06, "loss": 0.2157, "step": 7783 }, { "epoch": 0.6166765696177461, "grad_norm": 1.5628539638921424, "learning_rate": 6.765718036177148e-06, "loss": 0.369, "step": 7784 }, { "epoch": 0.6167557932263814, "grad_norm": 1.3651095380859373, "learning_rate": 6.763290028186849e-06, "loss": 0.2958, "step": 7785 }, { "epoch": 0.6168350168350168, "grad_norm": 1.4408720965076283, "learning_rate": 6.760862233318327e-06, "loss": 0.2942, "step": 7786 }, { "epoch": 0.6169142404436522, "grad_norm": 1.7356013934880312, "learning_rate": 6.758434651731445e-06, "loss": 0.3133, "step": 7787 }, { "epoch": 0.6169934640522876, "grad_norm": 1.2284488376324045, "learning_rate": 6.756007283586039e-06, "loss": 0.2357, "step": 7788 }, { "epoch": 0.6170726876609229, "grad_norm": 1.1350879978097135, "learning_rate": 6.753580129041945e-06, "loss": 0.2467, "step": 7789 }, { "epoch": 0.6171519112695584, "grad_norm": 1.2389020391180534, "learning_rate": 6.751153188258983e-06, "loss": 0.2583, "step": 7790 }, { "epoch": 0.6172311348781937, "grad_norm": 1.449969054861684, "learning_rate": 6.748726461396946e-06, "loss": 0.209, "step": 7791 }, { "epoch": 0.617310358486829, "grad_norm": 1.0899637416630972, "learning_rate": 6.7462999486156315e-06, "loss": 0.2141, "step": 7792 }, { "epoch": 0.6173895820954645, "grad_norm": 1.068208711496623, "learning_rate": 6.743873650074807e-06, "loss": 0.2102, "step": 7793 }, { "epoch": 0.6174688057040998, "grad_norm": 1.2790955957950543, "learning_rate": 6.741447565934236e-06, "loss": 0.2604, "step": 7794 }, { "epoch": 0.6175480293127352, "grad_norm": 1.113377807828955, "learning_rate": 6.739021696353665e-06, "loss": 0.1867, "step": 7795 }, { "epoch": 0.6176272529213706, "grad_norm": 1.6958804608095717, "learning_rate": 6.736596041492821e-06, "loss": 0.3465, "step": 7796 }, { "epoch": 0.617706476530006, "grad_norm": 1.532662396731456, "learning_rate": 6.734170601511427e-06, "loss": 0.3379, "step": 7797 }, { "epoch": 0.6177857001386413, "grad_norm": 1.4325661972155246, "learning_rate": 6.7317453765691855e-06, "loss": 0.2166, "step": 7798 }, { "epoch": 0.6178649237472766, "grad_norm": 1.0810134276551888, "learning_rate": 6.729320366825785e-06, "loss": 0.1891, "step": 7799 }, { "epoch": 0.6179441473559121, "grad_norm": 1.4236772235474453, "learning_rate": 6.726895572440901e-06, "loss": 0.2813, "step": 7800 }, { "epoch": 0.6180233709645474, "grad_norm": 1.5092256028731807, "learning_rate": 6.7244709935741925e-06, "loss": 0.3087, "step": 7801 }, { "epoch": 0.6181025945731828, "grad_norm": 1.5979792887277424, "learning_rate": 6.722046630385309e-06, "loss": 0.3196, "step": 7802 }, { "epoch": 0.6181818181818182, "grad_norm": 1.5055075254314938, "learning_rate": 6.719622483033883e-06, "loss": 0.305, "step": 7803 }, { "epoch": 0.6182610417904536, "grad_norm": 1.1393192551815847, "learning_rate": 6.7171985516795315e-06, "loss": 0.2066, "step": 7804 }, { "epoch": 0.6183402653990889, "grad_norm": 1.268081009863507, "learning_rate": 6.714774836481862e-06, "loss": 0.2761, "step": 7805 }, { "epoch": 0.6184194890077243, "grad_norm": 1.3522460011113968, "learning_rate": 6.71235133760046e-06, "loss": 0.2176, "step": 7806 }, { "epoch": 0.6184987126163597, "grad_norm": 1.2098124392195162, "learning_rate": 6.709928055194902e-06, "loss": 0.2217, "step": 7807 }, { "epoch": 0.618577936224995, "grad_norm": 1.2087322220014571, "learning_rate": 6.707504989424753e-06, "loss": 0.2854, "step": 7808 }, { "epoch": 0.6186571598336305, "grad_norm": 1.1241695770250928, "learning_rate": 6.705082140449557e-06, "loss": 0.1981, "step": 7809 }, { "epoch": 0.6187363834422658, "grad_norm": 1.5880513437450594, "learning_rate": 6.702659508428847e-06, "loss": 0.2637, "step": 7810 }, { "epoch": 0.6188156070509012, "grad_norm": 1.3693406675072954, "learning_rate": 6.7002370935221454e-06, "loss": 0.2713, "step": 7811 }, { "epoch": 0.6188948306595365, "grad_norm": 1.170654097612544, "learning_rate": 6.697814895888951e-06, "loss": 0.2192, "step": 7812 }, { "epoch": 0.6189740542681719, "grad_norm": 1.3087175467841219, "learning_rate": 6.695392915688759e-06, "loss": 0.2481, "step": 7813 }, { "epoch": 0.6190532778768073, "grad_norm": 1.4849130638357622, "learning_rate": 6.692971153081041e-06, "loss": 0.3214, "step": 7814 }, { "epoch": 0.6191325014854426, "grad_norm": 1.3953250991898063, "learning_rate": 6.690549608225258e-06, "loss": 0.2264, "step": 7815 }, { "epoch": 0.6192117250940781, "grad_norm": 1.370734142632818, "learning_rate": 6.688128281280863e-06, "loss": 0.2355, "step": 7816 }, { "epoch": 0.6192909487027134, "grad_norm": 1.2819694950918628, "learning_rate": 6.685707172407284e-06, "loss": 0.1974, "step": 7817 }, { "epoch": 0.6193701723113488, "grad_norm": 1.6678432907521146, "learning_rate": 6.683286281763939e-06, "loss": 0.2446, "step": 7818 }, { "epoch": 0.6194493959199842, "grad_norm": 1.1343649598551384, "learning_rate": 6.6808656095102365e-06, "loss": 0.191, "step": 7819 }, { "epoch": 0.6195286195286195, "grad_norm": 1.0976398957495892, "learning_rate": 6.6784451558055596e-06, "loss": 0.2147, "step": 7820 }, { "epoch": 0.6196078431372549, "grad_norm": 1.414591058325258, "learning_rate": 6.67602492080929e-06, "loss": 0.2328, "step": 7821 }, { "epoch": 0.6196870667458902, "grad_norm": 1.5082495366457516, "learning_rate": 6.6736049046807815e-06, "loss": 0.2742, "step": 7822 }, { "epoch": 0.6197662903545257, "grad_norm": 1.2146270981585556, "learning_rate": 6.671185107579387e-06, "loss": 0.1722, "step": 7823 }, { "epoch": 0.619845513963161, "grad_norm": 1.1089253094008462, "learning_rate": 6.668765529664436e-06, "loss": 0.152, "step": 7824 }, { "epoch": 0.6199247375717964, "grad_norm": 1.2432384518883137, "learning_rate": 6.6663461710952445e-06, "loss": 0.2288, "step": 7825 }, { "epoch": 0.6200039611804318, "grad_norm": 1.4470943878159777, "learning_rate": 6.663927032031118e-06, "loss": 0.2928, "step": 7826 }, { "epoch": 0.6200831847890671, "grad_norm": 1.5076932585509855, "learning_rate": 6.661508112631347e-06, "loss": 0.2546, "step": 7827 }, { "epoch": 0.6201624083977025, "grad_norm": 1.571916336192168, "learning_rate": 6.659089413055202e-06, "loss": 0.3052, "step": 7828 }, { "epoch": 0.6202416320063379, "grad_norm": 1.3894933548873982, "learning_rate": 6.656670933461942e-06, "loss": 0.2695, "step": 7829 }, { "epoch": 0.6203208556149733, "grad_norm": 1.0872391840674955, "learning_rate": 6.654252674010815e-06, "loss": 0.1721, "step": 7830 }, { "epoch": 0.6204000792236086, "grad_norm": 1.064168591015102, "learning_rate": 6.6518346348610484e-06, "loss": 0.1547, "step": 7831 }, { "epoch": 0.6204793028322441, "grad_norm": 1.2802428101478698, "learning_rate": 6.649416816171861e-06, "loss": 0.2642, "step": 7832 }, { "epoch": 0.6205585264408794, "grad_norm": 1.4539983747523386, "learning_rate": 6.646999218102457e-06, "loss": 0.2649, "step": 7833 }, { "epoch": 0.6206377500495147, "grad_norm": 1.5812233432610567, "learning_rate": 6.644581840812019e-06, "loss": 0.2845, "step": 7834 }, { "epoch": 0.6207169736581502, "grad_norm": 1.5996091098100835, "learning_rate": 6.64216468445972e-06, "loss": 0.3499, "step": 7835 }, { "epoch": 0.6207961972667855, "grad_norm": 1.3978420024613916, "learning_rate": 6.639747749204723e-06, "loss": 0.2936, "step": 7836 }, { "epoch": 0.6208754208754209, "grad_norm": 1.2812456399799128, "learning_rate": 6.637331035206166e-06, "loss": 0.192, "step": 7837 }, { "epoch": 0.6209546444840562, "grad_norm": 1.6261109627284294, "learning_rate": 6.634914542623182e-06, "loss": 0.3179, "step": 7838 }, { "epoch": 0.6210338680926917, "grad_norm": 1.333396456551822, "learning_rate": 6.632498271614882e-06, "loss": 0.31, "step": 7839 }, { "epoch": 0.621113091701327, "grad_norm": 1.4797066334661229, "learning_rate": 6.630082222340366e-06, "loss": 0.2998, "step": 7840 }, { "epoch": 0.6211923153099623, "grad_norm": 1.45164227806152, "learning_rate": 6.627666394958725e-06, "loss": 0.3126, "step": 7841 }, { "epoch": 0.6212715389185978, "grad_norm": 1.737152882985017, "learning_rate": 6.625250789629021e-06, "loss": 0.2985, "step": 7842 }, { "epoch": 0.6213507625272331, "grad_norm": 1.1370470924073617, "learning_rate": 6.622835406510315e-06, "loss": 0.2198, "step": 7843 }, { "epoch": 0.6214299861358685, "grad_norm": 1.4729672412875099, "learning_rate": 6.620420245761651e-06, "loss": 0.253, "step": 7844 }, { "epoch": 0.6215092097445039, "grad_norm": 1.300988908160649, "learning_rate": 6.6180053075420484e-06, "loss": 0.2385, "step": 7845 }, { "epoch": 0.6215884333531392, "grad_norm": 1.5699782638837072, "learning_rate": 6.615590592010526e-06, "loss": 0.3094, "step": 7846 }, { "epoch": 0.6216676569617746, "grad_norm": 1.473281039707279, "learning_rate": 6.613176099326077e-06, "loss": 0.241, "step": 7847 }, { "epoch": 0.6217468805704099, "grad_norm": 1.113483652990401, "learning_rate": 6.610761829647685e-06, "loss": 0.1978, "step": 7848 }, { "epoch": 0.6218261041790454, "grad_norm": 1.360539071493947, "learning_rate": 6.608347783134319e-06, "loss": 0.2553, "step": 7849 }, { "epoch": 0.6219053277876807, "grad_norm": 1.2327850961705231, "learning_rate": 6.605933959944933e-06, "loss": 0.2659, "step": 7850 }, { "epoch": 0.6219845513963161, "grad_norm": 1.1907455991686198, "learning_rate": 6.603520360238462e-06, "loss": 0.2421, "step": 7851 }, { "epoch": 0.6220637750049515, "grad_norm": 1.4671631812125632, "learning_rate": 6.601106984173835e-06, "loss": 0.2675, "step": 7852 }, { "epoch": 0.6221429986135868, "grad_norm": 1.2849240582613972, "learning_rate": 6.598693831909957e-06, "loss": 0.2875, "step": 7853 }, { "epoch": 0.6222222222222222, "grad_norm": 1.4438224010003577, "learning_rate": 6.596280903605725e-06, "loss": 0.3196, "step": 7854 }, { "epoch": 0.6223014458308576, "grad_norm": 1.3191640947961132, "learning_rate": 6.593868199420017e-06, "loss": 0.2612, "step": 7855 }, { "epoch": 0.622380669439493, "grad_norm": 1.4503705790415486, "learning_rate": 6.591455719511699e-06, "loss": 0.2706, "step": 7856 }, { "epoch": 0.6224598930481283, "grad_norm": 1.5041727883374938, "learning_rate": 6.589043464039624e-06, "loss": 0.3972, "step": 7857 }, { "epoch": 0.6225391166567638, "grad_norm": 1.5429877257400408, "learning_rate": 6.58663143316262e-06, "loss": 0.3489, "step": 7858 }, { "epoch": 0.6226183402653991, "grad_norm": 1.4556036248722355, "learning_rate": 6.584219627039513e-06, "loss": 0.3174, "step": 7859 }, { "epoch": 0.6226975638740344, "grad_norm": 1.3495884674443839, "learning_rate": 6.58180804582911e-06, "loss": 0.2731, "step": 7860 }, { "epoch": 0.6227767874826698, "grad_norm": 1.229775798580066, "learning_rate": 6.579396689690198e-06, "loss": 0.2403, "step": 7861 }, { "epoch": 0.6228560110913052, "grad_norm": 1.3210697496673112, "learning_rate": 6.576985558781557e-06, "loss": 0.2539, "step": 7862 }, { "epoch": 0.6229352346999406, "grad_norm": 1.4186420081770639, "learning_rate": 6.574574653261945e-06, "loss": 0.3054, "step": 7863 }, { "epoch": 0.6230144583085759, "grad_norm": 1.2521601700348162, "learning_rate": 6.572163973290109e-06, "loss": 0.2679, "step": 7864 }, { "epoch": 0.6230936819172114, "grad_norm": 1.6865582370350147, "learning_rate": 6.569753519024784e-06, "loss": 0.3346, "step": 7865 }, { "epoch": 0.6231729055258467, "grad_norm": 1.6728242113314875, "learning_rate": 6.567343290624683e-06, "loss": 0.2515, "step": 7866 }, { "epoch": 0.623252129134482, "grad_norm": 1.0310324890464801, "learning_rate": 6.564933288248509e-06, "loss": 0.1736, "step": 7867 }, { "epoch": 0.6233313527431175, "grad_norm": 1.2877800114096616, "learning_rate": 6.562523512054951e-06, "loss": 0.2237, "step": 7868 }, { "epoch": 0.6234105763517528, "grad_norm": 1.3010640430093539, "learning_rate": 6.560113962202679e-06, "loss": 0.1949, "step": 7869 }, { "epoch": 0.6234897999603882, "grad_norm": 1.360701075201637, "learning_rate": 6.557704638850352e-06, "loss": 0.2671, "step": 7870 }, { "epoch": 0.6235690235690236, "grad_norm": 1.5045777587901634, "learning_rate": 6.555295542156609e-06, "loss": 0.2996, "step": 7871 }, { "epoch": 0.623648247177659, "grad_norm": 1.3987349807546454, "learning_rate": 6.55288667228008e-06, "loss": 0.2374, "step": 7872 }, { "epoch": 0.6237274707862943, "grad_norm": 1.3726309683596492, "learning_rate": 6.550478029379379e-06, "loss": 0.3302, "step": 7873 }, { "epoch": 0.6238066943949296, "grad_norm": 1.3422415988420477, "learning_rate": 6.548069613613099e-06, "loss": 0.2699, "step": 7874 }, { "epoch": 0.6238859180035651, "grad_norm": 1.4771107362273952, "learning_rate": 6.545661425139827e-06, "loss": 0.3082, "step": 7875 }, { "epoch": 0.6239651416122004, "grad_norm": 1.1439122437650966, "learning_rate": 6.543253464118131e-06, "loss": 0.1911, "step": 7876 }, { "epoch": 0.6240443652208358, "grad_norm": 1.3750241773055052, "learning_rate": 6.540845730706557e-06, "loss": 0.2409, "step": 7877 }, { "epoch": 0.6241235888294712, "grad_norm": 1.1334701504850064, "learning_rate": 6.538438225063653e-06, "loss": 0.2246, "step": 7878 }, { "epoch": 0.6242028124381066, "grad_norm": 1.2744100356036898, "learning_rate": 6.536030947347931e-06, "loss": 0.1619, "step": 7879 }, { "epoch": 0.6242820360467419, "grad_norm": 1.356265927746559, "learning_rate": 6.533623897717905e-06, "loss": 0.2923, "step": 7880 }, { "epoch": 0.6243612596553773, "grad_norm": 1.7969961616461871, "learning_rate": 6.531217076332068e-06, "loss": 0.3339, "step": 7881 }, { "epoch": 0.6244404832640127, "grad_norm": 1.028256064515624, "learning_rate": 6.528810483348893e-06, "loss": 0.2179, "step": 7882 }, { "epoch": 0.624519706872648, "grad_norm": 1.2845900586565773, "learning_rate": 6.526404118926848e-06, "loss": 0.2258, "step": 7883 }, { "epoch": 0.6245989304812835, "grad_norm": 1.3564910475185137, "learning_rate": 6.523997983224375e-06, "loss": 0.2134, "step": 7884 }, { "epoch": 0.6246781540899188, "grad_norm": 1.4488507660186598, "learning_rate": 6.52159207639991e-06, "loss": 0.3189, "step": 7885 }, { "epoch": 0.6247573776985542, "grad_norm": 1.2704437460350062, "learning_rate": 6.519186398611872e-06, "loss": 0.2244, "step": 7886 }, { "epoch": 0.6248366013071895, "grad_norm": 1.4763807725933376, "learning_rate": 6.51678095001866e-06, "loss": 0.1966, "step": 7887 }, { "epoch": 0.6249158249158249, "grad_norm": 1.222871590649851, "learning_rate": 6.51437573077866e-06, "loss": 0.2023, "step": 7888 }, { "epoch": 0.6249950485244603, "grad_norm": 1.157591028130183, "learning_rate": 6.5119707410502495e-06, "loss": 0.171, "step": 7889 }, { "epoch": 0.6250742721330956, "grad_norm": 1.2708367551985662, "learning_rate": 6.509565980991781e-06, "loss": 0.2439, "step": 7890 }, { "epoch": 0.6251534957417311, "grad_norm": 1.1816391458178548, "learning_rate": 6.5071614507615985e-06, "loss": 0.2012, "step": 7891 }, { "epoch": 0.6252327193503664, "grad_norm": 1.0269538697081408, "learning_rate": 6.5047571505180265e-06, "loss": 0.216, "step": 7892 }, { "epoch": 0.6253119429590018, "grad_norm": 1.9270199626841715, "learning_rate": 6.502353080419379e-06, "loss": 0.4559, "step": 7893 }, { "epoch": 0.6253911665676372, "grad_norm": 1.5655967262342554, "learning_rate": 6.4999492406239525e-06, "loss": 0.2552, "step": 7894 }, { "epoch": 0.6254703901762725, "grad_norm": 1.2875754867098208, "learning_rate": 6.497545631290025e-06, "loss": 0.2306, "step": 7895 }, { "epoch": 0.6255496137849079, "grad_norm": 1.2069262045484102, "learning_rate": 6.495142252575866e-06, "loss": 0.2279, "step": 7896 }, { "epoch": 0.6256288373935432, "grad_norm": 1.2335206564036638, "learning_rate": 6.492739104639727e-06, "loss": 0.2351, "step": 7897 }, { "epoch": 0.6257080610021787, "grad_norm": 1.213606316899808, "learning_rate": 6.490336187639841e-06, "loss": 0.178, "step": 7898 }, { "epoch": 0.625787284610814, "grad_norm": 1.3864068886610745, "learning_rate": 6.487933501734429e-06, "loss": 0.2918, "step": 7899 }, { "epoch": 0.6258665082194494, "grad_norm": 1.2782345427445867, "learning_rate": 6.485531047081697e-06, "loss": 0.2264, "step": 7900 }, { "epoch": 0.6259457318280848, "grad_norm": 1.7215899775331072, "learning_rate": 6.483128823839835e-06, "loss": 0.3407, "step": 7901 }, { "epoch": 0.6260249554367201, "grad_norm": 1.3305414033458183, "learning_rate": 6.480726832167019e-06, "loss": 0.2411, "step": 7902 }, { "epoch": 0.6261041790453555, "grad_norm": 1.2733483334165683, "learning_rate": 6.4783250722214066e-06, "loss": 0.2167, "step": 7903 }, { "epoch": 0.6261834026539909, "grad_norm": 1.1135076361338612, "learning_rate": 6.475923544161142e-06, "loss": 0.2394, "step": 7904 }, { "epoch": 0.6262626262626263, "grad_norm": 1.3461582826813059, "learning_rate": 6.473522248144359e-06, "loss": 0.2058, "step": 7905 }, { "epoch": 0.6263418498712616, "grad_norm": 1.064316761926121, "learning_rate": 6.471121184329167e-06, "loss": 0.1524, "step": 7906 }, { "epoch": 0.6264210734798971, "grad_norm": 1.4828105648414607, "learning_rate": 6.468720352873662e-06, "loss": 0.304, "step": 7907 }, { "epoch": 0.6265002970885324, "grad_norm": 1.3572785813027461, "learning_rate": 6.466319753935933e-06, "loss": 0.2474, "step": 7908 }, { "epoch": 0.6265795206971677, "grad_norm": 1.066587242392369, "learning_rate": 6.463919387674043e-06, "loss": 0.2126, "step": 7909 }, { "epoch": 0.6266587443058032, "grad_norm": 1.7415276609291013, "learning_rate": 6.461519254246046e-06, "loss": 0.4044, "step": 7910 }, { "epoch": 0.6267379679144385, "grad_norm": 1.3298089502106016, "learning_rate": 6.459119353809982e-06, "loss": 0.2452, "step": 7911 }, { "epoch": 0.6268171915230739, "grad_norm": 1.661440900791588, "learning_rate": 6.45671968652387e-06, "loss": 0.3367, "step": 7912 }, { "epoch": 0.6268964151317092, "grad_norm": 1.3651603244542607, "learning_rate": 6.4543202525457175e-06, "loss": 0.3134, "step": 7913 }, { "epoch": 0.6269756387403447, "grad_norm": 1.4048164691702776, "learning_rate": 6.451921052033516e-06, "loss": 0.2961, "step": 7914 }, { "epoch": 0.62705486234898, "grad_norm": 1.3361953354383622, "learning_rate": 6.449522085145241e-06, "loss": 0.2737, "step": 7915 }, { "epoch": 0.6271340859576153, "grad_norm": 1.149003513924878, "learning_rate": 6.447123352038853e-06, "loss": 0.1764, "step": 7916 }, { "epoch": 0.6272133095662508, "grad_norm": 1.3471055065035118, "learning_rate": 6.444724852872297e-06, "loss": 0.2685, "step": 7917 }, { "epoch": 0.6272925331748861, "grad_norm": 1.2276114647335168, "learning_rate": 6.4423265878035015e-06, "loss": 0.2226, "step": 7918 }, { "epoch": 0.6273717567835215, "grad_norm": 1.188488469219547, "learning_rate": 6.439928556990382e-06, "loss": 0.232, "step": 7919 }, { "epoch": 0.6274509803921569, "grad_norm": 1.402828012944689, "learning_rate": 6.437530760590838e-06, "loss": 0.2987, "step": 7920 }, { "epoch": 0.6275302040007923, "grad_norm": 1.7895268636931385, "learning_rate": 6.435133198762751e-06, "loss": 0.2609, "step": 7921 }, { "epoch": 0.6276094276094276, "grad_norm": 1.102775135462172, "learning_rate": 6.432735871663991e-06, "loss": 0.1933, "step": 7922 }, { "epoch": 0.6276886512180629, "grad_norm": 1.1671431399334748, "learning_rate": 6.430338779452407e-06, "loss": 0.2014, "step": 7923 }, { "epoch": 0.6277678748266984, "grad_norm": 1.460059218832911, "learning_rate": 6.4279419222858416e-06, "loss": 0.2962, "step": 7924 }, { "epoch": 0.6278470984353337, "grad_norm": 1.3251451811598105, "learning_rate": 6.4255453003221115e-06, "loss": 0.2646, "step": 7925 }, { "epoch": 0.6279263220439691, "grad_norm": 1.071990626031152, "learning_rate": 6.423148913719022e-06, "loss": 0.2182, "step": 7926 }, { "epoch": 0.6280055456526045, "grad_norm": 1.463140844815423, "learning_rate": 6.420752762634369e-06, "loss": 0.3495, "step": 7927 }, { "epoch": 0.6280847692612398, "grad_norm": 1.7618109014012373, "learning_rate": 6.4183568472259216e-06, "loss": 0.3039, "step": 7928 }, { "epoch": 0.6281639928698752, "grad_norm": 1.2088212086692502, "learning_rate": 6.415961167651443e-06, "loss": 0.2194, "step": 7929 }, { "epoch": 0.6282432164785106, "grad_norm": 1.3023905476271826, "learning_rate": 6.413565724068678e-06, "loss": 0.25, "step": 7930 }, { "epoch": 0.628322440087146, "grad_norm": 1.5106230695221274, "learning_rate": 6.4111705166353525e-06, "loss": 0.3293, "step": 7931 }, { "epoch": 0.6284016636957813, "grad_norm": 1.3437321579925263, "learning_rate": 6.40877554550918e-06, "loss": 0.2893, "step": 7932 }, { "epoch": 0.6284808873044168, "grad_norm": 1.3422776168889232, "learning_rate": 6.406380810847856e-06, "loss": 0.2189, "step": 7933 }, { "epoch": 0.6285601109130521, "grad_norm": 1.6842183198735798, "learning_rate": 6.403986312809065e-06, "loss": 0.3122, "step": 7934 }, { "epoch": 0.6286393345216874, "grad_norm": 1.1621268293310467, "learning_rate": 6.401592051550475e-06, "loss": 0.2065, "step": 7935 }, { "epoch": 0.6287185581303228, "grad_norm": 1.5883806928644246, "learning_rate": 6.399198027229732e-06, "loss": 0.2708, "step": 7936 }, { "epoch": 0.6287977817389582, "grad_norm": 1.3300264994041024, "learning_rate": 6.39680424000447e-06, "loss": 0.2705, "step": 7937 }, { "epoch": 0.6288770053475936, "grad_norm": 1.181351397387026, "learning_rate": 6.3944106900323174e-06, "loss": 0.2567, "step": 7938 }, { "epoch": 0.6289562289562289, "grad_norm": 1.4421441585754056, "learning_rate": 6.392017377470867e-06, "loss": 0.3056, "step": 7939 }, { "epoch": 0.6290354525648644, "grad_norm": 1.4291983578427043, "learning_rate": 6.389624302477715e-06, "loss": 0.2634, "step": 7940 }, { "epoch": 0.6291146761734997, "grad_norm": 1.421398808971498, "learning_rate": 6.387231465210428e-06, "loss": 0.2465, "step": 7941 }, { "epoch": 0.629193899782135, "grad_norm": 1.344573473955173, "learning_rate": 6.384838865826567e-06, "loss": 0.2981, "step": 7942 }, { "epoch": 0.6292731233907705, "grad_norm": 1.2131340886699908, "learning_rate": 6.382446504483672e-06, "loss": 0.2531, "step": 7943 }, { "epoch": 0.6293523469994058, "grad_norm": 1.4725237311461137, "learning_rate": 6.380054381339267e-06, "loss": 0.319, "step": 7944 }, { "epoch": 0.6294315706080412, "grad_norm": 1.3522484977876823, "learning_rate": 6.377662496550863e-06, "loss": 0.2295, "step": 7945 }, { "epoch": 0.6295107942166766, "grad_norm": 1.4569857053753255, "learning_rate": 6.375270850275956e-06, "loss": 0.2879, "step": 7946 }, { "epoch": 0.629590017825312, "grad_norm": 1.1392354980395054, "learning_rate": 6.37287944267202e-06, "loss": 0.2135, "step": 7947 }, { "epoch": 0.6296692414339473, "grad_norm": 1.4279972461409565, "learning_rate": 6.370488273896522e-06, "loss": 0.2462, "step": 7948 }, { "epoch": 0.6297484650425826, "grad_norm": 1.5304215447769638, "learning_rate": 6.368097344106905e-06, "loss": 0.2406, "step": 7949 }, { "epoch": 0.6298276886512181, "grad_norm": 1.3711366317779963, "learning_rate": 6.365706653460602e-06, "loss": 0.2472, "step": 7950 }, { "epoch": 0.6299069122598534, "grad_norm": 1.1960743131696108, "learning_rate": 6.363316202115033e-06, "loss": 0.2353, "step": 7951 }, { "epoch": 0.6299861358684888, "grad_norm": 1.2335483904974787, "learning_rate": 6.3609259902275884e-06, "loss": 0.1919, "step": 7952 }, { "epoch": 0.6300653594771242, "grad_norm": 1.2573339591647876, "learning_rate": 6.358536017955659e-06, "loss": 0.2759, "step": 7953 }, { "epoch": 0.6301445830857596, "grad_norm": 1.6335534015466822, "learning_rate": 6.3561462854566135e-06, "loss": 0.2305, "step": 7954 }, { "epoch": 0.6302238066943949, "grad_norm": 1.50173513891403, "learning_rate": 6.3537567928878e-06, "loss": 0.3171, "step": 7955 }, { "epoch": 0.6303030303030303, "grad_norm": 1.2106244228139458, "learning_rate": 6.3513675404065575e-06, "loss": 0.179, "step": 7956 }, { "epoch": 0.6303822539116657, "grad_norm": 1.3410385020453481, "learning_rate": 6.348978528170205e-06, "loss": 0.2723, "step": 7957 }, { "epoch": 0.630461477520301, "grad_norm": 1.1109539175552414, "learning_rate": 6.34658975633605e-06, "loss": 0.2144, "step": 7958 }, { "epoch": 0.6305407011289365, "grad_norm": 1.1604329522500991, "learning_rate": 6.344201225061382e-06, "loss": 0.1995, "step": 7959 }, { "epoch": 0.6306199247375718, "grad_norm": 1.5451625398131381, "learning_rate": 6.341812934503469e-06, "loss": 0.2753, "step": 7960 }, { "epoch": 0.6306991483462072, "grad_norm": 1.0907712760922053, "learning_rate": 6.339424884819574e-06, "loss": 0.1979, "step": 7961 }, { "epoch": 0.6307783719548425, "grad_norm": 1.4321159554861054, "learning_rate": 6.337037076166939e-06, "loss": 0.2376, "step": 7962 }, { "epoch": 0.6308575955634779, "grad_norm": 1.464011371325957, "learning_rate": 6.334649508702784e-06, "loss": 0.25, "step": 7963 }, { "epoch": 0.6309368191721133, "grad_norm": 1.1842917072394445, "learning_rate": 6.332262182584325e-06, "loss": 0.1981, "step": 7964 }, { "epoch": 0.6310160427807486, "grad_norm": 1.6624431311533532, "learning_rate": 6.3298750979687515e-06, "loss": 0.2409, "step": 7965 }, { "epoch": 0.6310952663893841, "grad_norm": 1.3020087291640237, "learning_rate": 6.327488255013244e-06, "loss": 0.1949, "step": 7966 }, { "epoch": 0.6311744899980194, "grad_norm": 1.7397515464689268, "learning_rate": 6.325101653874965e-06, "loss": 0.3581, "step": 7967 }, { "epoch": 0.6312537136066548, "grad_norm": 1.4357715584318613, "learning_rate": 6.322715294711057e-06, "loss": 0.2899, "step": 7968 }, { "epoch": 0.6313329372152902, "grad_norm": 1.0551460725633173, "learning_rate": 6.320329177678656e-06, "loss": 0.2118, "step": 7969 }, { "epoch": 0.6314121608239255, "grad_norm": 1.551773028752438, "learning_rate": 6.31794330293487e-06, "loss": 0.3104, "step": 7970 }, { "epoch": 0.6314913844325609, "grad_norm": 1.3773959896894676, "learning_rate": 6.315557670636803e-06, "loss": 0.277, "step": 7971 }, { "epoch": 0.6315706080411962, "grad_norm": 1.5496005972463787, "learning_rate": 6.313172280941534e-06, "loss": 0.3401, "step": 7972 }, { "epoch": 0.6316498316498317, "grad_norm": 1.189422442448734, "learning_rate": 6.31078713400613e-06, "loss": 0.2331, "step": 7973 }, { "epoch": 0.631729055258467, "grad_norm": 1.2019421472257314, "learning_rate": 6.308402229987641e-06, "loss": 0.1867, "step": 7974 }, { "epoch": 0.6318082788671024, "grad_norm": 1.325017322609509, "learning_rate": 6.3060175690431055e-06, "loss": 0.2416, "step": 7975 }, { "epoch": 0.6318875024757378, "grad_norm": 1.056175835548149, "learning_rate": 6.303633151329535e-06, "loss": 0.1663, "step": 7976 }, { "epoch": 0.6319667260843731, "grad_norm": 1.3293310055862992, "learning_rate": 6.3012489770039396e-06, "loss": 0.2022, "step": 7977 }, { "epoch": 0.6320459496930085, "grad_norm": 1.579073432393702, "learning_rate": 6.2988650462232995e-06, "loss": 0.2912, "step": 7978 }, { "epoch": 0.6321251733016439, "grad_norm": 1.2519327769615696, "learning_rate": 6.296481359144587e-06, "loss": 0.1929, "step": 7979 }, { "epoch": 0.6322043969102793, "grad_norm": 1.71041473158289, "learning_rate": 6.29409791592476e-06, "loss": 0.3188, "step": 7980 }, { "epoch": 0.6322836205189146, "grad_norm": 1.1347644233140113, "learning_rate": 6.2917147167207495e-06, "loss": 0.1712, "step": 7981 }, { "epoch": 0.6323628441275501, "grad_norm": 1.1843762479035767, "learning_rate": 6.289331761689482e-06, "loss": 0.2228, "step": 7982 }, { "epoch": 0.6324420677361854, "grad_norm": 1.53311235433751, "learning_rate": 6.286949050987868e-06, "loss": 0.2868, "step": 7983 }, { "epoch": 0.6325212913448207, "grad_norm": 1.302764049039856, "learning_rate": 6.284566584772791e-06, "loss": 0.2255, "step": 7984 }, { "epoch": 0.6326005149534562, "grad_norm": 1.2957824955745165, "learning_rate": 6.2821843632011245e-06, "loss": 0.2041, "step": 7985 }, { "epoch": 0.6326797385620915, "grad_norm": 1.969293870083056, "learning_rate": 6.2798023864297315e-06, "loss": 0.2917, "step": 7986 }, { "epoch": 0.6327589621707269, "grad_norm": 1.2233563381305874, "learning_rate": 6.277420654615449e-06, "loss": 0.2188, "step": 7987 }, { "epoch": 0.6328381857793622, "grad_norm": 1.1513267264730471, "learning_rate": 6.275039167915103e-06, "loss": 0.2073, "step": 7988 }, { "epoch": 0.6329174093879977, "grad_norm": 0.9315260048976783, "learning_rate": 6.2726579264855084e-06, "loss": 0.1426, "step": 7989 }, { "epoch": 0.632996632996633, "grad_norm": 1.4360417558532792, "learning_rate": 6.270276930483451e-06, "loss": 0.2376, "step": 7990 }, { "epoch": 0.6330758566052683, "grad_norm": 1.1726899624308016, "learning_rate": 6.267896180065711e-06, "loss": 0.2248, "step": 7991 }, { "epoch": 0.6331550802139038, "grad_norm": 0.9689849167460542, "learning_rate": 6.265515675389053e-06, "loss": 0.1758, "step": 7992 }, { "epoch": 0.6332343038225391, "grad_norm": 1.6434028325602796, "learning_rate": 6.263135416610217e-06, "loss": 0.3037, "step": 7993 }, { "epoch": 0.6333135274311745, "grad_norm": 1.417284940949376, "learning_rate": 6.260755403885934e-06, "loss": 0.3272, "step": 7994 }, { "epoch": 0.6333927510398099, "grad_norm": 1.3356281462827087, "learning_rate": 6.258375637372914e-06, "loss": 0.2352, "step": 7995 }, { "epoch": 0.6334719746484453, "grad_norm": 1.4068535366615593, "learning_rate": 6.2559961172278545e-06, "loss": 0.244, "step": 7996 }, { "epoch": 0.6335511982570806, "grad_norm": 1.3966119148081728, "learning_rate": 6.253616843607439e-06, "loss": 0.283, "step": 7997 }, { "epoch": 0.6336304218657159, "grad_norm": 1.3414003634640943, "learning_rate": 6.251237816668324e-06, "loss": 0.2806, "step": 7998 }, { "epoch": 0.6337096454743514, "grad_norm": 1.3611285228852037, "learning_rate": 6.248859036567162e-06, "loss": 0.2706, "step": 7999 }, { "epoch": 0.6337888690829867, "grad_norm": 1.3566240572591168, "learning_rate": 6.246480503460585e-06, "loss": 0.32, "step": 8000 }, { "epoch": 0.6338680926916221, "grad_norm": 1.4524789934657398, "learning_rate": 6.2441022175052034e-06, "loss": 0.2852, "step": 8001 }, { "epoch": 0.6339473163002575, "grad_norm": 1.3058458142880431, "learning_rate": 6.241724178857621e-06, "loss": 0.1823, "step": 8002 }, { "epoch": 0.6340265399088929, "grad_norm": 1.2251111763773705, "learning_rate": 6.2393463876744165e-06, "loss": 0.223, "step": 8003 }, { "epoch": 0.6341057635175282, "grad_norm": 1.332157357102398, "learning_rate": 6.236968844112157e-06, "loss": 0.2037, "step": 8004 }, { "epoch": 0.6341849871261636, "grad_norm": 1.3961917006140965, "learning_rate": 6.234591548327393e-06, "loss": 0.3648, "step": 8005 }, { "epoch": 0.634264210734799, "grad_norm": 1.4176685393081507, "learning_rate": 6.232214500476657e-06, "loss": 0.2373, "step": 8006 }, { "epoch": 0.6343434343434343, "grad_norm": 1.3710331613135092, "learning_rate": 6.229837700716465e-06, "loss": 0.2875, "step": 8007 }, { "epoch": 0.6344226579520698, "grad_norm": 1.0594316895398406, "learning_rate": 6.227461149203324e-06, "loss": 0.2316, "step": 8008 }, { "epoch": 0.6345018815607051, "grad_norm": 1.1444203954850434, "learning_rate": 6.225084846093711e-06, "loss": 0.1773, "step": 8009 }, { "epoch": 0.6345811051693404, "grad_norm": 1.3261424124115733, "learning_rate": 6.222708791544098e-06, "loss": 0.2409, "step": 8010 }, { "epoch": 0.6346603287779758, "grad_norm": 1.3358312534447354, "learning_rate": 6.220332985710936e-06, "loss": 0.2852, "step": 8011 }, { "epoch": 0.6347395523866112, "grad_norm": 1.5026752282340712, "learning_rate": 6.21795742875066e-06, "loss": 0.323, "step": 8012 }, { "epoch": 0.6348187759952466, "grad_norm": 1.4066423973431426, "learning_rate": 6.21558212081969e-06, "loss": 0.3013, "step": 8013 }, { "epoch": 0.6348979996038819, "grad_norm": 1.4722671985572602, "learning_rate": 6.213207062074427e-06, "loss": 0.2705, "step": 8014 }, { "epoch": 0.6349772232125174, "grad_norm": 1.0601271666965861, "learning_rate": 6.210832252671257e-06, "loss": 0.2131, "step": 8015 }, { "epoch": 0.6350564468211527, "grad_norm": 1.1400294517673846, "learning_rate": 6.208457692766554e-06, "loss": 0.1932, "step": 8016 }, { "epoch": 0.635135670429788, "grad_norm": 1.2491108110090605, "learning_rate": 6.206083382516665e-06, "loss": 0.2065, "step": 8017 }, { "epoch": 0.6352148940384235, "grad_norm": 1.321773297198763, "learning_rate": 6.203709322077933e-06, "loss": 0.2545, "step": 8018 }, { "epoch": 0.6352941176470588, "grad_norm": 1.1923136976366386, "learning_rate": 6.201335511606673e-06, "loss": 0.2056, "step": 8019 }, { "epoch": 0.6353733412556942, "grad_norm": 1.3347489964688852, "learning_rate": 6.198961951259193e-06, "loss": 0.2804, "step": 8020 }, { "epoch": 0.6354525648643295, "grad_norm": 1.1267880587626702, "learning_rate": 6.196588641191778e-06, "loss": 0.2438, "step": 8021 }, { "epoch": 0.635531788472965, "grad_norm": 1.1308980116832539, "learning_rate": 6.194215581560701e-06, "loss": 0.242, "step": 8022 }, { "epoch": 0.6356110120816003, "grad_norm": 1.2779739730243986, "learning_rate": 6.191842772522214e-06, "loss": 0.2337, "step": 8023 }, { "epoch": 0.6356902356902356, "grad_norm": 1.3624698490752212, "learning_rate": 6.18947021423256e-06, "loss": 0.3439, "step": 8024 }, { "epoch": 0.6357694592988711, "grad_norm": 1.0973915768768137, "learning_rate": 6.187097906847954e-06, "loss": 0.1771, "step": 8025 }, { "epoch": 0.6358486829075064, "grad_norm": 1.467508941145104, "learning_rate": 6.184725850524608e-06, "loss": 0.2596, "step": 8026 }, { "epoch": 0.6359279065161418, "grad_norm": 1.522003872481998, "learning_rate": 6.182354045418704e-06, "loss": 0.2466, "step": 8027 }, { "epoch": 0.6360071301247772, "grad_norm": 1.3645213643982332, "learning_rate": 6.179982491686416e-06, "loss": 0.221, "step": 8028 }, { "epoch": 0.6360863537334126, "grad_norm": 1.2548595312537048, "learning_rate": 6.177611189483903e-06, "loss": 0.2466, "step": 8029 }, { "epoch": 0.6361655773420479, "grad_norm": 1.106055212766811, "learning_rate": 6.175240138967299e-06, "loss": 0.2443, "step": 8030 }, { "epoch": 0.6362448009506833, "grad_norm": 1.3466327499411583, "learning_rate": 6.172869340292729e-06, "loss": 0.2148, "step": 8031 }, { "epoch": 0.6363240245593187, "grad_norm": 1.021235311324439, "learning_rate": 6.170498793616298e-06, "loss": 0.1594, "step": 8032 }, { "epoch": 0.636403248167954, "grad_norm": 1.2045430605283585, "learning_rate": 6.168128499094095e-06, "loss": 0.1911, "step": 8033 }, { "epoch": 0.6364824717765895, "grad_norm": 1.374392964832849, "learning_rate": 6.165758456882193e-06, "loss": 0.2621, "step": 8034 }, { "epoch": 0.6365616953852248, "grad_norm": 1.2347304732789428, "learning_rate": 6.163388667136646e-06, "loss": 0.2421, "step": 8035 }, { "epoch": 0.6366409189938602, "grad_norm": 1.2669424764477113, "learning_rate": 6.161019130013495e-06, "loss": 0.223, "step": 8036 }, { "epoch": 0.6367201426024955, "grad_norm": 1.4873313119263174, "learning_rate": 6.158649845668764e-06, "loss": 0.2653, "step": 8037 }, { "epoch": 0.6367993662111309, "grad_norm": 0.9596454291776326, "learning_rate": 6.156280814258455e-06, "loss": 0.1501, "step": 8038 }, { "epoch": 0.6368785898197663, "grad_norm": 1.200643575966026, "learning_rate": 6.153912035938559e-06, "loss": 0.1945, "step": 8039 }, { "epoch": 0.6369578134284016, "grad_norm": 1.1927866166700951, "learning_rate": 6.151543510865053e-06, "loss": 0.2033, "step": 8040 }, { "epoch": 0.6370370370370371, "grad_norm": 1.2795686409282219, "learning_rate": 6.149175239193887e-06, "loss": 0.2985, "step": 8041 }, { "epoch": 0.6371162606456724, "grad_norm": 0.9553649361006948, "learning_rate": 6.1468072210810035e-06, "loss": 0.1341, "step": 8042 }, { "epoch": 0.6371954842543078, "grad_norm": 1.5852703901838598, "learning_rate": 6.144439456682323e-06, "loss": 0.329, "step": 8043 }, { "epoch": 0.6372747078629432, "grad_norm": 1.482306706139458, "learning_rate": 6.142071946153751e-06, "loss": 0.2806, "step": 8044 }, { "epoch": 0.6373539314715785, "grad_norm": 1.1460548324101296, "learning_rate": 6.139704689651181e-06, "loss": 0.1685, "step": 8045 }, { "epoch": 0.6374331550802139, "grad_norm": 1.626341732291147, "learning_rate": 6.1373376873304814e-06, "loss": 0.2091, "step": 8046 }, { "epoch": 0.6375123786888492, "grad_norm": 1.001152699744232, "learning_rate": 6.134970939347511e-06, "loss": 0.1285, "step": 8047 }, { "epoch": 0.6375916022974847, "grad_norm": 1.008053339614477, "learning_rate": 6.132604445858104e-06, "loss": 0.1458, "step": 8048 }, { "epoch": 0.63767082590612, "grad_norm": 1.1157277496227973, "learning_rate": 6.130238207018085e-06, "loss": 0.1716, "step": 8049 }, { "epoch": 0.6377500495147554, "grad_norm": 1.6419352984436941, "learning_rate": 6.127872222983264e-06, "loss": 0.276, "step": 8050 }, { "epoch": 0.6378292731233908, "grad_norm": 1.20600993351154, "learning_rate": 6.125506493909422e-06, "loss": 0.208, "step": 8051 }, { "epoch": 0.6379084967320261, "grad_norm": 1.566105491328064, "learning_rate": 6.123141019952334e-06, "loss": 0.255, "step": 8052 }, { "epoch": 0.6379877203406615, "grad_norm": 1.4479775924949587, "learning_rate": 6.1207758012677595e-06, "loss": 0.2486, "step": 8053 }, { "epoch": 0.6380669439492969, "grad_norm": 1.2511611696692906, "learning_rate": 6.11841083801143e-06, "loss": 0.2055, "step": 8054 }, { "epoch": 0.6381461675579323, "grad_norm": 1.7104731442978196, "learning_rate": 6.116046130339073e-06, "loss": 0.295, "step": 8055 }, { "epoch": 0.6382253911665676, "grad_norm": 1.7903796169589645, "learning_rate": 6.1136816784063855e-06, "loss": 0.3202, "step": 8056 }, { "epoch": 0.6383046147752031, "grad_norm": 1.349185893522902, "learning_rate": 6.1113174823690615e-06, "loss": 0.2162, "step": 8057 }, { "epoch": 0.6383838383838384, "grad_norm": 1.3086956141157358, "learning_rate": 6.108953542382771e-06, "loss": 0.2543, "step": 8058 }, { "epoch": 0.6384630619924737, "grad_norm": 1.141986189379648, "learning_rate": 6.106589858603167e-06, "loss": 0.1621, "step": 8059 }, { "epoch": 0.6385422856011091, "grad_norm": 1.43661214316852, "learning_rate": 6.1042264311858845e-06, "loss": 0.3148, "step": 8060 }, { "epoch": 0.6386215092097445, "grad_norm": 1.8620339954626937, "learning_rate": 6.101863260286551e-06, "loss": 0.3747, "step": 8061 }, { "epoch": 0.6387007328183799, "grad_norm": 1.0105018161183208, "learning_rate": 6.099500346060765e-06, "loss": 0.1521, "step": 8062 }, { "epoch": 0.6387799564270152, "grad_norm": 1.2111547571213945, "learning_rate": 6.09713768866411e-06, "loss": 0.2178, "step": 8063 }, { "epoch": 0.6388591800356507, "grad_norm": 1.1808399021789684, "learning_rate": 6.094775288252157e-06, "loss": 0.1908, "step": 8064 }, { "epoch": 0.638938403644286, "grad_norm": 1.3504727635905611, "learning_rate": 6.092413144980465e-06, "loss": 0.2443, "step": 8065 }, { "epoch": 0.6390176272529213, "grad_norm": 1.3454894316807033, "learning_rate": 6.090051259004563e-06, "loss": 0.2283, "step": 8066 }, { "epoch": 0.6390968508615568, "grad_norm": 1.3156447316691806, "learning_rate": 6.087689630479974e-06, "loss": 0.2693, "step": 8067 }, { "epoch": 0.6391760744701921, "grad_norm": 1.6816845594710903, "learning_rate": 6.085328259562195e-06, "loss": 0.2772, "step": 8068 }, { "epoch": 0.6392552980788275, "grad_norm": 1.148164657575566, "learning_rate": 6.082967146406714e-06, "loss": 0.187, "step": 8069 }, { "epoch": 0.6393345216874629, "grad_norm": 1.5950127381559818, "learning_rate": 6.0806062911690025e-06, "loss": 0.2589, "step": 8070 }, { "epoch": 0.6394137452960983, "grad_norm": 1.2873938690263296, "learning_rate": 6.078245694004503e-06, "loss": 0.2335, "step": 8071 }, { "epoch": 0.6394929689047336, "grad_norm": 1.4644729728192902, "learning_rate": 6.075885355068658e-06, "loss": 0.2976, "step": 8072 }, { "epoch": 0.6395721925133689, "grad_norm": 1.421648315784826, "learning_rate": 6.073525274516879e-06, "loss": 0.2411, "step": 8073 }, { "epoch": 0.6396514161220044, "grad_norm": 1.1954237777976786, "learning_rate": 6.071165452504568e-06, "loss": 0.2523, "step": 8074 }, { "epoch": 0.6397306397306397, "grad_norm": 1.3567725475206658, "learning_rate": 6.068805889187109e-06, "loss": 0.2488, "step": 8075 }, { "epoch": 0.6398098633392751, "grad_norm": 1.2934966069985312, "learning_rate": 6.066446584719864e-06, "loss": 0.2265, "step": 8076 }, { "epoch": 0.6398890869479105, "grad_norm": 1.144472010160506, "learning_rate": 6.064087539258186e-06, "loss": 0.253, "step": 8077 }, { "epoch": 0.6399683105565459, "grad_norm": 1.3979526248631313, "learning_rate": 6.061728752957406e-06, "loss": 0.2382, "step": 8078 }, { "epoch": 0.6400475341651812, "grad_norm": 1.4491552380755233, "learning_rate": 6.059370225972834e-06, "loss": 0.2756, "step": 8079 }, { "epoch": 0.6401267577738166, "grad_norm": 1.3682277452711427, "learning_rate": 6.057011958459776e-06, "loss": 0.221, "step": 8080 }, { "epoch": 0.640205981382452, "grad_norm": 1.5430427389900436, "learning_rate": 6.0546539505735055e-06, "loss": 0.23, "step": 8081 }, { "epoch": 0.6402852049910873, "grad_norm": 1.7759455149578112, "learning_rate": 6.052296202469288e-06, "loss": 0.3947, "step": 8082 }, { "epoch": 0.6403644285997228, "grad_norm": 1.6337039711604564, "learning_rate": 6.049938714302372e-06, "loss": 0.3786, "step": 8083 }, { "epoch": 0.6404436522083581, "grad_norm": 1.111737770160778, "learning_rate": 6.047581486227984e-06, "loss": 0.1883, "step": 8084 }, { "epoch": 0.6405228758169934, "grad_norm": 1.1198638871192923, "learning_rate": 6.045224518401338e-06, "loss": 0.2154, "step": 8085 }, { "epoch": 0.6406020994256288, "grad_norm": 2.4032904982201178, "learning_rate": 6.04286781097763e-06, "loss": 0.3569, "step": 8086 }, { "epoch": 0.6406813230342642, "grad_norm": 1.420201500240393, "learning_rate": 6.040511364112034e-06, "loss": 0.2391, "step": 8087 }, { "epoch": 0.6407605466428996, "grad_norm": 1.3607410030656533, "learning_rate": 6.038155177959715e-06, "loss": 0.2567, "step": 8088 }, { "epoch": 0.6408397702515349, "grad_norm": 1.1934694222060973, "learning_rate": 6.035799252675811e-06, "loss": 0.1822, "step": 8089 }, { "epoch": 0.6409189938601704, "grad_norm": 1.272895635947715, "learning_rate": 6.0334435884154526e-06, "loss": 0.2073, "step": 8090 }, { "epoch": 0.6409982174688057, "grad_norm": 1.6594473483004053, "learning_rate": 6.031088185333751e-06, "loss": 0.3758, "step": 8091 }, { "epoch": 0.641077441077441, "grad_norm": 1.4848538734620187, "learning_rate": 6.028733043585793e-06, "loss": 0.2321, "step": 8092 }, { "epoch": 0.6411566646860765, "grad_norm": 1.3133230616462108, "learning_rate": 6.026378163326654e-06, "loss": 0.2826, "step": 8093 }, { "epoch": 0.6412358882947118, "grad_norm": 1.0856353463652906, "learning_rate": 6.024023544711396e-06, "loss": 0.1959, "step": 8094 }, { "epoch": 0.6413151119033472, "grad_norm": 1.3612629007419905, "learning_rate": 6.021669187895054e-06, "loss": 0.2353, "step": 8095 }, { "epoch": 0.6413943355119825, "grad_norm": 1.3690513963544237, "learning_rate": 6.019315093032656e-06, "loss": 0.3015, "step": 8096 }, { "epoch": 0.641473559120618, "grad_norm": 1.407255136875795, "learning_rate": 6.016961260279204e-06, "loss": 0.2593, "step": 8097 }, { "epoch": 0.6415527827292533, "grad_norm": 1.1827693175835738, "learning_rate": 6.0146076897896865e-06, "loss": 0.2307, "step": 8098 }, { "epoch": 0.6416320063378886, "grad_norm": 1.7272292197631658, "learning_rate": 6.012254381719078e-06, "loss": 0.3228, "step": 8099 }, { "epoch": 0.6417112299465241, "grad_norm": 1.1978680700700264, "learning_rate": 6.0099013362223305e-06, "loss": 0.2156, "step": 8100 }, { "epoch": 0.6417904535551594, "grad_norm": 1.4270642523243047, "learning_rate": 6.007548553454379e-06, "loss": 0.2831, "step": 8101 }, { "epoch": 0.6418696771637948, "grad_norm": 1.2915990697196864, "learning_rate": 6.005196033570147e-06, "loss": 0.251, "step": 8102 }, { "epoch": 0.6419489007724302, "grad_norm": 1.667631554960559, "learning_rate": 6.002843776724534e-06, "loss": 0.3437, "step": 8103 }, { "epoch": 0.6420281243810656, "grad_norm": 1.2014071194813634, "learning_rate": 6.000491783072426e-06, "loss": 0.2435, "step": 8104 }, { "epoch": 0.6421073479897009, "grad_norm": 1.252933854040516, "learning_rate": 5.998140052768687e-06, "loss": 0.2237, "step": 8105 }, { "epoch": 0.6421865715983363, "grad_norm": 1.2022151399151069, "learning_rate": 5.995788585968171e-06, "loss": 0.2314, "step": 8106 }, { "epoch": 0.6422657952069717, "grad_norm": 1.1784643893099416, "learning_rate": 5.993437382825711e-06, "loss": 0.1883, "step": 8107 }, { "epoch": 0.642345018815607, "grad_norm": 1.1422486946810984, "learning_rate": 5.991086443496119e-06, "loss": 0.1828, "step": 8108 }, { "epoch": 0.6424242424242425, "grad_norm": 1.3525221768082902, "learning_rate": 5.9887357681341955e-06, "loss": 0.2166, "step": 8109 }, { "epoch": 0.6425034660328778, "grad_norm": 1.4813949540614977, "learning_rate": 5.9863853568947215e-06, "loss": 0.3234, "step": 8110 }, { "epoch": 0.6425826896415132, "grad_norm": 1.1455857166825378, "learning_rate": 5.9840352099324595e-06, "loss": 0.2065, "step": 8111 }, { "epoch": 0.6426619132501485, "grad_norm": 1.0984746551442057, "learning_rate": 5.981685327402156e-06, "loss": 0.1567, "step": 8112 }, { "epoch": 0.6427411368587839, "grad_norm": 1.2642006811308861, "learning_rate": 5.9793357094585365e-06, "loss": 0.2269, "step": 8113 }, { "epoch": 0.6428203604674193, "grad_norm": 1.4220520740083904, "learning_rate": 5.976986356256316e-06, "loss": 0.2324, "step": 8114 }, { "epoch": 0.6428995840760546, "grad_norm": 1.1291914443064694, "learning_rate": 5.974637267950187e-06, "loss": 0.2274, "step": 8115 }, { "epoch": 0.6429788076846901, "grad_norm": 1.247434955682266, "learning_rate": 5.972288444694822e-06, "loss": 0.233, "step": 8116 }, { "epoch": 0.6430580312933254, "grad_norm": 1.4103680739157824, "learning_rate": 5.9699398866448846e-06, "loss": 0.2528, "step": 8117 }, { "epoch": 0.6431372549019608, "grad_norm": 1.4585774943281162, "learning_rate": 5.967591593955016e-06, "loss": 0.2967, "step": 8118 }, { "epoch": 0.6432164785105962, "grad_norm": 1.098661328588494, "learning_rate": 5.965243566779837e-06, "loss": 0.2398, "step": 8119 }, { "epoch": 0.6432957021192315, "grad_norm": 1.6583771941322858, "learning_rate": 5.962895805273956e-06, "loss": 0.3312, "step": 8120 }, { "epoch": 0.6433749257278669, "grad_norm": 1.273705229997679, "learning_rate": 5.960548309591958e-06, "loss": 0.2072, "step": 8121 }, { "epoch": 0.6434541493365022, "grad_norm": 1.6771965777271247, "learning_rate": 5.958201079888419e-06, "loss": 0.2701, "step": 8122 }, { "epoch": 0.6435333729451377, "grad_norm": 1.6019870082854617, "learning_rate": 5.9558541163178915e-06, "loss": 0.2773, "step": 8123 }, { "epoch": 0.643612596553773, "grad_norm": 1.4033284501363137, "learning_rate": 5.953507419034911e-06, "loss": 0.2614, "step": 8124 }, { "epoch": 0.6436918201624084, "grad_norm": 1.4180770775533054, "learning_rate": 5.951160988193998e-06, "loss": 0.3666, "step": 8125 }, { "epoch": 0.6437710437710438, "grad_norm": 1.2279536714428312, "learning_rate": 5.948814823949649e-06, "loss": 0.2352, "step": 8126 }, { "epoch": 0.6438502673796791, "grad_norm": 1.3640400335928415, "learning_rate": 5.946468926456352e-06, "loss": 0.2319, "step": 8127 }, { "epoch": 0.6439294909883145, "grad_norm": 0.9992245033639103, "learning_rate": 5.944123295868574e-06, "loss": 0.1401, "step": 8128 }, { "epoch": 0.6440087145969499, "grad_norm": 1.3656269910040781, "learning_rate": 5.9417779323407576e-06, "loss": 0.2585, "step": 8129 }, { "epoch": 0.6440879382055853, "grad_norm": 1.4782410547695937, "learning_rate": 5.939432836027339e-06, "loss": 0.2431, "step": 8130 }, { "epoch": 0.6441671618142206, "grad_norm": 1.2938615571457268, "learning_rate": 5.937088007082731e-06, "loss": 0.2073, "step": 8131 }, { "epoch": 0.6442463854228561, "grad_norm": 1.195901103286782, "learning_rate": 5.934743445661326e-06, "loss": 0.243, "step": 8132 }, { "epoch": 0.6443256090314914, "grad_norm": 1.4971039839426732, "learning_rate": 5.932399151917507e-06, "loss": 0.2666, "step": 8133 }, { "epoch": 0.6444048326401267, "grad_norm": 1.292911705123545, "learning_rate": 5.93005512600563e-06, "loss": 0.2669, "step": 8134 }, { "epoch": 0.6444840562487621, "grad_norm": 1.28628281896135, "learning_rate": 5.92771136808004e-06, "loss": 0.2629, "step": 8135 }, { "epoch": 0.6445632798573975, "grad_norm": 1.3083046895397594, "learning_rate": 5.925367878295063e-06, "loss": 0.2365, "step": 8136 }, { "epoch": 0.6446425034660329, "grad_norm": 1.4778166867537974, "learning_rate": 5.9230246568050035e-06, "loss": 0.2244, "step": 8137 }, { "epoch": 0.6447217270746682, "grad_norm": 1.1986066038734173, "learning_rate": 5.920681703764153e-06, "loss": 0.1907, "step": 8138 }, { "epoch": 0.6448009506833037, "grad_norm": 1.4055921060539385, "learning_rate": 5.918339019326789e-06, "loss": 0.2382, "step": 8139 }, { "epoch": 0.644880174291939, "grad_norm": 1.3909759227487568, "learning_rate": 5.915996603647157e-06, "loss": 0.2749, "step": 8140 }, { "epoch": 0.6449593979005743, "grad_norm": 1.6910052400495772, "learning_rate": 5.913654456879496e-06, "loss": 0.3476, "step": 8141 }, { "epoch": 0.6450386215092098, "grad_norm": 1.4956247990378597, "learning_rate": 5.911312579178028e-06, "loss": 0.321, "step": 8142 }, { "epoch": 0.6451178451178451, "grad_norm": 1.3955388581533108, "learning_rate": 5.908970970696955e-06, "loss": 0.2963, "step": 8143 }, { "epoch": 0.6451970687264805, "grad_norm": 1.2812517471902314, "learning_rate": 5.906629631590457e-06, "loss": 0.1798, "step": 8144 }, { "epoch": 0.6452762923351159, "grad_norm": 1.3023495715682363, "learning_rate": 5.904288562012703e-06, "loss": 0.1976, "step": 8145 }, { "epoch": 0.6453555159437513, "grad_norm": 1.1957466887544972, "learning_rate": 5.901947762117838e-06, "loss": 0.2409, "step": 8146 }, { "epoch": 0.6454347395523866, "grad_norm": 1.2181002182020895, "learning_rate": 5.899607232059994e-06, "loss": 0.2302, "step": 8147 }, { "epoch": 0.6455139631610219, "grad_norm": 1.3383646556688498, "learning_rate": 5.897266971993286e-06, "loss": 0.3127, "step": 8148 }, { "epoch": 0.6455931867696574, "grad_norm": 1.3013278544632247, "learning_rate": 5.894926982071805e-06, "loss": 0.2451, "step": 8149 }, { "epoch": 0.6456724103782927, "grad_norm": 1.455135644873426, "learning_rate": 5.892587262449631e-06, "loss": 0.2684, "step": 8150 }, { "epoch": 0.6457516339869281, "grad_norm": 1.3949670248958077, "learning_rate": 5.890247813280822e-06, "loss": 0.2396, "step": 8151 }, { "epoch": 0.6458308575955635, "grad_norm": 1.4654976448807884, "learning_rate": 5.8879086347194196e-06, "loss": 0.2395, "step": 8152 }, { "epoch": 0.6459100812041989, "grad_norm": 1.2225795709262621, "learning_rate": 5.885569726919449e-06, "loss": 0.2041, "step": 8153 }, { "epoch": 0.6459893048128342, "grad_norm": 1.2257139244742783, "learning_rate": 5.883231090034911e-06, "loss": 0.2311, "step": 8154 }, { "epoch": 0.6460685284214696, "grad_norm": 1.5088672370952754, "learning_rate": 5.8808927242197984e-06, "loss": 0.3058, "step": 8155 }, { "epoch": 0.646147752030105, "grad_norm": 1.2286795373313426, "learning_rate": 5.878554629628081e-06, "loss": 0.2665, "step": 8156 }, { "epoch": 0.6462269756387403, "grad_norm": 1.1509846666770909, "learning_rate": 5.87621680641371e-06, "loss": 0.2457, "step": 8157 }, { "epoch": 0.6463061992473758, "grad_norm": 1.249818567896011, "learning_rate": 5.873879254730621e-06, "loss": 0.179, "step": 8158 }, { "epoch": 0.6463854228560111, "grad_norm": 1.2229891677882212, "learning_rate": 5.871541974732727e-06, "loss": 0.1548, "step": 8159 }, { "epoch": 0.6464646464646465, "grad_norm": 1.3046319716154064, "learning_rate": 5.869204966573929e-06, "loss": 0.2272, "step": 8160 }, { "epoch": 0.6465438700732818, "grad_norm": 1.4202659233052892, "learning_rate": 5.866868230408111e-06, "loss": 0.246, "step": 8161 }, { "epoch": 0.6466230936819172, "grad_norm": 1.5452830360718968, "learning_rate": 5.86453176638913e-06, "loss": 0.2401, "step": 8162 }, { "epoch": 0.6467023172905526, "grad_norm": 1.4251130783891992, "learning_rate": 5.862195574670834e-06, "loss": 0.2375, "step": 8163 }, { "epoch": 0.6467815408991879, "grad_norm": 1.4304510655614806, "learning_rate": 5.85985965540705e-06, "loss": 0.3316, "step": 8164 }, { "epoch": 0.6468607645078234, "grad_norm": 1.3000918366011986, "learning_rate": 5.857524008751586e-06, "loss": 0.186, "step": 8165 }, { "epoch": 0.6469399881164587, "grad_norm": 1.382534595387525, "learning_rate": 5.855188634858235e-06, "loss": 0.3204, "step": 8166 }, { "epoch": 0.647019211725094, "grad_norm": 1.3098944138557873, "learning_rate": 5.852853533880768e-06, "loss": 0.2021, "step": 8167 }, { "epoch": 0.6470984353337295, "grad_norm": 1.384742778393619, "learning_rate": 5.850518705972941e-06, "loss": 0.2784, "step": 8168 }, { "epoch": 0.6471776589423648, "grad_norm": 1.2976537575609843, "learning_rate": 5.848184151288492e-06, "loss": 0.2723, "step": 8169 }, { "epoch": 0.6472568825510002, "grad_norm": 1.2891180624230896, "learning_rate": 5.845849869981137e-06, "loss": 0.2111, "step": 8170 }, { "epoch": 0.6473361061596355, "grad_norm": 1.742309750335544, "learning_rate": 5.843515862204581e-06, "loss": 0.3064, "step": 8171 }, { "epoch": 0.647415329768271, "grad_norm": 1.3321958610973492, "learning_rate": 5.841182128112506e-06, "loss": 0.2421, "step": 8172 }, { "epoch": 0.6474945533769063, "grad_norm": 1.220068248213159, "learning_rate": 5.838848667858577e-06, "loss": 0.2585, "step": 8173 }, { "epoch": 0.6475737769855416, "grad_norm": 1.1799814246518199, "learning_rate": 5.83651548159644e-06, "loss": 0.1991, "step": 8174 }, { "epoch": 0.6476530005941771, "grad_norm": 1.4795636129522518, "learning_rate": 5.834182569479727e-06, "loss": 0.2515, "step": 8175 }, { "epoch": 0.6477322242028124, "grad_norm": 0.991585093191464, "learning_rate": 5.831849931662047e-06, "loss": 0.1368, "step": 8176 }, { "epoch": 0.6478114478114478, "grad_norm": 1.1468145182399327, "learning_rate": 5.829517568296989e-06, "loss": 0.1973, "step": 8177 }, { "epoch": 0.6478906714200832, "grad_norm": 1.5411334228179303, "learning_rate": 5.827185479538138e-06, "loss": 0.3021, "step": 8178 }, { "epoch": 0.6479698950287186, "grad_norm": 1.3809742617805387, "learning_rate": 5.824853665539043e-06, "loss": 0.2363, "step": 8179 }, { "epoch": 0.6480491186373539, "grad_norm": 1.3884944220143203, "learning_rate": 5.82252212645324e-06, "loss": 0.2015, "step": 8180 }, { "epoch": 0.6481283422459893, "grad_norm": 1.3504039071335587, "learning_rate": 5.820190862434259e-06, "loss": 0.2755, "step": 8181 }, { "epoch": 0.6482075658546247, "grad_norm": 1.5062850129582808, "learning_rate": 5.8178598736355985e-06, "loss": 0.2207, "step": 8182 }, { "epoch": 0.64828678946326, "grad_norm": 1.3468532382164187, "learning_rate": 5.815529160210738e-06, "loss": 0.2389, "step": 8183 }, { "epoch": 0.6483660130718955, "grad_norm": 1.2873584372407643, "learning_rate": 5.813198722313151e-06, "loss": 0.2538, "step": 8184 }, { "epoch": 0.6484452366805308, "grad_norm": 1.2147192141188234, "learning_rate": 5.810868560096283e-06, "loss": 0.2408, "step": 8185 }, { "epoch": 0.6485244602891662, "grad_norm": 1.150210873959303, "learning_rate": 5.808538673713564e-06, "loss": 0.179, "step": 8186 }, { "epoch": 0.6486036838978015, "grad_norm": 1.5514435555091965, "learning_rate": 5.8062090633184e-06, "loss": 0.2602, "step": 8187 }, { "epoch": 0.6486829075064369, "grad_norm": 1.3612607417387983, "learning_rate": 5.803879729064195e-06, "loss": 0.1671, "step": 8188 }, { "epoch": 0.6487621311150723, "grad_norm": 1.5891191175421429, "learning_rate": 5.801550671104319e-06, "loss": 0.3012, "step": 8189 }, { "epoch": 0.6488413547237076, "grad_norm": 0.9579452311161174, "learning_rate": 5.7992218895921256e-06, "loss": 0.1441, "step": 8190 }, { "epoch": 0.6489205783323431, "grad_norm": 1.2491284816231087, "learning_rate": 5.796893384680964e-06, "loss": 0.2324, "step": 8191 }, { "epoch": 0.6489998019409784, "grad_norm": 1.152755292113233, "learning_rate": 5.7945651565241455e-06, "loss": 0.1983, "step": 8192 }, { "epoch": 0.6490790255496138, "grad_norm": 1.4206019671704502, "learning_rate": 5.792237205274974e-06, "loss": 0.2531, "step": 8193 }, { "epoch": 0.6491582491582492, "grad_norm": 1.4044823750296693, "learning_rate": 5.789909531086741e-06, "loss": 0.3426, "step": 8194 }, { "epoch": 0.6492374727668845, "grad_norm": 1.4364212142703516, "learning_rate": 5.787582134112706e-06, "loss": 0.2413, "step": 8195 }, { "epoch": 0.6493166963755199, "grad_norm": 1.064031031591425, "learning_rate": 5.785255014506115e-06, "loss": 0.2006, "step": 8196 }, { "epoch": 0.6493959199841552, "grad_norm": 1.371714606810431, "learning_rate": 5.782928172420206e-06, "loss": 0.2549, "step": 8197 }, { "epoch": 0.6494751435927907, "grad_norm": 1.4441606154563482, "learning_rate": 5.780601608008185e-06, "loss": 0.2654, "step": 8198 }, { "epoch": 0.649554367201426, "grad_norm": 1.4719931528968992, "learning_rate": 5.778275321423241e-06, "loss": 0.3326, "step": 8199 }, { "epoch": 0.6496335908100614, "grad_norm": 1.4809493585096092, "learning_rate": 5.7759493128185584e-06, "loss": 0.2896, "step": 8200 }, { "epoch": 0.6497128144186968, "grad_norm": 1.659886742655029, "learning_rate": 5.773623582347289e-06, "loss": 0.2966, "step": 8201 }, { "epoch": 0.6497920380273321, "grad_norm": 1.4284371556711388, "learning_rate": 5.77129813016257e-06, "loss": 0.3075, "step": 8202 }, { "epoch": 0.6498712616359675, "grad_norm": 1.3929331099693574, "learning_rate": 5.768972956417518e-06, "loss": 0.2894, "step": 8203 }, { "epoch": 0.6499504852446029, "grad_norm": 1.229699544949765, "learning_rate": 5.766648061265242e-06, "loss": 0.1881, "step": 8204 }, { "epoch": 0.6500297088532383, "grad_norm": 1.4991288856567153, "learning_rate": 5.764323444858823e-06, "loss": 0.2548, "step": 8205 }, { "epoch": 0.6501089324618736, "grad_norm": 1.4754548644404095, "learning_rate": 5.761999107351319e-06, "loss": 0.2941, "step": 8206 }, { "epoch": 0.6501881560705091, "grad_norm": 1.288205072146522, "learning_rate": 5.759675048895785e-06, "loss": 0.2971, "step": 8207 }, { "epoch": 0.6502673796791444, "grad_norm": 1.7057389019166083, "learning_rate": 5.757351269645248e-06, "loss": 0.4096, "step": 8208 }, { "epoch": 0.6503466032877797, "grad_norm": 1.1658150089316675, "learning_rate": 5.75502776975271e-06, "loss": 0.2161, "step": 8209 }, { "epoch": 0.6504258268964151, "grad_norm": 1.3783310309691115, "learning_rate": 5.752704549371173e-06, "loss": 0.2188, "step": 8210 }, { "epoch": 0.6505050505050505, "grad_norm": 1.4202501898947362, "learning_rate": 5.750381608653605e-06, "loss": 0.2834, "step": 8211 }, { "epoch": 0.6505842741136859, "grad_norm": 1.1136144699359627, "learning_rate": 5.748058947752955e-06, "loss": 0.1744, "step": 8212 }, { "epoch": 0.6506634977223212, "grad_norm": 1.1337237879831854, "learning_rate": 5.745736566822169e-06, "loss": 0.2105, "step": 8213 }, { "epoch": 0.6507427213309567, "grad_norm": 1.4534454354028474, "learning_rate": 5.743414466014159e-06, "loss": 0.3502, "step": 8214 }, { "epoch": 0.650821944939592, "grad_norm": 1.4728076953042555, "learning_rate": 5.7410926454818265e-06, "loss": 0.3483, "step": 8215 }, { "epoch": 0.6509011685482273, "grad_norm": 1.2762477065054285, "learning_rate": 5.738771105378046e-06, "loss": 0.2245, "step": 8216 }, { "epoch": 0.6509803921568628, "grad_norm": 1.6185604962932965, "learning_rate": 5.7364498458556914e-06, "loss": 0.3277, "step": 8217 }, { "epoch": 0.6510596157654981, "grad_norm": 1.3256756646747319, "learning_rate": 5.734128867067593e-06, "loss": 0.2721, "step": 8218 }, { "epoch": 0.6511388393741335, "grad_norm": 1.2585430603388206, "learning_rate": 5.731808169166586e-06, "loss": 0.23, "step": 8219 }, { "epoch": 0.6512180629827689, "grad_norm": 1.4410735344608805, "learning_rate": 5.7294877523054735e-06, "loss": 0.2193, "step": 8220 }, { "epoch": 0.6512972865914043, "grad_norm": 1.3540027586279855, "learning_rate": 5.727167616637042e-06, "loss": 0.2816, "step": 8221 }, { "epoch": 0.6513765102000396, "grad_norm": 1.5948557006961954, "learning_rate": 5.7248477623140655e-06, "loss": 0.2955, "step": 8222 }, { "epoch": 0.6514557338086749, "grad_norm": 1.1882125275000315, "learning_rate": 5.722528189489294e-06, "loss": 0.2069, "step": 8223 }, { "epoch": 0.6515349574173104, "grad_norm": 1.319626426826207, "learning_rate": 5.720208898315454e-06, "loss": 0.3017, "step": 8224 }, { "epoch": 0.6516141810259457, "grad_norm": 1.6088370067367068, "learning_rate": 5.717889888945271e-06, "loss": 0.3623, "step": 8225 }, { "epoch": 0.6516934046345811, "grad_norm": 1.2051490578485706, "learning_rate": 5.715571161531433e-06, "loss": 0.1816, "step": 8226 }, { "epoch": 0.6517726282432165, "grad_norm": 1.5428819965220586, "learning_rate": 5.7132527162266194e-06, "loss": 0.282, "step": 8227 }, { "epoch": 0.6518518518518519, "grad_norm": 1.3229346718964534, "learning_rate": 5.710934553183484e-06, "loss": 0.2727, "step": 8228 }, { "epoch": 0.6519310754604872, "grad_norm": 1.3644065855357472, "learning_rate": 5.708616672554675e-06, "loss": 0.2808, "step": 8229 }, { "epoch": 0.6520102990691226, "grad_norm": 1.3604704436586774, "learning_rate": 5.7062990744928086e-06, "loss": 0.2885, "step": 8230 }, { "epoch": 0.652089522677758, "grad_norm": 1.437622917063151, "learning_rate": 5.703981759150483e-06, "loss": 0.2306, "step": 8231 }, { "epoch": 0.6521687462863933, "grad_norm": 1.2195711467253147, "learning_rate": 5.701664726680294e-06, "loss": 0.1796, "step": 8232 }, { "epoch": 0.6522479698950288, "grad_norm": 1.3599793326001144, "learning_rate": 5.699347977234799e-06, "loss": 0.2303, "step": 8233 }, { "epoch": 0.6523271935036641, "grad_norm": 1.1689218302714448, "learning_rate": 5.697031510966542e-06, "loss": 0.175, "step": 8234 }, { "epoch": 0.6524064171122995, "grad_norm": 1.2743345105512172, "learning_rate": 5.69471532802806e-06, "loss": 0.2434, "step": 8235 }, { "epoch": 0.6524856407209348, "grad_norm": 1.8280711414999011, "learning_rate": 5.692399428571857e-06, "loss": 0.2843, "step": 8236 }, { "epoch": 0.6525648643295702, "grad_norm": 1.4282173217074114, "learning_rate": 5.690083812750422e-06, "loss": 0.2156, "step": 8237 }, { "epoch": 0.6526440879382056, "grad_norm": 1.4012797356770035, "learning_rate": 5.687768480716233e-06, "loss": 0.2789, "step": 8238 }, { "epoch": 0.6527233115468409, "grad_norm": 1.3066002093093614, "learning_rate": 5.685453432621741e-06, "loss": 0.2154, "step": 8239 }, { "epoch": 0.6528025351554764, "grad_norm": 1.3782246275048002, "learning_rate": 5.683138668619381e-06, "loss": 0.2415, "step": 8240 }, { "epoch": 0.6528817587641117, "grad_norm": 1.2795835626579697, "learning_rate": 5.680824188861564e-06, "loss": 0.1666, "step": 8241 }, { "epoch": 0.6529609823727471, "grad_norm": 1.4334796424175882, "learning_rate": 5.678509993500695e-06, "loss": 0.1831, "step": 8242 }, { "epoch": 0.6530402059813825, "grad_norm": 1.4289170971199112, "learning_rate": 5.676196082689149e-06, "loss": 0.2407, "step": 8243 }, { "epoch": 0.6531194295900178, "grad_norm": 1.3171045771506669, "learning_rate": 5.673882456579282e-06, "loss": 0.2051, "step": 8244 }, { "epoch": 0.6531986531986532, "grad_norm": 1.4045195840365614, "learning_rate": 5.6715691153234445e-06, "loss": 0.269, "step": 8245 }, { "epoch": 0.6532778768072885, "grad_norm": 1.1058765581272658, "learning_rate": 5.669256059073953e-06, "loss": 0.1613, "step": 8246 }, { "epoch": 0.653357100415924, "grad_norm": 1.4565447769680877, "learning_rate": 5.666943287983106e-06, "loss": 0.2709, "step": 8247 }, { "epoch": 0.6534363240245593, "grad_norm": 1.9498937160185494, "learning_rate": 5.664630802203201e-06, "loss": 0.3357, "step": 8248 }, { "epoch": 0.6535155476331946, "grad_norm": 1.5119685588075111, "learning_rate": 5.662318601886496e-06, "loss": 0.3351, "step": 8249 }, { "epoch": 0.6535947712418301, "grad_norm": 1.5925238934924428, "learning_rate": 5.660006687185235e-06, "loss": 0.2522, "step": 8250 }, { "epoch": 0.6536739948504654, "grad_norm": 1.3812899894915327, "learning_rate": 5.657695058251656e-06, "loss": 0.3437, "step": 8251 }, { "epoch": 0.6537532184591008, "grad_norm": 1.4443810885064245, "learning_rate": 5.655383715237963e-06, "loss": 0.2827, "step": 8252 }, { "epoch": 0.6538324420677362, "grad_norm": 1.2048313068771106, "learning_rate": 5.653072658296344e-06, "loss": 0.1971, "step": 8253 }, { "epoch": 0.6539116656763716, "grad_norm": 1.4143378162351634, "learning_rate": 5.650761887578977e-06, "loss": 0.3018, "step": 8254 }, { "epoch": 0.6539908892850069, "grad_norm": 1.4622225499434423, "learning_rate": 5.648451403238013e-06, "loss": 0.3305, "step": 8255 }, { "epoch": 0.6540701128936423, "grad_norm": 1.3939297464312732, "learning_rate": 5.646141205425586e-06, "loss": 0.2254, "step": 8256 }, { "epoch": 0.6541493365022777, "grad_norm": 1.3600801342035775, "learning_rate": 5.643831294293808e-06, "loss": 0.236, "step": 8257 }, { "epoch": 0.654228560110913, "grad_norm": 1.2500107183974192, "learning_rate": 5.641521669994782e-06, "loss": 0.2517, "step": 8258 }, { "epoch": 0.6543077837195485, "grad_norm": 1.5466957951513571, "learning_rate": 5.639212332680581e-06, "loss": 0.2372, "step": 8259 }, { "epoch": 0.6543870073281838, "grad_norm": 1.4120252459455545, "learning_rate": 5.636903282503263e-06, "loss": 0.2378, "step": 8260 }, { "epoch": 0.6544662309368192, "grad_norm": 1.5833833191874824, "learning_rate": 5.6345945196148734e-06, "loss": 0.3072, "step": 8261 }, { "epoch": 0.6545454545454545, "grad_norm": 0.9104005117978915, "learning_rate": 5.63228604416743e-06, "loss": 0.1209, "step": 8262 }, { "epoch": 0.6546246781540899, "grad_norm": 1.40885313207492, "learning_rate": 5.62997785631293e-06, "loss": 0.2315, "step": 8263 }, { "epoch": 0.6547039017627253, "grad_norm": 1.2514514813918491, "learning_rate": 5.627669956203365e-06, "loss": 0.1825, "step": 8264 }, { "epoch": 0.6547831253713606, "grad_norm": 1.591887297539188, "learning_rate": 5.6253623439906955e-06, "loss": 0.3877, "step": 8265 }, { "epoch": 0.6548623489799961, "grad_norm": 1.3277121617300123, "learning_rate": 5.623055019826862e-06, "loss": 0.3057, "step": 8266 }, { "epoch": 0.6549415725886314, "grad_norm": 1.5458358195134896, "learning_rate": 5.6207479838637995e-06, "loss": 0.2262, "step": 8267 }, { "epoch": 0.6550207961972668, "grad_norm": 1.3775270832751523, "learning_rate": 5.618441236253411e-06, "loss": 0.2259, "step": 8268 }, { "epoch": 0.6551000198059022, "grad_norm": 1.593305268993249, "learning_rate": 5.616134777147578e-06, "loss": 0.2855, "step": 8269 }, { "epoch": 0.6551792434145375, "grad_norm": 1.4931850181518935, "learning_rate": 5.6138286066981815e-06, "loss": 0.2566, "step": 8270 }, { "epoch": 0.6552584670231729, "grad_norm": 1.6341990950191314, "learning_rate": 5.611522725057067e-06, "loss": 0.2683, "step": 8271 }, { "epoch": 0.6553376906318082, "grad_norm": 1.2327587732323917, "learning_rate": 5.6092171323760635e-06, "loss": 0.2829, "step": 8272 }, { "epoch": 0.6554169142404437, "grad_norm": 1.143406831715001, "learning_rate": 5.6069118288069824e-06, "loss": 0.1866, "step": 8273 }, { "epoch": 0.655496137849079, "grad_norm": 1.4306590530622671, "learning_rate": 5.604606814501623e-06, "loss": 0.2277, "step": 8274 }, { "epoch": 0.6555753614577144, "grad_norm": 0.9951344606511768, "learning_rate": 5.602302089611755e-06, "loss": 0.1471, "step": 8275 }, { "epoch": 0.6556545850663498, "grad_norm": 1.1532442807584862, "learning_rate": 5.599997654289129e-06, "loss": 0.2038, "step": 8276 }, { "epoch": 0.6557338086749851, "grad_norm": 1.511912539074862, "learning_rate": 5.5976935086854914e-06, "loss": 0.3291, "step": 8277 }, { "epoch": 0.6558130322836205, "grad_norm": 1.2813473369591573, "learning_rate": 5.595389652952555e-06, "loss": 0.2494, "step": 8278 }, { "epoch": 0.6558922558922559, "grad_norm": 1.3428850586847336, "learning_rate": 5.59308608724201e-06, "loss": 0.2035, "step": 8279 }, { "epoch": 0.6559714795008913, "grad_norm": 1.5236277624601837, "learning_rate": 5.590782811705547e-06, "loss": 0.2166, "step": 8280 }, { "epoch": 0.6560507031095266, "grad_norm": 1.5965910841898305, "learning_rate": 5.588479826494817e-06, "loss": 0.2721, "step": 8281 }, { "epoch": 0.6561299267181621, "grad_norm": 1.1829408889037407, "learning_rate": 5.5861771317614624e-06, "loss": 0.2173, "step": 8282 }, { "epoch": 0.6562091503267974, "grad_norm": 1.2316774437155615, "learning_rate": 5.583874727657109e-06, "loss": 0.2144, "step": 8283 }, { "epoch": 0.6562883739354327, "grad_norm": 1.1893141187767418, "learning_rate": 5.581572614333356e-06, "loss": 0.1748, "step": 8284 }, { "epoch": 0.6563675975440681, "grad_norm": 1.2242846043315372, "learning_rate": 5.579270791941787e-06, "loss": 0.1655, "step": 8285 }, { "epoch": 0.6564468211527035, "grad_norm": 1.709248591628549, "learning_rate": 5.5769692606339584e-06, "loss": 0.295, "step": 8286 }, { "epoch": 0.6565260447613389, "grad_norm": 1.4099422681992102, "learning_rate": 5.574668020561428e-06, "loss": 0.2489, "step": 8287 }, { "epoch": 0.6566052683699742, "grad_norm": 1.3778342992923602, "learning_rate": 5.572367071875715e-06, "loss": 0.2679, "step": 8288 }, { "epoch": 0.6566844919786097, "grad_norm": 1.8174632950361576, "learning_rate": 5.570066414728321e-06, "loss": 0.3384, "step": 8289 }, { "epoch": 0.656763715587245, "grad_norm": 1.0790279673517058, "learning_rate": 5.567766049270742e-06, "loss": 0.1812, "step": 8290 }, { "epoch": 0.6568429391958803, "grad_norm": 1.1235633883750664, "learning_rate": 5.5654659756544425e-06, "loss": 0.2207, "step": 8291 }, { "epoch": 0.6569221628045158, "grad_norm": 1.322570757406409, "learning_rate": 5.563166194030868e-06, "loss": 0.2504, "step": 8292 }, { "epoch": 0.6570013864131511, "grad_norm": 1.9260048581567604, "learning_rate": 5.560866704551454e-06, "loss": 0.3836, "step": 8293 }, { "epoch": 0.6570806100217865, "grad_norm": 1.1553126660387465, "learning_rate": 5.5585675073676085e-06, "loss": 0.1587, "step": 8294 }, { "epoch": 0.6571598336304219, "grad_norm": 1.2434067681545933, "learning_rate": 5.556268602630721e-06, "loss": 0.2561, "step": 8295 }, { "epoch": 0.6572390572390573, "grad_norm": 1.0898522372539647, "learning_rate": 5.553969990492164e-06, "loss": 0.2022, "step": 8296 }, { "epoch": 0.6573182808476926, "grad_norm": 1.2522349290649752, "learning_rate": 5.5516716711032906e-06, "loss": 0.2396, "step": 8297 }, { "epoch": 0.6573975044563279, "grad_norm": 1.4848079093922917, "learning_rate": 5.54937364461543e-06, "loss": 0.3321, "step": 8298 }, { "epoch": 0.6574767280649634, "grad_norm": 1.4566944120284944, "learning_rate": 5.547075911179902e-06, "loss": 0.2531, "step": 8299 }, { "epoch": 0.6575559516735987, "grad_norm": 1.271216319498662, "learning_rate": 5.544778470948001e-06, "loss": 0.2168, "step": 8300 }, { "epoch": 0.6576351752822341, "grad_norm": 1.6472376808299591, "learning_rate": 5.542481324070996e-06, "loss": 0.3467, "step": 8301 }, { "epoch": 0.6577143988908695, "grad_norm": 1.2443022079359343, "learning_rate": 5.540184470700152e-06, "loss": 0.218, "step": 8302 }, { "epoch": 0.6577936224995049, "grad_norm": 1.3790539371654038, "learning_rate": 5.537887910986701e-06, "loss": 0.2933, "step": 8303 }, { "epoch": 0.6578728461081402, "grad_norm": 1.576153855481717, "learning_rate": 5.535591645081857e-06, "loss": 0.3258, "step": 8304 }, { "epoch": 0.6579520697167756, "grad_norm": 1.2823367458262571, "learning_rate": 5.5332956731368245e-06, "loss": 0.2333, "step": 8305 }, { "epoch": 0.658031293325411, "grad_norm": 1.2228559518003623, "learning_rate": 5.530999995302781e-06, "loss": 0.2224, "step": 8306 }, { "epoch": 0.6581105169340463, "grad_norm": 1.1215705590993594, "learning_rate": 5.528704611730879e-06, "loss": 0.1965, "step": 8307 }, { "epoch": 0.6581897405426818, "grad_norm": 1.2812794941391847, "learning_rate": 5.5264095225722705e-06, "loss": 0.2487, "step": 8308 }, { "epoch": 0.6582689641513171, "grad_norm": 1.2293533067062077, "learning_rate": 5.524114727978067e-06, "loss": 0.2174, "step": 8309 }, { "epoch": 0.6583481877599525, "grad_norm": 0.9242983580092674, "learning_rate": 5.5218202280993725e-06, "loss": 0.1394, "step": 8310 }, { "epoch": 0.6584274113685878, "grad_norm": 1.2580389178541742, "learning_rate": 5.519526023087265e-06, "loss": 0.1566, "step": 8311 }, { "epoch": 0.6585066349772232, "grad_norm": 1.221147943462261, "learning_rate": 5.517232113092814e-06, "loss": 0.2364, "step": 8312 }, { "epoch": 0.6585858585858586, "grad_norm": 1.4221653778866195, "learning_rate": 5.5149384982670585e-06, "loss": 0.2911, "step": 8313 }, { "epoch": 0.6586650821944939, "grad_norm": 1.2091627067742483, "learning_rate": 5.512645178761018e-06, "loss": 0.21, "step": 8314 }, { "epoch": 0.6587443058031294, "grad_norm": 1.5999950611040303, "learning_rate": 5.5103521547257045e-06, "loss": 0.256, "step": 8315 }, { "epoch": 0.6588235294117647, "grad_norm": 1.206254322324537, "learning_rate": 5.508059426312099e-06, "loss": 0.238, "step": 8316 }, { "epoch": 0.6589027530204001, "grad_norm": 1.2901609319620173, "learning_rate": 5.5057669936711625e-06, "loss": 0.1795, "step": 8317 }, { "epoch": 0.6589819766290355, "grad_norm": 1.3063836660918773, "learning_rate": 5.503474856953849e-06, "loss": 0.2549, "step": 8318 }, { "epoch": 0.6590612002376708, "grad_norm": 1.2839605847742221, "learning_rate": 5.50118301631108e-06, "loss": 0.2291, "step": 8319 }, { "epoch": 0.6591404238463062, "grad_norm": 1.4042232323395845, "learning_rate": 5.498891471893758e-06, "loss": 0.2637, "step": 8320 }, { "epoch": 0.6592196474549415, "grad_norm": 1.3900295040393047, "learning_rate": 5.49660022385278e-06, "loss": 0.2781, "step": 8321 }, { "epoch": 0.659298871063577, "grad_norm": 1.4135468750059055, "learning_rate": 5.494309272339007e-06, "loss": 0.2366, "step": 8322 }, { "epoch": 0.6593780946722123, "grad_norm": 0.9572690044823501, "learning_rate": 5.492018617503284e-06, "loss": 0.1343, "step": 8323 }, { "epoch": 0.6594573182808476, "grad_norm": 1.1413248322814105, "learning_rate": 5.48972825949645e-06, "loss": 0.2179, "step": 8324 }, { "epoch": 0.6595365418894831, "grad_norm": 1.486728618626868, "learning_rate": 5.487438198469306e-06, "loss": 0.3243, "step": 8325 }, { "epoch": 0.6596157654981184, "grad_norm": 1.5748786044733105, "learning_rate": 5.485148434572645e-06, "loss": 0.3075, "step": 8326 }, { "epoch": 0.6596949891067538, "grad_norm": 1.579162638486017, "learning_rate": 5.48285896795723e-06, "loss": 0.4096, "step": 8327 }, { "epoch": 0.6597742127153892, "grad_norm": 1.6213997059223582, "learning_rate": 5.480569798773822e-06, "loss": 0.308, "step": 8328 }, { "epoch": 0.6598534363240246, "grad_norm": 1.264210585124539, "learning_rate": 5.478280927173145e-06, "loss": 0.2154, "step": 8329 }, { "epoch": 0.6599326599326599, "grad_norm": 1.2948944287049189, "learning_rate": 5.4759923533059105e-06, "loss": 0.3472, "step": 8330 }, { "epoch": 0.6600118835412953, "grad_norm": 1.326143600656642, "learning_rate": 5.473704077322814e-06, "loss": 0.2294, "step": 8331 }, { "epoch": 0.6600911071499307, "grad_norm": 1.2643029076313272, "learning_rate": 5.471416099374525e-06, "loss": 0.2127, "step": 8332 }, { "epoch": 0.660170330758566, "grad_norm": 1.396813226002153, "learning_rate": 5.469128419611691e-06, "loss": 0.2167, "step": 8333 }, { "epoch": 0.6602495543672015, "grad_norm": 1.3223163825626678, "learning_rate": 5.466841038184954e-06, "loss": 0.1849, "step": 8334 }, { "epoch": 0.6603287779758368, "grad_norm": 1.1877953739738731, "learning_rate": 5.464553955244922e-06, "loss": 0.197, "step": 8335 }, { "epoch": 0.6604080015844722, "grad_norm": 1.5085980914038413, "learning_rate": 5.4622671709421856e-06, "loss": 0.2754, "step": 8336 }, { "epoch": 0.6604872251931075, "grad_norm": 1.3688318101684382, "learning_rate": 5.459980685427326e-06, "loss": 0.2589, "step": 8337 }, { "epoch": 0.6605664488017429, "grad_norm": 1.2919678212384955, "learning_rate": 5.457694498850892e-06, "loss": 0.2215, "step": 8338 }, { "epoch": 0.6606456724103783, "grad_norm": 1.3112073508953472, "learning_rate": 5.455408611363416e-06, "loss": 0.2623, "step": 8339 }, { "epoch": 0.6607248960190136, "grad_norm": 1.2517730206784308, "learning_rate": 5.45312302311542e-06, "loss": 0.237, "step": 8340 }, { "epoch": 0.6608041196276491, "grad_norm": 1.400009166504189, "learning_rate": 5.450837734257395e-06, "loss": 0.2322, "step": 8341 }, { "epoch": 0.6608833432362844, "grad_norm": 1.0102956686638216, "learning_rate": 5.448552744939815e-06, "loss": 0.2114, "step": 8342 }, { "epoch": 0.6609625668449198, "grad_norm": 1.1463414346435077, "learning_rate": 5.446268055313132e-06, "loss": 0.2032, "step": 8343 }, { "epoch": 0.6610417904535552, "grad_norm": 1.2156987153703278, "learning_rate": 5.443983665527792e-06, "loss": 0.2609, "step": 8344 }, { "epoch": 0.6611210140621905, "grad_norm": 1.260749758893954, "learning_rate": 5.441699575734204e-06, "loss": 0.2674, "step": 8345 }, { "epoch": 0.6612002376708259, "grad_norm": 1.2613301971594093, "learning_rate": 5.439415786082762e-06, "loss": 0.2166, "step": 8346 }, { "epoch": 0.6612794612794612, "grad_norm": 1.3285489438114124, "learning_rate": 5.437132296723852e-06, "loss": 0.1998, "step": 8347 }, { "epoch": 0.6613586848880967, "grad_norm": 1.1987701287684924, "learning_rate": 5.434849107807823e-06, "loss": 0.1647, "step": 8348 }, { "epoch": 0.661437908496732, "grad_norm": 1.3031800953705288, "learning_rate": 5.432566219485012e-06, "loss": 0.2656, "step": 8349 }, { "epoch": 0.6615171321053674, "grad_norm": 1.2078192042504405, "learning_rate": 5.430283631905742e-06, "loss": 0.2589, "step": 8350 }, { "epoch": 0.6615963557140028, "grad_norm": 1.1896796883324154, "learning_rate": 5.428001345220306e-06, "loss": 0.1696, "step": 8351 }, { "epoch": 0.6616755793226381, "grad_norm": 1.1528873650765146, "learning_rate": 5.425719359578978e-06, "loss": 0.2196, "step": 8352 }, { "epoch": 0.6617548029312735, "grad_norm": 1.5532941954778245, "learning_rate": 5.423437675132025e-06, "loss": 0.346, "step": 8353 }, { "epoch": 0.6618340265399089, "grad_norm": 1.3119490666397386, "learning_rate": 5.42115629202968e-06, "loss": 0.2696, "step": 8354 }, { "epoch": 0.6619132501485443, "grad_norm": 1.235671770418397, "learning_rate": 5.4188752104221565e-06, "loss": 0.2668, "step": 8355 }, { "epoch": 0.6619924737571796, "grad_norm": 1.2449852544970257, "learning_rate": 5.416594430459663e-06, "loss": 0.2882, "step": 8356 }, { "epoch": 0.6620716973658151, "grad_norm": 1.4161869570843637, "learning_rate": 5.41431395229237e-06, "loss": 0.3182, "step": 8357 }, { "epoch": 0.6621509209744504, "grad_norm": 1.2546617566662572, "learning_rate": 5.41203377607044e-06, "loss": 0.2123, "step": 8358 }, { "epoch": 0.6622301445830857, "grad_norm": 1.3723082212726574, "learning_rate": 5.409753901944006e-06, "loss": 0.2785, "step": 8359 }, { "epoch": 0.6623093681917211, "grad_norm": 1.4318874831162889, "learning_rate": 5.407474330063194e-06, "loss": 0.2453, "step": 8360 }, { "epoch": 0.6623885918003565, "grad_norm": 1.368128744646739, "learning_rate": 5.4051950605781e-06, "loss": 0.2587, "step": 8361 }, { "epoch": 0.6624678154089919, "grad_norm": 1.4443501090486048, "learning_rate": 5.402916093638798e-06, "loss": 0.2618, "step": 8362 }, { "epoch": 0.6625470390176272, "grad_norm": 1.4678922342274117, "learning_rate": 5.400637429395357e-06, "loss": 0.2644, "step": 8363 }, { "epoch": 0.6626262626262627, "grad_norm": 1.366951558813349, "learning_rate": 5.398359067997808e-06, "loss": 0.2818, "step": 8364 }, { "epoch": 0.662705486234898, "grad_norm": 1.3309913539033384, "learning_rate": 5.3960810095961705e-06, "loss": 0.26, "step": 8365 }, { "epoch": 0.6627847098435333, "grad_norm": 1.4347339554316367, "learning_rate": 5.39380325434045e-06, "loss": 0.2139, "step": 8366 }, { "epoch": 0.6628639334521688, "grad_norm": 1.3133136544607424, "learning_rate": 5.3915258023806195e-06, "loss": 0.284, "step": 8367 }, { "epoch": 0.6629431570608041, "grad_norm": 1.471385836672491, "learning_rate": 5.3892486538666386e-06, "loss": 0.1892, "step": 8368 }, { "epoch": 0.6630223806694395, "grad_norm": 1.4931572519025, "learning_rate": 5.386971808948451e-06, "loss": 0.2538, "step": 8369 }, { "epoch": 0.6631016042780749, "grad_norm": 2.995854586788855, "learning_rate": 5.384695267775975e-06, "loss": 0.319, "step": 8370 }, { "epoch": 0.6631808278867103, "grad_norm": 1.7268331743191982, "learning_rate": 5.382419030499107e-06, "loss": 0.3081, "step": 8371 }, { "epoch": 0.6632600514953456, "grad_norm": 1.2224559884035995, "learning_rate": 5.380143097267723e-06, "loss": 0.2135, "step": 8372 }, { "epoch": 0.6633392751039809, "grad_norm": 1.4065575245339281, "learning_rate": 5.377867468231695e-06, "loss": 0.2067, "step": 8373 }, { "epoch": 0.6634184987126164, "grad_norm": 1.1803841719814607, "learning_rate": 5.3755921435408464e-06, "loss": 0.1844, "step": 8374 }, { "epoch": 0.6634977223212517, "grad_norm": 1.2172472524865847, "learning_rate": 5.373317123345008e-06, "loss": 0.2221, "step": 8375 }, { "epoch": 0.6635769459298871, "grad_norm": 1.2480401821511662, "learning_rate": 5.371042407793974e-06, "loss": 0.1877, "step": 8376 }, { "epoch": 0.6636561695385225, "grad_norm": 1.4717036508368897, "learning_rate": 5.368767997037521e-06, "loss": 0.2942, "step": 8377 }, { "epoch": 0.6637353931471579, "grad_norm": 1.0134401571970157, "learning_rate": 5.366493891225415e-06, "loss": 0.1901, "step": 8378 }, { "epoch": 0.6638146167557932, "grad_norm": 1.1112482262365826, "learning_rate": 5.3642200905073914e-06, "loss": 0.2169, "step": 8379 }, { "epoch": 0.6638938403644286, "grad_norm": 1.1695907281471865, "learning_rate": 5.361946595033165e-06, "loss": 0.2204, "step": 8380 }, { "epoch": 0.663973063973064, "grad_norm": 1.3739125590736159, "learning_rate": 5.359673404952442e-06, "loss": 0.2422, "step": 8381 }, { "epoch": 0.6640522875816993, "grad_norm": 1.195805585087894, "learning_rate": 5.357400520414898e-06, "loss": 0.2693, "step": 8382 }, { "epoch": 0.6641315111903348, "grad_norm": 1.2661165772777006, "learning_rate": 5.355127941570191e-06, "loss": 0.244, "step": 8383 }, { "epoch": 0.6642107347989701, "grad_norm": 0.9370240991230251, "learning_rate": 5.352855668567956e-06, "loss": 0.1232, "step": 8384 }, { "epoch": 0.6642899584076055, "grad_norm": 1.6172551702759002, "learning_rate": 5.350583701557816e-06, "loss": 0.2761, "step": 8385 }, { "epoch": 0.6643691820162408, "grad_norm": 1.293160169426191, "learning_rate": 5.348312040689369e-06, "loss": 0.1908, "step": 8386 }, { "epoch": 0.6644484056248762, "grad_norm": 1.3561173474545039, "learning_rate": 5.346040686112189e-06, "loss": 0.2291, "step": 8387 }, { "epoch": 0.6645276292335116, "grad_norm": 1.2176060804054414, "learning_rate": 5.34376963797584e-06, "loss": 0.2455, "step": 8388 }, { "epoch": 0.6646068528421469, "grad_norm": 1.3198655701131181, "learning_rate": 5.3414988964298555e-06, "loss": 0.2862, "step": 8389 }, { "epoch": 0.6646860764507824, "grad_norm": 1.1604702791360113, "learning_rate": 5.3392284616237486e-06, "loss": 0.204, "step": 8390 }, { "epoch": 0.6647653000594177, "grad_norm": 1.2216233457999, "learning_rate": 5.336958333707026e-06, "loss": 0.1788, "step": 8391 }, { "epoch": 0.6648445236680531, "grad_norm": 1.6633590522851645, "learning_rate": 5.33468851282916e-06, "loss": 0.2555, "step": 8392 }, { "epoch": 0.6649237472766885, "grad_norm": 1.616510431791573, "learning_rate": 5.332418999139604e-06, "loss": 0.2635, "step": 8393 }, { "epoch": 0.6650029708853238, "grad_norm": 1.3381350250591473, "learning_rate": 5.330149792787801e-06, "loss": 0.2218, "step": 8394 }, { "epoch": 0.6650821944939592, "grad_norm": 1.9189265524650396, "learning_rate": 5.3278808939231654e-06, "loss": 0.3083, "step": 8395 }, { "epoch": 0.6651614181025945, "grad_norm": 1.6407692076974278, "learning_rate": 5.32561230269509e-06, "loss": 0.2311, "step": 8396 }, { "epoch": 0.66524064171123, "grad_norm": 1.4622716419975412, "learning_rate": 5.32334401925295e-06, "loss": 0.2655, "step": 8397 }, { "epoch": 0.6653198653198653, "grad_norm": 1.410505805108147, "learning_rate": 5.321076043746108e-06, "loss": 0.3128, "step": 8398 }, { "epoch": 0.6653990889285007, "grad_norm": 1.3380589622612908, "learning_rate": 5.318808376323895e-06, "loss": 0.3093, "step": 8399 }, { "epoch": 0.6654783125371361, "grad_norm": 1.4032477156111962, "learning_rate": 5.316541017135622e-06, "loss": 0.2241, "step": 8400 }, { "epoch": 0.6655575361457714, "grad_norm": 1.441332382555078, "learning_rate": 5.314273966330591e-06, "loss": 0.2432, "step": 8401 }, { "epoch": 0.6656367597544068, "grad_norm": 1.645489629587223, "learning_rate": 5.3120072240580735e-06, "loss": 0.3441, "step": 8402 }, { "epoch": 0.6657159833630422, "grad_norm": 1.2124897845237796, "learning_rate": 5.309740790467319e-06, "loss": 0.2366, "step": 8403 }, { "epoch": 0.6657952069716776, "grad_norm": 1.4168855422162083, "learning_rate": 5.307474665707569e-06, "loss": 0.2054, "step": 8404 }, { "epoch": 0.6658744305803129, "grad_norm": 1.2560444462070148, "learning_rate": 5.305208849928034e-06, "loss": 0.1564, "step": 8405 }, { "epoch": 0.6659536541889483, "grad_norm": 1.5681134696838033, "learning_rate": 5.302943343277902e-06, "loss": 0.2732, "step": 8406 }, { "epoch": 0.6660328777975837, "grad_norm": 1.7513691102008173, "learning_rate": 5.300678145906354e-06, "loss": 0.3366, "step": 8407 }, { "epoch": 0.666112101406219, "grad_norm": 1.8098170248833743, "learning_rate": 5.298413257962538e-06, "loss": 0.2996, "step": 8408 }, { "epoch": 0.6661913250148545, "grad_norm": 1.2707327230635488, "learning_rate": 5.296148679595583e-06, "loss": 0.2315, "step": 8409 }, { "epoch": 0.6662705486234898, "grad_norm": 1.2813082736197279, "learning_rate": 5.293884410954608e-06, "loss": 0.2243, "step": 8410 }, { "epoch": 0.6663497722321252, "grad_norm": 1.14942739380983, "learning_rate": 5.291620452188699e-06, "loss": 0.1942, "step": 8411 }, { "epoch": 0.6664289958407605, "grad_norm": 1.168954136334741, "learning_rate": 5.28935680344693e-06, "loss": 0.166, "step": 8412 }, { "epoch": 0.6665082194493959, "grad_norm": 1.326096717153951, "learning_rate": 5.287093464878343e-06, "loss": 0.2357, "step": 8413 }, { "epoch": 0.6665874430580313, "grad_norm": 1.4431652099713854, "learning_rate": 5.28483043663198e-06, "loss": 0.2854, "step": 8414 }, { "epoch": 0.6666666666666666, "grad_norm": 1.2459451609829055, "learning_rate": 5.282567718856845e-06, "loss": 0.2501, "step": 8415 }, { "epoch": 0.6667458902753021, "grad_norm": 1.822981619676857, "learning_rate": 5.280305311701921e-06, "loss": 0.3417, "step": 8416 }, { "epoch": 0.6668251138839374, "grad_norm": 1.520409681572383, "learning_rate": 5.278043215316189e-06, "loss": 0.2515, "step": 8417 }, { "epoch": 0.6669043374925728, "grad_norm": 1.52910510635187, "learning_rate": 5.275781429848589e-06, "loss": 0.3594, "step": 8418 }, { "epoch": 0.6669835611012082, "grad_norm": 1.274697958119952, "learning_rate": 5.273519955448047e-06, "loss": 0.1938, "step": 8419 }, { "epoch": 0.6670627847098435, "grad_norm": 1.3571398947702835, "learning_rate": 5.271258792263476e-06, "loss": 0.1616, "step": 8420 }, { "epoch": 0.6671420083184789, "grad_norm": 1.1099998093198988, "learning_rate": 5.268997940443762e-06, "loss": 0.2173, "step": 8421 }, { "epoch": 0.6672212319271142, "grad_norm": 1.3393583314098787, "learning_rate": 5.266737400137765e-06, "loss": 0.2252, "step": 8422 }, { "epoch": 0.6673004555357497, "grad_norm": 1.4986042238431063, "learning_rate": 5.26447717149434e-06, "loss": 0.3519, "step": 8423 }, { "epoch": 0.667379679144385, "grad_norm": 1.515978760049668, "learning_rate": 5.2622172546623055e-06, "loss": 0.3013, "step": 8424 }, { "epoch": 0.6674589027530204, "grad_norm": 1.1733334031683973, "learning_rate": 5.259957649790466e-06, "loss": 0.2071, "step": 8425 }, { "epoch": 0.6675381263616558, "grad_norm": 1.4020459253389366, "learning_rate": 5.257698357027609e-06, "loss": 0.248, "step": 8426 }, { "epoch": 0.6676173499702911, "grad_norm": 1.405320425980236, "learning_rate": 5.2554393765225e-06, "loss": 0.2485, "step": 8427 }, { "epoch": 0.6676965735789265, "grad_norm": 1.1675120456773727, "learning_rate": 5.253180708423877e-06, "loss": 0.2084, "step": 8428 }, { "epoch": 0.6677757971875619, "grad_norm": 1.6621527545587165, "learning_rate": 5.25092235288046e-06, "loss": 0.3226, "step": 8429 }, { "epoch": 0.6678550207961973, "grad_norm": 1.833572965974962, "learning_rate": 5.248664310040958e-06, "loss": 0.3796, "step": 8430 }, { "epoch": 0.6679342444048326, "grad_norm": 1.5756434244848083, "learning_rate": 5.246406580054051e-06, "loss": 0.2665, "step": 8431 }, { "epoch": 0.6680134680134681, "grad_norm": 1.0922043325905952, "learning_rate": 5.244149163068394e-06, "loss": 0.2668, "step": 8432 }, { "epoch": 0.6680926916221034, "grad_norm": 1.5790725364950091, "learning_rate": 5.241892059232634e-06, "loss": 0.3399, "step": 8433 }, { "epoch": 0.6681719152307387, "grad_norm": 1.2691597457055435, "learning_rate": 5.239635268695386e-06, "loss": 0.3114, "step": 8434 }, { "epoch": 0.6682511388393741, "grad_norm": 1.124685674957571, "learning_rate": 5.237378791605249e-06, "loss": 0.1443, "step": 8435 }, { "epoch": 0.6683303624480095, "grad_norm": 1.4088100322452433, "learning_rate": 5.235122628110805e-06, "loss": 0.2443, "step": 8436 }, { "epoch": 0.6684095860566449, "grad_norm": 1.506275338203759, "learning_rate": 5.232866778360608e-06, "loss": 0.225, "step": 8437 }, { "epoch": 0.6684888096652802, "grad_norm": 1.2302394320978156, "learning_rate": 5.230611242503193e-06, "loss": 0.2566, "step": 8438 }, { "epoch": 0.6685680332739157, "grad_norm": 1.3367305292439307, "learning_rate": 5.228356020687082e-06, "loss": 0.1728, "step": 8439 }, { "epoch": 0.668647256882551, "grad_norm": 1.570880041465479, "learning_rate": 5.226101113060769e-06, "loss": 0.4568, "step": 8440 }, { "epoch": 0.6687264804911863, "grad_norm": 1.2146470855416167, "learning_rate": 5.223846519772722e-06, "loss": 0.1659, "step": 8441 }, { "epoch": 0.6688057040998218, "grad_norm": 0.9793511826724763, "learning_rate": 5.221592240971403e-06, "loss": 0.1179, "step": 8442 }, { "epoch": 0.6688849277084571, "grad_norm": 1.311119656394066, "learning_rate": 5.219338276805243e-06, "loss": 0.2936, "step": 8443 }, { "epoch": 0.6689641513170925, "grad_norm": 1.264479814063757, "learning_rate": 5.217084627422656e-06, "loss": 0.2176, "step": 8444 }, { "epoch": 0.6690433749257279, "grad_norm": 1.559064233739909, "learning_rate": 5.214831292972027e-06, "loss": 0.3054, "step": 8445 }, { "epoch": 0.6691225985343633, "grad_norm": 1.469417472785673, "learning_rate": 5.212578273601738e-06, "loss": 0.2489, "step": 8446 }, { "epoch": 0.6692018221429986, "grad_norm": 1.4144467094226918, "learning_rate": 5.210325569460133e-06, "loss": 0.2049, "step": 8447 }, { "epoch": 0.6692810457516339, "grad_norm": 1.7113882056992418, "learning_rate": 5.208073180695538e-06, "loss": 0.283, "step": 8448 }, { "epoch": 0.6693602693602694, "grad_norm": 1.4925757654922105, "learning_rate": 5.205821107456273e-06, "loss": 0.2645, "step": 8449 }, { "epoch": 0.6694394929689047, "grad_norm": 1.3284292865441338, "learning_rate": 5.203569349890618e-06, "loss": 0.2357, "step": 8450 }, { "epoch": 0.6695187165775401, "grad_norm": 1.490567371265332, "learning_rate": 5.201317908146843e-06, "loss": 0.2021, "step": 8451 }, { "epoch": 0.6695979401861755, "grad_norm": 1.139763631216647, "learning_rate": 5.199066782373194e-06, "loss": 0.1641, "step": 8452 }, { "epoch": 0.6696771637948109, "grad_norm": 1.4704643530286972, "learning_rate": 5.196815972717897e-06, "loss": 0.2812, "step": 8453 }, { "epoch": 0.6697563874034462, "grad_norm": 1.188518352415322, "learning_rate": 5.194565479329154e-06, "loss": 0.2318, "step": 8454 }, { "epoch": 0.6698356110120816, "grad_norm": 1.157039522355783, "learning_rate": 5.192315302355153e-06, "loss": 0.1978, "step": 8455 }, { "epoch": 0.669914834620717, "grad_norm": 1.6029673775771003, "learning_rate": 5.190065441944059e-06, "loss": 0.2648, "step": 8456 }, { "epoch": 0.6699940582293523, "grad_norm": 1.4121598554047372, "learning_rate": 5.187815898244006e-06, "loss": 0.236, "step": 8457 }, { "epoch": 0.6700732818379878, "grad_norm": 1.6087640823315905, "learning_rate": 5.185566671403126e-06, "loss": 0.3141, "step": 8458 }, { "epoch": 0.6701525054466231, "grad_norm": 1.7917968674289064, "learning_rate": 5.183317761569515e-06, "loss": 0.3191, "step": 8459 }, { "epoch": 0.6702317290552585, "grad_norm": 1.3066643526897765, "learning_rate": 5.181069168891248e-06, "loss": 0.2384, "step": 8460 }, { "epoch": 0.6703109526638938, "grad_norm": 1.0940547286913052, "learning_rate": 5.178820893516394e-06, "loss": 0.1534, "step": 8461 }, { "epoch": 0.6703901762725292, "grad_norm": 1.5425968513334931, "learning_rate": 5.176572935592986e-06, "loss": 0.2228, "step": 8462 }, { "epoch": 0.6704693998811646, "grad_norm": 1.5085774417439075, "learning_rate": 5.1743252952690385e-06, "loss": 0.3214, "step": 8463 }, { "epoch": 0.6705486234897999, "grad_norm": 1.2742431060320412, "learning_rate": 5.172077972692553e-06, "loss": 0.2724, "step": 8464 }, { "epoch": 0.6706278470984354, "grad_norm": 1.3093260273582867, "learning_rate": 5.1698309680115024e-06, "loss": 0.2752, "step": 8465 }, { "epoch": 0.6707070707070707, "grad_norm": 1.544112622530947, "learning_rate": 5.167584281373838e-06, "loss": 0.3363, "step": 8466 }, { "epoch": 0.6707862943157061, "grad_norm": 0.858740691762967, "learning_rate": 5.165337912927502e-06, "loss": 0.1133, "step": 8467 }, { "epoch": 0.6708655179243415, "grad_norm": 1.0593484135962687, "learning_rate": 5.1630918628204e-06, "loss": 0.1754, "step": 8468 }, { "epoch": 0.6709447415329768, "grad_norm": 1.286337851740026, "learning_rate": 5.1608461312004245e-06, "loss": 0.221, "step": 8469 }, { "epoch": 0.6710239651416122, "grad_norm": 1.1830134553335772, "learning_rate": 5.158600718215443e-06, "loss": 0.2016, "step": 8470 }, { "epoch": 0.6711031887502475, "grad_norm": 1.3487162268666644, "learning_rate": 5.156355624013314e-06, "loss": 0.3195, "step": 8471 }, { "epoch": 0.671182412358883, "grad_norm": 1.4888128495542008, "learning_rate": 5.15411084874186e-06, "loss": 0.3181, "step": 8472 }, { "epoch": 0.6712616359675183, "grad_norm": 1.6446068645907317, "learning_rate": 5.151866392548886e-06, "loss": 0.2726, "step": 8473 }, { "epoch": 0.6713408595761537, "grad_norm": 1.1877490702384044, "learning_rate": 5.149622255582185e-06, "loss": 0.2221, "step": 8474 }, { "epoch": 0.6714200831847891, "grad_norm": 1.0990522472709088, "learning_rate": 5.147378437989522e-06, "loss": 0.1913, "step": 8475 }, { "epoch": 0.6714993067934244, "grad_norm": 1.2740815935402738, "learning_rate": 5.145134939918634e-06, "loss": 0.1948, "step": 8476 }, { "epoch": 0.6715785304020598, "grad_norm": 1.3417832980468165, "learning_rate": 5.1428917615172555e-06, "loss": 0.2577, "step": 8477 }, { "epoch": 0.6716577540106952, "grad_norm": 1.4687130162282487, "learning_rate": 5.140648902933083e-06, "loss": 0.2008, "step": 8478 }, { "epoch": 0.6717369776193306, "grad_norm": 1.121392047181105, "learning_rate": 5.138406364313795e-06, "loss": 0.1886, "step": 8479 }, { "epoch": 0.6718162012279659, "grad_norm": 1.2229166373422677, "learning_rate": 5.136164145807059e-06, "loss": 0.2089, "step": 8480 }, { "epoch": 0.6718954248366014, "grad_norm": 1.7232786237188593, "learning_rate": 5.13392224756051e-06, "loss": 0.3236, "step": 8481 }, { "epoch": 0.6719746484452367, "grad_norm": 1.627451441971832, "learning_rate": 5.131680669721768e-06, "loss": 0.2498, "step": 8482 }, { "epoch": 0.672053872053872, "grad_norm": 1.4868196604619808, "learning_rate": 5.129439412438424e-06, "loss": 0.296, "step": 8483 }, { "epoch": 0.6721330956625075, "grad_norm": 2.1039698134866898, "learning_rate": 5.127198475858064e-06, "loss": 0.4189, "step": 8484 }, { "epoch": 0.6722123192711428, "grad_norm": 1.3308319363700254, "learning_rate": 5.124957860128237e-06, "loss": 0.2114, "step": 8485 }, { "epoch": 0.6722915428797782, "grad_norm": 1.3765890639853047, "learning_rate": 5.122717565396474e-06, "loss": 0.2948, "step": 8486 }, { "epoch": 0.6723707664884135, "grad_norm": 1.0239417494022371, "learning_rate": 5.1204775918102955e-06, "loss": 0.1926, "step": 8487 }, { "epoch": 0.6724499900970489, "grad_norm": 1.1905686610768917, "learning_rate": 5.11823793951719e-06, "loss": 0.1737, "step": 8488 }, { "epoch": 0.6725292137056843, "grad_norm": 1.8023486808047542, "learning_rate": 5.115998608664621e-06, "loss": 0.348, "step": 8489 }, { "epoch": 0.6726084373143196, "grad_norm": 0.9987481576320517, "learning_rate": 5.1137595994000475e-06, "loss": 0.158, "step": 8490 }, { "epoch": 0.6726876609229551, "grad_norm": 1.444126904666399, "learning_rate": 5.111520911870894e-06, "loss": 0.2027, "step": 8491 }, { "epoch": 0.6727668845315904, "grad_norm": 1.2909806308018084, "learning_rate": 5.109282546224563e-06, "loss": 0.2702, "step": 8492 }, { "epoch": 0.6728461081402258, "grad_norm": 1.6389571173709172, "learning_rate": 5.107044502608447e-06, "loss": 0.3497, "step": 8493 }, { "epoch": 0.6729253317488612, "grad_norm": 1.2757763688490689, "learning_rate": 5.104806781169906e-06, "loss": 0.2155, "step": 8494 }, { "epoch": 0.6730045553574965, "grad_norm": 1.5524669891540126, "learning_rate": 5.102569382056281e-06, "loss": 0.2848, "step": 8495 }, { "epoch": 0.6730837789661319, "grad_norm": 1.005628449136803, "learning_rate": 5.100332305414902e-06, "loss": 0.1861, "step": 8496 }, { "epoch": 0.6731630025747672, "grad_norm": 1.1559717672010146, "learning_rate": 5.098095551393066e-06, "loss": 0.2316, "step": 8497 }, { "epoch": 0.6732422261834027, "grad_norm": 1.4256732174835267, "learning_rate": 5.095859120138049e-06, "loss": 0.2419, "step": 8498 }, { "epoch": 0.673321449792038, "grad_norm": 1.3173134431924085, "learning_rate": 5.093623011797108e-06, "loss": 0.1876, "step": 8499 }, { "epoch": 0.6734006734006734, "grad_norm": 1.5911699341757466, "learning_rate": 5.091387226517489e-06, "loss": 0.2915, "step": 8500 }, { "epoch": 0.6734798970093088, "grad_norm": 1.1139462187101312, "learning_rate": 5.089151764446403e-06, "loss": 0.1811, "step": 8501 }, { "epoch": 0.6735591206179441, "grad_norm": 1.1686913602798101, "learning_rate": 5.086916625731038e-06, "loss": 0.1525, "step": 8502 }, { "epoch": 0.6736383442265795, "grad_norm": 1.5299993645370331, "learning_rate": 5.084681810518577e-06, "loss": 0.2097, "step": 8503 }, { "epoch": 0.6737175678352149, "grad_norm": 1.3585993864843418, "learning_rate": 5.0824473189561695e-06, "loss": 0.2968, "step": 8504 }, { "epoch": 0.6737967914438503, "grad_norm": 1.27432862901369, "learning_rate": 5.080213151190938e-06, "loss": 0.2051, "step": 8505 }, { "epoch": 0.6738760150524856, "grad_norm": 1.5630370579833923, "learning_rate": 5.077979307370004e-06, "loss": 0.2414, "step": 8506 }, { "epoch": 0.6739552386611211, "grad_norm": 1.6661789343586069, "learning_rate": 5.075745787640448e-06, "loss": 0.3777, "step": 8507 }, { "epoch": 0.6740344622697564, "grad_norm": 1.3845123026878738, "learning_rate": 5.073512592149334e-06, "loss": 0.249, "step": 8508 }, { "epoch": 0.6741136858783917, "grad_norm": 1.174362597711559, "learning_rate": 5.071279721043716e-06, "loss": 0.2074, "step": 8509 }, { "epoch": 0.6741929094870271, "grad_norm": 1.7705932258648158, "learning_rate": 5.069047174470613e-06, "loss": 0.2935, "step": 8510 }, { "epoch": 0.6742721330956625, "grad_norm": 1.4229093657661485, "learning_rate": 5.066814952577021e-06, "loss": 0.307, "step": 8511 }, { "epoch": 0.6743513567042979, "grad_norm": 1.6769480762142728, "learning_rate": 5.064583055509935e-06, "loss": 0.3272, "step": 8512 }, { "epoch": 0.6744305803129332, "grad_norm": 1.3281396162767891, "learning_rate": 5.062351483416304e-06, "loss": 0.2595, "step": 8513 }, { "epoch": 0.6745098039215687, "grad_norm": 1.201852454644897, "learning_rate": 5.060120236443071e-06, "loss": 0.2006, "step": 8514 }, { "epoch": 0.674589027530204, "grad_norm": 1.2356393449355945, "learning_rate": 5.057889314737148e-06, "loss": 0.2186, "step": 8515 }, { "epoch": 0.6746682511388393, "grad_norm": 1.3155436934862497, "learning_rate": 5.055658718445435e-06, "loss": 0.2524, "step": 8516 }, { "epoch": 0.6747474747474748, "grad_norm": 1.7750986944987535, "learning_rate": 5.053428447714806e-06, "loss": 0.379, "step": 8517 }, { "epoch": 0.6748266983561101, "grad_norm": 1.2559600000096462, "learning_rate": 5.05119850269211e-06, "loss": 0.2072, "step": 8518 }, { "epoch": 0.6749059219647455, "grad_norm": 1.1358965470143332, "learning_rate": 5.048968883524182e-06, "loss": 0.1779, "step": 8519 }, { "epoch": 0.6749851455733809, "grad_norm": 1.1901867645170272, "learning_rate": 5.046739590357832e-06, "loss": 0.2308, "step": 8520 }, { "epoch": 0.6750643691820163, "grad_norm": 1.3546082273199074, "learning_rate": 5.044510623339842e-06, "loss": 0.2334, "step": 8521 }, { "epoch": 0.6751435927906516, "grad_norm": 1.7713516653502106, "learning_rate": 5.042281982616986e-06, "loss": 0.3492, "step": 8522 }, { "epoch": 0.6752228163992869, "grad_norm": 1.2446057612552865, "learning_rate": 5.0400536683360064e-06, "loss": 0.1683, "step": 8523 }, { "epoch": 0.6753020400079224, "grad_norm": 1.3075030122277054, "learning_rate": 5.037825680643624e-06, "loss": 0.1911, "step": 8524 }, { "epoch": 0.6753812636165577, "grad_norm": 1.4412739645827133, "learning_rate": 5.035598019686549e-06, "loss": 0.251, "step": 8525 }, { "epoch": 0.6754604872251931, "grad_norm": 1.3244581510206603, "learning_rate": 5.033370685611456e-06, "loss": 0.2223, "step": 8526 }, { "epoch": 0.6755397108338285, "grad_norm": 1.556723114017837, "learning_rate": 5.031143678565005e-06, "loss": 0.3142, "step": 8527 }, { "epoch": 0.6756189344424639, "grad_norm": 1.4241638309888554, "learning_rate": 5.028916998693831e-06, "loss": 0.2512, "step": 8528 }, { "epoch": 0.6756981580510992, "grad_norm": 1.2360192743296314, "learning_rate": 5.02669064614456e-06, "loss": 0.1853, "step": 8529 }, { "epoch": 0.6757773816597346, "grad_norm": 1.5868442034821189, "learning_rate": 5.024464621063773e-06, "loss": 0.2948, "step": 8530 }, { "epoch": 0.67585660526837, "grad_norm": 1.283333770338844, "learning_rate": 5.022238923598055e-06, "loss": 0.2354, "step": 8531 }, { "epoch": 0.6759358288770053, "grad_norm": 1.2101424870751536, "learning_rate": 5.020013553893952e-06, "loss": 0.2214, "step": 8532 }, { "epoch": 0.6760150524856408, "grad_norm": 1.4364586890311886, "learning_rate": 5.017788512097989e-06, "loss": 0.2765, "step": 8533 }, { "epoch": 0.6760942760942761, "grad_norm": 1.4230165155876449, "learning_rate": 5.015563798356684e-06, "loss": 0.2996, "step": 8534 }, { "epoch": 0.6761734997029115, "grad_norm": 1.2452297731025281, "learning_rate": 5.0133394128165204e-06, "loss": 0.1962, "step": 8535 }, { "epoch": 0.6762527233115468, "grad_norm": 1.3670093631658324, "learning_rate": 5.011115355623957e-06, "loss": 0.2449, "step": 8536 }, { "epoch": 0.6763319469201822, "grad_norm": 1.2923012252307138, "learning_rate": 5.008891626925447e-06, "loss": 0.245, "step": 8537 }, { "epoch": 0.6764111705288176, "grad_norm": 1.1383179692423497, "learning_rate": 5.006668226867407e-06, "loss": 0.1846, "step": 8538 }, { "epoch": 0.6764903941374529, "grad_norm": 1.3503419657400249, "learning_rate": 5.004445155596238e-06, "loss": 0.2412, "step": 8539 }, { "epoch": 0.6765696177460884, "grad_norm": 1.0356937427480393, "learning_rate": 5.0022224132583154e-06, "loss": 0.1732, "step": 8540 }, { "epoch": 0.6766488413547237, "grad_norm": 1.0524578307785075, "learning_rate": 5.000000000000003e-06, "loss": 0.1456, "step": 8541 }, { "epoch": 0.6767280649633591, "grad_norm": 1.2741921087485921, "learning_rate": 4.997777915967631e-06, "loss": 0.2426, "step": 8542 }, { "epoch": 0.6768072885719945, "grad_norm": 1.4206165948309513, "learning_rate": 4.995556161307511e-06, "loss": 0.3036, "step": 8543 }, { "epoch": 0.6768865121806298, "grad_norm": 1.2446505785218223, "learning_rate": 4.993334736165941e-06, "loss": 0.2099, "step": 8544 }, { "epoch": 0.6769657357892652, "grad_norm": 1.346416870547465, "learning_rate": 4.991113640689189e-06, "loss": 0.2301, "step": 8545 }, { "epoch": 0.6770449593979005, "grad_norm": 1.4051032845380176, "learning_rate": 4.988892875023499e-06, "loss": 0.3313, "step": 8546 }, { "epoch": 0.677124183006536, "grad_norm": 1.5692723343691821, "learning_rate": 4.9866724393151044e-06, "loss": 0.2838, "step": 8547 }, { "epoch": 0.6772034066151713, "grad_norm": 1.081993396857605, "learning_rate": 4.984452333710207e-06, "loss": 0.1436, "step": 8548 }, { "epoch": 0.6772826302238067, "grad_norm": 1.3878648851649706, "learning_rate": 4.982232558354986e-06, "loss": 0.2102, "step": 8549 }, { "epoch": 0.6773618538324421, "grad_norm": 1.0320329047141141, "learning_rate": 4.980013113395612e-06, "loss": 0.1597, "step": 8550 }, { "epoch": 0.6774410774410774, "grad_norm": 1.496138733532675, "learning_rate": 4.9777939989782185e-06, "loss": 0.3216, "step": 8551 }, { "epoch": 0.6775203010497128, "grad_norm": 1.4446178105813183, "learning_rate": 4.975575215248926e-06, "loss": 0.2606, "step": 8552 }, { "epoch": 0.6775995246583482, "grad_norm": 1.71587294719814, "learning_rate": 4.9733567623538245e-06, "loss": 0.3823, "step": 8553 }, { "epoch": 0.6776787482669836, "grad_norm": 1.3290831786811594, "learning_rate": 4.9711386404389995e-06, "loss": 0.1807, "step": 8554 }, { "epoch": 0.6777579718756189, "grad_norm": 1.8292531658663334, "learning_rate": 4.968920849650496e-06, "loss": 0.291, "step": 8555 }, { "epoch": 0.6778371954842544, "grad_norm": 1.4476667885143453, "learning_rate": 4.966703390134343e-06, "loss": 0.2544, "step": 8556 }, { "epoch": 0.6779164190928897, "grad_norm": 1.2217352397274268, "learning_rate": 4.964486262036557e-06, "loss": 0.2115, "step": 8557 }, { "epoch": 0.677995642701525, "grad_norm": 1.2727769476316784, "learning_rate": 4.962269465503121e-06, "loss": 0.2694, "step": 8558 }, { "epoch": 0.6780748663101605, "grad_norm": 1.3004398328897144, "learning_rate": 4.960053000679997e-06, "loss": 0.281, "step": 8559 }, { "epoch": 0.6781540899187958, "grad_norm": 1.0766637861645403, "learning_rate": 4.957836867713138e-06, "loss": 0.2118, "step": 8560 }, { "epoch": 0.6782333135274312, "grad_norm": 0.9833972780603426, "learning_rate": 4.955621066748457e-06, "loss": 0.1567, "step": 8561 }, { "epoch": 0.6783125371360665, "grad_norm": 1.272943105731747, "learning_rate": 4.953405597931854e-06, "loss": 0.2609, "step": 8562 }, { "epoch": 0.6783917607447019, "grad_norm": 1.34055412456491, "learning_rate": 4.951190461409214e-06, "loss": 0.3105, "step": 8563 }, { "epoch": 0.6784709843533373, "grad_norm": 1.0597316522501141, "learning_rate": 4.948975657326388e-06, "loss": 0.1565, "step": 8564 }, { "epoch": 0.6785502079619726, "grad_norm": 1.2370891530969725, "learning_rate": 4.946761185829208e-06, "loss": 0.2557, "step": 8565 }, { "epoch": 0.6786294315706081, "grad_norm": 1.251515660269242, "learning_rate": 4.944547047063493e-06, "loss": 0.2265, "step": 8566 }, { "epoch": 0.6787086551792434, "grad_norm": 1.267939094041213, "learning_rate": 4.942333241175029e-06, "loss": 0.2647, "step": 8567 }, { "epoch": 0.6787878787878788, "grad_norm": 1.2703385903366424, "learning_rate": 4.940119768309585e-06, "loss": 0.3006, "step": 8568 }, { "epoch": 0.6788671023965142, "grad_norm": 1.641252186872801, "learning_rate": 4.937906628612905e-06, "loss": 0.2762, "step": 8569 }, { "epoch": 0.6789463260051495, "grad_norm": 1.1852139783363618, "learning_rate": 4.93569382223072e-06, "loss": 0.1936, "step": 8570 }, { "epoch": 0.6790255496137849, "grad_norm": 1.1237934425917269, "learning_rate": 4.933481349308728e-06, "loss": 0.1646, "step": 8571 }, { "epoch": 0.6791047732224202, "grad_norm": 1.2792892159284825, "learning_rate": 4.931269209992607e-06, "loss": 0.1964, "step": 8572 }, { "epoch": 0.6791839968310557, "grad_norm": 1.3535169506301947, "learning_rate": 4.929057404428023e-06, "loss": 0.229, "step": 8573 }, { "epoch": 0.679263220439691, "grad_norm": 1.270914990598429, "learning_rate": 4.926845932760609e-06, "loss": 0.236, "step": 8574 }, { "epoch": 0.6793424440483264, "grad_norm": 1.4601992781719328, "learning_rate": 4.924634795135976e-06, "loss": 0.2838, "step": 8575 }, { "epoch": 0.6794216676569618, "grad_norm": 1.0870324693730369, "learning_rate": 4.922423991699725e-06, "loss": 0.2215, "step": 8576 }, { "epoch": 0.6795008912655971, "grad_norm": 1.2837534706134157, "learning_rate": 4.920213522597422e-06, "loss": 0.1865, "step": 8577 }, { "epoch": 0.6795801148742325, "grad_norm": 1.5147009069352122, "learning_rate": 4.918003387974614e-06, "loss": 0.2342, "step": 8578 }, { "epoch": 0.6796593384828679, "grad_norm": 1.4185544406192958, "learning_rate": 4.915793587976832e-06, "loss": 0.2612, "step": 8579 }, { "epoch": 0.6797385620915033, "grad_norm": 1.230529519506329, "learning_rate": 4.913584122749578e-06, "loss": 0.217, "step": 8580 }, { "epoch": 0.6798177857001386, "grad_norm": 1.2563515486373664, "learning_rate": 4.911374992438334e-06, "loss": 0.2081, "step": 8581 }, { "epoch": 0.6798970093087741, "grad_norm": 1.278051742881388, "learning_rate": 4.909166197188563e-06, "loss": 0.2414, "step": 8582 }, { "epoch": 0.6799762329174094, "grad_norm": 1.5299302325629525, "learning_rate": 4.906957737145703e-06, "loss": 0.2439, "step": 8583 }, { "epoch": 0.6800554565260447, "grad_norm": 1.4409084299545565, "learning_rate": 4.904749612455171e-06, "loss": 0.2634, "step": 8584 }, { "epoch": 0.6801346801346801, "grad_norm": 1.388945805481724, "learning_rate": 4.902541823262356e-06, "loss": 0.2138, "step": 8585 }, { "epoch": 0.6802139037433155, "grad_norm": 1.4827560577832648, "learning_rate": 4.900334369712637e-06, "loss": 0.2761, "step": 8586 }, { "epoch": 0.6802931273519509, "grad_norm": 1.4864774984070275, "learning_rate": 4.898127251951363e-06, "loss": 0.2993, "step": 8587 }, { "epoch": 0.6803723509605862, "grad_norm": 1.3524092702526227, "learning_rate": 4.895920470123857e-06, "loss": 0.2232, "step": 8588 }, { "epoch": 0.6804515745692217, "grad_norm": 1.4298295528186709, "learning_rate": 4.893714024375432e-06, "loss": 0.2615, "step": 8589 }, { "epoch": 0.680530798177857, "grad_norm": 1.3163579260613203, "learning_rate": 4.89150791485137e-06, "loss": 0.2048, "step": 8590 }, { "epoch": 0.6806100217864923, "grad_norm": 1.5571368110245922, "learning_rate": 4.889302141696925e-06, "loss": 0.2324, "step": 8591 }, { "epoch": 0.6806892453951278, "grad_norm": 1.1372708376378766, "learning_rate": 4.88709670505735e-06, "loss": 0.2016, "step": 8592 }, { "epoch": 0.6807684690037631, "grad_norm": 1.3949320182730078, "learning_rate": 4.884891605077853e-06, "loss": 0.2366, "step": 8593 }, { "epoch": 0.6808476926123985, "grad_norm": 1.4697455827538954, "learning_rate": 4.882686841903627e-06, "loss": 0.2734, "step": 8594 }, { "epoch": 0.6809269162210339, "grad_norm": 1.1974708909381966, "learning_rate": 4.8804824156798544e-06, "loss": 0.2149, "step": 8595 }, { "epoch": 0.6810061398296693, "grad_norm": 1.334623519574587, "learning_rate": 4.878278326551682e-06, "loss": 0.2597, "step": 8596 }, { "epoch": 0.6810853634383046, "grad_norm": 1.041702276895194, "learning_rate": 4.876074574664232e-06, "loss": 0.1712, "step": 8597 }, { "epoch": 0.6811645870469399, "grad_norm": 1.2225275755342317, "learning_rate": 4.873871160162622e-06, "loss": 0.1966, "step": 8598 }, { "epoch": 0.6812438106555754, "grad_norm": 1.32063055974464, "learning_rate": 4.871668083191931e-06, "loss": 0.2505, "step": 8599 }, { "epoch": 0.6813230342642107, "grad_norm": 1.5434533961499866, "learning_rate": 4.8694653438972195e-06, "loss": 0.2377, "step": 8600 }, { "epoch": 0.6814022578728461, "grad_norm": 1.259061429644916, "learning_rate": 4.867262942423525e-06, "loss": 0.2205, "step": 8601 }, { "epoch": 0.6814814814814815, "grad_norm": 1.3001787816459887, "learning_rate": 4.865060878915873e-06, "loss": 0.2244, "step": 8602 }, { "epoch": 0.6815607050901169, "grad_norm": 1.5300656722630785, "learning_rate": 4.862859153519252e-06, "loss": 0.3558, "step": 8603 }, { "epoch": 0.6816399286987522, "grad_norm": 1.3489123113385095, "learning_rate": 4.860657766378637e-06, "loss": 0.2644, "step": 8604 }, { "epoch": 0.6817191523073876, "grad_norm": 1.1164682734736109, "learning_rate": 4.858456717638981e-06, "loss": 0.1658, "step": 8605 }, { "epoch": 0.681798375916023, "grad_norm": 1.1368699256579105, "learning_rate": 4.856256007445211e-06, "loss": 0.1782, "step": 8606 }, { "epoch": 0.6818775995246583, "grad_norm": 1.117845241395544, "learning_rate": 4.8540556359422335e-06, "loss": 0.2178, "step": 8607 }, { "epoch": 0.6819568231332938, "grad_norm": 0.9665361599182319, "learning_rate": 4.85185560327493e-06, "loss": 0.1887, "step": 8608 }, { "epoch": 0.6820360467419291, "grad_norm": 1.117663236029897, "learning_rate": 4.849655909588165e-06, "loss": 0.2507, "step": 8609 }, { "epoch": 0.6821152703505645, "grad_norm": 1.6921984046164105, "learning_rate": 4.847456555026773e-06, "loss": 0.3705, "step": 8610 }, { "epoch": 0.6821944939591998, "grad_norm": 1.1210723898425137, "learning_rate": 4.845257539735577e-06, "loss": 0.1664, "step": 8611 }, { "epoch": 0.6822737175678352, "grad_norm": 1.415947074903872, "learning_rate": 4.843058863859369e-06, "loss": 0.2803, "step": 8612 }, { "epoch": 0.6823529411764706, "grad_norm": 1.24483140501919, "learning_rate": 4.840860527542919e-06, "loss": 0.205, "step": 8613 }, { "epoch": 0.6824321647851059, "grad_norm": 1.2923913122358617, "learning_rate": 4.838662530930981e-06, "loss": 0.1862, "step": 8614 }, { "epoch": 0.6825113883937414, "grad_norm": 1.3062788589390226, "learning_rate": 4.836464874168282e-06, "loss": 0.2518, "step": 8615 }, { "epoch": 0.6825906120023767, "grad_norm": 1.347790117500209, "learning_rate": 4.834267557399521e-06, "loss": 0.2, "step": 8616 }, { "epoch": 0.6826698356110121, "grad_norm": 1.2176999549205083, "learning_rate": 4.832070580769389e-06, "loss": 0.234, "step": 8617 }, { "epoch": 0.6827490592196475, "grad_norm": 1.3712042554632222, "learning_rate": 4.829873944422544e-06, "loss": 0.2095, "step": 8618 }, { "epoch": 0.6828282828282828, "grad_norm": 1.5183410923960414, "learning_rate": 4.8276776485036185e-06, "loss": 0.2868, "step": 8619 }, { "epoch": 0.6829075064369182, "grad_norm": 1.4883322989058514, "learning_rate": 4.825481693157235e-06, "loss": 0.2597, "step": 8620 }, { "epoch": 0.6829867300455535, "grad_norm": 1.145485347967497, "learning_rate": 4.823286078527984e-06, "loss": 0.1854, "step": 8621 }, { "epoch": 0.683065953654189, "grad_norm": 1.6704907213039475, "learning_rate": 4.8210908047604336e-06, "loss": 0.2967, "step": 8622 }, { "epoch": 0.6831451772628243, "grad_norm": 1.3095787412204616, "learning_rate": 4.818895871999136e-06, "loss": 0.2335, "step": 8623 }, { "epoch": 0.6832244008714597, "grad_norm": 1.1791792651962514, "learning_rate": 4.816701280388617e-06, "loss": 0.1793, "step": 8624 }, { "epoch": 0.6833036244800951, "grad_norm": 1.3638562304683728, "learning_rate": 4.814507030073377e-06, "loss": 0.2062, "step": 8625 }, { "epoch": 0.6833828480887304, "grad_norm": 1.6530399676521763, "learning_rate": 4.812313121197896e-06, "loss": 0.2895, "step": 8626 }, { "epoch": 0.6834620716973658, "grad_norm": 1.7617160150098754, "learning_rate": 4.810119553906637e-06, "loss": 0.3051, "step": 8627 }, { "epoch": 0.6835412953060012, "grad_norm": 1.2416989542473023, "learning_rate": 4.807926328344033e-06, "loss": 0.1912, "step": 8628 }, { "epoch": 0.6836205189146366, "grad_norm": 1.323281728304749, "learning_rate": 4.805733444654496e-06, "loss": 0.2318, "step": 8629 }, { "epoch": 0.6836997425232719, "grad_norm": 1.1683418309452485, "learning_rate": 4.8035409029824195e-06, "loss": 0.218, "step": 8630 }, { "epoch": 0.6837789661319074, "grad_norm": 1.3856863044127457, "learning_rate": 4.801348703472173e-06, "loss": 0.2756, "step": 8631 }, { "epoch": 0.6838581897405427, "grad_norm": 1.9462143901227849, "learning_rate": 4.7991568462680945e-06, "loss": 0.2219, "step": 8632 }, { "epoch": 0.683937413349178, "grad_norm": 1.1454987336134659, "learning_rate": 4.796965331514517e-06, "loss": 0.157, "step": 8633 }, { "epoch": 0.6840166369578135, "grad_norm": 1.1352715343075432, "learning_rate": 4.794774159355737e-06, "loss": 0.1711, "step": 8634 }, { "epoch": 0.6840958605664488, "grad_norm": 1.2226293572108666, "learning_rate": 4.79258332993603e-06, "loss": 0.2928, "step": 8635 }, { "epoch": 0.6841750841750842, "grad_norm": 1.2016806855619928, "learning_rate": 4.7903928433996576e-06, "loss": 0.1816, "step": 8636 }, { "epoch": 0.6842543077837195, "grad_norm": 1.1763682111881113, "learning_rate": 4.788202699890848e-06, "loss": 0.2076, "step": 8637 }, { "epoch": 0.684333531392355, "grad_norm": 1.7213378312112722, "learning_rate": 4.786012899553815e-06, "loss": 0.3442, "step": 8638 }, { "epoch": 0.6844127550009903, "grad_norm": 1.2935252698058537, "learning_rate": 4.783823442532739e-06, "loss": 0.2891, "step": 8639 }, { "epoch": 0.6844919786096256, "grad_norm": 1.2061771396523617, "learning_rate": 4.781634328971796e-06, "loss": 0.2254, "step": 8640 }, { "epoch": 0.6845712022182611, "grad_norm": 1.3499445483141876, "learning_rate": 4.779445559015122e-06, "loss": 0.2311, "step": 8641 }, { "epoch": 0.6846504258268964, "grad_norm": 1.5390461832226396, "learning_rate": 4.777257132806835e-06, "loss": 0.262, "step": 8642 }, { "epoch": 0.6847296494355318, "grad_norm": 1.154896922438916, "learning_rate": 4.775069050491039e-06, "loss": 0.1816, "step": 8643 }, { "epoch": 0.6848088730441672, "grad_norm": 1.3634878824321564, "learning_rate": 4.772881312211805e-06, "loss": 0.2733, "step": 8644 }, { "epoch": 0.6848880966528025, "grad_norm": 1.3856473474547926, "learning_rate": 4.770693918113183e-06, "loss": 0.2377, "step": 8645 }, { "epoch": 0.6849673202614379, "grad_norm": 1.128114675175529, "learning_rate": 4.768506868339206e-06, "loss": 0.1942, "step": 8646 }, { "epoch": 0.6850465438700732, "grad_norm": 1.3358746521230798, "learning_rate": 4.766320163033882e-06, "loss": 0.2585, "step": 8647 }, { "epoch": 0.6851257674787087, "grad_norm": 1.2952707079162062, "learning_rate": 4.764133802341188e-06, "loss": 0.2014, "step": 8648 }, { "epoch": 0.685204991087344, "grad_norm": 1.758093567423146, "learning_rate": 4.761947786405092e-06, "loss": 0.3067, "step": 8649 }, { "epoch": 0.6852842146959794, "grad_norm": 1.2860424786279239, "learning_rate": 4.759762115369531e-06, "loss": 0.2163, "step": 8650 }, { "epoch": 0.6853634383046148, "grad_norm": 1.5019812376462904, "learning_rate": 4.7575767893784174e-06, "loss": 0.2491, "step": 8651 }, { "epoch": 0.6854426619132501, "grad_norm": 1.2064717851010112, "learning_rate": 4.755391808575651e-06, "loss": 0.1965, "step": 8652 }, { "epoch": 0.6855218855218855, "grad_norm": 1.727082139943516, "learning_rate": 4.7532071731050975e-06, "loss": 0.343, "step": 8653 }, { "epoch": 0.6856011091305209, "grad_norm": 1.0309992307517994, "learning_rate": 4.7510228831106064e-06, "loss": 0.1793, "step": 8654 }, { "epoch": 0.6856803327391563, "grad_norm": 1.3969742025231717, "learning_rate": 4.748838938735999e-06, "loss": 0.2725, "step": 8655 }, { "epoch": 0.6857595563477916, "grad_norm": 1.190236086142712, "learning_rate": 4.746655340125082e-06, "loss": 0.2112, "step": 8656 }, { "epoch": 0.6858387799564271, "grad_norm": 1.1543977751939545, "learning_rate": 4.744472087421635e-06, "loss": 0.1994, "step": 8657 }, { "epoch": 0.6859180035650624, "grad_norm": 1.1914671245124866, "learning_rate": 4.74228918076941e-06, "loss": 0.1743, "step": 8658 }, { "epoch": 0.6859972271736977, "grad_norm": 1.3011104136012606, "learning_rate": 4.740106620312147e-06, "loss": 0.2404, "step": 8659 }, { "epoch": 0.6860764507823331, "grad_norm": 1.2144773451360018, "learning_rate": 4.737924406193554e-06, "loss": 0.2079, "step": 8660 }, { "epoch": 0.6861556743909685, "grad_norm": 1.1849785985586108, "learning_rate": 4.735742538557316e-06, "loss": 0.2054, "step": 8661 }, { "epoch": 0.6862348979996039, "grad_norm": 1.3869187978249207, "learning_rate": 4.733561017547104e-06, "loss": 0.281, "step": 8662 }, { "epoch": 0.6863141216082392, "grad_norm": 1.2918667619297424, "learning_rate": 4.73137984330656e-06, "loss": 0.2742, "step": 8663 }, { "epoch": 0.6863933452168747, "grad_norm": 1.0196352551807941, "learning_rate": 4.729199015979298e-06, "loss": 0.1816, "step": 8664 }, { "epoch": 0.68647256882551, "grad_norm": 1.2178115970693308, "learning_rate": 4.727018535708922e-06, "loss": 0.172, "step": 8665 }, { "epoch": 0.6865517924341453, "grad_norm": 1.5066039014437018, "learning_rate": 4.724838402639006e-06, "loss": 0.3132, "step": 8666 }, { "epoch": 0.6866310160427808, "grad_norm": 1.1456257437859685, "learning_rate": 4.7226586169130925e-06, "loss": 0.2016, "step": 8667 }, { "epoch": 0.6867102396514161, "grad_norm": 1.2602859007210447, "learning_rate": 4.7204791786747215e-06, "loss": 0.2405, "step": 8668 }, { "epoch": 0.6867894632600515, "grad_norm": 1.4354888054579482, "learning_rate": 4.718300088067392e-06, "loss": 0.2455, "step": 8669 }, { "epoch": 0.6868686868686869, "grad_norm": 1.3860828594033565, "learning_rate": 4.716121345234589e-06, "loss": 0.2227, "step": 8670 }, { "epoch": 0.6869479104773223, "grad_norm": 1.7746613521586, "learning_rate": 4.713942950319767e-06, "loss": 0.2306, "step": 8671 }, { "epoch": 0.6870271340859576, "grad_norm": 1.3516699810515453, "learning_rate": 4.71176490346637e-06, "loss": 0.2236, "step": 8672 }, { "epoch": 0.6871063576945929, "grad_norm": 1.195409475282975, "learning_rate": 4.709587204817809e-06, "loss": 0.146, "step": 8673 }, { "epoch": 0.6871855813032284, "grad_norm": 1.221400101328681, "learning_rate": 4.707409854517471e-06, "loss": 0.2292, "step": 8674 }, { "epoch": 0.6872648049118637, "grad_norm": 1.65413198229928, "learning_rate": 4.705232852708732e-06, "loss": 0.3104, "step": 8675 }, { "epoch": 0.6873440285204991, "grad_norm": 1.5063706668556864, "learning_rate": 4.703056199534933e-06, "loss": 0.2406, "step": 8676 }, { "epoch": 0.6874232521291345, "grad_norm": 1.1346410709801327, "learning_rate": 4.700879895139391e-06, "loss": 0.181, "step": 8677 }, { "epoch": 0.6875024757377699, "grad_norm": 1.3318105281097143, "learning_rate": 4.698703939665414e-06, "loss": 0.2183, "step": 8678 }, { "epoch": 0.6875816993464052, "grad_norm": 1.4999602492663062, "learning_rate": 4.696528333256275e-06, "loss": 0.2284, "step": 8679 }, { "epoch": 0.6876609229550406, "grad_norm": 1.358231022935779, "learning_rate": 4.694353076055222e-06, "loss": 0.2322, "step": 8680 }, { "epoch": 0.687740146563676, "grad_norm": 1.039892398264782, "learning_rate": 4.6921781682054954e-06, "loss": 0.1552, "step": 8681 }, { "epoch": 0.6878193701723113, "grad_norm": 1.208743790314156, "learning_rate": 4.6900036098502956e-06, "loss": 0.19, "step": 8682 }, { "epoch": 0.6878985937809468, "grad_norm": 1.3307110870309475, "learning_rate": 4.687829401132804e-06, "loss": 0.2304, "step": 8683 }, { "epoch": 0.6879778173895821, "grad_norm": 1.0195743451844448, "learning_rate": 4.685655542196194e-06, "loss": 0.1875, "step": 8684 }, { "epoch": 0.6880570409982175, "grad_norm": 1.2583843440268834, "learning_rate": 4.6834820331835915e-06, "loss": 0.2802, "step": 8685 }, { "epoch": 0.6881362646068528, "grad_norm": 0.84717147414347, "learning_rate": 4.681308874238112e-06, "loss": 0.0957, "step": 8686 }, { "epoch": 0.6882154882154882, "grad_norm": 1.5709025061132116, "learning_rate": 4.679136065502855e-06, "loss": 0.281, "step": 8687 }, { "epoch": 0.6882947118241236, "grad_norm": 1.3563168476709715, "learning_rate": 4.676963607120886e-06, "loss": 0.2647, "step": 8688 }, { "epoch": 0.6883739354327589, "grad_norm": 1.4025748676997238, "learning_rate": 4.674791499235246e-06, "loss": 0.3222, "step": 8689 }, { "epoch": 0.6884531590413944, "grad_norm": 1.27085195451568, "learning_rate": 4.672619741988966e-06, "loss": 0.2055, "step": 8690 }, { "epoch": 0.6885323826500297, "grad_norm": 1.1802933538323492, "learning_rate": 4.670448335525043e-06, "loss": 0.2221, "step": 8691 }, { "epoch": 0.6886116062586651, "grad_norm": 1.0575423849188226, "learning_rate": 4.66827727998645e-06, "loss": 0.1819, "step": 8692 }, { "epoch": 0.6886908298673005, "grad_norm": 1.4898335149535489, "learning_rate": 4.666106575516146e-06, "loss": 0.3458, "step": 8693 }, { "epoch": 0.6887700534759358, "grad_norm": 1.187573317501121, "learning_rate": 4.663936222257059e-06, "loss": 0.1664, "step": 8694 }, { "epoch": 0.6888492770845712, "grad_norm": 1.7699323738081327, "learning_rate": 4.661766220352098e-06, "loss": 0.2722, "step": 8695 }, { "epoch": 0.6889285006932065, "grad_norm": 1.285605046750008, "learning_rate": 4.659596569944139e-06, "loss": 0.2059, "step": 8696 }, { "epoch": 0.689007724301842, "grad_norm": 1.3943419936684098, "learning_rate": 4.657427271176055e-06, "loss": 0.2118, "step": 8697 }, { "epoch": 0.6890869479104773, "grad_norm": 1.7547872137325948, "learning_rate": 4.655258324190678e-06, "loss": 0.4043, "step": 8698 }, { "epoch": 0.6891661715191127, "grad_norm": 1.4085062101251735, "learning_rate": 4.65308972913082e-06, "loss": 0.3605, "step": 8699 }, { "epoch": 0.6892453951277481, "grad_norm": 1.2008387531312104, "learning_rate": 4.6509214861392785e-06, "loss": 0.2962, "step": 8700 }, { "epoch": 0.6893246187363834, "grad_norm": 1.2940143376387314, "learning_rate": 4.648753595358818e-06, "loss": 0.2615, "step": 8701 }, { "epoch": 0.6894038423450188, "grad_norm": 1.2277870085967522, "learning_rate": 4.646586056932183e-06, "loss": 0.2294, "step": 8702 }, { "epoch": 0.6894830659536542, "grad_norm": 1.3006327385280338, "learning_rate": 4.6444188710021e-06, "loss": 0.2783, "step": 8703 }, { "epoch": 0.6895622895622896, "grad_norm": 1.3089766592434473, "learning_rate": 4.6422520377112646e-06, "loss": 0.2622, "step": 8704 }, { "epoch": 0.6896415131709249, "grad_norm": 1.1188427806612686, "learning_rate": 4.640085557202349e-06, "loss": 0.1849, "step": 8705 }, { "epoch": 0.6897207367795604, "grad_norm": 1.337276875350201, "learning_rate": 4.637919429618014e-06, "loss": 0.2179, "step": 8706 }, { "epoch": 0.6897999603881957, "grad_norm": 0.944755643224885, "learning_rate": 4.635753655100883e-06, "loss": 0.1451, "step": 8707 }, { "epoch": 0.689879183996831, "grad_norm": 1.2254631560513103, "learning_rate": 4.633588233793559e-06, "loss": 0.2135, "step": 8708 }, { "epoch": 0.6899584076054665, "grad_norm": 0.9947477879968354, "learning_rate": 4.631423165838632e-06, "loss": 0.1134, "step": 8709 }, { "epoch": 0.6900376312141018, "grad_norm": 1.509402594511407, "learning_rate": 4.629258451378658e-06, "loss": 0.2853, "step": 8710 }, { "epoch": 0.6901168548227372, "grad_norm": 1.6917343469637296, "learning_rate": 4.6270940905561725e-06, "loss": 0.2887, "step": 8711 }, { "epoch": 0.6901960784313725, "grad_norm": 1.0878979515818141, "learning_rate": 4.624930083513684e-06, "loss": 0.1687, "step": 8712 }, { "epoch": 0.690275302040008, "grad_norm": 1.262003386822182, "learning_rate": 4.62276643039369e-06, "loss": 0.2617, "step": 8713 }, { "epoch": 0.6903545256486433, "grad_norm": 1.3320596668585638, "learning_rate": 4.620603131338655e-06, "loss": 0.2516, "step": 8714 }, { "epoch": 0.6904337492572786, "grad_norm": 1.483981182453267, "learning_rate": 4.6184401864910136e-06, "loss": 0.2622, "step": 8715 }, { "epoch": 0.6905129728659141, "grad_norm": 1.3631144009995455, "learning_rate": 4.616277595993196e-06, "loss": 0.2521, "step": 8716 }, { "epoch": 0.6905921964745494, "grad_norm": 1.4478497043496281, "learning_rate": 4.614115359987595e-06, "loss": 0.209, "step": 8717 }, { "epoch": 0.6906714200831848, "grad_norm": 1.2430777705725242, "learning_rate": 4.6119534786165765e-06, "loss": 0.192, "step": 8718 }, { "epoch": 0.6907506436918202, "grad_norm": 1.384934937383765, "learning_rate": 4.609791952022501e-06, "loss": 0.2355, "step": 8719 }, { "epoch": 0.6908298673004556, "grad_norm": 1.7022029348205618, "learning_rate": 4.607630780347689e-06, "loss": 0.3012, "step": 8720 }, { "epoch": 0.6909090909090909, "grad_norm": 1.3707122641620741, "learning_rate": 4.60546996373444e-06, "loss": 0.2147, "step": 8721 }, { "epoch": 0.6909883145177262, "grad_norm": 1.4744656815451067, "learning_rate": 4.603309502325041e-06, "loss": 0.2657, "step": 8722 }, { "epoch": 0.6910675381263617, "grad_norm": 1.6494406511377315, "learning_rate": 4.601149396261744e-06, "loss": 0.2953, "step": 8723 }, { "epoch": 0.691146761734997, "grad_norm": 1.1806655749821362, "learning_rate": 4.598989645686782e-06, "loss": 0.1276, "step": 8724 }, { "epoch": 0.6912259853436324, "grad_norm": 1.7525038805497757, "learning_rate": 4.596830250742359e-06, "loss": 0.2903, "step": 8725 }, { "epoch": 0.6913052089522678, "grad_norm": 1.0571604790375972, "learning_rate": 4.594671211570671e-06, "loss": 0.1928, "step": 8726 }, { "epoch": 0.6913844325609031, "grad_norm": 1.3804828341951167, "learning_rate": 4.592512528313874e-06, "loss": 0.2436, "step": 8727 }, { "epoch": 0.6914636561695385, "grad_norm": 1.6509485856087844, "learning_rate": 4.590354201114103e-06, "loss": 0.3109, "step": 8728 }, { "epoch": 0.6915428797781739, "grad_norm": 1.6471259632083965, "learning_rate": 4.588196230113483e-06, "loss": 0.2787, "step": 8729 }, { "epoch": 0.6916221033868093, "grad_norm": 1.3425949322709636, "learning_rate": 4.586038615454102e-06, "loss": 0.2169, "step": 8730 }, { "epoch": 0.6917013269954446, "grad_norm": 1.2102864100087452, "learning_rate": 4.583881357278023e-06, "loss": 0.2457, "step": 8731 }, { "epoch": 0.6917805506040801, "grad_norm": 1.4576335760363308, "learning_rate": 4.5817244557273e-06, "loss": 0.2565, "step": 8732 }, { "epoch": 0.6918597742127154, "grad_norm": 1.0434994412803442, "learning_rate": 4.5795679109439505e-06, "loss": 0.2039, "step": 8733 }, { "epoch": 0.6919389978213507, "grad_norm": 1.420031312849618, "learning_rate": 4.57741172306997e-06, "loss": 0.3024, "step": 8734 }, { "epoch": 0.6920182214299861, "grad_norm": 1.566416117201726, "learning_rate": 4.5752558922473376e-06, "loss": 0.3626, "step": 8735 }, { "epoch": 0.6920974450386215, "grad_norm": 1.4186331093927886, "learning_rate": 4.573100418618004e-06, "loss": 0.2393, "step": 8736 }, { "epoch": 0.6921766686472569, "grad_norm": 1.52562327832964, "learning_rate": 4.57094530232389e-06, "loss": 0.3389, "step": 8737 }, { "epoch": 0.6922558922558922, "grad_norm": 1.0635909690795307, "learning_rate": 4.5687905435069106e-06, "loss": 0.193, "step": 8738 }, { "epoch": 0.6923351158645277, "grad_norm": 1.348352342256825, "learning_rate": 4.566636142308939e-06, "loss": 0.1924, "step": 8739 }, { "epoch": 0.692414339473163, "grad_norm": 1.994858954210715, "learning_rate": 4.564482098871834e-06, "loss": 0.2743, "step": 8740 }, { "epoch": 0.6924935630817983, "grad_norm": 1.5937476550343863, "learning_rate": 4.562328413337426e-06, "loss": 0.1965, "step": 8741 }, { "epoch": 0.6925727866904338, "grad_norm": 1.1209159642424666, "learning_rate": 4.56017508584753e-06, "loss": 0.1776, "step": 8742 }, { "epoch": 0.6926520102990691, "grad_norm": 1.4812936938993486, "learning_rate": 4.558022116543931e-06, "loss": 0.215, "step": 8743 }, { "epoch": 0.6927312339077045, "grad_norm": 1.2060702534011785, "learning_rate": 4.555869505568386e-06, "loss": 0.2142, "step": 8744 }, { "epoch": 0.6928104575163399, "grad_norm": 1.365700120754585, "learning_rate": 4.553717253062643e-06, "loss": 0.2796, "step": 8745 }, { "epoch": 0.6928896811249753, "grad_norm": 1.2835470597011995, "learning_rate": 4.551565359168411e-06, "loss": 0.251, "step": 8746 }, { "epoch": 0.6929689047336106, "grad_norm": 1.155391005013538, "learning_rate": 4.549413824027382e-06, "loss": 0.2516, "step": 8747 }, { "epoch": 0.6930481283422459, "grad_norm": 1.7505118271213098, "learning_rate": 4.54726264778123e-06, "loss": 0.2689, "step": 8748 }, { "epoch": 0.6931273519508814, "grad_norm": 1.526987493710666, "learning_rate": 4.5451118305715954e-06, "loss": 0.2771, "step": 8749 }, { "epoch": 0.6932065755595167, "grad_norm": 1.3722794258858013, "learning_rate": 4.542961372540096e-06, "loss": 0.223, "step": 8750 }, { "epoch": 0.6932857991681521, "grad_norm": 1.504889370782872, "learning_rate": 4.540811273828336e-06, "loss": 0.247, "step": 8751 }, { "epoch": 0.6933650227767875, "grad_norm": 1.2212461020321752, "learning_rate": 4.538661534577886e-06, "loss": 0.2314, "step": 8752 }, { "epoch": 0.6934442463854229, "grad_norm": 1.3492761550397696, "learning_rate": 4.5365121549302916e-06, "loss": 0.2101, "step": 8753 }, { "epoch": 0.6935234699940582, "grad_norm": 1.257409896677816, "learning_rate": 4.534363135027086e-06, "loss": 0.1682, "step": 8754 }, { "epoch": 0.6936026936026936, "grad_norm": 1.304270526398794, "learning_rate": 4.532214475009771e-06, "loss": 0.2333, "step": 8755 }, { "epoch": 0.693681917211329, "grad_norm": 1.647257818436006, "learning_rate": 4.530066175019823e-06, "loss": 0.2754, "step": 8756 }, { "epoch": 0.6937611408199643, "grad_norm": 1.1709194583314289, "learning_rate": 4.527918235198692e-06, "loss": 0.2048, "step": 8757 }, { "epoch": 0.6938403644285998, "grad_norm": 1.3617125210134675, "learning_rate": 4.525770655687821e-06, "loss": 0.2177, "step": 8758 }, { "epoch": 0.6939195880372351, "grad_norm": 1.5215139691324813, "learning_rate": 4.523623436628611e-06, "loss": 0.253, "step": 8759 }, { "epoch": 0.6939988116458705, "grad_norm": 1.2925865333906732, "learning_rate": 4.521476578162445e-06, "loss": 0.2122, "step": 8760 }, { "epoch": 0.6940780352545058, "grad_norm": 1.3929779419343227, "learning_rate": 4.519330080430687e-06, "loss": 0.2246, "step": 8761 }, { "epoch": 0.6941572588631412, "grad_norm": 1.3636242206882143, "learning_rate": 4.517183943574673e-06, "loss": 0.172, "step": 8762 }, { "epoch": 0.6942364824717766, "grad_norm": 1.3772760187609399, "learning_rate": 4.515038167735715e-06, "loss": 0.302, "step": 8763 }, { "epoch": 0.6943157060804119, "grad_norm": 1.4778475696989446, "learning_rate": 4.5128927530551e-06, "loss": 0.231, "step": 8764 }, { "epoch": 0.6943949296890474, "grad_norm": 0.8099686810018009, "learning_rate": 4.510747699674096e-06, "loss": 0.0897, "step": 8765 }, { "epoch": 0.6944741532976827, "grad_norm": 1.4415160462821115, "learning_rate": 4.50860300773394e-06, "loss": 0.2337, "step": 8766 }, { "epoch": 0.6945533769063181, "grad_norm": 1.5181661334498546, "learning_rate": 4.506458677375856e-06, "loss": 0.3006, "step": 8767 }, { "epoch": 0.6946326005149535, "grad_norm": 1.3778953504845226, "learning_rate": 4.504314708741037e-06, "loss": 0.2874, "step": 8768 }, { "epoch": 0.6947118241235888, "grad_norm": 1.2925908182928736, "learning_rate": 4.502171101970645e-06, "loss": 0.1839, "step": 8769 }, { "epoch": 0.6947910477322242, "grad_norm": 1.2884098423161836, "learning_rate": 4.5000278572058365e-06, "loss": 0.1839, "step": 8770 }, { "epoch": 0.6948702713408595, "grad_norm": 1.2785623006265123, "learning_rate": 4.497884974587729e-06, "loss": 0.2067, "step": 8771 }, { "epoch": 0.694949494949495, "grad_norm": 1.4554851056660039, "learning_rate": 4.495742454257418e-06, "loss": 0.238, "step": 8772 }, { "epoch": 0.6950287185581303, "grad_norm": 1.447509004133046, "learning_rate": 4.493600296355986e-06, "loss": 0.2989, "step": 8773 }, { "epoch": 0.6951079421667657, "grad_norm": 1.3943162340471837, "learning_rate": 4.491458501024479e-06, "loss": 0.2883, "step": 8774 }, { "epoch": 0.6951871657754011, "grad_norm": 1.2076899978167548, "learning_rate": 4.489317068403919e-06, "loss": 0.187, "step": 8775 }, { "epoch": 0.6952663893840364, "grad_norm": 1.4105980297316858, "learning_rate": 4.487175998635319e-06, "loss": 0.2658, "step": 8776 }, { "epoch": 0.6953456129926718, "grad_norm": 1.416529640210077, "learning_rate": 4.485035291859654e-06, "loss": 0.2537, "step": 8777 }, { "epoch": 0.6954248366013072, "grad_norm": 1.1543317782707463, "learning_rate": 4.482894948217875e-06, "loss": 0.1839, "step": 8778 }, { "epoch": 0.6955040602099426, "grad_norm": 1.0478537328893316, "learning_rate": 4.48075496785092e-06, "loss": 0.1744, "step": 8779 }, { "epoch": 0.6955832838185779, "grad_norm": 1.642981889691146, "learning_rate": 4.4786153508996944e-06, "loss": 0.2143, "step": 8780 }, { "epoch": 0.6956625074272134, "grad_norm": 1.4862071057729505, "learning_rate": 4.47647609750508e-06, "loss": 0.2416, "step": 8781 }, { "epoch": 0.6957417310358487, "grad_norm": 1.2940984396724406, "learning_rate": 4.4743372078079335e-06, "loss": 0.2544, "step": 8782 }, { "epoch": 0.695820954644484, "grad_norm": 1.4054540989895064, "learning_rate": 4.472198681949098e-06, "loss": 0.2561, "step": 8783 }, { "epoch": 0.6959001782531195, "grad_norm": 1.5158475747576985, "learning_rate": 4.470060520069381e-06, "loss": 0.2757, "step": 8784 }, { "epoch": 0.6959794018617548, "grad_norm": 1.2560370940733032, "learning_rate": 4.467922722309567e-06, "loss": 0.2924, "step": 8785 }, { "epoch": 0.6960586254703902, "grad_norm": 1.4106584520775325, "learning_rate": 4.465785288810427e-06, "loss": 0.2588, "step": 8786 }, { "epoch": 0.6961378490790255, "grad_norm": 1.4586344429334912, "learning_rate": 4.4636482197126965e-06, "loss": 0.2467, "step": 8787 }, { "epoch": 0.696217072687661, "grad_norm": 1.1379517706083984, "learning_rate": 4.461511515157087e-06, "loss": 0.1914, "step": 8788 }, { "epoch": 0.6962962962962963, "grad_norm": 1.385806669472669, "learning_rate": 4.459375175284299e-06, "loss": 0.2247, "step": 8789 }, { "epoch": 0.6963755199049316, "grad_norm": 1.4387901667417589, "learning_rate": 4.457239200234996e-06, "loss": 0.2696, "step": 8790 }, { "epoch": 0.6964547435135671, "grad_norm": 1.0836588377750644, "learning_rate": 4.4551035901498186e-06, "loss": 0.2048, "step": 8791 }, { "epoch": 0.6965339671222024, "grad_norm": 1.270436749011163, "learning_rate": 4.4529683451693916e-06, "loss": 0.2197, "step": 8792 }, { "epoch": 0.6966131907308378, "grad_norm": 1.5154897511294212, "learning_rate": 4.45083346543431e-06, "loss": 0.2377, "step": 8793 }, { "epoch": 0.6966924143394732, "grad_norm": 1.3066526828623988, "learning_rate": 4.448698951085143e-06, "loss": 0.2356, "step": 8794 }, { "epoch": 0.6967716379481086, "grad_norm": 1.2470028333404923, "learning_rate": 4.446564802262435e-06, "loss": 0.1113, "step": 8795 }, { "epoch": 0.6968508615567439, "grad_norm": 1.386025383460799, "learning_rate": 4.444431019106718e-06, "loss": 0.1977, "step": 8796 }, { "epoch": 0.6969300851653792, "grad_norm": 1.5959343072064138, "learning_rate": 4.4422976017584866e-06, "loss": 0.3356, "step": 8797 }, { "epoch": 0.6970093087740147, "grad_norm": 1.2099062541547179, "learning_rate": 4.440164550358212e-06, "loss": 0.2219, "step": 8798 }, { "epoch": 0.69708853238265, "grad_norm": 1.243105102253641, "learning_rate": 4.438031865046353e-06, "loss": 0.1937, "step": 8799 }, { "epoch": 0.6971677559912854, "grad_norm": 1.3659841787956428, "learning_rate": 4.435899545963333e-06, "loss": 0.2334, "step": 8800 }, { "epoch": 0.6972469795999208, "grad_norm": 1.133632684945457, "learning_rate": 4.4337675932495515e-06, "loss": 0.167, "step": 8801 }, { "epoch": 0.6973262032085561, "grad_norm": 1.2762805432610065, "learning_rate": 4.431636007045396e-06, "loss": 0.1663, "step": 8802 }, { "epoch": 0.6974054268171915, "grad_norm": 1.3279277988480715, "learning_rate": 4.429504787491214e-06, "loss": 0.2183, "step": 8803 }, { "epoch": 0.6974846504258269, "grad_norm": 1.5680494355985402, "learning_rate": 4.427373934727337e-06, "loss": 0.3935, "step": 8804 }, { "epoch": 0.6975638740344623, "grad_norm": 1.0921913552068514, "learning_rate": 4.425243448894074e-06, "loss": 0.1628, "step": 8805 }, { "epoch": 0.6976430976430976, "grad_norm": 1.2228603767237676, "learning_rate": 4.423113330131708e-06, "loss": 0.2317, "step": 8806 }, { "epoch": 0.6977223212517331, "grad_norm": 1.6127320662513773, "learning_rate": 4.42098357858049e-06, "loss": 0.3141, "step": 8807 }, { "epoch": 0.6978015448603684, "grad_norm": 1.2127393656729695, "learning_rate": 4.418854194380663e-06, "loss": 0.251, "step": 8808 }, { "epoch": 0.6978807684690037, "grad_norm": 1.0391332710304808, "learning_rate": 4.416725177672432e-06, "loss": 0.1568, "step": 8809 }, { "epoch": 0.6979599920776391, "grad_norm": 1.2022421904540985, "learning_rate": 4.4145965285959836e-06, "loss": 0.2347, "step": 8810 }, { "epoch": 0.6980392156862745, "grad_norm": 1.2580558238458839, "learning_rate": 4.412468247291474e-06, "loss": 0.2178, "step": 8811 }, { "epoch": 0.6981184392949099, "grad_norm": 1.2965298613678125, "learning_rate": 4.410340333899049e-06, "loss": 0.2408, "step": 8812 }, { "epoch": 0.6981976629035452, "grad_norm": 1.5271511246798966, "learning_rate": 4.408212788558818e-06, "loss": 0.271, "step": 8813 }, { "epoch": 0.6982768865121807, "grad_norm": 1.5411104285903274, "learning_rate": 4.406085611410864e-06, "loss": 0.3036, "step": 8814 }, { "epoch": 0.698356110120816, "grad_norm": 1.4777125680054977, "learning_rate": 4.403958802595261e-06, "loss": 0.3012, "step": 8815 }, { "epoch": 0.6984353337294513, "grad_norm": 1.4592262357239205, "learning_rate": 4.401832362252044e-06, "loss": 0.2601, "step": 8816 }, { "epoch": 0.6985145573380868, "grad_norm": 1.2754787670211158, "learning_rate": 4.399706290521225e-06, "loss": 0.1998, "step": 8817 }, { "epoch": 0.6985937809467221, "grad_norm": 1.374528557590866, "learning_rate": 4.397580587542805e-06, "loss": 0.2236, "step": 8818 }, { "epoch": 0.6986730045553575, "grad_norm": 1.2649836213993118, "learning_rate": 4.3954552534567455e-06, "loss": 0.2244, "step": 8819 }, { "epoch": 0.6987522281639929, "grad_norm": 1.3787977130235152, "learning_rate": 4.393330288402986e-06, "loss": 0.29, "step": 8820 }, { "epoch": 0.6988314517726283, "grad_norm": 1.573556956514344, "learning_rate": 4.391205692521453e-06, "loss": 0.2901, "step": 8821 }, { "epoch": 0.6989106753812636, "grad_norm": 1.4970212671112728, "learning_rate": 4.389081465952039e-06, "loss": 0.2867, "step": 8822 }, { "epoch": 0.6989898989898989, "grad_norm": 1.1030741586174122, "learning_rate": 4.386957608834607e-06, "loss": 0.1685, "step": 8823 }, { "epoch": 0.6990691225985344, "grad_norm": 1.7454605754173802, "learning_rate": 4.384834121309013e-06, "loss": 0.3222, "step": 8824 }, { "epoch": 0.6991483462071697, "grad_norm": 1.4506190724576338, "learning_rate": 4.382711003515072e-06, "loss": 0.2763, "step": 8825 }, { "epoch": 0.6992275698158051, "grad_norm": 1.3550989101118316, "learning_rate": 4.3805882555925846e-06, "loss": 0.1947, "step": 8826 }, { "epoch": 0.6993067934244405, "grad_norm": 1.3405342202457475, "learning_rate": 4.378465877681317e-06, "loss": 0.2151, "step": 8827 }, { "epoch": 0.6993860170330759, "grad_norm": 1.3577711192679864, "learning_rate": 4.376343869921027e-06, "loss": 0.2469, "step": 8828 }, { "epoch": 0.6994652406417112, "grad_norm": 1.4776447890626994, "learning_rate": 4.374222232451433e-06, "loss": 0.3133, "step": 8829 }, { "epoch": 0.6995444642503466, "grad_norm": 1.2475273626649297, "learning_rate": 4.3721009654122315e-06, "loss": 0.2606, "step": 8830 }, { "epoch": 0.699623687858982, "grad_norm": 1.0841324754200954, "learning_rate": 4.369980068943106e-06, "loss": 0.166, "step": 8831 }, { "epoch": 0.6997029114676173, "grad_norm": 1.2375589900776478, "learning_rate": 4.367859543183702e-06, "loss": 0.2523, "step": 8832 }, { "epoch": 0.6997821350762528, "grad_norm": 1.150124650157148, "learning_rate": 4.3657393882736456e-06, "loss": 0.1865, "step": 8833 }, { "epoch": 0.6998613586848881, "grad_norm": 1.396953914316769, "learning_rate": 4.3636196043525415e-06, "loss": 0.2713, "step": 8834 }, { "epoch": 0.6999405822935235, "grad_norm": 1.1166195642514403, "learning_rate": 4.361500191559967e-06, "loss": 0.218, "step": 8835 }, { "epoch": 0.7000198059021588, "grad_norm": 1.2507957612721141, "learning_rate": 4.35938115003547e-06, "loss": 0.2655, "step": 8836 }, { "epoch": 0.7000990295107942, "grad_norm": 1.2561618669072205, "learning_rate": 4.357262479918587e-06, "loss": 0.1881, "step": 8837 }, { "epoch": 0.7001782531194296, "grad_norm": 1.1331421191175617, "learning_rate": 4.355144181348819e-06, "loss": 0.2144, "step": 8838 }, { "epoch": 0.7002574767280649, "grad_norm": 1.329150152318647, "learning_rate": 4.353026254465642e-06, "loss": 0.2442, "step": 8839 }, { "epoch": 0.7003367003367004, "grad_norm": 1.3699642315685037, "learning_rate": 4.350908699408521e-06, "loss": 0.2486, "step": 8840 }, { "epoch": 0.7004159239453357, "grad_norm": 1.3698029164621957, "learning_rate": 4.348791516316878e-06, "loss": 0.2504, "step": 8841 }, { "epoch": 0.7004951475539711, "grad_norm": 1.4660385399250195, "learning_rate": 4.346674705330117e-06, "loss": 0.2627, "step": 8842 }, { "epoch": 0.7005743711626065, "grad_norm": 1.1877712750901945, "learning_rate": 4.344558266587628e-06, "loss": 0.2314, "step": 8843 }, { "epoch": 0.7006535947712418, "grad_norm": 1.0924659198478786, "learning_rate": 4.342442200228766e-06, "loss": 0.1674, "step": 8844 }, { "epoch": 0.7007328183798772, "grad_norm": 1.2391572113579326, "learning_rate": 4.340326506392859e-06, "loss": 0.1906, "step": 8845 }, { "epoch": 0.7008120419885125, "grad_norm": 1.2861973452650104, "learning_rate": 4.338211185219222e-06, "loss": 0.2424, "step": 8846 }, { "epoch": 0.700891265597148, "grad_norm": 1.4967423782653613, "learning_rate": 4.336096236847136e-06, "loss": 0.2651, "step": 8847 }, { "epoch": 0.7009704892057833, "grad_norm": 1.106477185134246, "learning_rate": 4.333981661415856e-06, "loss": 0.1363, "step": 8848 }, { "epoch": 0.7010497128144187, "grad_norm": 1.3555705126951718, "learning_rate": 4.331867459064623e-06, "loss": 0.2175, "step": 8849 }, { "epoch": 0.7011289364230541, "grad_norm": 1.4114150946018673, "learning_rate": 4.329753629932646e-06, "loss": 0.2885, "step": 8850 }, { "epoch": 0.7012081600316894, "grad_norm": 1.3218864577591556, "learning_rate": 4.327640174159109e-06, "loss": 0.2847, "step": 8851 }, { "epoch": 0.7012873836403248, "grad_norm": 1.1831231914701292, "learning_rate": 4.325527091883168e-06, "loss": 0.1438, "step": 8852 }, { "epoch": 0.7013666072489602, "grad_norm": 1.5070682114586984, "learning_rate": 4.323414383243969e-06, "loss": 0.2667, "step": 8853 }, { "epoch": 0.7014458308575956, "grad_norm": 1.2902207942796624, "learning_rate": 4.321302048380619e-06, "loss": 0.2427, "step": 8854 }, { "epoch": 0.7015250544662309, "grad_norm": 1.6337809821668723, "learning_rate": 4.319190087432201e-06, "loss": 0.2736, "step": 8855 }, { "epoch": 0.7016042780748664, "grad_norm": 1.2964343606334223, "learning_rate": 4.317078500537785e-06, "loss": 0.3022, "step": 8856 }, { "epoch": 0.7016835016835017, "grad_norm": 1.407194086118684, "learning_rate": 4.314967287836405e-06, "loss": 0.2619, "step": 8857 }, { "epoch": 0.701762725292137, "grad_norm": 1.5025381898005565, "learning_rate": 4.3128564494670715e-06, "loss": 0.2507, "step": 8858 }, { "epoch": 0.7018419489007724, "grad_norm": 1.5473653448593045, "learning_rate": 4.310745985568779e-06, "loss": 0.263, "step": 8859 }, { "epoch": 0.7019211725094078, "grad_norm": 1.5279687347061008, "learning_rate": 4.3086358962804885e-06, "loss": 0.2252, "step": 8860 }, { "epoch": 0.7020003961180432, "grad_norm": 1.248919945168579, "learning_rate": 4.306526181741135e-06, "loss": 0.2516, "step": 8861 }, { "epoch": 0.7020796197266785, "grad_norm": 1.4363142308416414, "learning_rate": 4.304416842089641e-06, "loss": 0.2416, "step": 8862 }, { "epoch": 0.702158843335314, "grad_norm": 1.5856464484106991, "learning_rate": 4.302307877464893e-06, "loss": 0.2462, "step": 8863 }, { "epoch": 0.7022380669439493, "grad_norm": 1.2228645686668598, "learning_rate": 4.300199288005753e-06, "loss": 0.2222, "step": 8864 }, { "epoch": 0.7023172905525846, "grad_norm": 1.1943693296604285, "learning_rate": 4.298091073851066e-06, "loss": 0.1743, "step": 8865 }, { "epoch": 0.7023965141612201, "grad_norm": 0.9489549208756705, "learning_rate": 4.295983235139647e-06, "loss": 0.1473, "step": 8866 }, { "epoch": 0.7024757377698554, "grad_norm": 1.6134968380053571, "learning_rate": 4.293875772010287e-06, "loss": 0.2231, "step": 8867 }, { "epoch": 0.7025549613784908, "grad_norm": 1.5126120617975245, "learning_rate": 4.291768684601746e-06, "loss": 0.2647, "step": 8868 }, { "epoch": 0.7026341849871262, "grad_norm": 1.234192928214084, "learning_rate": 4.289661973052774e-06, "loss": 0.2126, "step": 8869 }, { "epoch": 0.7027134085957616, "grad_norm": 1.3349316479660362, "learning_rate": 4.287555637502086e-06, "loss": 0.2585, "step": 8870 }, { "epoch": 0.7027926322043969, "grad_norm": 1.5783766651416529, "learning_rate": 4.285449678088369e-06, "loss": 0.2997, "step": 8871 }, { "epoch": 0.7028718558130322, "grad_norm": 1.2489360253198072, "learning_rate": 4.283344094950297e-06, "loss": 0.2515, "step": 8872 }, { "epoch": 0.7029510794216677, "grad_norm": 1.4003730092696396, "learning_rate": 4.2812388882265095e-06, "loss": 0.2608, "step": 8873 }, { "epoch": 0.703030303030303, "grad_norm": 1.2116296877476789, "learning_rate": 4.279134058055622e-06, "loss": 0.2208, "step": 8874 }, { "epoch": 0.7031095266389384, "grad_norm": 1.5623281862728544, "learning_rate": 4.2770296045762315e-06, "loss": 0.2656, "step": 8875 }, { "epoch": 0.7031887502475738, "grad_norm": 1.1472092646930603, "learning_rate": 4.274925527926907e-06, "loss": 0.2592, "step": 8876 }, { "epoch": 0.7032679738562092, "grad_norm": 1.0869502010717735, "learning_rate": 4.272821828246183e-06, "loss": 0.159, "step": 8877 }, { "epoch": 0.7033471974648445, "grad_norm": 1.4178843089143776, "learning_rate": 4.270718505672588e-06, "loss": 0.2633, "step": 8878 }, { "epoch": 0.7034264210734799, "grad_norm": 1.24786991171341, "learning_rate": 4.2686155603446134e-06, "loss": 0.2291, "step": 8879 }, { "epoch": 0.7035056446821153, "grad_norm": 1.5405362632685256, "learning_rate": 4.266512992400726e-06, "loss": 0.3126, "step": 8880 }, { "epoch": 0.7035848682907506, "grad_norm": 1.2324526643331342, "learning_rate": 4.2644108019793665e-06, "loss": 0.2487, "step": 8881 }, { "epoch": 0.7036640918993861, "grad_norm": 1.3283865467153089, "learning_rate": 4.262308989218961e-06, "loss": 0.2385, "step": 8882 }, { "epoch": 0.7037433155080214, "grad_norm": 1.2483571111991931, "learning_rate": 4.2602075542579e-06, "loss": 0.224, "step": 8883 }, { "epoch": 0.7038225391166567, "grad_norm": 1.303042737802848, "learning_rate": 4.258106497234551e-06, "loss": 0.2442, "step": 8884 }, { "epoch": 0.7039017627252921, "grad_norm": 1.4292886046190605, "learning_rate": 4.256005818287265e-06, "loss": 0.291, "step": 8885 }, { "epoch": 0.7039809863339275, "grad_norm": 1.2123406052092123, "learning_rate": 4.253905517554356e-06, "loss": 0.2432, "step": 8886 }, { "epoch": 0.7040602099425629, "grad_norm": 1.0967983864533966, "learning_rate": 4.251805595174117e-06, "loss": 0.1886, "step": 8887 }, { "epoch": 0.7041394335511982, "grad_norm": 1.5317372605693833, "learning_rate": 4.249706051284824e-06, "loss": 0.2696, "step": 8888 }, { "epoch": 0.7042186571598337, "grad_norm": 1.4409124523580885, "learning_rate": 4.24760688602472e-06, "loss": 0.3022, "step": 8889 }, { "epoch": 0.704297880768469, "grad_norm": 1.4902861652147272, "learning_rate": 4.245508099532021e-06, "loss": 0.2617, "step": 8890 }, { "epoch": 0.7043771043771043, "grad_norm": 1.4545460296068426, "learning_rate": 4.243409691944927e-06, "loss": 0.2749, "step": 8891 }, { "epoch": 0.7044563279857398, "grad_norm": 1.3462913922671005, "learning_rate": 4.241311663401606e-06, "loss": 0.2506, "step": 8892 }, { "epoch": 0.7045355515943751, "grad_norm": 1.4933935746132918, "learning_rate": 4.2392140140401996e-06, "loss": 0.2073, "step": 8893 }, { "epoch": 0.7046147752030105, "grad_norm": 1.431178771877433, "learning_rate": 4.237116743998835e-06, "loss": 0.3002, "step": 8894 }, { "epoch": 0.7046939988116458, "grad_norm": 1.1504529671048034, "learning_rate": 4.235019853415603e-06, "loss": 0.2432, "step": 8895 }, { "epoch": 0.7047732224202813, "grad_norm": 1.0942706664230106, "learning_rate": 4.232923342428574e-06, "loss": 0.2122, "step": 8896 }, { "epoch": 0.7048524460289166, "grad_norm": 1.2535495924159599, "learning_rate": 4.230827211175791e-06, "loss": 0.26, "step": 8897 }, { "epoch": 0.7049316696375519, "grad_norm": 1.2115789151055185, "learning_rate": 4.22873145979528e-06, "loss": 0.1836, "step": 8898 }, { "epoch": 0.7050108932461874, "grad_norm": 1.3120290099293208, "learning_rate": 4.226636088425033e-06, "loss": 0.2248, "step": 8899 }, { "epoch": 0.7050901168548227, "grad_norm": 1.4289531951763148, "learning_rate": 4.2245410972030154e-06, "loss": 0.2398, "step": 8900 }, { "epoch": 0.7051693404634581, "grad_norm": 1.2498053136329115, "learning_rate": 4.222446486267181e-06, "loss": 0.22, "step": 8901 }, { "epoch": 0.7052485640720935, "grad_norm": 1.4639804149280384, "learning_rate": 4.220352255755445e-06, "loss": 0.2612, "step": 8902 }, { "epoch": 0.7053277876807289, "grad_norm": 1.1922254344167316, "learning_rate": 4.218258405805701e-06, "loss": 0.1632, "step": 8903 }, { "epoch": 0.7054070112893642, "grad_norm": 1.5265282643117082, "learning_rate": 4.216164936555823e-06, "loss": 0.3068, "step": 8904 }, { "epoch": 0.7054862348979996, "grad_norm": 1.8747856188475003, "learning_rate": 4.214071848143655e-06, "loss": 0.3932, "step": 8905 }, { "epoch": 0.705565458506635, "grad_norm": 1.0341393801837642, "learning_rate": 4.211979140707012e-06, "loss": 0.1925, "step": 8906 }, { "epoch": 0.7056446821152703, "grad_norm": 1.4229911545021316, "learning_rate": 4.209886814383696e-06, "loss": 0.3113, "step": 8907 }, { "epoch": 0.7057239057239058, "grad_norm": 1.5059989869798511, "learning_rate": 4.207794869311472e-06, "loss": 0.2933, "step": 8908 }, { "epoch": 0.7058031293325411, "grad_norm": 1.345198624372794, "learning_rate": 4.205703305628082e-06, "loss": 0.2746, "step": 8909 }, { "epoch": 0.7058823529411765, "grad_norm": 1.3773974894062775, "learning_rate": 4.203612123471254e-06, "loss": 0.2837, "step": 8910 }, { "epoch": 0.7059615765498118, "grad_norm": 1.0800277934005587, "learning_rate": 4.201521322978677e-06, "loss": 0.1694, "step": 8911 }, { "epoch": 0.7060408001584472, "grad_norm": 1.2959808655530942, "learning_rate": 4.19943090428802e-06, "loss": 0.3015, "step": 8912 }, { "epoch": 0.7061200237670826, "grad_norm": 1.171000056245596, "learning_rate": 4.197340867536923e-06, "loss": 0.1526, "step": 8913 }, { "epoch": 0.7061992473757179, "grad_norm": 1.130753341245374, "learning_rate": 4.195251212863014e-06, "loss": 0.1913, "step": 8914 }, { "epoch": 0.7062784709843534, "grad_norm": 1.8869494786582943, "learning_rate": 4.193161940403882e-06, "loss": 0.4051, "step": 8915 }, { "epoch": 0.7063576945929887, "grad_norm": 0.9810038352177908, "learning_rate": 4.191073050297091e-06, "loss": 0.1223, "step": 8916 }, { "epoch": 0.7064369182016241, "grad_norm": 1.6368855284270838, "learning_rate": 4.188984542680192e-06, "loss": 0.3014, "step": 8917 }, { "epoch": 0.7065161418102595, "grad_norm": 1.3344330523306043, "learning_rate": 4.186896417690701e-06, "loss": 0.2324, "step": 8918 }, { "epoch": 0.7065953654188948, "grad_norm": 1.4222349404829133, "learning_rate": 4.18480867546611e-06, "loss": 0.2174, "step": 8919 }, { "epoch": 0.7066745890275302, "grad_norm": 1.530686486745354, "learning_rate": 4.182721316143888e-06, "loss": 0.2746, "step": 8920 }, { "epoch": 0.7067538126361655, "grad_norm": 1.1611083898133914, "learning_rate": 4.180634339861474e-06, "loss": 0.1831, "step": 8921 }, { "epoch": 0.706833036244801, "grad_norm": 1.5052280522508614, "learning_rate": 4.178547746756285e-06, "loss": 0.289, "step": 8922 }, { "epoch": 0.7069122598534363, "grad_norm": 1.187851917689239, "learning_rate": 4.17646153696572e-06, "loss": 0.1766, "step": 8923 }, { "epoch": 0.7069914834620717, "grad_norm": 1.2496962311411817, "learning_rate": 4.174375710627141e-06, "loss": 0.2302, "step": 8924 }, { "epoch": 0.7070707070707071, "grad_norm": 1.2421867203157149, "learning_rate": 4.172290267877887e-06, "loss": 0.2272, "step": 8925 }, { "epoch": 0.7071499306793424, "grad_norm": 1.0984019318469123, "learning_rate": 4.170205208855281e-06, "loss": 0.1465, "step": 8926 }, { "epoch": 0.7072291542879778, "grad_norm": 1.1395597288490344, "learning_rate": 4.1681205336966115e-06, "loss": 0.2093, "step": 8927 }, { "epoch": 0.7073083778966132, "grad_norm": 1.2487292701753407, "learning_rate": 4.16603624253914e-06, "loss": 0.1816, "step": 8928 }, { "epoch": 0.7073876015052486, "grad_norm": 1.7038540930200028, "learning_rate": 4.163952335520114e-06, "loss": 0.3728, "step": 8929 }, { "epoch": 0.7074668251138839, "grad_norm": 0.9933114233267937, "learning_rate": 4.161868812776746e-06, "loss": 0.162, "step": 8930 }, { "epoch": 0.7075460487225194, "grad_norm": 1.253010524215799, "learning_rate": 4.15978567444622e-06, "loss": 0.1933, "step": 8931 }, { "epoch": 0.7076252723311547, "grad_norm": 1.2080224557839214, "learning_rate": 4.157702920665712e-06, "loss": 0.1833, "step": 8932 }, { "epoch": 0.70770449593979, "grad_norm": 1.4994320465947872, "learning_rate": 4.155620551572354e-06, "loss": 0.2954, "step": 8933 }, { "epoch": 0.7077837195484254, "grad_norm": 1.178940771929154, "learning_rate": 4.153538567303258e-06, "loss": 0.1421, "step": 8934 }, { "epoch": 0.7078629431570608, "grad_norm": 1.4522507863236107, "learning_rate": 4.151456967995519e-06, "loss": 0.2621, "step": 8935 }, { "epoch": 0.7079421667656962, "grad_norm": 1.1402514628989178, "learning_rate": 4.149375753786198e-06, "loss": 0.1736, "step": 8936 }, { "epoch": 0.7080213903743315, "grad_norm": 1.310741945449736, "learning_rate": 4.147294924812332e-06, "loss": 0.1997, "step": 8937 }, { "epoch": 0.708100613982967, "grad_norm": 1.382323889213825, "learning_rate": 4.14521448121093e-06, "loss": 0.2456, "step": 8938 }, { "epoch": 0.7081798375916023, "grad_norm": 1.358592866893608, "learning_rate": 4.143134423118986e-06, "loss": 0.2317, "step": 8939 }, { "epoch": 0.7082590612002376, "grad_norm": 1.3878803532731059, "learning_rate": 4.14105475067346e-06, "loss": 0.2804, "step": 8940 }, { "epoch": 0.7083382848088731, "grad_norm": 1.5010143660187203, "learning_rate": 4.138975464011284e-06, "loss": 0.2855, "step": 8941 }, { "epoch": 0.7084175084175084, "grad_norm": 1.4822281166949847, "learning_rate": 4.136896563269375e-06, "loss": 0.2604, "step": 8942 }, { "epoch": 0.7084967320261438, "grad_norm": 1.7823968046660603, "learning_rate": 4.1348180485846145e-06, "loss": 0.2746, "step": 8943 }, { "epoch": 0.7085759556347792, "grad_norm": 1.6793395161121027, "learning_rate": 4.1327399200938625e-06, "loss": 0.355, "step": 8944 }, { "epoch": 0.7086551792434146, "grad_norm": 1.1469810030817083, "learning_rate": 4.1306621779339585e-06, "loss": 0.1675, "step": 8945 }, { "epoch": 0.7087344028520499, "grad_norm": 1.7302084421352244, "learning_rate": 4.128584822241708e-06, "loss": 0.2946, "step": 8946 }, { "epoch": 0.7088136264606852, "grad_norm": 1.2352175931965101, "learning_rate": 4.126507853153891e-06, "loss": 0.1795, "step": 8947 }, { "epoch": 0.7088928500693207, "grad_norm": 1.1199262750356624, "learning_rate": 4.124431270807277e-06, "loss": 0.1704, "step": 8948 }, { "epoch": 0.708972073677956, "grad_norm": 1.6278931663285423, "learning_rate": 4.12235507533859e-06, "loss": 0.2821, "step": 8949 }, { "epoch": 0.7090512972865914, "grad_norm": 1.5660534492520206, "learning_rate": 4.120279266884537e-06, "loss": 0.2983, "step": 8950 }, { "epoch": 0.7091305208952268, "grad_norm": 1.3064092237363494, "learning_rate": 4.118203845581807e-06, "loss": 0.2881, "step": 8951 }, { "epoch": 0.7092097445038622, "grad_norm": 1.5817005002859825, "learning_rate": 4.11612881156705e-06, "loss": 0.2902, "step": 8952 }, { "epoch": 0.7092889681124975, "grad_norm": 1.4482026214695563, "learning_rate": 4.114054164976902e-06, "loss": 0.2221, "step": 8953 }, { "epoch": 0.7093681917211329, "grad_norm": 1.1715292738475596, "learning_rate": 4.111979905947961e-06, "loss": 0.2134, "step": 8954 }, { "epoch": 0.7094474153297683, "grad_norm": 1.007314089879112, "learning_rate": 4.109906034616816e-06, "loss": 0.1743, "step": 8955 }, { "epoch": 0.7095266389384036, "grad_norm": 1.330620689314987, "learning_rate": 4.107832551120017e-06, "loss": 0.2446, "step": 8956 }, { "epoch": 0.7096058625470391, "grad_norm": 1.1128299120690779, "learning_rate": 4.105759455594091e-06, "loss": 0.1884, "step": 8957 }, { "epoch": 0.7096850861556744, "grad_norm": 1.2270307392421749, "learning_rate": 4.103686748175545e-06, "loss": 0.2158, "step": 8958 }, { "epoch": 0.7097643097643098, "grad_norm": 1.2344572270629277, "learning_rate": 4.101614429000857e-06, "loss": 0.199, "step": 8959 }, { "epoch": 0.7098435333729451, "grad_norm": 1.374465883047471, "learning_rate": 4.099542498206473e-06, "loss": 0.2417, "step": 8960 }, { "epoch": 0.7099227569815805, "grad_norm": 1.4821912590017494, "learning_rate": 4.0974709559288275e-06, "loss": 0.288, "step": 8961 }, { "epoch": 0.7100019805902159, "grad_norm": 1.2705373231103056, "learning_rate": 4.095399802304319e-06, "loss": 0.1886, "step": 8962 }, { "epoch": 0.7100812041988512, "grad_norm": 1.8521915691468798, "learning_rate": 4.093329037469319e-06, "loss": 0.3131, "step": 8963 }, { "epoch": 0.7101604278074867, "grad_norm": 1.464937948944682, "learning_rate": 4.091258661560184e-06, "loss": 0.2031, "step": 8964 }, { "epoch": 0.710239651416122, "grad_norm": 1.3769760266370796, "learning_rate": 4.0891886747132356e-06, "loss": 0.2451, "step": 8965 }, { "epoch": 0.7103188750247573, "grad_norm": 1.2849825892640707, "learning_rate": 4.087119077064772e-06, "loss": 0.1866, "step": 8966 }, { "epoch": 0.7103980986333928, "grad_norm": 1.1656718412010967, "learning_rate": 4.085049868751062e-06, "loss": 0.2546, "step": 8967 }, { "epoch": 0.7104773222420281, "grad_norm": 1.2174788292014693, "learning_rate": 4.082981049908362e-06, "loss": 0.1285, "step": 8968 }, { "epoch": 0.7105565458506635, "grad_norm": 1.5469373528723134, "learning_rate": 4.080912620672888e-06, "loss": 0.2923, "step": 8969 }, { "epoch": 0.7106357694592988, "grad_norm": 1.1328493991348587, "learning_rate": 4.078844581180833e-06, "loss": 0.2217, "step": 8970 }, { "epoch": 0.7107149930679343, "grad_norm": 1.7402433485228144, "learning_rate": 4.076776931568376e-06, "loss": 0.3558, "step": 8971 }, { "epoch": 0.7107942166765696, "grad_norm": 1.4668948850751835, "learning_rate": 4.074709671971657e-06, "loss": 0.1751, "step": 8972 }, { "epoch": 0.7108734402852049, "grad_norm": 1.2441211406329185, "learning_rate": 4.0726428025267925e-06, "loss": 0.2068, "step": 8973 }, { "epoch": 0.7109526638938404, "grad_norm": 1.7137251639933813, "learning_rate": 4.070576323369882e-06, "loss": 0.3064, "step": 8974 }, { "epoch": 0.7110318875024757, "grad_norm": 2.1190465614055443, "learning_rate": 4.06851023463699e-06, "loss": 0.3796, "step": 8975 }, { "epoch": 0.7111111111111111, "grad_norm": 1.3114276834040828, "learning_rate": 4.066444536464155e-06, "loss": 0.2966, "step": 8976 }, { "epoch": 0.7111903347197465, "grad_norm": 1.5385039036910069, "learning_rate": 4.0643792289874e-06, "loss": 0.2879, "step": 8977 }, { "epoch": 0.7112695583283819, "grad_norm": 1.5517305636079035, "learning_rate": 4.062314312342712e-06, "loss": 0.2147, "step": 8978 }, { "epoch": 0.7113487819370172, "grad_norm": 1.5057773684239024, "learning_rate": 4.060249786666054e-06, "loss": 0.2703, "step": 8979 }, { "epoch": 0.7114280055456526, "grad_norm": 1.3201734721898857, "learning_rate": 4.0581856520933706e-06, "loss": 0.3002, "step": 8980 }, { "epoch": 0.711507229154288, "grad_norm": 1.4272849989351317, "learning_rate": 4.056121908760571e-06, "loss": 0.2749, "step": 8981 }, { "epoch": 0.7115864527629233, "grad_norm": 1.5035970356560153, "learning_rate": 4.054058556803544e-06, "loss": 0.256, "step": 8982 }, { "epoch": 0.7116656763715588, "grad_norm": 1.287171503303611, "learning_rate": 4.051995596358147e-06, "loss": 0.2919, "step": 8983 }, { "epoch": 0.7117448999801941, "grad_norm": 2.5897559008098447, "learning_rate": 4.049933027560225e-06, "loss": 0.3718, "step": 8984 }, { "epoch": 0.7118241235888295, "grad_norm": 1.5390994862845784, "learning_rate": 4.047870850545581e-06, "loss": 0.3018, "step": 8985 }, { "epoch": 0.7119033471974648, "grad_norm": 1.1898032086420869, "learning_rate": 4.045809065449999e-06, "loss": 0.1878, "step": 8986 }, { "epoch": 0.7119825708061002, "grad_norm": 1.328411055156878, "learning_rate": 4.043747672409245e-06, "loss": 0.2012, "step": 8987 }, { "epoch": 0.7120617944147356, "grad_norm": 1.4234533009263486, "learning_rate": 4.041686671559046e-06, "loss": 0.2763, "step": 8988 }, { "epoch": 0.7121410180233709, "grad_norm": 1.4652878913669343, "learning_rate": 4.039626063035107e-06, "loss": 0.274, "step": 8989 }, { "epoch": 0.7122202416320064, "grad_norm": 1.263822406020965, "learning_rate": 4.0375658469731164e-06, "loss": 0.2465, "step": 8990 }, { "epoch": 0.7122994652406417, "grad_norm": 1.20848533173463, "learning_rate": 4.035506023508724e-06, "loss": 0.2154, "step": 8991 }, { "epoch": 0.7123786888492771, "grad_norm": 1.2170323054802772, "learning_rate": 4.033446592777558e-06, "loss": 0.1773, "step": 8992 }, { "epoch": 0.7124579124579125, "grad_norm": 1.0294314339054975, "learning_rate": 4.031387554915228e-06, "loss": 0.1985, "step": 8993 }, { "epoch": 0.7125371360665478, "grad_norm": 1.193940916802525, "learning_rate": 4.029328910057308e-06, "loss": 0.2637, "step": 8994 }, { "epoch": 0.7126163596751832, "grad_norm": 1.5566493177144933, "learning_rate": 4.027270658339347e-06, "loss": 0.1989, "step": 8995 }, { "epoch": 0.7126955832838185, "grad_norm": 1.5295165194469522, "learning_rate": 4.025212799896881e-06, "loss": 0.2743, "step": 8996 }, { "epoch": 0.712774806892454, "grad_norm": 1.4613446268286052, "learning_rate": 4.023155334865401e-06, "loss": 0.2218, "step": 8997 }, { "epoch": 0.7128540305010893, "grad_norm": 1.2398734197867007, "learning_rate": 4.0210982633803784e-06, "loss": 0.2364, "step": 8998 }, { "epoch": 0.7129332541097247, "grad_norm": 1.6925614390069985, "learning_rate": 4.01904158557727e-06, "loss": 0.2184, "step": 8999 }, { "epoch": 0.7130124777183601, "grad_norm": 1.4202977316688459, "learning_rate": 4.016985301591496e-06, "loss": 0.2616, "step": 9000 }, { "epoch": 0.7130917013269954, "grad_norm": 1.2506375432868608, "learning_rate": 4.014929411558447e-06, "loss": 0.2094, "step": 9001 }, { "epoch": 0.7131709249356308, "grad_norm": 1.3054118280473486, "learning_rate": 4.012873915613501e-06, "loss": 0.176, "step": 9002 }, { "epoch": 0.7132501485442662, "grad_norm": 1.444516804791575, "learning_rate": 4.010818813892e-06, "loss": 0.2752, "step": 9003 }, { "epoch": 0.7133293721529016, "grad_norm": 1.5373647334029328, "learning_rate": 4.008764106529259e-06, "loss": 0.2647, "step": 9004 }, { "epoch": 0.7134085957615369, "grad_norm": 1.6671742934707288, "learning_rate": 4.006709793660577e-06, "loss": 0.2392, "step": 9005 }, { "epoch": 0.7134878193701724, "grad_norm": 1.2668977071434002, "learning_rate": 4.004655875421217e-06, "loss": 0.2297, "step": 9006 }, { "epoch": 0.7135670429788077, "grad_norm": 1.5688026064058769, "learning_rate": 4.00260235194642e-06, "loss": 0.2741, "step": 9007 }, { "epoch": 0.713646266587443, "grad_norm": 1.3969267008213884, "learning_rate": 4.0005492233713964e-06, "loss": 0.3198, "step": 9008 }, { "epoch": 0.7137254901960784, "grad_norm": 1.417873309013034, "learning_rate": 3.998496489831343e-06, "loss": 0.2897, "step": 9009 }, { "epoch": 0.7138047138047138, "grad_norm": 1.3945581015026076, "learning_rate": 3.996444151461417e-06, "loss": 0.2293, "step": 9010 }, { "epoch": 0.7138839374133492, "grad_norm": 1.3622241021418544, "learning_rate": 3.994392208396754e-06, "loss": 0.2906, "step": 9011 }, { "epoch": 0.7139631610219845, "grad_norm": 1.3626724666421084, "learning_rate": 3.992340660772472e-06, "loss": 0.1835, "step": 9012 }, { "epoch": 0.71404238463062, "grad_norm": 1.50551276746951, "learning_rate": 3.990289508723648e-06, "loss": 0.2766, "step": 9013 }, { "epoch": 0.7141216082392553, "grad_norm": 0.9556086942653758, "learning_rate": 3.988238752385341e-06, "loss": 0.1384, "step": 9014 }, { "epoch": 0.7142008318478906, "grad_norm": 2.0880808386667127, "learning_rate": 3.986188391892587e-06, "loss": 0.4237, "step": 9015 }, { "epoch": 0.7142800554565261, "grad_norm": 1.7198087895852605, "learning_rate": 3.984138427380393e-06, "loss": 0.2723, "step": 9016 }, { "epoch": 0.7143592790651614, "grad_norm": 1.4521358032938667, "learning_rate": 3.982088858983733e-06, "loss": 0.2654, "step": 9017 }, { "epoch": 0.7144385026737968, "grad_norm": 1.3883799319391696, "learning_rate": 3.9800396868375675e-06, "loss": 0.2716, "step": 9018 }, { "epoch": 0.7145177262824322, "grad_norm": 1.0780511348095898, "learning_rate": 3.977990911076823e-06, "loss": 0.2129, "step": 9019 }, { "epoch": 0.7145969498910676, "grad_norm": 1.5947985654274843, "learning_rate": 3.975942531836397e-06, "loss": 0.2882, "step": 9020 }, { "epoch": 0.7146761734997029, "grad_norm": 1.95268466018519, "learning_rate": 3.973894549251175e-06, "loss": 0.2846, "step": 9021 }, { "epoch": 0.7147553971083382, "grad_norm": 1.3787962054997573, "learning_rate": 3.971846963455999e-06, "loss": 0.2275, "step": 9022 }, { "epoch": 0.7148346207169737, "grad_norm": 1.1991493716763608, "learning_rate": 3.969799774585696e-06, "loss": 0.1521, "step": 9023 }, { "epoch": 0.714913844325609, "grad_norm": 1.289898908853806, "learning_rate": 3.967752982775058e-06, "loss": 0.2519, "step": 9024 }, { "epoch": 0.7149930679342444, "grad_norm": 1.544903453103133, "learning_rate": 3.965706588158865e-06, "loss": 0.2991, "step": 9025 }, { "epoch": 0.7150722915428798, "grad_norm": 1.294177410951474, "learning_rate": 3.963660590871858e-06, "loss": 0.2407, "step": 9026 }, { "epoch": 0.7151515151515152, "grad_norm": 1.1544579395495873, "learning_rate": 3.961614991048752e-06, "loss": 0.2209, "step": 9027 }, { "epoch": 0.7152307387601505, "grad_norm": 1.382340644960581, "learning_rate": 3.959569788824248e-06, "loss": 0.2418, "step": 9028 }, { "epoch": 0.7153099623687859, "grad_norm": 1.2359153387844068, "learning_rate": 3.957524984333009e-06, "loss": 0.1819, "step": 9029 }, { "epoch": 0.7153891859774213, "grad_norm": 1.690780457319807, "learning_rate": 3.955480577709672e-06, "loss": 0.2685, "step": 9030 }, { "epoch": 0.7154684095860566, "grad_norm": 1.6204886194279187, "learning_rate": 3.953436569088856e-06, "loss": 0.2346, "step": 9031 }, { "epoch": 0.7155476331946921, "grad_norm": 1.2425747479937628, "learning_rate": 3.951392958605149e-06, "loss": 0.2361, "step": 9032 }, { "epoch": 0.7156268568033274, "grad_norm": 1.4058289208351415, "learning_rate": 3.949349746393108e-06, "loss": 0.2157, "step": 9033 }, { "epoch": 0.7157060804119628, "grad_norm": 1.3169195354893017, "learning_rate": 3.947306932587277e-06, "loss": 0.2385, "step": 9034 }, { "epoch": 0.7157853040205981, "grad_norm": 1.0951620641319275, "learning_rate": 3.945264517322159e-06, "loss": 0.2082, "step": 9035 }, { "epoch": 0.7158645276292335, "grad_norm": 1.197357149111598, "learning_rate": 3.943222500732241e-06, "loss": 0.2261, "step": 9036 }, { "epoch": 0.7159437512378689, "grad_norm": 1.3422019346954976, "learning_rate": 3.941180882951972e-06, "loss": 0.2534, "step": 9037 }, { "epoch": 0.7160229748465042, "grad_norm": 1.3501328623608448, "learning_rate": 3.9391396641157945e-06, "loss": 0.2615, "step": 9038 }, { "epoch": 0.7161021984551397, "grad_norm": 0.9942498582722339, "learning_rate": 3.937098844358106e-06, "loss": 0.143, "step": 9039 }, { "epoch": 0.716181422063775, "grad_norm": 1.4172362741288296, "learning_rate": 3.935058423813282e-06, "loss": 0.3341, "step": 9040 }, { "epoch": 0.7162606456724103, "grad_norm": 1.2363248904148585, "learning_rate": 3.933018402615683e-06, "loss": 0.1997, "step": 9041 }, { "epoch": 0.7163398692810458, "grad_norm": 1.3422468309943558, "learning_rate": 3.9309787808996284e-06, "loss": 0.2196, "step": 9042 }, { "epoch": 0.7164190928896811, "grad_norm": 0.9774040519574582, "learning_rate": 3.928939558799415e-06, "loss": 0.1153, "step": 9043 }, { "epoch": 0.7164983164983165, "grad_norm": 1.0863916992133649, "learning_rate": 3.926900736449324e-06, "loss": 0.2064, "step": 9044 }, { "epoch": 0.7165775401069518, "grad_norm": 1.0002593720277957, "learning_rate": 3.924862313983597e-06, "loss": 0.1596, "step": 9045 }, { "epoch": 0.7166567637155873, "grad_norm": 1.3932472778366485, "learning_rate": 3.922824291536452e-06, "loss": 0.2481, "step": 9046 }, { "epoch": 0.7167359873242226, "grad_norm": 0.9962748413897758, "learning_rate": 3.920786669242089e-06, "loss": 0.1278, "step": 9047 }, { "epoch": 0.7168152109328579, "grad_norm": 1.6232137006139786, "learning_rate": 3.918749447234674e-06, "loss": 0.2955, "step": 9048 }, { "epoch": 0.7168944345414934, "grad_norm": 1.5235827747133797, "learning_rate": 3.9167126256483415e-06, "loss": 0.2681, "step": 9049 }, { "epoch": 0.7169736581501287, "grad_norm": 1.3367344251523108, "learning_rate": 3.914676204617216e-06, "loss": 0.2435, "step": 9050 }, { "epoch": 0.7170528817587641, "grad_norm": 1.3680093831329665, "learning_rate": 3.912640184275381e-06, "loss": 0.246, "step": 9051 }, { "epoch": 0.7171321053673995, "grad_norm": 1.3377380656431945, "learning_rate": 3.9106045647569005e-06, "loss": 0.2599, "step": 9052 }, { "epoch": 0.7172113289760349, "grad_norm": 1.409688831657273, "learning_rate": 3.908569346195804e-06, "loss": 0.2097, "step": 9053 }, { "epoch": 0.7172905525846702, "grad_norm": 1.3583176684963718, "learning_rate": 3.90653452872611e-06, "loss": 0.2054, "step": 9054 }, { "epoch": 0.7173697761933056, "grad_norm": 1.7883433832273263, "learning_rate": 3.904500112481798e-06, "loss": 0.2189, "step": 9055 }, { "epoch": 0.717448999801941, "grad_norm": 1.232030654344082, "learning_rate": 3.902466097596821e-06, "loss": 0.2295, "step": 9056 }, { "epoch": 0.7175282234105763, "grad_norm": 1.4565580123232083, "learning_rate": 3.900432484205115e-06, "loss": 0.2396, "step": 9057 }, { "epoch": 0.7176074470192118, "grad_norm": 1.414573804697823, "learning_rate": 3.89839927244058e-06, "loss": 0.273, "step": 9058 }, { "epoch": 0.7176866706278471, "grad_norm": 1.0414269176818227, "learning_rate": 3.89636646243709e-06, "loss": 0.1537, "step": 9059 }, { "epoch": 0.7177658942364825, "grad_norm": 1.2462945110856305, "learning_rate": 3.894334054328505e-06, "loss": 0.2506, "step": 9060 }, { "epoch": 0.7178451178451178, "grad_norm": 1.1804790382461414, "learning_rate": 3.892302048248642e-06, "loss": 0.2585, "step": 9061 }, { "epoch": 0.7179243414537532, "grad_norm": 1.4667374757539555, "learning_rate": 3.890270444331298e-06, "loss": 0.2645, "step": 9062 }, { "epoch": 0.7180035650623886, "grad_norm": 0.9600023172585878, "learning_rate": 3.888239242710251e-06, "loss": 0.1723, "step": 9063 }, { "epoch": 0.7180827886710239, "grad_norm": 1.01333676930565, "learning_rate": 3.886208443519242e-06, "loss": 0.195, "step": 9064 }, { "epoch": 0.7181620122796594, "grad_norm": 1.3008146269289977, "learning_rate": 3.884178046891984e-06, "loss": 0.2526, "step": 9065 }, { "epoch": 0.7182412358882947, "grad_norm": 1.6311986018573938, "learning_rate": 3.88214805296218e-06, "loss": 0.2837, "step": 9066 }, { "epoch": 0.7183204594969301, "grad_norm": 1.6269981796246011, "learning_rate": 3.880118461863488e-06, "loss": 0.2876, "step": 9067 }, { "epoch": 0.7183996831055655, "grad_norm": 1.11590203203802, "learning_rate": 3.878089273729549e-06, "loss": 0.1671, "step": 9068 }, { "epoch": 0.7184789067142008, "grad_norm": 1.5151874495742101, "learning_rate": 3.876060488693971e-06, "loss": 0.2842, "step": 9069 }, { "epoch": 0.7185581303228362, "grad_norm": 1.3642308576024993, "learning_rate": 3.874032106890347e-06, "loss": 0.3031, "step": 9070 }, { "epoch": 0.7186373539314715, "grad_norm": 1.2313416433394608, "learning_rate": 3.872004128452231e-06, "loss": 0.1562, "step": 9071 }, { "epoch": 0.718716577540107, "grad_norm": 1.230306013720861, "learning_rate": 3.8699765535131565e-06, "loss": 0.2573, "step": 9072 }, { "epoch": 0.7187958011487423, "grad_norm": 1.1694303246651188, "learning_rate": 3.867949382206632e-06, "loss": 0.254, "step": 9073 }, { "epoch": 0.7188750247573777, "grad_norm": 1.3620704442743587, "learning_rate": 3.8659226146661344e-06, "loss": 0.1971, "step": 9074 }, { "epoch": 0.7189542483660131, "grad_norm": 1.5346681498097818, "learning_rate": 3.8638962510251175e-06, "loss": 0.2883, "step": 9075 }, { "epoch": 0.7190334719746484, "grad_norm": 1.4799091518088818, "learning_rate": 3.861870291417008e-06, "loss": 0.2909, "step": 9076 }, { "epoch": 0.7191126955832838, "grad_norm": 1.2910406950038384, "learning_rate": 3.859844735975205e-06, "loss": 0.2187, "step": 9077 }, { "epoch": 0.7191919191919192, "grad_norm": 1.2724622355374415, "learning_rate": 3.857819584833078e-06, "loss": 0.1962, "step": 9078 }, { "epoch": 0.7192711428005546, "grad_norm": 1.3141976310423746, "learning_rate": 3.855794838123981e-06, "loss": 0.2358, "step": 9079 }, { "epoch": 0.7193503664091899, "grad_norm": 1.2624793606379443, "learning_rate": 3.85377049598123e-06, "loss": 0.1814, "step": 9080 }, { "epoch": 0.7194295900178254, "grad_norm": 1.1727626485389202, "learning_rate": 3.851746558538113e-06, "loss": 0.154, "step": 9081 }, { "epoch": 0.7195088136264607, "grad_norm": 1.4935629384103712, "learning_rate": 3.849723025927907e-06, "loss": 0.2135, "step": 9082 }, { "epoch": 0.719588037235096, "grad_norm": 1.4589405184697402, "learning_rate": 3.847699898283846e-06, "loss": 0.2659, "step": 9083 }, { "epoch": 0.7196672608437314, "grad_norm": 1.548261058681011, "learning_rate": 3.84567717573914e-06, "loss": 0.286, "step": 9084 }, { "epoch": 0.7197464844523668, "grad_norm": 1.704379046714596, "learning_rate": 3.843654858426981e-06, "loss": 0.2722, "step": 9085 }, { "epoch": 0.7198257080610022, "grad_norm": 1.3708585904493755, "learning_rate": 3.84163294648053e-06, "loss": 0.2662, "step": 9086 }, { "epoch": 0.7199049316696375, "grad_norm": 1.722818761832865, "learning_rate": 3.839611440032912e-06, "loss": 0.2947, "step": 9087 }, { "epoch": 0.719984155278273, "grad_norm": 1.6219138598153262, "learning_rate": 3.837590339217243e-06, "loss": 0.2602, "step": 9088 }, { "epoch": 0.7200633788869083, "grad_norm": 1.5719260667691108, "learning_rate": 3.835569644166599e-06, "loss": 0.2513, "step": 9089 }, { "epoch": 0.7201426024955436, "grad_norm": 1.1547441760721175, "learning_rate": 3.833549355014028e-06, "loss": 0.2083, "step": 9090 }, { "epoch": 0.7202218261041791, "grad_norm": 1.4110336340101988, "learning_rate": 3.8315294718925656e-06, "loss": 0.2201, "step": 9091 }, { "epoch": 0.7203010497128144, "grad_norm": 1.598816391263384, "learning_rate": 3.829509994935206e-06, "loss": 0.2865, "step": 9092 }, { "epoch": 0.7203802733214498, "grad_norm": 1.2538371785748803, "learning_rate": 3.827490924274922e-06, "loss": 0.2382, "step": 9093 }, { "epoch": 0.7204594969300852, "grad_norm": 1.0873609452603825, "learning_rate": 3.825472260044658e-06, "loss": 0.1541, "step": 9094 }, { "epoch": 0.7205387205387206, "grad_norm": 1.3573680158424042, "learning_rate": 3.8234540023773385e-06, "loss": 0.2475, "step": 9095 }, { "epoch": 0.7206179441473559, "grad_norm": 1.4837327294446518, "learning_rate": 3.821436151405854e-06, "loss": 0.191, "step": 9096 }, { "epoch": 0.7206971677559912, "grad_norm": 1.6275015565994009, "learning_rate": 3.819418707263065e-06, "loss": 0.3439, "step": 9097 }, { "epoch": 0.7207763913646267, "grad_norm": 1.2794147769301913, "learning_rate": 3.8174016700818196e-06, "loss": 0.1915, "step": 9098 }, { "epoch": 0.720855614973262, "grad_norm": 1.5547963555298354, "learning_rate": 3.815385039994925e-06, "loss": 0.2681, "step": 9099 }, { "epoch": 0.7209348385818974, "grad_norm": 1.3879000668113997, "learning_rate": 3.8133688171351645e-06, "loss": 0.2446, "step": 9100 }, { "epoch": 0.7210140621905328, "grad_norm": 1.1918122028659293, "learning_rate": 3.811353001635302e-06, "loss": 0.2133, "step": 9101 }, { "epoch": 0.7210932857991682, "grad_norm": 0.9975258595949281, "learning_rate": 3.8093375936280665e-06, "loss": 0.1668, "step": 9102 }, { "epoch": 0.7211725094078035, "grad_norm": 1.7044578662922938, "learning_rate": 3.807322593246159e-06, "loss": 0.3496, "step": 9103 }, { "epoch": 0.7212517330164389, "grad_norm": 1.2902476579414237, "learning_rate": 3.805308000622265e-06, "loss": 0.204, "step": 9104 }, { "epoch": 0.7213309566250743, "grad_norm": 1.3657153123355068, "learning_rate": 3.8032938158890333e-06, "loss": 0.2819, "step": 9105 }, { "epoch": 0.7214101802337096, "grad_norm": 1.1325856774703331, "learning_rate": 3.8012800391790814e-06, "loss": 0.163, "step": 9106 }, { "epoch": 0.7214894038423451, "grad_norm": 1.2128913966401265, "learning_rate": 3.799266670625018e-06, "loss": 0.1836, "step": 9107 }, { "epoch": 0.7215686274509804, "grad_norm": 1.06215324418043, "learning_rate": 3.797253710359409e-06, "loss": 0.2255, "step": 9108 }, { "epoch": 0.7216478510596158, "grad_norm": 1.7021288409437525, "learning_rate": 3.7952411585147954e-06, "loss": 0.3169, "step": 9109 }, { "epoch": 0.7217270746682511, "grad_norm": 1.3048481119217095, "learning_rate": 3.793229015223694e-06, "loss": 0.2835, "step": 9110 }, { "epoch": 0.7218062982768865, "grad_norm": 1.155927614584592, "learning_rate": 3.7912172806186e-06, "loss": 0.2117, "step": 9111 }, { "epoch": 0.7218855218855219, "grad_norm": 2.1252991957176386, "learning_rate": 3.7892059548319726e-06, "loss": 0.1649, "step": 9112 }, { "epoch": 0.7219647454941572, "grad_norm": 1.4229190429994243, "learning_rate": 3.7871950379962463e-06, "loss": 0.2586, "step": 9113 }, { "epoch": 0.7220439691027927, "grad_norm": 1.610405328284298, "learning_rate": 3.785184530243835e-06, "loss": 0.3102, "step": 9114 }, { "epoch": 0.722123192711428, "grad_norm": 1.4951300508021137, "learning_rate": 3.7831744317071194e-06, "loss": 0.2944, "step": 9115 }, { "epoch": 0.7222024163200634, "grad_norm": 1.3742903698509503, "learning_rate": 3.7811647425184508e-06, "loss": 0.2977, "step": 9116 }, { "epoch": 0.7222816399286988, "grad_norm": 1.1168992732742293, "learning_rate": 3.7791554628101635e-06, "loss": 0.2328, "step": 9117 }, { "epoch": 0.7223608635373341, "grad_norm": 1.2595954596448162, "learning_rate": 3.777146592714557e-06, "loss": 0.1812, "step": 9118 }, { "epoch": 0.7224400871459695, "grad_norm": 1.4626741318563155, "learning_rate": 3.7751381323639e-06, "loss": 0.2663, "step": 9119 }, { "epoch": 0.7225193107546048, "grad_norm": 1.3626353026404632, "learning_rate": 3.7731300818904494e-06, "loss": 0.2432, "step": 9120 }, { "epoch": 0.7225985343632403, "grad_norm": 1.2176623681197087, "learning_rate": 3.7711224414264216e-06, "loss": 0.2071, "step": 9121 }, { "epoch": 0.7226777579718756, "grad_norm": 1.386022587529955, "learning_rate": 3.7691152111040087e-06, "loss": 0.2965, "step": 9122 }, { "epoch": 0.7227569815805109, "grad_norm": 1.3087252713515691, "learning_rate": 3.767108391055374e-06, "loss": 0.2382, "step": 9123 }, { "epoch": 0.7228362051891464, "grad_norm": 1.2507412328548588, "learning_rate": 3.7651019814126656e-06, "loss": 0.2175, "step": 9124 }, { "epoch": 0.7229154287977817, "grad_norm": 1.496495849835261, "learning_rate": 3.7630959823079914e-06, "loss": 0.2511, "step": 9125 }, { "epoch": 0.7229946524064171, "grad_norm": 1.0128113093324684, "learning_rate": 3.761090393873432e-06, "loss": 0.1491, "step": 9126 }, { "epoch": 0.7230738760150525, "grad_norm": 1.10580729200062, "learning_rate": 3.7590852162410553e-06, "loss": 0.1461, "step": 9127 }, { "epoch": 0.7231530996236879, "grad_norm": 1.1735246489626505, "learning_rate": 3.757080449542887e-06, "loss": 0.2685, "step": 9128 }, { "epoch": 0.7232323232323232, "grad_norm": 1.5722172581561735, "learning_rate": 3.7550760939109287e-06, "loss": 0.3031, "step": 9129 }, { "epoch": 0.7233115468409586, "grad_norm": 1.326354747879238, "learning_rate": 3.7530721494771648e-06, "loss": 0.2596, "step": 9130 }, { "epoch": 0.723390770449594, "grad_norm": 1.4097677300592528, "learning_rate": 3.751068616373541e-06, "loss": 0.2374, "step": 9131 }, { "epoch": 0.7234699940582293, "grad_norm": 1.2765079594111168, "learning_rate": 3.749065494731978e-06, "loss": 0.1791, "step": 9132 }, { "epoch": 0.7235492176668648, "grad_norm": 1.2524215207437652, "learning_rate": 3.747062784684378e-06, "loss": 0.2361, "step": 9133 }, { "epoch": 0.7236284412755001, "grad_norm": 1.496733449333491, "learning_rate": 3.7450604863626063e-06, "loss": 0.2117, "step": 9134 }, { "epoch": 0.7237076648841355, "grad_norm": 1.5781423855302263, "learning_rate": 3.7430585998985004e-06, "loss": 0.3049, "step": 9135 }, { "epoch": 0.7237868884927708, "grad_norm": 1.4988720101411532, "learning_rate": 3.7410571254238835e-06, "loss": 0.2465, "step": 9136 }, { "epoch": 0.7238661121014062, "grad_norm": 1.1902463792591904, "learning_rate": 3.7390560630705387e-06, "loss": 0.2148, "step": 9137 }, { "epoch": 0.7239453357100416, "grad_norm": 1.1679536509076427, "learning_rate": 3.7370554129702265e-06, "loss": 0.1793, "step": 9138 }, { "epoch": 0.7240245593186769, "grad_norm": 1.372903686651834, "learning_rate": 3.735055175254676e-06, "loss": 0.2671, "step": 9139 }, { "epoch": 0.7241037829273124, "grad_norm": 1.2645719699218168, "learning_rate": 3.733055350055601e-06, "loss": 0.2085, "step": 9140 }, { "epoch": 0.7241830065359477, "grad_norm": 1.2307984472419495, "learning_rate": 3.7310559375046774e-06, "loss": 0.2338, "step": 9141 }, { "epoch": 0.7242622301445831, "grad_norm": 0.9072967688136768, "learning_rate": 3.7290569377335517e-06, "loss": 0.1457, "step": 9142 }, { "epoch": 0.7243414537532185, "grad_norm": 1.312261910175203, "learning_rate": 3.7270583508738565e-06, "loss": 0.2024, "step": 9143 }, { "epoch": 0.7244206773618538, "grad_norm": 1.4524428509186667, "learning_rate": 3.725060177057185e-06, "loss": 0.3443, "step": 9144 }, { "epoch": 0.7244999009704892, "grad_norm": 1.561340853007366, "learning_rate": 3.723062416415105e-06, "loss": 0.266, "step": 9145 }, { "epoch": 0.7245791245791245, "grad_norm": 1.169470448614544, "learning_rate": 3.721065069079165e-06, "loss": 0.1993, "step": 9146 }, { "epoch": 0.72465834818776, "grad_norm": 1.3998710336942795, "learning_rate": 3.7190681351808778e-06, "loss": 0.2344, "step": 9147 }, { "epoch": 0.7247375717963953, "grad_norm": 1.8655369572716267, "learning_rate": 3.7170716148517294e-06, "loss": 0.279, "step": 9148 }, { "epoch": 0.7248167954050307, "grad_norm": 1.1494656495790188, "learning_rate": 3.715075508223187e-06, "loss": 0.1921, "step": 9149 }, { "epoch": 0.7248960190136661, "grad_norm": 1.2025807824181314, "learning_rate": 3.71307981542668e-06, "loss": 0.2421, "step": 9150 }, { "epoch": 0.7249752426223014, "grad_norm": 1.497224958482197, "learning_rate": 3.7110845365936144e-06, "loss": 0.1951, "step": 9151 }, { "epoch": 0.7250544662309368, "grad_norm": 1.2831204237393163, "learning_rate": 3.709089671855378e-06, "loss": 0.2439, "step": 9152 }, { "epoch": 0.7251336898395722, "grad_norm": 1.1272935862878195, "learning_rate": 3.707095221343313e-06, "loss": 0.1831, "step": 9153 }, { "epoch": 0.7252129134482076, "grad_norm": 1.3080852736183557, "learning_rate": 3.7051011851887455e-06, "loss": 0.2622, "step": 9154 }, { "epoch": 0.7252921370568429, "grad_norm": 1.4049070830851684, "learning_rate": 3.7031075635229787e-06, "loss": 0.2894, "step": 9155 }, { "epoch": 0.7253713606654784, "grad_norm": 1.3485045411849925, "learning_rate": 3.70111435647728e-06, "loss": 0.18, "step": 9156 }, { "epoch": 0.7254505842741137, "grad_norm": 1.1033833265399628, "learning_rate": 3.6991215641828903e-06, "loss": 0.2039, "step": 9157 }, { "epoch": 0.725529807882749, "grad_norm": 1.8296198157336274, "learning_rate": 3.6971291867710303e-06, "loss": 0.3798, "step": 9158 }, { "epoch": 0.7256090314913844, "grad_norm": 1.062518835043534, "learning_rate": 3.6951372243728854e-06, "loss": 0.1571, "step": 9159 }, { "epoch": 0.7256882551000198, "grad_norm": 1.48299692990113, "learning_rate": 3.693145677119615e-06, "loss": 0.2854, "step": 9160 }, { "epoch": 0.7257674787086552, "grad_norm": 1.6220806568353399, "learning_rate": 3.691154545142357e-06, "loss": 0.3071, "step": 9161 }, { "epoch": 0.7258467023172905, "grad_norm": 1.5203522106123646, "learning_rate": 3.6891638285722176e-06, "loss": 0.227, "step": 9162 }, { "epoch": 0.725925925925926, "grad_norm": 1.328898106537588, "learning_rate": 3.687173527540273e-06, "loss": 0.279, "step": 9163 }, { "epoch": 0.7260051495345613, "grad_norm": 1.7248760988692442, "learning_rate": 3.6851836421775733e-06, "loss": 0.3089, "step": 9164 }, { "epoch": 0.7260843731431966, "grad_norm": 1.310575111967257, "learning_rate": 3.683194172615149e-06, "loss": 0.249, "step": 9165 }, { "epoch": 0.7261635967518321, "grad_norm": 1.2477260115881732, "learning_rate": 3.681205118983995e-06, "loss": 0.2313, "step": 9166 }, { "epoch": 0.7262428203604674, "grad_norm": 1.213549402713103, "learning_rate": 3.6792164814150756e-06, "loss": 0.1836, "step": 9167 }, { "epoch": 0.7263220439691028, "grad_norm": 1.1400004162296868, "learning_rate": 3.6772282600393393e-06, "loss": 0.1793, "step": 9168 }, { "epoch": 0.7264012675777382, "grad_norm": 1.2709422135491037, "learning_rate": 3.675240454987701e-06, "loss": 0.1752, "step": 9169 }, { "epoch": 0.7264804911863736, "grad_norm": 1.4090124291424384, "learning_rate": 3.6732530663910415e-06, "loss": 0.1986, "step": 9170 }, { "epoch": 0.7265597147950089, "grad_norm": 1.1188457934832954, "learning_rate": 3.6712660943802292e-06, "loss": 0.1625, "step": 9171 }, { "epoch": 0.7266389384036442, "grad_norm": 1.316698601066028, "learning_rate": 3.6692795390860913e-06, "loss": 0.2277, "step": 9172 }, { "epoch": 0.7267181620122797, "grad_norm": 1.3798962236311325, "learning_rate": 3.667293400639432e-06, "loss": 0.2361, "step": 9173 }, { "epoch": 0.726797385620915, "grad_norm": 1.1470900392120604, "learning_rate": 3.665307679171034e-06, "loss": 0.1955, "step": 9174 }, { "epoch": 0.7268766092295504, "grad_norm": 1.1680899340665103, "learning_rate": 3.6633223748116454e-06, "loss": 0.2032, "step": 9175 }, { "epoch": 0.7269558328381858, "grad_norm": 1.5435944484658588, "learning_rate": 3.661337487691985e-06, "loss": 0.267, "step": 9176 }, { "epoch": 0.7270350564468212, "grad_norm": 1.3897853506991977, "learning_rate": 3.659353017942754e-06, "loss": 0.3199, "step": 9177 }, { "epoch": 0.7271142800554565, "grad_norm": 1.368592942867567, "learning_rate": 3.6573689656946177e-06, "loss": 0.2274, "step": 9178 }, { "epoch": 0.7271935036640919, "grad_norm": 1.1122670078375376, "learning_rate": 3.655385331078217e-06, "loss": 0.1743, "step": 9179 }, { "epoch": 0.7272727272727273, "grad_norm": 1.4071660036217781, "learning_rate": 3.6534021142241595e-06, "loss": 0.2613, "step": 9180 }, { "epoch": 0.7273519508813626, "grad_norm": 1.8890939266097444, "learning_rate": 3.6514193152630382e-06, "loss": 0.272, "step": 9181 }, { "epoch": 0.7274311744899981, "grad_norm": 1.3160664503159987, "learning_rate": 3.649436934325409e-06, "loss": 0.2217, "step": 9182 }, { "epoch": 0.7275103980986334, "grad_norm": 1.0103078103482206, "learning_rate": 3.647454971541796e-06, "loss": 0.2157, "step": 9183 }, { "epoch": 0.7275896217072688, "grad_norm": 0.8574222887958896, "learning_rate": 3.6454734270427107e-06, "loss": 0.1131, "step": 9184 }, { "epoch": 0.7276688453159041, "grad_norm": 1.5113481448845036, "learning_rate": 3.6434923009586244e-06, "loss": 0.2745, "step": 9185 }, { "epoch": 0.7277480689245395, "grad_norm": 1.2424786346479255, "learning_rate": 3.6415115934199795e-06, "loss": 0.2065, "step": 9186 }, { "epoch": 0.7278272925331749, "grad_norm": 1.5154489509773454, "learning_rate": 3.6395313045572055e-06, "loss": 0.2691, "step": 9187 }, { "epoch": 0.7279065161418102, "grad_norm": 1.2871245413142585, "learning_rate": 3.6375514345006913e-06, "loss": 0.2071, "step": 9188 }, { "epoch": 0.7279857397504457, "grad_norm": 1.2526553455170215, "learning_rate": 3.635571983380797e-06, "loss": 0.1724, "step": 9189 }, { "epoch": 0.728064963359081, "grad_norm": 1.5383174550681595, "learning_rate": 3.6335929513278667e-06, "loss": 0.2606, "step": 9190 }, { "epoch": 0.7281441869677164, "grad_norm": 1.124724780323608, "learning_rate": 3.631614338472208e-06, "loss": 0.2089, "step": 9191 }, { "epoch": 0.7282234105763518, "grad_norm": 1.1423749369986838, "learning_rate": 3.6296361449440985e-06, "loss": 0.2272, "step": 9192 }, { "epoch": 0.7283026341849871, "grad_norm": 1.3092976939433734, "learning_rate": 3.6276583708738013e-06, "loss": 0.2267, "step": 9193 }, { "epoch": 0.7283818577936225, "grad_norm": 1.2717162872533698, "learning_rate": 3.6256810163915368e-06, "loss": 0.2111, "step": 9194 }, { "epoch": 0.7284610814022578, "grad_norm": 1.294649018524043, "learning_rate": 3.623704081627507e-06, "loss": 0.2673, "step": 9195 }, { "epoch": 0.7285403050108933, "grad_norm": 1.1352388942796117, "learning_rate": 3.62172756671188e-06, "loss": 0.1596, "step": 9196 }, { "epoch": 0.7286195286195286, "grad_norm": 1.3163293480876854, "learning_rate": 3.619751471774805e-06, "loss": 0.2469, "step": 9197 }, { "epoch": 0.728698752228164, "grad_norm": 1.1337940027698996, "learning_rate": 3.6177757969463956e-06, "loss": 0.2014, "step": 9198 }, { "epoch": 0.7287779758367994, "grad_norm": 1.317877051692378, "learning_rate": 3.615800542356738e-06, "loss": 0.1979, "step": 9199 }, { "epoch": 0.7288571994454347, "grad_norm": 1.2159449622660556, "learning_rate": 3.6138257081358985e-06, "loss": 0.2114, "step": 9200 }, { "epoch": 0.7289364230540701, "grad_norm": 1.298740027294028, "learning_rate": 3.6118512944139084e-06, "loss": 0.2574, "step": 9201 }, { "epoch": 0.7290156466627055, "grad_norm": 1.4551696096768028, "learning_rate": 3.609877301320769e-06, "loss": 0.3134, "step": 9202 }, { "epoch": 0.7290948702713409, "grad_norm": 1.5053194288121399, "learning_rate": 3.607903728986465e-06, "loss": 0.2715, "step": 9203 }, { "epoch": 0.7291740938799762, "grad_norm": 1.5620221337434352, "learning_rate": 3.6059305775409435e-06, "loss": 0.3126, "step": 9204 }, { "epoch": 0.7292533174886116, "grad_norm": 1.4039005881959574, "learning_rate": 3.6039578471141244e-06, "loss": 0.2692, "step": 9205 }, { "epoch": 0.729332541097247, "grad_norm": 1.4417294694543692, "learning_rate": 3.6019855378359092e-06, "loss": 0.263, "step": 9206 }, { "epoch": 0.7294117647058823, "grad_norm": 0.9734123703556665, "learning_rate": 3.6000136498361605e-06, "loss": 0.1242, "step": 9207 }, { "epoch": 0.7294909883145178, "grad_norm": 0.7478774499811458, "learning_rate": 3.5980421832447188e-06, "loss": 0.1154, "step": 9208 }, { "epoch": 0.7295702119231531, "grad_norm": 1.3087079723772048, "learning_rate": 3.5960711381913904e-06, "loss": 0.2268, "step": 9209 }, { "epoch": 0.7296494355317885, "grad_norm": 1.3090031073400228, "learning_rate": 3.5941005148059684e-06, "loss": 0.2945, "step": 9210 }, { "epoch": 0.7297286591404238, "grad_norm": 1.2752879997082822, "learning_rate": 3.5921303132182038e-06, "loss": 0.2515, "step": 9211 }, { "epoch": 0.7298078827490592, "grad_norm": 1.2470509100316363, "learning_rate": 3.5901605335578214e-06, "loss": 0.2622, "step": 9212 }, { "epoch": 0.7298871063576946, "grad_norm": 1.3744644439824854, "learning_rate": 3.5881911759545296e-06, "loss": 0.2886, "step": 9213 }, { "epoch": 0.7299663299663299, "grad_norm": 0.9697025603182141, "learning_rate": 3.5862222405379975e-06, "loss": 0.1375, "step": 9214 }, { "epoch": 0.7300455535749654, "grad_norm": 1.4726527285422166, "learning_rate": 3.584253727437866e-06, "loss": 0.2277, "step": 9215 }, { "epoch": 0.7301247771836007, "grad_norm": 1.2100584204271536, "learning_rate": 3.5822856367837587e-06, "loss": 0.2463, "step": 9216 }, { "epoch": 0.7302040007922361, "grad_norm": 1.280314552776881, "learning_rate": 3.5803179687052636e-06, "loss": 0.2011, "step": 9217 }, { "epoch": 0.7302832244008715, "grad_norm": 1.2978310272709583, "learning_rate": 3.578350723331937e-06, "loss": 0.1776, "step": 9218 }, { "epoch": 0.7303624480095068, "grad_norm": 1.481709808646171, "learning_rate": 3.5763839007933186e-06, "loss": 0.3289, "step": 9219 }, { "epoch": 0.7304416716181422, "grad_norm": 1.193720926516714, "learning_rate": 3.574417501218913e-06, "loss": 0.2148, "step": 9220 }, { "epoch": 0.7305208952267775, "grad_norm": 1.6336905063296179, "learning_rate": 3.572451524738193e-06, "loss": 0.3305, "step": 9221 }, { "epoch": 0.730600118835413, "grad_norm": 1.1928713445998316, "learning_rate": 3.5704859714806162e-06, "loss": 0.2324, "step": 9222 }, { "epoch": 0.7306793424440483, "grad_norm": 1.367042494144639, "learning_rate": 3.568520841575601e-06, "loss": 0.2348, "step": 9223 }, { "epoch": 0.7307585660526837, "grad_norm": 1.2958420252155944, "learning_rate": 3.5665561351525423e-06, "loss": 0.2418, "step": 9224 }, { "epoch": 0.7308377896613191, "grad_norm": 1.351444965577694, "learning_rate": 3.564591852340803e-06, "loss": 0.2164, "step": 9225 }, { "epoch": 0.7309170132699544, "grad_norm": 1.3836486065638958, "learning_rate": 3.562627993269728e-06, "loss": 0.1892, "step": 9226 }, { "epoch": 0.7309962368785898, "grad_norm": 1.3952793992642734, "learning_rate": 3.5606645580686262e-06, "loss": 0.2695, "step": 9227 }, { "epoch": 0.7310754604872252, "grad_norm": 1.2718676658557244, "learning_rate": 3.558701546866775e-06, "loss": 0.2371, "step": 9228 }, { "epoch": 0.7311546840958606, "grad_norm": 1.239544096955322, "learning_rate": 3.5567389597934367e-06, "loss": 0.2413, "step": 9229 }, { "epoch": 0.7312339077044959, "grad_norm": 1.3831662880171955, "learning_rate": 3.5547767969778355e-06, "loss": 0.1706, "step": 9230 }, { "epoch": 0.7313131313131314, "grad_norm": 1.497345554527076, "learning_rate": 3.5528150585491695e-06, "loss": 0.3261, "step": 9231 }, { "epoch": 0.7313923549217667, "grad_norm": 1.5860677089133086, "learning_rate": 3.5508537446366097e-06, "loss": 0.2808, "step": 9232 }, { "epoch": 0.731471578530402, "grad_norm": 1.395644138994821, "learning_rate": 3.548892855369299e-06, "loss": 0.2719, "step": 9233 }, { "epoch": 0.7315508021390374, "grad_norm": 1.3163582283306305, "learning_rate": 3.5469323908763507e-06, "loss": 0.1699, "step": 9234 }, { "epoch": 0.7316300257476728, "grad_norm": 1.3470163291986994, "learning_rate": 3.544972351286857e-06, "loss": 0.272, "step": 9235 }, { "epoch": 0.7317092493563082, "grad_norm": 1.2846938658523657, "learning_rate": 3.543012736729875e-06, "loss": 0.2143, "step": 9236 }, { "epoch": 0.7317884729649435, "grad_norm": 1.4181215349696352, "learning_rate": 3.541053547334431e-06, "loss": 0.1985, "step": 9237 }, { "epoch": 0.731867696573579, "grad_norm": 1.2393105836174212, "learning_rate": 3.5390947832295366e-06, "loss": 0.2, "step": 9238 }, { "epoch": 0.7319469201822143, "grad_norm": 1.4206063640871278, "learning_rate": 3.5371364445441624e-06, "loss": 0.2596, "step": 9239 }, { "epoch": 0.7320261437908496, "grad_norm": 1.12714059429672, "learning_rate": 3.535178531407253e-06, "loss": 0.2472, "step": 9240 }, { "epoch": 0.7321053673994851, "grad_norm": 1.1754069399453526, "learning_rate": 3.5332210439477334e-06, "loss": 0.2222, "step": 9241 }, { "epoch": 0.7321845910081204, "grad_norm": 1.1840809153633325, "learning_rate": 3.5312639822944917e-06, "loss": 0.1683, "step": 9242 }, { "epoch": 0.7322638146167558, "grad_norm": 1.2364644998397787, "learning_rate": 3.529307346576388e-06, "loss": 0.2998, "step": 9243 }, { "epoch": 0.7323430382253912, "grad_norm": 1.4977267706621402, "learning_rate": 3.527351136922265e-06, "loss": 0.2293, "step": 9244 }, { "epoch": 0.7324222618340266, "grad_norm": 1.598424269754826, "learning_rate": 3.525395353460924e-06, "loss": 0.2092, "step": 9245 }, { "epoch": 0.7325014854426619, "grad_norm": 1.0762020920966138, "learning_rate": 3.5234399963211418e-06, "loss": 0.172, "step": 9246 }, { "epoch": 0.7325807090512972, "grad_norm": 1.1283011982714446, "learning_rate": 3.521485065631677e-06, "loss": 0.1973, "step": 9247 }, { "epoch": 0.7326599326599327, "grad_norm": 1.3472524578459015, "learning_rate": 3.5195305615212473e-06, "loss": 0.22, "step": 9248 }, { "epoch": 0.732739156268568, "grad_norm": 1.2031077235456313, "learning_rate": 3.517576484118549e-06, "loss": 0.1709, "step": 9249 }, { "epoch": 0.7328183798772034, "grad_norm": 1.3521594777320847, "learning_rate": 3.5156228335522434e-06, "loss": 0.2566, "step": 9250 }, { "epoch": 0.7328976034858388, "grad_norm": 1.4394382310170906, "learning_rate": 3.513669609950977e-06, "loss": 0.2657, "step": 9251 }, { "epoch": 0.7329768270944742, "grad_norm": 1.0113890499781821, "learning_rate": 3.5117168134433566e-06, "loss": 0.2191, "step": 9252 }, { "epoch": 0.7330560507031095, "grad_norm": 1.7668251346988608, "learning_rate": 3.5097644441579602e-06, "loss": 0.3022, "step": 9253 }, { "epoch": 0.7331352743117449, "grad_norm": 1.3264919356428448, "learning_rate": 3.507812502223351e-06, "loss": 0.2632, "step": 9254 }, { "epoch": 0.7332144979203803, "grad_norm": 1.3202488044573206, "learning_rate": 3.5058609877680495e-06, "loss": 0.2346, "step": 9255 }, { "epoch": 0.7332937215290156, "grad_norm": 1.249267761519028, "learning_rate": 3.5039099009205503e-06, "loss": 0.177, "step": 9256 }, { "epoch": 0.7333729451376511, "grad_norm": 1.0385498589178, "learning_rate": 3.5019592418093306e-06, "loss": 0.1958, "step": 9257 }, { "epoch": 0.7334521687462864, "grad_norm": 1.7322886298834497, "learning_rate": 3.5000090105628282e-06, "loss": 0.2548, "step": 9258 }, { "epoch": 0.7335313923549218, "grad_norm": 1.6212657464488363, "learning_rate": 3.4980592073094533e-06, "loss": 0.3126, "step": 9259 }, { "epoch": 0.7336106159635571, "grad_norm": 1.232431030794991, "learning_rate": 3.4961098321775978e-06, "loss": 0.2031, "step": 9260 }, { "epoch": 0.7336898395721925, "grad_norm": 1.210809283044841, "learning_rate": 3.4941608852956143e-06, "loss": 0.2083, "step": 9261 }, { "epoch": 0.7337690631808279, "grad_norm": 1.4815542113270626, "learning_rate": 3.4922123667918305e-06, "loss": 0.2493, "step": 9262 }, { "epoch": 0.7338482867894632, "grad_norm": 1.370812047528853, "learning_rate": 3.4902642767945506e-06, "loss": 0.2685, "step": 9263 }, { "epoch": 0.7339275103980987, "grad_norm": 1.307021871133628, "learning_rate": 3.488316615432047e-06, "loss": 0.2045, "step": 9264 }, { "epoch": 0.734006734006734, "grad_norm": 1.3912112571895232, "learning_rate": 3.486369382832561e-06, "loss": 0.2461, "step": 9265 }, { "epoch": 0.7340859576153694, "grad_norm": 1.6418173241747325, "learning_rate": 3.484422579124306e-06, "loss": 0.3147, "step": 9266 }, { "epoch": 0.7341651812240048, "grad_norm": 1.3062255568355514, "learning_rate": 3.4824762044354763e-06, "loss": 0.2357, "step": 9267 }, { "epoch": 0.7342444048326401, "grad_norm": 1.5047478729694654, "learning_rate": 3.480530258894229e-06, "loss": 0.3066, "step": 9268 }, { "epoch": 0.7343236284412755, "grad_norm": 1.2897483515247474, "learning_rate": 3.478584742628691e-06, "loss": 0.2355, "step": 9269 }, { "epoch": 0.7344028520499108, "grad_norm": 1.4894731624445268, "learning_rate": 3.4766396557669712e-06, "loss": 0.2566, "step": 9270 }, { "epoch": 0.7344820756585463, "grad_norm": 1.5329417592975518, "learning_rate": 3.4746949984371425e-06, "loss": 0.2657, "step": 9271 }, { "epoch": 0.7345612992671816, "grad_norm": 1.451594454701546, "learning_rate": 3.472750770767247e-06, "loss": 0.2656, "step": 9272 }, { "epoch": 0.734640522875817, "grad_norm": 1.3640996989805043, "learning_rate": 3.470806972885309e-06, "loss": 0.2252, "step": 9273 }, { "epoch": 0.7347197464844524, "grad_norm": 1.2277469136655474, "learning_rate": 3.468863604919316e-06, "loss": 0.17, "step": 9274 }, { "epoch": 0.7347989700930877, "grad_norm": 1.4960432655706746, "learning_rate": 3.4669206669972254e-06, "loss": 0.2332, "step": 9275 }, { "epoch": 0.7348781937017231, "grad_norm": 1.2216736409084543, "learning_rate": 3.4649781592469765e-06, "loss": 0.1903, "step": 9276 }, { "epoch": 0.7349574173103585, "grad_norm": 1.44950255840134, "learning_rate": 3.4630360817964715e-06, "loss": 0.3536, "step": 9277 }, { "epoch": 0.7350366409189939, "grad_norm": 1.3600526631535825, "learning_rate": 3.4610944347735864e-06, "loss": 0.2556, "step": 9278 }, { "epoch": 0.7351158645276292, "grad_norm": 1.379654388160424, "learning_rate": 3.459153218306167e-06, "loss": 0.2525, "step": 9279 }, { "epoch": 0.7351950881362646, "grad_norm": 1.3443176104254027, "learning_rate": 3.457212432522038e-06, "loss": 0.2215, "step": 9280 }, { "epoch": 0.7352743117449, "grad_norm": 1.491289938673264, "learning_rate": 3.455272077548989e-06, "loss": 0.3319, "step": 9281 }, { "epoch": 0.7353535353535353, "grad_norm": 1.3342119726559776, "learning_rate": 3.453332153514779e-06, "loss": 0.2904, "step": 9282 }, { "epoch": 0.7354327589621708, "grad_norm": 1.4302412012794026, "learning_rate": 3.4513926605471504e-06, "loss": 0.2005, "step": 9283 }, { "epoch": 0.7355119825708061, "grad_norm": 1.1844039876753532, "learning_rate": 3.449453598773804e-06, "loss": 0.1767, "step": 9284 }, { "epoch": 0.7355912061794415, "grad_norm": 1.4912406308192818, "learning_rate": 3.4475149683224164e-06, "loss": 0.3269, "step": 9285 }, { "epoch": 0.7356704297880768, "grad_norm": 1.177134188332753, "learning_rate": 3.445576769320642e-06, "loss": 0.2069, "step": 9286 }, { "epoch": 0.7357496533967122, "grad_norm": 1.3052468325488955, "learning_rate": 3.4436390018960997e-06, "loss": 0.2515, "step": 9287 }, { "epoch": 0.7358288770053476, "grad_norm": 1.276417938819899, "learning_rate": 3.4417016661763793e-06, "loss": 0.2371, "step": 9288 }, { "epoch": 0.7359081006139829, "grad_norm": 1.1018211843230064, "learning_rate": 3.439764762289051e-06, "loss": 0.1982, "step": 9289 }, { "epoch": 0.7359873242226184, "grad_norm": 1.3027848068597259, "learning_rate": 3.4378282903616457e-06, "loss": 0.2057, "step": 9290 }, { "epoch": 0.7360665478312537, "grad_norm": 1.4328870946841892, "learning_rate": 3.4358922505216707e-06, "loss": 0.306, "step": 9291 }, { "epoch": 0.7361457714398891, "grad_norm": 0.9964209043201193, "learning_rate": 3.4339566428966086e-06, "loss": 0.1596, "step": 9292 }, { "epoch": 0.7362249950485245, "grad_norm": 1.2152756409676986, "learning_rate": 3.4320214676139087e-06, "loss": 0.2524, "step": 9293 }, { "epoch": 0.7363042186571598, "grad_norm": 1.6232576532946363, "learning_rate": 3.4300867248009917e-06, "loss": 0.278, "step": 9294 }, { "epoch": 0.7363834422657952, "grad_norm": 1.5540840955000574, "learning_rate": 3.4281524145852485e-06, "loss": 0.2511, "step": 9295 }, { "epoch": 0.7364626658744305, "grad_norm": 1.3783879673009558, "learning_rate": 3.4262185370940504e-06, "loss": 0.2009, "step": 9296 }, { "epoch": 0.736541889483066, "grad_norm": 1.0583357887529627, "learning_rate": 3.4242850924547297e-06, "loss": 0.149, "step": 9297 }, { "epoch": 0.7366211130917013, "grad_norm": 1.4592309555612877, "learning_rate": 3.422352080794593e-06, "loss": 0.2032, "step": 9298 }, { "epoch": 0.7367003367003367, "grad_norm": 1.5684020107815297, "learning_rate": 3.4204195022409247e-06, "loss": 0.2315, "step": 9299 }, { "epoch": 0.7367795603089721, "grad_norm": 1.617297646162543, "learning_rate": 3.418487356920974e-06, "loss": 0.2939, "step": 9300 }, { "epoch": 0.7368587839176074, "grad_norm": 1.183777979118823, "learning_rate": 3.4165556449619584e-06, "loss": 0.185, "step": 9301 }, { "epoch": 0.7369380075262428, "grad_norm": 1.7859773945344035, "learning_rate": 3.4146243664910804e-06, "loss": 0.2041, "step": 9302 }, { "epoch": 0.7370172311348782, "grad_norm": 1.005660326886881, "learning_rate": 3.4126935216355005e-06, "loss": 0.1418, "step": 9303 }, { "epoch": 0.7370964547435136, "grad_norm": 1.3574530398897882, "learning_rate": 3.4107631105223528e-06, "loss": 0.2562, "step": 9304 }, { "epoch": 0.7371756783521489, "grad_norm": 1.4927142668338989, "learning_rate": 3.4088331332787527e-06, "loss": 0.2526, "step": 9305 }, { "epoch": 0.7372549019607844, "grad_norm": 1.2867817885613204, "learning_rate": 3.406903590031776e-06, "loss": 0.2534, "step": 9306 }, { "epoch": 0.7373341255694197, "grad_norm": 1.176489627015894, "learning_rate": 3.4049744809084697e-06, "loss": 0.1957, "step": 9307 }, { "epoch": 0.737413349178055, "grad_norm": 1.1926070072609174, "learning_rate": 3.4030458060358682e-06, "loss": 0.2039, "step": 9308 }, { "epoch": 0.7374925727866904, "grad_norm": 1.3437740891606778, "learning_rate": 3.4011175655409546e-06, "loss": 0.2742, "step": 9309 }, { "epoch": 0.7375717963953258, "grad_norm": 1.118499232921844, "learning_rate": 3.399189759550694e-06, "loss": 0.194, "step": 9310 }, { "epoch": 0.7376510200039612, "grad_norm": 1.299508216593726, "learning_rate": 3.3972623881920296e-06, "loss": 0.2468, "step": 9311 }, { "epoch": 0.7377302436125965, "grad_norm": 1.190980108520649, "learning_rate": 3.3953354515918667e-06, "loss": 0.2117, "step": 9312 }, { "epoch": 0.737809467221232, "grad_norm": 1.1517214436632648, "learning_rate": 3.3934089498770816e-06, "loss": 0.2051, "step": 9313 }, { "epoch": 0.7378886908298673, "grad_norm": 1.4131190686879942, "learning_rate": 3.3914828831745306e-06, "loss": 0.2081, "step": 9314 }, { "epoch": 0.7379679144385026, "grad_norm": 1.305668698482543, "learning_rate": 3.3895572516110353e-06, "loss": 0.2618, "step": 9315 }, { "epoch": 0.7380471380471381, "grad_norm": 1.4289927646275424, "learning_rate": 3.3876320553133834e-06, "loss": 0.2351, "step": 9316 }, { "epoch": 0.7381263616557734, "grad_norm": 1.2561146489296369, "learning_rate": 3.385707294408347e-06, "loss": 0.2313, "step": 9317 }, { "epoch": 0.7382055852644088, "grad_norm": 1.2874340522406893, "learning_rate": 3.38378296902266e-06, "loss": 0.2394, "step": 9318 }, { "epoch": 0.7382848088730442, "grad_norm": 1.105598495541412, "learning_rate": 3.3818590792830285e-06, "loss": 0.1684, "step": 9319 }, { "epoch": 0.7383640324816796, "grad_norm": 1.6062150899695402, "learning_rate": 3.3799356253161288e-06, "loss": 0.2915, "step": 9320 }, { "epoch": 0.7384432560903149, "grad_norm": 1.3679716411851794, "learning_rate": 3.3780126072486188e-06, "loss": 0.22, "step": 9321 }, { "epoch": 0.7385224796989502, "grad_norm": 1.5656838834204838, "learning_rate": 3.376090025207115e-06, "loss": 0.2978, "step": 9322 }, { "epoch": 0.7386017033075857, "grad_norm": 1.358561615519632, "learning_rate": 3.3741678793182077e-06, "loss": 0.2135, "step": 9323 }, { "epoch": 0.738680926916221, "grad_norm": 1.116543505132853, "learning_rate": 3.372246169708466e-06, "loss": 0.1902, "step": 9324 }, { "epoch": 0.7387601505248564, "grad_norm": 1.19632013296997, "learning_rate": 3.3703248965044253e-06, "loss": 0.2075, "step": 9325 }, { "epoch": 0.7388393741334918, "grad_norm": 1.675068129797508, "learning_rate": 3.368404059832586e-06, "loss": 0.3717, "step": 9326 }, { "epoch": 0.7389185977421272, "grad_norm": 1.174493597031542, "learning_rate": 3.366483659819434e-06, "loss": 0.2151, "step": 9327 }, { "epoch": 0.7389978213507625, "grad_norm": 1.3595132861478358, "learning_rate": 3.364563696591414e-06, "loss": 0.2324, "step": 9328 }, { "epoch": 0.7390770449593979, "grad_norm": 1.0135582023985867, "learning_rate": 3.3626441702749436e-06, "loss": 0.1709, "step": 9329 }, { "epoch": 0.7391562685680333, "grad_norm": 1.2881326987708404, "learning_rate": 3.360725080996421e-06, "loss": 0.1937, "step": 9330 }, { "epoch": 0.7392354921766686, "grad_norm": 1.8125928481184876, "learning_rate": 3.3588064288822055e-06, "loss": 0.3408, "step": 9331 }, { "epoch": 0.739314715785304, "grad_norm": 1.608115395275425, "learning_rate": 3.356888214058629e-06, "loss": 0.2227, "step": 9332 }, { "epoch": 0.7393939393939394, "grad_norm": 1.7541517618321727, "learning_rate": 3.354970436652001e-06, "loss": 0.292, "step": 9333 }, { "epoch": 0.7394731630025748, "grad_norm": 1.1438638487660662, "learning_rate": 3.3530530967885964e-06, "loss": 0.2032, "step": 9334 }, { "epoch": 0.7395523866112101, "grad_norm": 1.0269616738925067, "learning_rate": 3.351136194594662e-06, "loss": 0.1253, "step": 9335 }, { "epoch": 0.7396316102198455, "grad_norm": 1.3222051669189687, "learning_rate": 3.3492197301964145e-06, "loss": 0.1968, "step": 9336 }, { "epoch": 0.7397108338284809, "grad_norm": 1.6246901847849982, "learning_rate": 3.3473037037200484e-06, "loss": 0.3324, "step": 9337 }, { "epoch": 0.7397900574371162, "grad_norm": 1.4138539785306259, "learning_rate": 3.345388115291723e-06, "loss": 0.2502, "step": 9338 }, { "epoch": 0.7398692810457517, "grad_norm": 1.3342604312615531, "learning_rate": 3.3434729650375675e-06, "loss": 0.2127, "step": 9339 }, { "epoch": 0.739948504654387, "grad_norm": 1.3255346940347943, "learning_rate": 3.341558253083692e-06, "loss": 0.2182, "step": 9340 }, { "epoch": 0.7400277282630224, "grad_norm": 1.5704784999120203, "learning_rate": 3.3396439795561662e-06, "loss": 0.3077, "step": 9341 }, { "epoch": 0.7401069518716578, "grad_norm": 1.350155520054778, "learning_rate": 3.3377301445810327e-06, "loss": 0.1995, "step": 9342 }, { "epoch": 0.7401861754802931, "grad_norm": 1.4020594977913219, "learning_rate": 3.3358167482843173e-06, "loss": 0.2596, "step": 9343 }, { "epoch": 0.7402653990889285, "grad_norm": 1.2113602218994153, "learning_rate": 3.3339037907920024e-06, "loss": 0.1901, "step": 9344 }, { "epoch": 0.7403446226975638, "grad_norm": 1.312382290869686, "learning_rate": 3.331991272230044e-06, "loss": 0.1848, "step": 9345 }, { "epoch": 0.7404238463061993, "grad_norm": 1.4020087520398796, "learning_rate": 3.330079192724379e-06, "loss": 0.2452, "step": 9346 }, { "epoch": 0.7405030699148346, "grad_norm": 1.503886557144299, "learning_rate": 3.328167552400906e-06, "loss": 0.3149, "step": 9347 }, { "epoch": 0.74058229352347, "grad_norm": 1.390393847020418, "learning_rate": 3.326256351385494e-06, "loss": 0.1894, "step": 9348 }, { "epoch": 0.7406615171321054, "grad_norm": 1.204952612826259, "learning_rate": 3.324345589803991e-06, "loss": 0.1679, "step": 9349 }, { "epoch": 0.7407407407407407, "grad_norm": 1.102868956440528, "learning_rate": 3.3224352677822115e-06, "loss": 0.1637, "step": 9350 }, { "epoch": 0.7408199643493761, "grad_norm": 1.3170442454502893, "learning_rate": 3.3205253854459386e-06, "loss": 0.1963, "step": 9351 }, { "epoch": 0.7408991879580115, "grad_norm": 1.7118820201009344, "learning_rate": 3.3186159429209263e-06, "loss": 0.2487, "step": 9352 }, { "epoch": 0.7409784115666469, "grad_norm": 1.3228384432786102, "learning_rate": 3.316706940332908e-06, "loss": 0.2282, "step": 9353 }, { "epoch": 0.7410576351752822, "grad_norm": 1.2812824330023773, "learning_rate": 3.314798377807581e-06, "loss": 0.2276, "step": 9354 }, { "epoch": 0.7411368587839177, "grad_norm": 1.4483446977021537, "learning_rate": 3.312890255470609e-06, "loss": 0.2815, "step": 9355 }, { "epoch": 0.741216082392553, "grad_norm": 1.1402000416932498, "learning_rate": 3.3109825734476407e-06, "loss": 0.2162, "step": 9356 }, { "epoch": 0.7412953060011883, "grad_norm": 1.1035150403036862, "learning_rate": 3.3090753318642855e-06, "loss": 0.1856, "step": 9357 }, { "epoch": 0.7413745296098238, "grad_norm": 1.270229413518336, "learning_rate": 3.307168530846121e-06, "loss": 0.1862, "step": 9358 }, { "epoch": 0.7414537532184591, "grad_norm": 1.3788715034941004, "learning_rate": 3.3052621705187083e-06, "loss": 0.2669, "step": 9359 }, { "epoch": 0.7415329768270945, "grad_norm": 1.5985590459322399, "learning_rate": 3.303356251007569e-06, "loss": 0.2632, "step": 9360 }, { "epoch": 0.7416122004357298, "grad_norm": 0.9359331089641134, "learning_rate": 3.301450772438195e-06, "loss": 0.183, "step": 9361 }, { "epoch": 0.7416914240443652, "grad_norm": 1.1797604146938332, "learning_rate": 3.2995457349360595e-06, "loss": 0.167, "step": 9362 }, { "epoch": 0.7417706476530006, "grad_norm": 1.441246918230819, "learning_rate": 3.297641138626597e-06, "loss": 0.3442, "step": 9363 }, { "epoch": 0.7418498712616359, "grad_norm": 1.489922558472212, "learning_rate": 3.295736983635215e-06, "loss": 0.2688, "step": 9364 }, { "epoch": 0.7419290948702714, "grad_norm": 1.2666159934738908, "learning_rate": 3.293833270087291e-06, "loss": 0.2283, "step": 9365 }, { "epoch": 0.7420083184789067, "grad_norm": 1.073465256701786, "learning_rate": 3.291929998108182e-06, "loss": 0.1883, "step": 9366 }, { "epoch": 0.7420875420875421, "grad_norm": 1.364829554548889, "learning_rate": 3.2900271678232045e-06, "loss": 0.2387, "step": 9367 }, { "epoch": 0.7421667656961775, "grad_norm": 1.2525372690383816, "learning_rate": 3.2881247793576488e-06, "loss": 0.2123, "step": 9368 }, { "epoch": 0.7422459893048128, "grad_norm": 1.42498933805347, "learning_rate": 3.286222832836784e-06, "loss": 0.2469, "step": 9369 }, { "epoch": 0.7423252129134482, "grad_norm": 1.0395948803097224, "learning_rate": 3.284321328385842e-06, "loss": 0.1803, "step": 9370 }, { "epoch": 0.7424044365220835, "grad_norm": 1.1343727906686847, "learning_rate": 3.282420266130022e-06, "loss": 0.156, "step": 9371 }, { "epoch": 0.742483660130719, "grad_norm": 1.2896552906941683, "learning_rate": 3.280519646194509e-06, "loss": 0.2883, "step": 9372 }, { "epoch": 0.7425628837393543, "grad_norm": 1.144277478317561, "learning_rate": 3.278619468704445e-06, "loss": 0.1901, "step": 9373 }, { "epoch": 0.7426421073479897, "grad_norm": 1.306115371250631, "learning_rate": 3.276719733784943e-06, "loss": 0.2369, "step": 9374 }, { "epoch": 0.7427213309566251, "grad_norm": 1.1351004925493535, "learning_rate": 3.2748204415611016e-06, "loss": 0.183, "step": 9375 }, { "epoch": 0.7428005545652604, "grad_norm": 1.5978591651667373, "learning_rate": 3.2729215921579738e-06, "loss": 0.2698, "step": 9376 }, { "epoch": 0.7428797781738958, "grad_norm": 1.2230814751155743, "learning_rate": 3.271023185700587e-06, "loss": 0.1909, "step": 9377 }, { "epoch": 0.7429590017825312, "grad_norm": 1.5315091948619288, "learning_rate": 3.269125222313949e-06, "loss": 0.2832, "step": 9378 }, { "epoch": 0.7430382253911666, "grad_norm": 1.0749651270596776, "learning_rate": 3.2672277021230283e-06, "loss": 0.1364, "step": 9379 }, { "epoch": 0.7431174489998019, "grad_norm": 1.340169914459609, "learning_rate": 3.2653306252527673e-06, "loss": 0.2201, "step": 9380 }, { "epoch": 0.7431966726084374, "grad_norm": 1.5406688127686632, "learning_rate": 3.2634339918280765e-06, "loss": 0.2878, "step": 9381 }, { "epoch": 0.7432758962170727, "grad_norm": 1.145921811277957, "learning_rate": 3.2615378019738455e-06, "loss": 0.1986, "step": 9382 }, { "epoch": 0.743355119825708, "grad_norm": 1.3886567263108123, "learning_rate": 3.2596420558149277e-06, "loss": 0.2859, "step": 9383 }, { "epoch": 0.7434343434343434, "grad_norm": 1.288110451264625, "learning_rate": 3.257746753476144e-06, "loss": 0.2582, "step": 9384 }, { "epoch": 0.7435135670429788, "grad_norm": 1.282485742503463, "learning_rate": 3.255851895082299e-06, "loss": 0.1978, "step": 9385 }, { "epoch": 0.7435927906516142, "grad_norm": 1.2656152737878041, "learning_rate": 3.2539574807581555e-06, "loss": 0.2331, "step": 9386 }, { "epoch": 0.7436720142602495, "grad_norm": 1.5588947267541353, "learning_rate": 3.2520635106284516e-06, "loss": 0.2379, "step": 9387 }, { "epoch": 0.743751237868885, "grad_norm": 1.498764868012792, "learning_rate": 3.250169984817897e-06, "loss": 0.3503, "step": 9388 }, { "epoch": 0.7438304614775203, "grad_norm": 1.6338774467513675, "learning_rate": 3.248276903451171e-06, "loss": 0.246, "step": 9389 }, { "epoch": 0.7439096850861556, "grad_norm": 1.447355605954297, "learning_rate": 3.24638426665292e-06, "loss": 0.3435, "step": 9390 }, { "epoch": 0.7439889086947911, "grad_norm": 1.4814802268143872, "learning_rate": 3.2444920745477727e-06, "loss": 0.2474, "step": 9391 }, { "epoch": 0.7440681323034264, "grad_norm": 1.3975331282726227, "learning_rate": 3.2426003272603158e-06, "loss": 0.2944, "step": 9392 }, { "epoch": 0.7441473559120618, "grad_norm": 1.2730895826492326, "learning_rate": 3.2407090249151105e-06, "loss": 0.2151, "step": 9393 }, { "epoch": 0.7442265795206972, "grad_norm": 1.1512579437931159, "learning_rate": 3.238818167636695e-06, "loss": 0.1818, "step": 9394 }, { "epoch": 0.7443058031293326, "grad_norm": 1.2852383478758886, "learning_rate": 3.2369277555495705e-06, "loss": 0.2205, "step": 9395 }, { "epoch": 0.7443850267379679, "grad_norm": 1.4782483356658025, "learning_rate": 3.235037788778208e-06, "loss": 0.2166, "step": 9396 }, { "epoch": 0.7444642503466032, "grad_norm": 1.3791635627572578, "learning_rate": 3.2331482674470605e-06, "loss": 0.3152, "step": 9397 }, { "epoch": 0.7445434739552387, "grad_norm": 1.319750949406635, "learning_rate": 3.2312591916805382e-06, "loss": 0.2256, "step": 9398 }, { "epoch": 0.744622697563874, "grad_norm": 1.0207439209579192, "learning_rate": 3.2293705616030267e-06, "loss": 0.1507, "step": 9399 }, { "epoch": 0.7447019211725094, "grad_norm": 1.0894260346706084, "learning_rate": 3.2274823773388885e-06, "loss": 0.1402, "step": 9400 }, { "epoch": 0.7447811447811448, "grad_norm": 1.2563275498512658, "learning_rate": 3.2255946390124482e-06, "loss": 0.1691, "step": 9401 }, { "epoch": 0.7448603683897802, "grad_norm": 1.5389010121626974, "learning_rate": 3.223707346748002e-06, "loss": 0.196, "step": 9402 }, { "epoch": 0.7449395919984155, "grad_norm": 1.1763971645552154, "learning_rate": 3.221820500669823e-06, "loss": 0.1593, "step": 9403 }, { "epoch": 0.7450188156070509, "grad_norm": 1.3794139382948734, "learning_rate": 3.2199341009021514e-06, "loss": 0.2419, "step": 9404 }, { "epoch": 0.7450980392156863, "grad_norm": 1.602692670876231, "learning_rate": 3.218048147569195e-06, "loss": 0.3202, "step": 9405 }, { "epoch": 0.7451772628243216, "grad_norm": 2.1733670177972346, "learning_rate": 3.216162640795133e-06, "loss": 0.2975, "step": 9406 }, { "epoch": 0.745256486432957, "grad_norm": 1.5008987525985351, "learning_rate": 3.2142775807041214e-06, "loss": 0.2438, "step": 9407 }, { "epoch": 0.7453357100415924, "grad_norm": 1.401590559714894, "learning_rate": 3.2123929674202816e-06, "loss": 0.277, "step": 9408 }, { "epoch": 0.7454149336502278, "grad_norm": 1.4131344961909658, "learning_rate": 3.2105088010677e-06, "loss": 0.2571, "step": 9409 }, { "epoch": 0.7454941572588631, "grad_norm": 1.3357463628827901, "learning_rate": 3.2086250817704488e-06, "loss": 0.2026, "step": 9410 }, { "epoch": 0.7455733808674985, "grad_norm": 1.3362518109459844, "learning_rate": 3.2067418096525593e-06, "loss": 0.2325, "step": 9411 }, { "epoch": 0.7456526044761339, "grad_norm": 1.4926871147088352, "learning_rate": 3.2048589848380297e-06, "loss": 0.194, "step": 9412 }, { "epoch": 0.7457318280847692, "grad_norm": 1.090169403182317, "learning_rate": 3.202976607450844e-06, "loss": 0.1293, "step": 9413 }, { "epoch": 0.7458110516934047, "grad_norm": 1.390001396079619, "learning_rate": 3.201094677614943e-06, "loss": 0.2167, "step": 9414 }, { "epoch": 0.74589027530204, "grad_norm": 1.5013227373329407, "learning_rate": 3.1992131954542404e-06, "loss": 0.2715, "step": 9415 }, { "epoch": 0.7459694989106754, "grad_norm": 1.3790550209074108, "learning_rate": 3.1973321610926277e-06, "loss": 0.2343, "step": 9416 }, { "epoch": 0.7460487225193108, "grad_norm": 1.3541490417400641, "learning_rate": 3.1954515746539616e-06, "loss": 0.263, "step": 9417 }, { "epoch": 0.7461279461279461, "grad_norm": 1.2788272538823096, "learning_rate": 3.193571436262064e-06, "loss": 0.2382, "step": 9418 }, { "epoch": 0.7462071697365815, "grad_norm": 1.138278157720186, "learning_rate": 3.191691746040739e-06, "loss": 0.1841, "step": 9419 }, { "epoch": 0.7462863933452168, "grad_norm": 1.1083300899303032, "learning_rate": 3.189812504113754e-06, "loss": 0.1574, "step": 9420 }, { "epoch": 0.7463656169538523, "grad_norm": 1.4827937501877615, "learning_rate": 3.187933710604847e-06, "loss": 0.2568, "step": 9421 }, { "epoch": 0.7464448405624876, "grad_norm": 1.2168448782762713, "learning_rate": 3.186055365637725e-06, "loss": 0.198, "step": 9422 }, { "epoch": 0.746524064171123, "grad_norm": 1.8300571771067111, "learning_rate": 3.184177469336073e-06, "loss": 0.3388, "step": 9423 }, { "epoch": 0.7466032877797584, "grad_norm": 1.3376084543935285, "learning_rate": 3.1823000218235388e-06, "loss": 0.2035, "step": 9424 }, { "epoch": 0.7466825113883937, "grad_norm": 1.3971110358923733, "learning_rate": 3.180423023223741e-06, "loss": 0.2405, "step": 9425 }, { "epoch": 0.7467617349970291, "grad_norm": 2.097276449333718, "learning_rate": 3.1785464736602754e-06, "loss": 0.3482, "step": 9426 }, { "epoch": 0.7468409586056645, "grad_norm": 1.294310265785697, "learning_rate": 3.1766703732567027e-06, "loss": 0.2069, "step": 9427 }, { "epoch": 0.7469201822142999, "grad_norm": 1.5895978455063275, "learning_rate": 3.1747947221365517e-06, "loss": 0.2403, "step": 9428 }, { "epoch": 0.7469994058229352, "grad_norm": 1.1535859219206004, "learning_rate": 3.17291952042333e-06, "loss": 0.1927, "step": 9429 }, { "epoch": 0.7470786294315707, "grad_norm": 1.5031014669288114, "learning_rate": 3.171044768240508e-06, "loss": 0.2773, "step": 9430 }, { "epoch": 0.747157853040206, "grad_norm": 1.4734598814275917, "learning_rate": 3.169170465711525e-06, "loss": 0.2835, "step": 9431 }, { "epoch": 0.7472370766488413, "grad_norm": 1.2373302455978916, "learning_rate": 3.167296612959803e-06, "loss": 0.1604, "step": 9432 }, { "epoch": 0.7473163002574768, "grad_norm": 1.6178309639539645, "learning_rate": 3.1654232101087225e-06, "loss": 0.3613, "step": 9433 }, { "epoch": 0.7473955238661121, "grad_norm": 1.262968555120363, "learning_rate": 3.1635502572816333e-06, "loss": 0.203, "step": 9434 }, { "epoch": 0.7474747474747475, "grad_norm": 1.163022347376603, "learning_rate": 3.1616777546018696e-06, "loss": 0.2306, "step": 9435 }, { "epoch": 0.7475539710833828, "grad_norm": 1.6540756082833261, "learning_rate": 3.1598057021927207e-06, "loss": 0.2017, "step": 9436 }, { "epoch": 0.7476331946920183, "grad_norm": 1.2626823362028161, "learning_rate": 3.1579341001774546e-06, "loss": 0.2295, "step": 9437 }, { "epoch": 0.7477124183006536, "grad_norm": 1.0592146146929295, "learning_rate": 3.1560629486793014e-06, "loss": 0.1668, "step": 9438 }, { "epoch": 0.7477916419092889, "grad_norm": 1.3127302032741552, "learning_rate": 3.154192247821476e-06, "loss": 0.2523, "step": 9439 }, { "epoch": 0.7478708655179244, "grad_norm": 1.1647802678212953, "learning_rate": 3.1523219977271515e-06, "loss": 0.1996, "step": 9440 }, { "epoch": 0.7479500891265597, "grad_norm": 1.5647371157342005, "learning_rate": 3.1504521985194715e-06, "loss": 0.2719, "step": 9441 }, { "epoch": 0.7480293127351951, "grad_norm": 1.5630162121234015, "learning_rate": 3.1485828503215588e-06, "loss": 0.2543, "step": 9442 }, { "epoch": 0.7481085363438305, "grad_norm": 1.2100950588025379, "learning_rate": 3.1467139532564985e-06, "loss": 0.162, "step": 9443 }, { "epoch": 0.7481877599524658, "grad_norm": 1.2043503266422373, "learning_rate": 3.144845507447345e-06, "loss": 0.1887, "step": 9444 }, { "epoch": 0.7482669835611012, "grad_norm": 1.2921367172680136, "learning_rate": 3.1429775130171337e-06, "loss": 0.2382, "step": 9445 }, { "epoch": 0.7483462071697365, "grad_norm": 1.2495429039947195, "learning_rate": 3.141109970088859e-06, "loss": 0.2479, "step": 9446 }, { "epoch": 0.748425430778372, "grad_norm": 1.1069075926656768, "learning_rate": 3.1392428787854865e-06, "loss": 0.1447, "step": 9447 }, { "epoch": 0.7485046543870073, "grad_norm": 1.4397127315683689, "learning_rate": 3.1373762392299632e-06, "loss": 0.2683, "step": 9448 }, { "epoch": 0.7485838779956427, "grad_norm": 1.2169207604471608, "learning_rate": 3.135510051545192e-06, "loss": 0.2281, "step": 9449 }, { "epoch": 0.7486631016042781, "grad_norm": 1.0603902525045215, "learning_rate": 3.133644315854055e-06, "loss": 0.2329, "step": 9450 }, { "epoch": 0.7487423252129134, "grad_norm": 1.1278550844491935, "learning_rate": 3.131779032279397e-06, "loss": 0.2111, "step": 9451 }, { "epoch": 0.7488215488215488, "grad_norm": 1.2612176210777584, "learning_rate": 3.1299142009440463e-06, "loss": 0.2302, "step": 9452 }, { "epoch": 0.7489007724301842, "grad_norm": 1.3104144120060726, "learning_rate": 3.1280498219707876e-06, "loss": 0.1999, "step": 9453 }, { "epoch": 0.7489799960388196, "grad_norm": 1.593919469996017, "learning_rate": 3.1261858954823798e-06, "loss": 0.2601, "step": 9454 }, { "epoch": 0.7490592196474549, "grad_norm": 1.4810734301495718, "learning_rate": 3.12432242160156e-06, "loss": 0.265, "step": 9455 }, { "epoch": 0.7491384432560904, "grad_norm": 1.232098949405676, "learning_rate": 3.1224594004510246e-06, "loss": 0.2258, "step": 9456 }, { "epoch": 0.7492176668647257, "grad_norm": 1.3008262681884801, "learning_rate": 3.1205968321534406e-06, "loss": 0.2256, "step": 9457 }, { "epoch": 0.749296890473361, "grad_norm": 1.3652165436394026, "learning_rate": 3.1187347168314586e-06, "loss": 0.3011, "step": 9458 }, { "epoch": 0.7493761140819964, "grad_norm": 1.2019777872383308, "learning_rate": 3.1168730546076844e-06, "loss": 0.1736, "step": 9459 }, { "epoch": 0.7494553376906318, "grad_norm": 1.4344420786214576, "learning_rate": 3.1150118456046963e-06, "loss": 0.2771, "step": 9460 }, { "epoch": 0.7495345612992672, "grad_norm": 1.1784019137698762, "learning_rate": 3.1131510899450533e-06, "loss": 0.218, "step": 9461 }, { "epoch": 0.7496137849079025, "grad_norm": 1.4764273319575023, "learning_rate": 3.1112907877512732e-06, "loss": 0.2423, "step": 9462 }, { "epoch": 0.749693008516538, "grad_norm": 1.744742682660706, "learning_rate": 3.1094309391458455e-06, "loss": 0.3954, "step": 9463 }, { "epoch": 0.7497722321251733, "grad_norm": 1.7072083694017732, "learning_rate": 3.107571544251241e-06, "loss": 0.2741, "step": 9464 }, { "epoch": 0.7498514557338086, "grad_norm": 1.3629758318455325, "learning_rate": 3.1057126031898843e-06, "loss": 0.2522, "step": 9465 }, { "epoch": 0.7499306793424441, "grad_norm": 1.093599098222902, "learning_rate": 3.1038541160841752e-06, "loss": 0.1698, "step": 9466 }, { "epoch": 0.7500099029510794, "grad_norm": 1.351543909752522, "learning_rate": 3.1019960830564945e-06, "loss": 0.2193, "step": 9467 }, { "epoch": 0.7500891265597148, "grad_norm": 2.0966597716556294, "learning_rate": 3.1001385042291797e-06, "loss": 0.3362, "step": 9468 }, { "epoch": 0.7501683501683502, "grad_norm": 1.5211743504519524, "learning_rate": 3.0982813797245413e-06, "loss": 0.3306, "step": 9469 }, { "epoch": 0.7502475737769856, "grad_norm": 1.2358795506796807, "learning_rate": 3.096424709664868e-06, "loss": 0.2208, "step": 9470 }, { "epoch": 0.7503267973856209, "grad_norm": 1.1294991235195104, "learning_rate": 3.094568494172411e-06, "loss": 0.1982, "step": 9471 }, { "epoch": 0.7504060209942562, "grad_norm": 1.313782117735638, "learning_rate": 3.0927127333693872e-06, "loss": 0.211, "step": 9472 }, { "epoch": 0.7504852446028917, "grad_norm": 1.5260213198657948, "learning_rate": 3.090857427377998e-06, "loss": 0.2891, "step": 9473 }, { "epoch": 0.750564468211527, "grad_norm": 1.727509647485842, "learning_rate": 3.0890025763204025e-06, "loss": 0.2981, "step": 9474 }, { "epoch": 0.7506436918201624, "grad_norm": 1.1328880165124846, "learning_rate": 3.087148180318734e-06, "loss": 0.1931, "step": 9475 }, { "epoch": 0.7507229154287978, "grad_norm": 1.4866288309273816, "learning_rate": 3.0852942394950915e-06, "loss": 0.2789, "step": 9476 }, { "epoch": 0.7508021390374332, "grad_norm": 1.3697140300964896, "learning_rate": 3.083440753971556e-06, "loss": 0.2254, "step": 9477 }, { "epoch": 0.7508813626460685, "grad_norm": 1.1896947340579718, "learning_rate": 3.0815877238701653e-06, "loss": 0.1875, "step": 9478 }, { "epoch": 0.7509605862547039, "grad_norm": 1.5220249568042512, "learning_rate": 3.079735149312931e-06, "loss": 0.3272, "step": 9479 }, { "epoch": 0.7510398098633393, "grad_norm": 1.4274812682359288, "learning_rate": 3.077883030421843e-06, "loss": 0.1905, "step": 9480 }, { "epoch": 0.7511190334719746, "grad_norm": 1.148347303567613, "learning_rate": 3.0760313673188493e-06, "loss": 0.1689, "step": 9481 }, { "epoch": 0.75119825708061, "grad_norm": 1.3045861258190539, "learning_rate": 3.0741801601258714e-06, "loss": 0.2333, "step": 9482 }, { "epoch": 0.7512774806892454, "grad_norm": 1.541061156672222, "learning_rate": 3.072329408964808e-06, "loss": 0.2338, "step": 9483 }, { "epoch": 0.7513567042978808, "grad_norm": 1.0135960605517473, "learning_rate": 3.0704791139575195e-06, "loss": 0.1608, "step": 9484 }, { "epoch": 0.7514359279065161, "grad_norm": 1.5085062572010506, "learning_rate": 3.0686292752258352e-06, "loss": 0.2657, "step": 9485 }, { "epoch": 0.7515151515151515, "grad_norm": 1.2458356638595636, "learning_rate": 3.066779892891564e-06, "loss": 0.207, "step": 9486 }, { "epoch": 0.7515943751237869, "grad_norm": 1.2736112655279421, "learning_rate": 3.064930967076477e-06, "loss": 0.2092, "step": 9487 }, { "epoch": 0.7516735987324222, "grad_norm": 1.4314328367833653, "learning_rate": 3.063082497902313e-06, "loss": 0.2227, "step": 9488 }, { "epoch": 0.7517528223410577, "grad_norm": 1.394462967768676, "learning_rate": 3.0612344854907917e-06, "loss": 0.2461, "step": 9489 }, { "epoch": 0.751832045949693, "grad_norm": 1.1942375880349436, "learning_rate": 3.0593869299635925e-06, "loss": 0.1888, "step": 9490 }, { "epoch": 0.7519112695583284, "grad_norm": 1.368240226170967, "learning_rate": 3.0575398314423677e-06, "loss": 0.2445, "step": 9491 }, { "epoch": 0.7519904931669638, "grad_norm": 0.9101826210930218, "learning_rate": 3.0556931900487365e-06, "loss": 0.1348, "step": 9492 }, { "epoch": 0.7520697167755991, "grad_norm": 1.290594676041806, "learning_rate": 3.053847005904298e-06, "loss": 0.2429, "step": 9493 }, { "epoch": 0.7521489403842345, "grad_norm": 1.5415974361876146, "learning_rate": 3.052001279130612e-06, "loss": 0.238, "step": 9494 }, { "epoch": 0.7522281639928698, "grad_norm": 1.5041012964745173, "learning_rate": 3.0501560098492056e-06, "loss": 0.2446, "step": 9495 }, { "epoch": 0.7523073876015053, "grad_norm": 1.6898963618734115, "learning_rate": 3.0483111981815906e-06, "loss": 0.2575, "step": 9496 }, { "epoch": 0.7523866112101406, "grad_norm": 1.1767442627187736, "learning_rate": 3.046466844249232e-06, "loss": 0.2172, "step": 9497 }, { "epoch": 0.752465834818776, "grad_norm": 1.4569689780534123, "learning_rate": 3.0446229481735713e-06, "loss": 0.2465, "step": 9498 }, { "epoch": 0.7525450584274114, "grad_norm": 1.468710675687599, "learning_rate": 3.042779510076025e-06, "loss": 0.2394, "step": 9499 }, { "epoch": 0.7526242820360467, "grad_norm": 1.2814993580961627, "learning_rate": 3.0409365300779725e-06, "loss": 0.2472, "step": 9500 }, { "epoch": 0.7527035056446821, "grad_norm": 1.3358978428510522, "learning_rate": 3.039094008300761e-06, "loss": 0.1886, "step": 9501 }, { "epoch": 0.7527827292533175, "grad_norm": 1.5352179574927975, "learning_rate": 3.0372519448657188e-06, "loss": 0.2542, "step": 9502 }, { "epoch": 0.7528619528619529, "grad_norm": 1.1781326724959185, "learning_rate": 3.0354103398941327e-06, "loss": 0.1727, "step": 9503 }, { "epoch": 0.7529411764705882, "grad_norm": 1.5323387135404547, "learning_rate": 3.0335691935072618e-06, "loss": 0.2667, "step": 9504 }, { "epoch": 0.7530204000792237, "grad_norm": 1.42261100595425, "learning_rate": 3.0317285058263426e-06, "loss": 0.2447, "step": 9505 }, { "epoch": 0.753099623687859, "grad_norm": 1.415446079918829, "learning_rate": 3.029888276972571e-06, "loss": 0.2289, "step": 9506 }, { "epoch": 0.7531788472964943, "grad_norm": 1.4968788512640403, "learning_rate": 3.0280485070671197e-06, "loss": 0.3105, "step": 9507 }, { "epoch": 0.7532580709051298, "grad_norm": 1.0715515709813266, "learning_rate": 3.0262091962311234e-06, "loss": 0.1649, "step": 9508 }, { "epoch": 0.7533372945137651, "grad_norm": 1.3075741828816314, "learning_rate": 3.0243703445856985e-06, "loss": 0.2199, "step": 9509 }, { "epoch": 0.7534165181224005, "grad_norm": 0.9690785440856734, "learning_rate": 3.0225319522519226e-06, "loss": 0.1289, "step": 9510 }, { "epoch": 0.7534957417310358, "grad_norm": 1.8770743582388911, "learning_rate": 3.0206940193508404e-06, "loss": 0.2559, "step": 9511 }, { "epoch": 0.7535749653396713, "grad_norm": 1.2434742312321545, "learning_rate": 3.018856546003479e-06, "loss": 0.208, "step": 9512 }, { "epoch": 0.7536541889483066, "grad_norm": 1.262959253719853, "learning_rate": 3.0170195323308216e-06, "loss": 0.2348, "step": 9513 }, { "epoch": 0.7537334125569419, "grad_norm": 1.3354250949069246, "learning_rate": 3.0151829784538257e-06, "loss": 0.2857, "step": 9514 }, { "epoch": 0.7538126361655774, "grad_norm": 1.3675365592528514, "learning_rate": 3.0133468844934245e-06, "loss": 0.2698, "step": 9515 }, { "epoch": 0.7538918597742127, "grad_norm": 1.4365720898127374, "learning_rate": 3.0115112505705134e-06, "loss": 0.2075, "step": 9516 }, { "epoch": 0.7539710833828481, "grad_norm": 1.1988970087854138, "learning_rate": 3.0096760768059576e-06, "loss": 0.1956, "step": 9517 }, { "epoch": 0.7540503069914835, "grad_norm": 1.1522947267556833, "learning_rate": 3.0078413633205995e-06, "loss": 0.1861, "step": 9518 }, { "epoch": 0.7541295306001188, "grad_norm": 1.2209150591935631, "learning_rate": 3.0060071102352438e-06, "loss": 0.2628, "step": 9519 }, { "epoch": 0.7542087542087542, "grad_norm": 1.4103845726564295, "learning_rate": 3.0041733176706668e-06, "loss": 0.2706, "step": 9520 }, { "epoch": 0.7542879778173895, "grad_norm": 1.2875364793294792, "learning_rate": 3.002339985747611e-06, "loss": 0.2163, "step": 9521 }, { "epoch": 0.754367201426025, "grad_norm": 1.3317610253988597, "learning_rate": 3.0005071145868004e-06, "loss": 0.2273, "step": 9522 }, { "epoch": 0.7544464250346603, "grad_norm": 1.1607337882390345, "learning_rate": 2.998674704308917e-06, "loss": 0.2016, "step": 9523 }, { "epoch": 0.7545256486432957, "grad_norm": 1.1084687185978421, "learning_rate": 2.9968427550346136e-06, "loss": 0.1855, "step": 9524 }, { "epoch": 0.7546048722519311, "grad_norm": 1.2487578857852424, "learning_rate": 2.9950112668845198e-06, "loss": 0.1952, "step": 9525 }, { "epoch": 0.7546840958605664, "grad_norm": 1.38847565888578, "learning_rate": 2.9931802399792285e-06, "loss": 0.2416, "step": 9526 }, { "epoch": 0.7547633194692018, "grad_norm": 1.2632913089183573, "learning_rate": 2.9913496744393e-06, "loss": 0.2295, "step": 9527 }, { "epoch": 0.7548425430778372, "grad_norm": 1.5195212094501305, "learning_rate": 2.9895195703852763e-06, "loss": 0.2458, "step": 9528 }, { "epoch": 0.7549217666864726, "grad_norm": 1.9625672878995102, "learning_rate": 2.987689927937656e-06, "loss": 0.2459, "step": 9529 }, { "epoch": 0.7550009902951079, "grad_norm": 1.2635610926153735, "learning_rate": 2.98586074721691e-06, "loss": 0.2229, "step": 9530 }, { "epoch": 0.7550802139037434, "grad_norm": 1.4812100305828517, "learning_rate": 2.9840320283434865e-06, "loss": 0.28, "step": 9531 }, { "epoch": 0.7551594375123787, "grad_norm": 1.4769865636154187, "learning_rate": 2.982203771437796e-06, "loss": 0.3241, "step": 9532 }, { "epoch": 0.755238661121014, "grad_norm": 1.3150086989581382, "learning_rate": 2.9803759766202157e-06, "loss": 0.2836, "step": 9533 }, { "epoch": 0.7553178847296494, "grad_norm": 1.3087754527590916, "learning_rate": 2.9785486440111044e-06, "loss": 0.236, "step": 9534 }, { "epoch": 0.7553971083382848, "grad_norm": 1.0365413676042066, "learning_rate": 2.9767217737307805e-06, "loss": 0.1812, "step": 9535 }, { "epoch": 0.7554763319469202, "grad_norm": 1.3340487885286738, "learning_rate": 2.974895365899534e-06, "loss": 0.2268, "step": 9536 }, { "epoch": 0.7555555555555555, "grad_norm": 1.1422599602684496, "learning_rate": 2.973069420637621e-06, "loss": 0.2095, "step": 9537 }, { "epoch": 0.755634779164191, "grad_norm": 1.3553187726065785, "learning_rate": 2.971243938065279e-06, "loss": 0.1969, "step": 9538 }, { "epoch": 0.7557140027728263, "grad_norm": 1.1594535084901914, "learning_rate": 2.9694189183027034e-06, "loss": 0.1943, "step": 9539 }, { "epoch": 0.7557932263814616, "grad_norm": 1.3311149925183907, "learning_rate": 2.9675943614700588e-06, "loss": 0.2043, "step": 9540 }, { "epoch": 0.7558724499900971, "grad_norm": 1.3556045657692193, "learning_rate": 2.965770267687492e-06, "loss": 0.2091, "step": 9541 }, { "epoch": 0.7559516735987324, "grad_norm": 1.7533325389689827, "learning_rate": 2.963946637075107e-06, "loss": 0.2925, "step": 9542 }, { "epoch": 0.7560308972073678, "grad_norm": 1.3636354750017925, "learning_rate": 2.9621234697529787e-06, "loss": 0.246, "step": 9543 }, { "epoch": 0.7561101208160032, "grad_norm": 1.2756807963810644, "learning_rate": 2.9603007658411575e-06, "loss": 0.2206, "step": 9544 }, { "epoch": 0.7561893444246386, "grad_norm": 1.479260983103371, "learning_rate": 2.958478525459657e-06, "loss": 0.242, "step": 9545 }, { "epoch": 0.7562685680332739, "grad_norm": 1.399242252878075, "learning_rate": 2.9566567487284613e-06, "loss": 0.3363, "step": 9546 }, { "epoch": 0.7563477916419092, "grad_norm": 1.1762853626261915, "learning_rate": 2.9548354357675325e-06, "loss": 0.1669, "step": 9547 }, { "epoch": 0.7564270152505447, "grad_norm": 1.2031901359787553, "learning_rate": 2.9530145866967897e-06, "loss": 0.1391, "step": 9548 }, { "epoch": 0.75650623885918, "grad_norm": 1.1256424405975474, "learning_rate": 2.951194201636125e-06, "loss": 0.168, "step": 9549 }, { "epoch": 0.7565854624678154, "grad_norm": 1.2346522278018233, "learning_rate": 2.9493742807054094e-06, "loss": 0.1862, "step": 9550 }, { "epoch": 0.7566646860764508, "grad_norm": 1.9002467841148203, "learning_rate": 2.947554824024472e-06, "loss": 0.2879, "step": 9551 }, { "epoch": 0.7567439096850862, "grad_norm": 1.3541772414966324, "learning_rate": 2.9457358317131125e-06, "loss": 0.2286, "step": 9552 }, { "epoch": 0.7568231332937215, "grad_norm": 1.2795563295436516, "learning_rate": 2.943917303891107e-06, "loss": 0.2211, "step": 9553 }, { "epoch": 0.7569023569023569, "grad_norm": 1.2959381356085935, "learning_rate": 2.942099240678197e-06, "loss": 0.265, "step": 9554 }, { "epoch": 0.7569815805109923, "grad_norm": 1.1469560239842571, "learning_rate": 2.940281642194087e-06, "loss": 0.1718, "step": 9555 }, { "epoch": 0.7570608041196276, "grad_norm": 1.1959361190498585, "learning_rate": 2.938464508558466e-06, "loss": 0.193, "step": 9556 }, { "epoch": 0.757140027728263, "grad_norm": 1.4007615458854932, "learning_rate": 2.936647839890979e-06, "loss": 0.2508, "step": 9557 }, { "epoch": 0.7572192513368984, "grad_norm": 1.2136373586815747, "learning_rate": 2.9348316363112417e-06, "loss": 0.181, "step": 9558 }, { "epoch": 0.7572984749455338, "grad_norm": 1.1975436375730801, "learning_rate": 2.933015897938849e-06, "loss": 0.2536, "step": 9559 }, { "epoch": 0.7573776985541691, "grad_norm": 1.5071986401978135, "learning_rate": 2.9312006248933543e-06, "loss": 0.2582, "step": 9560 }, { "epoch": 0.7574569221628045, "grad_norm": 1.0592827270598004, "learning_rate": 2.9293858172942867e-06, "loss": 0.1377, "step": 9561 }, { "epoch": 0.7575361457714399, "grad_norm": 1.462186093008237, "learning_rate": 2.9275714752611383e-06, "loss": 0.2574, "step": 9562 }, { "epoch": 0.7576153693800752, "grad_norm": 1.4344983083853058, "learning_rate": 2.9257575989133803e-06, "loss": 0.2613, "step": 9563 }, { "epoch": 0.7576945929887107, "grad_norm": 1.4442717033631893, "learning_rate": 2.9239441883704455e-06, "loss": 0.2487, "step": 9564 }, { "epoch": 0.757773816597346, "grad_norm": 1.901278749560395, "learning_rate": 2.9221312437517357e-06, "loss": 0.3364, "step": 9565 }, { "epoch": 0.7578530402059814, "grad_norm": 1.7299008919290797, "learning_rate": 2.9203187651766297e-06, "loss": 0.2696, "step": 9566 }, { "epoch": 0.7579322638146168, "grad_norm": 1.2760114705482966, "learning_rate": 2.918506752764467e-06, "loss": 0.1949, "step": 9567 }, { "epoch": 0.7580114874232521, "grad_norm": 1.289478336015059, "learning_rate": 2.916695206634558e-06, "loss": 0.2143, "step": 9568 }, { "epoch": 0.7580907110318875, "grad_norm": 1.3210774509193999, "learning_rate": 2.91488412690619e-06, "loss": 0.198, "step": 9569 }, { "epoch": 0.7581699346405228, "grad_norm": 1.9759176749004026, "learning_rate": 2.913073513698611e-06, "loss": 0.259, "step": 9570 }, { "epoch": 0.7582491582491583, "grad_norm": 1.48195582188223, "learning_rate": 2.9112633671310387e-06, "loss": 0.1944, "step": 9571 }, { "epoch": 0.7583283818577936, "grad_norm": 1.3166515744083336, "learning_rate": 2.9094536873226663e-06, "loss": 0.1775, "step": 9572 }, { "epoch": 0.758407605466429, "grad_norm": 1.4684754035297503, "learning_rate": 2.9076444743926524e-06, "loss": 0.293, "step": 9573 }, { "epoch": 0.7584868290750644, "grad_norm": 1.414567671978473, "learning_rate": 2.9058357284601204e-06, "loss": 0.18, "step": 9574 }, { "epoch": 0.7585660526836997, "grad_norm": 1.3132058618934817, "learning_rate": 2.9040274496441732e-06, "loss": 0.2823, "step": 9575 }, { "epoch": 0.7586452762923351, "grad_norm": 1.6600077484501494, "learning_rate": 2.902219638063876e-06, "loss": 0.3342, "step": 9576 }, { "epoch": 0.7587244999009705, "grad_norm": 1.2748401202404402, "learning_rate": 2.9004122938382617e-06, "loss": 0.2177, "step": 9577 }, { "epoch": 0.7588037235096059, "grad_norm": 1.39759407545113, "learning_rate": 2.8986054170863344e-06, "loss": 0.2277, "step": 9578 }, { "epoch": 0.7588829471182412, "grad_norm": 1.4573152447670206, "learning_rate": 2.8967990079270736e-06, "loss": 0.2476, "step": 9579 }, { "epoch": 0.7589621707268767, "grad_norm": 1.9204576128964057, "learning_rate": 2.89499306647942e-06, "loss": 0.2756, "step": 9580 }, { "epoch": 0.759041394335512, "grad_norm": 1.5141146172467528, "learning_rate": 2.8931875928622833e-06, "loss": 0.2031, "step": 9581 }, { "epoch": 0.7591206179441473, "grad_norm": 1.3757728820671016, "learning_rate": 2.89138258719455e-06, "loss": 0.2606, "step": 9582 }, { "epoch": 0.7591998415527828, "grad_norm": 1.6196468068252399, "learning_rate": 2.8895780495950687e-06, "loss": 0.2549, "step": 9583 }, { "epoch": 0.7592790651614181, "grad_norm": 1.2864436229302496, "learning_rate": 2.8877739801826577e-06, "loss": 0.2147, "step": 9584 }, { "epoch": 0.7593582887700535, "grad_norm": 1.162692149326294, "learning_rate": 2.8859703790761095e-06, "loss": 0.2213, "step": 9585 }, { "epoch": 0.7594375123786888, "grad_norm": 1.6123628220486017, "learning_rate": 2.8841672463941827e-06, "loss": 0.2494, "step": 9586 }, { "epoch": 0.7595167359873243, "grad_norm": 1.4773579274025002, "learning_rate": 2.8823645822556e-06, "loss": 0.2303, "step": 9587 }, { "epoch": 0.7595959595959596, "grad_norm": 1.419175965475744, "learning_rate": 2.8805623867790655e-06, "loss": 0.2726, "step": 9588 }, { "epoch": 0.7596751832045949, "grad_norm": 1.5534825544499657, "learning_rate": 2.8787606600832408e-06, "loss": 0.2893, "step": 9589 }, { "epoch": 0.7597544068132304, "grad_norm": 1.1943753060486557, "learning_rate": 2.876959402286759e-06, "loss": 0.1931, "step": 9590 }, { "epoch": 0.7598336304218657, "grad_norm": 1.6457476959368162, "learning_rate": 2.8751586135082275e-06, "loss": 0.3365, "step": 9591 }, { "epoch": 0.7599128540305011, "grad_norm": 1.2569935626941384, "learning_rate": 2.873358293866221e-06, "loss": 0.193, "step": 9592 }, { "epoch": 0.7599920776391365, "grad_norm": 1.4402726699614747, "learning_rate": 2.8715584434792786e-06, "loss": 0.2193, "step": 9593 }, { "epoch": 0.7600713012477719, "grad_norm": 1.3640078068656039, "learning_rate": 2.86975906246591e-06, "loss": 0.2182, "step": 9594 }, { "epoch": 0.7601505248564072, "grad_norm": 1.2762447527374055, "learning_rate": 2.867960150944602e-06, "loss": 0.2142, "step": 9595 }, { "epoch": 0.7602297484650425, "grad_norm": 1.2333784276620576, "learning_rate": 2.8661617090338e-06, "loss": 0.151, "step": 9596 }, { "epoch": 0.760308972073678, "grad_norm": 1.546120027415051, "learning_rate": 2.864363736851922e-06, "loss": 0.3125, "step": 9597 }, { "epoch": 0.7603881956823133, "grad_norm": 1.2463588421189138, "learning_rate": 2.86256623451736e-06, "loss": 0.196, "step": 9598 }, { "epoch": 0.7604674192909487, "grad_norm": 1.3399879817442026, "learning_rate": 2.860769202148468e-06, "loss": 0.1937, "step": 9599 }, { "epoch": 0.7605466428995841, "grad_norm": 1.5030957843147574, "learning_rate": 2.8589726398635688e-06, "loss": 0.204, "step": 9600 }, { "epoch": 0.7606258665082194, "grad_norm": 1.2832427503815433, "learning_rate": 2.8571765477809645e-06, "loss": 0.2276, "step": 9601 }, { "epoch": 0.7607050901168548, "grad_norm": 1.3912188717597578, "learning_rate": 2.8553809260189145e-06, "loss": 0.1929, "step": 9602 }, { "epoch": 0.7607843137254902, "grad_norm": 1.0342192893577118, "learning_rate": 2.8535857746956507e-06, "loss": 0.1493, "step": 9603 }, { "epoch": 0.7608635373341256, "grad_norm": 1.1498466708481796, "learning_rate": 2.8517910939293804e-06, "loss": 0.1857, "step": 9604 }, { "epoch": 0.7609427609427609, "grad_norm": 1.1688361274280603, "learning_rate": 2.849996883838271e-06, "loss": 0.1651, "step": 9605 }, { "epoch": 0.7610219845513964, "grad_norm": 1.4190484967403216, "learning_rate": 2.8482031445404634e-06, "loss": 0.2331, "step": 9606 }, { "epoch": 0.7611012081600317, "grad_norm": 1.2670280482520957, "learning_rate": 2.8464098761540637e-06, "loss": 0.185, "step": 9607 }, { "epoch": 0.761180431768667, "grad_norm": 1.6250569952397633, "learning_rate": 2.844617078797155e-06, "loss": 0.2446, "step": 9608 }, { "epoch": 0.7612596553773024, "grad_norm": 1.1229052076592008, "learning_rate": 2.842824752587783e-06, "loss": 0.1705, "step": 9609 }, { "epoch": 0.7613388789859378, "grad_norm": 1.3501124681231995, "learning_rate": 2.8410328976439595e-06, "loss": 0.2274, "step": 9610 }, { "epoch": 0.7614181025945732, "grad_norm": 1.8396143205707307, "learning_rate": 2.839241514083676e-06, "loss": 0.3653, "step": 9611 }, { "epoch": 0.7614973262032085, "grad_norm": 1.3383341171895384, "learning_rate": 2.837450602024884e-06, "loss": 0.2463, "step": 9612 }, { "epoch": 0.761576549811844, "grad_norm": 1.1014026103065113, "learning_rate": 2.8356601615855027e-06, "loss": 0.1782, "step": 9613 }, { "epoch": 0.7616557734204793, "grad_norm": 1.064498943487028, "learning_rate": 2.83387019288343e-06, "loss": 0.1894, "step": 9614 }, { "epoch": 0.7617349970291146, "grad_norm": 1.437961500822072, "learning_rate": 2.8320806960365234e-06, "loss": 0.2239, "step": 9615 }, { "epoch": 0.7618142206377501, "grad_norm": 1.494959339111669, "learning_rate": 2.8302916711626106e-06, "loss": 0.1239, "step": 9616 }, { "epoch": 0.7618934442463854, "grad_norm": 1.4718711394640587, "learning_rate": 2.8285031183794955e-06, "loss": 0.3247, "step": 9617 }, { "epoch": 0.7619726678550208, "grad_norm": 1.682513820831713, "learning_rate": 2.8267150378049437e-06, "loss": 0.3004, "step": 9618 }, { "epoch": 0.7620518914636562, "grad_norm": 0.9797877302659065, "learning_rate": 2.8249274295566863e-06, "loss": 0.1714, "step": 9619 }, { "epoch": 0.7621311150722916, "grad_norm": 1.5454328721732453, "learning_rate": 2.823140293752441e-06, "loss": 0.2438, "step": 9620 }, { "epoch": 0.7622103386809269, "grad_norm": 1.3219426016898606, "learning_rate": 2.821353630509871e-06, "loss": 0.2401, "step": 9621 }, { "epoch": 0.7622895622895622, "grad_norm": 1.1017171040008353, "learning_rate": 2.819567439946621e-06, "loss": 0.1546, "step": 9622 }, { "epoch": 0.7623687858981977, "grad_norm": 1.1866900616599196, "learning_rate": 2.8177817221803074e-06, "loss": 0.1575, "step": 9623 }, { "epoch": 0.762448009506833, "grad_norm": 1.2252375571065437, "learning_rate": 2.8159964773285074e-06, "loss": 0.1654, "step": 9624 }, { "epoch": 0.7625272331154684, "grad_norm": 1.3566432891303415, "learning_rate": 2.8142117055087704e-06, "loss": 0.1498, "step": 9625 }, { "epoch": 0.7626064567241038, "grad_norm": 1.8138063197517948, "learning_rate": 2.8124274068386203e-06, "loss": 0.3144, "step": 9626 }, { "epoch": 0.7626856803327392, "grad_norm": 1.388547678117533, "learning_rate": 2.8106435814355404e-06, "loss": 0.2591, "step": 9627 }, { "epoch": 0.7627649039413745, "grad_norm": 1.4476412809386547, "learning_rate": 2.808860229416984e-06, "loss": 0.2584, "step": 9628 }, { "epoch": 0.7628441275500099, "grad_norm": 1.63881227206624, "learning_rate": 2.8070773509003846e-06, "loss": 0.2471, "step": 9629 }, { "epoch": 0.7629233511586453, "grad_norm": 1.43305716308842, "learning_rate": 2.80529494600313e-06, "loss": 0.2709, "step": 9630 }, { "epoch": 0.7630025747672806, "grad_norm": 1.338375552829051, "learning_rate": 2.8035130148425847e-06, "loss": 0.191, "step": 9631 }, { "epoch": 0.763081798375916, "grad_norm": 1.221563060580026, "learning_rate": 2.801731557536078e-06, "loss": 0.1933, "step": 9632 }, { "epoch": 0.7631610219845514, "grad_norm": 1.2712555631294444, "learning_rate": 2.799950574200915e-06, "loss": 0.2368, "step": 9633 }, { "epoch": 0.7632402455931868, "grad_norm": 1.1162015888261139, "learning_rate": 2.7981700649543618e-06, "loss": 0.187, "step": 9634 }, { "epoch": 0.7633194692018221, "grad_norm": 1.4390583013939366, "learning_rate": 2.796390029913655e-06, "loss": 0.2369, "step": 9635 }, { "epoch": 0.7633986928104575, "grad_norm": 1.4801049402661324, "learning_rate": 2.794610469196004e-06, "loss": 0.2393, "step": 9636 }, { "epoch": 0.7634779164190929, "grad_norm": 1.3794599199353628, "learning_rate": 2.792831382918585e-06, "loss": 0.3018, "step": 9637 }, { "epoch": 0.7635571400277282, "grad_norm": 1.2072235882098208, "learning_rate": 2.791052771198538e-06, "loss": 0.1639, "step": 9638 }, { "epoch": 0.7636363636363637, "grad_norm": 1.3002816627252245, "learning_rate": 2.7892746341529807e-06, "loss": 0.2505, "step": 9639 }, { "epoch": 0.763715587244999, "grad_norm": 1.2160307854137866, "learning_rate": 2.7874969718989943e-06, "loss": 0.1985, "step": 9640 }, { "epoch": 0.7637948108536344, "grad_norm": 1.2765914341471314, "learning_rate": 2.785719784553624e-06, "loss": 0.2456, "step": 9641 }, { "epoch": 0.7638740344622698, "grad_norm": 1.246250225584539, "learning_rate": 2.7839430722338956e-06, "loss": 0.2048, "step": 9642 }, { "epoch": 0.7639532580709051, "grad_norm": 1.1130326574190037, "learning_rate": 2.7821668350567956e-06, "loss": 0.2293, "step": 9643 }, { "epoch": 0.7640324816795405, "grad_norm": 1.6670628317880867, "learning_rate": 2.7803910731392757e-06, "loss": 0.2843, "step": 9644 }, { "epoch": 0.7641117052881758, "grad_norm": 1.1663749664511274, "learning_rate": 2.778615786598269e-06, "loss": 0.2257, "step": 9645 }, { "epoch": 0.7641909288968113, "grad_norm": 1.1326643697143965, "learning_rate": 2.776840975550664e-06, "loss": 0.203, "step": 9646 }, { "epoch": 0.7642701525054466, "grad_norm": 1.275479010381982, "learning_rate": 2.7750666401133263e-06, "loss": 0.1942, "step": 9647 }, { "epoch": 0.764349376114082, "grad_norm": 0.9954876350818346, "learning_rate": 2.773292780403083e-06, "loss": 0.1533, "step": 9648 }, { "epoch": 0.7644285997227174, "grad_norm": 1.2266258157105536, "learning_rate": 2.7715193965367403e-06, "loss": 0.188, "step": 9649 }, { "epoch": 0.7645078233313527, "grad_norm": 1.3944449727972121, "learning_rate": 2.769746488631064e-06, "loss": 0.2403, "step": 9650 }, { "epoch": 0.7645870469399881, "grad_norm": 1.2968171985292305, "learning_rate": 2.767974056802789e-06, "loss": 0.1945, "step": 9651 }, { "epoch": 0.7646662705486235, "grad_norm": 1.3527802382386294, "learning_rate": 2.766202101168628e-06, "loss": 0.2197, "step": 9652 }, { "epoch": 0.7647454941572589, "grad_norm": 1.1243499762329816, "learning_rate": 2.76443062184525e-06, "loss": 0.207, "step": 9653 }, { "epoch": 0.7648247177658942, "grad_norm": 1.5515031320433774, "learning_rate": 2.7626596189492983e-06, "loss": 0.2665, "step": 9654 }, { "epoch": 0.7649039413745297, "grad_norm": 1.5172629951981524, "learning_rate": 2.76088909259739e-06, "loss": 0.1985, "step": 9655 }, { "epoch": 0.764983164983165, "grad_norm": 1.3690252606106479, "learning_rate": 2.7591190429061023e-06, "loss": 0.1933, "step": 9656 }, { "epoch": 0.7650623885918003, "grad_norm": 1.4631754202698102, "learning_rate": 2.757349469991981e-06, "loss": 0.2499, "step": 9657 }, { "epoch": 0.7651416122004358, "grad_norm": 0.9908502330179804, "learning_rate": 2.7555803739715512e-06, "loss": 0.1773, "step": 9658 }, { "epoch": 0.7652208358090711, "grad_norm": 1.2440165795221518, "learning_rate": 2.7538117549612963e-06, "loss": 0.1835, "step": 9659 }, { "epoch": 0.7653000594177065, "grad_norm": 1.1858777893085968, "learning_rate": 2.752043613077667e-06, "loss": 0.1656, "step": 9660 }, { "epoch": 0.7653792830263418, "grad_norm": 1.0996440543094386, "learning_rate": 2.7502759484370946e-06, "loss": 0.1784, "step": 9661 }, { "epoch": 0.7654585066349773, "grad_norm": 1.4794582881552152, "learning_rate": 2.748508761155967e-06, "loss": 0.2633, "step": 9662 }, { "epoch": 0.7655377302436126, "grad_norm": 1.5349235958232526, "learning_rate": 2.746742051350646e-06, "loss": 0.2146, "step": 9663 }, { "epoch": 0.7656169538522479, "grad_norm": 1.6214920852118246, "learning_rate": 2.7449758191374574e-06, "loss": 0.3423, "step": 9664 }, { "epoch": 0.7656961774608834, "grad_norm": 1.3985187169370845, "learning_rate": 2.7432100646327043e-06, "loss": 0.2276, "step": 9665 }, { "epoch": 0.7657754010695187, "grad_norm": 1.2965400438387649, "learning_rate": 2.7414447879526517e-06, "loss": 0.2271, "step": 9666 }, { "epoch": 0.7658546246781541, "grad_norm": 1.5466914478942493, "learning_rate": 2.739679989213532e-06, "loss": 0.2806, "step": 9667 }, { "epoch": 0.7659338482867895, "grad_norm": 1.2912448927774847, "learning_rate": 2.7379156685315523e-06, "loss": 0.1687, "step": 9668 }, { "epoch": 0.7660130718954249, "grad_norm": 1.2906373929689472, "learning_rate": 2.7361518260228827e-06, "loss": 0.1924, "step": 9669 }, { "epoch": 0.7660922955040602, "grad_norm": 1.1449090500809695, "learning_rate": 2.734388461803661e-06, "loss": 0.1995, "step": 9670 }, { "epoch": 0.7661715191126955, "grad_norm": 1.0505989608056299, "learning_rate": 2.7326255759900024e-06, "loss": 0.1781, "step": 9671 }, { "epoch": 0.766250742721331, "grad_norm": 1.309540519979868, "learning_rate": 2.7308631686979816e-06, "loss": 0.1884, "step": 9672 }, { "epoch": 0.7663299663299663, "grad_norm": 1.2054370913353984, "learning_rate": 2.7291012400436414e-06, "loss": 0.1789, "step": 9673 }, { "epoch": 0.7664091899386017, "grad_norm": 1.412604659709111, "learning_rate": 2.7273397901430023e-06, "loss": 0.1587, "step": 9674 }, { "epoch": 0.7664884135472371, "grad_norm": 1.2535616167202037, "learning_rate": 2.7255788191120435e-06, "loss": 0.2327, "step": 9675 }, { "epoch": 0.7665676371558724, "grad_norm": 1.2929861235799582, "learning_rate": 2.723818327066717e-06, "loss": 0.203, "step": 9676 }, { "epoch": 0.7666468607645078, "grad_norm": 1.5603009488862183, "learning_rate": 2.722058314122941e-06, "loss": 0.2062, "step": 9677 }, { "epoch": 0.7667260843731432, "grad_norm": 1.2764935684103669, "learning_rate": 2.7202987803966073e-06, "loss": 0.1792, "step": 9678 }, { "epoch": 0.7668053079817786, "grad_norm": 1.2313598379160944, "learning_rate": 2.718539726003573e-06, "loss": 0.2443, "step": 9679 }, { "epoch": 0.7668845315904139, "grad_norm": 1.3190603265335255, "learning_rate": 2.7167811510596577e-06, "loss": 0.1981, "step": 9680 }, { "epoch": 0.7669637551990494, "grad_norm": 1.5167742906135497, "learning_rate": 2.715023055680661e-06, "loss": 0.2341, "step": 9681 }, { "epoch": 0.7670429788076847, "grad_norm": 1.5095545221269455, "learning_rate": 2.7132654399823444e-06, "loss": 0.2363, "step": 9682 }, { "epoch": 0.76712220241632, "grad_norm": 1.4820826102641982, "learning_rate": 2.7115083040804337e-06, "loss": 0.2477, "step": 9683 }, { "epoch": 0.7672014260249554, "grad_norm": 1.4788755937620777, "learning_rate": 2.709751648090634e-06, "loss": 0.21, "step": 9684 }, { "epoch": 0.7672806496335908, "grad_norm": 1.2081503108109057, "learning_rate": 2.7079954721286108e-06, "loss": 0.2003, "step": 9685 }, { "epoch": 0.7673598732422262, "grad_norm": 1.248696332684323, "learning_rate": 2.7062397763099945e-06, "loss": 0.2363, "step": 9686 }, { "epoch": 0.7674390968508615, "grad_norm": 1.273745915104082, "learning_rate": 2.7044845607503967e-06, "loss": 0.2007, "step": 9687 }, { "epoch": 0.767518320459497, "grad_norm": 1.489220722122109, "learning_rate": 2.7027298255653878e-06, "loss": 0.289, "step": 9688 }, { "epoch": 0.7675975440681323, "grad_norm": 1.4521833134623467, "learning_rate": 2.700975570870503e-06, "loss": 0.2193, "step": 9689 }, { "epoch": 0.7676767676767676, "grad_norm": 1.389571089131228, "learning_rate": 2.6992217967812606e-06, "loss": 0.2432, "step": 9690 }, { "epoch": 0.7677559912854031, "grad_norm": 1.6863830982484698, "learning_rate": 2.697468503413134e-06, "loss": 0.2734, "step": 9691 }, { "epoch": 0.7678352148940384, "grad_norm": 1.6764967277894864, "learning_rate": 2.6957156908815684e-06, "loss": 0.2464, "step": 9692 }, { "epoch": 0.7679144385026738, "grad_norm": 1.393252266810544, "learning_rate": 2.6939633593019754e-06, "loss": 0.2243, "step": 9693 }, { "epoch": 0.7679936621113091, "grad_norm": 1.3671780697355251, "learning_rate": 2.692211508789744e-06, "loss": 0.2277, "step": 9694 }, { "epoch": 0.7680728857199446, "grad_norm": 1.6234745240168307, "learning_rate": 2.6904601394602216e-06, "loss": 0.279, "step": 9695 }, { "epoch": 0.7681521093285799, "grad_norm": 1.709171466067922, "learning_rate": 2.688709251428725e-06, "loss": 0.2656, "step": 9696 }, { "epoch": 0.7682313329372152, "grad_norm": 1.3736529211297932, "learning_rate": 2.6869588448105475e-06, "loss": 0.2279, "step": 9697 }, { "epoch": 0.7683105565458507, "grad_norm": 1.1789271419360619, "learning_rate": 2.685208919720942e-06, "loss": 0.2164, "step": 9698 }, { "epoch": 0.768389780154486, "grad_norm": 1.2415615531908208, "learning_rate": 2.683459476275133e-06, "loss": 0.2604, "step": 9699 }, { "epoch": 0.7684690037631214, "grad_norm": 1.3779018612909175, "learning_rate": 2.6817105145883117e-06, "loss": 0.2754, "step": 9700 }, { "epoch": 0.7685482273717568, "grad_norm": 1.457510329751007, "learning_rate": 2.6799620347756407e-06, "loss": 0.2095, "step": 9701 }, { "epoch": 0.7686274509803922, "grad_norm": 1.605748800608434, "learning_rate": 2.6782140369522435e-06, "loss": 0.3182, "step": 9702 }, { "epoch": 0.7687066745890275, "grad_norm": 1.3277532140691806, "learning_rate": 2.676466521233225e-06, "loss": 0.2313, "step": 9703 }, { "epoch": 0.7687858981976629, "grad_norm": 1.4428146105231685, "learning_rate": 2.674719487733649e-06, "loss": 0.2122, "step": 9704 }, { "epoch": 0.7688651218062983, "grad_norm": 1.1147503023422805, "learning_rate": 2.672972936568543e-06, "loss": 0.2337, "step": 9705 }, { "epoch": 0.7689443454149336, "grad_norm": 1.5862144631022426, "learning_rate": 2.6712268678529187e-06, "loss": 0.2747, "step": 9706 }, { "epoch": 0.769023569023569, "grad_norm": 1.4749711758766628, "learning_rate": 2.669481281701739e-06, "loss": 0.2023, "step": 9707 }, { "epoch": 0.7691027926322044, "grad_norm": 1.0974533667605268, "learning_rate": 2.6677361782299437e-06, "loss": 0.1809, "step": 9708 }, { "epoch": 0.7691820162408398, "grad_norm": 1.4069468903267026, "learning_rate": 2.665991557552442e-06, "loss": 0.2398, "step": 9709 }, { "epoch": 0.7692612398494751, "grad_norm": 1.470254654563354, "learning_rate": 2.6642474197841086e-06, "loss": 0.2879, "step": 9710 }, { "epoch": 0.7693404634581105, "grad_norm": 1.4812002953277617, "learning_rate": 2.6625037650397812e-06, "loss": 0.1972, "step": 9711 }, { "epoch": 0.7694196870667459, "grad_norm": 1.4990453061249278, "learning_rate": 2.6607605934342785e-06, "loss": 0.2276, "step": 9712 }, { "epoch": 0.7694989106753812, "grad_norm": 1.6736194542702678, "learning_rate": 2.659017905082376e-06, "loss": 0.2862, "step": 9713 }, { "epoch": 0.7695781342840167, "grad_norm": 1.1684544272392003, "learning_rate": 2.657275700098819e-06, "loss": 0.2286, "step": 9714 }, { "epoch": 0.769657357892652, "grad_norm": 1.2044203004374867, "learning_rate": 2.65553397859833e-06, "loss": 0.2267, "step": 9715 }, { "epoch": 0.7697365815012874, "grad_norm": 1.5780793831662523, "learning_rate": 2.6537927406955888e-06, "loss": 0.1856, "step": 9716 }, { "epoch": 0.7698158051099228, "grad_norm": 1.1369371413828187, "learning_rate": 2.6520519865052476e-06, "loss": 0.1573, "step": 9717 }, { "epoch": 0.7698950287185581, "grad_norm": 1.5582316550777615, "learning_rate": 2.6503117161419246e-06, "loss": 0.2785, "step": 9718 }, { "epoch": 0.7699742523271935, "grad_norm": 1.0793850245542824, "learning_rate": 2.6485719297202127e-06, "loss": 0.1452, "step": 9719 }, { "epoch": 0.7700534759358288, "grad_norm": 1.2917755849869386, "learning_rate": 2.646832627354667e-06, "loss": 0.257, "step": 9720 }, { "epoch": 0.7701326995444643, "grad_norm": 0.9475948104829591, "learning_rate": 2.645093809159809e-06, "loss": 0.194, "step": 9721 }, { "epoch": 0.7702119231530996, "grad_norm": 1.4620853099101796, "learning_rate": 2.643355475250137e-06, "loss": 0.3362, "step": 9722 }, { "epoch": 0.770291146761735, "grad_norm": 1.1663102274819164, "learning_rate": 2.6416176257401083e-06, "loss": 0.2278, "step": 9723 }, { "epoch": 0.7703703703703704, "grad_norm": 1.3870115097687417, "learning_rate": 2.639880260744151e-06, "loss": 0.2143, "step": 9724 }, { "epoch": 0.7704495939790057, "grad_norm": 1.6102421385417403, "learning_rate": 2.6381433803766654e-06, "loss": 0.2995, "step": 9725 }, { "epoch": 0.7705288175876411, "grad_norm": 1.2639052746443182, "learning_rate": 2.6364069847520155e-06, "loss": 0.2011, "step": 9726 }, { "epoch": 0.7706080411962765, "grad_norm": 1.0391428470685706, "learning_rate": 2.6346710739845317e-06, "loss": 0.1407, "step": 9727 }, { "epoch": 0.7706872648049119, "grad_norm": 1.2638084198101853, "learning_rate": 2.6329356481885215e-06, "loss": 0.1783, "step": 9728 }, { "epoch": 0.7707664884135472, "grad_norm": 1.2691448031715797, "learning_rate": 2.6312007074782497e-06, "loss": 0.1933, "step": 9729 }, { "epoch": 0.7708457120221827, "grad_norm": 1.4917692481855218, "learning_rate": 2.6294662519679525e-06, "loss": 0.2648, "step": 9730 }, { "epoch": 0.770924935630818, "grad_norm": 1.1915371161373722, "learning_rate": 2.627732281771841e-06, "loss": 0.1371, "step": 9731 }, { "epoch": 0.7710041592394533, "grad_norm": 1.0843401463639948, "learning_rate": 2.6259987970040858e-06, "loss": 0.158, "step": 9732 }, { "epoch": 0.7710833828480887, "grad_norm": 1.3029022926510931, "learning_rate": 2.6242657977788277e-06, "loss": 0.1934, "step": 9733 }, { "epoch": 0.7711626064567241, "grad_norm": 1.3933167070185897, "learning_rate": 2.6225332842101746e-06, "loss": 0.2154, "step": 9734 }, { "epoch": 0.7712418300653595, "grad_norm": 1.5026971001230482, "learning_rate": 2.6208012564122097e-06, "loss": 0.1992, "step": 9735 }, { "epoch": 0.7713210536739948, "grad_norm": 1.4474525946262127, "learning_rate": 2.6190697144989753e-06, "loss": 0.2217, "step": 9736 }, { "epoch": 0.7714002772826303, "grad_norm": 1.2414046060407191, "learning_rate": 2.617338658584483e-06, "loss": 0.2242, "step": 9737 }, { "epoch": 0.7714795008912656, "grad_norm": 1.264662520040703, "learning_rate": 2.6156080887827183e-06, "loss": 0.1982, "step": 9738 }, { "epoch": 0.7715587244999009, "grad_norm": 1.471965184920465, "learning_rate": 2.613878005207631e-06, "loss": 0.2816, "step": 9739 }, { "epoch": 0.7716379481085364, "grad_norm": 0.9852968884174986, "learning_rate": 2.612148407973134e-06, "loss": 0.1191, "step": 9740 }, { "epoch": 0.7717171717171717, "grad_norm": 1.4947392045049763, "learning_rate": 2.6104192971931197e-06, "loss": 0.2491, "step": 9741 }, { "epoch": 0.7717963953258071, "grad_norm": 1.4711988183903855, "learning_rate": 2.6086906729814378e-06, "loss": 0.2991, "step": 9742 }, { "epoch": 0.7718756189344425, "grad_norm": 1.4129032041874277, "learning_rate": 2.606962535451907e-06, "loss": 0.1856, "step": 9743 }, { "epoch": 0.7719548425430779, "grad_norm": 1.0585597784984708, "learning_rate": 2.605234884718324e-06, "loss": 0.1695, "step": 9744 }, { "epoch": 0.7720340661517132, "grad_norm": 1.3450712662523423, "learning_rate": 2.6035077208944416e-06, "loss": 0.2678, "step": 9745 }, { "epoch": 0.7721132897603485, "grad_norm": 1.068305692416248, "learning_rate": 2.601781044093984e-06, "loss": 0.1412, "step": 9746 }, { "epoch": 0.772192513368984, "grad_norm": 1.2969221167205398, "learning_rate": 2.600054854430649e-06, "loss": 0.2098, "step": 9747 }, { "epoch": 0.7722717369776193, "grad_norm": 1.570730164246765, "learning_rate": 2.5983291520180965e-06, "loss": 0.2825, "step": 9748 }, { "epoch": 0.7723509605862547, "grad_norm": 1.4732032095279834, "learning_rate": 2.5966039369699537e-06, "loss": 0.2761, "step": 9749 }, { "epoch": 0.7724301841948901, "grad_norm": 1.5011264372345434, "learning_rate": 2.5948792093998167e-06, "loss": 0.2623, "step": 9750 }, { "epoch": 0.7725094078035255, "grad_norm": 1.3924967768554901, "learning_rate": 2.5931549694212545e-06, "loss": 0.2309, "step": 9751 }, { "epoch": 0.7725886314121608, "grad_norm": 2.087296032616522, "learning_rate": 2.5914312171477983e-06, "loss": 0.2276, "step": 9752 }, { "epoch": 0.7726678550207962, "grad_norm": 1.179237864240833, "learning_rate": 2.589707952692947e-06, "loss": 0.1556, "step": 9753 }, { "epoch": 0.7727470786294316, "grad_norm": 1.1975446071149518, "learning_rate": 2.5879851761701724e-06, "loss": 0.187, "step": 9754 }, { "epoch": 0.7728263022380669, "grad_norm": 1.0076508648276241, "learning_rate": 2.586262887692911e-06, "loss": 0.1276, "step": 9755 }, { "epoch": 0.7729055258467024, "grad_norm": 1.1773819719638143, "learning_rate": 2.5845410873745614e-06, "loss": 0.2286, "step": 9756 }, { "epoch": 0.7729847494553377, "grad_norm": 1.8822293925967903, "learning_rate": 2.5828197753285043e-06, "loss": 0.3079, "step": 9757 }, { "epoch": 0.773063973063973, "grad_norm": 1.2767485432948658, "learning_rate": 2.581098951668075e-06, "loss": 0.2438, "step": 9758 }, { "epoch": 0.7731431966726084, "grad_norm": 1.4030859500932207, "learning_rate": 2.5793786165065805e-06, "loss": 0.2347, "step": 9759 }, { "epoch": 0.7732224202812438, "grad_norm": 1.3994373534062892, "learning_rate": 2.5776587699573007e-06, "loss": 0.2039, "step": 9760 }, { "epoch": 0.7733016438898792, "grad_norm": 1.4011803800677545, "learning_rate": 2.5759394121334767e-06, "loss": 0.2197, "step": 9761 }, { "epoch": 0.7733808674985145, "grad_norm": 1.0681782158546003, "learning_rate": 2.57422054314832e-06, "loss": 0.1544, "step": 9762 }, { "epoch": 0.77346009110715, "grad_norm": 1.3640290680430198, "learning_rate": 2.572502163115007e-06, "loss": 0.1993, "step": 9763 }, { "epoch": 0.7735393147157853, "grad_norm": 1.8237249707028338, "learning_rate": 2.5707842721466914e-06, "loss": 0.3321, "step": 9764 }, { "epoch": 0.7736185383244206, "grad_norm": 1.4114956114890511, "learning_rate": 2.5690668703564835e-06, "loss": 0.2341, "step": 9765 }, { "epoch": 0.7736977619330561, "grad_norm": 2.2349017314993844, "learning_rate": 2.5673499578574644e-06, "loss": 0.2746, "step": 9766 }, { "epoch": 0.7737769855416914, "grad_norm": 1.6321926073560515, "learning_rate": 2.565633534762689e-06, "loss": 0.2406, "step": 9767 }, { "epoch": 0.7738562091503268, "grad_norm": 1.588654883202697, "learning_rate": 2.5639176011851753e-06, "loss": 0.3065, "step": 9768 }, { "epoch": 0.7739354327589621, "grad_norm": 1.4625348158453002, "learning_rate": 2.562202157237903e-06, "loss": 0.2306, "step": 9769 }, { "epoch": 0.7740146563675976, "grad_norm": 1.4127855907656617, "learning_rate": 2.5604872030338336e-06, "loss": 0.2176, "step": 9770 }, { "epoch": 0.7740938799762329, "grad_norm": 1.1559966829911, "learning_rate": 2.5587727386858853e-06, "loss": 0.2204, "step": 9771 }, { "epoch": 0.7741731035848682, "grad_norm": 1.3936104441926196, "learning_rate": 2.5570587643069435e-06, "loss": 0.2191, "step": 9772 }, { "epoch": 0.7742523271935037, "grad_norm": 1.4867912888290955, "learning_rate": 2.555345280009872e-06, "loss": 0.2951, "step": 9773 }, { "epoch": 0.774331550802139, "grad_norm": 1.2391187519545641, "learning_rate": 2.5536322859074934e-06, "loss": 0.2176, "step": 9774 }, { "epoch": 0.7744107744107744, "grad_norm": 1.3700158717381583, "learning_rate": 2.551919782112596e-06, "loss": 0.2926, "step": 9775 }, { "epoch": 0.7744899980194098, "grad_norm": 1.4758559459077216, "learning_rate": 2.550207768737949e-06, "loss": 0.3297, "step": 9776 }, { "epoch": 0.7745692216280452, "grad_norm": 1.4231930240261537, "learning_rate": 2.54849624589627e-06, "loss": 0.2502, "step": 9777 }, { "epoch": 0.7746484452366805, "grad_norm": 1.277991131774114, "learning_rate": 2.546785213700258e-06, "loss": 0.1998, "step": 9778 }, { "epoch": 0.7747276688453159, "grad_norm": 1.0933793358100243, "learning_rate": 2.5450746722625785e-06, "loss": 0.1578, "step": 9779 }, { "epoch": 0.7748068924539513, "grad_norm": 1.2826164703801584, "learning_rate": 2.5433646216958617e-06, "loss": 0.2039, "step": 9780 }, { "epoch": 0.7748861160625866, "grad_norm": 1.2445057308386933, "learning_rate": 2.5416550621127024e-06, "loss": 0.1823, "step": 9781 }, { "epoch": 0.774965339671222, "grad_norm": 1.128413867916967, "learning_rate": 2.539945993625673e-06, "loss": 0.128, "step": 9782 }, { "epoch": 0.7750445632798574, "grad_norm": 1.6025571787779858, "learning_rate": 2.5382374163473046e-06, "loss": 0.2727, "step": 9783 }, { "epoch": 0.7751237868884928, "grad_norm": 1.1236097844657231, "learning_rate": 2.536529330390095e-06, "loss": 0.1779, "step": 9784 }, { "epoch": 0.7752030104971281, "grad_norm": 1.4867734272149344, "learning_rate": 2.5348217358665207e-06, "loss": 0.2373, "step": 9785 }, { "epoch": 0.7752822341057635, "grad_norm": 1.5319555874516364, "learning_rate": 2.5331146328890145e-06, "loss": 0.2603, "step": 9786 }, { "epoch": 0.7753614577143989, "grad_norm": 1.4968446226011585, "learning_rate": 2.5314080215699822e-06, "loss": 0.2673, "step": 9787 }, { "epoch": 0.7754406813230342, "grad_norm": 1.5590768794546803, "learning_rate": 2.5297019020217904e-06, "loss": 0.2624, "step": 9788 }, { "epoch": 0.7755199049316697, "grad_norm": 1.288873511730507, "learning_rate": 2.5279962743567877e-06, "loss": 0.2038, "step": 9789 }, { "epoch": 0.775599128540305, "grad_norm": 1.2411919203457322, "learning_rate": 2.526291138687278e-06, "loss": 0.1737, "step": 9790 }, { "epoch": 0.7756783521489404, "grad_norm": 1.8322792847583997, "learning_rate": 2.5245864951255317e-06, "loss": 0.3471, "step": 9791 }, { "epoch": 0.7757575757575758, "grad_norm": 1.5299762404464519, "learning_rate": 2.522882343783799e-06, "loss": 0.2267, "step": 9792 }, { "epoch": 0.7758367993662111, "grad_norm": 1.274294858630166, "learning_rate": 2.521178684774286e-06, "loss": 0.2005, "step": 9793 }, { "epoch": 0.7759160229748465, "grad_norm": 1.621905189167536, "learning_rate": 2.519475518209167e-06, "loss": 0.3123, "step": 9794 }, { "epoch": 0.7759952465834818, "grad_norm": 1.3538410344102365, "learning_rate": 2.5177728442005956e-06, "loss": 0.1818, "step": 9795 }, { "epoch": 0.7760744701921173, "grad_norm": 1.5069096239824853, "learning_rate": 2.516070662860679e-06, "loss": 0.3361, "step": 9796 }, { "epoch": 0.7761536938007526, "grad_norm": 1.3526837243609664, "learning_rate": 2.5143689743014966e-06, "loss": 0.224, "step": 9797 }, { "epoch": 0.776232917409388, "grad_norm": 1.3140397788197928, "learning_rate": 2.5126677786351005e-06, "loss": 0.2085, "step": 9798 }, { "epoch": 0.7763121410180234, "grad_norm": 1.291769236508765, "learning_rate": 2.5109670759735063e-06, "loss": 0.1774, "step": 9799 }, { "epoch": 0.7763913646266587, "grad_norm": 1.1257316434451494, "learning_rate": 2.509266866428691e-06, "loss": 0.187, "step": 9800 }, { "epoch": 0.7764705882352941, "grad_norm": 1.1852026694356723, "learning_rate": 2.507567150112613e-06, "loss": 0.2007, "step": 9801 }, { "epoch": 0.7765498118439295, "grad_norm": 1.4961105200209923, "learning_rate": 2.5058679271371865e-06, "loss": 0.2072, "step": 9802 }, { "epoch": 0.7766290354525649, "grad_norm": 1.3459071584133198, "learning_rate": 2.504169197614298e-06, "loss": 0.2177, "step": 9803 }, { "epoch": 0.7767082590612002, "grad_norm": 0.91136821091373, "learning_rate": 2.5024709616557964e-06, "loss": 0.1261, "step": 9804 }, { "epoch": 0.7767874826698357, "grad_norm": 1.3580249630504875, "learning_rate": 2.500773219373509e-06, "loss": 0.2736, "step": 9805 }, { "epoch": 0.776866706278471, "grad_norm": 1.1835191512655656, "learning_rate": 2.499075970879222e-06, "loss": 0.1916, "step": 9806 }, { "epoch": 0.7769459298871063, "grad_norm": 1.4384152599467444, "learning_rate": 2.4973792162846878e-06, "loss": 0.2339, "step": 9807 }, { "epoch": 0.7770251534957417, "grad_norm": 1.704774782212089, "learning_rate": 2.4956829557016336e-06, "loss": 0.3013, "step": 9808 }, { "epoch": 0.7771043771043771, "grad_norm": 1.1597230656190647, "learning_rate": 2.493987189241749e-06, "loss": 0.1611, "step": 9809 }, { "epoch": 0.7771836007130125, "grad_norm": 1.3320295329712077, "learning_rate": 2.4922919170166883e-06, "loss": 0.2666, "step": 9810 }, { "epoch": 0.7772628243216478, "grad_norm": 1.2956372994974583, "learning_rate": 2.4905971391380823e-06, "loss": 0.2342, "step": 9811 }, { "epoch": 0.7773420479302833, "grad_norm": 1.4098237361886397, "learning_rate": 2.488902855717522e-06, "loss": 0.2352, "step": 9812 }, { "epoch": 0.7774212715389186, "grad_norm": 1.4430902858066355, "learning_rate": 2.487209066866565e-06, "loss": 0.2438, "step": 9813 }, { "epoch": 0.7775004951475539, "grad_norm": 1.325367469354069, "learning_rate": 2.485515772696745e-06, "loss": 0.2683, "step": 9814 }, { "epoch": 0.7775797187561894, "grad_norm": 1.6190655999350856, "learning_rate": 2.483822973319553e-06, "loss": 0.2791, "step": 9815 }, { "epoch": 0.7776589423648247, "grad_norm": 1.686175871770783, "learning_rate": 2.482130668846451e-06, "loss": 0.2646, "step": 9816 }, { "epoch": 0.7777381659734601, "grad_norm": 1.2999105474559334, "learning_rate": 2.480438859388873e-06, "loss": 0.1858, "step": 9817 }, { "epoch": 0.7778173895820955, "grad_norm": 1.3646265368255728, "learning_rate": 2.4787475450582133e-06, "loss": 0.1911, "step": 9818 }, { "epoch": 0.7778966131907309, "grad_norm": 1.194040823596541, "learning_rate": 2.4770567259658386e-06, "loss": 0.1936, "step": 9819 }, { "epoch": 0.7779758367993662, "grad_norm": 1.2695115005678987, "learning_rate": 2.4753664022230783e-06, "loss": 0.2075, "step": 9820 }, { "epoch": 0.7780550604080015, "grad_norm": 1.163372999305103, "learning_rate": 2.473676573941236e-06, "loss": 0.1783, "step": 9821 }, { "epoch": 0.778134284016637, "grad_norm": 1.1635340373882301, "learning_rate": 2.471987241231577e-06, "loss": 0.2323, "step": 9822 }, { "epoch": 0.7782135076252723, "grad_norm": 1.3777705748488132, "learning_rate": 2.4702984042053335e-06, "loss": 0.2314, "step": 9823 }, { "epoch": 0.7782927312339077, "grad_norm": 1.38497452629102, "learning_rate": 2.468610062973712e-06, "loss": 0.1805, "step": 9824 }, { "epoch": 0.7783719548425431, "grad_norm": 1.3448713104542693, "learning_rate": 2.466922217647879e-06, "loss": 0.1973, "step": 9825 }, { "epoch": 0.7784511784511785, "grad_norm": 1.5234148565987975, "learning_rate": 2.465234868338968e-06, "loss": 0.2097, "step": 9826 }, { "epoch": 0.7785304020598138, "grad_norm": 1.2804172945685526, "learning_rate": 2.4635480151580902e-06, "loss": 0.252, "step": 9827 }, { "epoch": 0.7786096256684492, "grad_norm": 1.1899370919299495, "learning_rate": 2.461861658216311e-06, "loss": 0.2178, "step": 9828 }, { "epoch": 0.7786888492770846, "grad_norm": 1.3986738066004556, "learning_rate": 2.4601757976246685e-06, "loss": 0.242, "step": 9829 }, { "epoch": 0.7787680728857199, "grad_norm": 1.2427130278414202, "learning_rate": 2.4584904334941728e-06, "loss": 0.1395, "step": 9830 }, { "epoch": 0.7788472964943554, "grad_norm": 1.2550569843571655, "learning_rate": 2.456805565935795e-06, "loss": 0.2126, "step": 9831 }, { "epoch": 0.7789265201029907, "grad_norm": 1.3278799203293916, "learning_rate": 2.4551211950604713e-06, "loss": 0.2362, "step": 9832 }, { "epoch": 0.7790057437116261, "grad_norm": 1.4787427880788118, "learning_rate": 2.4534373209791162e-06, "loss": 0.2312, "step": 9833 }, { "epoch": 0.7790849673202614, "grad_norm": 1.3342973874062503, "learning_rate": 2.451753943802603e-06, "loss": 0.2501, "step": 9834 }, { "epoch": 0.7791641909288968, "grad_norm": 1.3620104584058979, "learning_rate": 2.4500710636417725e-06, "loss": 0.2311, "step": 9835 }, { "epoch": 0.7792434145375322, "grad_norm": 1.1549766958184322, "learning_rate": 2.4483886806074308e-06, "loss": 0.1735, "step": 9836 }, { "epoch": 0.7793226381461675, "grad_norm": 1.4914316260343865, "learning_rate": 2.4467067948103616e-06, "loss": 0.3064, "step": 9837 }, { "epoch": 0.779401861754803, "grad_norm": 1.4867861831483329, "learning_rate": 2.4450254063613056e-06, "loss": 0.2603, "step": 9838 }, { "epoch": 0.7794810853634383, "grad_norm": 1.1722369248622868, "learning_rate": 2.4433445153709722e-06, "loss": 0.1299, "step": 9839 }, { "epoch": 0.7795603089720736, "grad_norm": 1.6269310481454606, "learning_rate": 2.441664121950045e-06, "loss": 0.2723, "step": 9840 }, { "epoch": 0.7796395325807091, "grad_norm": 1.6435256799912368, "learning_rate": 2.439984226209167e-06, "loss": 0.2002, "step": 9841 }, { "epoch": 0.7797187561893444, "grad_norm": 1.2210533318748373, "learning_rate": 2.438304828258947e-06, "loss": 0.2124, "step": 9842 }, { "epoch": 0.7797979797979798, "grad_norm": 1.5488817003804343, "learning_rate": 2.4366259282099737e-06, "loss": 0.2708, "step": 9843 }, { "epoch": 0.7798772034066151, "grad_norm": 1.1249739446523817, "learning_rate": 2.4349475261727905e-06, "loss": 0.2071, "step": 9844 }, { "epoch": 0.7799564270152506, "grad_norm": 1.495207088742677, "learning_rate": 2.4332696222579078e-06, "loss": 0.233, "step": 9845 }, { "epoch": 0.7800356506238859, "grad_norm": 1.3890539034197336, "learning_rate": 2.4315922165758154e-06, "loss": 0.2772, "step": 9846 }, { "epoch": 0.7801148742325212, "grad_norm": 1.1698237027329774, "learning_rate": 2.4299153092369598e-06, "loss": 0.1939, "step": 9847 }, { "epoch": 0.7801940978411567, "grad_norm": 1.3966118506002616, "learning_rate": 2.428238900351755e-06, "loss": 0.2102, "step": 9848 }, { "epoch": 0.780273321449792, "grad_norm": 1.1226373455404322, "learning_rate": 2.426562990030582e-06, "loss": 0.174, "step": 9849 }, { "epoch": 0.7803525450584274, "grad_norm": 1.2347883158850974, "learning_rate": 2.424887578383799e-06, "loss": 0.2374, "step": 9850 }, { "epoch": 0.7804317686670628, "grad_norm": 1.5164522983368813, "learning_rate": 2.4232126655217202e-06, "loss": 0.2444, "step": 9851 }, { "epoch": 0.7805109922756982, "grad_norm": 1.081402277973487, "learning_rate": 2.421538251554627e-06, "loss": 0.1829, "step": 9852 }, { "epoch": 0.7805902158843335, "grad_norm": 1.1776046044388244, "learning_rate": 2.4198643365927767e-06, "loss": 0.197, "step": 9853 }, { "epoch": 0.7806694394929689, "grad_norm": 1.1795129618040738, "learning_rate": 2.4181909207463873e-06, "loss": 0.1705, "step": 9854 }, { "epoch": 0.7807486631016043, "grad_norm": 1.1989535403746896, "learning_rate": 2.4165180041256444e-06, "loss": 0.2565, "step": 9855 }, { "epoch": 0.7808278867102396, "grad_norm": 1.1321351278901886, "learning_rate": 2.4148455868407015e-06, "loss": 0.1942, "step": 9856 }, { "epoch": 0.780907110318875, "grad_norm": 1.6103943661973856, "learning_rate": 2.413173669001676e-06, "loss": 0.2792, "step": 9857 }, { "epoch": 0.7809863339275104, "grad_norm": 1.3498004566441262, "learning_rate": 2.4115022507186626e-06, "loss": 0.165, "step": 9858 }, { "epoch": 0.7810655575361458, "grad_norm": 1.4002256424804078, "learning_rate": 2.409831332101712e-06, "loss": 0.1966, "step": 9859 }, { "epoch": 0.7811447811447811, "grad_norm": 1.4073168574616537, "learning_rate": 2.4081609132608464e-06, "loss": 0.2263, "step": 9860 }, { "epoch": 0.7812240047534165, "grad_norm": 1.30523152072414, "learning_rate": 2.406490994306052e-06, "loss": 0.2838, "step": 9861 }, { "epoch": 0.7813032283620519, "grad_norm": 1.1465749574652757, "learning_rate": 2.4048215753472914e-06, "loss": 0.1992, "step": 9862 }, { "epoch": 0.7813824519706872, "grad_norm": 1.6836733865332354, "learning_rate": 2.403152656494485e-06, "loss": 0.2525, "step": 9863 }, { "epoch": 0.7814616755793227, "grad_norm": 1.3334983347970941, "learning_rate": 2.401484237857519e-06, "loss": 0.2664, "step": 9864 }, { "epoch": 0.781540899187958, "grad_norm": 1.2727137205827692, "learning_rate": 2.3998163195462565e-06, "loss": 0.1925, "step": 9865 }, { "epoch": 0.7816201227965934, "grad_norm": 1.2162621333401924, "learning_rate": 2.398148901670521e-06, "loss": 0.2598, "step": 9866 }, { "epoch": 0.7816993464052288, "grad_norm": 1.1008673990444313, "learning_rate": 2.396481984340098e-06, "loss": 0.1268, "step": 9867 }, { "epoch": 0.7817785700138641, "grad_norm": 1.2822854896648175, "learning_rate": 2.3948155676647546e-06, "loss": 0.1421, "step": 9868 }, { "epoch": 0.7818577936224995, "grad_norm": 1.8116326552945974, "learning_rate": 2.393149651754212e-06, "loss": 0.2408, "step": 9869 }, { "epoch": 0.7819370172311348, "grad_norm": 1.1781609148244916, "learning_rate": 2.391484236718159e-06, "loss": 0.1898, "step": 9870 }, { "epoch": 0.7820162408397703, "grad_norm": 1.8513002772033784, "learning_rate": 2.389819322666264e-06, "loss": 0.202, "step": 9871 }, { "epoch": 0.7820954644484056, "grad_norm": 1.3133458015195805, "learning_rate": 2.3881549097081467e-06, "loss": 0.1942, "step": 9872 }, { "epoch": 0.782174688057041, "grad_norm": 1.7831442212345616, "learning_rate": 2.3864909979534044e-06, "loss": 0.2722, "step": 9873 }, { "epoch": 0.7822539116656764, "grad_norm": 1.2140586418904127, "learning_rate": 2.3848275875115925e-06, "loss": 0.2197, "step": 9874 }, { "epoch": 0.7823331352743117, "grad_norm": 1.424360517002461, "learning_rate": 2.3831646784922446e-06, "loss": 0.2347, "step": 9875 }, { "epoch": 0.7824123588829471, "grad_norm": 1.8904571960955172, "learning_rate": 2.381502271004853e-06, "loss": 0.3165, "step": 9876 }, { "epoch": 0.7824915824915825, "grad_norm": 1.392190133827388, "learning_rate": 2.3798403651588765e-06, "loss": 0.2685, "step": 9877 }, { "epoch": 0.7825708061002179, "grad_norm": 1.5229441541479927, "learning_rate": 2.3781789610637483e-06, "loss": 0.336, "step": 9878 }, { "epoch": 0.7826500297088532, "grad_norm": 1.344870866671153, "learning_rate": 2.376518058828863e-06, "loss": 0.2437, "step": 9879 }, { "epoch": 0.7827292533174887, "grad_norm": 1.1635431213517604, "learning_rate": 2.3748576585635774e-06, "loss": 0.1958, "step": 9880 }, { "epoch": 0.782808476926124, "grad_norm": 1.2723448606048013, "learning_rate": 2.373197760377228e-06, "loss": 0.2052, "step": 9881 }, { "epoch": 0.7828877005347593, "grad_norm": 1.636000304317728, "learning_rate": 2.371538364379109e-06, "loss": 0.2451, "step": 9882 }, { "epoch": 0.7829669241433947, "grad_norm": 1.2617145233470228, "learning_rate": 2.36987947067848e-06, "loss": 0.2691, "step": 9883 }, { "epoch": 0.7830461477520301, "grad_norm": 1.3400291145152263, "learning_rate": 2.368221079384577e-06, "loss": 0.2098, "step": 9884 }, { "epoch": 0.7831253713606655, "grad_norm": 1.3312196768751225, "learning_rate": 2.3665631906065933e-06, "loss": 0.2294, "step": 9885 }, { "epoch": 0.7832045949693008, "grad_norm": 1.1271118797356454, "learning_rate": 2.364905804453692e-06, "loss": 0.1843, "step": 9886 }, { "epoch": 0.7832838185779363, "grad_norm": 1.2462825292571693, "learning_rate": 2.3632489210350074e-06, "loss": 0.1737, "step": 9887 }, { "epoch": 0.7833630421865716, "grad_norm": 1.036274618253312, "learning_rate": 2.361592540459636e-06, "loss": 0.1428, "step": 9888 }, { "epoch": 0.7834422657952069, "grad_norm": 1.367322973295281, "learning_rate": 2.3599366628366427e-06, "loss": 0.2396, "step": 9889 }, { "epoch": 0.7835214894038424, "grad_norm": 1.500516288589599, "learning_rate": 2.358281288275055e-06, "loss": 0.28, "step": 9890 }, { "epoch": 0.7836007130124777, "grad_norm": 1.2892530881249833, "learning_rate": 2.356626416883878e-06, "loss": 0.2377, "step": 9891 }, { "epoch": 0.7836799366211131, "grad_norm": 1.7851417420327353, "learning_rate": 2.354972048772074e-06, "loss": 0.2475, "step": 9892 }, { "epoch": 0.7837591602297485, "grad_norm": 1.037699305032629, "learning_rate": 2.353318184048573e-06, "loss": 0.1255, "step": 9893 }, { "epoch": 0.7838383838383839, "grad_norm": 1.087055042943525, "learning_rate": 2.351664822822277e-06, "loss": 0.1387, "step": 9894 }, { "epoch": 0.7839176074470192, "grad_norm": 1.2408409281557684, "learning_rate": 2.3500119652020526e-06, "loss": 0.217, "step": 9895 }, { "epoch": 0.7839968310556545, "grad_norm": 1.3334025655193391, "learning_rate": 2.348359611296728e-06, "loss": 0.2555, "step": 9896 }, { "epoch": 0.78407605466429, "grad_norm": 1.4739817711549104, "learning_rate": 2.346707761215108e-06, "loss": 0.2154, "step": 9897 }, { "epoch": 0.7841552782729253, "grad_norm": 1.2291750974138853, "learning_rate": 2.345056415065956e-06, "loss": 0.2162, "step": 9898 }, { "epoch": 0.7842345018815607, "grad_norm": 1.4078494400038302, "learning_rate": 2.343405572958004e-06, "loss": 0.2383, "step": 9899 }, { "epoch": 0.7843137254901961, "grad_norm": 1.4286986823989243, "learning_rate": 2.341755234999956e-06, "loss": 0.3231, "step": 9900 }, { "epoch": 0.7843929490988315, "grad_norm": 1.4633301709602267, "learning_rate": 2.3401054013004776e-06, "loss": 0.2451, "step": 9901 }, { "epoch": 0.7844721727074668, "grad_norm": 1.2974481192501213, "learning_rate": 2.338456071968198e-06, "loss": 0.2193, "step": 9902 }, { "epoch": 0.7845513963161022, "grad_norm": 1.3732503791224975, "learning_rate": 2.336807247111723e-06, "loss": 0.2161, "step": 9903 }, { "epoch": 0.7846306199247376, "grad_norm": 1.2483504707354818, "learning_rate": 2.3351589268396193e-06, "loss": 0.194, "step": 9904 }, { "epoch": 0.7847098435333729, "grad_norm": 1.4166070919215041, "learning_rate": 2.3335111112604194e-06, "loss": 0.2603, "step": 9905 }, { "epoch": 0.7847890671420084, "grad_norm": 1.3310924005256144, "learning_rate": 2.33186380048262e-06, "loss": 0.2891, "step": 9906 }, { "epoch": 0.7848682907506437, "grad_norm": 1.3732942770407457, "learning_rate": 2.330216994614696e-06, "loss": 0.2719, "step": 9907 }, { "epoch": 0.7849475143592791, "grad_norm": 1.4653131419759486, "learning_rate": 2.3285706937650786e-06, "loss": 0.2239, "step": 9908 }, { "epoch": 0.7850267379679144, "grad_norm": 1.4753549367756522, "learning_rate": 2.3269248980421653e-06, "loss": 0.256, "step": 9909 }, { "epoch": 0.7851059615765498, "grad_norm": 1.1013197545678601, "learning_rate": 2.3252796075543295e-06, "loss": 0.1526, "step": 9910 }, { "epoch": 0.7851851851851852, "grad_norm": 1.0476398735857544, "learning_rate": 2.3236348224099038e-06, "loss": 0.1783, "step": 9911 }, { "epoch": 0.7852644087938205, "grad_norm": 1.6738783342305954, "learning_rate": 2.3219905427171864e-06, "loss": 0.2963, "step": 9912 }, { "epoch": 0.785343632402456, "grad_norm": 1.5196516826877013, "learning_rate": 2.320346768584449e-06, "loss": 0.2409, "step": 9913 }, { "epoch": 0.7854228560110913, "grad_norm": 1.2553838154213728, "learning_rate": 2.3187035001199254e-06, "loss": 0.1451, "step": 9914 }, { "epoch": 0.7855020796197266, "grad_norm": 1.3606211542973146, "learning_rate": 2.317060737431813e-06, "loss": 0.2268, "step": 9915 }, { "epoch": 0.7855813032283621, "grad_norm": 1.1879794831866943, "learning_rate": 2.3154184806282863e-06, "loss": 0.1611, "step": 9916 }, { "epoch": 0.7856605268369974, "grad_norm": 1.3529114657561345, "learning_rate": 2.3137767298174774e-06, "loss": 0.1757, "step": 9917 }, { "epoch": 0.7857397504456328, "grad_norm": 1.283361078893396, "learning_rate": 2.312135485107486e-06, "loss": 0.1776, "step": 9918 }, { "epoch": 0.7858189740542681, "grad_norm": 1.1860801780667327, "learning_rate": 2.3104947466063785e-06, "loss": 0.1919, "step": 9919 }, { "epoch": 0.7858981976629036, "grad_norm": 1.0042676441401837, "learning_rate": 2.3088545144221964e-06, "loss": 0.1079, "step": 9920 }, { "epoch": 0.7859774212715389, "grad_norm": 1.6201670304917153, "learning_rate": 2.307214788662936e-06, "loss": 0.323, "step": 9921 }, { "epoch": 0.7860566448801742, "grad_norm": 1.4393124213212198, "learning_rate": 2.3055755694365644e-06, "loss": 0.2208, "step": 9922 }, { "epoch": 0.7861358684888097, "grad_norm": 1.1975356183472057, "learning_rate": 2.303936856851021e-06, "loss": 0.1589, "step": 9923 }, { "epoch": 0.786215092097445, "grad_norm": 1.0148862121154767, "learning_rate": 2.302298651014204e-06, "loss": 0.1356, "step": 9924 }, { "epoch": 0.7862943157060804, "grad_norm": 1.1291302129390803, "learning_rate": 2.3006609520339796e-06, "loss": 0.1434, "step": 9925 }, { "epoch": 0.7863735393147158, "grad_norm": 1.6482819136838143, "learning_rate": 2.2990237600181864e-06, "loss": 0.3151, "step": 9926 }, { "epoch": 0.7864527629233512, "grad_norm": 1.4021068748727905, "learning_rate": 2.2973870750746253e-06, "loss": 0.2076, "step": 9927 }, { "epoch": 0.7865319865319865, "grad_norm": 1.3497590054526278, "learning_rate": 2.2957508973110586e-06, "loss": 0.219, "step": 9928 }, { "epoch": 0.7866112101406219, "grad_norm": 1.4529947070303282, "learning_rate": 2.2941152268352284e-06, "loss": 0.2479, "step": 9929 }, { "epoch": 0.7866904337492573, "grad_norm": 1.6627996199675967, "learning_rate": 2.292480063754833e-06, "loss": 0.2571, "step": 9930 }, { "epoch": 0.7867696573578926, "grad_norm": 1.5263400909391849, "learning_rate": 2.2908454081775344e-06, "loss": 0.2494, "step": 9931 }, { "epoch": 0.786848880966528, "grad_norm": 1.0879206355764373, "learning_rate": 2.2892112602109783e-06, "loss": 0.1376, "step": 9932 }, { "epoch": 0.7869281045751634, "grad_norm": 1.2449612663426468, "learning_rate": 2.2875776199627564e-06, "loss": 0.2087, "step": 9933 }, { "epoch": 0.7870073281837988, "grad_norm": 1.38946532860024, "learning_rate": 2.2859444875404347e-06, "loss": 0.2079, "step": 9934 }, { "epoch": 0.7870865517924341, "grad_norm": 1.526377619670447, "learning_rate": 2.2843118630515536e-06, "loss": 0.2657, "step": 9935 }, { "epoch": 0.7871657754010695, "grad_norm": 1.2577583156740642, "learning_rate": 2.282679746603611e-06, "loss": 0.1613, "step": 9936 }, { "epoch": 0.7872449990097049, "grad_norm": 1.4840892467713886, "learning_rate": 2.281048138304072e-06, "loss": 0.278, "step": 9937 }, { "epoch": 0.7873242226183402, "grad_norm": 1.8028862577357145, "learning_rate": 2.279417038260373e-06, "loss": 0.3526, "step": 9938 }, { "epoch": 0.7874034462269757, "grad_norm": 1.237516044815252, "learning_rate": 2.2777864465799137e-06, "loss": 0.2163, "step": 9939 }, { "epoch": 0.787482669835611, "grad_norm": 1.394261685373207, "learning_rate": 2.276156363370058e-06, "loss": 0.2079, "step": 9940 }, { "epoch": 0.7875618934442464, "grad_norm": 1.1622432649522068, "learning_rate": 2.274526788738143e-06, "loss": 0.2118, "step": 9941 }, { "epoch": 0.7876411170528818, "grad_norm": 1.7095905653464842, "learning_rate": 2.272897722791466e-06, "loss": 0.3082, "step": 9942 }, { "epoch": 0.7877203406615171, "grad_norm": 1.5326759806882049, "learning_rate": 2.271269165637294e-06, "loss": 0.2862, "step": 9943 }, { "epoch": 0.7877995642701525, "grad_norm": 1.2973899393295614, "learning_rate": 2.2696411173828557e-06, "loss": 0.2435, "step": 9944 }, { "epoch": 0.7878787878787878, "grad_norm": 1.6309969512560585, "learning_rate": 2.268013578135357e-06, "loss": 0.2768, "step": 9945 }, { "epoch": 0.7879580114874233, "grad_norm": 1.2742891241394383, "learning_rate": 2.266386548001961e-06, "loss": 0.2556, "step": 9946 }, { "epoch": 0.7880372350960586, "grad_norm": 1.3937887766082877, "learning_rate": 2.264760027089795e-06, "loss": 0.3047, "step": 9947 }, { "epoch": 0.788116458704694, "grad_norm": 1.0740663943769213, "learning_rate": 2.2631340155059656e-06, "loss": 0.1616, "step": 9948 }, { "epoch": 0.7881956823133294, "grad_norm": 1.5917645559927993, "learning_rate": 2.261508513357532e-06, "loss": 0.2537, "step": 9949 }, { "epoch": 0.7882749059219647, "grad_norm": 1.2451814826213414, "learning_rate": 2.2598835207515267e-06, "loss": 0.1486, "step": 9950 }, { "epoch": 0.7883541295306001, "grad_norm": 1.5512190307109022, "learning_rate": 2.2582590377949497e-06, "loss": 0.2783, "step": 9951 }, { "epoch": 0.7884333531392355, "grad_norm": 1.412805822510152, "learning_rate": 2.2566350645947656e-06, "loss": 0.2471, "step": 9952 }, { "epoch": 0.7885125767478709, "grad_norm": 1.1487181400057618, "learning_rate": 2.2550116012579004e-06, "loss": 0.139, "step": 9953 }, { "epoch": 0.7885918003565062, "grad_norm": 1.3641899940447755, "learning_rate": 2.253388647891258e-06, "loss": 0.2443, "step": 9954 }, { "epoch": 0.7886710239651417, "grad_norm": 1.0853164644691753, "learning_rate": 2.2517662046016975e-06, "loss": 0.1411, "step": 9955 }, { "epoch": 0.788750247573777, "grad_norm": 1.5195153135443165, "learning_rate": 2.250144271496049e-06, "loss": 0.2386, "step": 9956 }, { "epoch": 0.7888294711824123, "grad_norm": 1.1884205594310875, "learning_rate": 2.2485228486811128e-06, "loss": 0.2158, "step": 9957 }, { "epoch": 0.7889086947910477, "grad_norm": 1.4803177855233298, "learning_rate": 2.2469019362636478e-06, "loss": 0.1788, "step": 9958 }, { "epoch": 0.7889879183996831, "grad_norm": 1.2687977783308744, "learning_rate": 2.2452815343503862e-06, "loss": 0.2236, "step": 9959 }, { "epoch": 0.7890671420083185, "grad_norm": 1.5656619584628373, "learning_rate": 2.2436616430480197e-06, "loss": 0.3275, "step": 9960 }, { "epoch": 0.7891463656169538, "grad_norm": 1.3806051030987687, "learning_rate": 2.2420422624632153e-06, "loss": 0.2763, "step": 9961 }, { "epoch": 0.7892255892255893, "grad_norm": 1.376220612768902, "learning_rate": 2.2404233927025985e-06, "loss": 0.2085, "step": 9962 }, { "epoch": 0.7893048128342246, "grad_norm": 1.1990561626777876, "learning_rate": 2.238805033872762e-06, "loss": 0.1961, "step": 9963 }, { "epoch": 0.7893840364428599, "grad_norm": 1.3329692751043065, "learning_rate": 2.237187186080273e-06, "loss": 0.1826, "step": 9964 }, { "epoch": 0.7894632600514954, "grad_norm": 1.481753581596124, "learning_rate": 2.235569849431655e-06, "loss": 0.2444, "step": 9965 }, { "epoch": 0.7895424836601307, "grad_norm": 1.2432384672455212, "learning_rate": 2.2339530240333993e-06, "loss": 0.2369, "step": 9966 }, { "epoch": 0.7896217072687661, "grad_norm": 1.1202647849761296, "learning_rate": 2.2323367099919724e-06, "loss": 0.23, "step": 9967 }, { "epoch": 0.7897009308774015, "grad_norm": 1.2914505321209109, "learning_rate": 2.230720907413797e-06, "loss": 0.2492, "step": 9968 }, { "epoch": 0.7897801544860369, "grad_norm": 1.181051442283646, "learning_rate": 2.2291056164052638e-06, "loss": 0.177, "step": 9969 }, { "epoch": 0.7898593780946722, "grad_norm": 1.2216963109119947, "learning_rate": 2.2274908370727376e-06, "loss": 0.1986, "step": 9970 }, { "epoch": 0.7899386017033075, "grad_norm": 1.3921930510509921, "learning_rate": 2.2258765695225416e-06, "loss": 0.1693, "step": 9971 }, { "epoch": 0.790017825311943, "grad_norm": 1.0550051674722272, "learning_rate": 2.224262813860962e-06, "loss": 0.1581, "step": 9972 }, { "epoch": 0.7900970489205783, "grad_norm": 1.610546763894353, "learning_rate": 2.2226495701942663e-06, "loss": 0.263, "step": 9973 }, { "epoch": 0.7901762725292137, "grad_norm": 1.4657956609637355, "learning_rate": 2.2210368386286742e-06, "loss": 0.2867, "step": 9974 }, { "epoch": 0.7902554961378491, "grad_norm": 1.1946257522627222, "learning_rate": 2.219424619270375e-06, "loss": 0.1617, "step": 9975 }, { "epoch": 0.7903347197464845, "grad_norm": 1.3262265321883955, "learning_rate": 2.2178129122255255e-06, "loss": 0.196, "step": 9976 }, { "epoch": 0.7904139433551198, "grad_norm": 1.1606617437482047, "learning_rate": 2.2162017176002514e-06, "loss": 0.1893, "step": 9977 }, { "epoch": 0.7904931669637552, "grad_norm": 1.287499575600566, "learning_rate": 2.2145910355006415e-06, "loss": 0.2389, "step": 9978 }, { "epoch": 0.7905723905723906, "grad_norm": 1.6596604641298756, "learning_rate": 2.212980866032749e-06, "loss": 0.233, "step": 9979 }, { "epoch": 0.7906516141810259, "grad_norm": 1.4249190205777467, "learning_rate": 2.2113712093025997e-06, "loss": 0.2629, "step": 9980 }, { "epoch": 0.7907308377896614, "grad_norm": 1.5299727945194308, "learning_rate": 2.20976206541618e-06, "loss": 0.247, "step": 9981 }, { "epoch": 0.7908100613982967, "grad_norm": 1.0478590279732038, "learning_rate": 2.208153434479442e-06, "loss": 0.1952, "step": 9982 }, { "epoch": 0.7908892850069321, "grad_norm": 1.128319044059468, "learning_rate": 2.20654531659831e-06, "loss": 0.2117, "step": 9983 }, { "epoch": 0.7909685086155674, "grad_norm": 1.6686463227701107, "learning_rate": 2.2049377118786696e-06, "loss": 0.2512, "step": 9984 }, { "epoch": 0.7910477322242028, "grad_norm": 1.3467671370617873, "learning_rate": 2.2033306204263704e-06, "loss": 0.1928, "step": 9985 }, { "epoch": 0.7911269558328382, "grad_norm": 1.2930284830740735, "learning_rate": 2.2017240423472384e-06, "loss": 0.1806, "step": 9986 }, { "epoch": 0.7912061794414735, "grad_norm": 1.323305642487313, "learning_rate": 2.200117977747055e-06, "loss": 0.2334, "step": 9987 }, { "epoch": 0.791285403050109, "grad_norm": 1.4208029351884266, "learning_rate": 2.198512426731568e-06, "loss": 0.1911, "step": 9988 }, { "epoch": 0.7913646266587443, "grad_norm": 1.3667820698657067, "learning_rate": 2.196907389406504e-06, "loss": 0.2243, "step": 9989 }, { "epoch": 0.7914438502673797, "grad_norm": 1.791093536456264, "learning_rate": 2.195302865877541e-06, "loss": 0.3501, "step": 9990 }, { "epoch": 0.7915230738760151, "grad_norm": 1.4951505870953965, "learning_rate": 2.193698856250331e-06, "loss": 0.2447, "step": 9991 }, { "epoch": 0.7916022974846504, "grad_norm": 1.3089823436456214, "learning_rate": 2.1920953606304875e-06, "loss": 0.2843, "step": 9992 }, { "epoch": 0.7916815210932858, "grad_norm": 1.2958487797745992, "learning_rate": 2.1904923791235965e-06, "loss": 0.2211, "step": 9993 }, { "epoch": 0.7917607447019211, "grad_norm": 1.2825546731431516, "learning_rate": 2.188889911835207e-06, "loss": 0.1861, "step": 9994 }, { "epoch": 0.7918399683105566, "grad_norm": 1.4713950370423254, "learning_rate": 2.1872879588708286e-06, "loss": 0.2604, "step": 9995 }, { "epoch": 0.7919191919191919, "grad_norm": 1.3323944419507083, "learning_rate": 2.185686520335948e-06, "loss": 0.2348, "step": 9996 }, { "epoch": 0.7919984155278272, "grad_norm": 1.3074796064722987, "learning_rate": 2.184085596336011e-06, "loss": 0.2235, "step": 9997 }, { "epoch": 0.7920776391364627, "grad_norm": 1.1598380733286042, "learning_rate": 2.1824851869764262e-06, "loss": 0.1717, "step": 9998 }, { "epoch": 0.792156862745098, "grad_norm": 1.5321560189673116, "learning_rate": 2.1808852923625802e-06, "loss": 0.286, "step": 9999 }, { "epoch": 0.7922360863537334, "grad_norm": 1.8248009756127332, "learning_rate": 2.1792859125998134e-06, "loss": 0.3009, "step": 10000 }, { "epoch": 0.7923153099623688, "grad_norm": 1.2349953833747491, "learning_rate": 2.1776870477934353e-06, "loss": 0.1715, "step": 10001 }, { "epoch": 0.7923945335710042, "grad_norm": 1.1666802052134546, "learning_rate": 2.1760886980487307e-06, "loss": 0.2028, "step": 10002 }, { "epoch": 0.7924737571796395, "grad_norm": 1.5195253589555158, "learning_rate": 2.174490863470938e-06, "loss": 0.323, "step": 10003 }, { "epoch": 0.7925529807882749, "grad_norm": 1.259825688601649, "learning_rate": 2.1728935441652687e-06, "loss": 0.1365, "step": 10004 }, { "epoch": 0.7926322043969103, "grad_norm": 1.2666422881448816, "learning_rate": 2.1712967402368947e-06, "loss": 0.1547, "step": 10005 }, { "epoch": 0.7927114280055456, "grad_norm": 1.799324352157349, "learning_rate": 2.169700451790964e-06, "loss": 0.2196, "step": 10006 }, { "epoch": 0.792790651614181, "grad_norm": 1.4724774522458546, "learning_rate": 2.168104678932581e-06, "loss": 0.2557, "step": 10007 }, { "epoch": 0.7928698752228164, "grad_norm": 1.5231941789975854, "learning_rate": 2.166509421766818e-06, "loss": 0.2054, "step": 10008 }, { "epoch": 0.7929490988314518, "grad_norm": 1.107829132588173, "learning_rate": 2.1649146803987197e-06, "loss": 0.1706, "step": 10009 }, { "epoch": 0.7930283224400871, "grad_norm": 1.3376833380354436, "learning_rate": 2.1633204549332897e-06, "loss": 0.2047, "step": 10010 }, { "epoch": 0.7931075460487225, "grad_norm": 0.9672791640395079, "learning_rate": 2.1617267454754996e-06, "loss": 0.1423, "step": 10011 }, { "epoch": 0.7931867696573579, "grad_norm": 1.2769356374566705, "learning_rate": 2.160133552130289e-06, "loss": 0.2471, "step": 10012 }, { "epoch": 0.7932659932659932, "grad_norm": 1.426737951051182, "learning_rate": 2.1585408750025584e-06, "loss": 0.2077, "step": 10013 }, { "epoch": 0.7933452168746287, "grad_norm": 1.2895947556304939, "learning_rate": 2.1569487141971824e-06, "loss": 0.235, "step": 10014 }, { "epoch": 0.793424440483264, "grad_norm": 1.4399102103735264, "learning_rate": 2.155357069818995e-06, "loss": 0.2659, "step": 10015 }, { "epoch": 0.7935036640918994, "grad_norm": 1.2342433407853597, "learning_rate": 2.1537659419727987e-06, "loss": 0.2617, "step": 10016 }, { "epoch": 0.7935828877005348, "grad_norm": 1.526028949621356, "learning_rate": 2.152175330763359e-06, "loss": 0.1728, "step": 10017 }, { "epoch": 0.7936621113091701, "grad_norm": 1.2718600277377743, "learning_rate": 2.150585236295415e-06, "loss": 0.1894, "step": 10018 }, { "epoch": 0.7937413349178055, "grad_norm": 1.2385976620892274, "learning_rate": 2.148995658673665e-06, "loss": 0.1586, "step": 10019 }, { "epoch": 0.7938205585264408, "grad_norm": 1.3831165724156946, "learning_rate": 2.14740659800277e-06, "loss": 0.2596, "step": 10020 }, { "epoch": 0.7938997821350763, "grad_norm": 1.2254736289746195, "learning_rate": 2.1458180543873697e-06, "loss": 0.1684, "step": 10021 }, { "epoch": 0.7939790057437116, "grad_norm": 1.3576555103115144, "learning_rate": 2.1442300279320593e-06, "loss": 0.2172, "step": 10022 }, { "epoch": 0.794058229352347, "grad_norm": 1.438419980302335, "learning_rate": 2.142642518741399e-06, "loss": 0.3328, "step": 10023 }, { "epoch": 0.7941374529609824, "grad_norm": 1.156647007357365, "learning_rate": 2.141055526919924e-06, "loss": 0.1638, "step": 10024 }, { "epoch": 0.7942166765696177, "grad_norm": 1.1524903426571225, "learning_rate": 2.1394690525721275e-06, "loss": 0.2042, "step": 10025 }, { "epoch": 0.7942959001782531, "grad_norm": 1.5182758667856178, "learning_rate": 2.137883095802469e-06, "loss": 0.2956, "step": 10026 }, { "epoch": 0.7943751237868885, "grad_norm": 1.3379707359334319, "learning_rate": 2.1362976567153813e-06, "loss": 0.2433, "step": 10027 }, { "epoch": 0.7944543473955239, "grad_norm": 1.4079850434038292, "learning_rate": 2.134712735415255e-06, "loss": 0.258, "step": 10028 }, { "epoch": 0.7945335710041592, "grad_norm": 1.2298891520714446, "learning_rate": 2.13312833200645e-06, "loss": 0.1831, "step": 10029 }, { "epoch": 0.7946127946127947, "grad_norm": 1.2505744234829472, "learning_rate": 2.131544446593289e-06, "loss": 0.1919, "step": 10030 }, { "epoch": 0.79469201822143, "grad_norm": 1.271701641006501, "learning_rate": 2.1299610792800675e-06, "loss": 0.1558, "step": 10031 }, { "epoch": 0.7947712418300653, "grad_norm": 1.3392105134756338, "learning_rate": 2.1283782301710408e-06, "loss": 0.1874, "step": 10032 }, { "epoch": 0.7948504654387007, "grad_norm": 1.2659260053617787, "learning_rate": 2.1267958993704297e-06, "loss": 0.2426, "step": 10033 }, { "epoch": 0.7949296890473361, "grad_norm": 1.2716818583067733, "learning_rate": 2.1252140869824266e-06, "loss": 0.2159, "step": 10034 }, { "epoch": 0.7950089126559715, "grad_norm": 1.74424913273211, "learning_rate": 2.1236327931111868e-06, "loss": 0.2462, "step": 10035 }, { "epoch": 0.7950881362646068, "grad_norm": 1.1670589718188686, "learning_rate": 2.122052017860825e-06, "loss": 0.21, "step": 10036 }, { "epoch": 0.7951673598732423, "grad_norm": 1.3652414913893929, "learning_rate": 2.120471761335434e-06, "loss": 0.1874, "step": 10037 }, { "epoch": 0.7952465834818776, "grad_norm": 1.3673507089888488, "learning_rate": 2.118892023639064e-06, "loss": 0.1542, "step": 10038 }, { "epoch": 0.7953258070905129, "grad_norm": 1.33695022773868, "learning_rate": 2.1173128048757307e-06, "loss": 0.2508, "step": 10039 }, { "epoch": 0.7954050306991484, "grad_norm": 1.3589920098432118, "learning_rate": 2.115734105149422e-06, "loss": 0.288, "step": 10040 }, { "epoch": 0.7954842543077837, "grad_norm": 1.1912113453417041, "learning_rate": 2.1141559245640865e-06, "loss": 0.1589, "step": 10041 }, { "epoch": 0.7955634779164191, "grad_norm": 1.4471018026891507, "learning_rate": 2.1125782632236357e-06, "loss": 0.1956, "step": 10042 }, { "epoch": 0.7956427015250545, "grad_norm": 1.2340719063866965, "learning_rate": 2.111001121231957e-06, "loss": 0.1916, "step": 10043 }, { "epoch": 0.7957219251336899, "grad_norm": 1.6502802025963668, "learning_rate": 2.1094244986928956e-06, "loss": 0.2641, "step": 10044 }, { "epoch": 0.7958011487423252, "grad_norm": 1.4286059065437176, "learning_rate": 2.1078483957102637e-06, "loss": 0.2142, "step": 10045 }, { "epoch": 0.7958803723509605, "grad_norm": 1.1541005697579656, "learning_rate": 2.1062728123878383e-06, "loss": 0.1928, "step": 10046 }, { "epoch": 0.795959595959596, "grad_norm": 0.9779231866751608, "learning_rate": 2.1046977488293675e-06, "loss": 0.164, "step": 10047 }, { "epoch": 0.7960388195682313, "grad_norm": 1.7990159984894762, "learning_rate": 2.1031232051385606e-06, "loss": 0.2656, "step": 10048 }, { "epoch": 0.7961180431768667, "grad_norm": 1.4442474253449442, "learning_rate": 2.1015491814190913e-06, "loss": 0.2755, "step": 10049 }, { "epoch": 0.7961972667855021, "grad_norm": 1.1360343486965774, "learning_rate": 2.099975677774606e-06, "loss": 0.1755, "step": 10050 }, { "epoch": 0.7962764903941375, "grad_norm": 1.1675717082990071, "learning_rate": 2.0984026943087087e-06, "loss": 0.1938, "step": 10051 }, { "epoch": 0.7963557140027728, "grad_norm": 1.5112116981848314, "learning_rate": 2.096830231124972e-06, "loss": 0.2701, "step": 10052 }, { "epoch": 0.7964349376114082, "grad_norm": 1.6807096099557952, "learning_rate": 2.0952582883269403e-06, "loss": 0.3468, "step": 10053 }, { "epoch": 0.7965141612200436, "grad_norm": 1.6110911115903424, "learning_rate": 2.093686866018114e-06, "loss": 0.2474, "step": 10054 }, { "epoch": 0.7965933848286789, "grad_norm": 0.984221143593065, "learning_rate": 2.0921159643019627e-06, "loss": 0.1595, "step": 10055 }, { "epoch": 0.7966726084373144, "grad_norm": 1.1097585500989249, "learning_rate": 2.0905455832819277e-06, "loss": 0.1871, "step": 10056 }, { "epoch": 0.7967518320459497, "grad_norm": 1.2385862124277391, "learning_rate": 2.088975723061408e-06, "loss": 0.2024, "step": 10057 }, { "epoch": 0.7968310556545851, "grad_norm": 1.0930097652320747, "learning_rate": 2.0874063837437687e-06, "loss": 0.2358, "step": 10058 }, { "epoch": 0.7969102792632204, "grad_norm": 1.455189501290251, "learning_rate": 2.085837565432349e-06, "loss": 0.2509, "step": 10059 }, { "epoch": 0.7969895028718558, "grad_norm": 1.4618715079436955, "learning_rate": 2.0842692682304442e-06, "loss": 0.2388, "step": 10060 }, { "epoch": 0.7970687264804912, "grad_norm": 1.3330412367482674, "learning_rate": 2.0827014922413213e-06, "loss": 0.2716, "step": 10061 }, { "epoch": 0.7971479500891265, "grad_norm": 1.697535382314123, "learning_rate": 2.0811342375682065e-06, "loss": 0.333, "step": 10062 }, { "epoch": 0.797227173697762, "grad_norm": 1.1133608534907486, "learning_rate": 2.0795675043143016e-06, "loss": 0.1791, "step": 10063 }, { "epoch": 0.7973063973063973, "grad_norm": 1.091187182538296, "learning_rate": 2.0780012925827653e-06, "loss": 0.1503, "step": 10064 }, { "epoch": 0.7973856209150327, "grad_norm": 1.3266508000551338, "learning_rate": 2.0764356024767228e-06, "loss": 0.296, "step": 10065 }, { "epoch": 0.7974648445236681, "grad_norm": 1.3878071703715749, "learning_rate": 2.0748704340992743e-06, "loss": 0.2458, "step": 10066 }, { "epoch": 0.7975440681323034, "grad_norm": 1.4026447611034674, "learning_rate": 2.0733057875534734e-06, "loss": 0.2269, "step": 10067 }, { "epoch": 0.7976232917409388, "grad_norm": 1.115526369033375, "learning_rate": 2.0717416629423425e-06, "loss": 0.169, "step": 10068 }, { "epoch": 0.7977025153495741, "grad_norm": 1.220850918869706, "learning_rate": 2.0701780603688783e-06, "loss": 0.1405, "step": 10069 }, { "epoch": 0.7977817389582096, "grad_norm": 1.2753124954344028, "learning_rate": 2.068614979936032e-06, "loss": 0.206, "step": 10070 }, { "epoch": 0.7978609625668449, "grad_norm": 1.3964368718481102, "learning_rate": 2.0670524217467237e-06, "loss": 0.2255, "step": 10071 }, { "epoch": 0.7979401861754803, "grad_norm": 1.2966942491927698, "learning_rate": 2.0654903859038457e-06, "loss": 0.1999, "step": 10072 }, { "epoch": 0.7980194097841157, "grad_norm": 1.3729307646340145, "learning_rate": 2.0639288725102467e-06, "loss": 0.2211, "step": 10073 }, { "epoch": 0.798098633392751, "grad_norm": 1.114727632434341, "learning_rate": 2.0623678816687433e-06, "loss": 0.1379, "step": 10074 }, { "epoch": 0.7981778570013864, "grad_norm": 1.1836953650481632, "learning_rate": 2.0608074134821243e-06, "loss": 0.1822, "step": 10075 }, { "epoch": 0.7982570806100218, "grad_norm": 1.353414572508888, "learning_rate": 2.0592474680531347e-06, "loss": 0.2078, "step": 10076 }, { "epoch": 0.7983363042186572, "grad_norm": 1.3768498118736798, "learning_rate": 2.0576880454844926e-06, "loss": 0.2266, "step": 10077 }, { "epoch": 0.7984155278272925, "grad_norm": 1.2433226735011762, "learning_rate": 2.0561291458788736e-06, "loss": 0.1993, "step": 10078 }, { "epoch": 0.7984947514359279, "grad_norm": 1.0821210913896915, "learning_rate": 2.0545707693389296e-06, "loss": 0.147, "step": 10079 }, { "epoch": 0.7985739750445633, "grad_norm": 1.4345023903943792, "learning_rate": 2.0530129159672685e-06, "loss": 0.2451, "step": 10080 }, { "epoch": 0.7986531986531986, "grad_norm": 0.9771276319871717, "learning_rate": 2.0514555858664663e-06, "loss": 0.1182, "step": 10081 }, { "epoch": 0.798732422261834, "grad_norm": 1.4876256690502427, "learning_rate": 2.0498987791390713e-06, "loss": 0.2366, "step": 10082 }, { "epoch": 0.7988116458704694, "grad_norm": 1.1456559547730043, "learning_rate": 2.0483424958875876e-06, "loss": 0.1358, "step": 10083 }, { "epoch": 0.7988908694791048, "grad_norm": 1.1835780928546362, "learning_rate": 2.0467867362144867e-06, "loss": 0.1932, "step": 10084 }, { "epoch": 0.7989700930877401, "grad_norm": 1.4400455663919776, "learning_rate": 2.0452315002222134e-06, "loss": 0.2307, "step": 10085 }, { "epoch": 0.7990493166963755, "grad_norm": 1.4977139210642982, "learning_rate": 2.04367678801317e-06, "loss": 0.235, "step": 10086 }, { "epoch": 0.7991285403050109, "grad_norm": 1.286165988202563, "learning_rate": 2.0421225996897243e-06, "loss": 0.1891, "step": 10087 }, { "epoch": 0.7992077639136462, "grad_norm": 1.2970440831640693, "learning_rate": 2.0405689353542204e-06, "loss": 0.2039, "step": 10088 }, { "epoch": 0.7992869875222817, "grad_norm": 1.5789173845812898, "learning_rate": 2.0390157951089506e-06, "loss": 0.3052, "step": 10089 }, { "epoch": 0.799366211130917, "grad_norm": 1.1536288192742066, "learning_rate": 2.0374631790561815e-06, "loss": 0.1806, "step": 10090 }, { "epoch": 0.7994454347395524, "grad_norm": 1.2498738727402872, "learning_rate": 2.0359110872981526e-06, "loss": 0.2317, "step": 10091 }, { "epoch": 0.7995246583481878, "grad_norm": 1.4342050112591576, "learning_rate": 2.034359519937057e-06, "loss": 0.3094, "step": 10092 }, { "epoch": 0.7996038819568231, "grad_norm": 1.1459214232825665, "learning_rate": 2.032808477075057e-06, "loss": 0.1456, "step": 10093 }, { "epoch": 0.7996831055654585, "grad_norm": 1.2717047424352017, "learning_rate": 2.0312579588142846e-06, "loss": 0.1807, "step": 10094 }, { "epoch": 0.7997623291740938, "grad_norm": 1.6236080733459077, "learning_rate": 2.029707965256833e-06, "loss": 0.3838, "step": 10095 }, { "epoch": 0.7998415527827293, "grad_norm": 1.261159963505592, "learning_rate": 2.0281584965047585e-06, "loss": 0.1919, "step": 10096 }, { "epoch": 0.7999207763913646, "grad_norm": 1.3827902907310599, "learning_rate": 2.0266095526600925e-06, "loss": 0.2018, "step": 10097 }, { "epoch": 0.8, "grad_norm": 1.3708064203720107, "learning_rate": 2.0250611338248215e-06, "loss": 0.2607, "step": 10098 }, { "epoch": 0.8000792236086354, "grad_norm": 1.0277386201776315, "learning_rate": 2.0235132401008985e-06, "loss": 0.1426, "step": 10099 }, { "epoch": 0.8001584472172707, "grad_norm": 1.1300879985948629, "learning_rate": 2.0219658715902514e-06, "loss": 0.1674, "step": 10100 }, { "epoch": 0.8002376708259061, "grad_norm": 1.2743524301711278, "learning_rate": 2.0204190283947645e-06, "loss": 0.2482, "step": 10101 }, { "epoch": 0.8003168944345415, "grad_norm": 1.101843815938498, "learning_rate": 2.0188727106162874e-06, "loss": 0.1358, "step": 10102 }, { "epoch": 0.8003961180431769, "grad_norm": 1.8307161788054416, "learning_rate": 2.017326918356639e-06, "loss": 0.3069, "step": 10103 }, { "epoch": 0.8004753416518122, "grad_norm": 1.2863919198324845, "learning_rate": 2.0157816517176045e-06, "loss": 0.2062, "step": 10104 }, { "epoch": 0.8005545652604477, "grad_norm": 1.6485069544438204, "learning_rate": 2.0142369108009306e-06, "loss": 0.267, "step": 10105 }, { "epoch": 0.800633788869083, "grad_norm": 1.1038291976889658, "learning_rate": 2.012692695708328e-06, "loss": 0.1501, "step": 10106 }, { "epoch": 0.8007130124777183, "grad_norm": 1.4175338489884162, "learning_rate": 2.011149006541483e-06, "loss": 0.238, "step": 10107 }, { "epoch": 0.8007922360863537, "grad_norm": 1.5039496538066082, "learning_rate": 2.0096058434020348e-06, "loss": 0.2327, "step": 10108 }, { "epoch": 0.8008714596949891, "grad_norm": 1.308265541140065, "learning_rate": 2.0080632063915927e-06, "loss": 0.1959, "step": 10109 }, { "epoch": 0.8009506833036245, "grad_norm": 1.1449948458269021, "learning_rate": 2.0065210956117354e-06, "loss": 0.2028, "step": 10110 }, { "epoch": 0.8010299069122598, "grad_norm": 1.4949408616518625, "learning_rate": 2.0049795111640023e-06, "loss": 0.2555, "step": 10111 }, { "epoch": 0.8011091305208953, "grad_norm": 1.0771438450015118, "learning_rate": 2.0034384531498962e-06, "loss": 0.1698, "step": 10112 }, { "epoch": 0.8011883541295306, "grad_norm": 1.381539600164165, "learning_rate": 2.0018979216708935e-06, "loss": 0.2812, "step": 10113 }, { "epoch": 0.8012675777381659, "grad_norm": 1.7045386541522283, "learning_rate": 2.000357916828428e-06, "loss": 0.3365, "step": 10114 }, { "epoch": 0.8013468013468014, "grad_norm": 0.9927199053215623, "learning_rate": 1.9988184387239027e-06, "loss": 0.1396, "step": 10115 }, { "epoch": 0.8014260249554367, "grad_norm": 1.2445765848041155, "learning_rate": 1.9972794874586808e-06, "loss": 0.2612, "step": 10116 }, { "epoch": 0.8015052485640721, "grad_norm": 1.381229687809628, "learning_rate": 1.9957410631341e-06, "loss": 0.2502, "step": 10117 }, { "epoch": 0.8015844721727075, "grad_norm": 1.1127587353477846, "learning_rate": 1.9942031658514573e-06, "loss": 0.2265, "step": 10118 }, { "epoch": 0.8016636957813429, "grad_norm": 1.0196678179637952, "learning_rate": 1.992665795712011e-06, "loss": 0.1545, "step": 10119 }, { "epoch": 0.8017429193899782, "grad_norm": 1.666506403482929, "learning_rate": 1.991128952816996e-06, "loss": 0.2651, "step": 10120 }, { "epoch": 0.8018221429986135, "grad_norm": 1.324582785510361, "learning_rate": 1.9895926372676042e-06, "loss": 0.1995, "step": 10121 }, { "epoch": 0.801901366607249, "grad_norm": 1.7086514642676323, "learning_rate": 1.988056849164991e-06, "loss": 0.2918, "step": 10122 }, { "epoch": 0.8019805902158843, "grad_norm": 1.18088746654375, "learning_rate": 1.986521588610285e-06, "loss": 0.1739, "step": 10123 }, { "epoch": 0.8020598138245197, "grad_norm": 1.2591856160721422, "learning_rate": 1.9849868557045738e-06, "loss": 0.2048, "step": 10124 }, { "epoch": 0.8021390374331551, "grad_norm": 1.1179822318003738, "learning_rate": 1.9834526505489105e-06, "loss": 0.193, "step": 10125 }, { "epoch": 0.8022182610417905, "grad_norm": 1.214455416425855, "learning_rate": 1.9819189732443187e-06, "loss": 0.2295, "step": 10126 }, { "epoch": 0.8022974846504258, "grad_norm": 1.0561176382575492, "learning_rate": 1.9803858238917826e-06, "loss": 0.1751, "step": 10127 }, { "epoch": 0.8023767082590612, "grad_norm": 1.2849639222606646, "learning_rate": 1.97885320259225e-06, "loss": 0.1984, "step": 10128 }, { "epoch": 0.8024559318676966, "grad_norm": 1.5194907069407513, "learning_rate": 1.9773211094466404e-06, "loss": 0.2277, "step": 10129 }, { "epoch": 0.8025351554763319, "grad_norm": 1.176551235848641, "learning_rate": 1.975789544555834e-06, "loss": 0.2234, "step": 10130 }, { "epoch": 0.8026143790849674, "grad_norm": 0.7892352253899914, "learning_rate": 1.9742585080206754e-06, "loss": 0.1076, "step": 10131 }, { "epoch": 0.8026936026936027, "grad_norm": 0.9758370650968448, "learning_rate": 1.9727279999419745e-06, "loss": 0.1279, "step": 10132 }, { "epoch": 0.8027728263022381, "grad_norm": 1.1767196344829758, "learning_rate": 1.9711980204205115e-06, "loss": 0.2078, "step": 10133 }, { "epoch": 0.8028520499108734, "grad_norm": 1.7160701199502668, "learning_rate": 1.9696685695570285e-06, "loss": 0.2065, "step": 10134 }, { "epoch": 0.8029312735195088, "grad_norm": 1.0815104996295632, "learning_rate": 1.9681396474522264e-06, "loss": 0.1797, "step": 10135 }, { "epoch": 0.8030104971281442, "grad_norm": 1.2219685433630565, "learning_rate": 1.966611254206785e-06, "loss": 0.2191, "step": 10136 }, { "epoch": 0.8030897207367795, "grad_norm": 1.49034873828191, "learning_rate": 1.9650833899213383e-06, "loss": 0.302, "step": 10137 }, { "epoch": 0.803168944345415, "grad_norm": 1.2803873712659504, "learning_rate": 1.963556054696487e-06, "loss": 0.1724, "step": 10138 }, { "epoch": 0.8032481679540503, "grad_norm": 1.3334402168903694, "learning_rate": 1.962029248632802e-06, "loss": 0.1791, "step": 10139 }, { "epoch": 0.8033273915626857, "grad_norm": 1.3447278612631246, "learning_rate": 1.9605029718308156e-06, "loss": 0.1735, "step": 10140 }, { "epoch": 0.8034066151713211, "grad_norm": 1.2041955856036584, "learning_rate": 1.958977224391021e-06, "loss": 0.1815, "step": 10141 }, { "epoch": 0.8034858387799564, "grad_norm": 1.2556963140595756, "learning_rate": 1.957452006413889e-06, "loss": 0.2488, "step": 10142 }, { "epoch": 0.8035650623885918, "grad_norm": 1.357159096573162, "learning_rate": 1.955927317999844e-06, "loss": 0.2113, "step": 10143 }, { "epoch": 0.8036442859972271, "grad_norm": 1.346643773313863, "learning_rate": 1.9544031592492763e-06, "loss": 0.2805, "step": 10144 }, { "epoch": 0.8037235096058626, "grad_norm": 1.226808929998541, "learning_rate": 1.9528795302625515e-06, "loss": 0.2114, "step": 10145 }, { "epoch": 0.8038027332144979, "grad_norm": 1.6681317747869757, "learning_rate": 1.951356431139988e-06, "loss": 0.2573, "step": 10146 }, { "epoch": 0.8038819568231333, "grad_norm": 1.657185706385089, "learning_rate": 1.949833861981877e-06, "loss": 0.2374, "step": 10147 }, { "epoch": 0.8039611804317687, "grad_norm": 1.562054000431535, "learning_rate": 1.948311822888468e-06, "loss": 0.3371, "step": 10148 }, { "epoch": 0.804040404040404, "grad_norm": 1.066699671091112, "learning_rate": 1.9467903139599853e-06, "loss": 0.1858, "step": 10149 }, { "epoch": 0.8041196276490394, "grad_norm": 1.4510025018380015, "learning_rate": 1.945269335296611e-06, "loss": 0.1538, "step": 10150 }, { "epoch": 0.8041988512576748, "grad_norm": 1.2258303063238312, "learning_rate": 1.943748886998492e-06, "loss": 0.2048, "step": 10151 }, { "epoch": 0.8042780748663102, "grad_norm": 1.401573926034788, "learning_rate": 1.942228969165748e-06, "loss": 0.2833, "step": 10152 }, { "epoch": 0.8043572984749455, "grad_norm": 1.2453388144194748, "learning_rate": 1.940709581898453e-06, "loss": 0.22, "step": 10153 }, { "epoch": 0.8044365220835809, "grad_norm": 1.2396365590750524, "learning_rate": 1.9391907252966522e-06, "loss": 0.2341, "step": 10154 }, { "epoch": 0.8045157456922163, "grad_norm": 1.4914906871726465, "learning_rate": 1.9376723994603574e-06, "loss": 0.2877, "step": 10155 }, { "epoch": 0.8045949693008516, "grad_norm": 1.2308230245266347, "learning_rate": 1.936154604489543e-06, "loss": 0.2273, "step": 10156 }, { "epoch": 0.804674192909487, "grad_norm": 1.158251685662581, "learning_rate": 1.9346373404841433e-06, "loss": 0.2029, "step": 10157 }, { "epoch": 0.8047534165181224, "grad_norm": 1.4578628459268346, "learning_rate": 1.93312060754407e-06, "loss": 0.2278, "step": 10158 }, { "epoch": 0.8048326401267578, "grad_norm": 1.2239206864037653, "learning_rate": 1.9316044057691886e-06, "loss": 0.1838, "step": 10159 }, { "epoch": 0.8049118637353931, "grad_norm": 1.2614866999120988, "learning_rate": 1.9300887352593355e-06, "loss": 0.2389, "step": 10160 }, { "epoch": 0.8049910873440285, "grad_norm": 1.4627397531773647, "learning_rate": 1.928573596114306e-06, "loss": 0.2518, "step": 10161 }, { "epoch": 0.8050703109526639, "grad_norm": 1.123008349440525, "learning_rate": 1.9270589884338706e-06, "loss": 0.1639, "step": 10162 }, { "epoch": 0.8051495345612992, "grad_norm": 1.3749026691254196, "learning_rate": 1.9255449123177563e-06, "loss": 0.1867, "step": 10163 }, { "epoch": 0.8052287581699347, "grad_norm": 1.679347130175776, "learning_rate": 1.924031367865655e-06, "loss": 0.3011, "step": 10164 }, { "epoch": 0.80530798177857, "grad_norm": 1.1681616226210787, "learning_rate": 1.922518355177232e-06, "loss": 0.1981, "step": 10165 }, { "epoch": 0.8053872053872054, "grad_norm": 1.6358109392165265, "learning_rate": 1.921005874352109e-06, "loss": 0.2858, "step": 10166 }, { "epoch": 0.8054664289958408, "grad_norm": 1.4148121378431107, "learning_rate": 1.9194939254898746e-06, "loss": 0.2671, "step": 10167 }, { "epoch": 0.8055456526044761, "grad_norm": 1.9709461917683746, "learning_rate": 1.917982508690085e-06, "loss": 0.257, "step": 10168 }, { "epoch": 0.8056248762131115, "grad_norm": 1.3283857009639417, "learning_rate": 1.916471624052256e-06, "loss": 0.2407, "step": 10169 }, { "epoch": 0.8057040998217468, "grad_norm": 1.7441939210997033, "learning_rate": 1.914961271675879e-06, "loss": 0.2595, "step": 10170 }, { "epoch": 0.8057833234303823, "grad_norm": 1.3436681496543106, "learning_rate": 1.9134514516603987e-06, "loss": 0.1934, "step": 10171 }, { "epoch": 0.8058625470390176, "grad_norm": 1.0126466646847239, "learning_rate": 1.9119421641052294e-06, "loss": 0.1765, "step": 10172 }, { "epoch": 0.805941770647653, "grad_norm": 1.139293071200593, "learning_rate": 1.91043340910975e-06, "loss": 0.1723, "step": 10173 }, { "epoch": 0.8060209942562884, "grad_norm": 1.024457390453502, "learning_rate": 1.908925186773308e-06, "loss": 0.1129, "step": 10174 }, { "epoch": 0.8061002178649237, "grad_norm": 1.0863679127375503, "learning_rate": 1.907417497195211e-06, "loss": 0.1429, "step": 10175 }, { "epoch": 0.8061794414735591, "grad_norm": 1.2456251375640532, "learning_rate": 1.9059103404747303e-06, "loss": 0.291, "step": 10176 }, { "epoch": 0.8062586650821945, "grad_norm": 1.1604744518959167, "learning_rate": 1.9044037167111096e-06, "loss": 0.2226, "step": 10177 }, { "epoch": 0.8063378886908299, "grad_norm": 1.2719263677423331, "learning_rate": 1.9028976260035515e-06, "loss": 0.1929, "step": 10178 }, { "epoch": 0.8064171122994652, "grad_norm": 1.2327099428680226, "learning_rate": 1.901392068451221e-06, "loss": 0.1935, "step": 10179 }, { "epoch": 0.8064963359081007, "grad_norm": 1.5565207733174424, "learning_rate": 1.8998870441532569e-06, "loss": 0.26, "step": 10180 }, { "epoch": 0.806575559516736, "grad_norm": 1.3136776035339608, "learning_rate": 1.8983825532087551e-06, "loss": 0.1883, "step": 10181 }, { "epoch": 0.8066547831253713, "grad_norm": 1.2064689701188687, "learning_rate": 1.8968785957167779e-06, "loss": 0.2026, "step": 10182 }, { "epoch": 0.8067340067340067, "grad_norm": 1.181341101025212, "learning_rate": 1.8953751717763592e-06, "loss": 0.1685, "step": 10183 }, { "epoch": 0.8068132303426421, "grad_norm": 1.3827006210285366, "learning_rate": 1.8938722814864863e-06, "loss": 0.2018, "step": 10184 }, { "epoch": 0.8068924539512775, "grad_norm": 1.487161883281403, "learning_rate": 1.8923699249461214e-06, "loss": 0.3085, "step": 10185 }, { "epoch": 0.8069716775599128, "grad_norm": 1.3983962053077565, "learning_rate": 1.890868102254182e-06, "loss": 0.2635, "step": 10186 }, { "epoch": 0.8070509011685483, "grad_norm": 2.7845862684748783, "learning_rate": 1.8893668135095611e-06, "loss": 0.2555, "step": 10187 }, { "epoch": 0.8071301247771836, "grad_norm": 1.5844006341350203, "learning_rate": 1.8878660588111108e-06, "loss": 0.2634, "step": 10188 }, { "epoch": 0.8072093483858189, "grad_norm": 1.0989350790019177, "learning_rate": 1.8863658382576444e-06, "loss": 0.1618, "step": 10189 }, { "epoch": 0.8072885719944544, "grad_norm": 1.301647239250617, "learning_rate": 1.8848661519479504e-06, "loss": 0.1981, "step": 10190 }, { "epoch": 0.8073677956030897, "grad_norm": 1.1557738100831196, "learning_rate": 1.8833669999807723e-06, "loss": 0.1836, "step": 10191 }, { "epoch": 0.8074470192117251, "grad_norm": 1.4667194456496075, "learning_rate": 1.88186838245482e-06, "loss": 0.2454, "step": 10192 }, { "epoch": 0.8075262428203605, "grad_norm": 1.2149222998878704, "learning_rate": 1.8803702994687755e-06, "loss": 0.202, "step": 10193 }, { "epoch": 0.8076054664289959, "grad_norm": 1.63789996797214, "learning_rate": 1.8788727511212768e-06, "loss": 0.2799, "step": 10194 }, { "epoch": 0.8076846900376312, "grad_norm": 1.144414113429165, "learning_rate": 1.8773757375109292e-06, "loss": 0.1598, "step": 10195 }, { "epoch": 0.8077639136462665, "grad_norm": 1.2403543074836227, "learning_rate": 1.8758792587363084e-06, "loss": 0.2009, "step": 10196 }, { "epoch": 0.807843137254902, "grad_norm": 1.1765324395672558, "learning_rate": 1.8743833148959479e-06, "loss": 0.1989, "step": 10197 }, { "epoch": 0.8079223608635373, "grad_norm": 1.2257385597523451, "learning_rate": 1.8728879060883443e-06, "loss": 0.2103, "step": 10198 }, { "epoch": 0.8080015844721727, "grad_norm": 1.4361904065234052, "learning_rate": 1.8713930324119711e-06, "loss": 0.2105, "step": 10199 }, { "epoch": 0.8080808080808081, "grad_norm": 1.2684809532953119, "learning_rate": 1.869898693965253e-06, "loss": 0.1979, "step": 10200 }, { "epoch": 0.8081600316894435, "grad_norm": 1.4506908921812272, "learning_rate": 1.868404890846587e-06, "loss": 0.2237, "step": 10201 }, { "epoch": 0.8082392552980788, "grad_norm": 1.4154582042485293, "learning_rate": 1.8669116231543294e-06, "loss": 0.1312, "step": 10202 }, { "epoch": 0.8083184789067142, "grad_norm": 1.1846778449260267, "learning_rate": 1.865418890986811e-06, "loss": 0.2417, "step": 10203 }, { "epoch": 0.8083977025153496, "grad_norm": 0.998375449218653, "learning_rate": 1.8639266944423163e-06, "loss": 0.1259, "step": 10204 }, { "epoch": 0.8084769261239849, "grad_norm": 1.3296422310118596, "learning_rate": 1.8624350336190977e-06, "loss": 0.2569, "step": 10205 }, { "epoch": 0.8085561497326204, "grad_norm": 1.5121592959466554, "learning_rate": 1.8609439086153803e-06, "loss": 0.2357, "step": 10206 }, { "epoch": 0.8086353733412557, "grad_norm": 1.4511070352829922, "learning_rate": 1.859453319529343e-06, "loss": 0.2666, "step": 10207 }, { "epoch": 0.8087145969498911, "grad_norm": 1.254892053827073, "learning_rate": 1.857963266459133e-06, "loss": 0.2159, "step": 10208 }, { "epoch": 0.8087938205585264, "grad_norm": 1.2875845036910536, "learning_rate": 1.8564737495028673e-06, "loss": 0.1779, "step": 10209 }, { "epoch": 0.8088730441671618, "grad_norm": 1.9037867781349262, "learning_rate": 1.854984768758621e-06, "loss": 0.3066, "step": 10210 }, { "epoch": 0.8089522677757972, "grad_norm": 1.3034801439114339, "learning_rate": 1.853496324324434e-06, "loss": 0.1879, "step": 10211 }, { "epoch": 0.8090314913844325, "grad_norm": 1.343642085044102, "learning_rate": 1.8520084162983176e-06, "loss": 0.2259, "step": 10212 }, { "epoch": 0.809110714993068, "grad_norm": 1.115846241283189, "learning_rate": 1.8505210447782418e-06, "loss": 0.1711, "step": 10213 }, { "epoch": 0.8091899386017033, "grad_norm": 1.287983609836565, "learning_rate": 1.8490342098621395e-06, "loss": 0.2299, "step": 10214 }, { "epoch": 0.8092691622103387, "grad_norm": 1.2446600253159052, "learning_rate": 1.8475479116479166e-06, "loss": 0.1993, "step": 10215 }, { "epoch": 0.8093483858189741, "grad_norm": 1.588182118207134, "learning_rate": 1.8460621502334375e-06, "loss": 0.2857, "step": 10216 }, { "epoch": 0.8094276094276094, "grad_norm": 1.814212128768382, "learning_rate": 1.8445769257165314e-06, "loss": 0.3378, "step": 10217 }, { "epoch": 0.8095068330362448, "grad_norm": 1.1729493010856318, "learning_rate": 1.8430922381949912e-06, "loss": 0.1747, "step": 10218 }, { "epoch": 0.8095860566448801, "grad_norm": 1.2348952610724928, "learning_rate": 1.84160808776658e-06, "loss": 0.1726, "step": 10219 }, { "epoch": 0.8096652802535156, "grad_norm": 1.272296879440776, "learning_rate": 1.8401244745290214e-06, "loss": 0.2311, "step": 10220 }, { "epoch": 0.8097445038621509, "grad_norm": 1.4692163605441064, "learning_rate": 1.838641398580001e-06, "loss": 0.2233, "step": 10221 }, { "epoch": 0.8098237274707863, "grad_norm": 1.1636044130383294, "learning_rate": 1.8371588600171764e-06, "loss": 0.2129, "step": 10222 }, { "epoch": 0.8099029510794217, "grad_norm": 1.1582485878060729, "learning_rate": 1.8356768589381646e-06, "loss": 0.1838, "step": 10223 }, { "epoch": 0.809982174688057, "grad_norm": 1.2179009344241836, "learning_rate": 1.8341953954405434e-06, "loss": 0.1652, "step": 10224 }, { "epoch": 0.8100613982966924, "grad_norm": 1.3052393438348684, "learning_rate": 1.832714469621868e-06, "loss": 0.2035, "step": 10225 }, { "epoch": 0.8101406219053278, "grad_norm": 1.2281412441058612, "learning_rate": 1.8312340815796458e-06, "loss": 0.2292, "step": 10226 }, { "epoch": 0.8102198455139632, "grad_norm": 1.467045008574425, "learning_rate": 1.8297542314113515e-06, "loss": 0.2852, "step": 10227 }, { "epoch": 0.8102990691225985, "grad_norm": 1.3758352800801341, "learning_rate": 1.82827491921443e-06, "loss": 0.2579, "step": 10228 }, { "epoch": 0.810378292731234, "grad_norm": 1.29540385183792, "learning_rate": 1.8267961450862859e-06, "loss": 0.204, "step": 10229 }, { "epoch": 0.8104575163398693, "grad_norm": 1.1650957366618704, "learning_rate": 1.8253179091242868e-06, "loss": 0.2194, "step": 10230 }, { "epoch": 0.8105367399485046, "grad_norm": 1.1815721874418448, "learning_rate": 1.8238402114257714e-06, "loss": 0.1963, "step": 10231 }, { "epoch": 0.81061596355714, "grad_norm": 1.3190540223575808, "learning_rate": 1.8223630520880365e-06, "loss": 0.2065, "step": 10232 }, { "epoch": 0.8106951871657754, "grad_norm": 1.2907715712642336, "learning_rate": 1.8208864312083462e-06, "loss": 0.2418, "step": 10233 }, { "epoch": 0.8107744107744108, "grad_norm": 1.0001408320805503, "learning_rate": 1.8194103488839265e-06, "loss": 0.1384, "step": 10234 }, { "epoch": 0.8108536343830461, "grad_norm": 1.414418477532868, "learning_rate": 1.817934805211976e-06, "loss": 0.29, "step": 10235 }, { "epoch": 0.8109328579916815, "grad_norm": 0.9326814359755048, "learning_rate": 1.8164598002896484e-06, "loss": 0.1112, "step": 10236 }, { "epoch": 0.8110120816003169, "grad_norm": 1.4504881888123107, "learning_rate": 1.8149853342140644e-06, "loss": 0.2644, "step": 10237 }, { "epoch": 0.8110913052089522, "grad_norm": 1.409400362517609, "learning_rate": 1.8135114070823145e-06, "loss": 0.1756, "step": 10238 }, { "epoch": 0.8111705288175877, "grad_norm": 1.1782037114620536, "learning_rate": 1.8120380189914476e-06, "loss": 0.1884, "step": 10239 }, { "epoch": 0.811249752426223, "grad_norm": 1.3316947943494324, "learning_rate": 1.8105651700384764e-06, "loss": 0.2428, "step": 10240 }, { "epoch": 0.8113289760348584, "grad_norm": 1.3826143216831768, "learning_rate": 1.8090928603203871e-06, "loss": 0.2192, "step": 10241 }, { "epoch": 0.8114081996434938, "grad_norm": 0.9404104605513147, "learning_rate": 1.8076210899341196e-06, "loss": 0.1084, "step": 10242 }, { "epoch": 0.8114874232521291, "grad_norm": 1.230086720832417, "learning_rate": 1.8061498589765824e-06, "loss": 0.2075, "step": 10243 }, { "epoch": 0.8115666468607645, "grad_norm": 1.2371368633193467, "learning_rate": 1.804679167544655e-06, "loss": 0.1727, "step": 10244 }, { "epoch": 0.8116458704693998, "grad_norm": 1.7424896414268622, "learning_rate": 1.8032090157351701e-06, "loss": 0.2947, "step": 10245 }, { "epoch": 0.8117250940780353, "grad_norm": 1.1787494076789051, "learning_rate": 1.8017394036449276e-06, "loss": 0.1903, "step": 10246 }, { "epoch": 0.8118043176866706, "grad_norm": 1.34669875521707, "learning_rate": 1.8002703313706993e-06, "loss": 0.2065, "step": 10247 }, { "epoch": 0.811883541295306, "grad_norm": 1.3281843386193821, "learning_rate": 1.7988017990092167e-06, "loss": 0.232, "step": 10248 }, { "epoch": 0.8119627649039414, "grad_norm": 1.295546345942209, "learning_rate": 1.797333806657171e-06, "loss": 0.2509, "step": 10249 }, { "epoch": 0.8120419885125767, "grad_norm": 1.3786530864373474, "learning_rate": 1.7958663544112277e-06, "loss": 0.257, "step": 10250 }, { "epoch": 0.8121212121212121, "grad_norm": 1.3995634685977423, "learning_rate": 1.794399442368009e-06, "loss": 0.2737, "step": 10251 }, { "epoch": 0.8122004357298475, "grad_norm": 1.6194665112494062, "learning_rate": 1.7929330706241023e-06, "loss": 0.2472, "step": 10252 }, { "epoch": 0.8122796593384829, "grad_norm": 1.5776913207683405, "learning_rate": 1.7914672392760645e-06, "loss": 0.2712, "step": 10253 }, { "epoch": 0.8123588829471182, "grad_norm": 1.189055709264225, "learning_rate": 1.7900019484204135e-06, "loss": 0.2346, "step": 10254 }, { "epoch": 0.8124381065557537, "grad_norm": 1.6584302929940211, "learning_rate": 1.788537198153627e-06, "loss": 0.2967, "step": 10255 }, { "epoch": 0.812517330164389, "grad_norm": 1.1198439538396494, "learning_rate": 1.787072988572157e-06, "loss": 0.2213, "step": 10256 }, { "epoch": 0.8125965537730243, "grad_norm": 1.297205225142965, "learning_rate": 1.7856093197724133e-06, "loss": 0.2564, "step": 10257 }, { "epoch": 0.8126757773816597, "grad_norm": 0.9705550236969642, "learning_rate": 1.7841461918507708e-06, "loss": 0.1685, "step": 10258 }, { "epoch": 0.8127550009902951, "grad_norm": 1.90949201417659, "learning_rate": 1.7826836049035655e-06, "loss": 0.2706, "step": 10259 }, { "epoch": 0.8128342245989305, "grad_norm": 1.2711044578456732, "learning_rate": 1.7812215590271099e-06, "loss": 0.1941, "step": 10260 }, { "epoch": 0.8129134482075658, "grad_norm": 1.208488851403463, "learning_rate": 1.7797600543176675e-06, "loss": 0.2128, "step": 10261 }, { "epoch": 0.8129926718162013, "grad_norm": 1.3401095932771625, "learning_rate": 1.7782990908714703e-06, "loss": 0.2443, "step": 10262 }, { "epoch": 0.8130718954248366, "grad_norm": 1.1773150423768914, "learning_rate": 1.7768386687847194e-06, "loss": 0.2481, "step": 10263 }, { "epoch": 0.8131511190334719, "grad_norm": 1.1317846167546541, "learning_rate": 1.7753787881535757e-06, "loss": 0.1617, "step": 10264 }, { "epoch": 0.8132303426421074, "grad_norm": 1.2325199084524263, "learning_rate": 1.7739194490741607e-06, "loss": 0.2744, "step": 10265 }, { "epoch": 0.8133095662507427, "grad_norm": 1.0459303766153756, "learning_rate": 1.7724606516425724e-06, "loss": 0.1311, "step": 10266 }, { "epoch": 0.8133887898593781, "grad_norm": 1.4635184626565356, "learning_rate": 1.7710023959548617e-06, "loss": 0.3027, "step": 10267 }, { "epoch": 0.8134680134680135, "grad_norm": 1.4666310331054864, "learning_rate": 1.7695446821070438e-06, "loss": 0.2131, "step": 10268 }, { "epoch": 0.8135472370766489, "grad_norm": 1.0664895229219125, "learning_rate": 1.76808751019511e-06, "loss": 0.1703, "step": 10269 }, { "epoch": 0.8136264606852842, "grad_norm": 1.4358776736052012, "learning_rate": 1.7666308803150045e-06, "loss": 0.2408, "step": 10270 }, { "epoch": 0.8137056842939195, "grad_norm": 1.5802910115029503, "learning_rate": 1.7651747925626383e-06, "loss": 0.2877, "step": 10271 }, { "epoch": 0.813784907902555, "grad_norm": 1.2365544352665416, "learning_rate": 1.763719247033886e-06, "loss": 0.184, "step": 10272 }, { "epoch": 0.8138641315111903, "grad_norm": 1.3473060195080302, "learning_rate": 1.762264243824594e-06, "loss": 0.2266, "step": 10273 }, { "epoch": 0.8139433551198257, "grad_norm": 1.4943388232597272, "learning_rate": 1.7608097830305637e-06, "loss": 0.1842, "step": 10274 }, { "epoch": 0.8140225787284611, "grad_norm": 1.4123639575882567, "learning_rate": 1.7593558647475627e-06, "loss": 0.2691, "step": 10275 }, { "epoch": 0.8141018023370965, "grad_norm": 1.1825159472120483, "learning_rate": 1.7579024890713282e-06, "loss": 0.1955, "step": 10276 }, { "epoch": 0.8141810259457318, "grad_norm": 1.1835983889608375, "learning_rate": 1.7564496560975574e-06, "loss": 0.1859, "step": 10277 }, { "epoch": 0.8142602495543672, "grad_norm": 1.1057596763373965, "learning_rate": 1.7549973659219077e-06, "loss": 0.1672, "step": 10278 }, { "epoch": 0.8143394731630026, "grad_norm": 1.3260921355826003, "learning_rate": 1.7535456186400123e-06, "loss": 0.1994, "step": 10279 }, { "epoch": 0.8144186967716379, "grad_norm": 1.0582699870264463, "learning_rate": 1.7520944143474584e-06, "loss": 0.1808, "step": 10280 }, { "epoch": 0.8144979203802734, "grad_norm": 1.554432483448072, "learning_rate": 1.750643753139798e-06, "loss": 0.2899, "step": 10281 }, { "epoch": 0.8145771439889087, "grad_norm": 1.28817796701486, "learning_rate": 1.749193635112556e-06, "loss": 0.2379, "step": 10282 }, { "epoch": 0.8146563675975441, "grad_norm": 1.1402680726084276, "learning_rate": 1.7477440603612127e-06, "loss": 0.1677, "step": 10283 }, { "epoch": 0.8147355912061794, "grad_norm": 1.5324841499773714, "learning_rate": 1.746295028981213e-06, "loss": 0.246, "step": 10284 }, { "epoch": 0.8148148148148148, "grad_norm": 1.4081360060841852, "learning_rate": 1.7448465410679737e-06, "loss": 0.1446, "step": 10285 }, { "epoch": 0.8148940384234502, "grad_norm": 1.5748660026528625, "learning_rate": 1.7433985967168686e-06, "loss": 0.2066, "step": 10286 }, { "epoch": 0.8149732620320855, "grad_norm": 1.324420943349605, "learning_rate": 1.7419511960232384e-06, "loss": 0.2073, "step": 10287 }, { "epoch": 0.815052485640721, "grad_norm": 1.2594131264642532, "learning_rate": 1.7405043390823827e-06, "loss": 0.1789, "step": 10288 }, { "epoch": 0.8151317092493563, "grad_norm": 1.4520718884925568, "learning_rate": 1.7390580259895783e-06, "loss": 0.1849, "step": 10289 }, { "epoch": 0.8152109328579917, "grad_norm": 1.3862950222123092, "learning_rate": 1.7376122568400533e-06, "loss": 0.2414, "step": 10290 }, { "epoch": 0.8152901564666271, "grad_norm": 1.5026304541181141, "learning_rate": 1.7361670317290014e-06, "loss": 0.3038, "step": 10291 }, { "epoch": 0.8153693800752624, "grad_norm": 1.0904410936026638, "learning_rate": 1.7347223507515908e-06, "loss": 0.2092, "step": 10292 }, { "epoch": 0.8154486036838978, "grad_norm": 1.209528048456497, "learning_rate": 1.7332782140029436e-06, "loss": 0.234, "step": 10293 }, { "epoch": 0.8155278272925331, "grad_norm": 1.3974957146397042, "learning_rate": 1.7318346215781468e-06, "loss": 0.1821, "step": 10294 }, { "epoch": 0.8156070509011686, "grad_norm": 1.274340726574614, "learning_rate": 1.7303915735722586e-06, "loss": 0.2081, "step": 10295 }, { "epoch": 0.8156862745098039, "grad_norm": 1.2421061391655035, "learning_rate": 1.7289490700802947e-06, "loss": 0.2373, "step": 10296 }, { "epoch": 0.8157654981184393, "grad_norm": 1.4259941335948567, "learning_rate": 1.727507111197233e-06, "loss": 0.2602, "step": 10297 }, { "epoch": 0.8158447217270747, "grad_norm": 1.192839607111933, "learning_rate": 1.7260656970180268e-06, "loss": 0.1846, "step": 10298 }, { "epoch": 0.81592394533571, "grad_norm": 1.5370843421180753, "learning_rate": 1.7246248276375832e-06, "loss": 0.2546, "step": 10299 }, { "epoch": 0.8160031689443454, "grad_norm": 1.2597798624620644, "learning_rate": 1.7231845031507732e-06, "loss": 0.2121, "step": 10300 }, { "epoch": 0.8160823925529808, "grad_norm": 1.377613046561397, "learning_rate": 1.72174472365244e-06, "loss": 0.2565, "step": 10301 }, { "epoch": 0.8161616161616162, "grad_norm": 1.785585502046287, "learning_rate": 1.720305489237385e-06, "loss": 0.2387, "step": 10302 }, { "epoch": 0.8162408397702515, "grad_norm": 1.5014508382227219, "learning_rate": 1.718866800000375e-06, "loss": 0.2272, "step": 10303 }, { "epoch": 0.816320063378887, "grad_norm": 1.4412576474118937, "learning_rate": 1.7174286560361364e-06, "loss": 0.3261, "step": 10304 }, { "epoch": 0.8163992869875223, "grad_norm": 1.0957979987181663, "learning_rate": 1.7159910574393702e-06, "loss": 0.1618, "step": 10305 }, { "epoch": 0.8164785105961576, "grad_norm": 1.209546413018405, "learning_rate": 1.7145540043047327e-06, "loss": 0.2248, "step": 10306 }, { "epoch": 0.816557734204793, "grad_norm": 1.1621168537330795, "learning_rate": 1.713117496726845e-06, "loss": 0.1638, "step": 10307 }, { "epoch": 0.8166369578134284, "grad_norm": 1.416173438535659, "learning_rate": 1.711681534800298e-06, "loss": 0.2776, "step": 10308 }, { "epoch": 0.8167161814220638, "grad_norm": 1.437073638808075, "learning_rate": 1.7102461186196418e-06, "loss": 0.2706, "step": 10309 }, { "epoch": 0.8167954050306991, "grad_norm": 1.5157392696702352, "learning_rate": 1.7088112482793872e-06, "loss": 0.2677, "step": 10310 }, { "epoch": 0.8168746286393346, "grad_norm": 1.5381388140838068, "learning_rate": 1.7073769238740213e-06, "loss": 0.2769, "step": 10311 }, { "epoch": 0.8169538522479699, "grad_norm": 1.3247262068757826, "learning_rate": 1.7059431454979825e-06, "loss": 0.1797, "step": 10312 }, { "epoch": 0.8170330758566052, "grad_norm": 1.2728621172571906, "learning_rate": 1.7045099132456766e-06, "loss": 0.1784, "step": 10313 }, { "epoch": 0.8171122994652407, "grad_norm": 2.1232685417655315, "learning_rate": 1.7030772272114803e-06, "loss": 0.3448, "step": 10314 }, { "epoch": 0.817191523073876, "grad_norm": 1.4068864869705144, "learning_rate": 1.7016450874897273e-06, "loss": 0.1907, "step": 10315 }, { "epoch": 0.8172707466825114, "grad_norm": 1.57852913028593, "learning_rate": 1.7002134941747116e-06, "loss": 0.2098, "step": 10316 }, { "epoch": 0.8173499702911468, "grad_norm": 1.2301279745603875, "learning_rate": 1.698782447360705e-06, "loss": 0.1905, "step": 10317 }, { "epoch": 0.8174291938997821, "grad_norm": 1.1207002454014958, "learning_rate": 1.697351947141932e-06, "loss": 0.1333, "step": 10318 }, { "epoch": 0.8175084175084175, "grad_norm": 1.4359269579492713, "learning_rate": 1.6959219936125827e-06, "loss": 0.2583, "step": 10319 }, { "epoch": 0.8175876411170528, "grad_norm": 1.3639522354032674, "learning_rate": 1.6944925868668106e-06, "loss": 0.2161, "step": 10320 }, { "epoch": 0.8176668647256883, "grad_norm": 1.2383139379667367, "learning_rate": 1.6930637269987415e-06, "loss": 0.1981, "step": 10321 }, { "epoch": 0.8177460883343236, "grad_norm": 1.3533050767149346, "learning_rate": 1.691635414102455e-06, "loss": 0.2228, "step": 10322 }, { "epoch": 0.817825311942959, "grad_norm": 1.171941407824025, "learning_rate": 1.6902076482719987e-06, "loss": 0.1889, "step": 10323 }, { "epoch": 0.8179045355515944, "grad_norm": 1.591989080662042, "learning_rate": 1.6887804296013854e-06, "loss": 0.2997, "step": 10324 }, { "epoch": 0.8179837591602297, "grad_norm": 1.4794138470015934, "learning_rate": 1.6873537581845866e-06, "loss": 0.2466, "step": 10325 }, { "epoch": 0.8180629827688651, "grad_norm": 1.6253032975568673, "learning_rate": 1.6859276341155483e-06, "loss": 0.2873, "step": 10326 }, { "epoch": 0.8181422063775005, "grad_norm": 1.4149302691843844, "learning_rate": 1.68450205748817e-06, "loss": 0.2213, "step": 10327 }, { "epoch": 0.8182214299861359, "grad_norm": 1.412223343932639, "learning_rate": 1.6830770283963194e-06, "loss": 0.2295, "step": 10328 }, { "epoch": 0.8183006535947712, "grad_norm": 1.2809231024141314, "learning_rate": 1.6816525469338252e-06, "loss": 0.229, "step": 10329 }, { "epoch": 0.8183798772034067, "grad_norm": 1.2995554972626846, "learning_rate": 1.6802286131944889e-06, "loss": 0.2339, "step": 10330 }, { "epoch": 0.818459100812042, "grad_norm": 1.3471722232092467, "learning_rate": 1.6788052272720656e-06, "loss": 0.2197, "step": 10331 }, { "epoch": 0.8185383244206773, "grad_norm": 1.2927139783459367, "learning_rate": 1.677382389260277e-06, "loss": 0.197, "step": 10332 }, { "epoch": 0.8186175480293127, "grad_norm": 1.2592630129454887, "learning_rate": 1.6759600992528147e-06, "loss": 0.2238, "step": 10333 }, { "epoch": 0.8186967716379481, "grad_norm": 1.213021850314714, "learning_rate": 1.674538357343326e-06, "loss": 0.2326, "step": 10334 }, { "epoch": 0.8187759952465835, "grad_norm": 1.3713100036709034, "learning_rate": 1.6731171636254263e-06, "loss": 0.1972, "step": 10335 }, { "epoch": 0.8188552188552188, "grad_norm": 1.0865475728156522, "learning_rate": 1.6716965181926959e-06, "loss": 0.1969, "step": 10336 }, { "epoch": 0.8189344424638543, "grad_norm": 1.4639722932368535, "learning_rate": 1.670276421138677e-06, "loss": 0.23, "step": 10337 }, { "epoch": 0.8190136660724896, "grad_norm": 1.36637401712028, "learning_rate": 1.6688568725568732e-06, "loss": 0.2396, "step": 10338 }, { "epoch": 0.8190928896811249, "grad_norm": 1.1068080607427992, "learning_rate": 1.6674378725407603e-06, "loss": 0.1854, "step": 10339 }, { "epoch": 0.8191721132897604, "grad_norm": 1.1373757725705118, "learning_rate": 1.6660194211837687e-06, "loss": 0.157, "step": 10340 }, { "epoch": 0.8192513368983957, "grad_norm": 1.4529655792121996, "learning_rate": 1.6646015185792963e-06, "loss": 0.269, "step": 10341 }, { "epoch": 0.8193305605070311, "grad_norm": 1.5709448244830584, "learning_rate": 1.6631841648207092e-06, "loss": 0.2288, "step": 10342 }, { "epoch": 0.8194097841156665, "grad_norm": 1.2709831515115881, "learning_rate": 1.6617673600013295e-06, "loss": 0.2132, "step": 10343 }, { "epoch": 0.8194890077243019, "grad_norm": 1.3817262216962494, "learning_rate": 1.6603511042144494e-06, "loss": 0.2113, "step": 10344 }, { "epoch": 0.8195682313329372, "grad_norm": 1.4004216623460284, "learning_rate": 1.6589353975533174e-06, "loss": 0.2452, "step": 10345 }, { "epoch": 0.8196474549415725, "grad_norm": 1.1397019793174794, "learning_rate": 1.6575202401111578e-06, "loss": 0.2117, "step": 10346 }, { "epoch": 0.819726678550208, "grad_norm": 1.3005336089755082, "learning_rate": 1.6561056319811497e-06, "loss": 0.2337, "step": 10347 }, { "epoch": 0.8198059021588433, "grad_norm": 1.2571241667943538, "learning_rate": 1.654691573256434e-06, "loss": 0.1912, "step": 10348 }, { "epoch": 0.8198851257674787, "grad_norm": 1.4114978669902711, "learning_rate": 1.653278064030126e-06, "loss": 0.2705, "step": 10349 }, { "epoch": 0.8199643493761141, "grad_norm": 2.1951536027815726, "learning_rate": 1.651865104395296e-06, "loss": 0.3389, "step": 10350 }, { "epoch": 0.8200435729847495, "grad_norm": 1.4692628375697956, "learning_rate": 1.6504526944449772e-06, "loss": 0.283, "step": 10351 }, { "epoch": 0.8201227965933848, "grad_norm": 0.9088173713682491, "learning_rate": 1.6490408342721764e-06, "loss": 0.1667, "step": 10352 }, { "epoch": 0.8202020202020202, "grad_norm": 1.4165749674275787, "learning_rate": 1.6476295239698537e-06, "loss": 0.2307, "step": 10353 }, { "epoch": 0.8202812438106556, "grad_norm": 1.5300314861445203, "learning_rate": 1.6462187636309345e-06, "loss": 0.2924, "step": 10354 }, { "epoch": 0.8203604674192909, "grad_norm": 1.4401525327339801, "learning_rate": 1.6448085533483172e-06, "loss": 0.3099, "step": 10355 }, { "epoch": 0.8204396910279264, "grad_norm": 1.3132576232091553, "learning_rate": 1.6433988932148547e-06, "loss": 0.1781, "step": 10356 }, { "epoch": 0.8205189146365617, "grad_norm": 1.421611373861576, "learning_rate": 1.6419897833233644e-06, "loss": 0.2676, "step": 10357 }, { "epoch": 0.8205981382451971, "grad_norm": 1.3529378320975918, "learning_rate": 1.6405812237666296e-06, "loss": 0.2506, "step": 10358 }, { "epoch": 0.8206773618538324, "grad_norm": 1.2486852963495985, "learning_rate": 1.6391732146373994e-06, "loss": 0.2156, "step": 10359 }, { "epoch": 0.8207565854624678, "grad_norm": 1.0582853093689502, "learning_rate": 1.6377657560283844e-06, "loss": 0.1363, "step": 10360 }, { "epoch": 0.8208358090711032, "grad_norm": 1.1741922401650111, "learning_rate": 1.6363588480322545e-06, "loss": 0.1976, "step": 10361 }, { "epoch": 0.8209150326797385, "grad_norm": 1.3491918239893574, "learning_rate": 1.6349524907416536e-06, "loss": 0.2448, "step": 10362 }, { "epoch": 0.820994256288374, "grad_norm": 1.3061875093073403, "learning_rate": 1.6335466842491821e-06, "loss": 0.2547, "step": 10363 }, { "epoch": 0.8210734798970093, "grad_norm": 1.2939148498516355, "learning_rate": 1.6321414286474014e-06, "loss": 0.2417, "step": 10364 }, { "epoch": 0.8211527035056447, "grad_norm": 1.1108571738423376, "learning_rate": 1.6307367240288463e-06, "loss": 0.2071, "step": 10365 }, { "epoch": 0.8212319271142801, "grad_norm": 1.4448227630754369, "learning_rate": 1.6293325704860087e-06, "loss": 0.2877, "step": 10366 }, { "epoch": 0.8213111507229154, "grad_norm": 1.5158763692825543, "learning_rate": 1.6279289681113407e-06, "loss": 0.2142, "step": 10367 }, { "epoch": 0.8213903743315508, "grad_norm": 1.146351910766024, "learning_rate": 1.626525916997269e-06, "loss": 0.1805, "step": 10368 }, { "epoch": 0.8214695979401861, "grad_norm": 1.0646648279273332, "learning_rate": 1.6251234172361763e-06, "loss": 0.1766, "step": 10369 }, { "epoch": 0.8215488215488216, "grad_norm": 1.1809710572867445, "learning_rate": 1.623721468920405e-06, "loss": 0.1829, "step": 10370 }, { "epoch": 0.8216280451574569, "grad_norm": 1.3656010045319111, "learning_rate": 1.6223200721422739e-06, "loss": 0.2001, "step": 10371 }, { "epoch": 0.8217072687660923, "grad_norm": 1.509409543101267, "learning_rate": 1.6209192269940555e-06, "loss": 0.1841, "step": 10372 }, { "epoch": 0.8217864923747277, "grad_norm": 1.324564976827142, "learning_rate": 1.6195189335679884e-06, "loss": 0.2096, "step": 10373 }, { "epoch": 0.821865715983363, "grad_norm": 1.5918482414211266, "learning_rate": 1.6181191919562734e-06, "loss": 0.3143, "step": 10374 }, { "epoch": 0.8219449395919984, "grad_norm": 1.4284160694796504, "learning_rate": 1.6167200022510799e-06, "loss": 0.2241, "step": 10375 }, { "epoch": 0.8220241632006338, "grad_norm": 1.438861035765178, "learning_rate": 1.6153213645445376e-06, "loss": 0.3243, "step": 10376 }, { "epoch": 0.8221033868092692, "grad_norm": 1.2804668749894867, "learning_rate": 1.613923278928735e-06, "loss": 0.2377, "step": 10377 }, { "epoch": 0.8221826104179045, "grad_norm": 1.640306674414265, "learning_rate": 1.6125257454957365e-06, "loss": 0.2982, "step": 10378 }, { "epoch": 0.82226183402654, "grad_norm": 1.473771925798413, "learning_rate": 1.6111287643375607e-06, "loss": 0.2463, "step": 10379 }, { "epoch": 0.8223410576351753, "grad_norm": 1.4404475061089161, "learning_rate": 1.6097323355461869e-06, "loss": 0.181, "step": 10380 }, { "epoch": 0.8224202812438106, "grad_norm": 1.4478799335377857, "learning_rate": 1.6083364592135708e-06, "loss": 0.2052, "step": 10381 }, { "epoch": 0.822499504852446, "grad_norm": 1.2013489423930401, "learning_rate": 1.6069411354316212e-06, "loss": 0.183, "step": 10382 }, { "epoch": 0.8225787284610814, "grad_norm": 1.3995359702064103, "learning_rate": 1.6055463642922098e-06, "loss": 0.177, "step": 10383 }, { "epoch": 0.8226579520697168, "grad_norm": 1.1100114667780525, "learning_rate": 1.6041521458871812e-06, "loss": 0.195, "step": 10384 }, { "epoch": 0.8227371756783521, "grad_norm": 1.392688884767605, "learning_rate": 1.6027584803083351e-06, "loss": 0.2193, "step": 10385 }, { "epoch": 0.8228163992869876, "grad_norm": 1.316965799772601, "learning_rate": 1.6013653676474371e-06, "loss": 0.2444, "step": 10386 }, { "epoch": 0.8228956228956229, "grad_norm": 1.1463397689776766, "learning_rate": 1.5999728079962197e-06, "loss": 0.2095, "step": 10387 }, { "epoch": 0.8229748465042582, "grad_norm": 1.318659053358065, "learning_rate": 1.5985808014463745e-06, "loss": 0.2093, "step": 10388 }, { "epoch": 0.8230540701128937, "grad_norm": 1.4214008979548918, "learning_rate": 1.5971893480895583e-06, "loss": 0.1911, "step": 10389 }, { "epoch": 0.823133293721529, "grad_norm": 1.1974444608587302, "learning_rate": 1.5957984480173893e-06, "loss": 0.2136, "step": 10390 }, { "epoch": 0.8232125173301644, "grad_norm": 1.3597731862588451, "learning_rate": 1.5944081013214575e-06, "loss": 0.1795, "step": 10391 }, { "epoch": 0.8232917409387998, "grad_norm": 1.4632313272608521, "learning_rate": 1.593018308093306e-06, "loss": 0.3382, "step": 10392 }, { "epoch": 0.8233709645474351, "grad_norm": 1.6441841653193927, "learning_rate": 1.5916290684244452e-06, "loss": 0.2671, "step": 10393 }, { "epoch": 0.8234501881560705, "grad_norm": 1.4790059493130998, "learning_rate": 1.5902403824063539e-06, "loss": 0.2519, "step": 10394 }, { "epoch": 0.8235294117647058, "grad_norm": 1.5018856840644295, "learning_rate": 1.5888522501304682e-06, "loss": 0.2535, "step": 10395 }, { "epoch": 0.8236086353733413, "grad_norm": 1.4279542940321752, "learning_rate": 1.587464671688187e-06, "loss": 0.205, "step": 10396 }, { "epoch": 0.8236878589819766, "grad_norm": 1.4944454237399976, "learning_rate": 1.5860776471708816e-06, "loss": 0.2653, "step": 10397 }, { "epoch": 0.823767082590612, "grad_norm": 1.52851382509781, "learning_rate": 1.5846911766698781e-06, "loss": 0.2545, "step": 10398 }, { "epoch": 0.8238463061992474, "grad_norm": 1.5499167860420955, "learning_rate": 1.5833052602764664e-06, "loss": 0.2935, "step": 10399 }, { "epoch": 0.8239255298078827, "grad_norm": 1.2656159569319236, "learning_rate": 1.5819198980819096e-06, "loss": 0.1989, "step": 10400 }, { "epoch": 0.8240047534165181, "grad_norm": 1.2850325877446969, "learning_rate": 1.5805350901774197e-06, "loss": 0.2054, "step": 10401 }, { "epoch": 0.8240839770251535, "grad_norm": 1.1260981729498543, "learning_rate": 1.5791508366541797e-06, "loss": 0.1571, "step": 10402 }, { "epoch": 0.8241632006337889, "grad_norm": 1.4404055901653536, "learning_rate": 1.577767137603341e-06, "loss": 0.1775, "step": 10403 }, { "epoch": 0.8242424242424242, "grad_norm": 0.9534078221552195, "learning_rate": 1.5763839931160108e-06, "loss": 0.1479, "step": 10404 }, { "epoch": 0.8243216478510597, "grad_norm": 1.384704749144126, "learning_rate": 1.5750014032832617e-06, "loss": 0.2461, "step": 10405 }, { "epoch": 0.824400871459695, "grad_norm": 1.3006642087200235, "learning_rate": 1.5736193681961332e-06, "loss": 0.1674, "step": 10406 }, { "epoch": 0.8244800950683303, "grad_norm": 1.1089500614798569, "learning_rate": 1.5722378879456234e-06, "loss": 0.1854, "step": 10407 }, { "epoch": 0.8245593186769657, "grad_norm": 1.095246332754987, "learning_rate": 1.5708569626226954e-06, "loss": 0.1716, "step": 10408 }, { "epoch": 0.8246385422856011, "grad_norm": 1.174671687959912, "learning_rate": 1.5694765923182798e-06, "loss": 0.1643, "step": 10409 }, { "epoch": 0.8247177658942365, "grad_norm": 1.2825065746484636, "learning_rate": 1.5680967771232659e-06, "loss": 0.2172, "step": 10410 }, { "epoch": 0.8247969895028718, "grad_norm": 1.2968781985275724, "learning_rate": 1.5667175171285054e-06, "loss": 0.1824, "step": 10411 }, { "epoch": 0.8248762131115073, "grad_norm": 1.0978913996852, "learning_rate": 1.5653388124248203e-06, "loss": 0.2001, "step": 10412 }, { "epoch": 0.8249554367201426, "grad_norm": 1.377710707717662, "learning_rate": 1.5639606631029892e-06, "loss": 0.2097, "step": 10413 }, { "epoch": 0.8250346603287779, "grad_norm": 1.5427707606580752, "learning_rate": 1.5625830692537569e-06, "loss": 0.3436, "step": 10414 }, { "epoch": 0.8251138839374134, "grad_norm": 1.1786801147703365, "learning_rate": 1.561206030967828e-06, "loss": 0.1814, "step": 10415 }, { "epoch": 0.8251931075460487, "grad_norm": 1.2274595711774268, "learning_rate": 1.5598295483358804e-06, "loss": 0.2014, "step": 10416 }, { "epoch": 0.8252723311546841, "grad_norm": 1.235756359941025, "learning_rate": 1.5584536214485457e-06, "loss": 0.2043, "step": 10417 }, { "epoch": 0.8253515547633195, "grad_norm": 1.788015719964634, "learning_rate": 1.5570782503964188e-06, "loss": 0.2822, "step": 10418 }, { "epoch": 0.8254307783719549, "grad_norm": 1.223167146359783, "learning_rate": 1.5557034352700672e-06, "loss": 0.1676, "step": 10419 }, { "epoch": 0.8255100019805902, "grad_norm": 1.3122266132947737, "learning_rate": 1.5543291761600133e-06, "loss": 0.2539, "step": 10420 }, { "epoch": 0.8255892255892255, "grad_norm": 1.212045610154158, "learning_rate": 1.552955473156742e-06, "loss": 0.2231, "step": 10421 }, { "epoch": 0.825668449197861, "grad_norm": 1.350331950579808, "learning_rate": 1.5515823263507112e-06, "loss": 0.2474, "step": 10422 }, { "epoch": 0.8257476728064963, "grad_norm": 1.0968873116705355, "learning_rate": 1.5502097358323321e-06, "loss": 0.1462, "step": 10423 }, { "epoch": 0.8258268964151317, "grad_norm": 1.1023080961360494, "learning_rate": 1.548837701691983e-06, "loss": 0.1638, "step": 10424 }, { "epoch": 0.8259061200237671, "grad_norm": 1.3081675469735337, "learning_rate": 1.547466224020009e-06, "loss": 0.1831, "step": 10425 }, { "epoch": 0.8259853436324025, "grad_norm": 1.5820404729924766, "learning_rate": 1.5460953029067128e-06, "loss": 0.1919, "step": 10426 }, { "epoch": 0.8260645672410378, "grad_norm": 1.294956337815726, "learning_rate": 1.5447249384423624e-06, "loss": 0.1771, "step": 10427 }, { "epoch": 0.8261437908496732, "grad_norm": 1.3373271013669448, "learning_rate": 1.543355130717189e-06, "loss": 0.2279, "step": 10428 }, { "epoch": 0.8262230144583086, "grad_norm": 1.1791264852630465, "learning_rate": 1.5419858798213928e-06, "loss": 0.2001, "step": 10429 }, { "epoch": 0.8263022380669439, "grad_norm": 1.2237456306618302, "learning_rate": 1.540617185845128e-06, "loss": 0.22, "step": 10430 }, { "epoch": 0.8263814616755794, "grad_norm": 1.158662628670014, "learning_rate": 1.5392490488785151e-06, "loss": 0.1863, "step": 10431 }, { "epoch": 0.8264606852842147, "grad_norm": 1.6245465078614232, "learning_rate": 1.537881469011645e-06, "loss": 0.3369, "step": 10432 }, { "epoch": 0.8265399088928501, "grad_norm": 1.3985079170662937, "learning_rate": 1.5365144463345627e-06, "loss": 0.2179, "step": 10433 }, { "epoch": 0.8266191325014854, "grad_norm": 1.6526039938845045, "learning_rate": 1.5351479809372772e-06, "loss": 0.3087, "step": 10434 }, { "epoch": 0.8266983561101208, "grad_norm": 1.2990904626019235, "learning_rate": 1.5337820729097697e-06, "loss": 0.1781, "step": 10435 }, { "epoch": 0.8267775797187562, "grad_norm": 1.4811222804168258, "learning_rate": 1.5324167223419762e-06, "loss": 0.2721, "step": 10436 }, { "epoch": 0.8268568033273915, "grad_norm": 1.4557939736013057, "learning_rate": 1.5310519293237958e-06, "loss": 0.2832, "step": 10437 }, { "epoch": 0.826936026936027, "grad_norm": 1.23819330798364, "learning_rate": 1.5296876939450978e-06, "loss": 0.1812, "step": 10438 }, { "epoch": 0.8270152505446623, "grad_norm": 1.3790964660525777, "learning_rate": 1.528324016295709e-06, "loss": 0.2235, "step": 10439 }, { "epoch": 0.8270944741532977, "grad_norm": 1.3793749855621042, "learning_rate": 1.5269608964654181e-06, "loss": 0.176, "step": 10440 }, { "epoch": 0.8271736977619331, "grad_norm": 1.4205379380261078, "learning_rate": 1.525598334543985e-06, "loss": 0.1938, "step": 10441 }, { "epoch": 0.8272529213705684, "grad_norm": 1.4264341725788607, "learning_rate": 1.524236330621125e-06, "loss": 0.2735, "step": 10442 }, { "epoch": 0.8273321449792038, "grad_norm": 1.404979967365074, "learning_rate": 1.5228748847865205e-06, "loss": 0.219, "step": 10443 }, { "epoch": 0.8274113685878391, "grad_norm": 1.3157075881798561, "learning_rate": 1.5215139971298131e-06, "loss": 0.2062, "step": 10444 }, { "epoch": 0.8274905921964746, "grad_norm": 1.2271147826848667, "learning_rate": 1.5201536677406147e-06, "loss": 0.215, "step": 10445 }, { "epoch": 0.8275698158051099, "grad_norm": 1.2056536752009905, "learning_rate": 1.518793896708496e-06, "loss": 0.1767, "step": 10446 }, { "epoch": 0.8276490394137453, "grad_norm": 1.4052691483357655, "learning_rate": 1.517434684122987e-06, "loss": 0.1866, "step": 10447 }, { "epoch": 0.8277282630223807, "grad_norm": 1.6209246354830482, "learning_rate": 1.5160760300735911e-06, "loss": 0.3082, "step": 10448 }, { "epoch": 0.827807486631016, "grad_norm": 1.6872191124006044, "learning_rate": 1.5147179346497665e-06, "loss": 0.2915, "step": 10449 }, { "epoch": 0.8278867102396514, "grad_norm": 1.616362676388551, "learning_rate": 1.513360397940935e-06, "loss": 0.364, "step": 10450 }, { "epoch": 0.8279659338482868, "grad_norm": 1.6069752087735503, "learning_rate": 1.5120034200364885e-06, "loss": 0.2942, "step": 10451 }, { "epoch": 0.8280451574569222, "grad_norm": 1.1670965955740882, "learning_rate": 1.5106470010257758e-06, "loss": 0.1975, "step": 10452 }, { "epoch": 0.8281243810655575, "grad_norm": 1.218441728890744, "learning_rate": 1.509291140998107e-06, "loss": 0.1941, "step": 10453 }, { "epoch": 0.828203604674193, "grad_norm": 1.4345412072207402, "learning_rate": 1.5079358400427635e-06, "loss": 0.2281, "step": 10454 }, { "epoch": 0.8282828282828283, "grad_norm": 1.4721384706701444, "learning_rate": 1.5065810982489849e-06, "loss": 0.2506, "step": 10455 }, { "epoch": 0.8283620518914636, "grad_norm": 1.3730560855914193, "learning_rate": 1.5052269157059707e-06, "loss": 0.2224, "step": 10456 }, { "epoch": 0.828441275500099, "grad_norm": 1.0984627790630974, "learning_rate": 1.503873292502892e-06, "loss": 0.1761, "step": 10457 }, { "epoch": 0.8285204991087344, "grad_norm": 1.276739895936656, "learning_rate": 1.5025202287288764e-06, "loss": 0.186, "step": 10458 }, { "epoch": 0.8285997227173698, "grad_norm": 1.158664052023081, "learning_rate": 1.501167724473016e-06, "loss": 0.2495, "step": 10459 }, { "epoch": 0.8286789463260051, "grad_norm": 1.1677818488737044, "learning_rate": 1.499815779824365e-06, "loss": 0.1792, "step": 10460 }, { "epoch": 0.8287581699346406, "grad_norm": 1.2078443445862654, "learning_rate": 1.4984643948719469e-06, "loss": 0.178, "step": 10461 }, { "epoch": 0.8288373935432759, "grad_norm": 1.2939661778658178, "learning_rate": 1.4971135697047422e-06, "loss": 0.2069, "step": 10462 }, { "epoch": 0.8289166171519112, "grad_norm": 1.3709717316926502, "learning_rate": 1.4957633044116925e-06, "loss": 0.2213, "step": 10463 }, { "epoch": 0.8289958407605467, "grad_norm": 1.2694239326792562, "learning_rate": 1.4944135990817121e-06, "loss": 0.2097, "step": 10464 }, { "epoch": 0.829075064369182, "grad_norm": 1.1448841492351802, "learning_rate": 1.4930644538036709e-06, "loss": 0.1331, "step": 10465 }, { "epoch": 0.8291542879778174, "grad_norm": 1.447332060384047, "learning_rate": 1.4917158686663992e-06, "loss": 0.209, "step": 10466 }, { "epoch": 0.8292335115864528, "grad_norm": 1.1696458467418067, "learning_rate": 1.490367843758701e-06, "loss": 0.1956, "step": 10467 }, { "epoch": 0.8293127351950882, "grad_norm": 1.2903550393989425, "learning_rate": 1.4890203791693337e-06, "loss": 0.2446, "step": 10468 }, { "epoch": 0.8293919588037235, "grad_norm": 1.2929151922134108, "learning_rate": 1.4876734749870213e-06, "loss": 0.1887, "step": 10469 }, { "epoch": 0.8294711824123588, "grad_norm": 1.3493758751616824, "learning_rate": 1.4863271313004535e-06, "loss": 0.2603, "step": 10470 }, { "epoch": 0.8295504060209943, "grad_norm": 1.3427871768272837, "learning_rate": 1.4849813481982788e-06, "loss": 0.2038, "step": 10471 }, { "epoch": 0.8296296296296296, "grad_norm": 1.09143748838598, "learning_rate": 1.483636125769108e-06, "loss": 0.1815, "step": 10472 }, { "epoch": 0.829708853238265, "grad_norm": 1.5203669474670853, "learning_rate": 1.482291464101523e-06, "loss": 0.2375, "step": 10473 }, { "epoch": 0.8297880768469004, "grad_norm": 1.3708184548188178, "learning_rate": 1.480947363284061e-06, "loss": 0.2453, "step": 10474 }, { "epoch": 0.8298673004555357, "grad_norm": 1.64704832916517, "learning_rate": 1.4796038234052235e-06, "loss": 0.2428, "step": 10475 }, { "epoch": 0.8299465240641711, "grad_norm": 1.4317492154301195, "learning_rate": 1.4782608445534741e-06, "loss": 0.2513, "step": 10476 }, { "epoch": 0.8300257476728065, "grad_norm": 1.2600340543932351, "learning_rate": 1.4769184268172465e-06, "loss": 0.1873, "step": 10477 }, { "epoch": 0.8301049712814419, "grad_norm": 1.4383762299639489, "learning_rate": 1.4755765702849311e-06, "loss": 0.247, "step": 10478 }, { "epoch": 0.8301841948900772, "grad_norm": 1.4803630563481738, "learning_rate": 1.4742352750448806e-06, "loss": 0.2188, "step": 10479 }, { "epoch": 0.8302634184987127, "grad_norm": 1.354573727373033, "learning_rate": 1.4728945411854135e-06, "loss": 0.2612, "step": 10480 }, { "epoch": 0.830342642107348, "grad_norm": 1.4326339975414173, "learning_rate": 1.4715543687948096e-06, "loss": 0.2588, "step": 10481 }, { "epoch": 0.8304218657159833, "grad_norm": 1.2164016612067483, "learning_rate": 1.470214757961317e-06, "loss": 0.198, "step": 10482 }, { "epoch": 0.8305010893246187, "grad_norm": 1.1555759947209618, "learning_rate": 1.4688757087731386e-06, "loss": 0.2237, "step": 10483 }, { "epoch": 0.8305803129332541, "grad_norm": 1.5733336407777812, "learning_rate": 1.4675372213184458e-06, "loss": 0.2592, "step": 10484 }, { "epoch": 0.8306595365418895, "grad_norm": 1.8759869510185854, "learning_rate": 1.4661992956853699e-06, "loss": 0.259, "step": 10485 }, { "epoch": 0.8307387601505248, "grad_norm": 1.5596886337090299, "learning_rate": 1.4648619319620105e-06, "loss": 0.2756, "step": 10486 }, { "epoch": 0.8308179837591603, "grad_norm": 1.2608031732856335, "learning_rate": 1.463525130236424e-06, "loss": 0.1794, "step": 10487 }, { "epoch": 0.8308972073677956, "grad_norm": 1.1529548292727665, "learning_rate": 1.4621888905966308e-06, "loss": 0.176, "step": 10488 }, { "epoch": 0.8309764309764309, "grad_norm": 1.4339897861899062, "learning_rate": 1.4608532131306198e-06, "loss": 0.3178, "step": 10489 }, { "epoch": 0.8310556545850664, "grad_norm": 1.509207170922193, "learning_rate": 1.459518097926337e-06, "loss": 0.2594, "step": 10490 }, { "epoch": 0.8311348781937017, "grad_norm": 0.933489930944928, "learning_rate": 1.4581835450716907e-06, "loss": 0.122, "step": 10491 }, { "epoch": 0.8312141018023371, "grad_norm": 1.3098267596961726, "learning_rate": 1.4568495546545603e-06, "loss": 0.2302, "step": 10492 }, { "epoch": 0.8312933254109725, "grad_norm": 1.5673986074660746, "learning_rate": 1.4555161267627793e-06, "loss": 0.3526, "step": 10493 }, { "epoch": 0.8313725490196079, "grad_norm": 1.3639975537414533, "learning_rate": 1.4541832614841455e-06, "loss": 0.2062, "step": 10494 }, { "epoch": 0.8314517726282432, "grad_norm": 1.2594827228710086, "learning_rate": 1.4528509589064276e-06, "loss": 0.1386, "step": 10495 }, { "epoch": 0.8315309962368785, "grad_norm": 1.5253252160816442, "learning_rate": 1.4515192191173466e-06, "loss": 0.2561, "step": 10496 }, { "epoch": 0.831610219845514, "grad_norm": 1.2112868283898253, "learning_rate": 1.45018804220459e-06, "loss": 0.1939, "step": 10497 }, { "epoch": 0.8316894434541493, "grad_norm": 1.0772623401697152, "learning_rate": 1.4488574282558143e-06, "loss": 0.1722, "step": 10498 }, { "epoch": 0.8317686670627847, "grad_norm": 1.1517497412415536, "learning_rate": 1.4475273773586319e-06, "loss": 0.191, "step": 10499 }, { "epoch": 0.8318478906714201, "grad_norm": 1.3576760029433559, "learning_rate": 1.446197889600619e-06, "loss": 0.2114, "step": 10500 }, { "epoch": 0.8319271142800555, "grad_norm": 1.3456224959096212, "learning_rate": 1.444868965069315e-06, "loss": 0.2632, "step": 10501 }, { "epoch": 0.8320063378886908, "grad_norm": 1.1746166613153766, "learning_rate": 1.443540603852227e-06, "loss": 0.1889, "step": 10502 }, { "epoch": 0.8320855614973262, "grad_norm": 1.117846493387386, "learning_rate": 1.4422128060368201e-06, "loss": 0.15, "step": 10503 }, { "epoch": 0.8321647851059616, "grad_norm": 1.7555418597228245, "learning_rate": 1.4408855717105197e-06, "loss": 0.2966, "step": 10504 }, { "epoch": 0.8322440087145969, "grad_norm": 1.2560473747015137, "learning_rate": 1.4395589009607225e-06, "loss": 0.2138, "step": 10505 }, { "epoch": 0.8323232323232324, "grad_norm": 1.3595679190079988, "learning_rate": 1.4382327938747808e-06, "loss": 0.2185, "step": 10506 }, { "epoch": 0.8324024559318677, "grad_norm": 1.298919626404769, "learning_rate": 1.4369072505400117e-06, "loss": 0.1877, "step": 10507 }, { "epoch": 0.8324816795405031, "grad_norm": 1.224169809273626, "learning_rate": 1.4355822710436995e-06, "loss": 0.174, "step": 10508 }, { "epoch": 0.8325609031491384, "grad_norm": 1.2992214249532263, "learning_rate": 1.4342578554730858e-06, "loss": 0.1638, "step": 10509 }, { "epoch": 0.8326401267577738, "grad_norm": 1.1051795468237293, "learning_rate": 1.4329340039153738e-06, "loss": 0.1907, "step": 10510 }, { "epoch": 0.8327193503664092, "grad_norm": 1.307194867459968, "learning_rate": 1.4316107164577376e-06, "loss": 0.1835, "step": 10511 }, { "epoch": 0.8327985739750445, "grad_norm": 1.1230924771616186, "learning_rate": 1.430287993187307e-06, "loss": 0.2111, "step": 10512 }, { "epoch": 0.83287779758368, "grad_norm": 1.2699326232187946, "learning_rate": 1.4289658341911782e-06, "loss": 0.1635, "step": 10513 }, { "epoch": 0.8329570211923153, "grad_norm": 1.46608894835311, "learning_rate": 1.4276442395564049e-06, "loss": 0.2441, "step": 10514 }, { "epoch": 0.8330362448009507, "grad_norm": 1.2287292315959046, "learning_rate": 1.426323209370014e-06, "loss": 0.1739, "step": 10515 }, { "epoch": 0.8331154684095861, "grad_norm": 1.5236128841183416, "learning_rate": 1.425002743718985e-06, "loss": 0.2452, "step": 10516 }, { "epoch": 0.8331946920182214, "grad_norm": 1.5824350853744693, "learning_rate": 1.4236828426902626e-06, "loss": 0.271, "step": 10517 }, { "epoch": 0.8332739156268568, "grad_norm": 1.0270909450230639, "learning_rate": 1.4223635063707619e-06, "loss": 0.1875, "step": 10518 }, { "epoch": 0.8333531392354921, "grad_norm": 1.2362274115367011, "learning_rate": 1.421044734847351e-06, "loss": 0.2117, "step": 10519 }, { "epoch": 0.8334323628441276, "grad_norm": 1.5682810614841907, "learning_rate": 1.4197265282068618e-06, "loss": 0.2766, "step": 10520 }, { "epoch": 0.8335115864527629, "grad_norm": 1.8509526057611487, "learning_rate": 1.4184088865360978e-06, "loss": 0.3006, "step": 10521 }, { "epoch": 0.8335908100613983, "grad_norm": 1.2981534492302385, "learning_rate": 1.4170918099218166e-06, "loss": 0.1939, "step": 10522 }, { "epoch": 0.8336700336700337, "grad_norm": 1.6089274125683373, "learning_rate": 1.41577529845074e-06, "loss": 0.2716, "step": 10523 }, { "epoch": 0.833749257278669, "grad_norm": 1.1815635085174374, "learning_rate": 1.4144593522095563e-06, "loss": 0.2106, "step": 10524 }, { "epoch": 0.8338284808873044, "grad_norm": 2.0633184183539854, "learning_rate": 1.4131439712849148e-06, "loss": 0.2271, "step": 10525 }, { "epoch": 0.8339077044959398, "grad_norm": 1.4039238887972836, "learning_rate": 1.4118291557634223e-06, "loss": 0.3216, "step": 10526 }, { "epoch": 0.8339869281045752, "grad_norm": 1.5581597463961219, "learning_rate": 1.410514905731658e-06, "loss": 0.2781, "step": 10527 }, { "epoch": 0.8340661517132105, "grad_norm": 1.2319396328229066, "learning_rate": 1.4092012212761574e-06, "loss": 0.2069, "step": 10528 }, { "epoch": 0.834145375321846, "grad_norm": 1.0120120004353914, "learning_rate": 1.4078881024834213e-06, "loss": 0.1267, "step": 10529 }, { "epoch": 0.8342245989304813, "grad_norm": 1.203910036146133, "learning_rate": 1.406575549439907e-06, "loss": 0.1761, "step": 10530 }, { "epoch": 0.8343038225391166, "grad_norm": 1.3937406881915537, "learning_rate": 1.4052635622320477e-06, "loss": 0.2266, "step": 10531 }, { "epoch": 0.834383046147752, "grad_norm": 1.4767836842521311, "learning_rate": 1.4039521409462265e-06, "loss": 0.2669, "step": 10532 }, { "epoch": 0.8344622697563874, "grad_norm": 1.2466478366329732, "learning_rate": 1.4026412856687931e-06, "loss": 0.2587, "step": 10533 }, { "epoch": 0.8345414933650228, "grad_norm": 1.2716867736926591, "learning_rate": 1.4013309964860667e-06, "loss": 0.1974, "step": 10534 }, { "epoch": 0.8346207169736581, "grad_norm": 1.1441397814842527, "learning_rate": 1.4000212734843187e-06, "loss": 0.1749, "step": 10535 }, { "epoch": 0.8346999405822936, "grad_norm": 1.8932301766464903, "learning_rate": 1.3987121167497874e-06, "loss": 0.2529, "step": 10536 }, { "epoch": 0.8347791641909289, "grad_norm": 1.3135878958835159, "learning_rate": 1.3974035263686792e-06, "loss": 0.2206, "step": 10537 }, { "epoch": 0.8348583877995642, "grad_norm": 0.9846565731925334, "learning_rate": 1.396095502427155e-06, "loss": 0.1067, "step": 10538 }, { "epoch": 0.8349376114081997, "grad_norm": 1.2814572175289134, "learning_rate": 1.3947880450113404e-06, "loss": 0.2197, "step": 10539 }, { "epoch": 0.835016835016835, "grad_norm": 1.3351611547655284, "learning_rate": 1.39348115420733e-06, "loss": 0.2049, "step": 10540 }, { "epoch": 0.8350960586254704, "grad_norm": 1.3186450346978396, "learning_rate": 1.392174830101174e-06, "loss": 0.1883, "step": 10541 }, { "epoch": 0.8351752822341058, "grad_norm": 1.0813180871141732, "learning_rate": 1.3908690727788842e-06, "loss": 0.1367, "step": 10542 }, { "epoch": 0.8352545058427412, "grad_norm": 1.2404744643224725, "learning_rate": 1.3895638823264447e-06, "loss": 0.2313, "step": 10543 }, { "epoch": 0.8353337294513765, "grad_norm": 1.470303528888458, "learning_rate": 1.3882592588297917e-06, "loss": 0.2156, "step": 10544 }, { "epoch": 0.8354129530600118, "grad_norm": 1.1296365480557664, "learning_rate": 1.38695520237483e-06, "loss": 0.2032, "step": 10545 }, { "epoch": 0.8354921766686473, "grad_norm": 1.323176135950088, "learning_rate": 1.3856517130474235e-06, "loss": 0.2389, "step": 10546 }, { "epoch": 0.8355714002772826, "grad_norm": 1.3284572194270088, "learning_rate": 1.384348790933403e-06, "loss": 0.2813, "step": 10547 }, { "epoch": 0.835650623885918, "grad_norm": 1.137063173093719, "learning_rate": 1.3830464361185592e-06, "loss": 0.158, "step": 10548 }, { "epoch": 0.8357298474945534, "grad_norm": 1.3689786919127183, "learning_rate": 1.3817446486886433e-06, "loss": 0.1825, "step": 10549 }, { "epoch": 0.8358090711031888, "grad_norm": 1.166034337622154, "learning_rate": 1.3804434287293756e-06, "loss": 0.1648, "step": 10550 }, { "epoch": 0.8358882947118241, "grad_norm": 1.4022572638477493, "learning_rate": 1.3791427763264342e-06, "loss": 0.2287, "step": 10551 }, { "epoch": 0.8359675183204595, "grad_norm": 1.6221468833774844, "learning_rate": 1.3778426915654575e-06, "loss": 0.2689, "step": 10552 }, { "epoch": 0.8360467419290949, "grad_norm": 1.411561457489168, "learning_rate": 1.3765431745320546e-06, "loss": 0.1948, "step": 10553 }, { "epoch": 0.8361259655377302, "grad_norm": 1.3625077478813943, "learning_rate": 1.3752442253117903e-06, "loss": 0.2672, "step": 10554 }, { "epoch": 0.8362051891463657, "grad_norm": 1.1818437776152657, "learning_rate": 1.373945843990192e-06, "loss": 0.2, "step": 10555 }, { "epoch": 0.836284412755001, "grad_norm": 1.393240285654648, "learning_rate": 1.3726480306527578e-06, "loss": 0.2114, "step": 10556 }, { "epoch": 0.8363636363636363, "grad_norm": 1.3860434319826995, "learning_rate": 1.3713507853849373e-06, "loss": 0.2425, "step": 10557 }, { "epoch": 0.8364428599722717, "grad_norm": 1.0172884683093302, "learning_rate": 1.3700541082721464e-06, "loss": 0.153, "step": 10558 }, { "epoch": 0.8365220835809071, "grad_norm": 1.5240819100071796, "learning_rate": 1.3687579993997703e-06, "loss": 0.2846, "step": 10559 }, { "epoch": 0.8366013071895425, "grad_norm": 1.5108697664713944, "learning_rate": 1.3674624588531481e-06, "loss": 0.2132, "step": 10560 }, { "epoch": 0.8366805307981778, "grad_norm": 1.1935349047651764, "learning_rate": 1.3661674867175844e-06, "loss": 0.2076, "step": 10561 }, { "epoch": 0.8367597544068133, "grad_norm": 1.2433805926004182, "learning_rate": 1.3648730830783507e-06, "loss": 0.2069, "step": 10562 }, { "epoch": 0.8368389780154486, "grad_norm": 1.1183355725368211, "learning_rate": 1.3635792480206744e-06, "loss": 0.168, "step": 10563 }, { "epoch": 0.8369182016240839, "grad_norm": 1.399026550174716, "learning_rate": 1.3622859816297473e-06, "loss": 0.2324, "step": 10564 }, { "epoch": 0.8369974252327194, "grad_norm": 1.2565435085372032, "learning_rate": 1.3609932839907281e-06, "loss": 0.1816, "step": 10565 }, { "epoch": 0.8370766488413547, "grad_norm": 1.4572249630085097, "learning_rate": 1.3597011551887329e-06, "loss": 0.2448, "step": 10566 }, { "epoch": 0.8371558724499901, "grad_norm": 1.2350664164415799, "learning_rate": 1.3584095953088405e-06, "loss": 0.1937, "step": 10567 }, { "epoch": 0.8372350960586254, "grad_norm": 0.9197494095088624, "learning_rate": 1.3571186044360973e-06, "loss": 0.1442, "step": 10568 }, { "epoch": 0.8373143196672609, "grad_norm": 1.4518128342091754, "learning_rate": 1.3558281826555065e-06, "loss": 0.2605, "step": 10569 }, { "epoch": 0.8373935432758962, "grad_norm": 2.0573082274836474, "learning_rate": 1.3545383300520375e-06, "loss": 0.3431, "step": 10570 }, { "epoch": 0.8374727668845315, "grad_norm": 1.352905874637864, "learning_rate": 1.3532490467106186e-06, "loss": 0.2705, "step": 10571 }, { "epoch": 0.837551990493167, "grad_norm": 1.3023197662109454, "learning_rate": 1.3519603327161456e-06, "loss": 0.2966, "step": 10572 }, { "epoch": 0.8376312141018023, "grad_norm": 1.2107913018479692, "learning_rate": 1.3506721881534734e-06, "loss": 0.1523, "step": 10573 }, { "epoch": 0.8377104377104377, "grad_norm": 1.132899724751293, "learning_rate": 1.3493846131074173e-06, "loss": 0.1437, "step": 10574 }, { "epoch": 0.8377896613190731, "grad_norm": 1.5194909336116953, "learning_rate": 1.3480976076627617e-06, "loss": 0.2769, "step": 10575 }, { "epoch": 0.8378688849277085, "grad_norm": 1.3932035569980017, "learning_rate": 1.3468111719042497e-06, "loss": 0.2719, "step": 10576 }, { "epoch": 0.8379481085363438, "grad_norm": 1.035175483166696, "learning_rate": 1.345525305916583e-06, "loss": 0.1058, "step": 10577 }, { "epoch": 0.8380273321449792, "grad_norm": 1.0872354805156903, "learning_rate": 1.3442400097844344e-06, "loss": 0.1681, "step": 10578 }, { "epoch": 0.8381065557536146, "grad_norm": 1.201323635196354, "learning_rate": 1.342955283592432e-06, "loss": 0.1694, "step": 10579 }, { "epoch": 0.8381857793622499, "grad_norm": 1.4367099800645067, "learning_rate": 1.3416711274251671e-06, "loss": 0.2622, "step": 10580 }, { "epoch": 0.8382650029708854, "grad_norm": 1.1950444400949247, "learning_rate": 1.3403875413671997e-06, "loss": 0.2185, "step": 10581 }, { "epoch": 0.8383442265795207, "grad_norm": 1.398346293874672, "learning_rate": 1.3391045255030444e-06, "loss": 0.1955, "step": 10582 }, { "epoch": 0.8384234501881561, "grad_norm": 1.1508050543784236, "learning_rate": 1.3378220799171815e-06, "loss": 0.1867, "step": 10583 }, { "epoch": 0.8385026737967914, "grad_norm": 1.4765358007275802, "learning_rate": 1.3365402046940569e-06, "loss": 0.2996, "step": 10584 }, { "epoch": 0.8385818974054268, "grad_norm": 1.3182695830935403, "learning_rate": 1.3352588999180726e-06, "loss": 0.2307, "step": 10585 }, { "epoch": 0.8386611210140622, "grad_norm": 1.394954662121768, "learning_rate": 1.3339781656735995e-06, "loss": 0.2425, "step": 10586 }, { "epoch": 0.8387403446226975, "grad_norm": 1.2853709706661085, "learning_rate": 1.3326980020449621e-06, "loss": 0.1848, "step": 10587 }, { "epoch": 0.838819568231333, "grad_norm": 1.445014214656466, "learning_rate": 1.3314184091164605e-06, "loss": 0.2581, "step": 10588 }, { "epoch": 0.8388987918399683, "grad_norm": 1.5026310491922557, "learning_rate": 1.3301393869723457e-06, "loss": 0.2612, "step": 10589 }, { "epoch": 0.8389780154486037, "grad_norm": 1.664314382806888, "learning_rate": 1.328860935696833e-06, "loss": 0.2179, "step": 10590 }, { "epoch": 0.8390572390572391, "grad_norm": 1.0423492993377401, "learning_rate": 1.3275830553741066e-06, "loss": 0.1443, "step": 10591 }, { "epoch": 0.8391364626658744, "grad_norm": 1.205682938725894, "learning_rate": 1.3263057460883078e-06, "loss": 0.1595, "step": 10592 }, { "epoch": 0.8392156862745098, "grad_norm": 1.1464046454390402, "learning_rate": 1.3250290079235383e-06, "loss": 0.2173, "step": 10593 }, { "epoch": 0.8392949098831451, "grad_norm": 1.3118975841036833, "learning_rate": 1.3237528409638688e-06, "loss": 0.2052, "step": 10594 }, { "epoch": 0.8393741334917806, "grad_norm": 1.666342912248758, "learning_rate": 1.3224772452933277e-06, "loss": 0.2952, "step": 10595 }, { "epoch": 0.8394533571004159, "grad_norm": 1.296324809678496, "learning_rate": 1.321202220995904e-06, "loss": 0.2191, "step": 10596 }, { "epoch": 0.8395325807090513, "grad_norm": 0.9781787700115439, "learning_rate": 1.3199277681555578e-06, "loss": 0.171, "step": 10597 }, { "epoch": 0.8396118043176867, "grad_norm": 1.3557343932237587, "learning_rate": 1.3186538868562004e-06, "loss": 0.216, "step": 10598 }, { "epoch": 0.839691027926322, "grad_norm": 1.0757046240751333, "learning_rate": 1.3173805771817138e-06, "loss": 0.14, "step": 10599 }, { "epoch": 0.8397702515349574, "grad_norm": 1.3844027667864627, "learning_rate": 1.3161078392159355e-06, "loss": 0.2697, "step": 10600 }, { "epoch": 0.8398494751435928, "grad_norm": 1.2368137242644355, "learning_rate": 1.3148356730426737e-06, "loss": 0.1566, "step": 10601 }, { "epoch": 0.8399286987522282, "grad_norm": 1.475173582252917, "learning_rate": 1.3135640787456926e-06, "loss": 0.2251, "step": 10602 }, { "epoch": 0.8400079223608635, "grad_norm": 1.238176460589784, "learning_rate": 1.312293056408719e-06, "loss": 0.2369, "step": 10603 }, { "epoch": 0.840087145969499, "grad_norm": 1.327112369471202, "learning_rate": 1.3110226061154462e-06, "loss": 0.2565, "step": 10604 }, { "epoch": 0.8401663695781343, "grad_norm": 1.0032639526775977, "learning_rate": 1.309752727949527e-06, "loss": 0.13, "step": 10605 }, { "epoch": 0.8402455931867696, "grad_norm": 0.9827589705352223, "learning_rate": 1.3084834219945731e-06, "loss": 0.1778, "step": 10606 }, { "epoch": 0.840324816795405, "grad_norm": 1.2948837368552348, "learning_rate": 1.3072146883341675e-06, "loss": 0.2109, "step": 10607 }, { "epoch": 0.8404040404040404, "grad_norm": 1.3589571744552542, "learning_rate": 1.3059465270518469e-06, "loss": 0.2164, "step": 10608 }, { "epoch": 0.8404832640126758, "grad_norm": 1.3001982952294369, "learning_rate": 1.3046789382311132e-06, "loss": 0.1718, "step": 10609 }, { "epoch": 0.8405624876213111, "grad_norm": 1.273119143220718, "learning_rate": 1.3034119219554341e-06, "loss": 0.1829, "step": 10610 }, { "epoch": 0.8406417112299466, "grad_norm": 0.9759152295223514, "learning_rate": 1.3021454783082344e-06, "loss": 0.1378, "step": 10611 }, { "epoch": 0.8407209348385819, "grad_norm": 1.405025925539301, "learning_rate": 1.3008796073729013e-06, "loss": 0.2789, "step": 10612 }, { "epoch": 0.8408001584472172, "grad_norm": 1.076075224282842, "learning_rate": 1.2996143092327906e-06, "loss": 0.1881, "step": 10613 }, { "epoch": 0.8408793820558527, "grad_norm": 1.9342460123087852, "learning_rate": 1.2983495839712146e-06, "loss": 0.399, "step": 10614 }, { "epoch": 0.840958605664488, "grad_norm": 1.663021999341558, "learning_rate": 1.2970854316714477e-06, "loss": 0.2585, "step": 10615 }, { "epoch": 0.8410378292731234, "grad_norm": 1.2706319104530757, "learning_rate": 1.2958218524167288e-06, "loss": 0.1883, "step": 10616 }, { "epoch": 0.8411170528817588, "grad_norm": 1.3156460761936768, "learning_rate": 1.2945588462902603e-06, "loss": 0.2403, "step": 10617 }, { "epoch": 0.8411962764903942, "grad_norm": 1.1583838390951062, "learning_rate": 1.2932964133752036e-06, "loss": 0.2221, "step": 10618 }, { "epoch": 0.8412755000990295, "grad_norm": 1.3189785389125046, "learning_rate": 1.292034553754683e-06, "loss": 0.1825, "step": 10619 }, { "epoch": 0.8413547237076648, "grad_norm": 1.4760290426911606, "learning_rate": 1.2907732675117878e-06, "loss": 0.2502, "step": 10620 }, { "epoch": 0.8414339473163003, "grad_norm": 1.0927255835844973, "learning_rate": 1.2895125547295672e-06, "loss": 0.166, "step": 10621 }, { "epoch": 0.8415131709249356, "grad_norm": 1.4071028198388331, "learning_rate": 1.2882524154910314e-06, "loss": 0.2524, "step": 10622 }, { "epoch": 0.841592394533571, "grad_norm": 1.2915084599875934, "learning_rate": 1.2869928498791572e-06, "loss": 0.1916, "step": 10623 }, { "epoch": 0.8416716181422064, "grad_norm": 1.093551277096425, "learning_rate": 1.2857338579768796e-06, "loss": 0.1659, "step": 10624 }, { "epoch": 0.8417508417508418, "grad_norm": 1.5699565818615442, "learning_rate": 1.2844754398670954e-06, "loss": 0.2519, "step": 10625 }, { "epoch": 0.8418300653594771, "grad_norm": 1.2934471388179427, "learning_rate": 1.2832175956326686e-06, "loss": 0.1598, "step": 10626 }, { "epoch": 0.8419092889681125, "grad_norm": 1.4060391609400043, "learning_rate": 1.2819603253564206e-06, "loss": 0.2468, "step": 10627 }, { "epoch": 0.8419885125767479, "grad_norm": 1.5633422596616775, "learning_rate": 1.280703629121135e-06, "loss": 0.25, "step": 10628 }, { "epoch": 0.8420677361853832, "grad_norm": 1.5726215066220275, "learning_rate": 1.2794475070095624e-06, "loss": 0.2799, "step": 10629 }, { "epoch": 0.8421469597940187, "grad_norm": 1.62906028959627, "learning_rate": 1.2781919591044113e-06, "loss": 0.3179, "step": 10630 }, { "epoch": 0.842226183402654, "grad_norm": 1.338619486709502, "learning_rate": 1.2769369854883528e-06, "loss": 0.1927, "step": 10631 }, { "epoch": 0.8423054070112893, "grad_norm": 1.3976863179057673, "learning_rate": 1.2756825862440192e-06, "loss": 0.2424, "step": 10632 }, { "epoch": 0.8423846306199247, "grad_norm": 1.304948150923471, "learning_rate": 1.2744287614540108e-06, "loss": 0.2239, "step": 10633 }, { "epoch": 0.8424638542285601, "grad_norm": 1.2964213021949842, "learning_rate": 1.2731755112008838e-06, "loss": 0.2133, "step": 10634 }, { "epoch": 0.8425430778371955, "grad_norm": 1.2249409427594202, "learning_rate": 1.2719228355671576e-06, "loss": 0.1603, "step": 10635 }, { "epoch": 0.8426223014458308, "grad_norm": 1.103714235205618, "learning_rate": 1.2706707346353165e-06, "loss": 0.1268, "step": 10636 }, { "epoch": 0.8427015250544663, "grad_norm": 1.0457767186940763, "learning_rate": 1.2694192084878032e-06, "loss": 0.1748, "step": 10637 }, { "epoch": 0.8427807486631016, "grad_norm": 1.4361624679253757, "learning_rate": 1.2681682572070275e-06, "loss": 0.2433, "step": 10638 }, { "epoch": 0.8428599722717369, "grad_norm": 1.2751138282489736, "learning_rate": 1.2669178808753568e-06, "loss": 0.2055, "step": 10639 }, { "epoch": 0.8429391958803724, "grad_norm": 1.4395160062166592, "learning_rate": 1.265668079575124e-06, "loss": 0.2665, "step": 10640 }, { "epoch": 0.8430184194890077, "grad_norm": 1.0921868742369334, "learning_rate": 1.264418853388618e-06, "loss": 0.1701, "step": 10641 }, { "epoch": 0.8430976430976431, "grad_norm": 1.1583797119288237, "learning_rate": 1.2631702023980997e-06, "loss": 0.1385, "step": 10642 }, { "epoch": 0.8431768667062784, "grad_norm": 0.9456458598052674, "learning_rate": 1.2619221266857851e-06, "loss": 0.132, "step": 10643 }, { "epoch": 0.8432560903149139, "grad_norm": 1.2293824359425345, "learning_rate": 1.260674626333851e-06, "loss": 0.2095, "step": 10644 }, { "epoch": 0.8433353139235492, "grad_norm": 1.560850569085244, "learning_rate": 1.259427701424445e-06, "loss": 0.2813, "step": 10645 }, { "epoch": 0.8434145375321845, "grad_norm": 1.1257942561351288, "learning_rate": 1.2581813520396668e-06, "loss": 0.1729, "step": 10646 }, { "epoch": 0.84349376114082, "grad_norm": 1.3370231647024697, "learning_rate": 1.256935578261581e-06, "loss": 0.1739, "step": 10647 }, { "epoch": 0.8435729847494553, "grad_norm": 1.6987855992331826, "learning_rate": 1.255690380172222e-06, "loss": 0.2112, "step": 10648 }, { "epoch": 0.8436522083580907, "grad_norm": 1.7299513439146876, "learning_rate": 1.2544457578535764e-06, "loss": 0.3077, "step": 10649 }, { "epoch": 0.8437314319667261, "grad_norm": 1.1309325695999808, "learning_rate": 1.253201711387594e-06, "loss": 0.1584, "step": 10650 }, { "epoch": 0.8438106555753615, "grad_norm": 1.268727736909756, "learning_rate": 1.2519582408561936e-06, "loss": 0.2085, "step": 10651 }, { "epoch": 0.8438898791839968, "grad_norm": 1.0684854951617462, "learning_rate": 1.2507153463412513e-06, "loss": 0.1463, "step": 10652 }, { "epoch": 0.8439691027926322, "grad_norm": 1.1138381582997856, "learning_rate": 1.2494730279246014e-06, "loss": 0.1918, "step": 10653 }, { "epoch": 0.8440483264012676, "grad_norm": 1.1550342154532884, "learning_rate": 1.2482312856880506e-06, "loss": 0.1768, "step": 10654 }, { "epoch": 0.8441275500099029, "grad_norm": 1.4054152283280172, "learning_rate": 1.2469901197133582e-06, "loss": 0.2941, "step": 10655 }, { "epoch": 0.8442067736185384, "grad_norm": 1.3322906002812531, "learning_rate": 1.2457495300822497e-06, "loss": 0.2482, "step": 10656 }, { "epoch": 0.8442859972271737, "grad_norm": 1.1861196564050724, "learning_rate": 1.244509516876411e-06, "loss": 0.1405, "step": 10657 }, { "epoch": 0.8443652208358091, "grad_norm": 1.4745334860123076, "learning_rate": 1.2432700801774923e-06, "loss": 0.3082, "step": 10658 }, { "epoch": 0.8444444444444444, "grad_norm": 1.4664337802225427, "learning_rate": 1.2420312200671048e-06, "loss": 0.2245, "step": 10659 }, { "epoch": 0.8445236680530798, "grad_norm": 1.4975500575346707, "learning_rate": 1.240792936626819e-06, "loss": 0.2828, "step": 10660 }, { "epoch": 0.8446028916617152, "grad_norm": 1.3022236180420792, "learning_rate": 1.2395552299381742e-06, "loss": 0.2191, "step": 10661 }, { "epoch": 0.8446821152703505, "grad_norm": 1.10817952657081, "learning_rate": 1.238318100082664e-06, "loss": 0.1236, "step": 10662 }, { "epoch": 0.844761338878986, "grad_norm": 1.4630776430584254, "learning_rate": 1.2370815471417464e-06, "loss": 0.307, "step": 10663 }, { "epoch": 0.8448405624876213, "grad_norm": 1.1510462680703422, "learning_rate": 1.2358455711968463e-06, "loss": 0.1879, "step": 10664 }, { "epoch": 0.8449197860962567, "grad_norm": 1.2881976419268701, "learning_rate": 1.2346101723293457e-06, "loss": 0.2054, "step": 10665 }, { "epoch": 0.8449990097048921, "grad_norm": 1.0295500437155463, "learning_rate": 1.233375350620587e-06, "loss": 0.1668, "step": 10666 }, { "epoch": 0.8450782333135274, "grad_norm": 1.2843133297267746, "learning_rate": 1.2321411061518807e-06, "loss": 0.1966, "step": 10667 }, { "epoch": 0.8451574569221628, "grad_norm": 1.6546456667167602, "learning_rate": 1.2309074390044939e-06, "loss": 0.2458, "step": 10668 }, { "epoch": 0.8452366805307981, "grad_norm": 1.3914471160023159, "learning_rate": 1.2296743492596587e-06, "loss": 0.1884, "step": 10669 }, { "epoch": 0.8453159041394336, "grad_norm": 1.3961436854037759, "learning_rate": 1.2284418369985651e-06, "loss": 0.2313, "step": 10670 }, { "epoch": 0.8453951277480689, "grad_norm": 1.3242315802655225, "learning_rate": 1.227209902302372e-06, "loss": 0.1847, "step": 10671 }, { "epoch": 0.8454743513567043, "grad_norm": 1.3552325434137533, "learning_rate": 1.2259785452521956e-06, "loss": 0.2196, "step": 10672 }, { "epoch": 0.8455535749653397, "grad_norm": 1.2257620498050938, "learning_rate": 1.2247477659291118e-06, "loss": 0.2352, "step": 10673 }, { "epoch": 0.845632798573975, "grad_norm": 1.196068629668826, "learning_rate": 1.223517564414166e-06, "loss": 0.1463, "step": 10674 }, { "epoch": 0.8457120221826104, "grad_norm": 1.2538461054055066, "learning_rate": 1.2222879407883592e-06, "loss": 0.2, "step": 10675 }, { "epoch": 0.8457912457912458, "grad_norm": 1.3831341506769856, "learning_rate": 1.2210588951326542e-06, "loss": 0.282, "step": 10676 }, { "epoch": 0.8458704693998812, "grad_norm": 1.542175791200493, "learning_rate": 1.2198304275279805e-06, "loss": 0.2721, "step": 10677 }, { "epoch": 0.8459496930085165, "grad_norm": 1.6004554091306153, "learning_rate": 1.2186025380552259e-06, "loss": 0.276, "step": 10678 }, { "epoch": 0.846028916617152, "grad_norm": 1.6496607656213018, "learning_rate": 1.2173752267952376e-06, "loss": 0.3147, "step": 10679 }, { "epoch": 0.8461081402257873, "grad_norm": 1.2859315189884697, "learning_rate": 1.2161484938288348e-06, "loss": 0.2388, "step": 10680 }, { "epoch": 0.8461873638344226, "grad_norm": 1.4647078284338793, "learning_rate": 1.214922339236788e-06, "loss": 0.2573, "step": 10681 }, { "epoch": 0.846266587443058, "grad_norm": 1.1542581593233001, "learning_rate": 1.213696763099832e-06, "loss": 0.187, "step": 10682 }, { "epoch": 0.8463458110516934, "grad_norm": 1.2747255009741134, "learning_rate": 1.2124717654986695e-06, "loss": 0.1758, "step": 10683 }, { "epoch": 0.8464250346603288, "grad_norm": 1.6450273815709602, "learning_rate": 1.2112473465139586e-06, "loss": 0.3133, "step": 10684 }, { "epoch": 0.8465042582689641, "grad_norm": 1.7697703806848302, "learning_rate": 1.210023506226321e-06, "loss": 0.2668, "step": 10685 }, { "epoch": 0.8465834818775996, "grad_norm": 1.4710091514712196, "learning_rate": 1.2088002447163383e-06, "loss": 0.2547, "step": 10686 }, { "epoch": 0.8466627054862349, "grad_norm": 1.2707976982564826, "learning_rate": 1.2075775620645613e-06, "loss": 0.1906, "step": 10687 }, { "epoch": 0.8467419290948702, "grad_norm": 1.3854969085595397, "learning_rate": 1.2063554583514947e-06, "loss": 0.2446, "step": 10688 }, { "epoch": 0.8468211527035057, "grad_norm": 1.1481183761858849, "learning_rate": 1.2051339336576074e-06, "loss": 0.1665, "step": 10689 }, { "epoch": 0.846900376312141, "grad_norm": 1.2568425692457097, "learning_rate": 1.203912988063335e-06, "loss": 0.2257, "step": 10690 }, { "epoch": 0.8469795999207764, "grad_norm": 1.4102680321309617, "learning_rate": 1.2026926216490675e-06, "loss": 0.2875, "step": 10691 }, { "epoch": 0.8470588235294118, "grad_norm": 1.339973681016876, "learning_rate": 1.2014728344951587e-06, "loss": 0.2373, "step": 10692 }, { "epoch": 0.8471380471380472, "grad_norm": 1.1499839158549152, "learning_rate": 1.2002536266819309e-06, "loss": 0.1823, "step": 10693 }, { "epoch": 0.8472172707466825, "grad_norm": 1.794664091498243, "learning_rate": 1.1990349982896598e-06, "loss": 0.3417, "step": 10694 }, { "epoch": 0.8472964943553178, "grad_norm": 1.1466486229571127, "learning_rate": 1.1978169493985836e-06, "loss": 0.2294, "step": 10695 }, { "epoch": 0.8473757179639533, "grad_norm": 1.1792678650295658, "learning_rate": 1.1965994800889113e-06, "loss": 0.2056, "step": 10696 }, { "epoch": 0.8474549415725886, "grad_norm": 1.4172078551720109, "learning_rate": 1.1953825904408033e-06, "loss": 0.2114, "step": 10697 }, { "epoch": 0.847534165181224, "grad_norm": 1.557836188046826, "learning_rate": 1.1941662805343846e-06, "loss": 0.2279, "step": 10698 }, { "epoch": 0.8476133887898594, "grad_norm": 1.1792012710797648, "learning_rate": 1.1929505504497464e-06, "loss": 0.2183, "step": 10699 }, { "epoch": 0.8476926123984948, "grad_norm": 1.6411063481845027, "learning_rate": 1.191735400266939e-06, "loss": 0.2786, "step": 10700 }, { "epoch": 0.8477718360071301, "grad_norm": 1.6415613835517755, "learning_rate": 1.190520830065972e-06, "loss": 0.2377, "step": 10701 }, { "epoch": 0.8478510596157655, "grad_norm": 1.0543882664512312, "learning_rate": 1.189306839926818e-06, "loss": 0.1523, "step": 10702 }, { "epoch": 0.8479302832244009, "grad_norm": 1.1176252181289879, "learning_rate": 1.1880934299294167e-06, "loss": 0.1836, "step": 10703 }, { "epoch": 0.8480095068330362, "grad_norm": 1.3566949125195122, "learning_rate": 1.1868806001536625e-06, "loss": 0.2121, "step": 10704 }, { "epoch": 0.8480887304416717, "grad_norm": 1.1986825866417639, "learning_rate": 1.185668350679413e-06, "loss": 0.1572, "step": 10705 }, { "epoch": 0.848167954050307, "grad_norm": 1.3463912639868627, "learning_rate": 1.1844566815864921e-06, "loss": 0.1892, "step": 10706 }, { "epoch": 0.8482471776589424, "grad_norm": 1.0282939041168102, "learning_rate": 1.1832455929546827e-06, "loss": 0.1428, "step": 10707 }, { "epoch": 0.8483264012675777, "grad_norm": 1.469974731763603, "learning_rate": 1.182035084863724e-06, "loss": 0.2123, "step": 10708 }, { "epoch": 0.8484056248762131, "grad_norm": 1.2432520620284864, "learning_rate": 1.1808251573933272e-06, "loss": 0.2121, "step": 10709 }, { "epoch": 0.8484848484848485, "grad_norm": 1.3296086953019453, "learning_rate": 1.1796158106231603e-06, "loss": 0.2026, "step": 10710 }, { "epoch": 0.8485640720934838, "grad_norm": 1.020753444595949, "learning_rate": 1.1784070446328477e-06, "loss": 0.132, "step": 10711 }, { "epoch": 0.8486432957021193, "grad_norm": 1.5155898658947995, "learning_rate": 1.177198859501989e-06, "loss": 0.2387, "step": 10712 }, { "epoch": 0.8487225193107546, "grad_norm": 1.3879226373955473, "learning_rate": 1.1759912553101316e-06, "loss": 0.2754, "step": 10713 }, { "epoch": 0.8488017429193899, "grad_norm": 1.0574272218745024, "learning_rate": 1.1747842321367886e-06, "loss": 0.1898, "step": 10714 }, { "epoch": 0.8488809665280254, "grad_norm": 1.0685039234016367, "learning_rate": 1.173577790061442e-06, "loss": 0.2158, "step": 10715 }, { "epoch": 0.8489601901366607, "grad_norm": 1.4135611138027584, "learning_rate": 1.1723719291635272e-06, "loss": 0.2401, "step": 10716 }, { "epoch": 0.8490394137452961, "grad_norm": 1.5241627760829384, "learning_rate": 1.171166649522444e-06, "loss": 0.2714, "step": 10717 }, { "epoch": 0.8491186373539314, "grad_norm": 1.3776374019313784, "learning_rate": 1.1699619512175563e-06, "loss": 0.2293, "step": 10718 }, { "epoch": 0.8491978609625669, "grad_norm": 1.3711509282708116, "learning_rate": 1.168757834328188e-06, "loss": 0.2008, "step": 10719 }, { "epoch": 0.8492770845712022, "grad_norm": 1.858661881400827, "learning_rate": 1.1675542989336208e-06, "loss": 0.2804, "step": 10720 }, { "epoch": 0.8493563081798375, "grad_norm": 1.3261155102879394, "learning_rate": 1.1663513451131047e-06, "loss": 0.2105, "step": 10721 }, { "epoch": 0.849435531788473, "grad_norm": 1.1569439477608126, "learning_rate": 1.1651489729458487e-06, "loss": 0.1522, "step": 10722 }, { "epoch": 0.8495147553971083, "grad_norm": 1.1642124932773206, "learning_rate": 1.1639471825110205e-06, "loss": 0.1896, "step": 10723 }, { "epoch": 0.8495939790057437, "grad_norm": 1.3718722293412, "learning_rate": 1.1627459738877557e-06, "loss": 0.2307, "step": 10724 }, { "epoch": 0.8496732026143791, "grad_norm": 1.4593997512460624, "learning_rate": 1.1615453471551462e-06, "loss": 0.2057, "step": 10725 }, { "epoch": 0.8497524262230145, "grad_norm": 1.2000380410783604, "learning_rate": 1.1603453023922473e-06, "loss": 0.1736, "step": 10726 }, { "epoch": 0.8498316498316498, "grad_norm": 1.6404312788650104, "learning_rate": 1.1591458396780753e-06, "loss": 0.3527, "step": 10727 }, { "epoch": 0.8499108734402852, "grad_norm": 1.2913847806829117, "learning_rate": 1.1579469590916125e-06, "loss": 0.2476, "step": 10728 }, { "epoch": 0.8499900970489206, "grad_norm": 1.3501326816108088, "learning_rate": 1.156748660711796e-06, "loss": 0.2673, "step": 10729 }, { "epoch": 0.8500693206575559, "grad_norm": 1.7066732642213205, "learning_rate": 1.1555509446175284e-06, "loss": 0.3191, "step": 10730 }, { "epoch": 0.8501485442661914, "grad_norm": 1.7246599741825346, "learning_rate": 1.1543538108876751e-06, "loss": 0.3213, "step": 10731 }, { "epoch": 0.8502277678748267, "grad_norm": 1.365497405581261, "learning_rate": 1.153157259601062e-06, "loss": 0.2253, "step": 10732 }, { "epoch": 0.8503069914834621, "grad_norm": 1.3103151829961655, "learning_rate": 1.1519612908364718e-06, "loss": 0.2409, "step": 10733 }, { "epoch": 0.8503862150920974, "grad_norm": 0.9441718809920354, "learning_rate": 1.1507659046726605e-06, "loss": 0.1385, "step": 10734 }, { "epoch": 0.8504654387007328, "grad_norm": 1.4687952641057023, "learning_rate": 1.1495711011883325e-06, "loss": 0.2359, "step": 10735 }, { "epoch": 0.8505446623093682, "grad_norm": 1.4886311133744696, "learning_rate": 1.148376880462161e-06, "loss": 0.1923, "step": 10736 }, { "epoch": 0.8506238859180035, "grad_norm": 1.20044250197672, "learning_rate": 1.1471832425727825e-06, "loss": 0.2101, "step": 10737 }, { "epoch": 0.850703109526639, "grad_norm": 1.6085337752774558, "learning_rate": 1.14599018759879e-06, "loss": 0.2954, "step": 10738 }, { "epoch": 0.8507823331352743, "grad_norm": 1.1242902995263895, "learning_rate": 1.1447977156187395e-06, "loss": 0.1499, "step": 10739 }, { "epoch": 0.8508615567439097, "grad_norm": 1.2822827991080508, "learning_rate": 1.1436058267111527e-06, "loss": 0.192, "step": 10740 }, { "epoch": 0.8509407803525451, "grad_norm": 1.2429910285604242, "learning_rate": 1.1424145209545079e-06, "loss": 0.2398, "step": 10741 }, { "epoch": 0.8510200039611804, "grad_norm": 1.1699639202459249, "learning_rate": 1.1412237984272467e-06, "loss": 0.146, "step": 10742 }, { "epoch": 0.8510992275698158, "grad_norm": 1.3107355181733198, "learning_rate": 1.140033659207771e-06, "loss": 0.1699, "step": 10743 }, { "epoch": 0.8511784511784511, "grad_norm": 1.665860324643064, "learning_rate": 1.1388441033744502e-06, "loss": 0.2261, "step": 10744 }, { "epoch": 0.8512576747870866, "grad_norm": 1.325005151797313, "learning_rate": 1.1376551310056073e-06, "loss": 0.2353, "step": 10745 }, { "epoch": 0.8513368983957219, "grad_norm": 1.1877119154206783, "learning_rate": 1.1364667421795283e-06, "loss": 0.2207, "step": 10746 }, { "epoch": 0.8514161220043573, "grad_norm": 1.1700524083376647, "learning_rate": 1.1352789369744688e-06, "loss": 0.1673, "step": 10747 }, { "epoch": 0.8514953456129927, "grad_norm": 1.3556005380129177, "learning_rate": 1.134091715468636e-06, "loss": 0.2347, "step": 10748 }, { "epoch": 0.851574569221628, "grad_norm": 1.640276486104036, "learning_rate": 1.132905077740203e-06, "loss": 0.2507, "step": 10749 }, { "epoch": 0.8516537928302634, "grad_norm": 1.938001337935845, "learning_rate": 1.131719023867306e-06, "loss": 0.2857, "step": 10750 }, { "epoch": 0.8517330164388988, "grad_norm": 1.346372329412587, "learning_rate": 1.1305335539280392e-06, "loss": 0.2302, "step": 10751 }, { "epoch": 0.8518122400475342, "grad_norm": 1.209952336573794, "learning_rate": 1.1293486680004607e-06, "loss": 0.1839, "step": 10752 }, { "epoch": 0.8518914636561695, "grad_norm": 1.5534520465790997, "learning_rate": 1.1281643661625896e-06, "loss": 0.2731, "step": 10753 }, { "epoch": 0.851970687264805, "grad_norm": 1.4358241634171365, "learning_rate": 1.1269806484924072e-06, "loss": 0.2255, "step": 10754 }, { "epoch": 0.8520499108734403, "grad_norm": 1.6615862166193753, "learning_rate": 1.1257975150678557e-06, "loss": 0.2497, "step": 10755 }, { "epoch": 0.8521291344820756, "grad_norm": 1.6042851753083125, "learning_rate": 1.124614965966835e-06, "loss": 0.2753, "step": 10756 }, { "epoch": 0.852208358090711, "grad_norm": 1.1296527761233508, "learning_rate": 1.1234330012672146e-06, "loss": 0.1435, "step": 10757 }, { "epoch": 0.8522875816993464, "grad_norm": 1.4188548771779212, "learning_rate": 1.1222516210468204e-06, "loss": 0.2143, "step": 10758 }, { "epoch": 0.8523668053079818, "grad_norm": 1.212223472158802, "learning_rate": 1.121070825383438e-06, "loss": 0.1516, "step": 10759 }, { "epoch": 0.8524460289166171, "grad_norm": 1.1545776919341748, "learning_rate": 1.1198906143548216e-06, "loss": 0.1561, "step": 10760 }, { "epoch": 0.8525252525252526, "grad_norm": 1.1155731830344275, "learning_rate": 1.1187109880386794e-06, "loss": 0.1724, "step": 10761 }, { "epoch": 0.8526044761338879, "grad_norm": 1.2005002629477377, "learning_rate": 1.117531946512682e-06, "loss": 0.1787, "step": 10762 }, { "epoch": 0.8526836997425232, "grad_norm": 1.2581184976662154, "learning_rate": 1.1163534898544692e-06, "loss": 0.2097, "step": 10763 }, { "epoch": 0.8527629233511587, "grad_norm": 1.1736652767290678, "learning_rate": 1.1151756181416328e-06, "loss": 0.1914, "step": 10764 }, { "epoch": 0.852842146959794, "grad_norm": 1.3530846440988304, "learning_rate": 1.1139983314517288e-06, "loss": 0.1443, "step": 10765 }, { "epoch": 0.8529213705684294, "grad_norm": 1.096651598235833, "learning_rate": 1.1128216298622808e-06, "loss": 0.1463, "step": 10766 }, { "epoch": 0.8530005941770648, "grad_norm": 1.2058003063193818, "learning_rate": 1.1116455134507665e-06, "loss": 0.1839, "step": 10767 }, { "epoch": 0.8530798177857002, "grad_norm": 1.813538853319796, "learning_rate": 1.110469982294624e-06, "loss": 0.2721, "step": 10768 }, { "epoch": 0.8531590413943355, "grad_norm": 1.1984717591463547, "learning_rate": 1.1092950364712617e-06, "loss": 0.1572, "step": 10769 }, { "epoch": 0.8532382650029708, "grad_norm": 1.5130929658460166, "learning_rate": 1.1081206760580422e-06, "loss": 0.2544, "step": 10770 }, { "epoch": 0.8533174886116063, "grad_norm": 1.32466074032802, "learning_rate": 1.1069469011322908e-06, "loss": 0.1893, "step": 10771 }, { "epoch": 0.8533967122202416, "grad_norm": 1.2814813183187004, "learning_rate": 1.1057737117712941e-06, "loss": 0.239, "step": 10772 }, { "epoch": 0.853475935828877, "grad_norm": 1.187383699768313, "learning_rate": 1.1046011080523034e-06, "loss": 0.1888, "step": 10773 }, { "epoch": 0.8535551594375124, "grad_norm": 1.3382170301882963, "learning_rate": 1.1034290900525279e-06, "loss": 0.2381, "step": 10774 }, { "epoch": 0.8536343830461478, "grad_norm": 1.0991829217487112, "learning_rate": 1.1022576578491372e-06, "loss": 0.1784, "step": 10775 }, { "epoch": 0.8537136066547831, "grad_norm": 1.3932733986841541, "learning_rate": 1.1010868115192696e-06, "loss": 0.2344, "step": 10776 }, { "epoch": 0.8537928302634185, "grad_norm": 1.259474974714991, "learning_rate": 1.0999165511400157e-06, "loss": 0.1932, "step": 10777 }, { "epoch": 0.8538720538720539, "grad_norm": 1.3768474669474018, "learning_rate": 1.09874687678843e-06, "loss": 0.1795, "step": 10778 }, { "epoch": 0.8539512774806892, "grad_norm": 1.5358201494201102, "learning_rate": 1.097577788541535e-06, "loss": 0.266, "step": 10779 }, { "epoch": 0.8540305010893247, "grad_norm": 1.4309717707934504, "learning_rate": 1.0964092864763065e-06, "loss": 0.2343, "step": 10780 }, { "epoch": 0.85410972469796, "grad_norm": 1.558988023293492, "learning_rate": 1.095241370669684e-06, "loss": 0.2719, "step": 10781 }, { "epoch": 0.8541889483065954, "grad_norm": 0.9901205071029419, "learning_rate": 1.0940740411985718e-06, "loss": 0.155, "step": 10782 }, { "epoch": 0.8542681719152307, "grad_norm": 1.2218146976745325, "learning_rate": 1.0929072981398313e-06, "loss": 0.1641, "step": 10783 }, { "epoch": 0.8543473955238661, "grad_norm": 1.6441616583651038, "learning_rate": 1.091741141570285e-06, "loss": 0.2327, "step": 10784 }, { "epoch": 0.8544266191325015, "grad_norm": 1.324895351116022, "learning_rate": 1.0905755715667222e-06, "loss": 0.2386, "step": 10785 }, { "epoch": 0.8545058427411368, "grad_norm": 1.5434456112929702, "learning_rate": 1.0894105882058891e-06, "loss": 0.2321, "step": 10786 }, { "epoch": 0.8545850663497723, "grad_norm": 1.4569959671375003, "learning_rate": 1.0882461915644936e-06, "loss": 0.187, "step": 10787 }, { "epoch": 0.8546642899584076, "grad_norm": 1.6230809011020064, "learning_rate": 1.0870823817192045e-06, "loss": 0.3151, "step": 10788 }, { "epoch": 0.854743513567043, "grad_norm": 1.159258523030389, "learning_rate": 1.0859191587466556e-06, "loss": 0.1643, "step": 10789 }, { "epoch": 0.8548227371756784, "grad_norm": 1.3520949967532052, "learning_rate": 1.0847565227234392e-06, "loss": 0.214, "step": 10790 }, { "epoch": 0.8549019607843137, "grad_norm": 1.308914513355494, "learning_rate": 1.0835944737261072e-06, "loss": 0.2241, "step": 10791 }, { "epoch": 0.8549811843929491, "grad_norm": 1.3050524273336934, "learning_rate": 1.0824330118311765e-06, "loss": 0.2001, "step": 10792 }, { "epoch": 0.8550604080015844, "grad_norm": 1.3552041608815961, "learning_rate": 1.0812721371151213e-06, "loss": 0.212, "step": 10793 }, { "epoch": 0.8551396316102199, "grad_norm": 1.0439392340847686, "learning_rate": 1.080111849654384e-06, "loss": 0.1531, "step": 10794 }, { "epoch": 0.8552188552188552, "grad_norm": 1.2531789351974598, "learning_rate": 1.078952149525362e-06, "loss": 0.193, "step": 10795 }, { "epoch": 0.8552980788274905, "grad_norm": 1.1245551316692843, "learning_rate": 1.0777930368044143e-06, "loss": 0.1734, "step": 10796 }, { "epoch": 0.855377302436126, "grad_norm": 1.4592722183116267, "learning_rate": 1.0766345115678633e-06, "loss": 0.2249, "step": 10797 }, { "epoch": 0.8554565260447613, "grad_norm": 1.3809091395215236, "learning_rate": 1.0754765738919947e-06, "loss": 0.2493, "step": 10798 }, { "epoch": 0.8555357496533967, "grad_norm": 1.3542926354254743, "learning_rate": 1.074319223853052e-06, "loss": 0.2727, "step": 10799 }, { "epoch": 0.8556149732620321, "grad_norm": 1.1583382970650118, "learning_rate": 1.0731624615272385e-06, "loss": 0.1644, "step": 10800 }, { "epoch": 0.8556941968706675, "grad_norm": 1.201177784010263, "learning_rate": 1.0720062869907255e-06, "loss": 0.1814, "step": 10801 }, { "epoch": 0.8557734204793028, "grad_norm": 1.1681444882922902, "learning_rate": 1.07085070031964e-06, "loss": 0.1708, "step": 10802 }, { "epoch": 0.8558526440879382, "grad_norm": 1.3391259673757796, "learning_rate": 1.06969570159007e-06, "loss": 0.2817, "step": 10803 }, { "epoch": 0.8559318676965736, "grad_norm": 1.9584411057971687, "learning_rate": 1.0685412908780702e-06, "loss": 0.3552, "step": 10804 }, { "epoch": 0.8560110913052089, "grad_norm": 1.4923312382295966, "learning_rate": 1.0673874682596497e-06, "loss": 0.321, "step": 10805 }, { "epoch": 0.8560903149138444, "grad_norm": 1.0416914219625322, "learning_rate": 1.0662342338107823e-06, "loss": 0.1369, "step": 10806 }, { "epoch": 0.8561695385224797, "grad_norm": 1.642057393170728, "learning_rate": 1.065081587607406e-06, "loss": 0.3597, "step": 10807 }, { "epoch": 0.8562487621311151, "grad_norm": 1.1766502998251729, "learning_rate": 1.0639295297254149e-06, "loss": 0.1882, "step": 10808 }, { "epoch": 0.8563279857397504, "grad_norm": 1.1971173970989457, "learning_rate": 1.0627780602406656e-06, "loss": 0.2041, "step": 10809 }, { "epoch": 0.8564072093483858, "grad_norm": 1.1447713291366683, "learning_rate": 1.061627179228979e-06, "loss": 0.205, "step": 10810 }, { "epoch": 0.8564864329570212, "grad_norm": 1.6830221584923502, "learning_rate": 1.0604768867661342e-06, "loss": 0.2666, "step": 10811 }, { "epoch": 0.8565656565656565, "grad_norm": 1.5616123161453341, "learning_rate": 1.0593271829278718e-06, "loss": 0.3028, "step": 10812 }, { "epoch": 0.856644880174292, "grad_norm": 1.456097720113925, "learning_rate": 1.0581780677898924e-06, "loss": 0.2219, "step": 10813 }, { "epoch": 0.8567241037829273, "grad_norm": 1.3718421469481186, "learning_rate": 1.0570295414278642e-06, "loss": 0.2132, "step": 10814 }, { "epoch": 0.8568033273915627, "grad_norm": 1.2548612172729923, "learning_rate": 1.0558816039174102e-06, "loss": 0.1891, "step": 10815 }, { "epoch": 0.8568825510001981, "grad_norm": 1.2579132836272071, "learning_rate": 1.0547342553341144e-06, "loss": 0.2169, "step": 10816 }, { "epoch": 0.8569617746088334, "grad_norm": 1.087808205142531, "learning_rate": 1.0535874957535275e-06, "loss": 0.1935, "step": 10817 }, { "epoch": 0.8570409982174688, "grad_norm": 1.3092224128447962, "learning_rate": 1.0524413252511567e-06, "loss": 0.1711, "step": 10818 }, { "epoch": 0.8571202218261041, "grad_norm": 1.239999444578251, "learning_rate": 1.0512957439024697e-06, "loss": 0.2013, "step": 10819 }, { "epoch": 0.8571994454347396, "grad_norm": 1.3046931658884642, "learning_rate": 1.0501507517829012e-06, "loss": 0.185, "step": 10820 }, { "epoch": 0.8572786690433749, "grad_norm": 1.3807128633644161, "learning_rate": 1.0490063489678427e-06, "loss": 0.2016, "step": 10821 }, { "epoch": 0.8573578926520103, "grad_norm": 1.465615348103796, "learning_rate": 1.0478625355326445e-06, "loss": 0.2251, "step": 10822 }, { "epoch": 0.8574371162606457, "grad_norm": 1.117290017974565, "learning_rate": 1.0467193115526254e-06, "loss": 0.1679, "step": 10823 }, { "epoch": 0.857516339869281, "grad_norm": 1.4638689416898418, "learning_rate": 1.0455766771030585e-06, "loss": 0.2718, "step": 10824 }, { "epoch": 0.8575955634779164, "grad_norm": 1.6226504992825306, "learning_rate": 1.0444346322591804e-06, "loss": 0.2856, "step": 10825 }, { "epoch": 0.8576747870865518, "grad_norm": 1.5801297461942128, "learning_rate": 1.0432931770961907e-06, "loss": 0.2234, "step": 10826 }, { "epoch": 0.8577540106951872, "grad_norm": 1.3818127549970014, "learning_rate": 1.0421523116892496e-06, "loss": 0.1918, "step": 10827 }, { "epoch": 0.8578332343038225, "grad_norm": 1.2535384563720011, "learning_rate": 1.0410120361134767e-06, "loss": 0.1995, "step": 10828 }, { "epoch": 0.857912457912458, "grad_norm": 1.4460981075519885, "learning_rate": 1.0398723504439512e-06, "loss": 0.2676, "step": 10829 }, { "epoch": 0.8579916815210933, "grad_norm": 1.5803351343786376, "learning_rate": 1.0387332547557194e-06, "loss": 0.1971, "step": 10830 }, { "epoch": 0.8580709051297286, "grad_norm": 1.4507286026817658, "learning_rate": 1.0375947491237836e-06, "loss": 0.2111, "step": 10831 }, { "epoch": 0.858150128738364, "grad_norm": 1.4267644205594114, "learning_rate": 1.0364568336231085e-06, "loss": 0.247, "step": 10832 }, { "epoch": 0.8582293523469994, "grad_norm": 1.1324892138545073, "learning_rate": 1.0353195083286226e-06, "loss": 0.1267, "step": 10833 }, { "epoch": 0.8583085759556348, "grad_norm": 1.2884812199234947, "learning_rate": 1.034182773315211e-06, "loss": 0.2113, "step": 10834 }, { "epoch": 0.8583877995642701, "grad_norm": 1.439443762761076, "learning_rate": 1.0330466286577224e-06, "loss": 0.2463, "step": 10835 }, { "epoch": 0.8584670231729056, "grad_norm": 1.5279724390550258, "learning_rate": 1.031911074430968e-06, "loss": 0.2062, "step": 10836 }, { "epoch": 0.8585462467815409, "grad_norm": 1.3292679260055595, "learning_rate": 1.030776110709718e-06, "loss": 0.2448, "step": 10837 }, { "epoch": 0.8586254703901762, "grad_norm": 1.2214399069232247, "learning_rate": 1.0296417375687017e-06, "loss": 0.2057, "step": 10838 }, { "epoch": 0.8587046939988117, "grad_norm": 1.1564201755316976, "learning_rate": 1.0285079550826172e-06, "loss": 0.2187, "step": 10839 }, { "epoch": 0.858783917607447, "grad_norm": 1.2406853934459663, "learning_rate": 1.0273747633261144e-06, "loss": 0.1961, "step": 10840 }, { "epoch": 0.8588631412160824, "grad_norm": 1.3732701770371192, "learning_rate": 1.0262421623738105e-06, "loss": 0.2393, "step": 10841 }, { "epoch": 0.8589423648247178, "grad_norm": 1.5730219129167005, "learning_rate": 1.0251101523002805e-06, "loss": 0.1727, "step": 10842 }, { "epoch": 0.8590215884333532, "grad_norm": 1.118679662629517, "learning_rate": 1.0239787331800632e-06, "loss": 0.158, "step": 10843 }, { "epoch": 0.8591008120419885, "grad_norm": 1.1631830075625404, "learning_rate": 1.022847905087656e-06, "loss": 0.1615, "step": 10844 }, { "epoch": 0.8591800356506238, "grad_norm": 1.6087535526755574, "learning_rate": 1.0217176680975183e-06, "loss": 0.2423, "step": 10845 }, { "epoch": 0.8592592592592593, "grad_norm": 1.3370618555695546, "learning_rate": 1.0205880222840726e-06, "loss": 0.2383, "step": 10846 }, { "epoch": 0.8593384828678946, "grad_norm": 1.3318962256469504, "learning_rate": 1.0194589677216992e-06, "loss": 0.185, "step": 10847 }, { "epoch": 0.85941770647653, "grad_norm": 1.692345563064196, "learning_rate": 1.0183305044847402e-06, "loss": 0.2175, "step": 10848 }, { "epoch": 0.8594969300851654, "grad_norm": 1.2978267033185473, "learning_rate": 1.0172026326475016e-06, "loss": 0.2141, "step": 10849 }, { "epoch": 0.8595761536938008, "grad_norm": 1.2196388175762076, "learning_rate": 1.0160753522842482e-06, "loss": 0.231, "step": 10850 }, { "epoch": 0.8596553773024361, "grad_norm": 1.6921766287279663, "learning_rate": 1.0149486634692019e-06, "loss": 0.3379, "step": 10851 }, { "epoch": 0.8597346009110715, "grad_norm": 1.47423904976091, "learning_rate": 1.0138225662765555e-06, "loss": 0.2367, "step": 10852 }, { "epoch": 0.8598138245197069, "grad_norm": 1.7374752846928612, "learning_rate": 1.0126970607804532e-06, "loss": 0.1667, "step": 10853 }, { "epoch": 0.8598930481283422, "grad_norm": 1.0170818877706014, "learning_rate": 1.0115721470550045e-06, "loss": 0.1631, "step": 10854 }, { "epoch": 0.8599722717369777, "grad_norm": 1.5170281319712673, "learning_rate": 1.0104478251742822e-06, "loss": 0.2714, "step": 10855 }, { "epoch": 0.860051495345613, "grad_norm": 1.5913677264856463, "learning_rate": 1.009324095212315e-06, "loss": 0.2403, "step": 10856 }, { "epoch": 0.8601307189542484, "grad_norm": 1.3704028689403447, "learning_rate": 1.0082009572430963e-06, "loss": 0.2547, "step": 10857 }, { "epoch": 0.8602099425628837, "grad_norm": 1.4796370476274001, "learning_rate": 1.0070784113405763e-06, "loss": 0.2507, "step": 10858 }, { "epoch": 0.8602891661715191, "grad_norm": 1.3055146840303782, "learning_rate": 1.005956457578675e-06, "loss": 0.1884, "step": 10859 }, { "epoch": 0.8603683897801545, "grad_norm": 1.2035464800675986, "learning_rate": 1.0048350960312637e-06, "loss": 0.1684, "step": 10860 }, { "epoch": 0.8604476133887898, "grad_norm": 1.1532555328066032, "learning_rate": 1.003714326772176e-06, "loss": 0.1463, "step": 10861 }, { "epoch": 0.8605268369974253, "grad_norm": 1.3399555225281417, "learning_rate": 1.0025941498752167e-06, "loss": 0.2066, "step": 10862 }, { "epoch": 0.8606060606060606, "grad_norm": 1.6773949674236435, "learning_rate": 1.001474565414139e-06, "loss": 0.2937, "step": 10863 }, { "epoch": 0.860685284214696, "grad_norm": 0.9738684746189048, "learning_rate": 1.0003555734626603e-06, "loss": 0.1537, "step": 10864 }, { "epoch": 0.8607645078233314, "grad_norm": 1.1208106012646089, "learning_rate": 9.992371740944663e-07, "loss": 0.1972, "step": 10865 }, { "epoch": 0.8608437314319667, "grad_norm": 1.0273624656252895, "learning_rate": 9.981193673831946e-07, "loss": 0.1622, "step": 10866 }, { "epoch": 0.8609229550406021, "grad_norm": 1.6657120843540014, "learning_rate": 9.970021534024476e-07, "loss": 0.2752, "step": 10867 }, { "epoch": 0.8610021786492374, "grad_norm": 1.1879822947993006, "learning_rate": 9.958855322257922e-07, "loss": 0.2283, "step": 10868 }, { "epoch": 0.8610814022578729, "grad_norm": 1.41161644193935, "learning_rate": 9.94769503926748e-07, "loss": 0.2205, "step": 10869 }, { "epoch": 0.8611606258665082, "grad_norm": 0.9753072853180768, "learning_rate": 9.936540685787998e-07, "loss": 0.1499, "step": 10870 }, { "epoch": 0.8612398494751435, "grad_norm": 1.3430045506099497, "learning_rate": 9.925392262553968e-07, "loss": 0.2134, "step": 10871 }, { "epoch": 0.861319073083779, "grad_norm": 1.319930473486728, "learning_rate": 9.914249770299445e-07, "loss": 0.1776, "step": 10872 }, { "epoch": 0.8613982966924143, "grad_norm": 1.1837601979409187, "learning_rate": 9.903113209758098e-07, "loss": 0.2013, "step": 10873 }, { "epoch": 0.8614775203010497, "grad_norm": 1.1854408675832382, "learning_rate": 9.89198258166324e-07, "loss": 0.2166, "step": 10874 }, { "epoch": 0.8615567439096851, "grad_norm": 1.6716272760210298, "learning_rate": 9.880857886747753e-07, "loss": 0.2477, "step": 10875 }, { "epoch": 0.8616359675183205, "grad_norm": 1.7497373625726915, "learning_rate": 9.869739125744138e-07, "loss": 0.2253, "step": 10876 }, { "epoch": 0.8617151911269558, "grad_norm": 1.3116517658669882, "learning_rate": 9.858626299384532e-07, "loss": 0.193, "step": 10877 }, { "epoch": 0.8617944147355912, "grad_norm": 1.3303498927127198, "learning_rate": 9.847519408400663e-07, "loss": 0.1933, "step": 10878 }, { "epoch": 0.8618736383442266, "grad_norm": 1.134190258981955, "learning_rate": 9.836418453523833e-07, "loss": 0.1556, "step": 10879 }, { "epoch": 0.8619528619528619, "grad_norm": 1.2408127782636982, "learning_rate": 9.825323435485024e-07, "loss": 0.1865, "step": 10880 }, { "epoch": 0.8620320855614974, "grad_norm": 1.1714285081219094, "learning_rate": 9.814234355014774e-07, "loss": 0.2211, "step": 10881 }, { "epoch": 0.8621113091701327, "grad_norm": 1.3132939582467724, "learning_rate": 9.803151212843253e-07, "loss": 0.2494, "step": 10882 }, { "epoch": 0.8621905327787681, "grad_norm": 1.1852329333289056, "learning_rate": 9.792074009700192e-07, "loss": 0.1878, "step": 10883 }, { "epoch": 0.8622697563874034, "grad_norm": 1.168486320282818, "learning_rate": 9.781002746315039e-07, "loss": 0.218, "step": 10884 }, { "epoch": 0.8623489799960388, "grad_norm": 1.2109875371880268, "learning_rate": 9.769937423416741e-07, "loss": 0.1728, "step": 10885 }, { "epoch": 0.8624282036046742, "grad_norm": 1.543794392817461, "learning_rate": 9.758878041733877e-07, "loss": 0.235, "step": 10886 }, { "epoch": 0.8625074272133095, "grad_norm": 1.2262341573831652, "learning_rate": 9.747824601994715e-07, "loss": 0.1972, "step": 10887 }, { "epoch": 0.862586650821945, "grad_norm": 1.5004806187741724, "learning_rate": 9.73677710492703e-07, "loss": 0.247, "step": 10888 }, { "epoch": 0.8626658744305803, "grad_norm": 1.1547087878482098, "learning_rate": 9.725735551258241e-07, "loss": 0.143, "step": 10889 }, { "epoch": 0.8627450980392157, "grad_norm": 1.4221493170959851, "learning_rate": 9.7146999417154e-07, "loss": 0.2801, "step": 10890 }, { "epoch": 0.8628243216478511, "grad_norm": 1.3623810348968333, "learning_rate": 9.703670277025158e-07, "loss": 0.2082, "step": 10891 }, { "epoch": 0.8629035452564864, "grad_norm": 1.2665455746022487, "learning_rate": 9.69264655791372e-07, "loss": 0.1748, "step": 10892 }, { "epoch": 0.8629827688651218, "grad_norm": 1.3590893187993502, "learning_rate": 9.681628785107e-07, "loss": 0.1834, "step": 10893 }, { "epoch": 0.8630619924737571, "grad_norm": 1.5082112304555808, "learning_rate": 9.670616959330437e-07, "loss": 0.259, "step": 10894 }, { "epoch": 0.8631412160823926, "grad_norm": 1.3962149660803789, "learning_rate": 9.659611081309095e-07, "loss": 0.2243, "step": 10895 }, { "epoch": 0.8632204396910279, "grad_norm": 1.240801038383661, "learning_rate": 9.648611151767683e-07, "loss": 0.1844, "step": 10896 }, { "epoch": 0.8632996632996633, "grad_norm": 1.2873919308560537, "learning_rate": 9.637617171430492e-07, "loss": 0.1974, "step": 10897 }, { "epoch": 0.8633788869082987, "grad_norm": 1.2778640224426707, "learning_rate": 9.626629141021414e-07, "loss": 0.2188, "step": 10898 }, { "epoch": 0.863458110516934, "grad_norm": 1.606348585382887, "learning_rate": 9.615647061263933e-07, "loss": 0.2703, "step": 10899 }, { "epoch": 0.8635373341255694, "grad_norm": 1.3798048352331447, "learning_rate": 9.604670932881211e-07, "loss": 0.2375, "step": 10900 }, { "epoch": 0.8636165577342048, "grad_norm": 1.211770339108247, "learning_rate": 9.593700756595958e-07, "loss": 0.1788, "step": 10901 }, { "epoch": 0.8636957813428402, "grad_norm": 1.0797022505941447, "learning_rate": 9.582736533130488e-07, "loss": 0.167, "step": 10902 }, { "epoch": 0.8637750049514755, "grad_norm": 1.4780619163654718, "learning_rate": 9.571778263206767e-07, "loss": 0.2458, "step": 10903 }, { "epoch": 0.863854228560111, "grad_norm": 1.043049188107328, "learning_rate": 9.560825947546337e-07, "loss": 0.1337, "step": 10904 }, { "epoch": 0.8639334521687463, "grad_norm": 1.2947803000906086, "learning_rate": 9.549879586870336e-07, "loss": 0.185, "step": 10905 }, { "epoch": 0.8640126757773816, "grad_norm": 1.5344780614419649, "learning_rate": 9.538939181899565e-07, "loss": 0.2566, "step": 10906 }, { "epoch": 0.864091899386017, "grad_norm": 1.5942962347850829, "learning_rate": 9.528004733354379e-07, "loss": 0.3098, "step": 10907 }, { "epoch": 0.8641711229946524, "grad_norm": 1.326237347161247, "learning_rate": 9.517076241954737e-07, "loss": 0.211, "step": 10908 }, { "epoch": 0.8642503466032878, "grad_norm": 1.356001450969405, "learning_rate": 9.506153708420263e-07, "loss": 0.2195, "step": 10909 }, { "epoch": 0.8643295702119231, "grad_norm": 1.6521792743959813, "learning_rate": 9.495237133470148e-07, "loss": 0.2301, "step": 10910 }, { "epoch": 0.8644087938205586, "grad_norm": 1.5180080303497498, "learning_rate": 9.484326517823173e-07, "loss": 0.1946, "step": 10911 }, { "epoch": 0.8644880174291939, "grad_norm": 1.1322782186896199, "learning_rate": 9.473421862197751e-07, "loss": 0.1524, "step": 10912 }, { "epoch": 0.8645672410378292, "grad_norm": 1.4243448995952706, "learning_rate": 9.462523167311943e-07, "loss": 0.1872, "step": 10913 }, { "epoch": 0.8646464646464647, "grad_norm": 1.1732463340075379, "learning_rate": 9.45163043388333e-07, "loss": 0.207, "step": 10914 }, { "epoch": 0.8647256882551, "grad_norm": 1.2158997976338526, "learning_rate": 9.440743662629149e-07, "loss": 0.1815, "step": 10915 }, { "epoch": 0.8648049118637354, "grad_norm": 1.3248290534335512, "learning_rate": 9.429862854266281e-07, "loss": 0.2098, "step": 10916 }, { "epoch": 0.8648841354723708, "grad_norm": 1.2034546424848298, "learning_rate": 9.418988009511143e-07, "loss": 0.2353, "step": 10917 }, { "epoch": 0.8649633590810062, "grad_norm": 1.2895565116565588, "learning_rate": 9.408119129079774e-07, "loss": 0.183, "step": 10918 }, { "epoch": 0.8650425826896415, "grad_norm": 1.554008069256629, "learning_rate": 9.397256213687877e-07, "loss": 0.2293, "step": 10919 }, { "epoch": 0.8651218062982768, "grad_norm": 1.2394962198928428, "learning_rate": 9.386399264050705e-07, "loss": 0.2105, "step": 10920 }, { "epoch": 0.8652010299069123, "grad_norm": 1.339429508837962, "learning_rate": 9.375548280883129e-07, "loss": 0.2375, "step": 10921 }, { "epoch": 0.8652802535155476, "grad_norm": 0.9124102492944345, "learning_rate": 9.364703264899655e-07, "loss": 0.1065, "step": 10922 }, { "epoch": 0.865359477124183, "grad_norm": 1.1151787840672538, "learning_rate": 9.353864216814356e-07, "loss": 0.1661, "step": 10923 }, { "epoch": 0.8654387007328184, "grad_norm": 1.4177055426984204, "learning_rate": 9.34303113734093e-07, "loss": 0.2484, "step": 10924 }, { "epoch": 0.8655179243414538, "grad_norm": 1.5578039263984262, "learning_rate": 9.332204027192693e-07, "loss": 0.2164, "step": 10925 }, { "epoch": 0.8655971479500891, "grad_norm": 1.6193992303596956, "learning_rate": 9.321382887082564e-07, "loss": 0.2751, "step": 10926 }, { "epoch": 0.8656763715587245, "grad_norm": 1.1964422362871079, "learning_rate": 9.310567717723063e-07, "loss": 0.1943, "step": 10927 }, { "epoch": 0.8657555951673599, "grad_norm": 1.6999540086970655, "learning_rate": 9.299758519826274e-07, "loss": 0.2348, "step": 10928 }, { "epoch": 0.8658348187759952, "grad_norm": 1.5117679335126262, "learning_rate": 9.288955294103996e-07, "loss": 0.2438, "step": 10929 }, { "epoch": 0.8659140423846307, "grad_norm": 1.1995615499375527, "learning_rate": 9.278158041267526e-07, "loss": 0.1496, "step": 10930 }, { "epoch": 0.865993265993266, "grad_norm": 1.1587269927524892, "learning_rate": 9.267366762027818e-07, "loss": 0.1585, "step": 10931 }, { "epoch": 0.8660724896019014, "grad_norm": 0.9650094265975712, "learning_rate": 9.256581457095437e-07, "loss": 0.1245, "step": 10932 }, { "epoch": 0.8661517132105367, "grad_norm": 1.380043514146755, "learning_rate": 9.245802127180547e-07, "loss": 0.1904, "step": 10933 }, { "epoch": 0.8662309368191721, "grad_norm": 1.5037731371741119, "learning_rate": 9.235028772992883e-07, "loss": 0.285, "step": 10934 }, { "epoch": 0.8663101604278075, "grad_norm": 1.4442399337657974, "learning_rate": 9.224261395241862e-07, "loss": 0.1578, "step": 10935 }, { "epoch": 0.8663893840364428, "grad_norm": 1.1398482578251599, "learning_rate": 9.213499994636443e-07, "loss": 0.1862, "step": 10936 }, { "epoch": 0.8664686076450783, "grad_norm": 1.2647795753196418, "learning_rate": 9.202744571885191e-07, "loss": 0.2002, "step": 10937 }, { "epoch": 0.8665478312537136, "grad_norm": 1.1883335094521148, "learning_rate": 9.19199512769634e-07, "loss": 0.2333, "step": 10938 }, { "epoch": 0.866627054862349, "grad_norm": 1.1945691477103177, "learning_rate": 9.181251662777668e-07, "loss": 0.2093, "step": 10939 }, { "epoch": 0.8667062784709844, "grad_norm": 1.1913456661792743, "learning_rate": 9.170514177836565e-07, "loss": 0.1837, "step": 10940 }, { "epoch": 0.8667855020796197, "grad_norm": 1.2243469031713343, "learning_rate": 9.159782673580075e-07, "loss": 0.118, "step": 10941 }, { "epoch": 0.8668647256882551, "grad_norm": 1.2756906533400505, "learning_rate": 9.149057150714802e-07, "loss": 0.212, "step": 10942 }, { "epoch": 0.8669439492968904, "grad_norm": 1.1134171618131126, "learning_rate": 9.138337609946979e-07, "loss": 0.2144, "step": 10943 }, { "epoch": 0.8670231729055259, "grad_norm": 1.4768432506185165, "learning_rate": 9.127624051982398e-07, "loss": 0.2339, "step": 10944 }, { "epoch": 0.8671023965141612, "grad_norm": 1.263897220311466, "learning_rate": 9.116916477526539e-07, "loss": 0.1851, "step": 10945 }, { "epoch": 0.8671816201227966, "grad_norm": 1.5791891811291119, "learning_rate": 9.106214887284437e-07, "loss": 0.2852, "step": 10946 }, { "epoch": 0.867260843731432, "grad_norm": 1.2386945625987698, "learning_rate": 9.095519281960729e-07, "loss": 0.2138, "step": 10947 }, { "epoch": 0.8673400673400673, "grad_norm": 1.3290999891055273, "learning_rate": 9.084829662259665e-07, "loss": 0.2024, "step": 10948 }, { "epoch": 0.8674192909487027, "grad_norm": 1.861921203918453, "learning_rate": 9.0741460288851e-07, "loss": 0.342, "step": 10949 }, { "epoch": 0.8674985145573381, "grad_norm": 1.305999328023627, "learning_rate": 9.06346838254053e-07, "loss": 0.1971, "step": 10950 }, { "epoch": 0.8675777381659735, "grad_norm": 1.2046897929786464, "learning_rate": 9.052796723929002e-07, "loss": 0.1916, "step": 10951 }, { "epoch": 0.8676569617746088, "grad_norm": 1.3205026075550685, "learning_rate": 9.042131053753211e-07, "loss": 0.2048, "step": 10952 }, { "epoch": 0.8677361853832442, "grad_norm": 1.4058997461848455, "learning_rate": 9.031471372715405e-07, "loss": 0.266, "step": 10953 }, { "epoch": 0.8678154089918796, "grad_norm": 1.316529575663865, "learning_rate": 9.020817681517513e-07, "loss": 0.2145, "step": 10954 }, { "epoch": 0.8678946326005149, "grad_norm": 1.2794858968322589, "learning_rate": 9.010169980861005e-07, "loss": 0.207, "step": 10955 }, { "epoch": 0.8679738562091504, "grad_norm": 1.2287307359967574, "learning_rate": 8.999528271446989e-07, "loss": 0.1669, "step": 10956 }, { "epoch": 0.8680530798177857, "grad_norm": 1.0903229590726147, "learning_rate": 8.988892553976169e-07, "loss": 0.1455, "step": 10957 }, { "epoch": 0.8681323034264211, "grad_norm": 1.582330315197623, "learning_rate": 8.978262829148876e-07, "loss": 0.2678, "step": 10958 }, { "epoch": 0.8682115270350564, "grad_norm": 1.5708438063577659, "learning_rate": 8.96763909766497e-07, "loss": 0.2695, "step": 10959 }, { "epoch": 0.8682907506436918, "grad_norm": 1.3148791979664767, "learning_rate": 8.957021360224039e-07, "loss": 0.2388, "step": 10960 }, { "epoch": 0.8683699742523272, "grad_norm": 1.7123619185406738, "learning_rate": 8.946409617525175e-07, "loss": 0.2949, "step": 10961 }, { "epoch": 0.8684491978609625, "grad_norm": 1.4435241513846273, "learning_rate": 8.935803870267101e-07, "loss": 0.2522, "step": 10962 }, { "epoch": 0.868528421469598, "grad_norm": 1.120034147840874, "learning_rate": 8.925204119148189e-07, "loss": 0.1554, "step": 10963 }, { "epoch": 0.8686076450782333, "grad_norm": 1.3501466941465314, "learning_rate": 8.914610364866361e-07, "loss": 0.2862, "step": 10964 }, { "epoch": 0.8686868686868687, "grad_norm": 1.452284692523353, "learning_rate": 8.904022608119145e-07, "loss": 0.2429, "step": 10965 }, { "epoch": 0.868766092295504, "grad_norm": 1.2375972280288476, "learning_rate": 8.89344084960374e-07, "loss": 0.2069, "step": 10966 }, { "epoch": 0.8688453159041394, "grad_norm": 1.2389605328924405, "learning_rate": 8.882865090016868e-07, "loss": 0.1876, "step": 10967 }, { "epoch": 0.8689245395127748, "grad_norm": 1.8458958577789004, "learning_rate": 8.872295330054915e-07, "loss": 0.3134, "step": 10968 }, { "epoch": 0.8690037631214101, "grad_norm": 1.371580727712424, "learning_rate": 8.861731570413801e-07, "loss": 0.225, "step": 10969 }, { "epoch": 0.8690829867300456, "grad_norm": 1.7226551739868343, "learning_rate": 8.85117381178916e-07, "loss": 0.2482, "step": 10970 }, { "epoch": 0.8691622103386809, "grad_norm": 1.2290738992879762, "learning_rate": 8.840622054876147e-07, "loss": 0.1513, "step": 10971 }, { "epoch": 0.8692414339473163, "grad_norm": 1.2683605262144488, "learning_rate": 8.830076300369517e-07, "loss": 0.1634, "step": 10972 }, { "epoch": 0.8693206575559517, "grad_norm": 1.34435966259007, "learning_rate": 8.819536548963703e-07, "loss": 0.1668, "step": 10973 }, { "epoch": 0.869399881164587, "grad_norm": 1.386717789523184, "learning_rate": 8.809002801352673e-07, "loss": 0.1848, "step": 10974 }, { "epoch": 0.8694791047732224, "grad_norm": 0.9654778197765824, "learning_rate": 8.798475058230005e-07, "loss": 0.1142, "step": 10975 }, { "epoch": 0.8695583283818578, "grad_norm": 1.6760387777734673, "learning_rate": 8.787953320288945e-07, "loss": 0.2834, "step": 10976 }, { "epoch": 0.8696375519904932, "grad_norm": 1.3551031098416466, "learning_rate": 8.777437588222271e-07, "loss": 0.2072, "step": 10977 }, { "epoch": 0.8697167755991285, "grad_norm": 1.4616504531889354, "learning_rate": 8.766927862722374e-07, "loss": 0.2546, "step": 10978 }, { "epoch": 0.869795999207764, "grad_norm": 1.3499854907353181, "learning_rate": 8.756424144481313e-07, "loss": 0.2135, "step": 10979 }, { "epoch": 0.8698752228163993, "grad_norm": 1.322560336155635, "learning_rate": 8.745926434190688e-07, "loss": 0.1567, "step": 10980 }, { "epoch": 0.8699544464250346, "grad_norm": 1.4502476581345753, "learning_rate": 8.735434732541704e-07, "loss": 0.2297, "step": 10981 }, { "epoch": 0.87003367003367, "grad_norm": 1.1899944458752492, "learning_rate": 8.724949040225217e-07, "loss": 0.1604, "step": 10982 }, { "epoch": 0.8701128936423054, "grad_norm": 1.001816183839164, "learning_rate": 8.714469357931654e-07, "loss": 0.1413, "step": 10983 }, { "epoch": 0.8701921172509408, "grad_norm": 1.3237821533786718, "learning_rate": 8.703995686351041e-07, "loss": 0.2083, "step": 10984 }, { "epoch": 0.8702713408595761, "grad_norm": 1.4795948710707902, "learning_rate": 8.693528026173015e-07, "loss": 0.2303, "step": 10985 }, { "epoch": 0.8703505644682116, "grad_norm": 1.5103329626783464, "learning_rate": 8.683066378086846e-07, "loss": 0.2453, "step": 10986 }, { "epoch": 0.8704297880768469, "grad_norm": 1.3303880241439068, "learning_rate": 8.672610742781363e-07, "loss": 0.266, "step": 10987 }, { "epoch": 0.8705090116854822, "grad_norm": 1.428034222228885, "learning_rate": 8.662161120945e-07, "loss": 0.2029, "step": 10988 }, { "epoch": 0.8705882352941177, "grad_norm": 1.5814420767899395, "learning_rate": 8.651717513265867e-07, "loss": 0.2601, "step": 10989 }, { "epoch": 0.870667458902753, "grad_norm": 1.2654750184815742, "learning_rate": 8.641279920431589e-07, "loss": 0.2102, "step": 10990 }, { "epoch": 0.8707466825113884, "grad_norm": 1.1956528303822411, "learning_rate": 8.630848343129417e-07, "loss": 0.1529, "step": 10991 }, { "epoch": 0.8708259061200238, "grad_norm": 1.432123432044092, "learning_rate": 8.620422782046268e-07, "loss": 0.2574, "step": 10992 }, { "epoch": 0.8709051297286592, "grad_norm": 1.556229503727342, "learning_rate": 8.61000323786858e-07, "loss": 0.2348, "step": 10993 }, { "epoch": 0.8709843533372945, "grad_norm": 1.536150081819509, "learning_rate": 8.599589711282419e-07, "loss": 0.2741, "step": 10994 }, { "epoch": 0.8710635769459298, "grad_norm": 1.326657506866106, "learning_rate": 8.589182202973512e-07, "loss": 0.1705, "step": 10995 }, { "epoch": 0.8711428005545653, "grad_norm": 1.4198472440024905, "learning_rate": 8.578780713627111e-07, "loss": 0.1942, "step": 10996 }, { "epoch": 0.8712220241632006, "grad_norm": 1.5086303009041653, "learning_rate": 8.568385243928112e-07, "loss": 0.2148, "step": 10997 }, { "epoch": 0.871301247771836, "grad_norm": 1.5810375505062402, "learning_rate": 8.55799579456098e-07, "loss": 0.2061, "step": 10998 }, { "epoch": 0.8713804713804714, "grad_norm": 1.407463848256604, "learning_rate": 8.547612366209856e-07, "loss": 0.2059, "step": 10999 }, { "epoch": 0.8714596949891068, "grad_norm": 1.4260066276032026, "learning_rate": 8.537234959558416e-07, "loss": 0.249, "step": 11000 }, { "epoch": 0.8715389185977421, "grad_norm": 1.2726993720274564, "learning_rate": 8.526863575289945e-07, "loss": 0.1818, "step": 11001 }, { "epoch": 0.8716181422063775, "grad_norm": 1.3746304664130624, "learning_rate": 8.516498214087387e-07, "loss": 0.2604, "step": 11002 }, { "epoch": 0.8716973658150129, "grad_norm": 1.3874968294741086, "learning_rate": 8.50613887663323e-07, "loss": 0.211, "step": 11003 }, { "epoch": 0.8717765894236482, "grad_norm": 1.6073515607700228, "learning_rate": 8.495785563609571e-07, "loss": 0.2727, "step": 11004 }, { "epoch": 0.8718558130322837, "grad_norm": 1.091733494208886, "learning_rate": 8.485438275698154e-07, "loss": 0.1515, "step": 11005 }, { "epoch": 0.871935036640919, "grad_norm": 1.598684502948701, "learning_rate": 8.475097013580292e-07, "loss": 0.3159, "step": 11006 }, { "epoch": 0.8720142602495544, "grad_norm": 1.5456911200046748, "learning_rate": 8.46476177793688e-07, "loss": 0.257, "step": 11007 }, { "epoch": 0.8720934838581897, "grad_norm": 1.4604479451342425, "learning_rate": 8.454432569448489e-07, "loss": 0.2196, "step": 11008 }, { "epoch": 0.8721727074668251, "grad_norm": 1.1876870982750651, "learning_rate": 8.444109388795218e-07, "loss": 0.1993, "step": 11009 }, { "epoch": 0.8722519310754605, "grad_norm": 1.4787841642784216, "learning_rate": 8.43379223665679e-07, "loss": 0.2092, "step": 11010 }, { "epoch": 0.8723311546840958, "grad_norm": 1.8490366852740983, "learning_rate": 8.423481113712573e-07, "loss": 0.2833, "step": 11011 }, { "epoch": 0.8724103782927313, "grad_norm": 1.4103347143508123, "learning_rate": 8.413176020641489e-07, "loss": 0.2437, "step": 11012 }, { "epoch": 0.8724896019013666, "grad_norm": 1.7664250082128186, "learning_rate": 8.402876958122075e-07, "loss": 0.2252, "step": 11013 }, { "epoch": 0.872568825510002, "grad_norm": 1.0938766565396143, "learning_rate": 8.392583926832454e-07, "loss": 0.1693, "step": 11014 }, { "epoch": 0.8726480491186374, "grad_norm": 0.9905668909971334, "learning_rate": 8.382296927450417e-07, "loss": 0.1774, "step": 11015 }, { "epoch": 0.8727272727272727, "grad_norm": 1.233948719998692, "learning_rate": 8.37201596065329e-07, "loss": 0.2105, "step": 11016 }, { "epoch": 0.8728064963359081, "grad_norm": 1.3127308656656749, "learning_rate": 8.361741027118009e-07, "loss": 0.1875, "step": 11017 }, { "epoch": 0.8728857199445434, "grad_norm": 1.232155174580531, "learning_rate": 8.351472127521166e-07, "loss": 0.2242, "step": 11018 }, { "epoch": 0.8729649435531789, "grad_norm": 1.0847082777662558, "learning_rate": 8.341209262538896e-07, "loss": 0.1817, "step": 11019 }, { "epoch": 0.8730441671618142, "grad_norm": 1.1706559251979154, "learning_rate": 8.330952432846939e-07, "loss": 0.1875, "step": 11020 }, { "epoch": 0.8731233907704496, "grad_norm": 1.8639322309976276, "learning_rate": 8.320701639120709e-07, "loss": 0.2501, "step": 11021 }, { "epoch": 0.873202614379085, "grad_norm": 1.2378732219655406, "learning_rate": 8.310456882035145e-07, "loss": 0.2084, "step": 11022 }, { "epoch": 0.8732818379877203, "grad_norm": 1.6296874187400598, "learning_rate": 8.300218162264783e-07, "loss": 0.2419, "step": 11023 }, { "epoch": 0.8733610615963557, "grad_norm": 1.6257912942697956, "learning_rate": 8.289985480483864e-07, "loss": 0.245, "step": 11024 }, { "epoch": 0.8734402852049911, "grad_norm": 1.5562493278331457, "learning_rate": 8.279758837366103e-07, "loss": 0.3024, "step": 11025 }, { "epoch": 0.8735195088136265, "grad_norm": 1.0670702602443827, "learning_rate": 8.269538233584884e-07, "loss": 0.1337, "step": 11026 }, { "epoch": 0.8735987324222618, "grad_norm": 1.197466529482208, "learning_rate": 8.259323669813202e-07, "loss": 0.1636, "step": 11027 }, { "epoch": 0.8736779560308973, "grad_norm": 1.0730691179579321, "learning_rate": 8.24911514672363e-07, "loss": 0.182, "step": 11028 }, { "epoch": 0.8737571796395326, "grad_norm": 1.2043224340468681, "learning_rate": 8.23891266498833e-07, "loss": 0.231, "step": 11029 }, { "epoch": 0.8738364032481679, "grad_norm": 1.2343701467696153, "learning_rate": 8.228716225279121e-07, "loss": 0.1678, "step": 11030 }, { "epoch": 0.8739156268568034, "grad_norm": 1.2574325458080249, "learning_rate": 8.218525828267377e-07, "loss": 0.2006, "step": 11031 }, { "epoch": 0.8739948504654387, "grad_norm": 1.3345528043643402, "learning_rate": 8.208341474624071e-07, "loss": 0.2395, "step": 11032 }, { "epoch": 0.8740740740740741, "grad_norm": 1.5516198060829922, "learning_rate": 8.198163165019812e-07, "loss": 0.2861, "step": 11033 }, { "epoch": 0.8741532976827094, "grad_norm": 1.2017585778556426, "learning_rate": 8.187990900124787e-07, "loss": 0.1844, "step": 11034 }, { "epoch": 0.8742325212913448, "grad_norm": 1.2394218360158256, "learning_rate": 8.177824680608781e-07, "loss": 0.202, "step": 11035 }, { "epoch": 0.8743117448999802, "grad_norm": 1.3411817428891513, "learning_rate": 8.167664507141215e-07, "loss": 0.2764, "step": 11036 }, { "epoch": 0.8743909685086155, "grad_norm": 1.3824592149289336, "learning_rate": 8.157510380391065e-07, "loss": 0.2176, "step": 11037 }, { "epoch": 0.874470192117251, "grad_norm": 1.52509665045312, "learning_rate": 8.14736230102694e-07, "loss": 0.2434, "step": 11038 }, { "epoch": 0.8745494157258863, "grad_norm": 1.3003068545630025, "learning_rate": 8.137220269717028e-07, "loss": 0.1838, "step": 11039 }, { "epoch": 0.8746286393345217, "grad_norm": 1.6021208927992185, "learning_rate": 8.127084287129161e-07, "loss": 0.2531, "step": 11040 }, { "epoch": 0.874707862943157, "grad_norm": 1.8677573615930845, "learning_rate": 8.116954353930728e-07, "loss": 0.302, "step": 11041 }, { "epoch": 0.8747870865517924, "grad_norm": 1.401093329763425, "learning_rate": 8.106830470788729e-07, "loss": 0.1962, "step": 11042 }, { "epoch": 0.8748663101604278, "grad_norm": 1.2534528435668444, "learning_rate": 8.096712638369797e-07, "loss": 0.2002, "step": 11043 }, { "epoch": 0.8749455337690631, "grad_norm": 1.0708341198687092, "learning_rate": 8.086600857340121e-07, "loss": 0.1855, "step": 11044 }, { "epoch": 0.8750247573776986, "grad_norm": 1.1905031722178174, "learning_rate": 8.076495128365502e-07, "loss": 0.1736, "step": 11045 }, { "epoch": 0.8751039809863339, "grad_norm": 1.1261796938267055, "learning_rate": 8.066395452111387e-07, "loss": 0.1873, "step": 11046 }, { "epoch": 0.8751832045949693, "grad_norm": 1.271527684546844, "learning_rate": 8.056301829242785e-07, "loss": 0.2105, "step": 11047 }, { "epoch": 0.8752624282036047, "grad_norm": 1.4334977524564723, "learning_rate": 8.046214260424279e-07, "loss": 0.2136, "step": 11048 }, { "epoch": 0.87534165181224, "grad_norm": 1.1686400051912618, "learning_rate": 8.036132746320125e-07, "loss": 0.1832, "step": 11049 }, { "epoch": 0.8754208754208754, "grad_norm": 1.3117493178899653, "learning_rate": 8.026057287594136e-07, "loss": 0.2429, "step": 11050 }, { "epoch": 0.8755000990295108, "grad_norm": 1.22526825990827, "learning_rate": 8.015987884909692e-07, "loss": 0.2219, "step": 11051 }, { "epoch": 0.8755793226381462, "grad_norm": 1.2637910742561942, "learning_rate": 8.005924538929877e-07, "loss": 0.2317, "step": 11052 }, { "epoch": 0.8756585462467815, "grad_norm": 1.2336321698790642, "learning_rate": 7.99586725031728e-07, "loss": 0.1879, "step": 11053 }, { "epoch": 0.875737769855417, "grad_norm": 1.4825172356737804, "learning_rate": 7.985816019734127e-07, "loss": 0.2367, "step": 11054 }, { "epoch": 0.8758169934640523, "grad_norm": 1.0725833525479378, "learning_rate": 7.975770847842234e-07, "loss": 0.1663, "step": 11055 }, { "epoch": 0.8758962170726876, "grad_norm": 1.4873619822004873, "learning_rate": 7.965731735303051e-07, "loss": 0.2388, "step": 11056 }, { "epoch": 0.875975440681323, "grad_norm": 1.2603667465022275, "learning_rate": 7.955698682777601e-07, "loss": 0.1762, "step": 11057 }, { "epoch": 0.8760546642899584, "grad_norm": 1.155112415391232, "learning_rate": 7.945671690926471e-07, "loss": 0.1318, "step": 11058 }, { "epoch": 0.8761338878985938, "grad_norm": 1.3153989966272523, "learning_rate": 7.935650760409952e-07, "loss": 0.2959, "step": 11059 }, { "epoch": 0.8762131115072291, "grad_norm": 1.3061923356999392, "learning_rate": 7.925635891887839e-07, "loss": 0.2124, "step": 11060 }, { "epoch": 0.8762923351158646, "grad_norm": 1.406229190156455, "learning_rate": 7.915627086019561e-07, "loss": 0.1997, "step": 11061 }, { "epoch": 0.8763715587244999, "grad_norm": 1.4943950331766096, "learning_rate": 7.905624343464169e-07, "loss": 0.209, "step": 11062 }, { "epoch": 0.8764507823331352, "grad_norm": 1.1755314891839619, "learning_rate": 7.895627664880278e-07, "loss": 0.1829, "step": 11063 }, { "epoch": 0.8765300059417707, "grad_norm": 1.049558416720703, "learning_rate": 7.88563705092612e-07, "loss": 0.1286, "step": 11064 }, { "epoch": 0.876609229550406, "grad_norm": 1.156833231199455, "learning_rate": 7.875652502259545e-07, "loss": 0.204, "step": 11065 }, { "epoch": 0.8766884531590414, "grad_norm": 1.1483347128609238, "learning_rate": 7.865674019537983e-07, "loss": 0.1588, "step": 11066 }, { "epoch": 0.8767676767676768, "grad_norm": 1.240745984158369, "learning_rate": 7.855701603418442e-07, "loss": 0.2374, "step": 11067 }, { "epoch": 0.8768469003763122, "grad_norm": 1.126659633137706, "learning_rate": 7.845735254557608e-07, "loss": 0.1643, "step": 11068 }, { "epoch": 0.8769261239849475, "grad_norm": 1.3738534308841979, "learning_rate": 7.835774973611687e-07, "loss": 0.2059, "step": 11069 }, { "epoch": 0.8770053475935828, "grad_norm": 1.7316568053771537, "learning_rate": 7.825820761236514e-07, "loss": 0.2723, "step": 11070 }, { "epoch": 0.8770845712022183, "grad_norm": 1.4668483675071118, "learning_rate": 7.815872618087506e-07, "loss": 0.2109, "step": 11071 }, { "epoch": 0.8771637948108536, "grad_norm": 1.255684583962485, "learning_rate": 7.805930544819751e-07, "loss": 0.2296, "step": 11072 }, { "epoch": 0.877243018419489, "grad_norm": 1.479811680000921, "learning_rate": 7.795994542087859e-07, "loss": 0.2477, "step": 11073 }, { "epoch": 0.8773222420281244, "grad_norm": 1.307791566773147, "learning_rate": 7.786064610546051e-07, "loss": 0.2363, "step": 11074 }, { "epoch": 0.8774014656367598, "grad_norm": 1.4106382168016707, "learning_rate": 7.776140750848205e-07, "loss": 0.2341, "step": 11075 }, { "epoch": 0.8774806892453951, "grad_norm": 1.4019740144926003, "learning_rate": 7.766222963647729e-07, "loss": 0.2185, "step": 11076 }, { "epoch": 0.8775599128540305, "grad_norm": 1.361092548799947, "learning_rate": 7.756311249597659e-07, "loss": 0.1828, "step": 11077 }, { "epoch": 0.8776391364626659, "grad_norm": 1.0306974626227006, "learning_rate": 7.746405609350661e-07, "loss": 0.1323, "step": 11078 }, { "epoch": 0.8777183600713012, "grad_norm": 1.461759794181452, "learning_rate": 7.736506043558956e-07, "loss": 0.2763, "step": 11079 }, { "epoch": 0.8777975836799367, "grad_norm": 1.5478319559655647, "learning_rate": 7.726612552874368e-07, "loss": 0.2292, "step": 11080 }, { "epoch": 0.877876807288572, "grad_norm": 1.3286755972548578, "learning_rate": 7.716725137948366e-07, "loss": 0.1805, "step": 11081 }, { "epoch": 0.8779560308972074, "grad_norm": 1.3462673078352407, "learning_rate": 7.706843799431985e-07, "loss": 0.2273, "step": 11082 }, { "epoch": 0.8780352545058427, "grad_norm": 1.3847279568819344, "learning_rate": 7.696968537975847e-07, "loss": 0.2084, "step": 11083 }, { "epoch": 0.8781144781144781, "grad_norm": 1.5835594301569003, "learning_rate": 7.687099354230177e-07, "loss": 0.2529, "step": 11084 }, { "epoch": 0.8781937017231135, "grad_norm": 1.4199092441760521, "learning_rate": 7.677236248844855e-07, "loss": 0.1946, "step": 11085 }, { "epoch": 0.8782729253317488, "grad_norm": 1.3960097555945583, "learning_rate": 7.667379222469295e-07, "loss": 0.2542, "step": 11086 }, { "epoch": 0.8783521489403843, "grad_norm": 1.3809982901171423, "learning_rate": 7.657528275752524e-07, "loss": 0.1681, "step": 11087 }, { "epoch": 0.8784313725490196, "grad_norm": 1.322361657260325, "learning_rate": 7.647683409343198e-07, "loss": 0.2203, "step": 11088 }, { "epoch": 0.878510596157655, "grad_norm": 1.2374436235833877, "learning_rate": 7.637844623889557e-07, "loss": 0.171, "step": 11089 }, { "epoch": 0.8785898197662904, "grad_norm": 1.0636589319077459, "learning_rate": 7.628011920039414e-07, "loss": 0.1251, "step": 11090 }, { "epoch": 0.8786690433749257, "grad_norm": 1.2797663851150352, "learning_rate": 7.618185298440239e-07, "loss": 0.2013, "step": 11091 }, { "epoch": 0.8787482669835611, "grad_norm": 1.017909437188796, "learning_rate": 7.608364759739039e-07, "loss": 0.1548, "step": 11092 }, { "epoch": 0.8788274905921964, "grad_norm": 1.2060453997920095, "learning_rate": 7.598550304582453e-07, "loss": 0.1451, "step": 11093 }, { "epoch": 0.8789067142008319, "grad_norm": 1.4554247336312824, "learning_rate": 7.588741933616728e-07, "loss": 0.2478, "step": 11094 }, { "epoch": 0.8789859378094672, "grad_norm": 1.139426926125611, "learning_rate": 7.578939647487705e-07, "loss": 0.199, "step": 11095 }, { "epoch": 0.8790651614181026, "grad_norm": 1.7164346720472863, "learning_rate": 7.569143446840776e-07, "loss": 0.1908, "step": 11096 }, { "epoch": 0.879144385026738, "grad_norm": 1.2241552551552792, "learning_rate": 7.559353332321029e-07, "loss": 0.1579, "step": 11097 }, { "epoch": 0.8792236086353733, "grad_norm": 1.3042312548615864, "learning_rate": 7.549569304573057e-07, "loss": 0.2983, "step": 11098 }, { "epoch": 0.8793028322440087, "grad_norm": 1.4597314264982106, "learning_rate": 7.539791364241111e-07, "loss": 0.2171, "step": 11099 }, { "epoch": 0.8793820558526441, "grad_norm": 1.288960280731306, "learning_rate": 7.530019511969e-07, "loss": 0.2132, "step": 11100 }, { "epoch": 0.8794612794612795, "grad_norm": 1.2335618199467266, "learning_rate": 7.520253748400175e-07, "loss": 0.1876, "step": 11101 }, { "epoch": 0.8795405030699148, "grad_norm": 1.282500744679882, "learning_rate": 7.510494074177666e-07, "loss": 0.1572, "step": 11102 }, { "epoch": 0.8796197266785503, "grad_norm": 1.3745829898359752, "learning_rate": 7.500740489944092e-07, "loss": 0.204, "step": 11103 }, { "epoch": 0.8796989502871856, "grad_norm": 1.2433566767216206, "learning_rate": 7.490992996341662e-07, "loss": 0.1751, "step": 11104 }, { "epoch": 0.8797781738958209, "grad_norm": 1.2242354687551547, "learning_rate": 7.481251594012218e-07, "loss": 0.2253, "step": 11105 }, { "epoch": 0.8798573975044564, "grad_norm": 1.0540689391313347, "learning_rate": 7.471516283597191e-07, "loss": 0.1301, "step": 11106 }, { "epoch": 0.8799366211130917, "grad_norm": 1.1384716469699712, "learning_rate": 7.461787065737602e-07, "loss": 0.1699, "step": 11107 }, { "epoch": 0.8800158447217271, "grad_norm": 1.1431374500923583, "learning_rate": 7.452063941074073e-07, "loss": 0.1847, "step": 11108 }, { "epoch": 0.8800950683303624, "grad_norm": 1.3455507058059666, "learning_rate": 7.442346910246801e-07, "loss": 0.2071, "step": 11109 }, { "epoch": 0.8801742919389978, "grad_norm": 1.0702238151840346, "learning_rate": 7.432635973895652e-07, "loss": 0.1506, "step": 11110 }, { "epoch": 0.8802535155476332, "grad_norm": 1.4738004171473327, "learning_rate": 7.422931132660005e-07, "loss": 0.2722, "step": 11111 }, { "epoch": 0.8803327391562685, "grad_norm": 1.1988007343533718, "learning_rate": 7.413232387178882e-07, "loss": 0.1566, "step": 11112 }, { "epoch": 0.880411962764904, "grad_norm": 1.5629736332022217, "learning_rate": 7.403539738090914e-07, "loss": 0.182, "step": 11113 }, { "epoch": 0.8804911863735393, "grad_norm": 1.2045602113079472, "learning_rate": 7.393853186034316e-07, "loss": 0.2437, "step": 11114 }, { "epoch": 0.8805704099821747, "grad_norm": 1.7718334320501854, "learning_rate": 7.384172731646877e-07, "loss": 0.2216, "step": 11115 }, { "epoch": 0.88064963359081, "grad_norm": 1.4815659028967725, "learning_rate": 7.374498375566042e-07, "loss": 0.2767, "step": 11116 }, { "epoch": 0.8807288571994454, "grad_norm": 1.3618822420471255, "learning_rate": 7.364830118428801e-07, "loss": 0.3079, "step": 11117 }, { "epoch": 0.8808080808080808, "grad_norm": 1.319939752152183, "learning_rate": 7.355167960871745e-07, "loss": 0.2413, "step": 11118 }, { "epoch": 0.8808873044167161, "grad_norm": 1.3448470650454571, "learning_rate": 7.345511903531122e-07, "loss": 0.206, "step": 11119 }, { "epoch": 0.8809665280253516, "grad_norm": 1.199780251106472, "learning_rate": 7.335861947042711e-07, "loss": 0.1903, "step": 11120 }, { "epoch": 0.8810457516339869, "grad_norm": 1.0882751935282775, "learning_rate": 7.326218092041903e-07, "loss": 0.1897, "step": 11121 }, { "epoch": 0.8811249752426223, "grad_norm": 1.5217220129699023, "learning_rate": 7.316580339163736e-07, "loss": 0.2356, "step": 11122 }, { "epoch": 0.8812041988512577, "grad_norm": 1.0117488945426965, "learning_rate": 7.306948689042792e-07, "loss": 0.1478, "step": 11123 }, { "epoch": 0.881283422459893, "grad_norm": 1.3570468389853543, "learning_rate": 7.297323142313262e-07, "loss": 0.1979, "step": 11124 }, { "epoch": 0.8813626460685284, "grad_norm": 1.1616341187651766, "learning_rate": 7.287703699608928e-07, "loss": 0.2282, "step": 11125 }, { "epoch": 0.8814418696771638, "grad_norm": 1.222054044275733, "learning_rate": 7.278090361563228e-07, "loss": 0.199, "step": 11126 }, { "epoch": 0.8815210932857992, "grad_norm": 1.6931377696165042, "learning_rate": 7.268483128809122e-07, "loss": 0.3056, "step": 11127 }, { "epoch": 0.8816003168944345, "grad_norm": 1.358530680217993, "learning_rate": 7.258882001979184e-07, "loss": 0.2068, "step": 11128 }, { "epoch": 0.88167954050307, "grad_norm": 1.2980385919963116, "learning_rate": 7.24928698170565e-07, "loss": 0.2191, "step": 11129 }, { "epoch": 0.8817587641117053, "grad_norm": 1.7320359056148045, "learning_rate": 7.239698068620272e-07, "loss": 0.3471, "step": 11130 }, { "epoch": 0.8818379877203406, "grad_norm": 1.1214908432589366, "learning_rate": 7.230115263354431e-07, "loss": 0.1731, "step": 11131 }, { "epoch": 0.881917211328976, "grad_norm": 1.4544838823560668, "learning_rate": 7.220538566539137e-07, "loss": 0.2368, "step": 11132 }, { "epoch": 0.8819964349376114, "grad_norm": 1.444508196116555, "learning_rate": 7.21096797880495e-07, "loss": 0.242, "step": 11133 }, { "epoch": 0.8820756585462468, "grad_norm": 1.5998714020825704, "learning_rate": 7.201403500782034e-07, "loss": 0.258, "step": 11134 }, { "epoch": 0.8821548821548821, "grad_norm": 1.5047038343701382, "learning_rate": 7.191845133100195e-07, "loss": 0.2553, "step": 11135 }, { "epoch": 0.8822341057635176, "grad_norm": 1.2335738143808541, "learning_rate": 7.182292876388785e-07, "loss": 0.1876, "step": 11136 }, { "epoch": 0.8823133293721529, "grad_norm": 1.2675700310218028, "learning_rate": 7.17274673127677e-07, "loss": 0.1927, "step": 11137 }, { "epoch": 0.8823925529807882, "grad_norm": 1.6746170357596029, "learning_rate": 7.163206698392744e-07, "loss": 0.235, "step": 11138 }, { "epoch": 0.8824717765894237, "grad_norm": 1.112452350595933, "learning_rate": 7.153672778364851e-07, "loss": 0.1865, "step": 11139 }, { "epoch": 0.882551000198059, "grad_norm": 1.3476963583302197, "learning_rate": 7.144144971820855e-07, "loss": 0.2492, "step": 11140 }, { "epoch": 0.8826302238066944, "grad_norm": 1.3745601291651905, "learning_rate": 7.134623279388098e-07, "loss": 0.2108, "step": 11141 }, { "epoch": 0.8827094474153298, "grad_norm": 1.4471829751991212, "learning_rate": 7.12510770169359e-07, "loss": 0.1942, "step": 11142 }, { "epoch": 0.8827886710239652, "grad_norm": 1.3579750508280246, "learning_rate": 7.115598239363842e-07, "loss": 0.2415, "step": 11143 }, { "epoch": 0.8828678946326005, "grad_norm": 1.649660353080689, "learning_rate": 7.106094893025006e-07, "loss": 0.3616, "step": 11144 }, { "epoch": 0.8829471182412358, "grad_norm": 1.4083689544411717, "learning_rate": 7.096597663302862e-07, "loss": 0.2494, "step": 11145 }, { "epoch": 0.8830263418498713, "grad_norm": 1.1868097209900177, "learning_rate": 7.087106550822731e-07, "loss": 0.2271, "step": 11146 }, { "epoch": 0.8831055654585066, "grad_norm": 0.9655119414661911, "learning_rate": 7.077621556209557e-07, "loss": 0.1362, "step": 11147 }, { "epoch": 0.883184789067142, "grad_norm": 1.25980641754946, "learning_rate": 7.068142680087909e-07, "loss": 0.2124, "step": 11148 }, { "epoch": 0.8832640126757774, "grad_norm": 1.382570708612095, "learning_rate": 7.058669923081896e-07, "loss": 0.2326, "step": 11149 }, { "epoch": 0.8833432362844128, "grad_norm": 1.2965121182964578, "learning_rate": 7.049203285815253e-07, "loss": 0.1973, "step": 11150 }, { "epoch": 0.8834224598930481, "grad_norm": 1.2962697860733432, "learning_rate": 7.03974276891134e-07, "loss": 0.1832, "step": 11151 }, { "epoch": 0.8835016835016835, "grad_norm": 1.1769949871104188, "learning_rate": 7.030288372993066e-07, "loss": 0.1942, "step": 11152 }, { "epoch": 0.8835809071103189, "grad_norm": 1.3493706930594893, "learning_rate": 7.020840098682968e-07, "loss": 0.1892, "step": 11153 }, { "epoch": 0.8836601307189542, "grad_norm": 1.19527452146034, "learning_rate": 7.011397946603138e-07, "loss": 0.1981, "step": 11154 }, { "epoch": 0.8837393543275897, "grad_norm": 1.2802627183424293, "learning_rate": 7.001961917375344e-07, "loss": 0.2223, "step": 11155 }, { "epoch": 0.883818577936225, "grad_norm": 1.0112715127758398, "learning_rate": 6.992532011620878e-07, "loss": 0.1678, "step": 11156 }, { "epoch": 0.8838978015448604, "grad_norm": 1.3056125498038613, "learning_rate": 6.983108229960633e-07, "loss": 0.1819, "step": 11157 }, { "epoch": 0.8839770251534957, "grad_norm": 1.6220393942856794, "learning_rate": 6.973690573015168e-07, "loss": 0.3556, "step": 11158 }, { "epoch": 0.8840562487621311, "grad_norm": 1.2363436001711858, "learning_rate": 6.964279041404553e-07, "loss": 0.2301, "step": 11159 }, { "epoch": 0.8841354723707665, "grad_norm": 1.2124722647500983, "learning_rate": 6.954873635748493e-07, "loss": 0.1897, "step": 11160 }, { "epoch": 0.8842146959794018, "grad_norm": 1.5778933884048083, "learning_rate": 6.945474356666326e-07, "loss": 0.2883, "step": 11161 }, { "epoch": 0.8842939195880373, "grad_norm": 1.5878407158219214, "learning_rate": 6.936081204776913e-07, "loss": 0.243, "step": 11162 }, { "epoch": 0.8843731431966726, "grad_norm": 1.2327575442565035, "learning_rate": 6.926694180698734e-07, "loss": 0.2355, "step": 11163 }, { "epoch": 0.884452366805308, "grad_norm": 1.7790062040204306, "learning_rate": 6.917313285049931e-07, "loss": 0.2953, "step": 11164 }, { "epoch": 0.8845315904139434, "grad_norm": 0.9657329218742188, "learning_rate": 6.907938518448154e-07, "loss": 0.1193, "step": 11165 }, { "epoch": 0.8846108140225787, "grad_norm": 1.7615270238150564, "learning_rate": 6.898569881510686e-07, "loss": 0.2776, "step": 11166 }, { "epoch": 0.8846900376312141, "grad_norm": 1.0801892530572057, "learning_rate": 6.889207374854434e-07, "loss": 0.144, "step": 11167 }, { "epoch": 0.8847692612398494, "grad_norm": 1.3636255150494567, "learning_rate": 6.879850999095849e-07, "loss": 0.1833, "step": 11168 }, { "epoch": 0.8848484848484849, "grad_norm": 1.2873457344877273, "learning_rate": 6.870500754851017e-07, "loss": 0.2284, "step": 11169 }, { "epoch": 0.8849277084571202, "grad_norm": 1.3261903324981807, "learning_rate": 6.861156642735578e-07, "loss": 0.2459, "step": 11170 }, { "epoch": 0.8850069320657556, "grad_norm": 1.3679142563892701, "learning_rate": 6.851818663364839e-07, "loss": 0.2227, "step": 11171 }, { "epoch": 0.885086155674391, "grad_norm": 1.259947387383908, "learning_rate": 6.842486817353633e-07, "loss": 0.1846, "step": 11172 }, { "epoch": 0.8851653792830263, "grad_norm": 1.255798605486765, "learning_rate": 6.833161105316421e-07, "loss": 0.1529, "step": 11173 }, { "epoch": 0.8852446028916617, "grad_norm": 1.3375287177979553, "learning_rate": 6.823841527867259e-07, "loss": 0.1565, "step": 11174 }, { "epoch": 0.8853238265002971, "grad_norm": 1.1555889860871866, "learning_rate": 6.814528085619809e-07, "loss": 0.2071, "step": 11175 }, { "epoch": 0.8854030501089325, "grad_norm": 1.607548745541149, "learning_rate": 6.805220779187293e-07, "loss": 0.2745, "step": 11176 }, { "epoch": 0.8854822737175678, "grad_norm": 1.562189023855713, "learning_rate": 6.795919609182566e-07, "loss": 0.2255, "step": 11177 }, { "epoch": 0.8855614973262033, "grad_norm": 1.1739269051586865, "learning_rate": 6.78662457621807e-07, "loss": 0.171, "step": 11178 }, { "epoch": 0.8856407209348386, "grad_norm": 1.4386673258686113, "learning_rate": 6.777335680905817e-07, "loss": 0.2452, "step": 11179 }, { "epoch": 0.8857199445434739, "grad_norm": 1.4385564294416149, "learning_rate": 6.768052923857482e-07, "loss": 0.2663, "step": 11180 }, { "epoch": 0.8857991681521094, "grad_norm": 1.2830687063579134, "learning_rate": 6.758776305684245e-07, "loss": 0.1993, "step": 11181 }, { "epoch": 0.8858783917607447, "grad_norm": 1.2216385577401376, "learning_rate": 6.749505826996927e-07, "loss": 0.1905, "step": 11182 }, { "epoch": 0.8859576153693801, "grad_norm": 1.2403824442901343, "learning_rate": 6.740241488405963e-07, "loss": 0.1887, "step": 11183 }, { "epoch": 0.8860368389780154, "grad_norm": 1.481847442711561, "learning_rate": 6.730983290521365e-07, "loss": 0.2798, "step": 11184 }, { "epoch": 0.8861160625866509, "grad_norm": 1.3141211503990882, "learning_rate": 6.721731233952722e-07, "loss": 0.1858, "step": 11185 }, { "epoch": 0.8861952861952862, "grad_norm": 1.3968216571322727, "learning_rate": 6.712485319309258e-07, "loss": 0.2189, "step": 11186 }, { "epoch": 0.8862745098039215, "grad_norm": 1.4851416177370635, "learning_rate": 6.703245547199777e-07, "loss": 0.2847, "step": 11187 }, { "epoch": 0.886353733412557, "grad_norm": 1.1651476446106792, "learning_rate": 6.694011918232635e-07, "loss": 0.1892, "step": 11188 }, { "epoch": 0.8864329570211923, "grad_norm": 1.7532545089733287, "learning_rate": 6.684784433015867e-07, "loss": 0.3073, "step": 11189 }, { "epoch": 0.8865121806298277, "grad_norm": 1.4838859852101163, "learning_rate": 6.675563092157044e-07, "loss": 0.2549, "step": 11190 }, { "epoch": 0.886591404238463, "grad_norm": 1.345930590631144, "learning_rate": 6.666347896263326e-07, "loss": 0.23, "step": 11191 }, { "epoch": 0.8866706278470984, "grad_norm": 1.3008548402229794, "learning_rate": 6.657138845941524e-07, "loss": 0.215, "step": 11192 }, { "epoch": 0.8867498514557338, "grad_norm": 1.302783979038374, "learning_rate": 6.64793594179799e-07, "loss": 0.2004, "step": 11193 }, { "epoch": 0.8868290750643691, "grad_norm": 1.3305961263873558, "learning_rate": 6.638739184438681e-07, "loss": 0.2573, "step": 11194 }, { "epoch": 0.8869082986730046, "grad_norm": 1.4755547664068145, "learning_rate": 6.629548574469169e-07, "loss": 0.2312, "step": 11195 }, { "epoch": 0.8869875222816399, "grad_norm": 1.4096334066281144, "learning_rate": 6.620364112494627e-07, "loss": 0.2833, "step": 11196 }, { "epoch": 0.8870667458902753, "grad_norm": 1.601073610284219, "learning_rate": 6.611185799119791e-07, "loss": 0.303, "step": 11197 }, { "epoch": 0.8871459694989107, "grad_norm": 1.2344496729232577, "learning_rate": 6.602013634949001e-07, "loss": 0.1862, "step": 11198 }, { "epoch": 0.887225193107546, "grad_norm": 1.12707585042057, "learning_rate": 6.592847620586217e-07, "loss": 0.173, "step": 11199 }, { "epoch": 0.8873044167161814, "grad_norm": 1.4024972166229444, "learning_rate": 6.583687756634982e-07, "loss": 0.284, "step": 11200 }, { "epoch": 0.8873836403248168, "grad_norm": 1.3061542272842148, "learning_rate": 6.574534043698399e-07, "loss": 0.1859, "step": 11201 }, { "epoch": 0.8874628639334522, "grad_norm": 1.2102091399374055, "learning_rate": 6.565386482379221e-07, "loss": 0.1791, "step": 11202 }, { "epoch": 0.8875420875420875, "grad_norm": 1.3560088952632925, "learning_rate": 6.556245073279777e-07, "loss": 0.2441, "step": 11203 }, { "epoch": 0.887621311150723, "grad_norm": 1.3616742472740673, "learning_rate": 6.547109817001951e-07, "loss": 0.269, "step": 11204 }, { "epoch": 0.8877005347593583, "grad_norm": 1.5537426699024823, "learning_rate": 6.537980714147285e-07, "loss": 0.1964, "step": 11205 }, { "epoch": 0.8877797583679936, "grad_norm": 1.4469620018299898, "learning_rate": 6.528857765316887e-07, "loss": 0.1995, "step": 11206 }, { "epoch": 0.887858981976629, "grad_norm": 1.3594143263762029, "learning_rate": 6.519740971111432e-07, "loss": 0.2197, "step": 11207 }, { "epoch": 0.8879382055852644, "grad_norm": 1.330943572967816, "learning_rate": 6.510630332131262e-07, "loss": 0.2282, "step": 11208 }, { "epoch": 0.8880174291938998, "grad_norm": 1.12589695284791, "learning_rate": 6.501525848976231e-07, "loss": 0.1755, "step": 11209 }, { "epoch": 0.8880966528025351, "grad_norm": 1.2699153044146974, "learning_rate": 6.492427522245836e-07, "loss": 0.255, "step": 11210 }, { "epoch": 0.8881758764111706, "grad_norm": 1.0919734239627077, "learning_rate": 6.483335352539144e-07, "loss": 0.1212, "step": 11211 }, { "epoch": 0.8882551000198059, "grad_norm": 1.5067786466742912, "learning_rate": 6.474249340454874e-07, "loss": 0.1916, "step": 11212 }, { "epoch": 0.8883343236284412, "grad_norm": 1.4560719171590666, "learning_rate": 6.46516948659125e-07, "loss": 0.2527, "step": 11213 }, { "epoch": 0.8884135472370767, "grad_norm": 1.3816567045350425, "learning_rate": 6.456095791546147e-07, "loss": 0.1945, "step": 11214 }, { "epoch": 0.888492770845712, "grad_norm": 1.283116813443388, "learning_rate": 6.447028255917054e-07, "loss": 0.1947, "step": 11215 }, { "epoch": 0.8885719944543474, "grad_norm": 1.2115017691856218, "learning_rate": 6.437966880300995e-07, "loss": 0.163, "step": 11216 }, { "epoch": 0.8886512180629828, "grad_norm": 1.1112203629223338, "learning_rate": 6.428911665294601e-07, "loss": 0.1313, "step": 11217 }, { "epoch": 0.8887304416716182, "grad_norm": 1.201416995681213, "learning_rate": 6.419862611494165e-07, "loss": 0.1937, "step": 11218 }, { "epoch": 0.8888096652802535, "grad_norm": 1.4623509539080328, "learning_rate": 6.410819719495498e-07, "loss": 0.2313, "step": 11219 }, { "epoch": 0.8888888888888888, "grad_norm": 1.5902800834121844, "learning_rate": 6.401782989894012e-07, "loss": 0.2523, "step": 11220 }, { "epoch": 0.8889681124975243, "grad_norm": 1.5290063797646887, "learning_rate": 6.392752423284765e-07, "loss": 0.2426, "step": 11221 }, { "epoch": 0.8890473361061596, "grad_norm": 1.5289862780140833, "learning_rate": 6.383728020262359e-07, "loss": 0.2038, "step": 11222 }, { "epoch": 0.889126559714795, "grad_norm": 1.1468066913938832, "learning_rate": 6.374709781420995e-07, "loss": 0.1976, "step": 11223 }, { "epoch": 0.8892057833234304, "grad_norm": 1.226412591385611, "learning_rate": 6.365697707354512e-07, "loss": 0.2047, "step": 11224 }, { "epoch": 0.8892850069320658, "grad_norm": 1.7639263508154128, "learning_rate": 6.3566917986563e-07, "loss": 0.2615, "step": 11225 }, { "epoch": 0.8893642305407011, "grad_norm": 1.0820193125245585, "learning_rate": 6.347692055919353e-07, "loss": 0.1468, "step": 11226 }, { "epoch": 0.8894434541493365, "grad_norm": 1.2999128873784225, "learning_rate": 6.338698479736227e-07, "loss": 0.2429, "step": 11227 }, { "epoch": 0.8895226777579719, "grad_norm": 1.1729513853060665, "learning_rate": 6.329711070699162e-07, "loss": 0.178, "step": 11228 }, { "epoch": 0.8896019013666072, "grad_norm": 1.4321886833420465, "learning_rate": 6.320729829399918e-07, "loss": 0.2703, "step": 11229 }, { "epoch": 0.8896811249752427, "grad_norm": 1.384799836562096, "learning_rate": 6.311754756429833e-07, "loss": 0.2262, "step": 11230 }, { "epoch": 0.889760348583878, "grad_norm": 1.5089776676328601, "learning_rate": 6.302785852379911e-07, "loss": 0.3245, "step": 11231 }, { "epoch": 0.8898395721925134, "grad_norm": 1.440779782303088, "learning_rate": 6.293823117840703e-07, "loss": 0.2533, "step": 11232 }, { "epoch": 0.8899187958011487, "grad_norm": 1.3279719044597766, "learning_rate": 6.284866553402347e-07, "loss": 0.2717, "step": 11233 }, { "epoch": 0.8899980194097841, "grad_norm": 1.3136427436009237, "learning_rate": 6.275916159654616e-07, "loss": 0.1951, "step": 11234 }, { "epoch": 0.8900772430184195, "grad_norm": 1.3931944722997267, "learning_rate": 6.266971937186827e-07, "loss": 0.2134, "step": 11235 }, { "epoch": 0.8901564666270548, "grad_norm": 1.833767100352713, "learning_rate": 6.258033886587911e-07, "loss": 0.2147, "step": 11236 }, { "epoch": 0.8902356902356903, "grad_norm": 1.1945451950074149, "learning_rate": 6.249102008446418e-07, "loss": 0.1916, "step": 11237 }, { "epoch": 0.8903149138443256, "grad_norm": 1.4529406545023558, "learning_rate": 6.240176303350453e-07, "loss": 0.243, "step": 11238 }, { "epoch": 0.890394137452961, "grad_norm": 1.105851681039205, "learning_rate": 6.231256771887739e-07, "loss": 0.1409, "step": 11239 }, { "epoch": 0.8904733610615964, "grad_norm": 1.2816397853825638, "learning_rate": 6.222343414645571e-07, "loss": 0.2286, "step": 11240 }, { "epoch": 0.8905525846702317, "grad_norm": 1.2509633005763492, "learning_rate": 6.213436232210868e-07, "loss": 0.2671, "step": 11241 }, { "epoch": 0.8906318082788671, "grad_norm": 1.7968282501337458, "learning_rate": 6.204535225170116e-07, "loss": 0.2696, "step": 11242 }, { "epoch": 0.8907110318875024, "grad_norm": 1.3941002108600529, "learning_rate": 6.195640394109393e-07, "loss": 0.229, "step": 11243 }, { "epoch": 0.8907902554961379, "grad_norm": 1.2665362964850582, "learning_rate": 6.186751739614405e-07, "loss": 0.2198, "step": 11244 }, { "epoch": 0.8908694791047732, "grad_norm": 1.4517396183242446, "learning_rate": 6.177869262270419e-07, "loss": 0.1801, "step": 11245 }, { "epoch": 0.8909487027134086, "grad_norm": 1.30729734339586, "learning_rate": 6.168992962662279e-07, "loss": 0.2201, "step": 11246 }, { "epoch": 0.891027926322044, "grad_norm": 1.6369681960276554, "learning_rate": 6.160122841374482e-07, "loss": 0.3008, "step": 11247 }, { "epoch": 0.8911071499306793, "grad_norm": 1.6392414945503126, "learning_rate": 6.151258898991064e-07, "loss": 0.3393, "step": 11248 }, { "epoch": 0.8911863735393147, "grad_norm": 1.3285111060496306, "learning_rate": 6.142401136095666e-07, "loss": 0.2212, "step": 11249 }, { "epoch": 0.8912655971479501, "grad_norm": 1.0393734254481144, "learning_rate": 6.133549553271556e-07, "loss": 0.159, "step": 11250 }, { "epoch": 0.8913448207565855, "grad_norm": 1.1275538965744762, "learning_rate": 6.124704151101546e-07, "loss": 0.1616, "step": 11251 }, { "epoch": 0.8914240443652208, "grad_norm": 1.359945567692041, "learning_rate": 6.115864930168058e-07, "loss": 0.2762, "step": 11252 }, { "epoch": 0.8915032679738563, "grad_norm": 1.4855710788796759, "learning_rate": 6.107031891053139e-07, "loss": 0.2547, "step": 11253 }, { "epoch": 0.8915824915824916, "grad_norm": 1.1614740301086464, "learning_rate": 6.098205034338378e-07, "loss": 0.1446, "step": 11254 }, { "epoch": 0.8916617151911269, "grad_norm": 1.186898767952395, "learning_rate": 6.089384360605e-07, "loss": 0.2581, "step": 11255 }, { "epoch": 0.8917409387997624, "grad_norm": 1.618945819690544, "learning_rate": 6.080569870433773e-07, "loss": 0.2041, "step": 11256 }, { "epoch": 0.8918201624083977, "grad_norm": 1.4846299746216791, "learning_rate": 6.071761564405121e-07, "loss": 0.2572, "step": 11257 }, { "epoch": 0.8918993860170331, "grad_norm": 1.367366346891495, "learning_rate": 6.062959443099014e-07, "loss": 0.2785, "step": 11258 }, { "epoch": 0.8919786096256684, "grad_norm": 1.653177036324328, "learning_rate": 6.054163507095035e-07, "loss": 0.3113, "step": 11259 }, { "epoch": 0.8920578332343039, "grad_norm": 1.1624161213576714, "learning_rate": 6.04537375697235e-07, "loss": 0.1656, "step": 11260 }, { "epoch": 0.8921370568429392, "grad_norm": 1.4649129596611634, "learning_rate": 6.036590193309711e-07, "loss": 0.2073, "step": 11261 }, { "epoch": 0.8922162804515745, "grad_norm": 1.2371825682247837, "learning_rate": 6.027812816685497e-07, "loss": 0.2097, "step": 11262 }, { "epoch": 0.89229550406021, "grad_norm": 1.3852611306041136, "learning_rate": 6.019041627677635e-07, "loss": 0.213, "step": 11263 }, { "epoch": 0.8923747276688453, "grad_norm": 1.4409802160224254, "learning_rate": 6.010276626863687e-07, "loss": 0.2515, "step": 11264 }, { "epoch": 0.8924539512774807, "grad_norm": 1.5774758971813887, "learning_rate": 6.001517814820757e-07, "loss": 0.2279, "step": 11265 }, { "epoch": 0.892533174886116, "grad_norm": 1.454816758733758, "learning_rate": 5.992765192125594e-07, "loss": 0.2403, "step": 11266 }, { "epoch": 0.8926123984947515, "grad_norm": 1.441668957544932, "learning_rate": 5.984018759354515e-07, "loss": 0.2557, "step": 11267 }, { "epoch": 0.8926916221033868, "grad_norm": 1.3576792572336833, "learning_rate": 5.975278517083405e-07, "loss": 0.2057, "step": 11268 }, { "epoch": 0.8927708457120221, "grad_norm": 1.1483281862788846, "learning_rate": 5.966544465887803e-07, "loss": 0.1932, "step": 11269 }, { "epoch": 0.8928500693206576, "grad_norm": 1.0860636050183037, "learning_rate": 5.957816606342792e-07, "loss": 0.1744, "step": 11270 }, { "epoch": 0.8929292929292929, "grad_norm": 1.4043056715639777, "learning_rate": 5.949094939023037e-07, "loss": 0.2335, "step": 11271 }, { "epoch": 0.8930085165379283, "grad_norm": 1.3232268237447964, "learning_rate": 5.940379464502854e-07, "loss": 0.1651, "step": 11272 }, { "epoch": 0.8930877401465637, "grad_norm": 1.6890526154789487, "learning_rate": 5.931670183356097e-07, "loss": 0.2741, "step": 11273 }, { "epoch": 0.893166963755199, "grad_norm": 1.044260339267375, "learning_rate": 5.922967096156218e-07, "loss": 0.1823, "step": 11274 }, { "epoch": 0.8932461873638344, "grad_norm": 1.540836548776842, "learning_rate": 5.914270203476291e-07, "loss": 0.2923, "step": 11275 }, { "epoch": 0.8933254109724698, "grad_norm": 1.2671417892062502, "learning_rate": 5.90557950588897e-07, "loss": 0.1631, "step": 11276 }, { "epoch": 0.8934046345811052, "grad_norm": 1.436723689610927, "learning_rate": 5.896895003966463e-07, "loss": 0.2105, "step": 11277 }, { "epoch": 0.8934838581897405, "grad_norm": 1.1905789304282068, "learning_rate": 5.888216698280646e-07, "loss": 0.1683, "step": 11278 }, { "epoch": 0.893563081798376, "grad_norm": 1.2743115790041264, "learning_rate": 5.879544589402919e-07, "loss": 0.1936, "step": 11279 }, { "epoch": 0.8936423054070113, "grad_norm": 1.7323363350840604, "learning_rate": 5.870878677904302e-07, "loss": 0.3622, "step": 11280 }, { "epoch": 0.8937215290156466, "grad_norm": 1.5065999345055028, "learning_rate": 5.862218964355382e-07, "loss": 0.2784, "step": 11281 }, { "epoch": 0.893800752624282, "grad_norm": 1.5383097990301149, "learning_rate": 5.853565449326404e-07, "loss": 0.2466, "step": 11282 }, { "epoch": 0.8938799762329174, "grad_norm": 1.2400261136630268, "learning_rate": 5.844918133387134e-07, "loss": 0.199, "step": 11283 }, { "epoch": 0.8939591998415528, "grad_norm": 1.2155040059420341, "learning_rate": 5.836277017106951e-07, "loss": 0.1675, "step": 11284 }, { "epoch": 0.8940384234501881, "grad_norm": 1.2856551693108929, "learning_rate": 5.827642101054854e-07, "loss": 0.2081, "step": 11285 }, { "epoch": 0.8941176470588236, "grad_norm": 1.6931471362544004, "learning_rate": 5.819013385799388e-07, "loss": 0.2524, "step": 11286 }, { "epoch": 0.8941968706674589, "grad_norm": 1.5484010143534122, "learning_rate": 5.810390871908711e-07, "loss": 0.2477, "step": 11287 }, { "epoch": 0.8942760942760942, "grad_norm": 1.310045657882032, "learning_rate": 5.801774559950591e-07, "loss": 0.1863, "step": 11288 }, { "epoch": 0.8943553178847297, "grad_norm": 1.1867356211394389, "learning_rate": 5.793164450492372e-07, "loss": 0.1453, "step": 11289 }, { "epoch": 0.894434541493365, "grad_norm": 1.8848513460708247, "learning_rate": 5.784560544100959e-07, "loss": 0.3078, "step": 11290 }, { "epoch": 0.8945137651020004, "grad_norm": 1.2974320595761797, "learning_rate": 5.775962841342919e-07, "loss": 0.2261, "step": 11291 }, { "epoch": 0.8945929887106358, "grad_norm": 1.6114914646046061, "learning_rate": 5.767371342784345e-07, "loss": 0.2344, "step": 11292 }, { "epoch": 0.8946722123192712, "grad_norm": 1.5415672994218592, "learning_rate": 5.758786048990939e-07, "loss": 0.2413, "step": 11293 }, { "epoch": 0.8947514359279065, "grad_norm": 1.1072199847719513, "learning_rate": 5.750206960528027e-07, "loss": 0.1888, "step": 11294 }, { "epoch": 0.8948306595365418, "grad_norm": 1.5734904174558717, "learning_rate": 5.741634077960479e-07, "loss": 0.2771, "step": 11295 }, { "epoch": 0.8949098831451773, "grad_norm": 1.2777103962039311, "learning_rate": 5.733067401852788e-07, "loss": 0.2692, "step": 11296 }, { "epoch": 0.8949891067538126, "grad_norm": 1.2976147376667901, "learning_rate": 5.724506932769014e-07, "loss": 0.2269, "step": 11297 }, { "epoch": 0.895068330362448, "grad_norm": 1.1156311606426472, "learning_rate": 5.71595267127284e-07, "loss": 0.1258, "step": 11298 }, { "epoch": 0.8951475539710834, "grad_norm": 1.3322417333742287, "learning_rate": 5.707404617927526e-07, "loss": 0.1839, "step": 11299 }, { "epoch": 0.8952267775797188, "grad_norm": 1.3975783059514086, "learning_rate": 5.698862773295888e-07, "loss": 0.2276, "step": 11300 }, { "epoch": 0.8953060011883541, "grad_norm": 1.426479682980726, "learning_rate": 5.69032713794041e-07, "loss": 0.226, "step": 11301 }, { "epoch": 0.8953852247969895, "grad_norm": 1.1900460493453853, "learning_rate": 5.681797712423099e-07, "loss": 0.1541, "step": 11302 }, { "epoch": 0.8954644484056249, "grad_norm": 1.1942963362216403, "learning_rate": 5.673274497305559e-07, "loss": 0.1883, "step": 11303 }, { "epoch": 0.8955436720142602, "grad_norm": 1.4018705923639465, "learning_rate": 5.664757493149042e-07, "loss": 0.2837, "step": 11304 }, { "epoch": 0.8956228956228957, "grad_norm": 1.7135132582349892, "learning_rate": 5.656246700514323e-07, "loss": 0.2488, "step": 11305 }, { "epoch": 0.895702119231531, "grad_norm": 1.4224352111800682, "learning_rate": 5.647742119961797e-07, "loss": 0.2305, "step": 11306 }, { "epoch": 0.8957813428401664, "grad_norm": 1.1836426972003882, "learning_rate": 5.639243752051482e-07, "loss": 0.1725, "step": 11307 }, { "epoch": 0.8958605664488017, "grad_norm": 1.2796218846211957, "learning_rate": 5.630751597342921e-07, "loss": 0.1904, "step": 11308 }, { "epoch": 0.8959397900574371, "grad_norm": 1.2689183194157867, "learning_rate": 5.622265656395276e-07, "loss": 0.1936, "step": 11309 }, { "epoch": 0.8960190136660725, "grad_norm": 1.483024624501899, "learning_rate": 5.613785929767335e-07, "loss": 0.2514, "step": 11310 }, { "epoch": 0.8960982372747078, "grad_norm": 1.6310744359522076, "learning_rate": 5.605312418017439e-07, "loss": 0.3105, "step": 11311 }, { "epoch": 0.8961774608833433, "grad_norm": 1.2218020080492402, "learning_rate": 5.59684512170352e-07, "loss": 0.2179, "step": 11312 }, { "epoch": 0.8962566844919786, "grad_norm": 1.2910209724626072, "learning_rate": 5.588384041383089e-07, "loss": 0.1773, "step": 11313 }, { "epoch": 0.896335908100614, "grad_norm": 1.406795183977652, "learning_rate": 5.579929177613308e-07, "loss": 0.2849, "step": 11314 }, { "epoch": 0.8964151317092494, "grad_norm": 1.235775171695432, "learning_rate": 5.571480530950879e-07, "loss": 0.1988, "step": 11315 }, { "epoch": 0.8964943553178847, "grad_norm": 1.4740244839958896, "learning_rate": 5.563038101952067e-07, "loss": 0.2841, "step": 11316 }, { "epoch": 0.8965735789265201, "grad_norm": 1.1070878368035737, "learning_rate": 5.554601891172817e-07, "loss": 0.1612, "step": 11317 }, { "epoch": 0.8966528025351554, "grad_norm": 1.3406334628096848, "learning_rate": 5.546171899168595e-07, "loss": 0.2282, "step": 11318 }, { "epoch": 0.8967320261437909, "grad_norm": 1.6442479065037026, "learning_rate": 5.537748126494446e-07, "loss": 0.2977, "step": 11319 }, { "epoch": 0.8968112497524262, "grad_norm": 1.072812409772159, "learning_rate": 5.529330573705083e-07, "loss": 0.1704, "step": 11320 }, { "epoch": 0.8968904733610616, "grad_norm": 1.3470759345771661, "learning_rate": 5.520919241354728e-07, "loss": 0.2299, "step": 11321 }, { "epoch": 0.896969696969697, "grad_norm": 1.3190738239635866, "learning_rate": 5.512514129997227e-07, "loss": 0.2375, "step": 11322 }, { "epoch": 0.8970489205783323, "grad_norm": 1.275169964574663, "learning_rate": 5.504115240186048e-07, "loss": 0.1591, "step": 11323 }, { "epoch": 0.8971281441869677, "grad_norm": 1.3796100413778525, "learning_rate": 5.495722572474183e-07, "loss": 0.2421, "step": 11324 }, { "epoch": 0.8972073677956031, "grad_norm": 1.3259854659581767, "learning_rate": 5.487336127414267e-07, "loss": 0.2102, "step": 11325 }, { "epoch": 0.8972865914042385, "grad_norm": 1.3242590469525624, "learning_rate": 5.478955905558491e-07, "loss": 0.2455, "step": 11326 }, { "epoch": 0.8973658150128738, "grad_norm": 1.106817363897461, "learning_rate": 5.470581907458672e-07, "loss": 0.156, "step": 11327 }, { "epoch": 0.8974450386215093, "grad_norm": 1.2317653774955672, "learning_rate": 5.462214133666189e-07, "loss": 0.1671, "step": 11328 }, { "epoch": 0.8975242622301446, "grad_norm": 1.4126933902094934, "learning_rate": 5.453852584732e-07, "loss": 0.2161, "step": 11329 }, { "epoch": 0.8976034858387799, "grad_norm": 1.1733881551598588, "learning_rate": 5.4454972612067e-07, "loss": 0.1817, "step": 11330 }, { "epoch": 0.8976827094474154, "grad_norm": 1.3312278010518537, "learning_rate": 5.437148163640449e-07, "loss": 0.2291, "step": 11331 }, { "epoch": 0.8977619330560507, "grad_norm": 1.7906696721780908, "learning_rate": 5.428805292582973e-07, "loss": 0.2741, "step": 11332 }, { "epoch": 0.8978411566646861, "grad_norm": 1.1303085992557462, "learning_rate": 5.420468648583621e-07, "loss": 0.1477, "step": 11333 }, { "epoch": 0.8979203802733214, "grad_norm": 1.2503010189911672, "learning_rate": 5.412138232191333e-07, "loss": 0.1767, "step": 11334 }, { "epoch": 0.8979996038819569, "grad_norm": 1.5633845706763296, "learning_rate": 5.403814043954592e-07, "loss": 0.2378, "step": 11335 }, { "epoch": 0.8980788274905922, "grad_norm": 1.1167936422622222, "learning_rate": 5.39549608442157e-07, "loss": 0.1331, "step": 11336 }, { "epoch": 0.8981580510992275, "grad_norm": 1.1267852080403595, "learning_rate": 5.387184354139896e-07, "loss": 0.136, "step": 11337 }, { "epoch": 0.898237274707863, "grad_norm": 1.1560933617445144, "learning_rate": 5.378878853656877e-07, "loss": 0.1894, "step": 11338 }, { "epoch": 0.8983164983164983, "grad_norm": 1.526294067325196, "learning_rate": 5.370579583519409e-07, "loss": 0.2311, "step": 11339 }, { "epoch": 0.8983957219251337, "grad_norm": 1.3672258240134598, "learning_rate": 5.362286544273942e-07, "loss": 0.1984, "step": 11340 }, { "epoch": 0.898474945533769, "grad_norm": 1.203141230594665, "learning_rate": 5.353999736466531e-07, "loss": 0.1692, "step": 11341 }, { "epoch": 0.8985541691424045, "grad_norm": 0.9958655829035286, "learning_rate": 5.345719160642848e-07, "loss": 0.175, "step": 11342 }, { "epoch": 0.8986333927510398, "grad_norm": 1.073971393542406, "learning_rate": 5.337444817348103e-07, "loss": 0.128, "step": 11343 }, { "epoch": 0.8987126163596751, "grad_norm": 1.2378900609734436, "learning_rate": 5.329176707127115e-07, "loss": 0.2107, "step": 11344 }, { "epoch": 0.8987918399683106, "grad_norm": 1.222133507604698, "learning_rate": 5.320914830524337e-07, "loss": 0.1871, "step": 11345 }, { "epoch": 0.8988710635769459, "grad_norm": 1.098244225356646, "learning_rate": 5.312659188083746e-07, "loss": 0.1445, "step": 11346 }, { "epoch": 0.8989502871855813, "grad_norm": 1.3675979063567736, "learning_rate": 5.304409780348919e-07, "loss": 0.2535, "step": 11347 }, { "epoch": 0.8990295107942167, "grad_norm": 1.2724222370788936, "learning_rate": 5.296166607863085e-07, "loss": 0.2551, "step": 11348 }, { "epoch": 0.899108734402852, "grad_norm": 1.203831052219527, "learning_rate": 5.287929671168989e-07, "loss": 0.1477, "step": 11349 }, { "epoch": 0.8991879580114874, "grad_norm": 1.317295899865152, "learning_rate": 5.279698970809011e-07, "loss": 0.2385, "step": 11350 }, { "epoch": 0.8992671816201228, "grad_norm": 1.326966316900384, "learning_rate": 5.271474507325058e-07, "loss": 0.21, "step": 11351 }, { "epoch": 0.8993464052287582, "grad_norm": 1.2145670078554756, "learning_rate": 5.263256281258733e-07, "loss": 0.1637, "step": 11352 }, { "epoch": 0.8994256288373935, "grad_norm": 1.4211902363494058, "learning_rate": 5.255044293151135e-07, "loss": 0.2873, "step": 11353 }, { "epoch": 0.899504852446029, "grad_norm": 1.3854266328194298, "learning_rate": 5.246838543542964e-07, "loss": 0.2233, "step": 11354 }, { "epoch": 0.8995840760546643, "grad_norm": 1.4085883118439881, "learning_rate": 5.23863903297458e-07, "loss": 0.1979, "step": 11355 }, { "epoch": 0.8996632996632996, "grad_norm": 1.4769738598097448, "learning_rate": 5.230445761985836e-07, "loss": 0.2596, "step": 11356 }, { "epoch": 0.899742523271935, "grad_norm": 1.6099820062460852, "learning_rate": 5.222258731116237e-07, "loss": 0.2623, "step": 11357 }, { "epoch": 0.8998217468805704, "grad_norm": 1.3185848590239624, "learning_rate": 5.214077940904872e-07, "loss": 0.2258, "step": 11358 }, { "epoch": 0.8999009704892058, "grad_norm": 1.6162709943845397, "learning_rate": 5.205903391890387e-07, "loss": 0.2308, "step": 11359 }, { "epoch": 0.8999801940978411, "grad_norm": 1.070711772449832, "learning_rate": 5.197735084611033e-07, "loss": 0.1879, "step": 11360 }, { "epoch": 0.9000594177064766, "grad_norm": 1.219055837136359, "learning_rate": 5.189573019604676e-07, "loss": 0.1758, "step": 11361 }, { "epoch": 0.9001386413151119, "grad_norm": 1.537556658752876, "learning_rate": 5.181417197408733e-07, "loss": 0.2714, "step": 11362 }, { "epoch": 0.9002178649237472, "grad_norm": 1.2285730873010796, "learning_rate": 5.173267618560229e-07, "loss": 0.2427, "step": 11363 }, { "epoch": 0.9002970885323827, "grad_norm": 0.9861842337554035, "learning_rate": 5.165124283595779e-07, "loss": 0.1185, "step": 11364 }, { "epoch": 0.900376312141018, "grad_norm": 1.5344212455510402, "learning_rate": 5.156987193051577e-07, "loss": 0.2746, "step": 11365 }, { "epoch": 0.9004555357496534, "grad_norm": 1.408660037451225, "learning_rate": 5.148856347463416e-07, "loss": 0.1962, "step": 11366 }, { "epoch": 0.9005347593582887, "grad_norm": 1.4697398999513414, "learning_rate": 5.140731747366656e-07, "loss": 0.2069, "step": 11367 }, { "epoch": 0.9006139829669242, "grad_norm": 1.2461685686705504, "learning_rate": 5.132613393296293e-07, "loss": 0.1997, "step": 11368 }, { "epoch": 0.9006932065755595, "grad_norm": 1.3460139831610238, "learning_rate": 5.124501285786865e-07, "loss": 0.1745, "step": 11369 }, { "epoch": 0.9007724301841948, "grad_norm": 1.5533842665404258, "learning_rate": 5.1163954253725e-07, "loss": 0.2504, "step": 11370 }, { "epoch": 0.9008516537928303, "grad_norm": 1.2402129098705812, "learning_rate": 5.108295812586961e-07, "loss": 0.2011, "step": 11371 }, { "epoch": 0.9009308774014656, "grad_norm": 1.620876653689994, "learning_rate": 5.100202447963553e-07, "loss": 0.248, "step": 11372 }, { "epoch": 0.901010101010101, "grad_norm": 1.5318757161205356, "learning_rate": 5.092115332035163e-07, "loss": 0.2377, "step": 11373 }, { "epoch": 0.9010893246187364, "grad_norm": 1.387867466782114, "learning_rate": 5.084034465334342e-07, "loss": 0.1932, "step": 11374 }, { "epoch": 0.9011685482273718, "grad_norm": 1.3972242720614534, "learning_rate": 5.07595984839313e-07, "loss": 0.2267, "step": 11375 }, { "epoch": 0.9012477718360071, "grad_norm": 1.2386671868902543, "learning_rate": 5.067891481743203e-07, "loss": 0.1836, "step": 11376 }, { "epoch": 0.9013269954446425, "grad_norm": 1.6107819433048094, "learning_rate": 5.059829365915859e-07, "loss": 0.3054, "step": 11377 }, { "epoch": 0.9014062190532779, "grad_norm": 1.5661555145482802, "learning_rate": 5.051773501441926e-07, "loss": 0.2306, "step": 11378 }, { "epoch": 0.9014854426619132, "grad_norm": 1.3683896645436369, "learning_rate": 5.043723888851837e-07, "loss": 0.2496, "step": 11379 }, { "epoch": 0.9015646662705487, "grad_norm": 1.587397155778934, "learning_rate": 5.035680528675635e-07, "loss": 0.2659, "step": 11380 }, { "epoch": 0.901643889879184, "grad_norm": 1.6449214395639375, "learning_rate": 5.027643421442929e-07, "loss": 0.2473, "step": 11381 }, { "epoch": 0.9017231134878194, "grad_norm": 1.5590032711446369, "learning_rate": 5.01961256768293e-07, "loss": 0.1907, "step": 11382 }, { "epoch": 0.9018023370964547, "grad_norm": 1.2802405210780758, "learning_rate": 5.011587967924414e-07, "loss": 0.1234, "step": 11383 }, { "epoch": 0.9018815607050901, "grad_norm": 1.098312871652314, "learning_rate": 5.003569622695792e-07, "loss": 0.1402, "step": 11384 }, { "epoch": 0.9019607843137255, "grad_norm": 1.2568472561837125, "learning_rate": 4.99555753252502e-07, "loss": 0.2283, "step": 11385 }, { "epoch": 0.9020400079223608, "grad_norm": 1.33091756087352, "learning_rate": 4.987551697939629e-07, "loss": 0.2554, "step": 11386 }, { "epoch": 0.9021192315309963, "grad_norm": 1.0895109755664834, "learning_rate": 4.979552119466802e-07, "loss": 0.1598, "step": 11387 }, { "epoch": 0.9021984551396316, "grad_norm": 1.3668570458311966, "learning_rate": 4.971558797633258e-07, "loss": 0.2479, "step": 11388 }, { "epoch": 0.902277678748267, "grad_norm": 1.2760477458937718, "learning_rate": 4.963571732965311e-07, "loss": 0.197, "step": 11389 }, { "epoch": 0.9023569023569024, "grad_norm": 1.6020991949566248, "learning_rate": 4.955590925988896e-07, "loss": 0.3074, "step": 11390 }, { "epoch": 0.9024361259655377, "grad_norm": 1.7250715360888407, "learning_rate": 4.947616377229492e-07, "loss": 0.2336, "step": 11391 }, { "epoch": 0.9025153495741731, "grad_norm": 1.488642712588369, "learning_rate": 4.939648087212168e-07, "loss": 0.2536, "step": 11392 }, { "epoch": 0.9025945731828084, "grad_norm": 1.5629289950185032, "learning_rate": 4.931686056461626e-07, "loss": 0.2336, "step": 11393 }, { "epoch": 0.9026737967914439, "grad_norm": 1.583776097965649, "learning_rate": 4.923730285502126e-07, "loss": 0.2509, "step": 11394 }, { "epoch": 0.9027530204000792, "grad_norm": 1.5154665756283534, "learning_rate": 4.915780774857504e-07, "loss": 0.2884, "step": 11395 }, { "epoch": 0.9028322440087146, "grad_norm": 1.2517962124246766, "learning_rate": 4.907837525051196e-07, "loss": 0.1816, "step": 11396 }, { "epoch": 0.90291146761735, "grad_norm": 1.5462027095360786, "learning_rate": 4.89990053660624e-07, "loss": 0.3189, "step": 11397 }, { "epoch": 0.9029906912259853, "grad_norm": 1.4039947543600597, "learning_rate": 4.891969810045239e-07, "loss": 0.2237, "step": 11398 }, { "epoch": 0.9030699148346207, "grad_norm": 1.3042630988143886, "learning_rate": 4.884045345890387e-07, "loss": 0.2005, "step": 11399 }, { "epoch": 0.9031491384432561, "grad_norm": 1.479450632462305, "learning_rate": 4.87612714466349e-07, "loss": 0.3089, "step": 11400 }, { "epoch": 0.9032283620518915, "grad_norm": 1.5594688532818757, "learning_rate": 4.868215206885918e-07, "loss": 0.2101, "step": 11401 }, { "epoch": 0.9033075856605268, "grad_norm": 1.0299462844120528, "learning_rate": 4.860309533078611e-07, "loss": 0.1274, "step": 11402 }, { "epoch": 0.9033868092691623, "grad_norm": 1.9166456177510156, "learning_rate": 4.852410123762164e-07, "loss": 0.3007, "step": 11403 }, { "epoch": 0.9034660328777976, "grad_norm": 1.338086690352521, "learning_rate": 4.844516979456671e-07, "loss": 0.2041, "step": 11404 }, { "epoch": 0.9035452564864329, "grad_norm": 1.2322799528405464, "learning_rate": 4.836630100681872e-07, "loss": 0.1688, "step": 11405 }, { "epoch": 0.9036244800950683, "grad_norm": 1.6135528697994006, "learning_rate": 4.828749487957097e-07, "loss": 0.2882, "step": 11406 }, { "epoch": 0.9037037037037037, "grad_norm": 1.1655779112500846, "learning_rate": 4.82087514180124e-07, "loss": 0.1856, "step": 11407 }, { "epoch": 0.9037829273123391, "grad_norm": 1.4718796388116533, "learning_rate": 4.813007062732756e-07, "loss": 0.2251, "step": 11408 }, { "epoch": 0.9038621509209744, "grad_norm": 1.0698800138197295, "learning_rate": 4.805145251269772e-07, "loss": 0.1676, "step": 11409 }, { "epoch": 0.9039413745296099, "grad_norm": 1.1870476041713254, "learning_rate": 4.797289707929919e-07, "loss": 0.168, "step": 11410 }, { "epoch": 0.9040205981382452, "grad_norm": 1.269900936575648, "learning_rate": 4.789440433230452e-07, "loss": 0.2004, "step": 11411 }, { "epoch": 0.9040998217468805, "grad_norm": 1.315033862582558, "learning_rate": 4.781597427688189e-07, "loss": 0.1933, "step": 11412 }, { "epoch": 0.904179045355516, "grad_norm": 1.3314371072151376, "learning_rate": 4.773760691819596e-07, "loss": 0.2066, "step": 11413 }, { "epoch": 0.9042582689641513, "grad_norm": 1.6267462378265747, "learning_rate": 4.765930226140658e-07, "loss": 0.2276, "step": 11414 }, { "epoch": 0.9043374925727867, "grad_norm": 1.5170199275185874, "learning_rate": 4.7581060311669757e-07, "loss": 0.1854, "step": 11415 }, { "epoch": 0.904416716181422, "grad_norm": 1.2337378966475265, "learning_rate": 4.7502881074137476e-07, "loss": 0.2203, "step": 11416 }, { "epoch": 0.9044959397900575, "grad_norm": 1.279520330318968, "learning_rate": 4.742476455395706e-07, "loss": 0.2192, "step": 11417 }, { "epoch": 0.9045751633986928, "grad_norm": 1.2879903944159783, "learning_rate": 4.734671075627262e-07, "loss": 0.1888, "step": 11418 }, { "epoch": 0.9046543870073281, "grad_norm": 1.349526012663776, "learning_rate": 4.726871968622337e-07, "loss": 0.2327, "step": 11419 }, { "epoch": 0.9047336106159636, "grad_norm": 1.3792906672124408, "learning_rate": 4.7190791348944777e-07, "loss": 0.2125, "step": 11420 }, { "epoch": 0.9048128342245989, "grad_norm": 0.9746645782948918, "learning_rate": 4.711292574956772e-07, "loss": 0.1673, "step": 11421 }, { "epoch": 0.9048920578332343, "grad_norm": 1.625455595410993, "learning_rate": 4.7035122893219653e-07, "loss": 0.2276, "step": 11422 }, { "epoch": 0.9049712814418697, "grad_norm": 1.0555706634774598, "learning_rate": 4.695738278502338e-07, "loss": 0.151, "step": 11423 }, { "epoch": 0.9050505050505051, "grad_norm": 1.5723002557247647, "learning_rate": 4.6879705430097566e-07, "loss": 0.2218, "step": 11424 }, { "epoch": 0.9051297286591404, "grad_norm": 1.3062239306314902, "learning_rate": 4.6802090833557136e-07, "loss": 0.2295, "step": 11425 }, { "epoch": 0.9052089522677758, "grad_norm": 0.9010926330492873, "learning_rate": 4.6724539000512546e-07, "loss": 0.1112, "step": 11426 }, { "epoch": 0.9052881758764112, "grad_norm": 1.2737771223156693, "learning_rate": 4.6647049936070054e-07, "loss": 0.183, "step": 11427 }, { "epoch": 0.9053673994850465, "grad_norm": 1.142861898741502, "learning_rate": 4.656962364533224e-07, "loss": 0.1404, "step": 11428 }, { "epoch": 0.905446623093682, "grad_norm": 1.1701301630882575, "learning_rate": 4.649226013339703e-07, "loss": 0.2177, "step": 11429 }, { "epoch": 0.9055258467023173, "grad_norm": 1.1783358495937835, "learning_rate": 4.641495940535845e-07, "loss": 0.1904, "step": 11430 }, { "epoch": 0.9056050703109526, "grad_norm": 1.3653305134647453, "learning_rate": 4.633772146630655e-07, "loss": 0.2385, "step": 11431 }, { "epoch": 0.905684293919588, "grad_norm": 1.5685578557173798, "learning_rate": 4.626054632132693e-07, "loss": 0.2705, "step": 11432 }, { "epoch": 0.9057635175282234, "grad_norm": 1.5574355920238032, "learning_rate": 4.6183433975501067e-07, "loss": 0.1969, "step": 11433 }, { "epoch": 0.9058427411368588, "grad_norm": 1.2512610793744947, "learning_rate": 4.61063844339068e-07, "loss": 0.1829, "step": 11434 }, { "epoch": 0.9059219647454941, "grad_norm": 1.5736830496131415, "learning_rate": 4.6029397701617296e-07, "loss": 0.2147, "step": 11435 }, { "epoch": 0.9060011883541296, "grad_norm": 1.8558708006423443, "learning_rate": 4.595247378370171e-07, "loss": 0.3252, "step": 11436 }, { "epoch": 0.9060804119627649, "grad_norm": 1.1974188610541265, "learning_rate": 4.5875612685225e-07, "loss": 0.1582, "step": 11437 }, { "epoch": 0.9061596355714002, "grad_norm": 1.329125266802523, "learning_rate": 4.5798814411248336e-07, "loss": 0.1838, "step": 11438 }, { "epoch": 0.9062388591800357, "grad_norm": 1.3581890926221736, "learning_rate": 4.5722078966828455e-07, "loss": 0.2199, "step": 11439 }, { "epoch": 0.906318082788671, "grad_norm": 1.5281357014004326, "learning_rate": 4.5645406357017865e-07, "loss": 0.2612, "step": 11440 }, { "epoch": 0.9063973063973064, "grad_norm": 1.3358689675729154, "learning_rate": 4.5568796586865304e-07, "loss": 0.1903, "step": 11441 }, { "epoch": 0.9064765300059417, "grad_norm": 1.6621730605961096, "learning_rate": 4.5492249661415077e-07, "loss": 0.2606, "step": 11442 }, { "epoch": 0.9065557536145772, "grad_norm": 1.4293273923108951, "learning_rate": 4.541576558570726e-07, "loss": 0.2354, "step": 11443 }, { "epoch": 0.9066349772232125, "grad_norm": 1.2242375786311315, "learning_rate": 4.533934436477827e-07, "loss": 0.1627, "step": 11444 }, { "epoch": 0.9067142008318478, "grad_norm": 1.3474590617236502, "learning_rate": 4.526298600365997e-07, "loss": 0.2429, "step": 11445 }, { "epoch": 0.9067934244404833, "grad_norm": 1.2770921670065432, "learning_rate": 4.5186690507379894e-07, "loss": 0.1935, "step": 11446 }, { "epoch": 0.9068726480491186, "grad_norm": 1.1095923965120869, "learning_rate": 4.5110457880962246e-07, "loss": 0.1302, "step": 11447 }, { "epoch": 0.906951871657754, "grad_norm": 1.317481680057043, "learning_rate": 4.503428812942623e-07, "loss": 0.2187, "step": 11448 }, { "epoch": 0.9070310952663894, "grad_norm": 1.8330943823497057, "learning_rate": 4.495818125778717e-07, "loss": 0.2661, "step": 11449 }, { "epoch": 0.9071103188750248, "grad_norm": 1.2650905901969383, "learning_rate": 4.488213727105672e-07, "loss": 0.147, "step": 11450 }, { "epoch": 0.9071895424836601, "grad_norm": 1.5642395395973636, "learning_rate": 4.4806156174241776e-07, "loss": 0.2679, "step": 11451 }, { "epoch": 0.9072687660922955, "grad_norm": 1.1922316840883551, "learning_rate": 4.4730237972345326e-07, "loss": 0.1472, "step": 11452 }, { "epoch": 0.9073479897009309, "grad_norm": 1.172952372995457, "learning_rate": 4.465438267036604e-07, "loss": 0.2279, "step": 11453 }, { "epoch": 0.9074272133095662, "grad_norm": 1.264911390030226, "learning_rate": 4.4578590273299027e-07, "loss": 0.1922, "step": 11454 }, { "epoch": 0.9075064369182017, "grad_norm": 1.5933968405366234, "learning_rate": 4.4502860786134747e-07, "loss": 0.195, "step": 11455 }, { "epoch": 0.907585660526837, "grad_norm": 1.5812907823759608, "learning_rate": 4.4427194213859216e-07, "loss": 0.2335, "step": 11456 }, { "epoch": 0.9076648841354724, "grad_norm": 1.4334218364671192, "learning_rate": 4.435159056145533e-07, "loss": 0.1957, "step": 11457 }, { "epoch": 0.9077441077441077, "grad_norm": 1.2027290573049838, "learning_rate": 4.427604983390077e-07, "loss": 0.2466, "step": 11458 }, { "epoch": 0.9078233313527431, "grad_norm": 1.6145628015650884, "learning_rate": 4.420057203616956e-07, "loss": 0.2571, "step": 11459 }, { "epoch": 0.9079025549613785, "grad_norm": 0.9934166053794611, "learning_rate": 4.4125157173231847e-07, "loss": 0.1174, "step": 11460 }, { "epoch": 0.9079817785700138, "grad_norm": 1.4222208978210948, "learning_rate": 4.40498052500532e-07, "loss": 0.2629, "step": 11461 }, { "epoch": 0.9080610021786493, "grad_norm": 1.4192110266480258, "learning_rate": 4.397451627159499e-07, "loss": 0.186, "step": 11462 }, { "epoch": 0.9081402257872846, "grad_norm": 1.746785174805083, "learning_rate": 4.389929024281492e-07, "loss": 0.2759, "step": 11463 }, { "epoch": 0.90821944939592, "grad_norm": 1.3220487872127502, "learning_rate": 4.382412716866602e-07, "loss": 0.207, "step": 11464 }, { "epoch": 0.9082986730045554, "grad_norm": 1.2338070256531364, "learning_rate": 4.374902705409745e-07, "loss": 0.2041, "step": 11465 }, { "epoch": 0.9083778966131907, "grad_norm": 1.0736160557902221, "learning_rate": 4.367398990405447e-07, "loss": 0.157, "step": 11466 }, { "epoch": 0.9084571202218261, "grad_norm": 1.2817395609640396, "learning_rate": 4.359901572347758e-07, "loss": 0.2443, "step": 11467 }, { "epoch": 0.9085363438304614, "grad_norm": 1.1816610246249397, "learning_rate": 4.3524104517303714e-07, "loss": 0.1717, "step": 11468 }, { "epoch": 0.9086155674390969, "grad_norm": 1.7560676930716415, "learning_rate": 4.3449256290465035e-07, "loss": 0.2375, "step": 11469 }, { "epoch": 0.9086947910477322, "grad_norm": 1.2582724501020333, "learning_rate": 4.3374471047890497e-07, "loss": 0.2147, "step": 11470 }, { "epoch": 0.9087740146563676, "grad_norm": 1.418905410575443, "learning_rate": 4.329974879450394e-07, "loss": 0.2459, "step": 11471 }, { "epoch": 0.908853238265003, "grad_norm": 1.342316449061536, "learning_rate": 4.3225089535225415e-07, "loss": 0.2317, "step": 11472 }, { "epoch": 0.9089324618736383, "grad_norm": 1.1916797329317954, "learning_rate": 4.3150493274971227e-07, "loss": 0.2177, "step": 11473 }, { "epoch": 0.9090116854822737, "grad_norm": 1.0863981432213492, "learning_rate": 4.3075960018652995e-07, "loss": 0.1758, "step": 11474 }, { "epoch": 0.9090909090909091, "grad_norm": 1.3191141858103075, "learning_rate": 4.300148977117824e-07, "loss": 0.2255, "step": 11475 }, { "epoch": 0.9091701326995445, "grad_norm": 1.2096794936794457, "learning_rate": 4.2927082537450705e-07, "loss": 0.1697, "step": 11476 }, { "epoch": 0.9092493563081798, "grad_norm": 1.2901020677158936, "learning_rate": 4.285273832236969e-07, "loss": 0.1951, "step": 11477 }, { "epoch": 0.9093285799168153, "grad_norm": 1.5552055235717805, "learning_rate": 4.277845713083018e-07, "loss": 0.2683, "step": 11478 }, { "epoch": 0.9094078035254506, "grad_norm": 1.485532598975266, "learning_rate": 4.2704238967723574e-07, "loss": 0.2339, "step": 11479 }, { "epoch": 0.9094870271340859, "grad_norm": 1.2712454608590502, "learning_rate": 4.2630083837936654e-07, "loss": 0.1791, "step": 11480 }, { "epoch": 0.9095662507427213, "grad_norm": 1.352239512518404, "learning_rate": 4.2555991746352054e-07, "loss": 0.2581, "step": 11481 }, { "epoch": 0.9096454743513567, "grad_norm": 1.6863382835731167, "learning_rate": 4.2481962697848323e-07, "loss": 0.2527, "step": 11482 }, { "epoch": 0.9097246979599921, "grad_norm": 1.5150735796303765, "learning_rate": 4.240799669730034e-07, "loss": 0.2929, "step": 11483 }, { "epoch": 0.9098039215686274, "grad_norm": 1.4252381334508766, "learning_rate": 4.2334093749577975e-07, "loss": 0.2147, "step": 11484 }, { "epoch": 0.9098831451772629, "grad_norm": 1.2594869124074355, "learning_rate": 4.226025385954746e-07, "loss": 0.1932, "step": 11485 }, { "epoch": 0.9099623687858982, "grad_norm": 1.092966590490661, "learning_rate": 4.218647703207113e-07, "loss": 0.1283, "step": 11486 }, { "epoch": 0.9100415923945335, "grad_norm": 1.3090466555396847, "learning_rate": 4.211276327200642e-07, "loss": 0.1911, "step": 11487 }, { "epoch": 0.910120816003169, "grad_norm": 1.2063203457894478, "learning_rate": 4.203911258420712e-07, "loss": 0.2237, "step": 11488 }, { "epoch": 0.9102000396118043, "grad_norm": 1.3012628403675948, "learning_rate": 4.196552497352302e-07, "loss": 0.1464, "step": 11489 }, { "epoch": 0.9102792632204397, "grad_norm": 1.1912771895504253, "learning_rate": 4.189200044479924e-07, "loss": 0.2109, "step": 11490 }, { "epoch": 0.910358486829075, "grad_norm": 1.5954214973115788, "learning_rate": 4.1818539002877024e-07, "loss": 0.299, "step": 11491 }, { "epoch": 0.9104377104377105, "grad_norm": 1.271666131692804, "learning_rate": 4.174514065259383e-07, "loss": 0.1936, "step": 11492 }, { "epoch": 0.9105169340463458, "grad_norm": 1.2435706009038738, "learning_rate": 4.167180539878213e-07, "loss": 0.1538, "step": 11493 }, { "epoch": 0.9105961576549811, "grad_norm": 1.5786516255402805, "learning_rate": 4.1598533246270833e-07, "loss": 0.2379, "step": 11494 }, { "epoch": 0.9106753812636166, "grad_norm": 1.6056421538436658, "learning_rate": 4.152532419988453e-07, "loss": 0.3461, "step": 11495 }, { "epoch": 0.9107546048722519, "grad_norm": 1.3375083548694011, "learning_rate": 4.145217826444392e-07, "loss": 0.2484, "step": 11496 }, { "epoch": 0.9108338284808873, "grad_norm": 1.42747690083719, "learning_rate": 4.1379095444764926e-07, "loss": 0.2001, "step": 11497 }, { "epoch": 0.9109130520895227, "grad_norm": 1.29900205376338, "learning_rate": 4.130607574566003e-07, "loss": 0.2205, "step": 11498 }, { "epoch": 0.9109922756981581, "grad_norm": 1.337424357474979, "learning_rate": 4.1233119171937065e-07, "loss": 0.2248, "step": 11499 }, { "epoch": 0.9110714993067934, "grad_norm": 1.2766738544700231, "learning_rate": 4.116022572839984e-07, "loss": 0.1813, "step": 11500 }, { "epoch": 0.9111507229154288, "grad_norm": 1.438992602713393, "learning_rate": 4.1087395419848186e-07, "loss": 0.2116, "step": 11501 }, { "epoch": 0.9112299465240642, "grad_norm": 1.1742965260043927, "learning_rate": 4.10146282510776e-07, "loss": 0.19, "step": 11502 }, { "epoch": 0.9113091701326995, "grad_norm": 1.1030349158739612, "learning_rate": 4.094192422687926e-07, "loss": 0.1208, "step": 11503 }, { "epoch": 0.911388393741335, "grad_norm": 1.326589982163388, "learning_rate": 4.0869283352040656e-07, "loss": 0.1956, "step": 11504 }, { "epoch": 0.9114676173499703, "grad_norm": 1.2188409136395084, "learning_rate": 4.079670563134475e-07, "loss": 0.2337, "step": 11505 }, { "epoch": 0.9115468409586057, "grad_norm": 1.3075058874056278, "learning_rate": 4.072419106957026e-07, "loss": 0.1806, "step": 11506 }, { "epoch": 0.911626064567241, "grad_norm": 1.3822003492298027, "learning_rate": 4.065173967149205e-07, "loss": 0.2074, "step": 11507 }, { "epoch": 0.9117052881758764, "grad_norm": 0.9843325340828633, "learning_rate": 4.057935144188074e-07, "loss": 0.1264, "step": 11508 }, { "epoch": 0.9117845117845118, "grad_norm": 1.3264894111292864, "learning_rate": 4.0507026385502747e-07, "loss": 0.184, "step": 11509 }, { "epoch": 0.9118637353931471, "grad_norm": 1.0693847780856147, "learning_rate": 4.043476450712014e-07, "loss": 0.1918, "step": 11510 }, { "epoch": 0.9119429590017826, "grad_norm": 1.4229257269074096, "learning_rate": 4.036256581149123e-07, "loss": 0.2436, "step": 11511 }, { "epoch": 0.9120221826104179, "grad_norm": 1.488066558518975, "learning_rate": 4.0290430303369876e-07, "loss": 0.2477, "step": 11512 }, { "epoch": 0.9121014062190532, "grad_norm": 1.5473374802435573, "learning_rate": 4.021835798750584e-07, "loss": 0.2782, "step": 11513 }, { "epoch": 0.9121806298276887, "grad_norm": 1.3482564116305311, "learning_rate": 4.0146348868644767e-07, "loss": 0.2488, "step": 11514 }, { "epoch": 0.912259853436324, "grad_norm": 1.235627441540292, "learning_rate": 4.0074402951528204e-07, "loss": 0.1719, "step": 11515 }, { "epoch": 0.9123390770449594, "grad_norm": 1.3888607368526977, "learning_rate": 4.000252024089313e-07, "loss": 0.1853, "step": 11516 }, { "epoch": 0.9124183006535947, "grad_norm": 1.4064493236762259, "learning_rate": 3.9930700741473093e-07, "loss": 0.3289, "step": 11517 }, { "epoch": 0.9124975242622302, "grad_norm": 1.3067992716682535, "learning_rate": 3.985894445799676e-07, "loss": 0.2175, "step": 11518 }, { "epoch": 0.9125767478708655, "grad_norm": 1.265268119282097, "learning_rate": 3.978725139518891e-07, "loss": 0.1967, "step": 11519 }, { "epoch": 0.9126559714795008, "grad_norm": 1.5062649490315358, "learning_rate": 3.9715621557770535e-07, "loss": 0.2556, "step": 11520 }, { "epoch": 0.9127351950881363, "grad_norm": 1.4729512504434277, "learning_rate": 3.9644054950457753e-07, "loss": 0.2359, "step": 11521 }, { "epoch": 0.9128144186967716, "grad_norm": 1.5356793515122895, "learning_rate": 3.9572551577963135e-07, "loss": 0.2523, "step": 11522 }, { "epoch": 0.912893642305407, "grad_norm": 1.2987969898787666, "learning_rate": 3.9501111444994576e-07, "loss": 0.2169, "step": 11523 }, { "epoch": 0.9129728659140424, "grad_norm": 1.658436110892631, "learning_rate": 3.9429734556256205e-07, "loss": 0.3422, "step": 11524 }, { "epoch": 0.9130520895226778, "grad_norm": 1.5970255325394085, "learning_rate": 3.9358420916447927e-07, "loss": 0.2978, "step": 11525 }, { "epoch": 0.9131313131313131, "grad_norm": 1.0917172516092473, "learning_rate": 3.9287170530265206e-07, "loss": 0.1808, "step": 11526 }, { "epoch": 0.9132105367399485, "grad_norm": 1.0599886427716367, "learning_rate": 3.9215983402399736e-07, "loss": 0.1689, "step": 11527 }, { "epoch": 0.9132897603485839, "grad_norm": 0.9021125677081997, "learning_rate": 3.914485953753888e-07, "loss": 0.1323, "step": 11528 }, { "epoch": 0.9133689839572192, "grad_norm": 1.2952944025341806, "learning_rate": 3.907379894036545e-07, "loss": 0.1833, "step": 11529 }, { "epoch": 0.9134482075658547, "grad_norm": 1.831734041119413, "learning_rate": 3.9002801615558805e-07, "loss": 0.3419, "step": 11530 }, { "epoch": 0.91352743117449, "grad_norm": 1.5053007261768667, "learning_rate": 3.893186756779366e-07, "loss": 0.2432, "step": 11531 }, { "epoch": 0.9136066547831254, "grad_norm": 1.2037043793227422, "learning_rate": 3.886099680174049e-07, "loss": 0.2157, "step": 11532 }, { "epoch": 0.9136858783917607, "grad_norm": 1.1053531206404543, "learning_rate": 3.879018932206624e-07, "loss": 0.2351, "step": 11533 }, { "epoch": 0.9137651020003961, "grad_norm": 1.5164012115154661, "learning_rate": 3.871944513343284e-07, "loss": 0.2526, "step": 11534 }, { "epoch": 0.9138443256090315, "grad_norm": 1.2831309728208964, "learning_rate": 3.864876424049857e-07, "loss": 0.2123, "step": 11535 }, { "epoch": 0.9139235492176668, "grad_norm": 1.0502753441279835, "learning_rate": 3.857814664791748e-07, "loss": 0.1217, "step": 11536 }, { "epoch": 0.9140027728263023, "grad_norm": 1.3249857518200205, "learning_rate": 3.8507592360339407e-07, "loss": 0.2294, "step": 11537 }, { "epoch": 0.9140819964349376, "grad_norm": 1.3539375859531648, "learning_rate": 3.843710138240997e-07, "loss": 0.196, "step": 11538 }, { "epoch": 0.914161220043573, "grad_norm": 1.2569589096592837, "learning_rate": 3.8366673718770564e-07, "loss": 0.1603, "step": 11539 }, { "epoch": 0.9142404436522084, "grad_norm": 1.2484832368530978, "learning_rate": 3.8296309374058704e-07, "loss": 0.2062, "step": 11540 }, { "epoch": 0.9143196672608437, "grad_norm": 1.3439230322929494, "learning_rate": 3.8226008352907464e-07, "loss": 0.1846, "step": 11541 }, { "epoch": 0.9143988908694791, "grad_norm": 1.1118837846291858, "learning_rate": 3.815577065994569e-07, "loss": 0.1274, "step": 11542 }, { "epoch": 0.9144781144781144, "grad_norm": 1.8464745876317965, "learning_rate": 3.8085596299798465e-07, "loss": 0.3058, "step": 11543 }, { "epoch": 0.9145573380867499, "grad_norm": 1.4617184678386403, "learning_rate": 3.801548527708621e-07, "loss": 0.2403, "step": 11544 }, { "epoch": 0.9146365616953852, "grad_norm": 1.4169994515179263, "learning_rate": 3.794543759642544e-07, "loss": 0.2094, "step": 11545 }, { "epoch": 0.9147157853040206, "grad_norm": 1.1984561231244761, "learning_rate": 3.7875453262428584e-07, "loss": 0.2073, "step": 11546 }, { "epoch": 0.914795008912656, "grad_norm": 1.251204102318907, "learning_rate": 3.7805532279703625e-07, "loss": 0.164, "step": 11547 }, { "epoch": 0.9148742325212913, "grad_norm": 1.2280814821146404, "learning_rate": 3.773567465285455e-07, "loss": 0.2151, "step": 11548 }, { "epoch": 0.9149534561299267, "grad_norm": 1.6956879370774005, "learning_rate": 3.7665880386481226e-07, "loss": 0.286, "step": 11549 }, { "epoch": 0.9150326797385621, "grad_norm": 1.2065091343224346, "learning_rate": 3.759614948517931e-07, "loss": 0.1587, "step": 11550 }, { "epoch": 0.9151119033471975, "grad_norm": 1.4489850524812282, "learning_rate": 3.7526481953539915e-07, "loss": 0.2455, "step": 11551 }, { "epoch": 0.9151911269558328, "grad_norm": 1.6145481975224287, "learning_rate": 3.74568777961507e-07, "loss": 0.2454, "step": 11552 }, { "epoch": 0.9152703505644683, "grad_norm": 1.2488806129259216, "learning_rate": 3.7387337017594674e-07, "loss": 0.2035, "step": 11553 }, { "epoch": 0.9153495741731036, "grad_norm": 1.2325590316906303, "learning_rate": 3.7317859622450714e-07, "loss": 0.183, "step": 11554 }, { "epoch": 0.9154287977817389, "grad_norm": 1.4137943114641929, "learning_rate": 3.7248445615293506e-07, "loss": 0.2378, "step": 11555 }, { "epoch": 0.9155080213903743, "grad_norm": 1.5916797291463736, "learning_rate": 3.7179095000693723e-07, "loss": 0.2517, "step": 11556 }, { "epoch": 0.9155872449990097, "grad_norm": 1.2816295579161405, "learning_rate": 3.710980778321771e-07, "loss": 0.2073, "step": 11557 }, { "epoch": 0.9156664686076451, "grad_norm": 1.2927875508823483, "learning_rate": 3.70405839674276e-07, "loss": 0.1847, "step": 11558 }, { "epoch": 0.9157456922162804, "grad_norm": 1.5335384834886192, "learning_rate": 3.697142355788175e-07, "loss": 0.2399, "step": 11559 }, { "epoch": 0.9158249158249159, "grad_norm": 1.5337601451572123, "learning_rate": 3.6902326559133836e-07, "loss": 0.2786, "step": 11560 }, { "epoch": 0.9159041394335512, "grad_norm": 1.3051444378305266, "learning_rate": 3.683329297573346e-07, "loss": 0.2275, "step": 11561 }, { "epoch": 0.9159833630421865, "grad_norm": 1.349157211582938, "learning_rate": 3.6764322812226416e-07, "loss": 0.2044, "step": 11562 }, { "epoch": 0.916062586650822, "grad_norm": 1.3388097820910023, "learning_rate": 3.669541607315397e-07, "loss": 0.1757, "step": 11563 }, { "epoch": 0.9161418102594573, "grad_norm": 1.353712757381436, "learning_rate": 3.6626572763053034e-07, "loss": 0.2119, "step": 11564 }, { "epoch": 0.9162210338680927, "grad_norm": 1.6305765603502218, "learning_rate": 3.6557792886457e-07, "loss": 0.1964, "step": 11565 }, { "epoch": 0.916300257476728, "grad_norm": 1.3653558641364365, "learning_rate": 3.6489076447894456e-07, "loss": 0.2429, "step": 11566 }, { "epoch": 0.9163794810853635, "grad_norm": 1.2884789791330342, "learning_rate": 3.642042345189023e-07, "loss": 0.2238, "step": 11567 }, { "epoch": 0.9164587046939988, "grad_norm": 1.4047165662094148, "learning_rate": 3.6351833902964485e-07, "loss": 0.2012, "step": 11568 }, { "epoch": 0.9165379283026341, "grad_norm": 1.3995022979289622, "learning_rate": 3.6283307805633714e-07, "loss": 0.2704, "step": 11569 }, { "epoch": 0.9166171519112696, "grad_norm": 1.2249043838575662, "learning_rate": 3.6214845164410205e-07, "loss": 0.1589, "step": 11570 }, { "epoch": 0.9166963755199049, "grad_norm": 1.4284337286404245, "learning_rate": 3.614644598380157e-07, "loss": 0.3002, "step": 11571 }, { "epoch": 0.9167755991285403, "grad_norm": 1.571096364990784, "learning_rate": 3.607811026831176e-07, "loss": 0.3304, "step": 11572 }, { "epoch": 0.9168548227371757, "grad_norm": 1.3252266398167374, "learning_rate": 3.600983802244007e-07, "loss": 0.2241, "step": 11573 }, { "epoch": 0.9169340463458111, "grad_norm": 1.2526626682659778, "learning_rate": 3.594162925068234e-07, "loss": 0.1782, "step": 11574 }, { "epoch": 0.9170132699544464, "grad_norm": 1.4905217388324608, "learning_rate": 3.587348395752954e-07, "loss": 0.2308, "step": 11575 }, { "epoch": 0.9170924935630818, "grad_norm": 1.5160617197509325, "learning_rate": 3.5805402147468746e-07, "loss": 0.3075, "step": 11576 }, { "epoch": 0.9171717171717172, "grad_norm": 1.4184027858690453, "learning_rate": 3.573738382498271e-07, "loss": 0.2455, "step": 11577 }, { "epoch": 0.9172509407803525, "grad_norm": 1.426169947430461, "learning_rate": 3.566942899455039e-07, "loss": 0.2211, "step": 11578 }, { "epoch": 0.917330164388988, "grad_norm": 1.1709391058722793, "learning_rate": 3.5601537660646e-07, "loss": 0.1648, "step": 11579 }, { "epoch": 0.9174093879976233, "grad_norm": 1.3088221228553496, "learning_rate": 3.553370982773985e-07, "loss": 0.1954, "step": 11580 }, { "epoch": 0.9174886116062587, "grad_norm": 1.318887886039781, "learning_rate": 3.546594550029836e-07, "loss": 0.2274, "step": 11581 }, { "epoch": 0.917567835214894, "grad_norm": 1.1032652733466042, "learning_rate": 3.53982446827833e-07, "loss": 0.1927, "step": 11582 }, { "epoch": 0.9176470588235294, "grad_norm": 1.0856985427116774, "learning_rate": 3.533060737965244e-07, "loss": 0.1727, "step": 11583 }, { "epoch": 0.9177262824321648, "grad_norm": 1.3215648081236284, "learning_rate": 3.526303359535932e-07, "loss": 0.1879, "step": 11584 }, { "epoch": 0.9178055060408001, "grad_norm": 1.274311315833374, "learning_rate": 3.519552333435361e-07, "loss": 0.2261, "step": 11585 }, { "epoch": 0.9178847296494356, "grad_norm": 1.341233000779832, "learning_rate": 3.5128076601080087e-07, "loss": 0.1673, "step": 11586 }, { "epoch": 0.9179639532580709, "grad_norm": 1.532310091893599, "learning_rate": 3.5060693399980194e-07, "loss": 0.277, "step": 11587 }, { "epoch": 0.9180431768667062, "grad_norm": 1.3132039142085052, "learning_rate": 3.499337373549072e-07, "loss": 0.1724, "step": 11588 }, { "epoch": 0.9181224004753417, "grad_norm": 1.4648295216107914, "learning_rate": 3.4926117612044117e-07, "loss": 0.2561, "step": 11589 }, { "epoch": 0.918201624083977, "grad_norm": 1.3670028036780484, "learning_rate": 3.485892503406907e-07, "loss": 0.2308, "step": 11590 }, { "epoch": 0.9182808476926124, "grad_norm": 1.413317174962045, "learning_rate": 3.4791796005989917e-07, "loss": 0.2048, "step": 11591 }, { "epoch": 0.9183600713012477, "grad_norm": 1.3156485684355186, "learning_rate": 3.4724730532226693e-07, "loss": 0.2426, "step": 11592 }, { "epoch": 0.9184392949098832, "grad_norm": 1.2839334148701198, "learning_rate": 3.4657728617195295e-07, "loss": 0.2208, "step": 11593 }, { "epoch": 0.9185185185185185, "grad_norm": 1.0337204612195527, "learning_rate": 3.459079026530754e-07, "loss": 0.1628, "step": 11594 }, { "epoch": 0.9185977421271538, "grad_norm": 1.0214973701723287, "learning_rate": 3.4523915480971113e-07, "loss": 0.1495, "step": 11595 }, { "epoch": 0.9186769657357893, "grad_norm": 1.3430927803644317, "learning_rate": 3.445710426858906e-07, "loss": 0.2427, "step": 11596 }, { "epoch": 0.9187561893444246, "grad_norm": 0.9737141241836886, "learning_rate": 3.439035663256096e-07, "loss": 0.1646, "step": 11597 }, { "epoch": 0.91883541295306, "grad_norm": 1.2833807636694672, "learning_rate": 3.4323672577281754e-07, "loss": 0.2414, "step": 11598 }, { "epoch": 0.9189146365616954, "grad_norm": 1.3823219654489336, "learning_rate": 3.425705210714192e-07, "loss": 0.1954, "step": 11599 }, { "epoch": 0.9189938601703308, "grad_norm": 1.6658482083365656, "learning_rate": 3.419049522652851e-07, "loss": 0.2866, "step": 11600 }, { "epoch": 0.9190730837789661, "grad_norm": 1.2244437966818023, "learning_rate": 3.412400193982379e-07, "loss": 0.1693, "step": 11601 }, { "epoch": 0.9191523073876015, "grad_norm": 1.3093001524476615, "learning_rate": 3.4057572251405936e-07, "loss": 0.2053, "step": 11602 }, { "epoch": 0.9192315309962369, "grad_norm": 1.4559454081304735, "learning_rate": 3.3991206165649213e-07, "loss": 0.2062, "step": 11603 }, { "epoch": 0.9193107546048722, "grad_norm": 1.3119344660267651, "learning_rate": 3.392490368692347e-07, "loss": 0.1806, "step": 11604 }, { "epoch": 0.9193899782135077, "grad_norm": 1.53034192515243, "learning_rate": 3.385866481959432e-07, "loss": 0.256, "step": 11605 }, { "epoch": 0.919469201822143, "grad_norm": 1.128099510379244, "learning_rate": 3.379248956802328e-07, "loss": 0.1875, "step": 11606 }, { "epoch": 0.9195484254307784, "grad_norm": 1.260348781554864, "learning_rate": 3.3726377936567856e-07, "loss": 0.1794, "step": 11607 }, { "epoch": 0.9196276490394137, "grad_norm": 1.903357490299525, "learning_rate": 3.3660329929580904e-07, "loss": 0.2116, "step": 11608 }, { "epoch": 0.9197068726480491, "grad_norm": 1.1099084181269925, "learning_rate": 3.3594345551411503e-07, "loss": 0.194, "step": 11609 }, { "epoch": 0.9197860962566845, "grad_norm": 1.2170520120294799, "learning_rate": 3.352842480640439e-07, "loss": 0.1524, "step": 11610 }, { "epoch": 0.9198653198653198, "grad_norm": 1.1582542632832862, "learning_rate": 3.346256769890022e-07, "loss": 0.1702, "step": 11611 }, { "epoch": 0.9199445434739553, "grad_norm": 1.0699894486719395, "learning_rate": 3.3396774233235173e-07, "loss": 0.1468, "step": 11612 }, { "epoch": 0.9200237670825906, "grad_norm": 1.35999773543236, "learning_rate": 3.333104441374158e-07, "loss": 0.2873, "step": 11613 }, { "epoch": 0.920102990691226, "grad_norm": 1.6271186321335922, "learning_rate": 3.32653782447474e-07, "loss": 0.2807, "step": 11614 }, { "epoch": 0.9201822142998614, "grad_norm": 1.0585003754224247, "learning_rate": 3.319977573057642e-07, "loss": 0.1852, "step": 11615 }, { "epoch": 0.9202614379084967, "grad_norm": 1.4875737306188574, "learning_rate": 3.313423687554829e-07, "loss": 0.2728, "step": 11616 }, { "epoch": 0.9203406615171321, "grad_norm": 1.2213897286790911, "learning_rate": 3.3068761683978434e-07, "loss": 0.154, "step": 11617 }, { "epoch": 0.9204198851257674, "grad_norm": 1.2515154194584592, "learning_rate": 3.3003350160177974e-07, "loss": 0.2217, "step": 11618 }, { "epoch": 0.9204991087344029, "grad_norm": 1.1100112385452725, "learning_rate": 3.293800230845412e-07, "loss": 0.1606, "step": 11619 }, { "epoch": 0.9205783323430382, "grad_norm": 1.1502153782740052, "learning_rate": 3.287271813310955e-07, "loss": 0.19, "step": 11620 }, { "epoch": 0.9206575559516736, "grad_norm": 1.129163978637922, "learning_rate": 3.280749763844293e-07, "loss": 0.1483, "step": 11621 }, { "epoch": 0.920736779560309, "grad_norm": 1.2468665673009325, "learning_rate": 3.274234082874872e-07, "loss": 0.2402, "step": 11622 }, { "epoch": 0.9208160031689443, "grad_norm": 1.4164191628158815, "learning_rate": 3.267724770831737e-07, "loss": 0.1996, "step": 11623 }, { "epoch": 0.9208952267775797, "grad_norm": 1.1816855471896477, "learning_rate": 3.2612218281434794e-07, "loss": 0.2219, "step": 11624 }, { "epoch": 0.9209744503862151, "grad_norm": 1.52934294030688, "learning_rate": 3.254725255238267e-07, "loss": 0.2989, "step": 11625 }, { "epoch": 0.9210536739948505, "grad_norm": 1.3934782321165347, "learning_rate": 3.2482350525439023e-07, "loss": 0.2248, "step": 11626 }, { "epoch": 0.9211328976034858, "grad_norm": 1.3166476886642682, "learning_rate": 3.241751220487721e-07, "loss": 0.2082, "step": 11627 }, { "epoch": 0.9212121212121213, "grad_norm": 1.2818403386536417, "learning_rate": 3.235273759496638e-07, "loss": 0.1888, "step": 11628 }, { "epoch": 0.9212913448207566, "grad_norm": 0.903189518569157, "learning_rate": 3.2288026699971884e-07, "loss": 0.1058, "step": 11629 }, { "epoch": 0.9213705684293919, "grad_norm": 1.2610339211557926, "learning_rate": 3.222337952415455e-07, "loss": 0.1703, "step": 11630 }, { "epoch": 0.9214497920380273, "grad_norm": 1.3696459012125872, "learning_rate": 3.215879607177086e-07, "loss": 0.1951, "step": 11631 }, { "epoch": 0.9215290156466627, "grad_norm": 1.593612213382193, "learning_rate": 3.2094276347073626e-07, "loss": 0.2844, "step": 11632 }, { "epoch": 0.9216082392552981, "grad_norm": 1.1646523413033234, "learning_rate": 3.2029820354311014e-07, "loss": 0.1878, "step": 11633 }, { "epoch": 0.9216874628639334, "grad_norm": 1.3376368005026216, "learning_rate": 3.196542809772707e-07, "loss": 0.212, "step": 11634 }, { "epoch": 0.9217666864725689, "grad_norm": 1.2901264582424004, "learning_rate": 3.1901099581561846e-07, "loss": 0.2094, "step": 11635 }, { "epoch": 0.9218459100812042, "grad_norm": 1.0463639860805685, "learning_rate": 3.183683481005106e-07, "loss": 0.1692, "step": 11636 }, { "epoch": 0.9219251336898395, "grad_norm": 1.5067208914503585, "learning_rate": 3.1772633787426233e-07, "loss": 0.1934, "step": 11637 }, { "epoch": 0.922004357298475, "grad_norm": 1.578674209466328, "learning_rate": 3.1708496517914523e-07, "loss": 0.294, "step": 11638 }, { "epoch": 0.9220835809071103, "grad_norm": 1.3687116099502004, "learning_rate": 3.1644423005739335e-07, "loss": 0.2224, "step": 11639 }, { "epoch": 0.9221628045157457, "grad_norm": 1.2486326512391224, "learning_rate": 3.15804132551194e-07, "loss": 0.2072, "step": 11640 }, { "epoch": 0.922242028124381, "grad_norm": 1.3632927631783953, "learning_rate": 3.151646727026947e-07, "loss": 0.2052, "step": 11641 }, { "epoch": 0.9223212517330165, "grad_norm": 1.3330007786657825, "learning_rate": 3.1452585055400167e-07, "loss": 0.2881, "step": 11642 }, { "epoch": 0.9224004753416518, "grad_norm": 1.3946377233132328, "learning_rate": 3.138876661471779e-07, "loss": 0.2007, "step": 11643 }, { "epoch": 0.9224796989502871, "grad_norm": 1.1238048063461563, "learning_rate": 3.1325011952424435e-07, "loss": 0.1847, "step": 11644 }, { "epoch": 0.9225589225589226, "grad_norm": 1.4675888098580654, "learning_rate": 3.1261321072718063e-07, "loss": 0.2639, "step": 11645 }, { "epoch": 0.9226381461675579, "grad_norm": 1.3228553078447403, "learning_rate": 3.1197693979792556e-07, "loss": 0.2151, "step": 11646 }, { "epoch": 0.9227173697761933, "grad_norm": 1.3301417266819258, "learning_rate": 3.1134130677837103e-07, "loss": 0.2039, "step": 11647 }, { "epoch": 0.9227965933848287, "grad_norm": 1.281197228734371, "learning_rate": 3.107063117103759e-07, "loss": 0.1446, "step": 11648 }, { "epoch": 0.9228758169934641, "grad_norm": 1.28368766585353, "learning_rate": 3.100719546357467e-07, "loss": 0.1773, "step": 11649 }, { "epoch": 0.9229550406020994, "grad_norm": 1.3951118455185139, "learning_rate": 3.0943823559625217e-07, "loss": 0.266, "step": 11650 }, { "epoch": 0.9230342642107348, "grad_norm": 1.5610755211539782, "learning_rate": 3.088051546336246e-07, "loss": 0.3311, "step": 11651 }, { "epoch": 0.9231134878193702, "grad_norm": 1.554292572928912, "learning_rate": 3.08172711789545e-07, "loss": 0.2494, "step": 11652 }, { "epoch": 0.9231927114280055, "grad_norm": 1.4840477482654226, "learning_rate": 3.0754090710565785e-07, "loss": 0.3146, "step": 11653 }, { "epoch": 0.923271935036641, "grad_norm": 1.5159598526576488, "learning_rate": 3.069097406235666e-07, "loss": 0.2676, "step": 11654 }, { "epoch": 0.9233511586452763, "grad_norm": 1.0977865496127512, "learning_rate": 3.0627921238482794e-07, "loss": 0.2278, "step": 11655 }, { "epoch": 0.9234303822539117, "grad_norm": 1.4228183867391526, "learning_rate": 3.056493224309587e-07, "loss": 0.2674, "step": 11656 }, { "epoch": 0.923509605862547, "grad_norm": 1.5689402868935982, "learning_rate": 3.0502007080343675e-07, "loss": 0.2302, "step": 11657 }, { "epoch": 0.9235888294711824, "grad_norm": 1.4263459428708165, "learning_rate": 3.043914575436946e-07, "loss": 0.1644, "step": 11658 }, { "epoch": 0.9236680530798178, "grad_norm": 1.2053126750803078, "learning_rate": 3.0376348269312017e-07, "loss": 0.175, "step": 11659 }, { "epoch": 0.9237472766884531, "grad_norm": 1.1665812498854211, "learning_rate": 3.031361462930671e-07, "loss": 0.1886, "step": 11660 }, { "epoch": 0.9238265002970886, "grad_norm": 1.7057341045544592, "learning_rate": 3.025094483848401e-07, "loss": 0.2302, "step": 11661 }, { "epoch": 0.9239057239057239, "grad_norm": 1.4555151962626849, "learning_rate": 3.0188338900970505e-07, "loss": 0.3122, "step": 11662 }, { "epoch": 0.9239849475143593, "grad_norm": 1.2178807449347981, "learning_rate": 3.0125796820888343e-07, "loss": 0.1907, "step": 11663 }, { "epoch": 0.9240641711229947, "grad_norm": 1.229881971219856, "learning_rate": 3.0063318602355787e-07, "loss": 0.1871, "step": 11664 }, { "epoch": 0.92414339473163, "grad_norm": 1.498836699257237, "learning_rate": 3.000090424948665e-07, "loss": 0.2405, "step": 11665 }, { "epoch": 0.9242226183402654, "grad_norm": 1.8367582583925424, "learning_rate": 2.993855376639054e-07, "loss": 0.3306, "step": 11666 }, { "epoch": 0.9243018419489007, "grad_norm": 1.6411745186639422, "learning_rate": 2.987626715717318e-07, "loss": 0.3118, "step": 11667 }, { "epoch": 0.9243810655575362, "grad_norm": 1.2635489572839607, "learning_rate": 2.9814044425935605e-07, "loss": 0.2014, "step": 11668 }, { "epoch": 0.9244602891661715, "grad_norm": 1.124021977600041, "learning_rate": 2.9751885576774887e-07, "loss": 0.2154, "step": 11669 }, { "epoch": 0.9245395127748068, "grad_norm": 1.2599962752123208, "learning_rate": 2.9689790613784073e-07, "loss": 0.195, "step": 11670 }, { "epoch": 0.9246187363834423, "grad_norm": 1.2991942394353995, "learning_rate": 2.962775954105179e-07, "loss": 0.1925, "step": 11671 }, { "epoch": 0.9246979599920776, "grad_norm": 1.0306786580531144, "learning_rate": 2.9565792362662213e-07, "loss": 0.1287, "step": 11672 }, { "epoch": 0.924777183600713, "grad_norm": 1.4610051297952606, "learning_rate": 2.9503889082695967e-07, "loss": 0.2558, "step": 11673 }, { "epoch": 0.9248564072093484, "grad_norm": 1.3780365414436075, "learning_rate": 2.9442049705228794e-07, "loss": 0.1729, "step": 11674 }, { "epoch": 0.9249356308179838, "grad_norm": 1.2355285899968955, "learning_rate": 2.938027423433254e-07, "loss": 0.1671, "step": 11675 }, { "epoch": 0.9250148544266191, "grad_norm": 1.336103346660533, "learning_rate": 2.931856267407507e-07, "loss": 0.1895, "step": 11676 }, { "epoch": 0.9250940780352545, "grad_norm": 1.4343131387109438, "learning_rate": 2.9256915028519575e-07, "loss": 0.216, "step": 11677 }, { "epoch": 0.9251733016438899, "grad_norm": 1.5446231381017235, "learning_rate": 2.919533130172536e-07, "loss": 0.2473, "step": 11678 }, { "epoch": 0.9252525252525252, "grad_norm": 1.5536035127580397, "learning_rate": 2.913381149774719e-07, "loss": 0.1562, "step": 11679 }, { "epoch": 0.9253317488611607, "grad_norm": 1.1999161943769499, "learning_rate": 2.907235562063615e-07, "loss": 0.177, "step": 11680 }, { "epoch": 0.925410972469796, "grad_norm": 1.622409687988493, "learning_rate": 2.9010963674438674e-07, "loss": 0.3556, "step": 11681 }, { "epoch": 0.9254901960784314, "grad_norm": 1.1075209765826464, "learning_rate": 2.8949635663197087e-07, "loss": 0.1625, "step": 11682 }, { "epoch": 0.9255694196870667, "grad_norm": 1.1862006430947292, "learning_rate": 2.8888371590949703e-07, "loss": 0.1844, "step": 11683 }, { "epoch": 0.9256486432957021, "grad_norm": 1.3955708626069496, "learning_rate": 2.882717146173031e-07, "loss": 0.2439, "step": 11684 }, { "epoch": 0.9257278669043375, "grad_norm": 1.3698786986885638, "learning_rate": 2.8766035279568563e-07, "loss": 0.2144, "step": 11685 }, { "epoch": 0.9258070905129728, "grad_norm": 1.3196321968617435, "learning_rate": 2.8704963048490243e-07, "loss": 0.1931, "step": 11686 }, { "epoch": 0.9258863141216083, "grad_norm": 1.1230422783815341, "learning_rate": 2.864395477251658e-07, "loss": 0.1855, "step": 11687 }, { "epoch": 0.9259655377302436, "grad_norm": 1.3814388039259153, "learning_rate": 2.858301045566447e-07, "loss": 0.2268, "step": 11688 }, { "epoch": 0.926044761338879, "grad_norm": 1.3684293879363767, "learning_rate": 2.8522130101947045e-07, "loss": 0.2, "step": 11689 }, { "epoch": 0.9261239849475144, "grad_norm": 1.5205480670126952, "learning_rate": 2.8461313715372976e-07, "loss": 0.1734, "step": 11690 }, { "epoch": 0.9262032085561497, "grad_norm": 1.679026175862578, "learning_rate": 2.8400561299946503e-07, "loss": 0.2363, "step": 11691 }, { "epoch": 0.9262824321647851, "grad_norm": 1.4086595284165107, "learning_rate": 2.8339872859668103e-07, "loss": 0.2298, "step": 11692 }, { "epoch": 0.9263616557734204, "grad_norm": 1.4842261889138826, "learning_rate": 2.82792483985338e-07, "loss": 0.2081, "step": 11693 }, { "epoch": 0.9264408793820559, "grad_norm": 1.4308514433780373, "learning_rate": 2.8218687920535395e-07, "loss": 0.183, "step": 11694 }, { "epoch": 0.9265201029906912, "grad_norm": 1.2687038438754916, "learning_rate": 2.8158191429660364e-07, "loss": 0.1659, "step": 11695 }, { "epoch": 0.9265993265993266, "grad_norm": 1.149224754780243, "learning_rate": 2.8097758929892196e-07, "loss": 0.1634, "step": 11696 }, { "epoch": 0.926678550207962, "grad_norm": 1.319276523406566, "learning_rate": 2.803739042521025e-07, "loss": 0.2519, "step": 11697 }, { "epoch": 0.9267577738165973, "grad_norm": 1.4928639946480549, "learning_rate": 2.7977085919589253e-07, "loss": 0.2254, "step": 11698 }, { "epoch": 0.9268369974252327, "grad_norm": 1.527422502656232, "learning_rate": 2.791684541700013e-07, "loss": 0.2377, "step": 11699 }, { "epoch": 0.9269162210338681, "grad_norm": 1.3227684548630245, "learning_rate": 2.785666892140937e-07, "loss": 0.1947, "step": 11700 }, { "epoch": 0.9269954446425035, "grad_norm": 1.0447110915081543, "learning_rate": 2.7796556436779144e-07, "loss": 0.1611, "step": 11701 }, { "epoch": 0.9270746682511388, "grad_norm": 1.1712675378076414, "learning_rate": 2.773650796706795e-07, "loss": 0.1813, "step": 11702 }, { "epoch": 0.9271538918597743, "grad_norm": 1.613198350624746, "learning_rate": 2.7676523516229404e-07, "loss": 0.2333, "step": 11703 }, { "epoch": 0.9272331154684096, "grad_norm": 1.0544246153595418, "learning_rate": 2.7616603088213126e-07, "loss": 0.1411, "step": 11704 }, { "epoch": 0.9273123390770449, "grad_norm": 1.606025795651259, "learning_rate": 2.755674668696495e-07, "loss": 0.213, "step": 11705 }, { "epoch": 0.9273915626856803, "grad_norm": 1.3550933607013556, "learning_rate": 2.749695431642574e-07, "loss": 0.224, "step": 11706 }, { "epoch": 0.9274707862943157, "grad_norm": 1.265046808553349, "learning_rate": 2.743722598053278e-07, "loss": 0.2412, "step": 11707 }, { "epoch": 0.9275500099029511, "grad_norm": 1.2838966147096786, "learning_rate": 2.737756168321881e-07, "loss": 0.2409, "step": 11708 }, { "epoch": 0.9276292335115864, "grad_norm": 1.3318208352489131, "learning_rate": 2.7317961428412475e-07, "loss": 0.1744, "step": 11709 }, { "epoch": 0.9277084571202219, "grad_norm": 1.558522848240427, "learning_rate": 2.7258425220038077e-07, "loss": 0.282, "step": 11710 }, { "epoch": 0.9277876807288572, "grad_norm": 1.4114619915282693, "learning_rate": 2.719895306201581e-07, "loss": 0.215, "step": 11711 }, { "epoch": 0.9278669043374925, "grad_norm": 1.7033313063993536, "learning_rate": 2.7139544958261765e-07, "loss": 0.2641, "step": 11712 }, { "epoch": 0.927946127946128, "grad_norm": 1.345063763021351, "learning_rate": 2.7080200912687484e-07, "loss": 0.2015, "step": 11713 }, { "epoch": 0.9280253515547633, "grad_norm": 1.3396521024860375, "learning_rate": 2.702092092920061e-07, "loss": 0.2231, "step": 11714 }, { "epoch": 0.9281045751633987, "grad_norm": 1.7315056954293184, "learning_rate": 2.6961705011704475e-07, "loss": 0.3107, "step": 11715 }, { "epoch": 0.928183798772034, "grad_norm": 1.4026607903232158, "learning_rate": 2.6902553164098065e-07, "loss": 0.2113, "step": 11716 }, { "epoch": 0.9282630223806695, "grad_norm": 1.609693795305664, "learning_rate": 2.684346539027616e-07, "loss": 0.2437, "step": 11717 }, { "epoch": 0.9283422459893048, "grad_norm": 1.2518693612663423, "learning_rate": 2.6784441694129747e-07, "loss": 0.2092, "step": 11718 }, { "epoch": 0.9284214695979401, "grad_norm": 1.36093872256006, "learning_rate": 2.672548207954495e-07, "loss": 0.1746, "step": 11719 }, { "epoch": 0.9285006932065756, "grad_norm": 1.6777580367695801, "learning_rate": 2.6666586550403884e-07, "loss": 0.3009, "step": 11720 }, { "epoch": 0.9285799168152109, "grad_norm": 1.3154426416127023, "learning_rate": 2.6607755110584886e-07, "loss": 0.2764, "step": 11721 }, { "epoch": 0.9286591404238463, "grad_norm": 1.4638309939909384, "learning_rate": 2.654898776396164e-07, "loss": 0.2446, "step": 11722 }, { "epoch": 0.9287383640324817, "grad_norm": 1.131494028007227, "learning_rate": 2.64902845144035e-07, "loss": 0.1637, "step": 11723 }, { "epoch": 0.9288175876411171, "grad_norm": 1.2881899757114494, "learning_rate": 2.6431645365775806e-07, "loss": 0.2406, "step": 11724 }, { "epoch": 0.9288968112497524, "grad_norm": 1.386066966201406, "learning_rate": 2.637307032193992e-07, "loss": 0.2477, "step": 11725 }, { "epoch": 0.9289760348583878, "grad_norm": 1.5188150558481415, "learning_rate": 2.6314559386752423e-07, "loss": 0.2473, "step": 11726 }, { "epoch": 0.9290552584670232, "grad_norm": 1.4528315736885065, "learning_rate": 2.6256112564066236e-07, "loss": 0.2333, "step": 11727 }, { "epoch": 0.9291344820756585, "grad_norm": 0.9592547249325707, "learning_rate": 2.6197729857729617e-07, "loss": 0.1532, "step": 11728 }, { "epoch": 0.929213705684294, "grad_norm": 1.6710092286615459, "learning_rate": 2.613941127158681e-07, "loss": 0.2725, "step": 11729 }, { "epoch": 0.9292929292929293, "grad_norm": 1.3420527437980327, "learning_rate": 2.608115680947787e-07, "loss": 0.1488, "step": 11730 }, { "epoch": 0.9293721529015647, "grad_norm": 1.2712597518432591, "learning_rate": 2.602296647523861e-07, "loss": 0.2262, "step": 11731 }, { "epoch": 0.9294513765102, "grad_norm": 1.3733931666446226, "learning_rate": 2.596484027270041e-07, "loss": 0.1986, "step": 11732 }, { "epoch": 0.9295306001188354, "grad_norm": 1.4692234321747957, "learning_rate": 2.5906778205690876e-07, "loss": 0.2243, "step": 11733 }, { "epoch": 0.9296098237274708, "grad_norm": 1.2491528105606493, "learning_rate": 2.5848780278032836e-07, "loss": 0.1973, "step": 11734 }, { "epoch": 0.9296890473361061, "grad_norm": 1.30864716497283, "learning_rate": 2.579084649354546e-07, "loss": 0.1998, "step": 11735 }, { "epoch": 0.9297682709447416, "grad_norm": 1.0601137595023613, "learning_rate": 2.5732976856043034e-07, "loss": 0.1891, "step": 11736 }, { "epoch": 0.9298474945533769, "grad_norm": 1.2006555755373662, "learning_rate": 2.5675171369336284e-07, "loss": 0.1529, "step": 11737 }, { "epoch": 0.9299267181620123, "grad_norm": 1.5486600087431033, "learning_rate": 2.5617430037231495e-07, "loss": 0.3102, "step": 11738 }, { "epoch": 0.9300059417706477, "grad_norm": 1.197424746422277, "learning_rate": 2.5559752863530295e-07, "loss": 0.1999, "step": 11739 }, { "epoch": 0.930085165379283, "grad_norm": 1.307429626907592, "learning_rate": 2.550213985203076e-07, "loss": 0.1876, "step": 11740 }, { "epoch": 0.9301643889879184, "grad_norm": 1.5232249416873884, "learning_rate": 2.54445910065263e-07, "loss": 0.3259, "step": 11741 }, { "epoch": 0.9302436125965537, "grad_norm": 1.2806496961754996, "learning_rate": 2.538710633080621e-07, "loss": 0.1925, "step": 11742 }, { "epoch": 0.9303228362051892, "grad_norm": 1.2686683746155034, "learning_rate": 2.5329685828655803e-07, "loss": 0.1877, "step": 11743 }, { "epoch": 0.9304020598138245, "grad_norm": 1.3152483961455235, "learning_rate": 2.527232950385572e-07, "loss": 0.2267, "step": 11744 }, { "epoch": 0.93048128342246, "grad_norm": 1.1581671923904353, "learning_rate": 2.521503736018249e-07, "loss": 0.1611, "step": 11745 }, { "epoch": 0.9305605070310953, "grad_norm": 1.2851059149252215, "learning_rate": 2.5157809401408775e-07, "loss": 0.1614, "step": 11746 }, { "epoch": 0.9306397306397306, "grad_norm": 1.642225014022437, "learning_rate": 2.510064563130277e-07, "loss": 0.298, "step": 11747 }, { "epoch": 0.930718954248366, "grad_norm": 1.1884245041748536, "learning_rate": 2.5043546053628245e-07, "loss": 0.1749, "step": 11748 }, { "epoch": 0.9307981778570014, "grad_norm": 1.360637026091205, "learning_rate": 2.498651067214497e-07, "loss": 0.22, "step": 11749 }, { "epoch": 0.9308774014656368, "grad_norm": 1.4865996775409334, "learning_rate": 2.4929539490608614e-07, "loss": 0.2048, "step": 11750 }, { "epoch": 0.9309566250742721, "grad_norm": 1.4758000955801802, "learning_rate": 2.487263251277028e-07, "loss": 0.256, "step": 11751 }, { "epoch": 0.9310358486829075, "grad_norm": 1.1446928284980966, "learning_rate": 2.481578974237697e-07, "loss": 0.1872, "step": 11752 }, { "epoch": 0.9311150722915429, "grad_norm": 1.3106486960672494, "learning_rate": 2.475901118317181e-07, "loss": 0.1878, "step": 11753 }, { "epoch": 0.9311942959001782, "grad_norm": 1.7531819922993446, "learning_rate": 2.4702296838893134e-07, "loss": 0.2608, "step": 11754 }, { "epoch": 0.9312735195088137, "grad_norm": 1.2512840045916154, "learning_rate": 2.464564671327529e-07, "loss": 0.2088, "step": 11755 }, { "epoch": 0.931352743117449, "grad_norm": 1.9232297755269545, "learning_rate": 2.4589060810048635e-07, "loss": 0.3096, "step": 11756 }, { "epoch": 0.9314319667260844, "grad_norm": 1.3321777398321328, "learning_rate": 2.453253913293896e-07, "loss": 0.2394, "step": 11757 }, { "epoch": 0.9315111903347197, "grad_norm": 1.397992454416382, "learning_rate": 2.447608168566784e-07, "loss": 0.1881, "step": 11758 }, { "epoch": 0.9315904139433551, "grad_norm": 1.5472119994517586, "learning_rate": 2.441968847195286e-07, "loss": 0.2444, "step": 11759 }, { "epoch": 0.9316696375519905, "grad_norm": 1.5574448752611958, "learning_rate": 2.4363359495507166e-07, "loss": 0.2581, "step": 11760 }, { "epoch": 0.9317488611606258, "grad_norm": 1.245997876680775, "learning_rate": 2.430709476003978e-07, "loss": 0.1879, "step": 11761 }, { "epoch": 0.9318280847692613, "grad_norm": 1.3332456244341295, "learning_rate": 2.425089426925553e-07, "loss": 0.1686, "step": 11762 }, { "epoch": 0.9319073083778966, "grad_norm": 1.0194501328885548, "learning_rate": 2.419475802685489e-07, "loss": 0.1845, "step": 11763 }, { "epoch": 0.931986531986532, "grad_norm": 2.2879964073497865, "learning_rate": 2.413868603653413e-07, "loss": 0.202, "step": 11764 }, { "epoch": 0.9320657555951674, "grad_norm": 1.4269892370423047, "learning_rate": 2.4082678301985297e-07, "loss": 0.2323, "step": 11765 }, { "epoch": 0.9321449792038027, "grad_norm": 1.5580562973218195, "learning_rate": 2.402673482689633e-07, "loss": 0.2113, "step": 11766 }, { "epoch": 0.9322242028124381, "grad_norm": 1.3423216614956248, "learning_rate": 2.3970855614950827e-07, "loss": 0.2551, "step": 11767 }, { "epoch": 0.9323034264210734, "grad_norm": 1.106902026684924, "learning_rate": 2.3915040669828084e-07, "loss": 0.175, "step": 11768 }, { "epoch": 0.9323826500297089, "grad_norm": 1.5574270599329445, "learning_rate": 2.385928999520326e-07, "loss": 0.2487, "step": 11769 }, { "epoch": 0.9324618736383442, "grad_norm": 1.7952622775714446, "learning_rate": 2.3803603594747427e-07, "loss": 0.2541, "step": 11770 }, { "epoch": 0.9325410972469796, "grad_norm": 0.9879754028872343, "learning_rate": 2.374798147212698e-07, "loss": 0.1352, "step": 11771 }, { "epoch": 0.932620320855615, "grad_norm": 1.454845325079651, "learning_rate": 2.3692423631004658e-07, "loss": 0.2332, "step": 11772 }, { "epoch": 0.9326995444642503, "grad_norm": 1.411908066399433, "learning_rate": 2.3636930075038534e-07, "loss": 0.2538, "step": 11773 }, { "epoch": 0.9327787680728857, "grad_norm": 1.0538057797970626, "learning_rate": 2.3581500807882462e-07, "loss": 0.1435, "step": 11774 }, { "epoch": 0.9328579916815211, "grad_norm": 1.0969157894523553, "learning_rate": 2.3526135833186527e-07, "loss": 0.163, "step": 11775 }, { "epoch": 0.9329372152901565, "grad_norm": 1.3326822905880578, "learning_rate": 2.3470835154595918e-07, "loss": 0.2275, "step": 11776 }, { "epoch": 0.9330164388987918, "grad_norm": 1.5433416754030453, "learning_rate": 2.3415598775752057e-07, "loss": 0.2288, "step": 11777 }, { "epoch": 0.9330956625074273, "grad_norm": 1.341940863702087, "learning_rate": 2.3360426700292038e-07, "loss": 0.2157, "step": 11778 }, { "epoch": 0.9331748861160626, "grad_norm": 1.166842390274979, "learning_rate": 2.330531893184873e-07, "loss": 0.1794, "step": 11779 }, { "epoch": 0.9332541097246979, "grad_norm": 1.4192506291092686, "learning_rate": 2.3250275474050565e-07, "loss": 0.2002, "step": 11780 }, { "epoch": 0.9333333333333333, "grad_norm": 1.424225249593969, "learning_rate": 2.3195296330521756e-07, "loss": 0.2512, "step": 11781 }, { "epoch": 0.9334125569419687, "grad_norm": 0.9888248994080405, "learning_rate": 2.3140381504882736e-07, "loss": 0.1083, "step": 11782 }, { "epoch": 0.9334917805506041, "grad_norm": 1.1909576708458314, "learning_rate": 2.3085531000749285e-07, "loss": 0.1856, "step": 11783 }, { "epoch": 0.9335710041592394, "grad_norm": 1.1589324636910048, "learning_rate": 2.3030744821732953e-07, "loss": 0.1711, "step": 11784 }, { "epoch": 0.9336502277678749, "grad_norm": 1.568533870473859, "learning_rate": 2.297602297144119e-07, "loss": 0.2569, "step": 11785 }, { "epoch": 0.9337294513765102, "grad_norm": 1.8304215803466244, "learning_rate": 2.2921365453477229e-07, "loss": 0.2194, "step": 11786 }, { "epoch": 0.9338086749851455, "grad_norm": 1.5067639696282564, "learning_rate": 2.286677227143985e-07, "loss": 0.2173, "step": 11787 }, { "epoch": 0.933887898593781, "grad_norm": 1.4508389071037184, "learning_rate": 2.2812243428923964e-07, "loss": 0.252, "step": 11788 }, { "epoch": 0.9339671222024163, "grad_norm": 1.2177733074988366, "learning_rate": 2.2757778929519914e-07, "loss": 0.1851, "step": 11789 }, { "epoch": 0.9340463458110517, "grad_norm": 1.3164968723123085, "learning_rate": 2.2703378776813833e-07, "loss": 0.2263, "step": 11790 }, { "epoch": 0.934125569419687, "grad_norm": 1.2687385192977794, "learning_rate": 2.2649042974387858e-07, "loss": 0.1925, "step": 11791 }, { "epoch": 0.9342047930283225, "grad_norm": 1.2555332335414413, "learning_rate": 2.259477152581979e-07, "loss": 0.234, "step": 11792 }, { "epoch": 0.9342840166369578, "grad_norm": 1.3827845142854447, "learning_rate": 2.2540564434682998e-07, "loss": 0.1958, "step": 11793 }, { "epoch": 0.9343632402455931, "grad_norm": 1.1396192031512402, "learning_rate": 2.2486421704546623e-07, "loss": 0.2036, "step": 11794 }, { "epoch": 0.9344424638542286, "grad_norm": 1.46414449126329, "learning_rate": 2.2432343338976038e-07, "loss": 0.2115, "step": 11795 }, { "epoch": 0.9345216874628639, "grad_norm": 1.6883736874608692, "learning_rate": 2.2378329341531946e-07, "loss": 0.2014, "step": 11796 }, { "epoch": 0.9346009110714993, "grad_norm": 1.4931527294248408, "learning_rate": 2.2324379715770728e-07, "loss": 0.2402, "step": 11797 }, { "epoch": 0.9346801346801347, "grad_norm": 1.7027946803534397, "learning_rate": 2.2270494465244874e-07, "loss": 0.2599, "step": 11798 }, { "epoch": 0.9347593582887701, "grad_norm": 1.1970302088718698, "learning_rate": 2.2216673593502437e-07, "loss": 0.1852, "step": 11799 }, { "epoch": 0.9348385818974054, "grad_norm": 1.1463715935250338, "learning_rate": 2.2162917104087245e-07, "loss": 0.1987, "step": 11800 }, { "epoch": 0.9349178055060408, "grad_norm": 1.45743511857333, "learning_rate": 2.2109225000538915e-07, "loss": 0.2497, "step": 11801 }, { "epoch": 0.9349970291146762, "grad_norm": 1.3384941021037486, "learning_rate": 2.2055597286392838e-07, "loss": 0.1948, "step": 11802 }, { "epoch": 0.9350762527233115, "grad_norm": 1.1027871857955747, "learning_rate": 2.200203396517997e-07, "loss": 0.1413, "step": 11803 }, { "epoch": 0.935155476331947, "grad_norm": 1.4002335100430188, "learning_rate": 2.19485350404276e-07, "loss": 0.2083, "step": 11804 }, { "epoch": 0.9352346999405823, "grad_norm": 1.5358531501775807, "learning_rate": 2.1895100515658019e-07, "loss": 0.2254, "step": 11805 }, { "epoch": 0.9353139235492177, "grad_norm": 1.417322382088549, "learning_rate": 2.1841730394389527e-07, "loss": 0.2401, "step": 11806 }, { "epoch": 0.935393147157853, "grad_norm": 1.2639871788483603, "learning_rate": 2.1788424680136756e-07, "loss": 0.1979, "step": 11807 }, { "epoch": 0.9354723707664884, "grad_norm": 1.3536508211154703, "learning_rate": 2.173518337640923e-07, "loss": 0.1954, "step": 11808 }, { "epoch": 0.9355515943751238, "grad_norm": 2.1250739121836664, "learning_rate": 2.1682006486712703e-07, "loss": 0.2318, "step": 11809 }, { "epoch": 0.9356308179837591, "grad_norm": 1.543122021361864, "learning_rate": 2.1628894014548819e-07, "loss": 0.2467, "step": 11810 }, { "epoch": 0.9357100415923946, "grad_norm": 1.3945230761445655, "learning_rate": 2.1575845963414555e-07, "loss": 0.2167, "step": 11811 }, { "epoch": 0.9357892652010299, "grad_norm": 1.4518405474756406, "learning_rate": 2.1522862336803008e-07, "loss": 0.2723, "step": 11812 }, { "epoch": 0.9358684888096653, "grad_norm": 1.1833232743575877, "learning_rate": 2.146994313820283e-07, "loss": 0.2198, "step": 11813 }, { "epoch": 0.9359477124183007, "grad_norm": 1.192544749969018, "learning_rate": 2.141708837109846e-07, "loss": 0.1932, "step": 11814 }, { "epoch": 0.936026936026936, "grad_norm": 1.2390482454080411, "learning_rate": 2.136429803897022e-07, "loss": 0.2383, "step": 11815 }, { "epoch": 0.9361061596355714, "grad_norm": 1.5052267080951667, "learning_rate": 2.1311572145294114e-07, "loss": 0.2986, "step": 11816 }, { "epoch": 0.9361853832442067, "grad_norm": 1.0716443373011708, "learning_rate": 2.1258910693541802e-07, "loss": 0.1591, "step": 11817 }, { "epoch": 0.9362646068528422, "grad_norm": 1.2531648900834316, "learning_rate": 2.1206313687180845e-07, "loss": 0.2283, "step": 11818 }, { "epoch": 0.9363438304614775, "grad_norm": 1.0294217551869718, "learning_rate": 2.1153781129674367e-07, "loss": 0.1538, "step": 11819 }, { "epoch": 0.936423054070113, "grad_norm": 1.825043959452312, "learning_rate": 2.1101313024481595e-07, "loss": 0.2709, "step": 11820 }, { "epoch": 0.9365022776787483, "grad_norm": 1.4082009893038205, "learning_rate": 2.1048909375057103e-07, "loss": 0.2025, "step": 11821 }, { "epoch": 0.9365815012873836, "grad_norm": 1.0714974453063149, "learning_rate": 2.0996570184851572e-07, "loss": 0.1522, "step": 11822 }, { "epoch": 0.936660724896019, "grad_norm": 1.3841663975626486, "learning_rate": 2.0944295457311247e-07, "loss": 0.2994, "step": 11823 }, { "epoch": 0.9367399485046544, "grad_norm": 1.4817367386534586, "learning_rate": 2.0892085195878154e-07, "loss": 0.3112, "step": 11824 }, { "epoch": 0.9368191721132898, "grad_norm": 0.9474804587485585, "learning_rate": 2.0839939403989984e-07, "loss": 0.1766, "step": 11825 }, { "epoch": 0.9368983957219251, "grad_norm": 1.392698330739467, "learning_rate": 2.078785808508055e-07, "loss": 0.1892, "step": 11826 }, { "epoch": 0.9369776193305605, "grad_norm": 1.3232401112575685, "learning_rate": 2.0735841242578992e-07, "loss": 0.249, "step": 11827 }, { "epoch": 0.9370568429391959, "grad_norm": 1.337518976621295, "learning_rate": 2.068388887991013e-07, "loss": 0.248, "step": 11828 }, { "epoch": 0.9371360665478312, "grad_norm": 1.5996375940632537, "learning_rate": 2.0632001000495228e-07, "loss": 0.2656, "step": 11829 }, { "epoch": 0.9372152901564667, "grad_norm": 1.1307932127574574, "learning_rate": 2.0580177607750663e-07, "loss": 0.1834, "step": 11830 }, { "epoch": 0.937294513765102, "grad_norm": 1.2999057101886673, "learning_rate": 2.0528418705088592e-07, "loss": 0.2439, "step": 11831 }, { "epoch": 0.9373737373737374, "grad_norm": 1.4831531820373913, "learning_rate": 2.0476724295917294e-07, "loss": 0.2556, "step": 11832 }, { "epoch": 0.9374529609823727, "grad_norm": 1.173939555744418, "learning_rate": 2.04250943836406e-07, "loss": 0.1857, "step": 11833 }, { "epoch": 0.9375321845910081, "grad_norm": 1.2739064473636263, "learning_rate": 2.0373528971658009e-07, "loss": 0.2136, "step": 11834 }, { "epoch": 0.9376114081996435, "grad_norm": 1.272453047747186, "learning_rate": 2.0322028063364806e-07, "loss": 0.2141, "step": 11835 }, { "epoch": 0.9376906318082788, "grad_norm": 1.4034895278096364, "learning_rate": 2.0270591662152173e-07, "loss": 0.2449, "step": 11836 }, { "epoch": 0.9377698554169143, "grad_norm": 1.8051136931278835, "learning_rate": 2.0219219771406952e-07, "loss": 0.269, "step": 11837 }, { "epoch": 0.9378490790255496, "grad_norm": 1.4023868618819069, "learning_rate": 2.0167912394511657e-07, "loss": 0.2285, "step": 11838 }, { "epoch": 0.937928302634185, "grad_norm": 1.1885430928745575, "learning_rate": 2.01166695348447e-07, "loss": 0.2219, "step": 11839 }, { "epoch": 0.9380075262428204, "grad_norm": 1.334351575441163, "learning_rate": 2.0065491195780163e-07, "loss": 0.2218, "step": 11840 }, { "epoch": 0.9380867498514557, "grad_norm": 1.4406217702623154, "learning_rate": 2.00143773806879e-07, "loss": 0.2303, "step": 11841 }, { "epoch": 0.9381659734600911, "grad_norm": 1.329706289025817, "learning_rate": 1.9963328092933444e-07, "loss": 0.234, "step": 11842 }, { "epoch": 0.9382451970687264, "grad_norm": 1.662954207655982, "learning_rate": 1.9912343335878326e-07, "loss": 0.2913, "step": 11843 }, { "epoch": 0.9383244206773619, "grad_norm": 1.2153611402017082, "learning_rate": 1.9861423112879308e-07, "loss": 0.1916, "step": 11844 }, { "epoch": 0.9384036442859972, "grad_norm": 1.5296047956071133, "learning_rate": 1.9810567427289596e-07, "loss": 0.2323, "step": 11845 }, { "epoch": 0.9384828678946326, "grad_norm": 1.156034604355825, "learning_rate": 1.9759776282457731e-07, "loss": 0.1302, "step": 11846 }, { "epoch": 0.938562091503268, "grad_norm": 1.0828072375206133, "learning_rate": 1.970904968172771e-07, "loss": 0.1434, "step": 11847 }, { "epoch": 0.9386413151119033, "grad_norm": 1.2738723994992955, "learning_rate": 1.965838762844019e-07, "loss": 0.2556, "step": 11848 }, { "epoch": 0.9387205387205387, "grad_norm": 1.4287699753356409, "learning_rate": 1.9607790125930614e-07, "loss": 0.2085, "step": 11849 }, { "epoch": 0.9387997623291741, "grad_norm": 1.6615611135323411, "learning_rate": 1.9557257177530763e-07, "loss": 0.3128, "step": 11850 }, { "epoch": 0.9388789859378095, "grad_norm": 1.0842100642350225, "learning_rate": 1.9506788786567865e-07, "loss": 0.1545, "step": 11851 }, { "epoch": 0.9389582095464448, "grad_norm": 1.461056663648525, "learning_rate": 1.9456384956365149e-07, "loss": 0.2749, "step": 11852 }, { "epoch": 0.9390374331550803, "grad_norm": 1.5240099852363538, "learning_rate": 1.9406045690241404e-07, "loss": 0.221, "step": 11853 }, { "epoch": 0.9391166567637156, "grad_norm": 1.1995326697564606, "learning_rate": 1.935577099151109e-07, "loss": 0.1955, "step": 11854 }, { "epoch": 0.9391958803723509, "grad_norm": 1.4228484989316121, "learning_rate": 1.9305560863484896e-07, "loss": 0.2337, "step": 11855 }, { "epoch": 0.9392751039809863, "grad_norm": 1.3293574139390814, "learning_rate": 1.9255415309468618e-07, "loss": 0.1741, "step": 11856 }, { "epoch": 0.9393543275896217, "grad_norm": 1.653499143709913, "learning_rate": 1.920533433276417e-07, "loss": 0.2914, "step": 11857 }, { "epoch": 0.9394335511982571, "grad_norm": 1.2517050235985543, "learning_rate": 1.9155317936669248e-07, "loss": 0.198, "step": 11858 }, { "epoch": 0.9395127748068924, "grad_norm": 1.5424045510061735, "learning_rate": 1.910536612447711e-07, "loss": 0.2703, "step": 11859 }, { "epoch": 0.9395919984155279, "grad_norm": 1.1542399376246517, "learning_rate": 1.9055478899476788e-07, "loss": 0.1434, "step": 11860 }, { "epoch": 0.9396712220241632, "grad_norm": 1.624431413875575, "learning_rate": 1.900565626495332e-07, "loss": 0.2699, "step": 11861 }, { "epoch": 0.9397504456327985, "grad_norm": 1.2519920728417075, "learning_rate": 1.8955898224187086e-07, "loss": 0.2199, "step": 11862 }, { "epoch": 0.939829669241434, "grad_norm": 1.556699824477557, "learning_rate": 1.890620478045435e-07, "loss": 0.293, "step": 11863 }, { "epoch": 0.9399088928500693, "grad_norm": 1.062172475506258, "learning_rate": 1.8856575937027388e-07, "loss": 0.1908, "step": 11864 }, { "epoch": 0.9399881164587047, "grad_norm": 1.3599914032952658, "learning_rate": 1.8807011697174027e-07, "loss": 0.2473, "step": 11865 }, { "epoch": 0.94006734006734, "grad_norm": 1.470812827272954, "learning_rate": 1.8757512064157658e-07, "loss": 0.3064, "step": 11866 }, { "epoch": 0.9401465636759755, "grad_norm": 1.6214258861285569, "learning_rate": 1.870807704123756e-07, "loss": 0.275, "step": 11867 }, { "epoch": 0.9402257872846108, "grad_norm": 1.3242406876468993, "learning_rate": 1.8658706631669133e-07, "loss": 0.1892, "step": 11868 }, { "epoch": 0.9403050108932461, "grad_norm": 1.4675579235927214, "learning_rate": 1.8609400838702884e-07, "loss": 0.1647, "step": 11869 }, { "epoch": 0.9403842345018816, "grad_norm": 0.9899325417985122, "learning_rate": 1.856015966558533e-07, "loss": 0.1272, "step": 11870 }, { "epoch": 0.9404634581105169, "grad_norm": 1.2658252014544822, "learning_rate": 1.8510983115558988e-07, "loss": 0.1425, "step": 11871 }, { "epoch": 0.9405426817191523, "grad_norm": 1.0409987464124033, "learning_rate": 1.8461871191861825e-07, "loss": 0.1782, "step": 11872 }, { "epoch": 0.9406219053277877, "grad_norm": 1.3545409745235588, "learning_rate": 1.8412823897727473e-07, "loss": 0.2687, "step": 11873 }, { "epoch": 0.9407011289364231, "grad_norm": 1.2156091717253168, "learning_rate": 1.8363841236385571e-07, "loss": 0.1493, "step": 11874 }, { "epoch": 0.9407803525450584, "grad_norm": 1.231647848831258, "learning_rate": 1.8314923211061542e-07, "loss": 0.1705, "step": 11875 }, { "epoch": 0.9408595761536938, "grad_norm": 1.1761801458719676, "learning_rate": 1.826606982497603e-07, "loss": 0.1718, "step": 11876 }, { "epoch": 0.9409387997623292, "grad_norm": 1.5123812859875192, "learning_rate": 1.8217281081346238e-07, "loss": 0.2675, "step": 11877 }, { "epoch": 0.9410180233709645, "grad_norm": 1.1870596934246855, "learning_rate": 1.8168556983384377e-07, "loss": 0.1693, "step": 11878 }, { "epoch": 0.9410972469796, "grad_norm": 1.1783524117068334, "learning_rate": 1.811989753429877e-07, "loss": 0.2107, "step": 11879 }, { "epoch": 0.9411764705882353, "grad_norm": 1.436980441136393, "learning_rate": 1.8071302737293294e-07, "loss": 0.2459, "step": 11880 }, { "epoch": 0.9412556941968707, "grad_norm": 1.1959506585518205, "learning_rate": 1.802277259556784e-07, "loss": 0.1734, "step": 11881 }, { "epoch": 0.941334917805506, "grad_norm": 1.3158445148595443, "learning_rate": 1.7974307112317957e-07, "loss": 0.1918, "step": 11882 }, { "epoch": 0.9414141414141414, "grad_norm": 1.1075428525038007, "learning_rate": 1.7925906290734653e-07, "loss": 0.1592, "step": 11883 }, { "epoch": 0.9414933650227768, "grad_norm": 1.0829546841492328, "learning_rate": 1.787757013400504e-07, "loss": 0.1682, "step": 11884 }, { "epoch": 0.9415725886314121, "grad_norm": 1.3924413530700357, "learning_rate": 1.7829298645311688e-07, "loss": 0.2251, "step": 11885 }, { "epoch": 0.9416518122400476, "grad_norm": 1.4827696162012518, "learning_rate": 1.7781091827833164e-07, "loss": 0.2148, "step": 11886 }, { "epoch": 0.9417310358486829, "grad_norm": 1.2280209201682328, "learning_rate": 1.7732949684743593e-07, "loss": 0.2367, "step": 11887 }, { "epoch": 0.9418102594573183, "grad_norm": 1.101994998358428, "learning_rate": 1.768487221921278e-07, "loss": 0.147, "step": 11888 }, { "epoch": 0.9418894830659537, "grad_norm": 1.5072352873412898, "learning_rate": 1.763685943440674e-07, "loss": 0.2646, "step": 11889 }, { "epoch": 0.941968706674589, "grad_norm": 1.4695441262736455, "learning_rate": 1.7588911333486614e-07, "loss": 0.2227, "step": 11890 }, { "epoch": 0.9420479302832244, "grad_norm": 1.4941616695678268, "learning_rate": 1.7541027919609545e-07, "loss": 0.21, "step": 11891 }, { "epoch": 0.9421271538918597, "grad_norm": 1.3070088914423765, "learning_rate": 1.7493209195928562e-07, "loss": 0.1709, "step": 11892 }, { "epoch": 0.9422063775004952, "grad_norm": 1.0303539809339672, "learning_rate": 1.7445455165592262e-07, "loss": 0.1516, "step": 11893 }, { "epoch": 0.9422856011091305, "grad_norm": 1.5832898035687808, "learning_rate": 1.7397765831744905e-07, "loss": 0.2752, "step": 11894 }, { "epoch": 0.942364824717766, "grad_norm": 1.6027844603903292, "learning_rate": 1.7350141197526648e-07, "loss": 0.2743, "step": 11895 }, { "epoch": 0.9424440483264013, "grad_norm": 1.2047544621482034, "learning_rate": 1.7302581266073537e-07, "loss": 0.1545, "step": 11896 }, { "epoch": 0.9425232719350366, "grad_norm": 0.9941809397859586, "learning_rate": 1.7255086040516954e-07, "loss": 0.1159, "step": 11897 }, { "epoch": 0.942602495543672, "grad_norm": 1.3059711228896056, "learning_rate": 1.7207655523984179e-07, "loss": 0.1886, "step": 11898 }, { "epoch": 0.9426817191523074, "grad_norm": 1.7801572217739896, "learning_rate": 1.71602897195986e-07, "loss": 0.2573, "step": 11899 }, { "epoch": 0.9427609427609428, "grad_norm": 1.356885211025602, "learning_rate": 1.711298863047872e-07, "loss": 0.1654, "step": 11900 }, { "epoch": 0.9428401663695781, "grad_norm": 1.8828074228791027, "learning_rate": 1.7065752259739056e-07, "loss": 0.3406, "step": 11901 }, { "epoch": 0.9429193899782136, "grad_norm": 1.378281485369158, "learning_rate": 1.701858061049022e-07, "loss": 0.2399, "step": 11902 }, { "epoch": 0.9429986135868489, "grad_norm": 1.7622914263581961, "learning_rate": 1.697147368583796e-07, "loss": 0.3128, "step": 11903 }, { "epoch": 0.9430778371954842, "grad_norm": 1.1779012537425344, "learning_rate": 1.692443148888412e-07, "loss": 0.2598, "step": 11904 }, { "epoch": 0.9431570608041197, "grad_norm": 1.2626440688762763, "learning_rate": 1.6877454022726225e-07, "loss": 0.1856, "step": 11905 }, { "epoch": 0.943236284412755, "grad_norm": 1.021109736272606, "learning_rate": 1.6830541290457468e-07, "loss": 0.1653, "step": 11906 }, { "epoch": 0.9433155080213904, "grad_norm": 1.2120086583644574, "learning_rate": 1.6783693295166935e-07, "loss": 0.2067, "step": 11907 }, { "epoch": 0.9433947316300257, "grad_norm": 1.3630737181410553, "learning_rate": 1.6736910039939159e-07, "loss": 0.2496, "step": 11908 }, { "epoch": 0.9434739552386611, "grad_norm": 1.5236820255868397, "learning_rate": 1.6690191527854782e-07, "loss": 0.1782, "step": 11909 }, { "epoch": 0.9435531788472965, "grad_norm": 1.2197213354954548, "learning_rate": 1.6643537761989904e-07, "loss": 0.2251, "step": 11910 }, { "epoch": 0.9436324024559318, "grad_norm": 1.3848311345328754, "learning_rate": 1.6596948745416397e-07, "loss": 0.2144, "step": 11911 }, { "epoch": 0.9437116260645673, "grad_norm": 1.1850209270462837, "learning_rate": 1.6550424481202032e-07, "loss": 0.2018, "step": 11912 }, { "epoch": 0.9437908496732026, "grad_norm": 2.039681188470805, "learning_rate": 1.6503964972410136e-07, "loss": 0.3152, "step": 11913 }, { "epoch": 0.943870073281838, "grad_norm": 1.289540182328846, "learning_rate": 1.6457570222099816e-07, "loss": 0.1948, "step": 11914 }, { "epoch": 0.9439492968904734, "grad_norm": 1.3152030920513889, "learning_rate": 1.6411240233326076e-07, "loss": 0.2046, "step": 11915 }, { "epoch": 0.9440285204991087, "grad_norm": 1.4460436261278127, "learning_rate": 1.6364975009139473e-07, "loss": 0.2182, "step": 11916 }, { "epoch": 0.9441077441077441, "grad_norm": 1.1534976684157143, "learning_rate": 1.6318774552586237e-07, "loss": 0.1397, "step": 11917 }, { "epoch": 0.9441869677163794, "grad_norm": 1.2984169973201547, "learning_rate": 1.627263886670849e-07, "loss": 0.2758, "step": 11918 }, { "epoch": 0.9442661913250149, "grad_norm": 1.4242951790041694, "learning_rate": 1.6226567954544248e-07, "loss": 0.2169, "step": 11919 }, { "epoch": 0.9443454149336502, "grad_norm": 1.2893162730102112, "learning_rate": 1.618056181912675e-07, "loss": 0.2592, "step": 11920 }, { "epoch": 0.9444246385422856, "grad_norm": 0.99595852530852, "learning_rate": 1.6134620463485352e-07, "loss": 0.1254, "step": 11921 }, { "epoch": 0.944503862150921, "grad_norm": 1.468256810202704, "learning_rate": 1.6088743890645297e-07, "loss": 0.2421, "step": 11922 }, { "epoch": 0.9445830857595563, "grad_norm": 1.4257736491814927, "learning_rate": 1.6042932103627174e-07, "loss": 0.2402, "step": 11923 }, { "epoch": 0.9446623093681917, "grad_norm": 1.190154964733546, "learning_rate": 1.5997185105447344e-07, "loss": 0.2205, "step": 11924 }, { "epoch": 0.9447415329768271, "grad_norm": 1.3351385482760902, "learning_rate": 1.5951502899118176e-07, "loss": 0.1855, "step": 11925 }, { "epoch": 0.9448207565854625, "grad_norm": 1.1923291436262913, "learning_rate": 1.590588548764771e-07, "loss": 0.1585, "step": 11926 }, { "epoch": 0.9448999801940978, "grad_norm": 1.225062720953491, "learning_rate": 1.586033287403943e-07, "loss": 0.1951, "step": 11927 }, { "epoch": 0.9449792038027333, "grad_norm": 1.0672946209416674, "learning_rate": 1.5814845061292938e-07, "loss": 0.1606, "step": 11928 }, { "epoch": 0.9450584274113686, "grad_norm": 1.1799627462184714, "learning_rate": 1.5769422052403172e-07, "loss": 0.1642, "step": 11929 }, { "epoch": 0.9451376510200039, "grad_norm": 1.323578162600058, "learning_rate": 1.572406385036118e-07, "loss": 0.2022, "step": 11930 }, { "epoch": 0.9452168746286393, "grad_norm": 1.6741497543502415, "learning_rate": 1.5678770458153693e-07, "loss": 0.2528, "step": 11931 }, { "epoch": 0.9452960982372747, "grad_norm": 1.2580078193246054, "learning_rate": 1.563354187876287e-07, "loss": 0.2059, "step": 11932 }, { "epoch": 0.9453753218459101, "grad_norm": 1.2188005409246228, "learning_rate": 1.558837811516667e-07, "loss": 0.1993, "step": 11933 }, { "epoch": 0.9454545454545454, "grad_norm": 1.1386068903986977, "learning_rate": 1.5543279170339265e-07, "loss": 0.1405, "step": 11934 }, { "epoch": 0.9455337690631809, "grad_norm": 1.4518773886646272, "learning_rate": 1.5498245047249948e-07, "loss": 0.2792, "step": 11935 }, { "epoch": 0.9456129926718162, "grad_norm": 1.0379932828164715, "learning_rate": 1.5453275748864128e-07, "loss": 0.181, "step": 11936 }, { "epoch": 0.9456922162804515, "grad_norm": 1.2527407608144574, "learning_rate": 1.5408371278142652e-07, "loss": 0.2136, "step": 11937 }, { "epoch": 0.945771439889087, "grad_norm": 1.133051970395139, "learning_rate": 1.5363531638042494e-07, "loss": 0.1694, "step": 11938 }, { "epoch": 0.9458506634977223, "grad_norm": 0.9453520298459362, "learning_rate": 1.5318756831516069e-07, "loss": 0.1444, "step": 11939 }, { "epoch": 0.9459298871063577, "grad_norm": 1.1661787678538582, "learning_rate": 1.5274046861511348e-07, "loss": 0.1334, "step": 11940 }, { "epoch": 0.946009110714993, "grad_norm": 1.7486606422793853, "learning_rate": 1.5229401730972536e-07, "loss": 0.2351, "step": 11941 }, { "epoch": 0.9460883343236285, "grad_norm": 1.596072679826886, "learning_rate": 1.518482144283917e-07, "loss": 0.318, "step": 11942 }, { "epoch": 0.9461675579322638, "grad_norm": 1.4173459579703596, "learning_rate": 1.514030600004668e-07, "loss": 0.2463, "step": 11943 }, { "epoch": 0.9462467815408991, "grad_norm": 1.3730255622837788, "learning_rate": 1.5095855405526272e-07, "loss": 0.2797, "step": 11944 }, { "epoch": 0.9463260051495346, "grad_norm": 1.3236403827608467, "learning_rate": 1.505146966220461e-07, "loss": 0.1998, "step": 11945 }, { "epoch": 0.9464052287581699, "grad_norm": 1.389233998474259, "learning_rate": 1.5007148773004466e-07, "loss": 0.2002, "step": 11946 }, { "epoch": 0.9464844523668053, "grad_norm": 1.7784165772092895, "learning_rate": 1.496289274084417e-07, "loss": 0.2197, "step": 11947 }, { "epoch": 0.9465636759754407, "grad_norm": 1.5493614019702404, "learning_rate": 1.4918701568637618e-07, "loss": 0.2967, "step": 11948 }, { "epoch": 0.9466428995840761, "grad_norm": 1.2418625160201957, "learning_rate": 1.4874575259294588e-07, "loss": 0.2176, "step": 11949 }, { "epoch": 0.9467221231927114, "grad_norm": 1.4411002547260734, "learning_rate": 1.483051381572076e-07, "loss": 0.1918, "step": 11950 }, { "epoch": 0.9468013468013468, "grad_norm": 1.1050156657550234, "learning_rate": 1.4786517240817255e-07, "loss": 0.1983, "step": 11951 }, { "epoch": 0.9468805704099822, "grad_norm": 1.3573581722178696, "learning_rate": 1.474258553748098e-07, "loss": 0.2327, "step": 11952 }, { "epoch": 0.9469597940186175, "grad_norm": 2.149163762593121, "learning_rate": 1.469871870860473e-07, "loss": 0.2086, "step": 11953 }, { "epoch": 0.947039017627253, "grad_norm": 1.2791287067992043, "learning_rate": 1.4654916757076865e-07, "loss": 0.2405, "step": 11954 }, { "epoch": 0.9471182412358883, "grad_norm": 1.6083277190772298, "learning_rate": 1.461117968578163e-07, "loss": 0.204, "step": 11955 }, { "epoch": 0.9471974648445237, "grad_norm": 1.3285162745799173, "learning_rate": 1.4567507497598722e-07, "loss": 0.2018, "step": 11956 }, { "epoch": 0.947276688453159, "grad_norm": 1.7184821307080291, "learning_rate": 1.452390019540384e-07, "loss": 0.2823, "step": 11957 }, { "epoch": 0.9473559120617944, "grad_norm": 1.6791100265502252, "learning_rate": 1.4480357782068467e-07, "loss": 0.1741, "step": 11958 }, { "epoch": 0.9474351356704298, "grad_norm": 1.293959205928631, "learning_rate": 1.4436880260459307e-07, "loss": 0.2105, "step": 11959 }, { "epoch": 0.9475143592790651, "grad_norm": 1.5912953725969694, "learning_rate": 1.4393467633439629e-07, "loss": 0.2497, "step": 11960 }, { "epoch": 0.9475935828877006, "grad_norm": 1.2873370480244775, "learning_rate": 1.4350119903867477e-07, "loss": 0.2066, "step": 11961 }, { "epoch": 0.9476728064963359, "grad_norm": 1.3278285073873823, "learning_rate": 1.4306837074597235e-07, "loss": 0.1883, "step": 11962 }, { "epoch": 0.9477520301049713, "grad_norm": 1.4386089560988997, "learning_rate": 1.426361914847907e-07, "loss": 0.2089, "step": 11963 }, { "epoch": 0.9478312537136067, "grad_norm": 1.2522204529530243, "learning_rate": 1.422046612835848e-07, "loss": 0.2395, "step": 11964 }, { "epoch": 0.947910477322242, "grad_norm": 1.4331094810636582, "learning_rate": 1.417737801707686e-07, "loss": 0.2941, "step": 11965 }, { "epoch": 0.9479897009308774, "grad_norm": 1.2559555286142803, "learning_rate": 1.4134354817471497e-07, "loss": 0.1773, "step": 11966 }, { "epoch": 0.9480689245395127, "grad_norm": 1.5234539943660057, "learning_rate": 1.4091396532375123e-07, "loss": 0.2936, "step": 11967 }, { "epoch": 0.9481481481481482, "grad_norm": 1.4017798717037955, "learning_rate": 1.4048503164616367e-07, "loss": 0.1725, "step": 11968 }, { "epoch": 0.9482273717567835, "grad_norm": 1.7403147759700393, "learning_rate": 1.4005674717019746e-07, "loss": 0.3533, "step": 11969 }, { "epoch": 0.948306595365419, "grad_norm": 1.3541541850831256, "learning_rate": 1.3962911192405004e-07, "loss": 0.2474, "step": 11970 }, { "epoch": 0.9483858189740543, "grad_norm": 1.475853449203787, "learning_rate": 1.3920212593588113e-07, "loss": 0.2532, "step": 11971 }, { "epoch": 0.9484650425826896, "grad_norm": 1.5552424109613492, "learning_rate": 1.3877578923380486e-07, "loss": 0.1956, "step": 11972 }, { "epoch": 0.948544266191325, "grad_norm": 1.80668159592404, "learning_rate": 1.3835010184589325e-07, "loss": 0.2312, "step": 11973 }, { "epoch": 0.9486234897999604, "grad_norm": 1.411971935682486, "learning_rate": 1.3792506380017612e-07, "loss": 0.2772, "step": 11974 }, { "epoch": 0.9487027134085958, "grad_norm": 1.3883863741424434, "learning_rate": 1.3750067512464105e-07, "loss": 0.1856, "step": 11975 }, { "epoch": 0.9487819370172311, "grad_norm": 1.235841226080657, "learning_rate": 1.3707693584723124e-07, "loss": 0.2193, "step": 11976 }, { "epoch": 0.9488611606258666, "grad_norm": 1.2413515833972935, "learning_rate": 1.3665384599584774e-07, "loss": 0.2418, "step": 11977 }, { "epoch": 0.9489403842345019, "grad_norm": 1.4092986017668696, "learning_rate": 1.3623140559834824e-07, "loss": 0.2984, "step": 11978 }, { "epoch": 0.9490196078431372, "grad_norm": 1.2776247619251861, "learning_rate": 1.358096146825505e-07, "loss": 0.2324, "step": 11979 }, { "epoch": 0.9490988314517727, "grad_norm": 0.8980753348335349, "learning_rate": 1.353884732762256e-07, "loss": 0.0778, "step": 11980 }, { "epoch": 0.949178055060408, "grad_norm": 1.1800663763844146, "learning_rate": 1.3496798140710365e-07, "loss": 0.1331, "step": 11981 }, { "epoch": 0.9492572786690434, "grad_norm": 1.3320287231195085, "learning_rate": 1.3454813910287358e-07, "loss": 0.243, "step": 11982 }, { "epoch": 0.9493365022776787, "grad_norm": 1.5349659838867427, "learning_rate": 1.341289463911788e-07, "loss": 0.2524, "step": 11983 }, { "epoch": 0.9494157258863142, "grad_norm": 0.9779419419521583, "learning_rate": 1.337104032996206e-07, "loss": 0.1249, "step": 11984 }, { "epoch": 0.9494949494949495, "grad_norm": 1.3130984466198592, "learning_rate": 1.3329250985575915e-07, "loss": 0.182, "step": 11985 }, { "epoch": 0.9495741731035848, "grad_norm": 1.1407139271416284, "learning_rate": 1.3287526608711132e-07, "loss": 0.1682, "step": 11986 }, { "epoch": 0.9496533967122203, "grad_norm": 1.4095186487183038, "learning_rate": 1.324586720211485e-07, "loss": 0.268, "step": 11987 }, { "epoch": 0.9497326203208556, "grad_norm": 1.269723617351791, "learning_rate": 1.3204272768530313e-07, "loss": 0.2292, "step": 11988 }, { "epoch": 0.949811843929491, "grad_norm": 1.2776761685822047, "learning_rate": 1.3162743310696224e-07, "loss": 0.2249, "step": 11989 }, { "epoch": 0.9498910675381264, "grad_norm": 1.2221732045848455, "learning_rate": 1.3121278831347172e-07, "loss": 0.225, "step": 11990 }, { "epoch": 0.9499702911467617, "grad_norm": 1.3028100527904278, "learning_rate": 1.3079879333213308e-07, "loss": 0.2261, "step": 11991 }, { "epoch": 0.9500495147553971, "grad_norm": 1.2897115282589904, "learning_rate": 1.303854481902067e-07, "loss": 0.2044, "step": 11992 }, { "epoch": 0.9501287383640324, "grad_norm": 1.3020085071512875, "learning_rate": 1.2997275291490863e-07, "loss": 0.2259, "step": 11993 }, { "epoch": 0.9502079619726679, "grad_norm": 1.1185168777319725, "learning_rate": 1.2956070753341265e-07, "loss": 0.2063, "step": 11994 }, { "epoch": 0.9502871855813032, "grad_norm": 1.0354552482974677, "learning_rate": 1.2914931207285154e-07, "loss": 0.1454, "step": 11995 }, { "epoch": 0.9503664091899386, "grad_norm": 1.5246919721748133, "learning_rate": 1.2873856656031358e-07, "loss": 0.2671, "step": 11996 }, { "epoch": 0.950445632798574, "grad_norm": 1.245796593054884, "learning_rate": 1.2832847102284162e-07, "loss": 0.2281, "step": 11997 }, { "epoch": 0.9505248564072093, "grad_norm": 1.527279786575422, "learning_rate": 1.2791902548744185e-07, "loss": 0.2932, "step": 11998 }, { "epoch": 0.9506040800158447, "grad_norm": 1.4930144641370797, "learning_rate": 1.2751022998107154e-07, "loss": 0.3583, "step": 11999 }, { "epoch": 0.9506833036244801, "grad_norm": 1.3516748208563374, "learning_rate": 1.271020845306492e-07, "loss": 0.2185, "step": 12000 }, { "epoch": 0.9507625272331155, "grad_norm": 1.2888948217148029, "learning_rate": 1.2669458916305112e-07, "loss": 0.2254, "step": 12001 }, { "epoch": 0.9508417508417508, "grad_norm": 0.9919838545110675, "learning_rate": 1.2628774390510578e-07, "loss": 0.1362, "step": 12002 }, { "epoch": 0.9509209744503863, "grad_norm": 0.8817573968500436, "learning_rate": 1.2588154878360293e-07, "loss": 0.1129, "step": 12003 }, { "epoch": 0.9510001980590216, "grad_norm": 1.5010132874448798, "learning_rate": 1.254760038252889e-07, "loss": 0.2369, "step": 12004 }, { "epoch": 0.9510794216676569, "grad_norm": 1.686453830077245, "learning_rate": 1.2507110905686793e-07, "loss": 0.2635, "step": 12005 }, { "epoch": 0.9511586452762923, "grad_norm": 1.2310034150257587, "learning_rate": 1.2466686450499866e-07, "loss": 0.194, "step": 12006 }, { "epoch": 0.9512378688849277, "grad_norm": 1.2440299999995539, "learning_rate": 1.242632701962987e-07, "loss": 0.1745, "step": 12007 }, { "epoch": 0.9513170924935631, "grad_norm": 1.5254919100564839, "learning_rate": 1.2386032615734345e-07, "loss": 0.2648, "step": 12008 }, { "epoch": 0.9513963161021984, "grad_norm": 1.2466469382638743, "learning_rate": 1.2345803241466504e-07, "loss": 0.1748, "step": 12009 }, { "epoch": 0.9514755397108339, "grad_norm": 1.4911780575393123, "learning_rate": 1.2305638899475226e-07, "loss": 0.2403, "step": 12010 }, { "epoch": 0.9515547633194692, "grad_norm": 1.1133615498707368, "learning_rate": 1.2265539592405173e-07, "loss": 0.21, "step": 12011 }, { "epoch": 0.9516339869281045, "grad_norm": 1.438323333057415, "learning_rate": 1.222550532289668e-07, "loss": 0.2717, "step": 12012 }, { "epoch": 0.95171321053674, "grad_norm": 1.338172658249403, "learning_rate": 1.218553609358575e-07, "loss": 0.2196, "step": 12013 }, { "epoch": 0.9517924341453753, "grad_norm": 1.6368377244901853, "learning_rate": 1.214563190710416e-07, "loss": 0.2596, "step": 12014 }, { "epoch": 0.9518716577540107, "grad_norm": 1.4715574813175563, "learning_rate": 1.2105792766079594e-07, "loss": 0.2666, "step": 12015 }, { "epoch": 0.951950881362646, "grad_norm": 1.2140343872631034, "learning_rate": 1.2066018673134948e-07, "loss": 0.16, "step": 12016 }, { "epoch": 0.9520301049712815, "grad_norm": 1.518032396287896, "learning_rate": 1.2026309630889465e-07, "loss": 0.21, "step": 12017 }, { "epoch": 0.9521093285799168, "grad_norm": 1.4641034407477171, "learning_rate": 1.1986665641957718e-07, "loss": 0.252, "step": 12018 }, { "epoch": 0.9521885521885521, "grad_norm": 1.581106214000562, "learning_rate": 1.194708670894984e-07, "loss": 0.2676, "step": 12019 }, { "epoch": 0.9522677757971876, "grad_norm": 1.3851883197961747, "learning_rate": 1.1907572834472303e-07, "loss": 0.2207, "step": 12020 }, { "epoch": 0.9523469994058229, "grad_norm": 1.412009989454947, "learning_rate": 1.1868124021126582e-07, "loss": 0.2378, "step": 12021 }, { "epoch": 0.9524262230144583, "grad_norm": 1.5077565675306255, "learning_rate": 1.1828740271510375e-07, "loss": 0.2936, "step": 12022 }, { "epoch": 0.9525054466230937, "grad_norm": 1.2407322179599092, "learning_rate": 1.1789421588216721e-07, "loss": 0.16, "step": 12023 }, { "epoch": 0.9525846702317291, "grad_norm": 1.121054097699786, "learning_rate": 1.1750167973834769e-07, "loss": 0.1802, "step": 12024 }, { "epoch": 0.9526638938403644, "grad_norm": 1.2123476573175656, "learning_rate": 1.171097943094912e-07, "loss": 0.1826, "step": 12025 }, { "epoch": 0.9527431174489998, "grad_norm": 1.695234520379659, "learning_rate": 1.1671855962140045e-07, "loss": 0.2148, "step": 12026 }, { "epoch": 0.9528223410576352, "grad_norm": 1.3540368347767922, "learning_rate": 1.1632797569983811e-07, "loss": 0.2015, "step": 12027 }, { "epoch": 0.9529015646662705, "grad_norm": 1.3755359478661886, "learning_rate": 1.1593804257052143e-07, "loss": 0.22, "step": 12028 }, { "epoch": 0.952980788274906, "grad_norm": 1.2045428756173873, "learning_rate": 1.1554876025912432e-07, "loss": 0.183, "step": 12029 }, { "epoch": 0.9530600118835413, "grad_norm": 1.191458811297058, "learning_rate": 1.151601287912818e-07, "loss": 0.1591, "step": 12030 }, { "epoch": 0.9531392354921767, "grad_norm": 1.51003280604779, "learning_rate": 1.147721481925812e-07, "loss": 0.2629, "step": 12031 }, { "epoch": 0.953218459100812, "grad_norm": 1.5766495149396162, "learning_rate": 1.1438481848856986e-07, "loss": 0.2871, "step": 12032 }, { "epoch": 0.9532976827094474, "grad_norm": 1.624416702400341, "learning_rate": 1.1399813970475293e-07, "loss": 0.2511, "step": 12033 }, { "epoch": 0.9533769063180828, "grad_norm": 1.2759760450675603, "learning_rate": 1.1361211186658893e-07, "loss": 0.2226, "step": 12034 }, { "epoch": 0.9534561299267181, "grad_norm": 1.3488130915437462, "learning_rate": 1.1322673499949754e-07, "loss": 0.1872, "step": 12035 }, { "epoch": 0.9535353535353536, "grad_norm": 1.2469218248306462, "learning_rate": 1.1284200912885291e-07, "loss": 0.224, "step": 12036 }, { "epoch": 0.9536145771439889, "grad_norm": 1.1826302128327042, "learning_rate": 1.1245793427998919e-07, "loss": 0.2229, "step": 12037 }, { "epoch": 0.9536938007526243, "grad_norm": 1.2200646355315476, "learning_rate": 1.1207451047819396e-07, "loss": 0.209, "step": 12038 }, { "epoch": 0.9537730243612597, "grad_norm": 1.2437426914285388, "learning_rate": 1.1169173774871478e-07, "loss": 0.1563, "step": 12039 }, { "epoch": 0.953852247969895, "grad_norm": 1.4813359499281296, "learning_rate": 1.1130961611675484e-07, "loss": 0.2738, "step": 12040 }, { "epoch": 0.9539314715785304, "grad_norm": 1.4717511548655786, "learning_rate": 1.1092814560747511e-07, "loss": 0.2404, "step": 12041 }, { "epoch": 0.9540106951871657, "grad_norm": 1.2797721495979215, "learning_rate": 1.105473262459944e-07, "loss": 0.2002, "step": 12042 }, { "epoch": 0.9540899187958012, "grad_norm": 1.5027525510214748, "learning_rate": 1.1016715805738709e-07, "loss": 0.2407, "step": 12043 }, { "epoch": 0.9541691424044365, "grad_norm": 1.5141907228535005, "learning_rate": 1.0978764106668538e-07, "loss": 0.2723, "step": 12044 }, { "epoch": 0.954248366013072, "grad_norm": 1.7346619579244726, "learning_rate": 1.0940877529887928e-07, "loss": 0.3685, "step": 12045 }, { "epoch": 0.9543275896217073, "grad_norm": 1.9299077359652532, "learning_rate": 1.0903056077891438e-07, "loss": 0.2515, "step": 12046 }, { "epoch": 0.9544068132303426, "grad_norm": 1.4152862560784865, "learning_rate": 1.0865299753169522e-07, "loss": 0.3484, "step": 12047 }, { "epoch": 0.954486036838978, "grad_norm": 1.0916035270612172, "learning_rate": 1.0827608558208192e-07, "loss": 0.1754, "step": 12048 }, { "epoch": 0.9545652604476134, "grad_norm": 1.65197444466612, "learning_rate": 1.0789982495489238e-07, "loss": 0.2704, "step": 12049 }, { "epoch": 0.9546444840562488, "grad_norm": 1.2835648265580597, "learning_rate": 1.0752421567490123e-07, "loss": 0.1797, "step": 12050 }, { "epoch": 0.9547237076648841, "grad_norm": 1.497835429754647, "learning_rate": 1.0714925776684093e-07, "loss": 0.1938, "step": 12051 }, { "epoch": 0.9548029312735196, "grad_norm": 1.6822844337763083, "learning_rate": 1.067749512554006e-07, "loss": 0.2433, "step": 12052 }, { "epoch": 0.9548821548821549, "grad_norm": 1.009573782527316, "learning_rate": 1.0640129616522721e-07, "loss": 0.1241, "step": 12053 }, { "epoch": 0.9549613784907902, "grad_norm": 1.2860634601832368, "learning_rate": 1.0602829252092328e-07, "loss": 0.1797, "step": 12054 }, { "epoch": 0.9550406020994257, "grad_norm": 1.6118354973779467, "learning_rate": 1.0565594034704918e-07, "loss": 0.3022, "step": 12055 }, { "epoch": 0.955119825708061, "grad_norm": 1.2625732874743605, "learning_rate": 1.0528423966812307e-07, "loss": 0.1993, "step": 12056 }, { "epoch": 0.9551990493166964, "grad_norm": 1.4915002915896953, "learning_rate": 1.0491319050861981e-07, "loss": 0.3321, "step": 12057 }, { "epoch": 0.9552782729253317, "grad_norm": 1.002769121406365, "learning_rate": 1.0454279289296987e-07, "loss": 0.1489, "step": 12058 }, { "epoch": 0.9553574965339672, "grad_norm": 1.4980894120193144, "learning_rate": 1.0417304684556373e-07, "loss": 0.2009, "step": 12059 }, { "epoch": 0.9554367201426025, "grad_norm": 1.6452384749232496, "learning_rate": 1.0380395239074747e-07, "loss": 0.2951, "step": 12060 }, { "epoch": 0.9555159437512378, "grad_norm": 1.4340916612071166, "learning_rate": 1.0343550955282278e-07, "loss": 0.2014, "step": 12061 }, { "epoch": 0.9555951673598733, "grad_norm": 1.498196402153841, "learning_rate": 1.0306771835605022e-07, "loss": 0.2857, "step": 12062 }, { "epoch": 0.9556743909685086, "grad_norm": 1.2314404875989995, "learning_rate": 1.0270057882464823e-07, "loss": 0.2106, "step": 12063 }, { "epoch": 0.955753614577144, "grad_norm": 1.3877809760831388, "learning_rate": 1.0233409098278967e-07, "loss": 0.2483, "step": 12064 }, { "epoch": 0.9558328381857794, "grad_norm": 1.1610400820542772, "learning_rate": 1.0196825485460637e-07, "loss": 0.1796, "step": 12065 }, { "epoch": 0.9559120617944147, "grad_norm": 1.1650929796542495, "learning_rate": 1.0160307046418794e-07, "loss": 0.1475, "step": 12066 }, { "epoch": 0.9559912854030501, "grad_norm": 1.0850260526726183, "learning_rate": 1.0123853783557847e-07, "loss": 0.1349, "step": 12067 }, { "epoch": 0.9560705090116854, "grad_norm": 1.6217811538467826, "learning_rate": 1.0087465699278321e-07, "loss": 0.2582, "step": 12068 }, { "epoch": 0.9561497326203209, "grad_norm": 1.384486882112382, "learning_rate": 1.0051142795975855e-07, "loss": 0.2294, "step": 12069 }, { "epoch": 0.9562289562289562, "grad_norm": 1.3157176927898737, "learning_rate": 1.0014885076042313e-07, "loss": 0.1801, "step": 12070 }, { "epoch": 0.9563081798375916, "grad_norm": 1.5019837687230437, "learning_rate": 9.978692541865121e-08, "loss": 0.2447, "step": 12071 }, { "epoch": 0.956387403446227, "grad_norm": 1.2018344616119367, "learning_rate": 9.94256519582748e-08, "loss": 0.2104, "step": 12072 }, { "epoch": 0.9564666270548623, "grad_norm": 1.382753501926394, "learning_rate": 9.906503040307824e-08, "loss": 0.2516, "step": 12073 }, { "epoch": 0.9565458506634977, "grad_norm": 1.4339537821265524, "learning_rate": 9.87050607768103e-08, "loss": 0.2741, "step": 12074 }, { "epoch": 0.9566250742721331, "grad_norm": 1.3794841903610584, "learning_rate": 9.834574310317313e-08, "loss": 0.1599, "step": 12075 }, { "epoch": 0.9567042978807685, "grad_norm": 1.1467058783384767, "learning_rate": 9.798707740582447e-08, "loss": 0.1667, "step": 12076 }, { "epoch": 0.9567835214894038, "grad_norm": 1.2536122321258512, "learning_rate": 9.762906370837988e-08, "loss": 0.1844, "step": 12077 }, { "epoch": 0.9568627450980393, "grad_norm": 1.124703618109494, "learning_rate": 9.727170203441605e-08, "loss": 0.1499, "step": 12078 }, { "epoch": 0.9569419687066746, "grad_norm": 1.2182006055880223, "learning_rate": 9.691499240746083e-08, "loss": 0.1705, "step": 12079 }, { "epoch": 0.9570211923153099, "grad_norm": 1.2238522846079825, "learning_rate": 9.65589348510032e-08, "loss": 0.2093, "step": 12080 }, { "epoch": 0.9571004159239453, "grad_norm": 1.513681469823502, "learning_rate": 9.620352938848665e-08, "loss": 0.2353, "step": 12081 }, { "epoch": 0.9571796395325807, "grad_norm": 1.1708921392011702, "learning_rate": 9.584877604331467e-08, "loss": 0.1225, "step": 12082 }, { "epoch": 0.9572588631412161, "grad_norm": 1.49444931593593, "learning_rate": 9.549467483884412e-08, "loss": 0.2378, "step": 12083 }, { "epoch": 0.9573380867498514, "grad_norm": 1.3839567343048198, "learning_rate": 9.514122579839302e-08, "loss": 0.1519, "step": 12084 }, { "epoch": 0.9574173103584869, "grad_norm": 1.1023302408782591, "learning_rate": 9.478842894523165e-08, "loss": 0.1929, "step": 12085 }, { "epoch": 0.9574965339671222, "grad_norm": 1.730421586430579, "learning_rate": 9.443628430259144e-08, "loss": 0.3391, "step": 12086 }, { "epoch": 0.9575757575757575, "grad_norm": 1.3556957041593796, "learning_rate": 9.408479189366049e-08, "loss": 0.2192, "step": 12087 }, { "epoch": 0.957654981184393, "grad_norm": 1.3446272689870877, "learning_rate": 9.37339517415814e-08, "loss": 0.1944, "step": 12088 }, { "epoch": 0.9577342047930283, "grad_norm": 1.3150633615907967, "learning_rate": 9.33837638694557e-08, "loss": 0.1487, "step": 12089 }, { "epoch": 0.9578134284016637, "grad_norm": 1.1507819122392342, "learning_rate": 9.30342283003416e-08, "loss": 0.1915, "step": 12090 }, { "epoch": 0.957892652010299, "grad_norm": 1.5636916708326578, "learning_rate": 9.268534505725402e-08, "loss": 0.2226, "step": 12091 }, { "epoch": 0.9579718756189345, "grad_norm": 0.9988760320364604, "learning_rate": 9.233711416316571e-08, "loss": 0.1333, "step": 12092 }, { "epoch": 0.9580510992275698, "grad_norm": 1.4073140713551204, "learning_rate": 9.1989535641005e-08, "loss": 0.2329, "step": 12093 }, { "epoch": 0.9581303228362051, "grad_norm": 1.7331972313718855, "learning_rate": 9.164260951366021e-08, "loss": 0.2993, "step": 12094 }, { "epoch": 0.9582095464448406, "grad_norm": 1.4223058469021153, "learning_rate": 9.129633580397312e-08, "loss": 0.2253, "step": 12095 }, { "epoch": 0.9582887700534759, "grad_norm": 1.3186301104674516, "learning_rate": 9.095071453474435e-08, "loss": 0.2388, "step": 12096 }, { "epoch": 0.9583679936621113, "grad_norm": 1.3543933527280094, "learning_rate": 9.060574572873238e-08, "loss": 0.2262, "step": 12097 }, { "epoch": 0.9584472172707467, "grad_norm": 1.8175150985146755, "learning_rate": 9.026142940865013e-08, "loss": 0.1552, "step": 12098 }, { "epoch": 0.9585264408793821, "grad_norm": 1.2335238287866004, "learning_rate": 8.991776559717058e-08, "loss": 0.2188, "step": 12099 }, { "epoch": 0.9586056644880174, "grad_norm": 1.6544648377718518, "learning_rate": 8.95747543169223e-08, "loss": 0.3067, "step": 12100 }, { "epoch": 0.9586848880966528, "grad_norm": 1.1345685436292745, "learning_rate": 8.923239559049057e-08, "loss": 0.1721, "step": 12101 }, { "epoch": 0.9587641117052882, "grad_norm": 1.3399825959972054, "learning_rate": 8.889068944041734e-08, "loss": 0.2523, "step": 12102 }, { "epoch": 0.9588433353139235, "grad_norm": 1.6606470916426763, "learning_rate": 8.854963588920351e-08, "loss": 0.2674, "step": 12103 }, { "epoch": 0.958922558922559, "grad_norm": 1.2664170017616017, "learning_rate": 8.820923495930556e-08, "loss": 0.2405, "step": 12104 }, { "epoch": 0.9590017825311943, "grad_norm": 1.4517399427834006, "learning_rate": 8.786948667313667e-08, "loss": 0.2839, "step": 12105 }, { "epoch": 0.9590810061398297, "grad_norm": 1.436688799028156, "learning_rate": 8.753039105306782e-08, "loss": 0.2045, "step": 12106 }, { "epoch": 0.959160229748465, "grad_norm": 1.3075966837930841, "learning_rate": 8.719194812142673e-08, "loss": 0.2106, "step": 12107 }, { "epoch": 0.9592394533571004, "grad_norm": 1.2739832681844057, "learning_rate": 8.685415790049889e-08, "loss": 0.1766, "step": 12108 }, { "epoch": 0.9593186769657358, "grad_norm": 1.381004367940717, "learning_rate": 8.651702041252541e-08, "loss": 0.219, "step": 12109 }, { "epoch": 0.9593979005743711, "grad_norm": 1.5719520999738097, "learning_rate": 8.61805356797063e-08, "loss": 0.2708, "step": 12110 }, { "epoch": 0.9594771241830066, "grad_norm": 1.1898290102794342, "learning_rate": 8.584470372419606e-08, "loss": 0.2438, "step": 12111 }, { "epoch": 0.9595563477916419, "grad_norm": 1.7353954479277225, "learning_rate": 8.550952456810813e-08, "loss": 0.2483, "step": 12112 }, { "epoch": 0.9596355714002773, "grad_norm": 1.22742945273216, "learning_rate": 8.517499823351261e-08, "loss": 0.2291, "step": 12113 }, { "epoch": 0.9597147950089127, "grad_norm": 1.3177081652211198, "learning_rate": 8.484112474243633e-08, "loss": 0.2294, "step": 12114 }, { "epoch": 0.959794018617548, "grad_norm": 1.3204271554155063, "learning_rate": 8.450790411686282e-08, "loss": 0.2163, "step": 12115 }, { "epoch": 0.9598732422261834, "grad_norm": 1.1210764762060492, "learning_rate": 8.417533637873454e-08, "loss": 0.189, "step": 12116 }, { "epoch": 0.9599524658348187, "grad_norm": 1.4746511371134137, "learning_rate": 8.384342154994841e-08, "loss": 0.2361, "step": 12117 }, { "epoch": 0.9600316894434542, "grad_norm": 1.4551871061446462, "learning_rate": 8.351215965235915e-08, "loss": 0.1973, "step": 12118 }, { "epoch": 0.9601109130520895, "grad_norm": 1.343528999564082, "learning_rate": 8.318155070777822e-08, "loss": 0.2135, "step": 12119 }, { "epoch": 0.960190136660725, "grad_norm": 1.1672042063154022, "learning_rate": 8.28515947379771e-08, "loss": 0.1513, "step": 12120 }, { "epoch": 0.9602693602693603, "grad_norm": 1.2958760868955908, "learning_rate": 8.252229176467841e-08, "loss": 0.2067, "step": 12121 }, { "epoch": 0.9603485838779956, "grad_norm": 1.1238609629028475, "learning_rate": 8.219364180956812e-08, "loss": 0.1562, "step": 12122 }, { "epoch": 0.960427807486631, "grad_norm": 1.5411071517664339, "learning_rate": 8.186564489428561e-08, "loss": 0.2308, "step": 12123 }, { "epoch": 0.9605070310952664, "grad_norm": 1.2910708605082721, "learning_rate": 8.153830104042582e-08, "loss": 0.1994, "step": 12124 }, { "epoch": 0.9605862547039018, "grad_norm": 1.2090214632643919, "learning_rate": 8.121161026954482e-08, "loss": 0.1796, "step": 12125 }, { "epoch": 0.9606654783125371, "grad_norm": 1.2293102610274222, "learning_rate": 8.088557260315322e-08, "loss": 0.1869, "step": 12126 }, { "epoch": 0.9607447019211726, "grad_norm": 1.5758302338841237, "learning_rate": 8.056018806271937e-08, "loss": 0.3025, "step": 12127 }, { "epoch": 0.9608239255298079, "grad_norm": 1.2662352546161477, "learning_rate": 8.023545666966726e-08, "loss": 0.1979, "step": 12128 }, { "epoch": 0.9609031491384432, "grad_norm": 1.5212885895466786, "learning_rate": 7.991137844537977e-08, "loss": 0.2857, "step": 12129 }, { "epoch": 0.9609823727470787, "grad_norm": 1.0824801357269576, "learning_rate": 7.958795341119541e-08, "loss": 0.1143, "step": 12130 }, { "epoch": 0.961061596355714, "grad_norm": 1.3017967915763433, "learning_rate": 7.926518158841045e-08, "loss": 0.2157, "step": 12131 }, { "epoch": 0.9611408199643494, "grad_norm": 1.3201811404147326, "learning_rate": 7.894306299827791e-08, "loss": 0.2127, "step": 12132 }, { "epoch": 0.9612200435729847, "grad_norm": 1.4514946484897493, "learning_rate": 7.86215976620075e-08, "loss": 0.1764, "step": 12133 }, { "epoch": 0.9612992671816202, "grad_norm": 1.4849218175548835, "learning_rate": 7.83007856007667e-08, "loss": 0.2878, "step": 12134 }, { "epoch": 0.9613784907902555, "grad_norm": 1.5690597622736902, "learning_rate": 7.798062683567864e-08, "loss": 0.1912, "step": 12135 }, { "epoch": 0.9614577143988908, "grad_norm": 1.3691484049840832, "learning_rate": 7.766112138782422e-08, "loss": 0.1742, "step": 12136 }, { "epoch": 0.9615369380075263, "grad_norm": 1.6543121613121445, "learning_rate": 7.734226927824106e-08, "loss": 0.2167, "step": 12137 }, { "epoch": 0.9616161616161616, "grad_norm": 1.4726366517948368, "learning_rate": 7.70240705279257e-08, "loss": 0.2216, "step": 12138 }, { "epoch": 0.961695385224797, "grad_norm": 1.4127443340806638, "learning_rate": 7.670652515782917e-08, "loss": 0.2281, "step": 12139 }, { "epoch": 0.9617746088334324, "grad_norm": 1.0391781337030863, "learning_rate": 7.638963318886028e-08, "loss": 0.1488, "step": 12140 }, { "epoch": 0.9618538324420678, "grad_norm": 1.3354249076797793, "learning_rate": 7.607339464188346e-08, "loss": 0.2121, "step": 12141 }, { "epoch": 0.9619330560507031, "grad_norm": 1.4241452585947816, "learning_rate": 7.575780953772427e-08, "loss": 0.2593, "step": 12142 }, { "epoch": 0.9620122796593384, "grad_norm": 1.1991295839562486, "learning_rate": 7.544287789715943e-08, "loss": 0.2001, "step": 12143 }, { "epoch": 0.9620915032679739, "grad_norm": 1.5383734362394947, "learning_rate": 7.51285997409279e-08, "loss": 0.2076, "step": 12144 }, { "epoch": 0.9621707268766092, "grad_norm": 1.704234911213951, "learning_rate": 7.481497508972313e-08, "loss": 0.2625, "step": 12145 }, { "epoch": 0.9622499504852446, "grad_norm": 1.4030967323240011, "learning_rate": 7.450200396419416e-08, "loss": 0.2675, "step": 12146 }, { "epoch": 0.96232917409388, "grad_norm": 1.9239969177192455, "learning_rate": 7.418968638495006e-08, "loss": 0.3094, "step": 12147 }, { "epoch": 0.9624083977025153, "grad_norm": 1.0788767502670733, "learning_rate": 7.387802237255658e-08, "loss": 0.1421, "step": 12148 }, { "epoch": 0.9624876213111507, "grad_norm": 1.6326189032316254, "learning_rate": 7.35670119475329e-08, "loss": 0.2816, "step": 12149 }, { "epoch": 0.9625668449197861, "grad_norm": 1.2659298189789456, "learning_rate": 7.325665513035707e-08, "loss": 0.1891, "step": 12150 }, { "epoch": 0.9626460685284215, "grad_norm": 1.3321865926123506, "learning_rate": 7.294695194146829e-08, "loss": 0.1996, "step": 12151 }, { "epoch": 0.9627252921370568, "grad_norm": 1.4562287523024298, "learning_rate": 7.263790240125579e-08, "loss": 0.2582, "step": 12152 }, { "epoch": 0.9628045157456923, "grad_norm": 2.0075950733529773, "learning_rate": 7.232950653006998e-08, "loss": 0.3089, "step": 12153 }, { "epoch": 0.9628837393543276, "grad_norm": 1.4084725111007126, "learning_rate": 7.202176434821683e-08, "loss": 0.2484, "step": 12154 }, { "epoch": 0.9629629629629629, "grad_norm": 1.5399977729776648, "learning_rate": 7.171467587596126e-08, "loss": 0.2149, "step": 12155 }, { "epoch": 0.9630421865715983, "grad_norm": 1.5275181949076877, "learning_rate": 7.140824113352151e-08, "loss": 0.2532, "step": 12156 }, { "epoch": 0.9631214101802337, "grad_norm": 1.287590968924547, "learning_rate": 7.110246014107592e-08, "loss": 0.255, "step": 12157 }, { "epoch": 0.9632006337888691, "grad_norm": 1.3973797913588788, "learning_rate": 7.079733291875945e-08, "loss": 0.2133, "step": 12158 }, { "epoch": 0.9632798573975044, "grad_norm": 1.2654956713772518, "learning_rate": 7.049285948666052e-08, "loss": 0.2054, "step": 12159 }, { "epoch": 0.9633590810061399, "grad_norm": 1.5181264535255565, "learning_rate": 7.018903986483083e-08, "loss": 0.2483, "step": 12160 }, { "epoch": 0.9634383046147752, "grad_norm": 1.4993303828743456, "learning_rate": 6.988587407327219e-08, "loss": 0.2477, "step": 12161 }, { "epoch": 0.9635175282234105, "grad_norm": 1.168916372065898, "learning_rate": 6.958336213194972e-08, "loss": 0.1949, "step": 12162 }, { "epoch": 0.963596751832046, "grad_norm": 1.3396899518409298, "learning_rate": 6.928150406077861e-08, "loss": 0.1886, "step": 12163 }, { "epoch": 0.9636759754406813, "grad_norm": 1.4996940315474447, "learning_rate": 6.89802998796385e-08, "loss": 0.2348, "step": 12164 }, { "epoch": 0.9637551990493167, "grad_norm": 1.8643842749256838, "learning_rate": 6.867974960836022e-08, "loss": 0.2822, "step": 12165 }, { "epoch": 0.963834422657952, "grad_norm": 1.2574672922472474, "learning_rate": 6.837985326673457e-08, "loss": 0.2161, "step": 12166 }, { "epoch": 0.9639136462665875, "grad_norm": 1.6438673262321906, "learning_rate": 6.80806108745069e-08, "loss": 0.2964, "step": 12167 }, { "epoch": 0.9639928698752228, "grad_norm": 1.176001171388162, "learning_rate": 6.778202245138144e-08, "loss": 0.2096, "step": 12168 }, { "epoch": 0.9640720934838581, "grad_norm": 1.0239928894708468, "learning_rate": 6.748408801701911e-08, "loss": 0.1241, "step": 12169 }, { "epoch": 0.9641513170924936, "grad_norm": 1.571309231946613, "learning_rate": 6.718680759103757e-08, "loss": 0.2577, "step": 12170 }, { "epoch": 0.9642305407011289, "grad_norm": 1.3598451065031205, "learning_rate": 6.689018119301227e-08, "loss": 0.25, "step": 12171 }, { "epoch": 0.9643097643097643, "grad_norm": 1.2925687705380562, "learning_rate": 6.659420884247203e-08, "loss": 0.2392, "step": 12172 }, { "epoch": 0.9643889879183997, "grad_norm": 1.1308155036818477, "learning_rate": 6.629889055890682e-08, "loss": 0.1639, "step": 12173 }, { "epoch": 0.9644682115270351, "grad_norm": 1.4475982701983245, "learning_rate": 6.600422636176219e-08, "loss": 0.2576, "step": 12174 }, { "epoch": 0.9645474351356704, "grad_norm": 1.4380596796821712, "learning_rate": 6.571021627043928e-08, "loss": 0.1797, "step": 12175 }, { "epoch": 0.9646266587443058, "grad_norm": 1.4300944381680842, "learning_rate": 6.541686030429817e-08, "loss": 0.235, "step": 12176 }, { "epoch": 0.9647058823529412, "grad_norm": 1.1656434602092571, "learning_rate": 6.512415848265453e-08, "loss": 0.1854, "step": 12177 }, { "epoch": 0.9647851059615765, "grad_norm": 1.2666882505308366, "learning_rate": 6.48321108247818e-08, "loss": 0.2255, "step": 12178 }, { "epoch": 0.964864329570212, "grad_norm": 1.50194016194839, "learning_rate": 6.454071734990907e-08, "loss": 0.2744, "step": 12179 }, { "epoch": 0.9649435531788473, "grad_norm": 1.4170857624721638, "learning_rate": 6.424997807722433e-08, "loss": 0.2931, "step": 12180 }, { "epoch": 0.9650227767874827, "grad_norm": 1.3123720705969035, "learning_rate": 6.395989302587113e-08, "loss": 0.1836, "step": 12181 }, { "epoch": 0.965102000396118, "grad_norm": 1.6992903764607232, "learning_rate": 6.367046221494866e-08, "loss": 0.3634, "step": 12182 }, { "epoch": 0.9651812240047534, "grad_norm": 1.1401655244332354, "learning_rate": 6.33816856635161e-08, "loss": 0.1586, "step": 12183 }, { "epoch": 0.9652604476133888, "grad_norm": 1.2693838860272517, "learning_rate": 6.309356339058825e-08, "loss": 0.1629, "step": 12184 }, { "epoch": 0.9653396712220241, "grad_norm": 1.1035292437370114, "learning_rate": 6.28060954151355e-08, "loss": 0.1323, "step": 12185 }, { "epoch": 0.9654188948306596, "grad_norm": 1.3287846184987673, "learning_rate": 6.251928175608602e-08, "loss": 0.234, "step": 12186 }, { "epoch": 0.9654981184392949, "grad_norm": 1.6808753389531697, "learning_rate": 6.223312243232693e-08, "loss": 0.3173, "step": 12187 }, { "epoch": 0.9655773420479303, "grad_norm": 1.4629514101098156, "learning_rate": 6.194761746269762e-08, "loss": 0.2741, "step": 12188 }, { "epoch": 0.9656565656565657, "grad_norm": 1.3981916529423903, "learning_rate": 6.16627668659997e-08, "loss": 0.1928, "step": 12189 }, { "epoch": 0.965735789265201, "grad_norm": 1.1763928950181257, "learning_rate": 6.137857066098929e-08, "loss": 0.2251, "step": 12190 }, { "epoch": 0.9658150128738364, "grad_norm": 1.3421797233630617, "learning_rate": 6.109502886637697e-08, "loss": 0.224, "step": 12191 }, { "epoch": 0.9658942364824717, "grad_norm": 0.9270520369504419, "learning_rate": 6.081214150083447e-08, "loss": 0.112, "step": 12192 }, { "epoch": 0.9659734600911072, "grad_norm": 1.8481709396715922, "learning_rate": 6.052990858298801e-08, "loss": 0.2632, "step": 12193 }, { "epoch": 0.9660526836997425, "grad_norm": 1.3208908447697942, "learning_rate": 6.024833013142272e-08, "loss": 0.2372, "step": 12194 }, { "epoch": 0.966131907308378, "grad_norm": 1.2458708480604703, "learning_rate": 5.9967406164676e-08, "loss": 0.2134, "step": 12195 }, { "epoch": 0.9662111309170133, "grad_norm": 1.163062056507131, "learning_rate": 5.96871367012486e-08, "loss": 0.1485, "step": 12196 }, { "epoch": 0.9662903545256486, "grad_norm": 1.2097916449634558, "learning_rate": 5.9407521759592414e-08, "loss": 0.2029, "step": 12197 }, { "epoch": 0.966369578134284, "grad_norm": 1.282762612822857, "learning_rate": 5.912856135812051e-08, "loss": 0.2042, "step": 12198 }, { "epoch": 0.9664488017429194, "grad_norm": 1.4703193284617515, "learning_rate": 5.8850255515200405e-08, "loss": 0.2417, "step": 12199 }, { "epoch": 0.9665280253515548, "grad_norm": 1.3913644006621462, "learning_rate": 5.857260424915634e-08, "loss": 0.2386, "step": 12200 }, { "epoch": 0.9666072489601901, "grad_norm": 1.4006172209198122, "learning_rate": 5.8295607578272575e-08, "loss": 0.28, "step": 12201 }, { "epoch": 0.9666864725688256, "grad_norm": 1.3877347369665778, "learning_rate": 5.801926552078563e-08, "loss": 0.1874, "step": 12202 }, { "epoch": 0.9667656961774609, "grad_norm": 1.1495006728063186, "learning_rate": 5.774357809489317e-08, "loss": 0.1623, "step": 12203 }, { "epoch": 0.9668449197860962, "grad_norm": 1.3752093057341217, "learning_rate": 5.746854531874624e-08, "loss": 0.2532, "step": 12204 }, { "epoch": 0.9669241433947316, "grad_norm": 1.2165236408239686, "learning_rate": 5.7194167210454785e-08, "loss": 0.1689, "step": 12205 }, { "epoch": 0.967003367003367, "grad_norm": 1.2438981028003795, "learning_rate": 5.692044378808659e-08, "loss": 0.1825, "step": 12206 }, { "epoch": 0.9670825906120024, "grad_norm": 1.2642222220469412, "learning_rate": 5.664737506966389e-08, "loss": 0.2264, "step": 12207 }, { "epoch": 0.9671618142206377, "grad_norm": 1.494274586391317, "learning_rate": 5.6374961073166757e-08, "loss": 0.2515, "step": 12208 }, { "epoch": 0.9672410378292732, "grad_norm": 1.2357348088375435, "learning_rate": 5.610320181653306e-08, "loss": 0.2515, "step": 12209 }, { "epoch": 0.9673202614379085, "grad_norm": 1.2427218164644418, "learning_rate": 5.583209731765626e-08, "loss": 0.1704, "step": 12210 }, { "epoch": 0.9673994850465438, "grad_norm": 1.3440637586162874, "learning_rate": 5.5561647594388756e-08, "loss": 0.2034, "step": 12211 }, { "epoch": 0.9674787086551793, "grad_norm": 1.336502308107966, "learning_rate": 5.529185266453629e-08, "loss": 0.2411, "step": 12212 }, { "epoch": 0.9675579322638146, "grad_norm": 1.1072609553865793, "learning_rate": 5.502271254586356e-08, "loss": 0.1679, "step": 12213 }, { "epoch": 0.96763715587245, "grad_norm": 1.350825278239869, "learning_rate": 5.4754227256094136e-08, "loss": 0.1897, "step": 12214 }, { "epoch": 0.9677163794810854, "grad_norm": 1.21358631090734, "learning_rate": 5.4486396812906125e-08, "loss": 0.1393, "step": 12215 }, { "epoch": 0.9677956030897208, "grad_norm": 1.4883511399097444, "learning_rate": 5.421922123393208e-08, "loss": 0.2684, "step": 12216 }, { "epoch": 0.9678748266983561, "grad_norm": 1.3351623786611855, "learning_rate": 5.395270053676793e-08, "loss": 0.1686, "step": 12217 }, { "epoch": 0.9679540503069914, "grad_norm": 1.3770433965151345, "learning_rate": 5.3686834738960744e-08, "loss": 0.2728, "step": 12218 }, { "epoch": 0.9680332739156269, "grad_norm": 1.2701327612679396, "learning_rate": 5.3421623858016525e-08, "loss": 0.2238, "step": 12219 }, { "epoch": 0.9681124975242622, "grad_norm": 1.3082860632045146, "learning_rate": 5.3157067911399076e-08, "loss": 0.1544, "step": 12220 }, { "epoch": 0.9681917211328976, "grad_norm": 1.1999361763084468, "learning_rate": 5.289316691652668e-08, "loss": 0.1822, "step": 12221 }, { "epoch": 0.968270944741533, "grad_norm": 1.4864271015934059, "learning_rate": 5.2629920890777676e-08, "loss": 0.2312, "step": 12222 }, { "epoch": 0.9683501683501684, "grad_norm": 1.2592982820226126, "learning_rate": 5.236732985148374e-08, "loss": 0.2206, "step": 12223 }, { "epoch": 0.9684293919588037, "grad_norm": 1.2311695794374358, "learning_rate": 5.21053938159366e-08, "loss": 0.157, "step": 12224 }, { "epoch": 0.9685086155674391, "grad_norm": 1.3616873828598695, "learning_rate": 5.1844112801383576e-08, "loss": 0.223, "step": 12225 }, { "epoch": 0.9685878391760745, "grad_norm": 1.412709609714862, "learning_rate": 5.158348682502756e-08, "loss": 0.2149, "step": 12226 }, { "epoch": 0.9686670627847098, "grad_norm": 1.5097490557116413, "learning_rate": 5.1323515904031506e-08, "loss": 0.2774, "step": 12227 }, { "epoch": 0.9687462863933453, "grad_norm": 1.8206702170482414, "learning_rate": 5.1064200055510606e-08, "loss": 0.2846, "step": 12228 }, { "epoch": 0.9688255100019806, "grad_norm": 1.4739707672587357, "learning_rate": 5.080553929654119e-08, "loss": 0.2358, "step": 12229 }, { "epoch": 0.9689047336106159, "grad_norm": 1.222935833540115, "learning_rate": 5.05475336441541e-08, "loss": 0.1656, "step": 12230 }, { "epoch": 0.9689839572192513, "grad_norm": 1.3430587472010265, "learning_rate": 5.0290183115339065e-08, "loss": 0.2383, "step": 12231 }, { "epoch": 0.9690631808278867, "grad_norm": 1.4457664497185594, "learning_rate": 5.003348772704031e-08, "loss": 0.1964, "step": 12232 }, { "epoch": 0.9691424044365221, "grad_norm": 1.4614767210708277, "learning_rate": 4.977744749615987e-08, "loss": 0.2413, "step": 12233 }, { "epoch": 0.9692216280451574, "grad_norm": 1.1267893603809416, "learning_rate": 4.9522062439557595e-08, "loss": 0.1579, "step": 12234 }, { "epoch": 0.9693008516537929, "grad_norm": 1.0472532628652576, "learning_rate": 4.926733257404892e-08, "loss": 0.158, "step": 12235 }, { "epoch": 0.9693800752624282, "grad_norm": 1.6538958547189024, "learning_rate": 4.901325791640599e-08, "loss": 0.2851, "step": 12236 }, { "epoch": 0.9694592988710635, "grad_norm": 1.2190111872149716, "learning_rate": 4.8759838483358745e-08, "loss": 0.1763, "step": 12237 }, { "epoch": 0.969538522479699, "grad_norm": 1.396709109728519, "learning_rate": 4.850707429159496e-08, "loss": 0.1826, "step": 12238 }, { "epoch": 0.9696177460883343, "grad_norm": 1.5303925469033033, "learning_rate": 4.825496535775576e-08, "loss": 0.3026, "step": 12239 }, { "epoch": 0.9696969696969697, "grad_norm": 1.4589705348855644, "learning_rate": 4.800351169844231e-08, "loss": 0.3439, "step": 12240 }, { "epoch": 0.969776193305605, "grad_norm": 1.305475434919313, "learning_rate": 4.7752713330212475e-08, "loss": 0.1979, "step": 12241 }, { "epoch": 0.9698554169142405, "grad_norm": 1.4282557192748924, "learning_rate": 4.7502570269578605e-08, "loss": 0.2291, "step": 12242 }, { "epoch": 0.9699346405228758, "grad_norm": 1.199495373585879, "learning_rate": 4.725308253301197e-08, "loss": 0.1762, "step": 12243 }, { "epoch": 0.9700138641315111, "grad_norm": 1.1789346213559573, "learning_rate": 4.7004250136940547e-08, "loss": 0.2084, "step": 12244 }, { "epoch": 0.9700930877401466, "grad_norm": 1.2697096279483457, "learning_rate": 4.675607309774899e-08, "loss": 0.2192, "step": 12245 }, { "epoch": 0.9701723113487819, "grad_norm": 1.2500150954125666, "learning_rate": 4.650855143177757e-08, "loss": 0.2069, "step": 12246 }, { "epoch": 0.9702515349574173, "grad_norm": 1.381256867920319, "learning_rate": 4.626168515532548e-08, "loss": 0.2122, "step": 12247 }, { "epoch": 0.9703307585660527, "grad_norm": 1.224437497239653, "learning_rate": 4.6015474284646366e-08, "loss": 0.182, "step": 12248 }, { "epoch": 0.9704099821746881, "grad_norm": 1.2441310782635118, "learning_rate": 4.576991883595283e-08, "loss": 0.1689, "step": 12249 }, { "epoch": 0.9704892057833234, "grad_norm": 1.3024198692000433, "learning_rate": 4.5525018825414157e-08, "loss": 0.2386, "step": 12250 }, { "epoch": 0.9705684293919588, "grad_norm": 1.4568418455316767, "learning_rate": 4.528077426915412e-08, "loss": 0.2258, "step": 12251 }, { "epoch": 0.9706476530005942, "grad_norm": 1.608530520043343, "learning_rate": 4.50371851832565e-08, "loss": 0.2535, "step": 12252 }, { "epoch": 0.9707268766092295, "grad_norm": 1.2553087254675201, "learning_rate": 4.4794251583759604e-08, "loss": 0.194, "step": 12253 }, { "epoch": 0.970806100217865, "grad_norm": 1.5261982368658091, "learning_rate": 4.4551973486660625e-08, "loss": 0.187, "step": 12254 }, { "epoch": 0.9708853238265003, "grad_norm": 1.5382949903498677, "learning_rate": 4.431035090791125e-08, "loss": 0.2929, "step": 12255 }, { "epoch": 0.9709645474351357, "grad_norm": 1.1914975062853832, "learning_rate": 4.4069383863420966e-08, "loss": 0.1717, "step": 12256 }, { "epoch": 0.971043771043771, "grad_norm": 2.2593160730728066, "learning_rate": 4.38290723690582e-08, "loss": 0.3368, "step": 12257 }, { "epoch": 0.9711229946524064, "grad_norm": 1.1762963558647608, "learning_rate": 4.3589416440643626e-08, "loss": 0.1665, "step": 12258 }, { "epoch": 0.9712022182610418, "grad_norm": 0.9756039236291572, "learning_rate": 4.335041609396018e-08, "loss": 0.1266, "step": 12259 }, { "epoch": 0.9712814418696771, "grad_norm": 1.4976348064572005, "learning_rate": 4.3112071344741935e-08, "loss": 0.188, "step": 12260 }, { "epoch": 0.9713606654783126, "grad_norm": 1.5689923083047694, "learning_rate": 4.287438220868523e-08, "loss": 0.2385, "step": 12261 }, { "epoch": 0.9714398890869479, "grad_norm": 1.3845497814235785, "learning_rate": 4.263734870143976e-08, "loss": 0.2366, "step": 12262 }, { "epoch": 0.9715191126955833, "grad_norm": 1.3799185290855691, "learning_rate": 4.2400970838613057e-08, "loss": 0.2636, "step": 12263 }, { "epoch": 0.9715983363042187, "grad_norm": 1.355173656741787, "learning_rate": 4.216524863576932e-08, "loss": 0.2045, "step": 12264 }, { "epoch": 0.971677559912854, "grad_norm": 1.284122106106047, "learning_rate": 4.1930182108430584e-08, "loss": 0.1743, "step": 12265 }, { "epoch": 0.9717567835214894, "grad_norm": 1.2559448791372945, "learning_rate": 4.1695771272073357e-08, "loss": 0.1773, "step": 12266 }, { "epoch": 0.9718360071301247, "grad_norm": 1.4297530117167156, "learning_rate": 4.146201614213419e-08, "loss": 0.2075, "step": 12267 }, { "epoch": 0.9719152307387602, "grad_norm": 1.334435378369409, "learning_rate": 4.1228916734002976e-08, "loss": 0.1922, "step": 12268 }, { "epoch": 0.9719944543473955, "grad_norm": 1.5325580662944063, "learning_rate": 4.099647306302856e-08, "loss": 0.3257, "step": 12269 }, { "epoch": 0.972073677956031, "grad_norm": 1.3515630324357597, "learning_rate": 4.076468514451759e-08, "loss": 0.2589, "step": 12270 }, { "epoch": 0.9721529015646663, "grad_norm": 1.16160651733541, "learning_rate": 4.0533552993731186e-08, "loss": 0.1773, "step": 12271 }, { "epoch": 0.9722321251733016, "grad_norm": 1.4589711874460212, "learning_rate": 4.030307662588939e-08, "loss": 0.2091, "step": 12272 }, { "epoch": 0.972311348781937, "grad_norm": 1.1488169550799014, "learning_rate": 4.007325605616563e-08, "loss": 0.1677, "step": 12273 }, { "epoch": 0.9723905723905724, "grad_norm": 1.4421467699722428, "learning_rate": 3.9844091299694466e-08, "loss": 0.2703, "step": 12274 }, { "epoch": 0.9724697959992078, "grad_norm": 1.5500475396421267, "learning_rate": 3.961558237156493e-08, "loss": 0.2479, "step": 12275 }, { "epoch": 0.9725490196078431, "grad_norm": 1.272127161554797, "learning_rate": 3.9387729286821666e-08, "loss": 0.163, "step": 12276 }, { "epoch": 0.9726282432164786, "grad_norm": 1.1947098119092487, "learning_rate": 3.9160532060470435e-08, "loss": 0.2116, "step": 12277 }, { "epoch": 0.9727074668251139, "grad_norm": 1.4089561696816784, "learning_rate": 3.893399070746928e-08, "loss": 0.2096, "step": 12278 }, { "epoch": 0.9727866904337492, "grad_norm": 1.421915447324665, "learning_rate": 3.870810524273516e-08, "loss": 0.2272, "step": 12279 }, { "epoch": 0.9728659140423846, "grad_norm": 1.5210323484100234, "learning_rate": 3.8482875681140616e-08, "loss": 0.2108, "step": 12280 }, { "epoch": 0.97294513765102, "grad_norm": 1.5624821094387638, "learning_rate": 3.8258302037518234e-08, "loss": 0.2369, "step": 12281 }, { "epoch": 0.9730243612596554, "grad_norm": 1.2131119567447837, "learning_rate": 3.803438432665396e-08, "loss": 0.2071, "step": 12282 }, { "epoch": 0.9731035848682907, "grad_norm": 0.9724486801869996, "learning_rate": 3.781112256329045e-08, "loss": 0.1472, "step": 12283 }, { "epoch": 0.9731828084769262, "grad_norm": 1.6714186472791708, "learning_rate": 3.758851676213038e-08, "loss": 0.2658, "step": 12284 }, { "epoch": 0.9732620320855615, "grad_norm": 1.5664184736157616, "learning_rate": 3.7366566937829804e-08, "loss": 0.2461, "step": 12285 }, { "epoch": 0.9733412556941968, "grad_norm": 1.3309298727417753, "learning_rate": 3.714527310500371e-08, "loss": 0.2491, "step": 12286 }, { "epoch": 0.9734204793028323, "grad_norm": 1.3131425720222438, "learning_rate": 3.692463527822376e-08, "loss": 0.2796, "step": 12287 }, { "epoch": 0.9734997029114676, "grad_norm": 1.3179025994874145, "learning_rate": 3.670465347201724e-08, "loss": 0.2765, "step": 12288 }, { "epoch": 0.973578926520103, "grad_norm": 1.4936406331142933, "learning_rate": 3.6485327700869214e-08, "loss": 0.2073, "step": 12289 }, { "epoch": 0.9736581501287384, "grad_norm": 1.381280612021301, "learning_rate": 3.6266657979220356e-08, "loss": 0.2424, "step": 12290 }, { "epoch": 0.9737373737373738, "grad_norm": 1.3052940856114268, "learning_rate": 3.604864432147026e-08, "loss": 0.2037, "step": 12291 }, { "epoch": 0.9738165973460091, "grad_norm": 1.1378913188808923, "learning_rate": 3.5831286741973006e-08, "loss": 0.1711, "step": 12292 }, { "epoch": 0.9738958209546444, "grad_norm": 1.1890096230685943, "learning_rate": 3.561458525504047e-08, "loss": 0.2222, "step": 12293 }, { "epoch": 0.9739750445632799, "grad_norm": 1.255975546601389, "learning_rate": 3.539853987494235e-08, "loss": 0.2285, "step": 12294 }, { "epoch": 0.9740542681719152, "grad_norm": 0.8770316061226621, "learning_rate": 3.518315061590394e-08, "loss": 0.123, "step": 12295 }, { "epoch": 0.9741334917805506, "grad_norm": 1.1420133507240764, "learning_rate": 3.496841749210722e-08, "loss": 0.1556, "step": 12296 }, { "epoch": 0.974212715389186, "grad_norm": 1.1972061063191477, "learning_rate": 3.4754340517691996e-08, "loss": 0.1921, "step": 12297 }, { "epoch": 0.9742919389978214, "grad_norm": 1.4783612595674176, "learning_rate": 3.454091970675366e-08, "loss": 0.2618, "step": 12298 }, { "epoch": 0.9743711626064567, "grad_norm": 1.5203103031150713, "learning_rate": 3.4328155073344306e-08, "loss": 0.2446, "step": 12299 }, { "epoch": 0.9744503862150921, "grad_norm": 1.4796996817951515, "learning_rate": 3.411604663147494e-08, "loss": 0.2551, "step": 12300 }, { "epoch": 0.9745296098237275, "grad_norm": 1.354552098173944, "learning_rate": 3.3904594395111066e-08, "loss": 0.2264, "step": 12301 }, { "epoch": 0.9746088334323628, "grad_norm": 1.4754575807933092, "learning_rate": 3.369379837817599e-08, "loss": 0.2449, "step": 12302 }, { "epoch": 0.9746880570409983, "grad_norm": 1.3076793838394922, "learning_rate": 3.3483658594548606e-08, "loss": 0.1774, "step": 12303 }, { "epoch": 0.9747672806496336, "grad_norm": 1.2089916805193341, "learning_rate": 3.327417505806785e-08, "loss": 0.1595, "step": 12304 }, { "epoch": 0.9748465042582689, "grad_norm": 1.1542735074613908, "learning_rate": 3.30653477825249e-08, "loss": 0.1992, "step": 12305 }, { "epoch": 0.9749257278669043, "grad_norm": 1.4061416701127778, "learning_rate": 3.2857176781671e-08, "loss": 0.2293, "step": 12306 }, { "epoch": 0.9750049514755397, "grad_norm": 1.2558966329422117, "learning_rate": 3.264966206921294e-08, "loss": 0.1994, "step": 12307 }, { "epoch": 0.9750841750841751, "grad_norm": 1.140288189456784, "learning_rate": 3.244280365881536e-08, "loss": 0.1593, "step": 12308 }, { "epoch": 0.9751633986928104, "grad_norm": 1.6389677037609929, "learning_rate": 3.223660156409847e-08, "loss": 0.2855, "step": 12309 }, { "epoch": 0.9752426223014459, "grad_norm": 1.4261694395806452, "learning_rate": 3.203105579863919e-08, "loss": 0.228, "step": 12310 }, { "epoch": 0.9753218459100812, "grad_norm": 1.3820384936318337, "learning_rate": 3.1826166375972246e-08, "loss": 0.2292, "step": 12311 }, { "epoch": 0.9754010695187165, "grad_norm": 1.1456312426525317, "learning_rate": 3.162193330958796e-08, "loss": 0.212, "step": 12312 }, { "epoch": 0.975480293127352, "grad_norm": 1.4301660536888057, "learning_rate": 3.141835661293557e-08, "loss": 0.2159, "step": 12313 }, { "epoch": 0.9755595167359873, "grad_norm": 1.115726228691136, "learning_rate": 3.12154362994177e-08, "loss": 0.1604, "step": 12314 }, { "epoch": 0.9756387403446227, "grad_norm": 1.1364463494656156, "learning_rate": 3.1013172382396984e-08, "loss": 0.1496, "step": 12315 }, { "epoch": 0.975717963953258, "grad_norm": 1.3332340304254042, "learning_rate": 3.0811564875190544e-08, "loss": 0.2186, "step": 12316 }, { "epoch": 0.9757971875618935, "grad_norm": 1.2461023124452304, "learning_rate": 3.061061379107555e-08, "loss": 0.1854, "step": 12317 }, { "epoch": 0.9758764111705288, "grad_norm": 1.1891070730527542, "learning_rate": 3.04103191432803e-08, "loss": 0.1698, "step": 12318 }, { "epoch": 0.9759556347791641, "grad_norm": 1.592263278429394, "learning_rate": 3.0210680944995354e-08, "loss": 0.2442, "step": 12319 }, { "epoch": 0.9760348583877996, "grad_norm": 1.1530731052234549, "learning_rate": 3.001169920936575e-08, "loss": 0.1622, "step": 12320 }, { "epoch": 0.9761140819964349, "grad_norm": 1.2915830873242329, "learning_rate": 2.981337394949324e-08, "loss": 0.241, "step": 12321 }, { "epoch": 0.9761933056050703, "grad_norm": 1.489731003599819, "learning_rate": 2.961570517843626e-08, "loss": 0.2431, "step": 12322 }, { "epoch": 0.9762725292137057, "grad_norm": 1.2207160785053095, "learning_rate": 2.9418692909211066e-08, "loss": 0.1441, "step": 12323 }, { "epoch": 0.9763517528223411, "grad_norm": 1.631887040649819, "learning_rate": 2.9222337154789504e-08, "loss": 0.2329, "step": 12324 }, { "epoch": 0.9764309764309764, "grad_norm": 1.3069338171926856, "learning_rate": 2.902663792810012e-08, "loss": 0.2222, "step": 12325 }, { "epoch": 0.9765102000396118, "grad_norm": 1.2699449337567243, "learning_rate": 2.8831595242030387e-08, "loss": 0.1864, "step": 12326 }, { "epoch": 0.9765894236482472, "grad_norm": 1.4541733198233229, "learning_rate": 2.863720910942114e-08, "loss": 0.2041, "step": 12327 }, { "epoch": 0.9766686472568825, "grad_norm": 1.022381580231622, "learning_rate": 2.8443479543073248e-08, "loss": 0.1575, "step": 12328 }, { "epoch": 0.976747870865518, "grad_norm": 1.6720382870379475, "learning_rate": 2.825040655574207e-08, "loss": 0.1741, "step": 12329 }, { "epoch": 0.9768270944741533, "grad_norm": 1.66110496468617, "learning_rate": 2.8057990160139658e-08, "loss": 0.3156, "step": 12330 }, { "epoch": 0.9769063180827887, "grad_norm": 1.367090983003149, "learning_rate": 2.7866230368936986e-08, "loss": 0.1691, "step": 12331 }, { "epoch": 0.976985541691424, "grad_norm": 1.3079821258042446, "learning_rate": 2.767512719476062e-08, "loss": 0.2624, "step": 12332 }, { "epoch": 0.9770647653000594, "grad_norm": 1.2170933379518558, "learning_rate": 2.7484680650193827e-08, "loss": 0.173, "step": 12333 }, { "epoch": 0.9771439889086948, "grad_norm": 1.3423451587573787, "learning_rate": 2.729489074777547e-08, "loss": 0.1918, "step": 12334 }, { "epoch": 0.9772232125173301, "grad_norm": 1.290424328664151, "learning_rate": 2.7105757500002215e-08, "loss": 0.2279, "step": 12335 }, { "epoch": 0.9773024361259656, "grad_norm": 1.4204912700790133, "learning_rate": 2.6917280919329656e-08, "loss": 0.2871, "step": 12336 }, { "epoch": 0.9773816597346009, "grad_norm": 1.2567117459778772, "learning_rate": 2.6729461018166758e-08, "loss": 0.1684, "step": 12337 }, { "epoch": 0.9774608833432363, "grad_norm": 1.1438383613017145, "learning_rate": 2.654229780887918e-08, "loss": 0.1392, "step": 12338 }, { "epoch": 0.9775401069518717, "grad_norm": 1.569488934340685, "learning_rate": 2.6355791303792622e-08, "loss": 0.2032, "step": 12339 }, { "epoch": 0.977619330560507, "grad_norm": 1.6808363837369735, "learning_rate": 2.6169941515188368e-08, "loss": 0.3481, "step": 12340 }, { "epoch": 0.9776985541691424, "grad_norm": 1.0962788413502957, "learning_rate": 2.5984748455301077e-08, "loss": 0.204, "step": 12341 }, { "epoch": 0.9777777777777777, "grad_norm": 1.3503058540728166, "learning_rate": 2.5800212136326552e-08, "loss": 0.2045, "step": 12342 }, { "epoch": 0.9778570013864132, "grad_norm": 1.3639522368819534, "learning_rate": 2.561633257041507e-08, "loss": 0.2583, "step": 12343 }, { "epoch": 0.9779362249950485, "grad_norm": 1.535935546539105, "learning_rate": 2.5433109769674724e-08, "loss": 0.3171, "step": 12344 }, { "epoch": 0.978015448603684, "grad_norm": 1.4627871385836193, "learning_rate": 2.52505437461692e-08, "loss": 0.2164, "step": 12345 }, { "epoch": 0.9780946722123193, "grad_norm": 1.2946980640255934, "learning_rate": 2.5068634511919986e-08, "loss": 0.2444, "step": 12346 }, { "epoch": 0.9781738958209546, "grad_norm": 1.1692818989338118, "learning_rate": 2.4887382078905287e-08, "loss": 0.1782, "step": 12347 }, { "epoch": 0.97825311942959, "grad_norm": 1.798326315808199, "learning_rate": 2.4706786459058885e-08, "loss": 0.3196, "step": 12348 }, { "epoch": 0.9783323430382254, "grad_norm": 1.5160327265129587, "learning_rate": 2.4526847664273488e-08, "loss": 0.2312, "step": 12349 }, { "epoch": 0.9784115666468608, "grad_norm": 1.089235382405294, "learning_rate": 2.434756570639518e-08, "loss": 0.1652, "step": 12350 }, { "epoch": 0.9784907902554961, "grad_norm": 1.519519715802541, "learning_rate": 2.4168940597230074e-08, "loss": 0.3181, "step": 12351 }, { "epoch": 0.9785700138641316, "grad_norm": 1.1257134158129076, "learning_rate": 2.3990972348539864e-08, "loss": 0.2043, "step": 12352 }, { "epoch": 0.9786492374727669, "grad_norm": 1.303173375760026, "learning_rate": 2.381366097204296e-08, "loss": 0.249, "step": 12353 }, { "epoch": 0.9787284610814022, "grad_norm": 1.4167374269998783, "learning_rate": 2.363700647941336e-08, "loss": 0.2469, "step": 12354 }, { "epoch": 0.9788076846900376, "grad_norm": 1.2069445342672047, "learning_rate": 2.3461008882283977e-08, "loss": 0.1623, "step": 12355 }, { "epoch": 0.978886908298673, "grad_norm": 1.351024222252906, "learning_rate": 2.3285668192243317e-08, "loss": 0.181, "step": 12356 }, { "epoch": 0.9789661319073084, "grad_norm": 1.3085318095105418, "learning_rate": 2.311098442083659e-08, "loss": 0.2291, "step": 12357 }, { "epoch": 0.9790453555159437, "grad_norm": 1.3948311518110854, "learning_rate": 2.293695757956571e-08, "loss": 0.1872, "step": 12358 }, { "epoch": 0.9791245791245792, "grad_norm": 1.3861269503243618, "learning_rate": 2.2763587679889288e-08, "loss": 0.2599, "step": 12359 }, { "epoch": 0.9792038027332145, "grad_norm": 1.1915911119804863, "learning_rate": 2.2590874733223744e-08, "loss": 0.1882, "step": 12360 }, { "epoch": 0.9792830263418498, "grad_norm": 1.2021813832864123, "learning_rate": 2.2418818750939986e-08, "loss": 0.2026, "step": 12361 }, { "epoch": 0.9793622499504853, "grad_norm": 1.6682931894189295, "learning_rate": 2.2247419744368946e-08, "loss": 0.2834, "step": 12362 }, { "epoch": 0.9794414735591206, "grad_norm": 1.4471310801991073, "learning_rate": 2.207667772479494e-08, "loss": 0.2511, "step": 12363 }, { "epoch": 0.979520697167756, "grad_norm": 1.4350412222295283, "learning_rate": 2.190659270346118e-08, "loss": 0.1995, "step": 12364 }, { "epoch": 0.9795999207763914, "grad_norm": 1.563790131584391, "learning_rate": 2.1737164691566502e-08, "loss": 0.2792, "step": 12365 }, { "epoch": 0.9796791443850268, "grad_norm": 1.2037339848124784, "learning_rate": 2.156839370026753e-08, "loss": 0.1796, "step": 12366 }, { "epoch": 0.9797583679936621, "grad_norm": 1.3618015145621454, "learning_rate": 2.140027974067649e-08, "loss": 0.2438, "step": 12367 }, { "epoch": 0.9798375916022974, "grad_norm": 1.2552708385352784, "learning_rate": 2.1232822823862297e-08, "loss": 0.1826, "step": 12368 }, { "epoch": 0.9799168152109329, "grad_norm": 1.5051866327523513, "learning_rate": 2.1066022960852806e-08, "loss": 0.2612, "step": 12369 }, { "epoch": 0.9799960388195682, "grad_norm": 1.4032366641715137, "learning_rate": 2.0899880162630336e-08, "loss": 0.2488, "step": 12370 }, { "epoch": 0.9800752624282036, "grad_norm": 1.4064475514994181, "learning_rate": 2.073439444013392e-08, "loss": 0.1988, "step": 12371 }, { "epoch": 0.980154486036839, "grad_norm": 0.9951831481828061, "learning_rate": 2.0569565804260393e-08, "loss": 0.174, "step": 12372 }, { "epoch": 0.9802337096454744, "grad_norm": 1.2529452381863029, "learning_rate": 2.04053942658633e-08, "loss": 0.176, "step": 12373 }, { "epoch": 0.9803129332541097, "grad_norm": 1.7136635696712321, "learning_rate": 2.0241879835752875e-08, "loss": 0.2561, "step": 12374 }, { "epoch": 0.9803921568627451, "grad_norm": 0.9178366534683317, "learning_rate": 2.0079022524694957e-08, "loss": 0.1184, "step": 12375 }, { "epoch": 0.9804713804713805, "grad_norm": 1.485368168420031, "learning_rate": 1.991682234341208e-08, "loss": 0.2245, "step": 12376 }, { "epoch": 0.9805506040800158, "grad_norm": 1.5752433191734283, "learning_rate": 1.9755279302585696e-08, "loss": 0.258, "step": 12377 }, { "epoch": 0.9806298276886513, "grad_norm": 1.2115654497400714, "learning_rate": 1.959439341285285e-08, "loss": 0.1801, "step": 12378 }, { "epoch": 0.9807090512972866, "grad_norm": 1.7486470193716777, "learning_rate": 1.943416468480619e-08, "loss": 0.279, "step": 12379 }, { "epoch": 0.980788274905922, "grad_norm": 1.4098978744442567, "learning_rate": 1.9274593128996155e-08, "loss": 0.2638, "step": 12380 }, { "epoch": 0.9808674985145573, "grad_norm": 0.9557778347678417, "learning_rate": 1.9115678755929902e-08, "loss": 0.1507, "step": 12381 }, { "epoch": 0.9809467221231927, "grad_norm": 1.3095147222633758, "learning_rate": 1.8957421576071277e-08, "loss": 0.2289, "step": 12382 }, { "epoch": 0.9810259457318281, "grad_norm": 1.560136172695027, "learning_rate": 1.879982159984084e-08, "loss": 0.2265, "step": 12383 }, { "epoch": 0.9811051693404634, "grad_norm": 1.479422861917443, "learning_rate": 1.864287883761695e-08, "loss": 0.2631, "step": 12384 }, { "epoch": 0.9811843929490989, "grad_norm": 1.1434767942922264, "learning_rate": 1.8486593299730236e-08, "loss": 0.147, "step": 12385 }, { "epoch": 0.9812636165577342, "grad_norm": 1.4227158335024508, "learning_rate": 1.8330964996474688e-08, "loss": 0.2394, "step": 12386 }, { "epoch": 0.9813428401663695, "grad_norm": 1.2161179458531532, "learning_rate": 1.817599393809544e-08, "loss": 0.1734, "step": 12387 }, { "epoch": 0.981422063775005, "grad_norm": 1.3204361413124135, "learning_rate": 1.802168013479877e-08, "loss": 0.1937, "step": 12388 }, { "epoch": 0.9815012873836403, "grad_norm": 1.4210907675251467, "learning_rate": 1.7868023596743224e-08, "loss": 0.2738, "step": 12389 }, { "epoch": 0.9815805109922757, "grad_norm": 1.5423237245530157, "learning_rate": 1.771502433404737e-08, "loss": 0.2223, "step": 12390 }, { "epoch": 0.981659734600911, "grad_norm": 1.0960636524734113, "learning_rate": 1.7562682356786488e-08, "loss": 0.1692, "step": 12391 }, { "epoch": 0.9817389582095465, "grad_norm": 1.224037969364072, "learning_rate": 1.7410997674989215e-08, "loss": 0.1646, "step": 12392 }, { "epoch": 0.9818181818181818, "grad_norm": 1.2287911671495872, "learning_rate": 1.7259970298645345e-08, "loss": 0.2018, "step": 12393 }, { "epoch": 0.9818974054268171, "grad_norm": 1.4248427765283922, "learning_rate": 1.7109600237698032e-08, "loss": 0.2008, "step": 12394 }, { "epoch": 0.9819766290354526, "grad_norm": 1.3351943952400513, "learning_rate": 1.6959887502049356e-08, "loss": 0.1931, "step": 12395 }, { "epoch": 0.9820558526440879, "grad_norm": 1.4110675976471403, "learning_rate": 1.6810832101556984e-08, "loss": 0.2579, "step": 12396 }, { "epoch": 0.9821350762527233, "grad_norm": 1.3507169345682914, "learning_rate": 1.666243404603529e-08, "loss": 0.1886, "step": 12397 }, { "epoch": 0.9822142998613587, "grad_norm": 1.1322106240504406, "learning_rate": 1.651469334525424e-08, "loss": 0.1872, "step": 12398 }, { "epoch": 0.9822935234699941, "grad_norm": 1.113384015483527, "learning_rate": 1.6367610008944935e-08, "loss": 0.1432, "step": 12399 }, { "epoch": 0.9823727470786294, "grad_norm": 1.493085021191041, "learning_rate": 1.622118404678963e-08, "loss": 0.2413, "step": 12400 }, { "epoch": 0.9824519706872648, "grad_norm": 1.4337184817119455, "learning_rate": 1.607541546843061e-08, "loss": 0.2505, "step": 12401 }, { "epoch": 0.9825311942959002, "grad_norm": 1.302138429353761, "learning_rate": 1.593030428346576e-08, "loss": 0.1516, "step": 12402 }, { "epoch": 0.9826104179045355, "grad_norm": 1.305931013942236, "learning_rate": 1.578585050144965e-08, "loss": 0.213, "step": 12403 }, { "epoch": 0.982689641513171, "grad_norm": 1.3937899573322106, "learning_rate": 1.564205413189468e-08, "loss": 0.219, "step": 12404 }, { "epoch": 0.9827688651218063, "grad_norm": 1.2300172769040794, "learning_rate": 1.5498915184268826e-08, "loss": 0.2192, "step": 12405 }, { "epoch": 0.9828480887304417, "grad_norm": 1.3598427424642636, "learning_rate": 1.5356433667996772e-08, "loss": 0.2048, "step": 12406 }, { "epoch": 0.982927312339077, "grad_norm": 1.2476670997547008, "learning_rate": 1.5214609592461015e-08, "loss": 0.1848, "step": 12407 }, { "epoch": 0.9830065359477124, "grad_norm": 1.3477909011329958, "learning_rate": 1.507344296699964e-08, "loss": 0.2286, "step": 12408 }, { "epoch": 0.9830857595563478, "grad_norm": 1.402477249128504, "learning_rate": 1.4932933800907435e-08, "loss": 0.2577, "step": 12409 }, { "epoch": 0.9831649831649831, "grad_norm": 1.4452043374940193, "learning_rate": 1.4793082103435885e-08, "loss": 0.2927, "step": 12410 }, { "epoch": 0.9832442067736186, "grad_norm": 1.1156485681800947, "learning_rate": 1.4653887883794293e-08, "loss": 0.1546, "step": 12411 }, { "epoch": 0.9833234303822539, "grad_norm": 1.2941942429050544, "learning_rate": 1.451535115114866e-08, "loss": 0.2238, "step": 12412 }, { "epoch": 0.9834026539908893, "grad_norm": 1.3692368823279657, "learning_rate": 1.4377471914619468e-08, "loss": 0.2268, "step": 12413 }, { "epoch": 0.9834818775995247, "grad_norm": 1.1764729320468845, "learning_rate": 1.424025018328612e-08, "loss": 0.1671, "step": 12414 }, { "epoch": 0.98356110120816, "grad_norm": 1.6737251884922335, "learning_rate": 1.4103685966183612e-08, "loss": 0.2855, "step": 12415 }, { "epoch": 0.9836403248167954, "grad_norm": 1.291085058232385, "learning_rate": 1.396777927230475e-08, "loss": 0.1876, "step": 12416 }, { "epoch": 0.9837195484254307, "grad_norm": 1.2580872819884064, "learning_rate": 1.383253011059682e-08, "loss": 0.2, "step": 12417 }, { "epoch": 0.9837987720340662, "grad_norm": 1.1638219042848887, "learning_rate": 1.3697938489967144e-08, "loss": 0.1481, "step": 12418 }, { "epoch": 0.9838779956427015, "grad_norm": 1.3406977954707553, "learning_rate": 1.3564004419277522e-08, "loss": 0.2469, "step": 12419 }, { "epoch": 0.983957219251337, "grad_norm": 1.3012301114446034, "learning_rate": 1.3430727907346453e-08, "loss": 0.3466, "step": 12420 }, { "epoch": 0.9840364428599723, "grad_norm": 1.6150807931230433, "learning_rate": 1.329810896294914e-08, "loss": 0.2725, "step": 12421 }, { "epoch": 0.9841156664686076, "grad_norm": 1.4440963179670876, "learning_rate": 1.3166147594818601e-08, "loss": 0.2576, "step": 12422 }, { "epoch": 0.984194890077243, "grad_norm": 1.38243655507372, "learning_rate": 1.3034843811644548e-08, "loss": 0.195, "step": 12423 }, { "epoch": 0.9842741136858784, "grad_norm": 1.5422851378192501, "learning_rate": 1.290419762207007e-08, "loss": 0.2858, "step": 12424 }, { "epoch": 0.9843533372945138, "grad_norm": 1.1204227120903085, "learning_rate": 1.2774209034700503e-08, "loss": 0.1734, "step": 12425 }, { "epoch": 0.9844325609031491, "grad_norm": 1.2065474351513097, "learning_rate": 1.2644878058093446e-08, "loss": 0.1607, "step": 12426 }, { "epoch": 0.9845117845117846, "grad_norm": 1.2996072629065045, "learning_rate": 1.2516204700765422e-08, "loss": 0.1969, "step": 12427 }, { "epoch": 0.9845910081204199, "grad_norm": 1.386928936396314, "learning_rate": 1.2388188971188542e-08, "loss": 0.2429, "step": 12428 }, { "epoch": 0.9846702317290552, "grad_norm": 1.6411707783276153, "learning_rate": 1.2260830877792729e-08, "loss": 0.2725, "step": 12429 }, { "epoch": 0.9847494553376906, "grad_norm": 1.4835494526385964, "learning_rate": 1.2134130428962387e-08, "loss": 0.2509, "step": 12430 }, { "epoch": 0.984828678946326, "grad_norm": 1.3389969580612024, "learning_rate": 1.2008087633040843e-08, "loss": 0.207, "step": 12431 }, { "epoch": 0.9849079025549614, "grad_norm": 1.5434304870871656, "learning_rate": 1.1882702498328125e-08, "loss": 0.2799, "step": 12432 }, { "epoch": 0.9849871261635967, "grad_norm": 1.209208353919731, "learning_rate": 1.175797503307874e-08, "loss": 0.155, "step": 12433 }, { "epoch": 0.9850663497722322, "grad_norm": 1.406233772110184, "learning_rate": 1.1633905245507227e-08, "loss": 0.2149, "step": 12434 }, { "epoch": 0.9851455733808675, "grad_norm": 1.4076835467397089, "learning_rate": 1.1510493143782609e-08, "loss": 0.1754, "step": 12435 }, { "epoch": 0.9852247969895028, "grad_norm": 1.468031163373947, "learning_rate": 1.1387738736029496e-08, "loss": 0.2353, "step": 12436 }, { "epoch": 0.9853040205981383, "grad_norm": 1.4114296259216852, "learning_rate": 1.1265642030331426e-08, "loss": 0.1832, "step": 12437 }, { "epoch": 0.9853832442067736, "grad_norm": 1.3433495501088457, "learning_rate": 1.114420303472974e-08, "loss": 0.2018, "step": 12438 }, { "epoch": 0.985462467815409, "grad_norm": 0.9853083831674087, "learning_rate": 1.1023421757216934e-08, "loss": 0.1178, "step": 12439 }, { "epoch": 0.9855416914240444, "grad_norm": 1.0935194771472654, "learning_rate": 1.090329820574887e-08, "loss": 0.1462, "step": 12440 }, { "epoch": 0.9856209150326798, "grad_norm": 1.3992800499309292, "learning_rate": 1.0783832388234772e-08, "loss": 0.283, "step": 12441 }, { "epoch": 0.9857001386413151, "grad_norm": 1.4623034187907258, "learning_rate": 1.0665024312539462e-08, "loss": 0.3125, "step": 12442 }, { "epoch": 0.9857793622499504, "grad_norm": 1.1049513044985437, "learning_rate": 1.0546873986486682e-08, "loss": 0.1399, "step": 12443 }, { "epoch": 0.9858585858585859, "grad_norm": 1.2271985889529136, "learning_rate": 1.0429381417856877e-08, "loss": 0.2278, "step": 12444 }, { "epoch": 0.9859378094672212, "grad_norm": 1.2211581373473384, "learning_rate": 1.0312546614384966e-08, "loss": 0.1739, "step": 12445 }, { "epoch": 0.9860170330758566, "grad_norm": 1.5085648544878496, "learning_rate": 1.0196369583763688e-08, "loss": 0.2606, "step": 12446 }, { "epoch": 0.986096256684492, "grad_norm": 1.3721600368115283, "learning_rate": 1.0080850333644698e-08, "loss": 0.192, "step": 12447 }, { "epoch": 0.9861754802931274, "grad_norm": 1.1780353310856728, "learning_rate": 9.965988871633025e-09, "loss": 0.1655, "step": 12448 }, { "epoch": 0.9862547039017627, "grad_norm": 1.3939693766988244, "learning_rate": 9.851785205291508e-09, "loss": 0.2347, "step": 12449 }, { "epoch": 0.9863339275103981, "grad_norm": 1.615554871611278, "learning_rate": 9.738239342141909e-09, "loss": 0.2794, "step": 12450 }, { "epoch": 0.9864131511190335, "grad_norm": 1.603367285531167, "learning_rate": 9.625351289658247e-09, "loss": 0.1984, "step": 12451 }, { "epoch": 0.9864923747276688, "grad_norm": 1.4516254351938802, "learning_rate": 9.513121055273467e-09, "loss": 0.2268, "step": 12452 }, { "epoch": 0.9865715983363043, "grad_norm": 1.0507069300196927, "learning_rate": 9.401548646380543e-09, "loss": 0.1204, "step": 12453 }, { "epoch": 0.9866508219449396, "grad_norm": 1.2739488848925171, "learning_rate": 9.290634070322491e-09, "loss": 0.2212, "step": 12454 }, { "epoch": 0.986730045553575, "grad_norm": 1.2125383514988055, "learning_rate": 9.180377334404577e-09, "loss": 0.1475, "step": 12455 }, { "epoch": 0.9868092691622103, "grad_norm": 1.391328333397416, "learning_rate": 9.070778445885442e-09, "loss": 0.255, "step": 12456 }, { "epoch": 0.9868884927708457, "grad_norm": 1.275436941861454, "learning_rate": 8.961837411982643e-09, "loss": 0.2208, "step": 12457 }, { "epoch": 0.9869677163794811, "grad_norm": 1.2900627248811802, "learning_rate": 8.853554239869333e-09, "loss": 0.2339, "step": 12458 }, { "epoch": 0.9870469399881164, "grad_norm": 1.8050407324485647, "learning_rate": 8.745928936675363e-09, "loss": 0.2669, "step": 12459 }, { "epoch": 0.9871261635967519, "grad_norm": 1.3327343595256749, "learning_rate": 8.638961509486177e-09, "loss": 0.2097, "step": 12460 }, { "epoch": 0.9872053872053872, "grad_norm": 1.0723984626526875, "learning_rate": 8.53265196534725e-09, "loss": 0.1696, "step": 12461 }, { "epoch": 0.9872846108140226, "grad_norm": 1.4359811901078419, "learning_rate": 8.427000311256317e-09, "loss": 0.238, "step": 12462 }, { "epoch": 0.987363834422658, "grad_norm": 1.1545614969400706, "learning_rate": 8.322006554171147e-09, "loss": 0.1606, "step": 12463 }, { "epoch": 0.9874430580312933, "grad_norm": 1.4254148973056375, "learning_rate": 8.217670701005098e-09, "loss": 0.2308, "step": 12464 }, { "epoch": 0.9875222816399287, "grad_norm": 1.4059356464485842, "learning_rate": 8.113992758628231e-09, "loss": 0.2048, "step": 12465 }, { "epoch": 0.987601505248564, "grad_norm": 1.3924541739055427, "learning_rate": 8.010972733867306e-09, "loss": 0.2202, "step": 12466 }, { "epoch": 0.9876807288571995, "grad_norm": 1.5624754217451045, "learning_rate": 7.908610633504676e-09, "loss": 0.2209, "step": 12467 }, { "epoch": 0.9877599524658348, "grad_norm": 1.4149611229759678, "learning_rate": 7.806906464281617e-09, "loss": 0.18, "step": 12468 }, { "epoch": 0.9878391760744701, "grad_norm": 1.2260131965110193, "learning_rate": 7.70586023289388e-09, "loss": 0.1544, "step": 12469 }, { "epoch": 0.9879183996831056, "grad_norm": 1.5891808434540446, "learning_rate": 7.605471945996146e-09, "loss": 0.2488, "step": 12470 }, { "epoch": 0.9879976232917409, "grad_norm": 1.5374319157316712, "learning_rate": 7.50574161019757e-09, "loss": 0.2085, "step": 12471 }, { "epoch": 0.9880768469003763, "grad_norm": 1.2509710981086186, "learning_rate": 7.406669232065122e-09, "loss": 0.2224, "step": 12472 }, { "epoch": 0.9881560705090117, "grad_norm": 1.400817291463732, "learning_rate": 7.3082548181213635e-09, "loss": 0.2511, "step": 12473 }, { "epoch": 0.9882352941176471, "grad_norm": 1.2273374374997892, "learning_rate": 7.210498374848884e-09, "loss": 0.2638, "step": 12474 }, { "epoch": 0.9883145177262824, "grad_norm": 1.0755615754982213, "learning_rate": 7.113399908681429e-09, "loss": 0.1561, "step": 12475 }, { "epoch": 0.9883937413349178, "grad_norm": 1.204940158789142, "learning_rate": 7.016959426013881e-09, "loss": 0.149, "step": 12476 }, { "epoch": 0.9884729649435532, "grad_norm": 1.346058835354687, "learning_rate": 6.9211769331978265e-09, "loss": 0.2227, "step": 12477 }, { "epoch": 0.9885521885521885, "grad_norm": 1.2698331741183637, "learning_rate": 6.8260524365371115e-09, "loss": 0.2025, "step": 12478 }, { "epoch": 0.988631412160824, "grad_norm": 1.7364152816557938, "learning_rate": 6.731585942297836e-09, "loss": 0.3232, "step": 12479 }, { "epoch": 0.9887106357694593, "grad_norm": 1.3574165046155162, "learning_rate": 6.637777456698358e-09, "loss": 0.1702, "step": 12480 }, { "epoch": 0.9887898593780947, "grad_norm": 1.6303718539574523, "learning_rate": 6.544626985915958e-09, "loss": 0.2278, "step": 12481 }, { "epoch": 0.98886908298673, "grad_norm": 1.206890183157199, "learning_rate": 6.45213453608573e-09, "loss": 0.1769, "step": 12482 }, { "epoch": 0.9889483065953654, "grad_norm": 1.1760049911837305, "learning_rate": 6.360300113295026e-09, "loss": 0.2082, "step": 12483 }, { "epoch": 0.9890275302040008, "grad_norm": 1.2813541597013667, "learning_rate": 6.269123723593451e-09, "loss": 0.1837, "step": 12484 }, { "epoch": 0.9891067538126361, "grad_norm": 1.1498826292844289, "learning_rate": 6.178605372982871e-09, "loss": 0.1922, "step": 12485 }, { "epoch": 0.9891859774212716, "grad_norm": 1.7405664475519997, "learning_rate": 6.088745067424073e-09, "loss": 0.2988, "step": 12486 }, { "epoch": 0.9892652010299069, "grad_norm": 1.5269990083452556, "learning_rate": 5.9995428128334365e-09, "loss": 0.191, "step": 12487 }, { "epoch": 0.9893444246385423, "grad_norm": 1.5024018254912053, "learning_rate": 5.910998615085151e-09, "loss": 0.1996, "step": 12488 }, { "epoch": 0.9894236482471777, "grad_norm": 1.3464939126797384, "learning_rate": 5.8231124800089965e-09, "loss": 0.1994, "step": 12489 }, { "epoch": 0.989502871855813, "grad_norm": 1.2805824796965277, "learning_rate": 5.735884413391457e-09, "loss": 0.2178, "step": 12490 }, { "epoch": 0.9895820954644484, "grad_norm": 1.3781739595855038, "learning_rate": 5.6493144209768255e-09, "loss": 0.2164, "step": 12491 }, { "epoch": 0.9896613190730837, "grad_norm": 1.1950693183716967, "learning_rate": 5.5634025084660985e-09, "loss": 0.2031, "step": 12492 }, { "epoch": 0.9897405426817192, "grad_norm": 1.0637787291335812, "learning_rate": 5.47814868151364e-09, "loss": 0.1474, "step": 12493 }, { "epoch": 0.9898197662903545, "grad_norm": 1.0776936021291765, "learning_rate": 5.393552945736069e-09, "loss": 0.1498, "step": 12494 }, { "epoch": 0.98989898989899, "grad_norm": 1.4013138962482272, "learning_rate": 5.309615306701155e-09, "loss": 0.2517, "step": 12495 }, { "epoch": 0.9899782135076253, "grad_norm": 1.4067846097530692, "learning_rate": 5.226335769936697e-09, "loss": 0.2073, "step": 12496 }, { "epoch": 0.9900574371162606, "grad_norm": 1.8322357689087203, "learning_rate": 5.143714340926087e-09, "loss": 0.308, "step": 12497 }, { "epoch": 0.990136660724896, "grad_norm": 1.5401922238827737, "learning_rate": 5.0617510251105284e-09, "loss": 0.2772, "step": 12498 }, { "epoch": 0.9902158843335314, "grad_norm": 1.6471308717890774, "learning_rate": 4.980445827885705e-09, "loss": 0.2281, "step": 12499 }, { "epoch": 0.9902951079421668, "grad_norm": 1.3308290591423746, "learning_rate": 4.899798754605112e-09, "loss": 0.2108, "step": 12500 }, { "epoch": 0.9903743315508021, "grad_norm": 1.420277492743954, "learning_rate": 4.819809810578946e-09, "loss": 0.2673, "step": 12501 }, { "epoch": 0.9904535551594376, "grad_norm": 1.5495186136199472, "learning_rate": 4.740479001076326e-09, "loss": 0.2455, "step": 12502 }, { "epoch": 0.9905327787680729, "grad_norm": 1.2769506170301432, "learning_rate": 4.66180633131752e-09, "loss": 0.2283, "step": 12503 }, { "epoch": 0.9906120023767082, "grad_norm": 1.4813104757547562, "learning_rate": 4.583791806485049e-09, "loss": 0.2703, "step": 12504 }, { "epoch": 0.9906912259853436, "grad_norm": 1.506462318180011, "learning_rate": 4.506435431714806e-09, "loss": 0.2793, "step": 12505 }, { "epoch": 0.990770449593979, "grad_norm": 1.6290961485009878, "learning_rate": 4.429737212100493e-09, "loss": 0.2757, "step": 12506 }, { "epoch": 0.9908496732026144, "grad_norm": 1.3324355530366954, "learning_rate": 4.353697152692515e-09, "loss": 0.193, "step": 12507 }, { "epoch": 0.9909288968112497, "grad_norm": 1.2225821225844495, "learning_rate": 4.278315258496868e-09, "loss": 0.1683, "step": 12508 }, { "epoch": 0.9910081204198852, "grad_norm": 1.5974530087833683, "learning_rate": 4.203591534478468e-09, "loss": 0.2666, "step": 12509 }, { "epoch": 0.9910873440285205, "grad_norm": 1.489577522516601, "learning_rate": 4.129525985556715e-09, "loss": 0.1789, "step": 12510 }, { "epoch": 0.9911665676371558, "grad_norm": 1.2444406845720806, "learning_rate": 4.056118616608817e-09, "loss": 0.1616, "step": 12511 }, { "epoch": 0.9912457912457913, "grad_norm": 1.51248730938774, "learning_rate": 3.9833694324686864e-09, "loss": 0.2074, "step": 12512 }, { "epoch": 0.9913250148544266, "grad_norm": 1.3547145750551723, "learning_rate": 3.9112784379247145e-09, "loss": 0.339, "step": 12513 }, { "epoch": 0.991404238463062, "grad_norm": 1.1830324648650488, "learning_rate": 3.839845637725326e-09, "loss": 0.1624, "step": 12514 }, { "epoch": 0.9914834620716974, "grad_norm": 1.1104159225502654, "learning_rate": 3.769071036573424e-09, "loss": 0.1302, "step": 12515 }, { "epoch": 0.9915626856803328, "grad_norm": 1.244599216165791, "learning_rate": 3.698954639129726e-09, "loss": 0.2414, "step": 12516 }, { "epoch": 0.9916419092889681, "grad_norm": 1.4195685557322446, "learning_rate": 3.6294964500116492e-09, "loss": 0.2579, "step": 12517 }, { "epoch": 0.9917211328976034, "grad_norm": 1.3235364134475698, "learning_rate": 3.560696473789982e-09, "loss": 0.2403, "step": 12518 }, { "epoch": 0.9918003565062389, "grad_norm": 1.3700588990464042, "learning_rate": 3.4925547149977645e-09, "loss": 0.1979, "step": 12519 }, { "epoch": 0.9918795801148742, "grad_norm": 1.3963926339025485, "learning_rate": 3.425071178120298e-09, "loss": 0.2536, "step": 12520 }, { "epoch": 0.9919588037235096, "grad_norm": 0.9766851949468118, "learning_rate": 3.3582458676018058e-09, "loss": 0.1561, "step": 12521 }, { "epoch": 0.992038027332145, "grad_norm": 1.432457159825864, "learning_rate": 3.292078787842101e-09, "loss": 0.2354, "step": 12522 }, { "epoch": 0.9921172509407804, "grad_norm": 1.1668504643305284, "learning_rate": 3.226569943197699e-09, "loss": 0.1651, "step": 12523 }, { "epoch": 0.9921964745494157, "grad_norm": 1.5761229671699646, "learning_rate": 3.1617193379818167e-09, "loss": 0.2292, "step": 12524 }, { "epoch": 0.9922756981580511, "grad_norm": 1.1865416227179866, "learning_rate": 3.0975269764654816e-09, "loss": 0.1749, "step": 12525 }, { "epoch": 0.9923549217666865, "grad_norm": 1.6039717782862073, "learning_rate": 3.033992862875312e-09, "loss": 0.2588, "step": 12526 }, { "epoch": 0.9924341453753218, "grad_norm": 1.2687919021186191, "learning_rate": 2.9711170013935196e-09, "loss": 0.1949, "step": 12527 }, { "epoch": 0.9925133689839573, "grad_norm": 1.4537660251490192, "learning_rate": 2.9088993961612355e-09, "loss": 0.2499, "step": 12528 }, { "epoch": 0.9925925925925926, "grad_norm": 0.9534090552544947, "learning_rate": 2.8473400512762928e-09, "loss": 0.1255, "step": 12529 }, { "epoch": 0.992671816201228, "grad_norm": 1.253448052631948, "learning_rate": 2.7864389707887853e-09, "loss": 0.1799, "step": 12530 }, { "epoch": 0.9927510398098633, "grad_norm": 1.211191951328995, "learning_rate": 2.726196158712169e-09, "loss": 0.1381, "step": 12531 }, { "epoch": 0.9928302634184987, "grad_norm": 1.2675340787779716, "learning_rate": 2.66661161901105e-09, "loss": 0.1947, "step": 12532 }, { "epoch": 0.9929094870271341, "grad_norm": 1.4126343689953031, "learning_rate": 2.607685355610068e-09, "loss": 0.2765, "step": 12533 }, { "epoch": 0.9929887106357694, "grad_norm": 1.4856731382979294, "learning_rate": 2.549417372388341e-09, "loss": 0.2855, "step": 12534 }, { "epoch": 0.9930679342444049, "grad_norm": 1.4146051243797273, "learning_rate": 2.4918076731828e-09, "loss": 0.2223, "step": 12535 }, { "epoch": 0.9931471578530402, "grad_norm": 1.2259398907907308, "learning_rate": 2.434856261785967e-09, "loss": 0.1704, "step": 12536 }, { "epoch": 0.9932263814616756, "grad_norm": 1.2589088474837895, "learning_rate": 2.378563141949286e-09, "loss": 0.1738, "step": 12537 }, { "epoch": 0.993305605070311, "grad_norm": 1.1551345494029515, "learning_rate": 2.322928317378681e-09, "loss": 0.1906, "step": 12538 }, { "epoch": 0.9933848286789463, "grad_norm": 1.3547682596441615, "learning_rate": 2.267951791737888e-09, "loss": 0.2077, "step": 12539 }, { "epoch": 0.9934640522875817, "grad_norm": 1.7153493443236285, "learning_rate": 2.213633568646234e-09, "loss": 0.3191, "step": 12540 }, { "epoch": 0.993543275896217, "grad_norm": 1.1419776843512282, "learning_rate": 2.1599736516808577e-09, "loss": 0.1814, "step": 12541 }, { "epoch": 0.9936224995048525, "grad_norm": 1.0637268349366085, "learning_rate": 2.106972044373379e-09, "loss": 0.1589, "step": 12542 }, { "epoch": 0.9937017231134878, "grad_norm": 1.727835615080682, "learning_rate": 2.0546287502165583e-09, "loss": 0.2757, "step": 12543 }, { "epoch": 0.9937809467221231, "grad_norm": 1.3561804102516777, "learning_rate": 2.002943772654309e-09, "loss": 0.2229, "step": 12544 }, { "epoch": 0.9938601703307586, "grad_norm": 1.4376269485878115, "learning_rate": 1.951917115091684e-09, "loss": 0.2939, "step": 12545 }, { "epoch": 0.9939393939393939, "grad_norm": 1.6512973400898159, "learning_rate": 1.901548780887108e-09, "loss": 0.2115, "step": 12546 }, { "epoch": 0.9940186175480293, "grad_norm": 1.320015208949776, "learning_rate": 1.851838773357928e-09, "loss": 0.2318, "step": 12547 }, { "epoch": 0.9940978411566647, "grad_norm": 1.198533008666455, "learning_rate": 1.8027870957781912e-09, "loss": 0.1648, "step": 12548 }, { "epoch": 0.9941770647653001, "grad_norm": 1.2946999466391675, "learning_rate": 1.7543937513753161e-09, "loss": 0.2149, "step": 12549 }, { "epoch": 0.9942562883739354, "grad_norm": 1.4405576583346942, "learning_rate": 1.7066587433378634e-09, "loss": 0.2208, "step": 12550 }, { "epoch": 0.9943355119825708, "grad_norm": 1.5084074757572192, "learning_rate": 1.659582074807764e-09, "loss": 0.1872, "step": 12551 }, { "epoch": 0.9944147355912062, "grad_norm": 1.4747730778220889, "learning_rate": 1.6131637488858708e-09, "loss": 0.1867, "step": 12552 }, { "epoch": 0.9944939591998415, "grad_norm": 1.268587688193285, "learning_rate": 1.5674037686275178e-09, "loss": 0.2054, "step": 12553 }, { "epoch": 0.994573182808477, "grad_norm": 1.2447409302626689, "learning_rate": 1.5223021370458502e-09, "loss": 0.239, "step": 12554 }, { "epoch": 0.9946524064171123, "grad_norm": 1.3362271588865342, "learning_rate": 1.4778588571107144e-09, "loss": 0.1694, "step": 12555 }, { "epoch": 0.9947316300257477, "grad_norm": 1.830965598598806, "learning_rate": 1.4340739317497688e-09, "loss": 0.3172, "step": 12556 }, { "epoch": 0.994810853634383, "grad_norm": 1.6419578688167966, "learning_rate": 1.390947363845152e-09, "loss": 0.3192, "step": 12557 }, { "epoch": 0.9948900772430184, "grad_norm": 1.142639020114391, "learning_rate": 1.3484791562357048e-09, "loss": 0.1564, "step": 12558 }, { "epoch": 0.9949693008516538, "grad_norm": 1.5160810611758526, "learning_rate": 1.3066693117191886e-09, "loss": 0.2822, "step": 12559 }, { "epoch": 0.9950485244602891, "grad_norm": 1.3972647357200154, "learning_rate": 1.2655178330467366e-09, "loss": 0.22, "step": 12560 }, { "epoch": 0.9951277480689246, "grad_norm": 1.4662781990439455, "learning_rate": 1.2250247229295132e-09, "loss": 0.3091, "step": 12561 }, { "epoch": 0.9952069716775599, "grad_norm": 1.1682890671085937, "learning_rate": 1.185189984034274e-09, "loss": 0.1672, "step": 12562 }, { "epoch": 0.9952861952861953, "grad_norm": 1.1987516313351618, "learning_rate": 1.1460136189822556e-09, "loss": 0.148, "step": 12563 }, { "epoch": 0.9953654188948307, "grad_norm": 1.4186857466682117, "learning_rate": 1.1074956303536165e-09, "loss": 0.2453, "step": 12564 }, { "epoch": 0.995444642503466, "grad_norm": 1.1778637829024925, "learning_rate": 1.0696360206852162e-09, "loss": 0.2096, "step": 12565 }, { "epoch": 0.9955238661121014, "grad_norm": 1.3644194785082215, "learning_rate": 1.0324347924695055e-09, "loss": 0.2905, "step": 12566 }, { "epoch": 0.9956030897207367, "grad_norm": 1.2889824217233188, "learning_rate": 9.958919481556362e-10, "loss": 0.172, "step": 12567 }, { "epoch": 0.9956823133293722, "grad_norm": 1.5109538498877142, "learning_rate": 9.600074901505718e-10, "loss": 0.2479, "step": 12568 }, { "epoch": 0.9957615369380075, "grad_norm": 1.6098756681008, "learning_rate": 9.24781420816867e-10, "loss": 0.272, "step": 12569 }, { "epoch": 0.995840760546643, "grad_norm": 1.5930147949606568, "learning_rate": 8.902137424726675e-10, "loss": 0.2082, "step": 12570 }, { "epoch": 0.9959199841552783, "grad_norm": 1.6230997570751065, "learning_rate": 8.56304457396151e-10, "loss": 0.2867, "step": 12571 }, { "epoch": 0.9959992077639136, "grad_norm": 1.5188729303389858, "learning_rate": 8.230535678188656e-10, "loss": 0.2123, "step": 12572 }, { "epoch": 0.996078431372549, "grad_norm": 1.2508980642777277, "learning_rate": 7.904610759312814e-10, "loss": 0.1552, "step": 12573 }, { "epoch": 0.9961576549811844, "grad_norm": 1.4361496615147935, "learning_rate": 7.585269838783494e-10, "loss": 0.3118, "step": 12574 }, { "epoch": 0.9962368785898198, "grad_norm": 1.1542101467217232, "learning_rate": 7.272512937628318e-10, "loss": 0.1722, "step": 12575 }, { "epoch": 0.9963161021984551, "grad_norm": 1.278491052498372, "learning_rate": 6.966340076441924e-10, "loss": 0.2072, "step": 12576 }, { "epoch": 0.9963953258070906, "grad_norm": 1.1746040879529536, "learning_rate": 6.666751275385963e-10, "loss": 0.1828, "step": 12577 }, { "epoch": 0.9964745494157259, "grad_norm": 1.4330130828120105, "learning_rate": 6.3737465542002e-10, "loss": 0.241, "step": 12578 }, { "epoch": 0.9965537730243612, "grad_norm": 1.4271566692606346, "learning_rate": 6.087325932147003e-10, "loss": 0.24, "step": 12579 }, { "epoch": 0.9966329966329966, "grad_norm": 1.3332745611384074, "learning_rate": 5.807489428111268e-10, "loss": 0.2682, "step": 12580 }, { "epoch": 0.996712220241632, "grad_norm": 1.314716955830647, "learning_rate": 5.534237060511594e-10, "loss": 0.1759, "step": 12581 }, { "epoch": 0.9967914438502674, "grad_norm": 2.2100600515436515, "learning_rate": 5.267568847344695e-10, "loss": 0.2297, "step": 12582 }, { "epoch": 0.9968706674589027, "grad_norm": 1.7602103323485883, "learning_rate": 5.007484806152097e-10, "loss": 0.3064, "step": 12583 }, { "epoch": 0.9969498910675382, "grad_norm": 1.426143301755599, "learning_rate": 4.753984954086743e-10, "loss": 0.2395, "step": 12584 }, { "epoch": 0.9970291146761735, "grad_norm": 1.211768410738935, "learning_rate": 4.5070693078130834e-10, "loss": 0.1538, "step": 12585 }, { "epoch": 0.9971083382848088, "grad_norm": 1.5085316651191683, "learning_rate": 4.266737883606986e-10, "loss": 0.2579, "step": 12586 }, { "epoch": 0.9971875618934443, "grad_norm": 1.2719726097229966, "learning_rate": 4.0329906972780276e-10, "loss": 0.1893, "step": 12587 }, { "epoch": 0.9972667855020796, "grad_norm": 1.3224030935410247, "learning_rate": 3.805827764236103e-10, "loss": 0.224, "step": 12588 }, { "epoch": 0.997346009110715, "grad_norm": 1.2289755597251892, "learning_rate": 3.585249099435917e-10, "loss": 0.2635, "step": 12589 }, { "epoch": 0.9974252327193504, "grad_norm": 1.3221490130214644, "learning_rate": 3.3712547173769816e-10, "loss": 0.2204, "step": 12590 }, { "epoch": 0.9975044563279858, "grad_norm": 1.4595330106386109, "learning_rate": 3.163844632181334e-10, "loss": 0.2284, "step": 12591 }, { "epoch": 0.9975836799366211, "grad_norm": 1.2733603141038226, "learning_rate": 2.963018857493616e-10, "loss": 0.1997, "step": 12592 }, { "epoch": 0.9976629035452564, "grad_norm": 1.2851104650545306, "learning_rate": 2.7687774065254804e-10, "loss": 0.1541, "step": 12593 }, { "epoch": 0.9977421271538919, "grad_norm": 1.2610574675323607, "learning_rate": 2.581120292077799e-10, "loss": 0.2074, "step": 12594 }, { "epoch": 0.9978213507625272, "grad_norm": 1.3439470654486827, "learning_rate": 2.400047526518456e-10, "loss": 0.2202, "step": 12595 }, { "epoch": 0.9979005743711626, "grad_norm": 1.4223196445066777, "learning_rate": 2.2255591217490437e-10, "loss": 0.2449, "step": 12596 }, { "epoch": 0.997979797979798, "grad_norm": 1.169471863088717, "learning_rate": 2.057655089271471e-10, "loss": 0.1459, "step": 12597 }, { "epoch": 0.9980590215884334, "grad_norm": 1.8938312357980984, "learning_rate": 1.8963354401324575e-10, "loss": 0.3463, "step": 12598 }, { "epoch": 0.9981382451970687, "grad_norm": 1.5770285442906824, "learning_rate": 1.74160018496794e-10, "loss": 0.2438, "step": 12599 }, { "epoch": 0.9982174688057041, "grad_norm": 1.2867503443460573, "learning_rate": 1.593449333947561e-10, "loss": 0.2098, "step": 12600 }, { "epoch": 0.9982966924143395, "grad_norm": 1.4130448920366796, "learning_rate": 1.4518828968523857e-10, "loss": 0.2302, "step": 12601 }, { "epoch": 0.9983759160229748, "grad_norm": 1.1190835363853238, "learning_rate": 1.3169008829749808e-10, "loss": 0.2064, "step": 12602 }, { "epoch": 0.9984551396316103, "grad_norm": 1.307888018530365, "learning_rate": 1.1885033012193348e-10, "loss": 0.1674, "step": 12603 }, { "epoch": 0.9985343632402456, "grad_norm": 1.193597720975357, "learning_rate": 1.0666901600453473e-10, "loss": 0.1736, "step": 12604 }, { "epoch": 0.998613586848881, "grad_norm": 1.392658771335661, "learning_rate": 9.51461467457726e-11, "loss": 0.2241, "step": 12605 }, { "epoch": 0.9986928104575163, "grad_norm": 1.25744168470611, "learning_rate": 8.428172310503968e-11, "loss": 0.2231, "step": 12606 }, { "epoch": 0.9987720340661517, "grad_norm": 1.1388855690170891, "learning_rate": 7.40757457984298e-11, "loss": 0.1712, "step": 12607 }, { "epoch": 0.9988512576747871, "grad_norm": 1.7328102242920655, "learning_rate": 6.452821549651766e-11, "loss": 0.3831, "step": 12608 }, { "epoch": 0.9989304812834224, "grad_norm": 1.5719729743532507, "learning_rate": 5.563913282990996e-11, "loss": 0.2842, "step": 12609 }, { "epoch": 0.9990097048920579, "grad_norm": 1.557877226047442, "learning_rate": 4.7408498381473765e-11, "loss": 0.339, "step": 12610 }, { "epoch": 0.9990889285006932, "grad_norm": 1.4930107277820386, "learning_rate": 3.983631269521837e-11, "loss": 0.226, "step": 12611 }, { "epoch": 0.9991681521093286, "grad_norm": 1.228601240977321, "learning_rate": 3.292257626963391e-11, "loss": 0.2042, "step": 12612 }, { "epoch": 0.999247375717964, "grad_norm": 1.206532423282528, "learning_rate": 2.6667289557691378e-11, "loss": 0.2225, "step": 12613 }, { "epoch": 0.9993265993265993, "grad_norm": 1.245138462654191, "learning_rate": 2.1070452974614187e-11, "loss": 0.2038, "step": 12614 }, { "epoch": 0.9994058229352347, "grad_norm": 1.6390682665890544, "learning_rate": 1.6132066886775932e-11, "loss": 0.2894, "step": 12615 }, { "epoch": 0.99948504654387, "grad_norm": 1.1032165949364316, "learning_rate": 1.1852131619471963e-11, "loss": 0.1323, "step": 12616 }, { "epoch": 0.9995642701525055, "grad_norm": 1.3807418347483535, "learning_rate": 8.230647454698926e-12, "loss": 0.2222, "step": 12617 }, { "epoch": 0.9996434937611408, "grad_norm": 1.674992983819206, "learning_rate": 5.267614631154772e-12, "loss": 0.3302, "step": 12618 }, { "epoch": 0.9997227173697762, "grad_norm": 1.412962469950124, "learning_rate": 2.9630333442387525e-12, "loss": 0.1729, "step": 12619 }, { "epoch": 0.9998019409784116, "grad_norm": 1.618717443879094, "learning_rate": 1.3169037449412004e-12, "loss": 0.2662, "step": 12620 }, { "epoch": 0.9998811645870469, "grad_norm": 1.429308333073465, "learning_rate": 3.29225942063971e-13, "loss": 0.2245, "step": 12621 }, { "epoch": 0.9999603881956823, "grad_norm": 1.335405194521231, "learning_rate": 0.0, "loss": 0.2444, "step": 12622 }, { "epoch": 0.9999603881956823, "step": 12622, "total_flos": 6735811612704768.0, "train_loss": 0.3364984280516995, "train_runtime": 50045.9274, "train_samples_per_second": 32.284, "train_steps_per_second": 0.252 } ], "logging_steps": 1.0, "max_steps": 12622, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6735811612704768.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }