{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 30057, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.0465035438537598, "learning_rate": 4e-08, "loss": 3.7659, "step": 1 }, { "epoch": 0.0, "grad_norm": 3.150376796722412, "learning_rate": 8e-08, "loss": 3.7818, "step": 2 }, { "epoch": 0.0, "grad_norm": 2.9999306201934814, "learning_rate": 1.2000000000000002e-07, "loss": 3.7177, "step": 3 }, { "epoch": 0.0, "grad_norm": 2.96038556098938, "learning_rate": 1.6e-07, "loss": 3.7639, "step": 4 }, { "epoch": 0.0, "grad_norm": 2.9733896255493164, "learning_rate": 2.0000000000000002e-07, "loss": 3.7662, "step": 5 }, { "epoch": 0.0, "grad_norm": 2.938199996948242, "learning_rate": 2.4000000000000003e-07, "loss": 3.7483, "step": 6 }, { "epoch": 0.0, "grad_norm": 3.0853888988494873, "learning_rate": 2.8e-07, "loss": 3.7558, "step": 7 }, { "epoch": 0.0, "grad_norm": 3.0000698566436768, "learning_rate": 3.2e-07, "loss": 3.7618, "step": 8 }, { "epoch": 0.0, "grad_norm": 2.9343674182891846, "learning_rate": 3.6e-07, "loss": 3.747, "step": 9 }, { "epoch": 0.0, "grad_norm": 2.893684148788452, "learning_rate": 4.0000000000000003e-07, "loss": 3.7545, "step": 10 }, { "epoch": 0.0, "grad_norm": 3.076113700866699, "learning_rate": 4.4e-07, "loss": 3.7813, "step": 11 }, { "epoch": 0.0, "grad_norm": 2.9859817028045654, "learning_rate": 4.800000000000001e-07, "loss": 3.7794, "step": 12 }, { "epoch": 0.0, "grad_norm": 3.052562713623047, "learning_rate": 5.2e-07, "loss": 3.7256, "step": 13 }, { "epoch": 0.0, "grad_norm": 2.993824005126953, "learning_rate": 5.6e-07, "loss": 3.7362, "step": 14 }, { "epoch": 0.0, "grad_norm": 3.080522060394287, "learning_rate": 6.000000000000001e-07, "loss": 3.7595, "step": 15 }, { "epoch": 0.0, "grad_norm": 2.9374191761016846, "learning_rate": 6.4e-07, "loss": 3.7689, "step": 16 }, { "epoch": 0.0, "grad_norm": 2.9945151805877686, "learning_rate": 6.800000000000001e-07, "loss": 3.7712, "step": 17 }, { "epoch": 0.0, "grad_norm": 2.9775748252868652, "learning_rate": 7.2e-07, "loss": 3.7035, "step": 18 }, { "epoch": 0.0, "grad_norm": 3.0326344966888428, "learning_rate": 7.6e-07, "loss": 3.7356, "step": 19 }, { "epoch": 0.0, "grad_norm": 2.946686029434204, "learning_rate": 8.000000000000001e-07, "loss": 3.739, "step": 20 }, { "epoch": 0.0, "grad_norm": 2.9172182083129883, "learning_rate": 8.400000000000001e-07, "loss": 3.7233, "step": 21 }, { "epoch": 0.0, "grad_norm": 2.9136874675750732, "learning_rate": 8.8e-07, "loss": 3.7611, "step": 22 }, { "epoch": 0.0, "grad_norm": 2.94134521484375, "learning_rate": 9.200000000000001e-07, "loss": 3.7566, "step": 23 }, { "epoch": 0.0, "grad_norm": 2.8853530883789062, "learning_rate": 9.600000000000001e-07, "loss": 3.7405, "step": 24 }, { "epoch": 0.0, "grad_norm": 2.9336092472076416, "learning_rate": 1.0000000000000002e-06, "loss": 3.7482, "step": 25 }, { "epoch": 0.0, "grad_norm": 3.00654673576355, "learning_rate": 1.04e-06, "loss": 3.6785, "step": 26 }, { "epoch": 0.0, "grad_norm": 2.8633406162261963, "learning_rate": 1.08e-06, "loss": 3.7673, "step": 27 }, { "epoch": 0.0, "grad_norm": 2.936859369277954, "learning_rate": 1.12e-06, "loss": 3.7625, "step": 28 }, { "epoch": 0.0, "grad_norm": 2.8874454498291016, "learning_rate": 1.1600000000000001e-06, "loss": 3.7298, "step": 29 }, { "epoch": 0.0, "grad_norm": 2.8940701484680176, "learning_rate": 1.2000000000000002e-06, "loss": 3.7046, "step": 30 }, { "epoch": 0.0, "grad_norm": 2.86255145072937, "learning_rate": 1.2400000000000002e-06, "loss": 3.7295, "step": 31 }, { "epoch": 0.0, "grad_norm": 2.8795437812805176, "learning_rate": 1.28e-06, "loss": 3.6807, "step": 32 }, { "epoch": 0.0, "grad_norm": 2.8854005336761475, "learning_rate": 1.32e-06, "loss": 3.7068, "step": 33 }, { "epoch": 0.0, "grad_norm": 2.959508180618286, "learning_rate": 1.3600000000000001e-06, "loss": 3.7281, "step": 34 }, { "epoch": 0.0, "grad_norm": 2.840251922607422, "learning_rate": 1.4000000000000001e-06, "loss": 3.6889, "step": 35 }, { "epoch": 0.0, "grad_norm": 2.74824857711792, "learning_rate": 1.44e-06, "loss": 3.6982, "step": 36 }, { "epoch": 0.0, "grad_norm": 2.772235631942749, "learning_rate": 1.48e-06, "loss": 3.6856, "step": 37 }, { "epoch": 0.0, "grad_norm": 2.7824931144714355, "learning_rate": 1.52e-06, "loss": 3.6938, "step": 38 }, { "epoch": 0.0, "grad_norm": 2.721611261367798, "learning_rate": 1.56e-06, "loss": 3.6782, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.773103952407837, "learning_rate": 1.6000000000000001e-06, "loss": 3.6949, "step": 40 }, { "epoch": 0.0, "grad_norm": 2.671250581741333, "learning_rate": 1.6400000000000002e-06, "loss": 3.6881, "step": 41 }, { "epoch": 0.0, "grad_norm": 2.7398197650909424, "learning_rate": 1.6800000000000002e-06, "loss": 3.7502, "step": 42 }, { "epoch": 0.0, "grad_norm": 2.7565064430236816, "learning_rate": 1.72e-06, "loss": 3.6778, "step": 43 }, { "epoch": 0.0, "grad_norm": 2.6314198970794678, "learning_rate": 1.76e-06, "loss": 3.7221, "step": 44 }, { "epoch": 0.0, "grad_norm": 2.642784595489502, "learning_rate": 1.8000000000000001e-06, "loss": 3.6884, "step": 45 }, { "epoch": 0.0, "grad_norm": 2.591935873031616, "learning_rate": 1.8400000000000002e-06, "loss": 3.6326, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.6190409660339355, "learning_rate": 1.8800000000000002e-06, "loss": 3.6362, "step": 47 }, { "epoch": 0.0, "grad_norm": 2.597822904586792, "learning_rate": 1.9200000000000003e-06, "loss": 3.6443, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.5975561141967773, "learning_rate": 1.9600000000000003e-06, "loss": 3.6985, "step": 49 }, { "epoch": 0.0, "grad_norm": 2.5982890129089355, "learning_rate": 2.0000000000000003e-06, "loss": 3.7032, "step": 50 }, { "epoch": 0.0, "grad_norm": 2.6563563346862793, "learning_rate": 2.04e-06, "loss": 3.6719, "step": 51 }, { "epoch": 0.0, "grad_norm": 2.49603009223938, "learning_rate": 2.08e-06, "loss": 3.6482, "step": 52 }, { "epoch": 0.0, "grad_norm": 2.5944085121154785, "learning_rate": 2.12e-06, "loss": 3.6701, "step": 53 }, { "epoch": 0.0, "grad_norm": 2.583873987197876, "learning_rate": 2.16e-06, "loss": 3.6752, "step": 54 }, { "epoch": 0.0, "grad_norm": 2.457430601119995, "learning_rate": 2.2e-06, "loss": 3.6557, "step": 55 }, { "epoch": 0.0, "grad_norm": 2.4788320064544678, "learning_rate": 2.24e-06, "loss": 3.619, "step": 56 }, { "epoch": 0.0, "grad_norm": 2.5159473419189453, "learning_rate": 2.28e-06, "loss": 3.5979, "step": 57 }, { "epoch": 0.0, "grad_norm": 2.412249803543091, "learning_rate": 2.3200000000000002e-06, "loss": 3.6781, "step": 58 }, { "epoch": 0.0, "grad_norm": 2.4523682594299316, "learning_rate": 2.3600000000000003e-06, "loss": 3.6549, "step": 59 }, { "epoch": 0.0, "grad_norm": 2.4825010299682617, "learning_rate": 2.4000000000000003e-06, "loss": 3.6678, "step": 60 }, { "epoch": 0.0, "grad_norm": 2.424654960632324, "learning_rate": 2.4400000000000004e-06, "loss": 3.6639, "step": 61 }, { "epoch": 0.0, "grad_norm": 2.4321727752685547, "learning_rate": 2.4800000000000004e-06, "loss": 3.6527, "step": 62 }, { "epoch": 0.0, "grad_norm": 2.429910182952881, "learning_rate": 2.52e-06, "loss": 3.6242, "step": 63 }, { "epoch": 0.0, "grad_norm": 2.3708126544952393, "learning_rate": 2.56e-06, "loss": 3.6498, "step": 64 }, { "epoch": 0.0, "grad_norm": 2.3835256099700928, "learning_rate": 2.6e-06, "loss": 3.6286, "step": 65 }, { "epoch": 0.0, "grad_norm": 2.327960729598999, "learning_rate": 2.64e-06, "loss": 3.5969, "step": 66 }, { "epoch": 0.0, "grad_norm": 2.318544864654541, "learning_rate": 2.68e-06, "loss": 3.6336, "step": 67 }, { "epoch": 0.0, "grad_norm": 2.247493028640747, "learning_rate": 2.7200000000000002e-06, "loss": 3.6448, "step": 68 }, { "epoch": 0.0, "grad_norm": 2.3056132793426514, "learning_rate": 2.7600000000000003e-06, "loss": 3.6668, "step": 69 }, { "epoch": 0.0, "grad_norm": 2.285571813583374, "learning_rate": 2.8000000000000003e-06, "loss": 3.6235, "step": 70 }, { "epoch": 0.0, "grad_norm": 2.2843549251556396, "learning_rate": 2.84e-06, "loss": 3.5946, "step": 71 }, { "epoch": 0.0, "grad_norm": 2.2572786808013916, "learning_rate": 2.88e-06, "loss": 3.5957, "step": 72 }, { "epoch": 0.0, "grad_norm": 2.1217710971832275, "learning_rate": 2.92e-06, "loss": 3.5305, "step": 73 }, { "epoch": 0.0, "grad_norm": 2.0943527221679688, "learning_rate": 2.96e-06, "loss": 3.5057, "step": 74 }, { "epoch": 0.0, "grad_norm": 2.1098923683166504, "learning_rate": 3e-06, "loss": 3.5331, "step": 75 }, { "epoch": 0.0, "grad_norm": 2.1389548778533936, "learning_rate": 3.04e-06, "loss": 3.598, "step": 76 }, { "epoch": 0.0, "grad_norm": 2.1352689266204834, "learning_rate": 3.08e-06, "loss": 3.5457, "step": 77 }, { "epoch": 0.0, "grad_norm": 2.105759382247925, "learning_rate": 3.12e-06, "loss": 3.5604, "step": 78 }, { "epoch": 0.0, "grad_norm": 2.1769442558288574, "learning_rate": 3.1600000000000002e-06, "loss": 3.576, "step": 79 }, { "epoch": 0.0, "grad_norm": 2.043865442276001, "learning_rate": 3.2000000000000003e-06, "loss": 3.486, "step": 80 }, { "epoch": 0.0, "grad_norm": 2.0029489994049072, "learning_rate": 3.2400000000000003e-06, "loss": 3.5656, "step": 81 }, { "epoch": 0.0, "grad_norm": 1.9802006483078003, "learning_rate": 3.2800000000000004e-06, "loss": 3.5367, "step": 82 }, { "epoch": 0.0, "grad_norm": 1.99752676486969, "learning_rate": 3.3200000000000004e-06, "loss": 3.5021, "step": 83 }, { "epoch": 0.0, "grad_norm": 1.9963101148605347, "learning_rate": 3.3600000000000004e-06, "loss": 3.5611, "step": 84 }, { "epoch": 0.0, "grad_norm": 1.9249954223632812, "learning_rate": 3.4000000000000005e-06, "loss": 3.5318, "step": 85 }, { "epoch": 0.0, "grad_norm": 1.9695982933044434, "learning_rate": 3.44e-06, "loss": 3.5412, "step": 86 }, { "epoch": 0.0, "grad_norm": 1.9668208360671997, "learning_rate": 3.48e-06, "loss": 3.5427, "step": 87 }, { "epoch": 0.0, "grad_norm": 1.8661860227584839, "learning_rate": 3.52e-06, "loss": 3.5171, "step": 88 }, { "epoch": 0.0, "grad_norm": 1.9764589071273804, "learning_rate": 3.5600000000000002e-06, "loss": 3.4664, "step": 89 }, { "epoch": 0.0, "grad_norm": 1.8449292182922363, "learning_rate": 3.6000000000000003e-06, "loss": 3.4972, "step": 90 }, { "epoch": 0.0, "grad_norm": 1.8575401306152344, "learning_rate": 3.6400000000000003e-06, "loss": 3.4985, "step": 91 }, { "epoch": 0.0, "grad_norm": 1.8120278120040894, "learning_rate": 3.6800000000000003e-06, "loss": 3.5152, "step": 92 }, { "epoch": 0.0, "grad_norm": 1.7762423753738403, "learning_rate": 3.7200000000000004e-06, "loss": 3.5163, "step": 93 }, { "epoch": 0.0, "grad_norm": 1.766471266746521, "learning_rate": 3.7600000000000004e-06, "loss": 3.4853, "step": 94 }, { "epoch": 0.0, "grad_norm": 1.8277347087860107, "learning_rate": 3.8000000000000005e-06, "loss": 3.4618, "step": 95 }, { "epoch": 0.0, "grad_norm": 1.7625563144683838, "learning_rate": 3.8400000000000005e-06, "loss": 3.4531, "step": 96 }, { "epoch": 0.0, "grad_norm": 1.7076278924942017, "learning_rate": 3.88e-06, "loss": 3.4581, "step": 97 }, { "epoch": 0.0, "grad_norm": 1.7219356298446655, "learning_rate": 3.920000000000001e-06, "loss": 3.5053, "step": 98 }, { "epoch": 0.0, "grad_norm": 1.6866194009780884, "learning_rate": 3.96e-06, "loss": 3.4822, "step": 99 }, { "epoch": 0.0, "grad_norm": 1.6395337581634521, "learning_rate": 4.000000000000001e-06, "loss": 3.4826, "step": 100 }, { "epoch": 0.0, "grad_norm": 1.6182422637939453, "learning_rate": 4.04e-06, "loss": 3.4144, "step": 101 }, { "epoch": 0.0, "grad_norm": 1.6277965307235718, "learning_rate": 4.08e-06, "loss": 3.4628, "step": 102 }, { "epoch": 0.0, "grad_norm": 1.6274092197418213, "learning_rate": 4.12e-06, "loss": 3.4251, "step": 103 }, { "epoch": 0.0, "grad_norm": 1.5992956161499023, "learning_rate": 4.16e-06, "loss": 3.4467, "step": 104 }, { "epoch": 0.0, "grad_norm": 1.5614099502563477, "learning_rate": 4.2000000000000004e-06, "loss": 3.4939, "step": 105 }, { "epoch": 0.0, "grad_norm": 1.5395946502685547, "learning_rate": 4.24e-06, "loss": 3.4063, "step": 106 }, { "epoch": 0.0, "grad_norm": 1.5085062980651855, "learning_rate": 4.2800000000000005e-06, "loss": 3.4316, "step": 107 }, { "epoch": 0.0, "grad_norm": 1.5296846628189087, "learning_rate": 4.32e-06, "loss": 3.4604, "step": 108 }, { "epoch": 0.0, "grad_norm": 1.5282198190689087, "learning_rate": 4.360000000000001e-06, "loss": 3.4176, "step": 109 }, { "epoch": 0.0, "grad_norm": 1.5429089069366455, "learning_rate": 4.4e-06, "loss": 3.4957, "step": 110 }, { "epoch": 0.0, "grad_norm": 1.4946658611297607, "learning_rate": 4.440000000000001e-06, "loss": 3.4818, "step": 111 }, { "epoch": 0.0, "grad_norm": 1.4442675113677979, "learning_rate": 4.48e-06, "loss": 3.4255, "step": 112 }, { "epoch": 0.0, "grad_norm": 1.4594030380249023, "learning_rate": 4.520000000000001e-06, "loss": 3.3965, "step": 113 }, { "epoch": 0.0, "grad_norm": 1.4332916736602783, "learning_rate": 4.56e-06, "loss": 3.362, "step": 114 }, { "epoch": 0.0, "grad_norm": 1.431638479232788, "learning_rate": 4.600000000000001e-06, "loss": 3.4073, "step": 115 }, { "epoch": 0.0, "grad_norm": 1.4588367938995361, "learning_rate": 4.6400000000000005e-06, "loss": 3.4292, "step": 116 }, { "epoch": 0.0, "grad_norm": 1.3806376457214355, "learning_rate": 4.680000000000001e-06, "loss": 3.4422, "step": 117 }, { "epoch": 0.0, "grad_norm": 1.3173532485961914, "learning_rate": 4.7200000000000005e-06, "loss": 3.3783, "step": 118 }, { "epoch": 0.0, "grad_norm": 1.369532823562622, "learning_rate": 4.76e-06, "loss": 3.3777, "step": 119 }, { "epoch": 0.0, "grad_norm": 1.3398481607437134, "learning_rate": 4.800000000000001e-06, "loss": 3.4096, "step": 120 }, { "epoch": 0.0, "grad_norm": 1.2976102828979492, "learning_rate": 4.84e-06, "loss": 3.3861, "step": 121 }, { "epoch": 0.0, "grad_norm": 1.3191262483596802, "learning_rate": 4.880000000000001e-06, "loss": 3.4147, "step": 122 }, { "epoch": 0.0, "grad_norm": 1.3748778104782104, "learning_rate": 4.92e-06, "loss": 3.3931, "step": 123 }, { "epoch": 0.0, "grad_norm": 1.2943419218063354, "learning_rate": 4.960000000000001e-06, "loss": 3.3655, "step": 124 }, { "epoch": 0.0, "grad_norm": 1.3425780534744263, "learning_rate": 5e-06, "loss": 3.4313, "step": 125 }, { "epoch": 0.0, "grad_norm": 1.278464436531067, "learning_rate": 5.04e-06, "loss": 3.3519, "step": 126 }, { "epoch": 0.0, "grad_norm": 1.2368496656417847, "learning_rate": 5.0800000000000005e-06, "loss": 3.3372, "step": 127 }, { "epoch": 0.0, "grad_norm": 1.2316712141036987, "learning_rate": 5.12e-06, "loss": 3.325, "step": 128 }, { "epoch": 0.0, "grad_norm": 1.177514672279358, "learning_rate": 5.1600000000000006e-06, "loss": 3.3653, "step": 129 }, { "epoch": 0.0, "grad_norm": 1.2126216888427734, "learning_rate": 5.2e-06, "loss": 3.3799, "step": 130 }, { "epoch": 0.0, "grad_norm": 1.159932017326355, "learning_rate": 5.240000000000001e-06, "loss": 3.3239, "step": 131 }, { "epoch": 0.0, "grad_norm": 1.2238554954528809, "learning_rate": 5.28e-06, "loss": 3.3393, "step": 132 }, { "epoch": 0.0, "grad_norm": 1.1728185415267944, "learning_rate": 5.320000000000001e-06, "loss": 3.3066, "step": 133 }, { "epoch": 0.0, "grad_norm": 1.16297447681427, "learning_rate": 5.36e-06, "loss": 3.306, "step": 134 }, { "epoch": 0.0, "grad_norm": 1.1513962745666504, "learning_rate": 5.400000000000001e-06, "loss": 3.3035, "step": 135 }, { "epoch": 0.0, "grad_norm": 1.190287470817566, "learning_rate": 5.4400000000000004e-06, "loss": 3.311, "step": 136 }, { "epoch": 0.0, "grad_norm": 1.1670467853546143, "learning_rate": 5.480000000000001e-06, "loss": 3.2978, "step": 137 }, { "epoch": 0.0, "grad_norm": 1.0985170602798462, "learning_rate": 5.5200000000000005e-06, "loss": 3.3456, "step": 138 }, { "epoch": 0.0, "grad_norm": 1.119649887084961, "learning_rate": 5.560000000000001e-06, "loss": 3.2801, "step": 139 }, { "epoch": 0.0, "grad_norm": 1.1619948148727417, "learning_rate": 5.600000000000001e-06, "loss": 3.2839, "step": 140 }, { "epoch": 0.0, "grad_norm": 1.0866366624832153, "learning_rate": 5.64e-06, "loss": 3.2348, "step": 141 }, { "epoch": 0.0, "grad_norm": 1.0735145807266235, "learning_rate": 5.68e-06, "loss": 3.2547, "step": 142 }, { "epoch": 0.0, "grad_norm": 1.08341646194458, "learning_rate": 5.72e-06, "loss": 3.2889, "step": 143 }, { "epoch": 0.0, "grad_norm": 1.0448150634765625, "learning_rate": 5.76e-06, "loss": 3.2736, "step": 144 }, { "epoch": 0.0, "grad_norm": 1.1440234184265137, "learning_rate": 5.8e-06, "loss": 3.2545, "step": 145 }, { "epoch": 0.0, "grad_norm": 1.0658066272735596, "learning_rate": 5.84e-06, "loss": 3.2877, "step": 146 }, { "epoch": 0.0, "grad_norm": 1.024755597114563, "learning_rate": 5.8800000000000005e-06, "loss": 3.3036, "step": 147 }, { "epoch": 0.0, "grad_norm": 1.0687769651412964, "learning_rate": 5.92e-06, "loss": 3.2758, "step": 148 }, { "epoch": 0.0, "grad_norm": 1.014294147491455, "learning_rate": 5.9600000000000005e-06, "loss": 3.2958, "step": 149 }, { "epoch": 0.0, "grad_norm": 1.03204345703125, "learning_rate": 6e-06, "loss": 3.2687, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.007180094718933, "learning_rate": 6.040000000000001e-06, "loss": 3.2561, "step": 151 }, { "epoch": 0.01, "grad_norm": 1.020760178565979, "learning_rate": 6.08e-06, "loss": 3.2406, "step": 152 }, { "epoch": 0.01, "grad_norm": 0.9961280226707458, "learning_rate": 6.120000000000001e-06, "loss": 3.2289, "step": 153 }, { "epoch": 0.01, "grad_norm": 1.0122206211090088, "learning_rate": 6.16e-06, "loss": 3.2285, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.0377811193466187, "learning_rate": 6.200000000000001e-06, "loss": 3.1956, "step": 155 }, { "epoch": 0.01, "grad_norm": 0.9753139019012451, "learning_rate": 6.24e-06, "loss": 3.2803, "step": 156 }, { "epoch": 0.01, "grad_norm": 0.9736471772193909, "learning_rate": 6.280000000000001e-06, "loss": 3.2468, "step": 157 }, { "epoch": 0.01, "grad_norm": 0.9775617122650146, "learning_rate": 6.3200000000000005e-06, "loss": 3.1796, "step": 158 }, { "epoch": 0.01, "grad_norm": 0.9449413418769836, "learning_rate": 6.360000000000001e-06, "loss": 3.2506, "step": 159 }, { "epoch": 0.01, "grad_norm": 0.9355918169021606, "learning_rate": 6.4000000000000006e-06, "loss": 3.2165, "step": 160 }, { "epoch": 0.01, "grad_norm": 0.9462165236473083, "learning_rate": 6.440000000000001e-06, "loss": 3.2012, "step": 161 }, { "epoch": 0.01, "grad_norm": 0.9783335328102112, "learning_rate": 6.480000000000001e-06, "loss": 3.2207, "step": 162 }, { "epoch": 0.01, "grad_norm": 0.9430908560752869, "learning_rate": 6.520000000000001e-06, "loss": 3.2187, "step": 163 }, { "epoch": 0.01, "grad_norm": 0.9193970561027527, "learning_rate": 6.560000000000001e-06, "loss": 3.1716, "step": 164 }, { "epoch": 0.01, "grad_norm": 0.9126341938972473, "learning_rate": 6.600000000000001e-06, "loss": 3.2563, "step": 165 }, { "epoch": 0.01, "grad_norm": 0.9091991186141968, "learning_rate": 6.640000000000001e-06, "loss": 3.1949, "step": 166 }, { "epoch": 0.01, "grad_norm": 0.9056283235549927, "learning_rate": 6.680000000000001e-06, "loss": 3.1961, "step": 167 }, { "epoch": 0.01, "grad_norm": 0.9239982962608337, "learning_rate": 6.720000000000001e-06, "loss": 3.2164, "step": 168 }, { "epoch": 0.01, "grad_norm": 0.8863268494606018, "learning_rate": 6.760000000000001e-06, "loss": 3.1951, "step": 169 }, { "epoch": 0.01, "grad_norm": 0.9363348484039307, "learning_rate": 6.800000000000001e-06, "loss": 3.1816, "step": 170 }, { "epoch": 0.01, "grad_norm": 0.9083738923072815, "learning_rate": 6.8400000000000014e-06, "loss": 3.18, "step": 171 }, { "epoch": 0.01, "grad_norm": 0.8851535320281982, "learning_rate": 6.88e-06, "loss": 3.1975, "step": 172 }, { "epoch": 0.01, "grad_norm": 0.8942703604698181, "learning_rate": 6.92e-06, "loss": 3.2521, "step": 173 }, { "epoch": 0.01, "grad_norm": 0.8518399000167847, "learning_rate": 6.96e-06, "loss": 3.1863, "step": 174 }, { "epoch": 0.01, "grad_norm": 0.8763868808746338, "learning_rate": 7e-06, "loss": 3.1749, "step": 175 }, { "epoch": 0.01, "grad_norm": 0.8461930155754089, "learning_rate": 7.04e-06, "loss": 3.2139, "step": 176 }, { "epoch": 0.01, "grad_norm": 0.879628598690033, "learning_rate": 7.08e-06, "loss": 3.2323, "step": 177 }, { "epoch": 0.01, "grad_norm": 0.890159010887146, "learning_rate": 7.1200000000000004e-06, "loss": 3.1331, "step": 178 }, { "epoch": 0.01, "grad_norm": 0.8850238919258118, "learning_rate": 7.16e-06, "loss": 3.1267, "step": 179 }, { "epoch": 0.01, "grad_norm": 0.8491682410240173, "learning_rate": 7.2000000000000005e-06, "loss": 3.1237, "step": 180 }, { "epoch": 0.01, "grad_norm": 0.8608806133270264, "learning_rate": 7.24e-06, "loss": 3.1464, "step": 181 }, { "epoch": 0.01, "grad_norm": 0.8595521450042725, "learning_rate": 7.280000000000001e-06, "loss": 3.1473, "step": 182 }, { "epoch": 0.01, "grad_norm": 0.8413703441619873, "learning_rate": 7.32e-06, "loss": 3.1822, "step": 183 }, { "epoch": 0.01, "grad_norm": 0.8773772120475769, "learning_rate": 7.360000000000001e-06, "loss": 3.1492, "step": 184 }, { "epoch": 0.01, "grad_norm": 0.8412697911262512, "learning_rate": 7.4e-06, "loss": 3.2014, "step": 185 }, { "epoch": 0.01, "grad_norm": 0.8224411606788635, "learning_rate": 7.440000000000001e-06, "loss": 3.1793, "step": 186 }, { "epoch": 0.01, "grad_norm": 0.8693506717681885, "learning_rate": 7.48e-06, "loss": 3.1193, "step": 187 }, { "epoch": 0.01, "grad_norm": 0.873849093914032, "learning_rate": 7.520000000000001e-06, "loss": 3.0898, "step": 188 }, { "epoch": 0.01, "grad_norm": 0.8525444865226746, "learning_rate": 7.5600000000000005e-06, "loss": 3.0659, "step": 189 }, { "epoch": 0.01, "grad_norm": 0.8342667818069458, "learning_rate": 7.600000000000001e-06, "loss": 3.1473, "step": 190 }, { "epoch": 0.01, "grad_norm": 0.8115031719207764, "learning_rate": 7.640000000000001e-06, "loss": 3.1542, "step": 191 }, { "epoch": 0.01, "grad_norm": 0.8224273324012756, "learning_rate": 7.680000000000001e-06, "loss": 3.1223, "step": 192 }, { "epoch": 0.01, "grad_norm": 0.8318130970001221, "learning_rate": 7.72e-06, "loss": 3.1424, "step": 193 }, { "epoch": 0.01, "grad_norm": 0.857342541217804, "learning_rate": 7.76e-06, "loss": 3.1513, "step": 194 }, { "epoch": 0.01, "grad_norm": 0.8313223719596863, "learning_rate": 7.800000000000002e-06, "loss": 3.1, "step": 195 }, { "epoch": 0.01, "grad_norm": 0.8101739883422852, "learning_rate": 7.840000000000001e-06, "loss": 3.1125, "step": 196 }, { "epoch": 0.01, "grad_norm": 0.8099242448806763, "learning_rate": 7.88e-06, "loss": 3.164, "step": 197 }, { "epoch": 0.01, "grad_norm": 0.8177672028541565, "learning_rate": 7.92e-06, "loss": 3.1515, "step": 198 }, { "epoch": 0.01, "grad_norm": 0.8439546823501587, "learning_rate": 7.960000000000002e-06, "loss": 3.1087, "step": 199 }, { "epoch": 0.01, "grad_norm": 0.80745929479599, "learning_rate": 8.000000000000001e-06, "loss": 3.1463, "step": 200 }, { "epoch": 0.01, "grad_norm": 0.8696184754371643, "learning_rate": 8.040000000000001e-06, "loss": 3.1645, "step": 201 }, { "epoch": 0.01, "grad_norm": 0.7973880767822266, "learning_rate": 8.08e-06, "loss": 3.111, "step": 202 }, { "epoch": 0.01, "grad_norm": 0.8098641037940979, "learning_rate": 8.120000000000002e-06, "loss": 3.0834, "step": 203 }, { "epoch": 0.01, "grad_norm": 0.8531391620635986, "learning_rate": 8.16e-06, "loss": 3.0951, "step": 204 }, { "epoch": 0.01, "grad_norm": 0.8008347749710083, "learning_rate": 8.2e-06, "loss": 3.1017, "step": 205 }, { "epoch": 0.01, "grad_norm": 0.7965345978736877, "learning_rate": 8.24e-06, "loss": 3.1638, "step": 206 }, { "epoch": 0.01, "grad_norm": 0.7986938953399658, "learning_rate": 8.28e-06, "loss": 3.0561, "step": 207 }, { "epoch": 0.01, "grad_norm": 0.7870634198188782, "learning_rate": 8.32e-06, "loss": 3.0892, "step": 208 }, { "epoch": 0.01, "grad_norm": 0.7703922986984253, "learning_rate": 8.36e-06, "loss": 3.1091, "step": 209 }, { "epoch": 0.01, "grad_norm": 0.804821252822876, "learning_rate": 8.400000000000001e-06, "loss": 3.094, "step": 210 }, { "epoch": 0.01, "grad_norm": 0.7656739950180054, "learning_rate": 8.44e-06, "loss": 3.1213, "step": 211 }, { "epoch": 0.01, "grad_norm": 0.8040069937705994, "learning_rate": 8.48e-06, "loss": 3.0878, "step": 212 }, { "epoch": 0.01, "grad_norm": 0.7826994061470032, "learning_rate": 8.52e-06, "loss": 3.0809, "step": 213 }, { "epoch": 0.01, "grad_norm": 0.7905282378196716, "learning_rate": 8.560000000000001e-06, "loss": 3.033, "step": 214 }, { "epoch": 0.01, "grad_norm": 0.7479476928710938, "learning_rate": 8.6e-06, "loss": 3.0792, "step": 215 }, { "epoch": 0.01, "grad_norm": 0.7685500979423523, "learning_rate": 8.64e-06, "loss": 3.1118, "step": 216 }, { "epoch": 0.01, "grad_norm": 0.7756505012512207, "learning_rate": 8.68e-06, "loss": 3.0581, "step": 217 }, { "epoch": 0.01, "grad_norm": 0.7733978629112244, "learning_rate": 8.720000000000001e-06, "loss": 3.1143, "step": 218 }, { "epoch": 0.01, "grad_norm": 0.785362720489502, "learning_rate": 8.76e-06, "loss": 3.0869, "step": 219 }, { "epoch": 0.01, "grad_norm": 0.7898158431053162, "learning_rate": 8.8e-06, "loss": 3.1271, "step": 220 }, { "epoch": 0.01, "grad_norm": 0.7982162833213806, "learning_rate": 8.84e-06, "loss": 3.052, "step": 221 }, { "epoch": 0.01, "grad_norm": 0.7816964983940125, "learning_rate": 8.880000000000001e-06, "loss": 3.0878, "step": 222 }, { "epoch": 0.01, "grad_norm": 0.7702066898345947, "learning_rate": 8.920000000000001e-06, "loss": 3.0505, "step": 223 }, { "epoch": 0.01, "grad_norm": 0.753216028213501, "learning_rate": 8.96e-06, "loss": 3.0464, "step": 224 }, { "epoch": 0.01, "grad_norm": 0.8036941885948181, "learning_rate": 9e-06, "loss": 3.0093, "step": 225 }, { "epoch": 0.01, "grad_norm": 0.7765317559242249, "learning_rate": 9.040000000000002e-06, "loss": 3.0214, "step": 226 }, { "epoch": 0.01, "grad_norm": 0.7712041139602661, "learning_rate": 9.080000000000001e-06, "loss": 3.0204, "step": 227 }, { "epoch": 0.01, "grad_norm": 0.7455520033836365, "learning_rate": 9.12e-06, "loss": 3.089, "step": 228 }, { "epoch": 0.01, "grad_norm": 0.7890076637268066, "learning_rate": 9.16e-06, "loss": 2.9956, "step": 229 }, { "epoch": 0.01, "grad_norm": 0.7368062734603882, "learning_rate": 9.200000000000002e-06, "loss": 2.95, "step": 230 }, { "epoch": 0.01, "grad_norm": 0.7542151212692261, "learning_rate": 9.240000000000001e-06, "loss": 3.0833, "step": 231 }, { "epoch": 0.01, "grad_norm": 0.7973144054412842, "learning_rate": 9.280000000000001e-06, "loss": 2.9593, "step": 232 }, { "epoch": 0.01, "grad_norm": 0.7747605443000793, "learning_rate": 9.32e-06, "loss": 3.0537, "step": 233 }, { "epoch": 0.01, "grad_norm": 0.7799984812736511, "learning_rate": 9.360000000000002e-06, "loss": 3.0924, "step": 234 }, { "epoch": 0.01, "grad_norm": 0.760521650314331, "learning_rate": 9.4e-06, "loss": 3.0049, "step": 235 }, { "epoch": 0.01, "grad_norm": 0.7392805218696594, "learning_rate": 9.440000000000001e-06, "loss": 3.0076, "step": 236 }, { "epoch": 0.01, "grad_norm": 0.7766023278236389, "learning_rate": 9.48e-06, "loss": 3.0305, "step": 237 }, { "epoch": 0.01, "grad_norm": 0.7325561046600342, "learning_rate": 9.52e-06, "loss": 3.0102, "step": 238 }, { "epoch": 0.01, "grad_norm": 0.7446618676185608, "learning_rate": 9.56e-06, "loss": 3.0799, "step": 239 }, { "epoch": 0.01, "grad_norm": 0.7309117913246155, "learning_rate": 9.600000000000001e-06, "loss": 3.0657, "step": 240 }, { "epoch": 0.01, "grad_norm": 0.7644285559654236, "learning_rate": 9.640000000000001e-06, "loss": 2.9969, "step": 241 }, { "epoch": 0.01, "grad_norm": 0.7534785866737366, "learning_rate": 9.68e-06, "loss": 3.043, "step": 242 }, { "epoch": 0.01, "grad_norm": 0.756115198135376, "learning_rate": 9.72e-06, "loss": 3.034, "step": 243 }, { "epoch": 0.01, "grad_norm": 0.7088952660560608, "learning_rate": 9.760000000000001e-06, "loss": 3.0089, "step": 244 }, { "epoch": 0.01, "grad_norm": 0.7141646146774292, "learning_rate": 9.800000000000001e-06, "loss": 2.9914, "step": 245 }, { "epoch": 0.01, "grad_norm": 0.7243670225143433, "learning_rate": 9.84e-06, "loss": 3.0275, "step": 246 }, { "epoch": 0.01, "grad_norm": 0.7706453204154968, "learning_rate": 9.88e-06, "loss": 2.9697, "step": 247 }, { "epoch": 0.01, "grad_norm": 0.7732541561126709, "learning_rate": 9.920000000000002e-06, "loss": 3.0194, "step": 248 }, { "epoch": 0.01, "grad_norm": 0.7577429413795471, "learning_rate": 9.960000000000001e-06, "loss": 2.983, "step": 249 }, { "epoch": 0.01, "grad_norm": 0.7420429587364197, "learning_rate": 1e-05, "loss": 3.0236, "step": 250 }, { "epoch": 0.01, "grad_norm": 0.7250332236289978, "learning_rate": 1.004e-05, "loss": 3.0748, "step": 251 }, { "epoch": 0.01, "grad_norm": 0.7337601780891418, "learning_rate": 1.008e-05, "loss": 2.95, "step": 252 }, { "epoch": 0.01, "grad_norm": 0.7252420783042908, "learning_rate": 1.0120000000000001e-05, "loss": 2.9728, "step": 253 }, { "epoch": 0.01, "grad_norm": 0.7484934329986572, "learning_rate": 1.0160000000000001e-05, "loss": 3.026, "step": 254 }, { "epoch": 0.01, "grad_norm": 0.751584529876709, "learning_rate": 1.02e-05, "loss": 3.0389, "step": 255 }, { "epoch": 0.01, "grad_norm": 0.716543972492218, "learning_rate": 1.024e-05, "loss": 2.9926, "step": 256 }, { "epoch": 0.01, "grad_norm": 0.7203758955001831, "learning_rate": 1.0280000000000002e-05, "loss": 3.0239, "step": 257 }, { "epoch": 0.01, "grad_norm": 0.7086358070373535, "learning_rate": 1.0320000000000001e-05, "loss": 2.9674, "step": 258 }, { "epoch": 0.01, "grad_norm": 0.6801788210868835, "learning_rate": 1.036e-05, "loss": 2.9808, "step": 259 }, { "epoch": 0.01, "grad_norm": 0.7126123309135437, "learning_rate": 1.04e-05, "loss": 3.0083, "step": 260 }, { "epoch": 0.01, "grad_norm": 0.7446174025535583, "learning_rate": 1.0440000000000002e-05, "loss": 2.9397, "step": 261 }, { "epoch": 0.01, "grad_norm": 0.7366334199905396, "learning_rate": 1.0480000000000001e-05, "loss": 3.0222, "step": 262 }, { "epoch": 0.01, "grad_norm": 0.7224438786506653, "learning_rate": 1.0520000000000001e-05, "loss": 2.9666, "step": 263 }, { "epoch": 0.01, "grad_norm": 0.7296943664550781, "learning_rate": 1.056e-05, "loss": 2.9767, "step": 264 }, { "epoch": 0.01, "grad_norm": 0.7285534143447876, "learning_rate": 1.0600000000000002e-05, "loss": 2.9713, "step": 265 }, { "epoch": 0.01, "grad_norm": 0.7207460999488831, "learning_rate": 1.0640000000000001e-05, "loss": 2.9924, "step": 266 }, { "epoch": 0.01, "grad_norm": 0.7306897640228271, "learning_rate": 1.0680000000000001e-05, "loss": 3.0354, "step": 267 }, { "epoch": 0.01, "grad_norm": 0.6970009803771973, "learning_rate": 1.072e-05, "loss": 2.9905, "step": 268 }, { "epoch": 0.01, "grad_norm": 0.7516326904296875, "learning_rate": 1.0760000000000002e-05, "loss": 2.9387, "step": 269 }, { "epoch": 0.01, "grad_norm": 0.7385818958282471, "learning_rate": 1.0800000000000002e-05, "loss": 2.95, "step": 270 }, { "epoch": 0.01, "grad_norm": 0.7146047353744507, "learning_rate": 1.0840000000000001e-05, "loss": 2.929, "step": 271 }, { "epoch": 0.01, "grad_norm": 0.7485090494155884, "learning_rate": 1.0880000000000001e-05, "loss": 2.932, "step": 272 }, { "epoch": 0.01, "grad_norm": 0.7019323110580444, "learning_rate": 1.0920000000000002e-05, "loss": 3.0092, "step": 273 }, { "epoch": 0.01, "grad_norm": 0.7071720361709595, "learning_rate": 1.0960000000000002e-05, "loss": 2.9729, "step": 274 }, { "epoch": 0.01, "grad_norm": 0.7166799306869507, "learning_rate": 1.1000000000000001e-05, "loss": 2.9956, "step": 275 }, { "epoch": 0.01, "grad_norm": 0.7043001055717468, "learning_rate": 1.1040000000000001e-05, "loss": 2.9648, "step": 276 }, { "epoch": 0.01, "grad_norm": 0.6917253732681274, "learning_rate": 1.1080000000000002e-05, "loss": 3.01, "step": 277 }, { "epoch": 0.01, "grad_norm": 0.7230256199836731, "learning_rate": 1.1120000000000002e-05, "loss": 2.9972, "step": 278 }, { "epoch": 0.01, "grad_norm": 0.7071083188056946, "learning_rate": 1.1160000000000002e-05, "loss": 2.9082, "step": 279 }, { "epoch": 0.01, "grad_norm": 0.7028868794441223, "learning_rate": 1.1200000000000001e-05, "loss": 2.9577, "step": 280 }, { "epoch": 0.01, "grad_norm": 0.7002159357070923, "learning_rate": 1.1240000000000002e-05, "loss": 2.9465, "step": 281 }, { "epoch": 0.01, "grad_norm": 0.7107498645782471, "learning_rate": 1.128e-05, "loss": 2.9748, "step": 282 }, { "epoch": 0.01, "grad_norm": 0.7359018325805664, "learning_rate": 1.132e-05, "loss": 2.9724, "step": 283 }, { "epoch": 0.01, "grad_norm": 0.7131576538085938, "learning_rate": 1.136e-05, "loss": 2.9494, "step": 284 }, { "epoch": 0.01, "grad_norm": 0.7292269468307495, "learning_rate": 1.14e-05, "loss": 2.9838, "step": 285 }, { "epoch": 0.01, "grad_norm": 0.7375213503837585, "learning_rate": 1.144e-05, "loss": 2.9809, "step": 286 }, { "epoch": 0.01, "grad_norm": 0.7094799876213074, "learning_rate": 1.148e-05, "loss": 2.993, "step": 287 }, { "epoch": 0.01, "grad_norm": 0.7429577708244324, "learning_rate": 1.152e-05, "loss": 2.8905, "step": 288 }, { "epoch": 0.01, "grad_norm": 0.7328982353210449, "learning_rate": 1.156e-05, "loss": 2.9366, "step": 289 }, { "epoch": 0.01, "grad_norm": 0.7140700221061707, "learning_rate": 1.16e-05, "loss": 2.9992, "step": 290 }, { "epoch": 0.01, "grad_norm": 0.6961988210678101, "learning_rate": 1.164e-05, "loss": 2.8959, "step": 291 }, { "epoch": 0.01, "grad_norm": 0.7188063263893127, "learning_rate": 1.168e-05, "loss": 2.9282, "step": 292 }, { "epoch": 0.01, "grad_norm": 0.7033876180648804, "learning_rate": 1.172e-05, "loss": 2.9789, "step": 293 }, { "epoch": 0.01, "grad_norm": 0.6968033313751221, "learning_rate": 1.1760000000000001e-05, "loss": 2.8764, "step": 294 }, { "epoch": 0.01, "grad_norm": 0.7316171526908875, "learning_rate": 1.18e-05, "loss": 2.9158, "step": 295 }, { "epoch": 0.01, "grad_norm": 0.7288230061531067, "learning_rate": 1.184e-05, "loss": 2.9427, "step": 296 }, { "epoch": 0.01, "grad_norm": 0.7153042554855347, "learning_rate": 1.188e-05, "loss": 2.9563, "step": 297 }, { "epoch": 0.01, "grad_norm": 0.6916014552116394, "learning_rate": 1.1920000000000001e-05, "loss": 2.9446, "step": 298 }, { "epoch": 0.01, "grad_norm": 0.6955585479736328, "learning_rate": 1.196e-05, "loss": 2.9574, "step": 299 }, { "epoch": 0.01, "grad_norm": 0.7189493179321289, "learning_rate": 1.2e-05, "loss": 2.9507, "step": 300 }, { "epoch": 0.01, "grad_norm": 0.7120326161384583, "learning_rate": 1.204e-05, "loss": 2.9545, "step": 301 }, { "epoch": 0.01, "grad_norm": 0.7113020420074463, "learning_rate": 1.2080000000000001e-05, "loss": 2.9529, "step": 302 }, { "epoch": 0.01, "grad_norm": 0.6919089555740356, "learning_rate": 1.2120000000000001e-05, "loss": 2.922, "step": 303 }, { "epoch": 0.01, "grad_norm": 0.6794406175613403, "learning_rate": 1.216e-05, "loss": 2.8967, "step": 304 }, { "epoch": 0.01, "grad_norm": 0.6871897578239441, "learning_rate": 1.22e-05, "loss": 2.9408, "step": 305 }, { "epoch": 0.01, "grad_norm": 0.6826269030570984, "learning_rate": 1.2240000000000001e-05, "loss": 2.9366, "step": 306 }, { "epoch": 0.01, "grad_norm": 0.6960402131080627, "learning_rate": 1.2280000000000001e-05, "loss": 2.9211, "step": 307 }, { "epoch": 0.01, "grad_norm": 0.7198580503463745, "learning_rate": 1.232e-05, "loss": 2.901, "step": 308 }, { "epoch": 0.01, "grad_norm": 0.7064822912216187, "learning_rate": 1.236e-05, "loss": 3.0367, "step": 309 }, { "epoch": 0.01, "grad_norm": 0.6707227230072021, "learning_rate": 1.2400000000000002e-05, "loss": 2.9083, "step": 310 }, { "epoch": 0.01, "grad_norm": 0.696674644947052, "learning_rate": 1.2440000000000001e-05, "loss": 2.9226, "step": 311 }, { "epoch": 0.01, "grad_norm": 0.6905208230018616, "learning_rate": 1.248e-05, "loss": 2.9, "step": 312 }, { "epoch": 0.01, "grad_norm": 0.7072402238845825, "learning_rate": 1.252e-05, "loss": 2.9486, "step": 313 }, { "epoch": 0.01, "grad_norm": 0.7020350098609924, "learning_rate": 1.2560000000000002e-05, "loss": 2.8765, "step": 314 }, { "epoch": 0.01, "grad_norm": 0.6880193948745728, "learning_rate": 1.2600000000000001e-05, "loss": 2.9133, "step": 315 }, { "epoch": 0.01, "grad_norm": 0.6863030791282654, "learning_rate": 1.2640000000000001e-05, "loss": 2.8506, "step": 316 }, { "epoch": 0.01, "grad_norm": 0.7022314071655273, "learning_rate": 1.268e-05, "loss": 2.8672, "step": 317 }, { "epoch": 0.01, "grad_norm": 0.7113156914710999, "learning_rate": 1.2720000000000002e-05, "loss": 2.9071, "step": 318 }, { "epoch": 0.01, "grad_norm": 0.7123892903327942, "learning_rate": 1.2760000000000001e-05, "loss": 2.9003, "step": 319 }, { "epoch": 0.01, "grad_norm": 0.7179726958274841, "learning_rate": 1.2800000000000001e-05, "loss": 2.986, "step": 320 }, { "epoch": 0.01, "grad_norm": 0.7394989728927612, "learning_rate": 1.284e-05, "loss": 2.9379, "step": 321 }, { "epoch": 0.01, "grad_norm": 0.6829982399940491, "learning_rate": 1.2880000000000002e-05, "loss": 2.8756, "step": 322 }, { "epoch": 0.01, "grad_norm": 0.7195976972579956, "learning_rate": 1.2920000000000002e-05, "loss": 2.9435, "step": 323 }, { "epoch": 0.01, "grad_norm": 0.7341179251670837, "learning_rate": 1.2960000000000001e-05, "loss": 2.9394, "step": 324 }, { "epoch": 0.01, "grad_norm": 0.6563425064086914, "learning_rate": 1.3000000000000001e-05, "loss": 2.8949, "step": 325 }, { "epoch": 0.01, "grad_norm": 0.6852063536643982, "learning_rate": 1.3040000000000002e-05, "loss": 2.8471, "step": 326 }, { "epoch": 0.01, "grad_norm": 0.6816968321800232, "learning_rate": 1.3080000000000002e-05, "loss": 2.8727, "step": 327 }, { "epoch": 0.01, "grad_norm": 0.6697700619697571, "learning_rate": 1.3120000000000001e-05, "loss": 2.8435, "step": 328 }, { "epoch": 0.01, "grad_norm": 0.7308948636054993, "learning_rate": 1.3160000000000001e-05, "loss": 2.8462, "step": 329 }, { "epoch": 0.01, "grad_norm": 0.6817566752433777, "learning_rate": 1.3200000000000002e-05, "loss": 2.8388, "step": 330 }, { "epoch": 0.01, "grad_norm": 0.698800265789032, "learning_rate": 1.3240000000000002e-05, "loss": 2.8698, "step": 331 }, { "epoch": 0.01, "grad_norm": 0.6767727136611938, "learning_rate": 1.3280000000000002e-05, "loss": 2.8912, "step": 332 }, { "epoch": 0.01, "grad_norm": 0.6929364204406738, "learning_rate": 1.3320000000000001e-05, "loss": 2.8582, "step": 333 }, { "epoch": 0.01, "grad_norm": 0.7088776230812073, "learning_rate": 1.3360000000000003e-05, "loss": 2.8587, "step": 334 }, { "epoch": 0.01, "grad_norm": 0.6721728444099426, "learning_rate": 1.3400000000000002e-05, "loss": 2.8662, "step": 335 }, { "epoch": 0.01, "grad_norm": 0.6921935677528381, "learning_rate": 1.3440000000000002e-05, "loss": 2.8704, "step": 336 }, { "epoch": 0.01, "grad_norm": 0.6760930418968201, "learning_rate": 1.3480000000000001e-05, "loss": 2.8223, "step": 337 }, { "epoch": 0.01, "grad_norm": 0.7038088440895081, "learning_rate": 1.3520000000000003e-05, "loss": 2.8716, "step": 338 }, { "epoch": 0.01, "grad_norm": 0.6843159794807434, "learning_rate": 1.3560000000000002e-05, "loss": 2.832, "step": 339 }, { "epoch": 0.01, "grad_norm": 0.7381107807159424, "learning_rate": 1.3600000000000002e-05, "loss": 2.8612, "step": 340 }, { "epoch": 0.01, "grad_norm": 0.728920042514801, "learning_rate": 1.3640000000000002e-05, "loss": 2.8409, "step": 341 }, { "epoch": 0.01, "grad_norm": 0.6590824127197266, "learning_rate": 1.3680000000000003e-05, "loss": 2.8014, "step": 342 }, { "epoch": 0.01, "grad_norm": 0.7200900912284851, "learning_rate": 1.3720000000000002e-05, "loss": 2.8949, "step": 343 }, { "epoch": 0.01, "grad_norm": 0.7507455348968506, "learning_rate": 1.376e-05, "loss": 2.8645, "step": 344 }, { "epoch": 0.01, "grad_norm": 0.6519306898117065, "learning_rate": 1.38e-05, "loss": 2.8846, "step": 345 }, { "epoch": 0.01, "grad_norm": 0.65754234790802, "learning_rate": 1.384e-05, "loss": 2.831, "step": 346 }, { "epoch": 0.01, "grad_norm": 0.6758772730827332, "learning_rate": 1.3880000000000001e-05, "loss": 2.8777, "step": 347 }, { "epoch": 0.01, "grad_norm": 0.6951612234115601, "learning_rate": 1.392e-05, "loss": 2.843, "step": 348 }, { "epoch": 0.01, "grad_norm": 0.6868584156036377, "learning_rate": 1.396e-05, "loss": 2.8518, "step": 349 }, { "epoch": 0.01, "grad_norm": 0.7019930481910706, "learning_rate": 1.4e-05, "loss": 2.8578, "step": 350 }, { "epoch": 0.01, "grad_norm": 0.6601002216339111, "learning_rate": 1.4040000000000001e-05, "loss": 2.9024, "step": 351 }, { "epoch": 0.01, "grad_norm": 0.6641451716423035, "learning_rate": 1.408e-05, "loss": 2.8361, "step": 352 }, { "epoch": 0.01, "grad_norm": 0.6696758270263672, "learning_rate": 1.412e-05, "loss": 2.8426, "step": 353 }, { "epoch": 0.01, "grad_norm": 0.6642027497291565, "learning_rate": 1.416e-05, "loss": 2.7937, "step": 354 }, { "epoch": 0.01, "grad_norm": 0.6785578727722168, "learning_rate": 1.4200000000000001e-05, "loss": 2.8962, "step": 355 }, { "epoch": 0.01, "grad_norm": 0.6706116795539856, "learning_rate": 1.4240000000000001e-05, "loss": 2.9164, "step": 356 }, { "epoch": 0.01, "grad_norm": 0.6934748291969299, "learning_rate": 1.428e-05, "loss": 2.8039, "step": 357 }, { "epoch": 0.01, "grad_norm": 0.6594704985618591, "learning_rate": 1.432e-05, "loss": 2.9253, "step": 358 }, { "epoch": 0.01, "grad_norm": 0.6488956809043884, "learning_rate": 1.4360000000000001e-05, "loss": 2.856, "step": 359 }, { "epoch": 0.01, "grad_norm": 0.6686562895774841, "learning_rate": 1.4400000000000001e-05, "loss": 2.811, "step": 360 }, { "epoch": 0.01, "grad_norm": 0.6786785125732422, "learning_rate": 1.444e-05, "loss": 2.79, "step": 361 }, { "epoch": 0.01, "grad_norm": 0.6907126307487488, "learning_rate": 1.448e-05, "loss": 2.8413, "step": 362 }, { "epoch": 0.01, "grad_norm": 0.697150468826294, "learning_rate": 1.4520000000000002e-05, "loss": 2.9201, "step": 363 }, { "epoch": 0.01, "grad_norm": 0.6734716296195984, "learning_rate": 1.4560000000000001e-05, "loss": 2.8742, "step": 364 }, { "epoch": 0.01, "grad_norm": 0.6664982438087463, "learning_rate": 1.46e-05, "loss": 2.8202, "step": 365 }, { "epoch": 0.01, "grad_norm": 0.69278484582901, "learning_rate": 1.464e-05, "loss": 2.8756, "step": 366 }, { "epoch": 0.01, "grad_norm": 0.6641896963119507, "learning_rate": 1.4680000000000002e-05, "loss": 2.7929, "step": 367 }, { "epoch": 0.01, "grad_norm": 0.6322620511054993, "learning_rate": 1.4720000000000001e-05, "loss": 2.8188, "step": 368 }, { "epoch": 0.01, "grad_norm": 0.725165605545044, "learning_rate": 1.4760000000000001e-05, "loss": 2.8202, "step": 369 }, { "epoch": 0.01, "grad_norm": 0.6899129748344421, "learning_rate": 1.48e-05, "loss": 2.8277, "step": 370 }, { "epoch": 0.01, "grad_norm": 0.6651990413665771, "learning_rate": 1.4840000000000002e-05, "loss": 2.839, "step": 371 }, { "epoch": 0.01, "grad_norm": 0.6614822149276733, "learning_rate": 1.4880000000000002e-05, "loss": 2.8896, "step": 372 }, { "epoch": 0.01, "grad_norm": 0.6750214099884033, "learning_rate": 1.4920000000000001e-05, "loss": 2.7936, "step": 373 }, { "epoch": 0.01, "grad_norm": 0.6761062741279602, "learning_rate": 1.496e-05, "loss": 2.8398, "step": 374 }, { "epoch": 0.01, "grad_norm": 0.6523001194000244, "learning_rate": 1.5000000000000002e-05, "loss": 2.8743, "step": 375 }, { "epoch": 0.01, "grad_norm": 0.6744080185890198, "learning_rate": 1.5040000000000002e-05, "loss": 2.8022, "step": 376 }, { "epoch": 0.01, "grad_norm": 0.6425319314002991, "learning_rate": 1.5080000000000001e-05, "loss": 2.785, "step": 377 }, { "epoch": 0.01, "grad_norm": 0.6872668266296387, "learning_rate": 1.5120000000000001e-05, "loss": 2.8434, "step": 378 }, { "epoch": 0.01, "grad_norm": 0.6897056102752686, "learning_rate": 1.516e-05, "loss": 2.8262, "step": 379 }, { "epoch": 0.01, "grad_norm": 0.6656489968299866, "learning_rate": 1.5200000000000002e-05, "loss": 2.8067, "step": 380 }, { "epoch": 0.01, "grad_norm": 0.6480472683906555, "learning_rate": 1.5240000000000001e-05, "loss": 2.8246, "step": 381 }, { "epoch": 0.01, "grad_norm": 0.6812193989753723, "learning_rate": 1.5280000000000003e-05, "loss": 2.7835, "step": 382 }, { "epoch": 0.01, "grad_norm": 0.6591140031814575, "learning_rate": 1.5320000000000002e-05, "loss": 2.8388, "step": 383 }, { "epoch": 0.01, "grad_norm": 0.6715266704559326, "learning_rate": 1.5360000000000002e-05, "loss": 2.8536, "step": 384 }, { "epoch": 0.01, "grad_norm": 0.7042847275733948, "learning_rate": 1.54e-05, "loss": 2.7849, "step": 385 }, { "epoch": 0.01, "grad_norm": 0.6863837242126465, "learning_rate": 1.544e-05, "loss": 2.8218, "step": 386 }, { "epoch": 0.01, "grad_norm": 0.6509554982185364, "learning_rate": 1.548e-05, "loss": 2.8161, "step": 387 }, { "epoch": 0.01, "grad_norm": 0.6764253973960876, "learning_rate": 1.552e-05, "loss": 2.8487, "step": 388 }, { "epoch": 0.01, "grad_norm": 0.6829914450645447, "learning_rate": 1.556e-05, "loss": 2.8393, "step": 389 }, { "epoch": 0.01, "grad_norm": 0.6526641249656677, "learning_rate": 1.5600000000000003e-05, "loss": 2.7928, "step": 390 }, { "epoch": 0.01, "grad_norm": 0.6726005673408508, "learning_rate": 1.5640000000000003e-05, "loss": 2.7424, "step": 391 }, { "epoch": 0.01, "grad_norm": 0.6745621562004089, "learning_rate": 1.5680000000000002e-05, "loss": 2.8066, "step": 392 }, { "epoch": 0.01, "grad_norm": 0.7079181671142578, "learning_rate": 1.5720000000000002e-05, "loss": 2.8011, "step": 393 }, { "epoch": 0.01, "grad_norm": 0.7400582432746887, "learning_rate": 1.576e-05, "loss": 2.8468, "step": 394 }, { "epoch": 0.01, "grad_norm": 0.6972211003303528, "learning_rate": 1.58e-05, "loss": 2.864, "step": 395 }, { "epoch": 0.01, "grad_norm": 0.6685325503349304, "learning_rate": 1.584e-05, "loss": 2.808, "step": 396 }, { "epoch": 0.01, "grad_norm": 0.6581268310546875, "learning_rate": 1.588e-05, "loss": 2.8589, "step": 397 }, { "epoch": 0.01, "grad_norm": 0.6619047522544861, "learning_rate": 1.5920000000000003e-05, "loss": 2.8215, "step": 398 }, { "epoch": 0.01, "grad_norm": 0.6885353326797485, "learning_rate": 1.5960000000000003e-05, "loss": 2.8224, "step": 399 }, { "epoch": 0.01, "grad_norm": 0.6520381569862366, "learning_rate": 1.6000000000000003e-05, "loss": 2.88, "step": 400 }, { "epoch": 0.01, "grad_norm": 0.6369414925575256, "learning_rate": 1.6040000000000002e-05, "loss": 2.7924, "step": 401 }, { "epoch": 0.01, "grad_norm": 0.733415424823761, "learning_rate": 1.6080000000000002e-05, "loss": 2.7917, "step": 402 }, { "epoch": 0.01, "grad_norm": 0.6861952543258667, "learning_rate": 1.612e-05, "loss": 2.8369, "step": 403 }, { "epoch": 0.01, "grad_norm": 0.6843462586402893, "learning_rate": 1.616e-05, "loss": 2.8397, "step": 404 }, { "epoch": 0.01, "grad_norm": 0.6312543153762817, "learning_rate": 1.62e-05, "loss": 2.7933, "step": 405 }, { "epoch": 0.01, "grad_norm": 0.6630678176879883, "learning_rate": 1.6240000000000004e-05, "loss": 2.7953, "step": 406 }, { "epoch": 0.01, "grad_norm": 0.6870042681694031, "learning_rate": 1.628e-05, "loss": 2.8, "step": 407 }, { "epoch": 0.01, "grad_norm": 0.6712439060211182, "learning_rate": 1.632e-05, "loss": 2.8001, "step": 408 }, { "epoch": 0.01, "grad_norm": 0.6623268127441406, "learning_rate": 1.636e-05, "loss": 2.8163, "step": 409 }, { "epoch": 0.01, "grad_norm": 0.7159444689750671, "learning_rate": 1.64e-05, "loss": 2.8255, "step": 410 }, { "epoch": 0.01, "grad_norm": 0.6556423902511597, "learning_rate": 1.6440000000000002e-05, "loss": 2.7916, "step": 411 }, { "epoch": 0.01, "grad_norm": 0.7008485794067383, "learning_rate": 1.648e-05, "loss": 2.7285, "step": 412 }, { "epoch": 0.01, "grad_norm": 0.686048686504364, "learning_rate": 1.652e-05, "loss": 2.7355, "step": 413 }, { "epoch": 0.01, "grad_norm": 0.6576399207115173, "learning_rate": 1.656e-05, "loss": 2.7946, "step": 414 }, { "epoch": 0.01, "grad_norm": 0.6346274018287659, "learning_rate": 1.66e-05, "loss": 2.8302, "step": 415 }, { "epoch": 0.01, "grad_norm": 0.678077220916748, "learning_rate": 1.664e-05, "loss": 2.7957, "step": 416 }, { "epoch": 0.01, "grad_norm": 0.6638278365135193, "learning_rate": 1.668e-05, "loss": 2.7873, "step": 417 }, { "epoch": 0.01, "grad_norm": 0.7128313183784485, "learning_rate": 1.672e-05, "loss": 2.7681, "step": 418 }, { "epoch": 0.01, "grad_norm": 0.662057638168335, "learning_rate": 1.6760000000000002e-05, "loss": 2.7985, "step": 419 }, { "epoch": 0.01, "grad_norm": 0.6595914959907532, "learning_rate": 1.6800000000000002e-05, "loss": 2.7548, "step": 420 }, { "epoch": 0.01, "grad_norm": 0.6682320237159729, "learning_rate": 1.684e-05, "loss": 2.7219, "step": 421 }, { "epoch": 0.01, "grad_norm": 0.6644014120101929, "learning_rate": 1.688e-05, "loss": 2.7981, "step": 422 }, { "epoch": 0.01, "grad_norm": 0.6698048114776611, "learning_rate": 1.692e-05, "loss": 2.7955, "step": 423 }, { "epoch": 0.01, "grad_norm": 0.6910617351531982, "learning_rate": 1.696e-05, "loss": 2.7795, "step": 424 }, { "epoch": 0.01, "grad_norm": 0.6983128786087036, "learning_rate": 1.7e-05, "loss": 2.7893, "step": 425 }, { "epoch": 0.01, "grad_norm": 0.6963634490966797, "learning_rate": 1.704e-05, "loss": 2.754, "step": 426 }, { "epoch": 0.01, "grad_norm": 0.6780691742897034, "learning_rate": 1.7080000000000002e-05, "loss": 2.777, "step": 427 }, { "epoch": 0.01, "grad_norm": 0.6582500338554382, "learning_rate": 1.7120000000000002e-05, "loss": 2.7576, "step": 428 }, { "epoch": 0.01, "grad_norm": 0.6800390481948853, "learning_rate": 1.7160000000000002e-05, "loss": 2.8126, "step": 429 }, { "epoch": 0.01, "grad_norm": 0.6702778339385986, "learning_rate": 1.72e-05, "loss": 2.7482, "step": 430 }, { "epoch": 0.01, "grad_norm": 0.6659464240074158, "learning_rate": 1.724e-05, "loss": 2.803, "step": 431 }, { "epoch": 0.01, "grad_norm": 0.6686845421791077, "learning_rate": 1.728e-05, "loss": 2.7626, "step": 432 }, { "epoch": 0.01, "grad_norm": 0.6473426818847656, "learning_rate": 1.732e-05, "loss": 2.7717, "step": 433 }, { "epoch": 0.01, "grad_norm": 0.7178492546081543, "learning_rate": 1.736e-05, "loss": 2.8441, "step": 434 }, { "epoch": 0.01, "grad_norm": 0.6490538716316223, "learning_rate": 1.7400000000000003e-05, "loss": 2.7456, "step": 435 }, { "epoch": 0.01, "grad_norm": 0.6390330791473389, "learning_rate": 1.7440000000000002e-05, "loss": 2.7918, "step": 436 }, { "epoch": 0.01, "grad_norm": 0.6829019784927368, "learning_rate": 1.7480000000000002e-05, "loss": 2.8072, "step": 437 }, { "epoch": 0.01, "grad_norm": 0.6568471193313599, "learning_rate": 1.752e-05, "loss": 2.824, "step": 438 }, { "epoch": 0.01, "grad_norm": 0.6428776383399963, "learning_rate": 1.756e-05, "loss": 2.7419, "step": 439 }, { "epoch": 0.01, "grad_norm": 0.654801607131958, "learning_rate": 1.76e-05, "loss": 2.7541, "step": 440 }, { "epoch": 0.01, "grad_norm": 0.7071441411972046, "learning_rate": 1.764e-05, "loss": 2.7414, "step": 441 }, { "epoch": 0.01, "grad_norm": 0.672399640083313, "learning_rate": 1.768e-05, "loss": 2.7646, "step": 442 }, { "epoch": 0.01, "grad_norm": 0.655575692653656, "learning_rate": 1.7720000000000003e-05, "loss": 2.7776, "step": 443 }, { "epoch": 0.01, "grad_norm": 0.6810017228126526, "learning_rate": 1.7760000000000003e-05, "loss": 2.8182, "step": 444 }, { "epoch": 0.01, "grad_norm": 0.7078613042831421, "learning_rate": 1.7800000000000002e-05, "loss": 2.8253, "step": 445 }, { "epoch": 0.01, "grad_norm": 0.7234245538711548, "learning_rate": 1.7840000000000002e-05, "loss": 2.7853, "step": 446 }, { "epoch": 0.01, "grad_norm": 0.662403404712677, "learning_rate": 1.788e-05, "loss": 2.7444, "step": 447 }, { "epoch": 0.01, "grad_norm": 0.6751728057861328, "learning_rate": 1.792e-05, "loss": 2.7368, "step": 448 }, { "epoch": 0.01, "grad_norm": 0.6327471733093262, "learning_rate": 1.796e-05, "loss": 2.7995, "step": 449 }, { "epoch": 0.01, "grad_norm": 0.6822514533996582, "learning_rate": 1.8e-05, "loss": 2.8227, "step": 450 }, { "epoch": 0.02, "grad_norm": 0.6658573746681213, "learning_rate": 1.8040000000000003e-05, "loss": 2.7752, "step": 451 }, { "epoch": 0.02, "grad_norm": 0.6631488800048828, "learning_rate": 1.8080000000000003e-05, "loss": 2.7549, "step": 452 }, { "epoch": 0.02, "grad_norm": 0.6481293439865112, "learning_rate": 1.8120000000000003e-05, "loss": 2.8309, "step": 453 }, { "epoch": 0.02, "grad_norm": 0.657621443271637, "learning_rate": 1.8160000000000002e-05, "loss": 2.7902, "step": 454 }, { "epoch": 0.02, "grad_norm": 0.6684351563453674, "learning_rate": 1.8200000000000002e-05, "loss": 2.7507, "step": 455 }, { "epoch": 0.02, "grad_norm": 0.6583429574966431, "learning_rate": 1.824e-05, "loss": 2.7849, "step": 456 }, { "epoch": 0.02, "grad_norm": 0.6741542220115662, "learning_rate": 1.828e-05, "loss": 2.7141, "step": 457 }, { "epoch": 0.02, "grad_norm": 0.675751805305481, "learning_rate": 1.832e-05, "loss": 2.7368, "step": 458 }, { "epoch": 0.02, "grad_norm": 0.7003194093704224, "learning_rate": 1.8360000000000004e-05, "loss": 2.7647, "step": 459 }, { "epoch": 0.02, "grad_norm": 0.6883391737937927, "learning_rate": 1.8400000000000003e-05, "loss": 2.725, "step": 460 }, { "epoch": 0.02, "grad_norm": 0.6776552200317383, "learning_rate": 1.8440000000000003e-05, "loss": 2.7805, "step": 461 }, { "epoch": 0.02, "grad_norm": 0.6525712013244629, "learning_rate": 1.8480000000000003e-05, "loss": 2.766, "step": 462 }, { "epoch": 0.02, "grad_norm": 0.7016055583953857, "learning_rate": 1.8520000000000002e-05, "loss": 2.7154, "step": 463 }, { "epoch": 0.02, "grad_norm": 0.6612386107444763, "learning_rate": 1.8560000000000002e-05, "loss": 2.7324, "step": 464 }, { "epoch": 0.02, "grad_norm": 0.6514604687690735, "learning_rate": 1.86e-05, "loss": 2.7433, "step": 465 }, { "epoch": 0.02, "grad_norm": 0.6845662593841553, "learning_rate": 1.864e-05, "loss": 2.7655, "step": 466 }, { "epoch": 0.02, "grad_norm": 0.6435591578483582, "learning_rate": 1.8680000000000004e-05, "loss": 2.7784, "step": 467 }, { "epoch": 0.02, "grad_norm": 0.7050468325614929, "learning_rate": 1.8720000000000004e-05, "loss": 2.7106, "step": 468 }, { "epoch": 0.02, "grad_norm": 0.6791504621505737, "learning_rate": 1.876e-05, "loss": 2.7462, "step": 469 }, { "epoch": 0.02, "grad_norm": 0.6569073796272278, "learning_rate": 1.88e-05, "loss": 2.758, "step": 470 }, { "epoch": 0.02, "grad_norm": 0.6673763990402222, "learning_rate": 1.884e-05, "loss": 2.7988, "step": 471 }, { "epoch": 0.02, "grad_norm": 0.6800898313522339, "learning_rate": 1.8880000000000002e-05, "loss": 2.7622, "step": 472 }, { "epoch": 0.02, "grad_norm": 0.6763074994087219, "learning_rate": 1.8920000000000002e-05, "loss": 2.7525, "step": 473 }, { "epoch": 0.02, "grad_norm": 0.6501556634902954, "learning_rate": 1.896e-05, "loss": 2.7921, "step": 474 }, { "epoch": 0.02, "grad_norm": 0.6746877431869507, "learning_rate": 1.9e-05, "loss": 2.7625, "step": 475 }, { "epoch": 0.02, "grad_norm": 0.6819021701812744, "learning_rate": 1.904e-05, "loss": 2.697, "step": 476 }, { "epoch": 0.02, "grad_norm": 0.664782702922821, "learning_rate": 1.908e-05, "loss": 2.727, "step": 477 }, { "epoch": 0.02, "grad_norm": 0.6812900304794312, "learning_rate": 1.912e-05, "loss": 2.7274, "step": 478 }, { "epoch": 0.02, "grad_norm": 0.6716505289077759, "learning_rate": 1.916e-05, "loss": 2.7538, "step": 479 }, { "epoch": 0.02, "grad_norm": 0.7137351632118225, "learning_rate": 1.9200000000000003e-05, "loss": 2.8465, "step": 480 }, { "epoch": 0.02, "grad_norm": 0.6622009873390198, "learning_rate": 1.9240000000000002e-05, "loss": 2.701, "step": 481 }, { "epoch": 0.02, "grad_norm": 0.6714833974838257, "learning_rate": 1.9280000000000002e-05, "loss": 2.6702, "step": 482 }, { "epoch": 0.02, "grad_norm": 0.6557419300079346, "learning_rate": 1.932e-05, "loss": 2.7614, "step": 483 }, { "epoch": 0.02, "grad_norm": 0.6650912761688232, "learning_rate": 1.936e-05, "loss": 2.7574, "step": 484 }, { "epoch": 0.02, "grad_norm": 0.6602796912193298, "learning_rate": 1.94e-05, "loss": 2.681, "step": 485 }, { "epoch": 0.02, "grad_norm": 0.6280622482299805, "learning_rate": 1.944e-05, "loss": 2.7098, "step": 486 }, { "epoch": 0.02, "grad_norm": 0.6766891479492188, "learning_rate": 1.948e-05, "loss": 2.7737, "step": 487 }, { "epoch": 0.02, "grad_norm": 0.6489236950874329, "learning_rate": 1.9520000000000003e-05, "loss": 2.7599, "step": 488 }, { "epoch": 0.02, "grad_norm": 0.6757470965385437, "learning_rate": 1.9560000000000002e-05, "loss": 2.7535, "step": 489 }, { "epoch": 0.02, "grad_norm": 0.696772038936615, "learning_rate": 1.9600000000000002e-05, "loss": 2.7023, "step": 490 }, { "epoch": 0.02, "grad_norm": 0.6625874638557434, "learning_rate": 1.9640000000000002e-05, "loss": 2.7597, "step": 491 }, { "epoch": 0.02, "grad_norm": 0.6646535992622375, "learning_rate": 1.968e-05, "loss": 2.7365, "step": 492 }, { "epoch": 0.02, "grad_norm": 0.6632100343704224, "learning_rate": 1.972e-05, "loss": 2.796, "step": 493 }, { "epoch": 0.02, "grad_norm": 0.6804953217506409, "learning_rate": 1.976e-05, "loss": 2.7826, "step": 494 }, { "epoch": 0.02, "grad_norm": 0.6702829599380493, "learning_rate": 1.98e-05, "loss": 2.736, "step": 495 }, { "epoch": 0.02, "grad_norm": 0.667017936706543, "learning_rate": 1.9840000000000003e-05, "loss": 2.7403, "step": 496 }, { "epoch": 0.02, "grad_norm": 0.6766886711120605, "learning_rate": 1.9880000000000003e-05, "loss": 2.7651, "step": 497 }, { "epoch": 0.02, "grad_norm": 0.6707794666290283, "learning_rate": 1.9920000000000002e-05, "loss": 2.7039, "step": 498 }, { "epoch": 0.02, "grad_norm": 0.6904656291007996, "learning_rate": 1.9960000000000002e-05, "loss": 2.6754, "step": 499 }, { "epoch": 0.02, "grad_norm": 0.6930393576622009, "learning_rate": 2e-05, "loss": 2.7282, "step": 500 }, { "epoch": 0.02, "grad_norm": 0.6510630249977112, "learning_rate": 1.9999999943512933e-05, "loss": 2.6839, "step": 501 }, { "epoch": 0.02, "grad_norm": 0.74442458152771, "learning_rate": 1.9999999774051723e-05, "loss": 2.6939, "step": 502 }, { "epoch": 0.02, "grad_norm": 0.6496220827102661, "learning_rate": 1.9999999491616376e-05, "loss": 2.7106, "step": 503 }, { "epoch": 0.02, "grad_norm": 0.686384916305542, "learning_rate": 1.99999990962069e-05, "loss": 2.703, "step": 504 }, { "epoch": 0.02, "grad_norm": 0.6780949234962463, "learning_rate": 1.999999858782329e-05, "loss": 2.7165, "step": 505 }, { "epoch": 0.02, "grad_norm": 0.6805633306503296, "learning_rate": 1.999999796646556e-05, "loss": 2.7438, "step": 506 }, { "epoch": 0.02, "grad_norm": 0.6860983967781067, "learning_rate": 1.999999723213371e-05, "loss": 2.6235, "step": 507 }, { "epoch": 0.02, "grad_norm": 0.6692408919334412, "learning_rate": 1.9999996384827757e-05, "loss": 2.6712, "step": 508 }, { "epoch": 0.02, "grad_norm": 0.676289439201355, "learning_rate": 1.99999954245477e-05, "loss": 2.7818, "step": 509 }, { "epoch": 0.02, "grad_norm": 0.6712286472320557, "learning_rate": 1.9999994351293557e-05, "loss": 2.7149, "step": 510 }, { "epoch": 0.02, "grad_norm": 0.6756742000579834, "learning_rate": 1.9999993165065338e-05, "loss": 2.7127, "step": 511 }, { "epoch": 0.02, "grad_norm": 0.6648697853088379, "learning_rate": 1.999999186586306e-05, "loss": 2.725, "step": 512 }, { "epoch": 0.02, "grad_norm": 0.6322717666625977, "learning_rate": 1.999999045368673e-05, "loss": 2.7108, "step": 513 }, { "epoch": 0.02, "grad_norm": 0.6628721952438354, "learning_rate": 1.999998892853637e-05, "loss": 2.7401, "step": 514 }, { "epoch": 0.02, "grad_norm": 0.6629957556724548, "learning_rate": 1.9999987290411998e-05, "loss": 2.6868, "step": 515 }, { "epoch": 0.02, "grad_norm": 0.665867269039154, "learning_rate": 1.9999985539313627e-05, "loss": 2.7289, "step": 516 }, { "epoch": 0.02, "grad_norm": 0.6519331336021423, "learning_rate": 1.999998367524128e-05, "loss": 2.7796, "step": 517 }, { "epoch": 0.02, "grad_norm": 0.6539009213447571, "learning_rate": 1.999998169819498e-05, "loss": 2.7258, "step": 518 }, { "epoch": 0.02, "grad_norm": 0.6564478278160095, "learning_rate": 1.9999979608174746e-05, "loss": 2.7301, "step": 519 }, { "epoch": 0.02, "grad_norm": 0.6618817448616028, "learning_rate": 1.9999977405180604e-05, "loss": 2.7535, "step": 520 }, { "epoch": 0.02, "grad_norm": 0.6433423161506653, "learning_rate": 1.9999975089212576e-05, "loss": 2.7296, "step": 521 }, { "epoch": 0.02, "grad_norm": 0.6719511151313782, "learning_rate": 1.9999972660270695e-05, "loss": 2.7366, "step": 522 }, { "epoch": 0.02, "grad_norm": 0.6653806567192078, "learning_rate": 1.9999970118354978e-05, "loss": 2.7508, "step": 523 }, { "epoch": 0.02, "grad_norm": 0.712948739528656, "learning_rate": 1.999996746346546e-05, "loss": 2.7074, "step": 524 }, { "epoch": 0.02, "grad_norm": 0.6509090065956116, "learning_rate": 1.9999964695602172e-05, "loss": 2.72, "step": 525 }, { "epoch": 0.02, "grad_norm": 0.6476247906684875, "learning_rate": 1.9999961814765144e-05, "loss": 2.665, "step": 526 }, { "epoch": 0.02, "grad_norm": 0.6574861407279968, "learning_rate": 1.9999958820954405e-05, "loss": 2.7054, "step": 527 }, { "epoch": 0.02, "grad_norm": 0.6908071637153625, "learning_rate": 1.9999955714169994e-05, "loss": 2.6794, "step": 528 }, { "epoch": 0.02, "grad_norm": 0.6708104610443115, "learning_rate": 1.9999952494411942e-05, "loss": 2.7726, "step": 529 }, { "epoch": 0.02, "grad_norm": 0.6745717525482178, "learning_rate": 1.999994916168029e-05, "loss": 2.6955, "step": 530 }, { "epoch": 0.02, "grad_norm": 0.6689810752868652, "learning_rate": 1.9999945715975068e-05, "loss": 2.6382, "step": 531 }, { "epoch": 0.02, "grad_norm": 0.6525925397872925, "learning_rate": 1.9999942157296325e-05, "loss": 2.715, "step": 532 }, { "epoch": 0.02, "grad_norm": 0.6486679911613464, "learning_rate": 1.9999938485644096e-05, "loss": 2.7475, "step": 533 }, { "epoch": 0.02, "grad_norm": 0.6664499640464783, "learning_rate": 1.999993470101842e-05, "loss": 2.7645, "step": 534 }, { "epoch": 0.02, "grad_norm": 0.6587504744529724, "learning_rate": 1.999993080341934e-05, "loss": 2.699, "step": 535 }, { "epoch": 0.02, "grad_norm": 0.6723874807357788, "learning_rate": 1.9999926792846907e-05, "loss": 2.7214, "step": 536 }, { "epoch": 0.02, "grad_norm": 0.674460232257843, "learning_rate": 1.999992266930116e-05, "loss": 2.747, "step": 537 }, { "epoch": 0.02, "grad_norm": 0.7156085968017578, "learning_rate": 1.9999918432782147e-05, "loss": 2.7014, "step": 538 }, { "epoch": 0.02, "grad_norm": 0.6768877506256104, "learning_rate": 1.9999914083289918e-05, "loss": 2.645, "step": 539 }, { "epoch": 0.02, "grad_norm": 0.6726024746894836, "learning_rate": 1.9999909620824517e-05, "loss": 2.6898, "step": 540 }, { "epoch": 0.02, "grad_norm": 0.6556665301322937, "learning_rate": 1.9999905045386e-05, "loss": 2.7152, "step": 541 }, { "epoch": 0.02, "grad_norm": 0.6767259836196899, "learning_rate": 1.9999900356974417e-05, "loss": 2.6983, "step": 542 }, { "epoch": 0.02, "grad_norm": 0.6736220121383667, "learning_rate": 1.9999895555589816e-05, "loss": 2.6764, "step": 543 }, { "epoch": 0.02, "grad_norm": 0.6530930399894714, "learning_rate": 1.9999890641232256e-05, "loss": 2.7439, "step": 544 }, { "epoch": 0.02, "grad_norm": 0.6498753428459167, "learning_rate": 1.9999885613901796e-05, "loss": 2.6926, "step": 545 }, { "epoch": 0.02, "grad_norm": 0.6203898191452026, "learning_rate": 1.9999880473598486e-05, "loss": 2.6375, "step": 546 }, { "epoch": 0.02, "grad_norm": 0.6569789052009583, "learning_rate": 1.999987522032239e-05, "loss": 2.676, "step": 547 }, { "epoch": 0.02, "grad_norm": 0.6854923367500305, "learning_rate": 1.9999869854073563e-05, "loss": 2.676, "step": 548 }, { "epoch": 0.02, "grad_norm": 0.6644775867462158, "learning_rate": 1.9999864374852065e-05, "loss": 2.6663, "step": 549 }, { "epoch": 0.02, "grad_norm": 0.6881424188613892, "learning_rate": 1.9999858782657962e-05, "loss": 2.7408, "step": 550 }, { "epoch": 0.02, "grad_norm": 0.6853224635124207, "learning_rate": 1.9999853077491313e-05, "loss": 2.6661, "step": 551 }, { "epoch": 0.02, "grad_norm": 0.6817418932914734, "learning_rate": 1.9999847259352188e-05, "loss": 2.6988, "step": 552 }, { "epoch": 0.02, "grad_norm": 0.667364239692688, "learning_rate": 1.9999841328240642e-05, "loss": 2.6994, "step": 553 }, { "epoch": 0.02, "grad_norm": 0.6320204138755798, "learning_rate": 1.9999835284156755e-05, "loss": 2.6949, "step": 554 }, { "epoch": 0.02, "grad_norm": 0.6620521545410156, "learning_rate": 1.999982912710059e-05, "loss": 2.7165, "step": 555 }, { "epoch": 0.02, "grad_norm": 0.6537284255027771, "learning_rate": 1.9999822857072214e-05, "loss": 2.7449, "step": 556 }, { "epoch": 0.02, "grad_norm": 0.6808788776397705, "learning_rate": 1.9999816474071698e-05, "loss": 2.7075, "step": 557 }, { "epoch": 0.02, "grad_norm": 0.6649172306060791, "learning_rate": 1.999980997809912e-05, "loss": 2.7462, "step": 558 }, { "epoch": 0.02, "grad_norm": 0.6705018877983093, "learning_rate": 1.9999803369154546e-05, "loss": 2.676, "step": 559 }, { "epoch": 0.02, "grad_norm": 0.6761060953140259, "learning_rate": 1.999979664723806e-05, "loss": 2.6689, "step": 560 }, { "epoch": 0.02, "grad_norm": 0.6275860071182251, "learning_rate": 1.999978981234972e-05, "loss": 2.7209, "step": 561 }, { "epoch": 0.02, "grad_norm": 0.6734023690223694, "learning_rate": 1.9999782864489624e-05, "loss": 2.7057, "step": 562 }, { "epoch": 0.02, "grad_norm": 0.67657470703125, "learning_rate": 1.9999775803657843e-05, "loss": 2.7356, "step": 563 }, { "epoch": 0.02, "grad_norm": 0.6549201607704163, "learning_rate": 1.999976862985445e-05, "loss": 2.7061, "step": 564 }, { "epoch": 0.02, "grad_norm": 0.6668903827667236, "learning_rate": 1.9999761343079536e-05, "loss": 2.7054, "step": 565 }, { "epoch": 0.02, "grad_norm": 0.6936042904853821, "learning_rate": 1.9999753943333173e-05, "loss": 2.6769, "step": 566 }, { "epoch": 0.02, "grad_norm": 0.6518430113792419, "learning_rate": 1.9999746430615454e-05, "loss": 2.6028, "step": 567 }, { "epoch": 0.02, "grad_norm": 0.6561490893363953, "learning_rate": 1.9999738804926463e-05, "loss": 2.7336, "step": 568 }, { "epoch": 0.02, "grad_norm": 0.6570992469787598, "learning_rate": 1.999973106626628e-05, "loss": 2.6616, "step": 569 }, { "epoch": 0.02, "grad_norm": 0.6535264253616333, "learning_rate": 1.9999723214634995e-05, "loss": 2.7044, "step": 570 }, { "epoch": 0.02, "grad_norm": 0.66302490234375, "learning_rate": 1.99997152500327e-05, "loss": 2.7032, "step": 571 }, { "epoch": 0.02, "grad_norm": 0.6635570526123047, "learning_rate": 1.9999707172459483e-05, "loss": 2.6611, "step": 572 }, { "epoch": 0.02, "grad_norm": 0.6971732974052429, "learning_rate": 1.999969898191543e-05, "loss": 2.6773, "step": 573 }, { "epoch": 0.02, "grad_norm": 0.6466277837753296, "learning_rate": 1.9999690678400644e-05, "loss": 2.6378, "step": 574 }, { "epoch": 0.02, "grad_norm": 0.6631940007209778, "learning_rate": 1.9999682261915213e-05, "loss": 2.6803, "step": 575 }, { "epoch": 0.02, "grad_norm": 0.6431226134300232, "learning_rate": 1.9999673732459226e-05, "loss": 2.6544, "step": 576 }, { "epoch": 0.02, "grad_norm": 0.7220097184181213, "learning_rate": 1.999966509003279e-05, "loss": 2.6962, "step": 577 }, { "epoch": 0.02, "grad_norm": 0.6664537191390991, "learning_rate": 1.9999656334636e-05, "loss": 2.6991, "step": 578 }, { "epoch": 0.02, "grad_norm": 0.6441308259963989, "learning_rate": 1.999964746626895e-05, "loss": 2.6741, "step": 579 }, { "epoch": 0.02, "grad_norm": 0.658420205116272, "learning_rate": 1.9999638484931744e-05, "loss": 2.6795, "step": 580 }, { "epoch": 0.02, "grad_norm": 0.6778020858764648, "learning_rate": 1.9999629390624484e-05, "loss": 2.7202, "step": 581 }, { "epoch": 0.02, "grad_norm": 0.6455041170120239, "learning_rate": 1.9999620183347267e-05, "loss": 2.7167, "step": 582 }, { "epoch": 0.02, "grad_norm": 0.6703903675079346, "learning_rate": 1.9999610863100205e-05, "loss": 2.7611, "step": 583 }, { "epoch": 0.02, "grad_norm": 0.6881388425827026, "learning_rate": 1.99996014298834e-05, "loss": 2.6577, "step": 584 }, { "epoch": 0.02, "grad_norm": 0.6835602521896362, "learning_rate": 1.9999591883696958e-05, "loss": 2.6309, "step": 585 }, { "epoch": 0.02, "grad_norm": 0.7014316916465759, "learning_rate": 1.999958222454099e-05, "loss": 2.6758, "step": 586 }, { "epoch": 0.02, "grad_norm": 0.682877779006958, "learning_rate": 1.99995724524156e-05, "loss": 2.6665, "step": 587 }, { "epoch": 0.02, "grad_norm": 0.6595706939697266, "learning_rate": 1.99995625673209e-05, "loss": 2.7068, "step": 588 }, { "epoch": 0.02, "grad_norm": 0.6841151118278503, "learning_rate": 1.9999552569257003e-05, "loss": 2.6623, "step": 589 }, { "epoch": 0.02, "grad_norm": 0.6765275001525879, "learning_rate": 1.999954245822402e-05, "loss": 2.638, "step": 590 }, { "epoch": 0.02, "grad_norm": 0.6671230792999268, "learning_rate": 1.9999532234222067e-05, "loss": 2.6075, "step": 591 }, { "epoch": 0.02, "grad_norm": 0.6446040868759155, "learning_rate": 1.999952189725126e-05, "loss": 2.6693, "step": 592 }, { "epoch": 0.02, "grad_norm": 0.6680780053138733, "learning_rate": 1.9999511447311713e-05, "loss": 2.6921, "step": 593 }, { "epoch": 0.02, "grad_norm": 0.6937766075134277, "learning_rate": 1.999950088440355e-05, "loss": 2.7175, "step": 594 }, { "epoch": 0.02, "grad_norm": 0.7104864120483398, "learning_rate": 1.9999490208526884e-05, "loss": 2.7056, "step": 595 }, { "epoch": 0.02, "grad_norm": 0.6789050698280334, "learning_rate": 1.9999479419681837e-05, "loss": 2.6834, "step": 596 }, { "epoch": 0.02, "grad_norm": 0.6690239906311035, "learning_rate": 1.9999468517868534e-05, "loss": 2.7016, "step": 597 }, { "epoch": 0.02, "grad_norm": 0.6288480758666992, "learning_rate": 1.9999457503087094e-05, "loss": 2.6674, "step": 598 }, { "epoch": 0.02, "grad_norm": 0.6539272665977478, "learning_rate": 1.9999446375337642e-05, "loss": 2.6529, "step": 599 }, { "epoch": 0.02, "grad_norm": 0.6495893597602844, "learning_rate": 1.9999435134620308e-05, "loss": 2.6728, "step": 600 }, { "epoch": 0.02, "grad_norm": 0.6763492226600647, "learning_rate": 1.9999423780935216e-05, "loss": 2.6764, "step": 601 }, { "epoch": 0.02, "grad_norm": 0.6604596376419067, "learning_rate": 1.9999412314282495e-05, "loss": 2.6782, "step": 602 }, { "epoch": 0.02, "grad_norm": 0.6613422632217407, "learning_rate": 1.999940073466227e-05, "loss": 2.6759, "step": 603 }, { "epoch": 0.02, "grad_norm": 0.6469343304634094, "learning_rate": 1.9999389042074678e-05, "loss": 2.5908, "step": 604 }, { "epoch": 0.02, "grad_norm": 0.6556013822555542, "learning_rate": 1.9999377236519853e-05, "loss": 2.699, "step": 605 }, { "epoch": 0.02, "grad_norm": 0.6714016199111938, "learning_rate": 1.9999365317997918e-05, "loss": 2.6614, "step": 606 }, { "epoch": 0.02, "grad_norm": 0.6952104568481445, "learning_rate": 1.9999353286509015e-05, "loss": 2.6303, "step": 607 }, { "epoch": 0.02, "grad_norm": 0.6528002619743347, "learning_rate": 1.9999341142053282e-05, "loss": 2.6291, "step": 608 }, { "epoch": 0.02, "grad_norm": 0.6445615887641907, "learning_rate": 1.999932888463085e-05, "loss": 2.5913, "step": 609 }, { "epoch": 0.02, "grad_norm": 0.6463565826416016, "learning_rate": 1.999931651424186e-05, "loss": 2.6824, "step": 610 }, { "epoch": 0.02, "grad_norm": 0.73136967420578, "learning_rate": 1.9999304030886454e-05, "loss": 2.6801, "step": 611 }, { "epoch": 0.02, "grad_norm": 0.6517104506492615, "learning_rate": 1.9999291434564774e-05, "loss": 2.6405, "step": 612 }, { "epoch": 0.02, "grad_norm": 0.6433521509170532, "learning_rate": 1.9999278725276955e-05, "loss": 2.6644, "step": 613 }, { "epoch": 0.02, "grad_norm": 0.619030237197876, "learning_rate": 1.9999265903023147e-05, "loss": 2.641, "step": 614 }, { "epoch": 0.02, "grad_norm": 0.7042115330696106, "learning_rate": 1.999925296780349e-05, "loss": 2.6658, "step": 615 }, { "epoch": 0.02, "grad_norm": 0.6369872689247131, "learning_rate": 1.9999239919618138e-05, "loss": 2.5443, "step": 616 }, { "epoch": 0.02, "grad_norm": 0.6518825888633728, "learning_rate": 1.999922675846723e-05, "loss": 2.6424, "step": 617 }, { "epoch": 0.02, "grad_norm": 0.6387699246406555, "learning_rate": 1.9999213484350917e-05, "loss": 2.6548, "step": 618 }, { "epoch": 0.02, "grad_norm": 0.6791165471076965, "learning_rate": 1.9999200097269354e-05, "loss": 2.6927, "step": 619 }, { "epoch": 0.02, "grad_norm": 0.660203754901886, "learning_rate": 1.9999186597222685e-05, "loss": 2.6376, "step": 620 }, { "epoch": 0.02, "grad_norm": 0.6412535905838013, "learning_rate": 1.999917298421107e-05, "loss": 2.6069, "step": 621 }, { "epoch": 0.02, "grad_norm": 0.6473847031593323, "learning_rate": 1.9999159258234658e-05, "loss": 2.6949, "step": 622 }, { "epoch": 0.02, "grad_norm": 0.6589338779449463, "learning_rate": 1.99991454192936e-05, "loss": 2.6337, "step": 623 }, { "epoch": 0.02, "grad_norm": 0.6539522409439087, "learning_rate": 1.9999131467388062e-05, "loss": 2.672, "step": 624 }, { "epoch": 0.02, "grad_norm": 0.6796513795852661, "learning_rate": 1.9999117402518194e-05, "loss": 2.6508, "step": 625 }, { "epoch": 0.02, "grad_norm": 0.6824401021003723, "learning_rate": 1.999910322468416e-05, "loss": 2.6328, "step": 626 }, { "epoch": 0.02, "grad_norm": 0.6502139568328857, "learning_rate": 1.9999088933886117e-05, "loss": 2.7286, "step": 627 }, { "epoch": 0.02, "grad_norm": 0.6533864736557007, "learning_rate": 1.999907453012423e-05, "loss": 2.5907, "step": 628 }, { "epoch": 0.02, "grad_norm": 0.6478393077850342, "learning_rate": 1.999906001339865e-05, "loss": 2.5878, "step": 629 }, { "epoch": 0.02, "grad_norm": 0.6769579648971558, "learning_rate": 1.999904538370956e-05, "loss": 2.6677, "step": 630 }, { "epoch": 0.02, "grad_norm": 0.6979731321334839, "learning_rate": 1.9999030641057108e-05, "loss": 2.6418, "step": 631 }, { "epoch": 0.02, "grad_norm": 0.6344602704048157, "learning_rate": 1.9999015785441472e-05, "loss": 2.6724, "step": 632 }, { "epoch": 0.02, "grad_norm": 0.6661173105239868, "learning_rate": 1.9999000816862812e-05, "loss": 2.6335, "step": 633 }, { "epoch": 0.02, "grad_norm": 0.6379936933517456, "learning_rate": 1.9998985735321304e-05, "loss": 2.6162, "step": 634 }, { "epoch": 0.02, "grad_norm": 0.646026074886322, "learning_rate": 1.9998970540817114e-05, "loss": 2.6188, "step": 635 }, { "epoch": 0.02, "grad_norm": 0.659218430519104, "learning_rate": 1.9998955233350418e-05, "loss": 2.6599, "step": 636 }, { "epoch": 0.02, "grad_norm": 0.7054409980773926, "learning_rate": 1.999893981292138e-05, "loss": 2.6265, "step": 637 }, { "epoch": 0.02, "grad_norm": 0.6546733379364014, "learning_rate": 1.9998924279530185e-05, "loss": 2.6258, "step": 638 }, { "epoch": 0.02, "grad_norm": 0.6818681359291077, "learning_rate": 1.9998908633177e-05, "loss": 2.6579, "step": 639 }, { "epoch": 0.02, "grad_norm": 0.6307359933853149, "learning_rate": 1.999889287386201e-05, "loss": 2.6183, "step": 640 }, { "epoch": 0.02, "grad_norm": 0.6655917763710022, "learning_rate": 1.999887700158538e-05, "loss": 2.7134, "step": 641 }, { "epoch": 0.02, "grad_norm": 0.6762456297874451, "learning_rate": 1.9998861016347305e-05, "loss": 2.6249, "step": 642 }, { "epoch": 0.02, "grad_norm": 0.6500999331474304, "learning_rate": 1.9998844918147956e-05, "loss": 2.6587, "step": 643 }, { "epoch": 0.02, "grad_norm": 0.6819188594818115, "learning_rate": 1.9998828706987517e-05, "loss": 2.6848, "step": 644 }, { "epoch": 0.02, "grad_norm": 0.6916864514350891, "learning_rate": 1.9998812382866172e-05, "loss": 2.6942, "step": 645 }, { "epoch": 0.02, "grad_norm": 0.6661266088485718, "learning_rate": 1.9998795945784102e-05, "loss": 2.6034, "step": 646 }, { "epoch": 0.02, "grad_norm": 0.777336835861206, "learning_rate": 1.9998779395741496e-05, "loss": 2.6785, "step": 647 }, { "epoch": 0.02, "grad_norm": 0.6751137375831604, "learning_rate": 1.9998762732738538e-05, "loss": 2.6326, "step": 648 }, { "epoch": 0.02, "grad_norm": 0.667270839214325, "learning_rate": 1.9998745956775422e-05, "loss": 2.6204, "step": 649 }, { "epoch": 0.02, "grad_norm": 0.6546599864959717, "learning_rate": 1.9998729067852335e-05, "loss": 2.6316, "step": 650 }, { "epoch": 0.02, "grad_norm": 0.6469268202781677, "learning_rate": 1.9998712065969465e-05, "loss": 2.6385, "step": 651 }, { "epoch": 0.02, "grad_norm": 0.6478886604309082, "learning_rate": 1.9998694951127007e-05, "loss": 2.6157, "step": 652 }, { "epoch": 0.02, "grad_norm": 0.6824221014976501, "learning_rate": 1.999867772332515e-05, "loss": 2.617, "step": 653 }, { "epoch": 0.02, "grad_norm": 0.6531621217727661, "learning_rate": 1.9998660382564097e-05, "loss": 2.5561, "step": 654 }, { "epoch": 0.02, "grad_norm": 0.6488674283027649, "learning_rate": 1.9998642928844035e-05, "loss": 2.5838, "step": 655 }, { "epoch": 0.02, "grad_norm": 0.6546266078948975, "learning_rate": 1.9998625362165166e-05, "loss": 2.7035, "step": 656 }, { "epoch": 0.02, "grad_norm": 0.6306750774383545, "learning_rate": 1.9998607682527688e-05, "loss": 2.635, "step": 657 }, { "epoch": 0.02, "grad_norm": 0.675563633441925, "learning_rate": 1.9998589889931797e-05, "loss": 2.6274, "step": 658 }, { "epoch": 0.02, "grad_norm": 0.6688238382339478, "learning_rate": 1.99985719843777e-05, "loss": 2.6487, "step": 659 }, { "epoch": 0.02, "grad_norm": 0.6733666658401489, "learning_rate": 1.9998553965865598e-05, "loss": 2.6012, "step": 660 }, { "epoch": 0.02, "grad_norm": 0.6428125500679016, "learning_rate": 1.999853583439569e-05, "loss": 2.6639, "step": 661 }, { "epoch": 0.02, "grad_norm": 0.714256227016449, "learning_rate": 1.9998517589968183e-05, "loss": 2.6594, "step": 662 }, { "epoch": 0.02, "grad_norm": 0.6464070677757263, "learning_rate": 1.999849923258329e-05, "loss": 2.674, "step": 663 }, { "epoch": 0.02, "grad_norm": 0.6534208059310913, "learning_rate": 1.9998480762241206e-05, "loss": 2.626, "step": 664 }, { "epoch": 0.02, "grad_norm": 0.6571592688560486, "learning_rate": 1.999846217894215e-05, "loss": 2.6457, "step": 665 }, { "epoch": 0.02, "grad_norm": 0.6504070162773132, "learning_rate": 1.9998443482686325e-05, "loss": 2.6566, "step": 666 }, { "epoch": 0.02, "grad_norm": 0.6504026651382446, "learning_rate": 1.9998424673473945e-05, "loss": 2.622, "step": 667 }, { "epoch": 0.02, "grad_norm": 0.6780567765235901, "learning_rate": 1.9998405751305226e-05, "loss": 2.6311, "step": 668 }, { "epoch": 0.02, "grad_norm": 0.6685227155685425, "learning_rate": 1.9998386716180377e-05, "loss": 2.7076, "step": 669 }, { "epoch": 0.02, "grad_norm": 0.7406482100486755, "learning_rate": 1.9998367568099612e-05, "loss": 2.5774, "step": 670 }, { "epoch": 0.02, "grad_norm": 0.6598697900772095, "learning_rate": 1.9998348307063154e-05, "loss": 2.6187, "step": 671 }, { "epoch": 0.02, "grad_norm": 0.633364737033844, "learning_rate": 1.9998328933071215e-05, "loss": 2.6663, "step": 672 }, { "epoch": 0.02, "grad_norm": 0.6449758410453796, "learning_rate": 1.9998309446124013e-05, "loss": 2.5952, "step": 673 }, { "epoch": 0.02, "grad_norm": 0.6687312722206116, "learning_rate": 1.9998289846221772e-05, "loss": 2.624, "step": 674 }, { "epoch": 0.02, "grad_norm": 0.6782962679862976, "learning_rate": 1.9998270133364713e-05, "loss": 2.6179, "step": 675 }, { "epoch": 0.02, "grad_norm": 0.6188567280769348, "learning_rate": 1.9998250307553056e-05, "loss": 2.5809, "step": 676 }, { "epoch": 0.02, "grad_norm": 0.6713731288909912, "learning_rate": 1.999823036878703e-05, "loss": 2.5994, "step": 677 }, { "epoch": 0.02, "grad_norm": 0.6576871871948242, "learning_rate": 1.9998210317066853e-05, "loss": 2.5337, "step": 678 }, { "epoch": 0.02, "grad_norm": 0.6445668339729309, "learning_rate": 1.9998190152392757e-05, "loss": 2.6452, "step": 679 }, { "epoch": 0.02, "grad_norm": 0.6413486003875732, "learning_rate": 1.9998169874764968e-05, "loss": 2.6452, "step": 680 }, { "epoch": 0.02, "grad_norm": 0.6560171842575073, "learning_rate": 1.9998149484183712e-05, "loss": 2.5967, "step": 681 }, { "epoch": 0.02, "grad_norm": 0.6555939316749573, "learning_rate": 1.9998128980649228e-05, "loss": 2.6335, "step": 682 }, { "epoch": 0.02, "grad_norm": 0.6610609889030457, "learning_rate": 1.999810836416174e-05, "loss": 2.6311, "step": 683 }, { "epoch": 0.02, "grad_norm": 0.6651795506477356, "learning_rate": 1.999808763472148e-05, "loss": 2.6476, "step": 684 }, { "epoch": 0.02, "grad_norm": 0.6404531002044678, "learning_rate": 1.999806679232869e-05, "loss": 2.6348, "step": 685 }, { "epoch": 0.02, "grad_norm": 0.6947271823883057, "learning_rate": 1.99980458369836e-05, "loss": 2.597, "step": 686 }, { "epoch": 0.02, "grad_norm": 0.6479017734527588, "learning_rate": 1.9998024768686445e-05, "loss": 2.6128, "step": 687 }, { "epoch": 0.02, "grad_norm": 0.6455385684967041, "learning_rate": 1.999800358743747e-05, "loss": 2.6298, "step": 688 }, { "epoch": 0.02, "grad_norm": 0.6617756485939026, "learning_rate": 1.9997982293236906e-05, "loss": 2.632, "step": 689 }, { "epoch": 0.02, "grad_norm": 0.6836099028587341, "learning_rate": 1.9997960886084996e-05, "loss": 2.591, "step": 690 }, { "epoch": 0.02, "grad_norm": 0.6432330012321472, "learning_rate": 1.9997939365981988e-05, "loss": 2.6081, "step": 691 }, { "epoch": 0.02, "grad_norm": 0.6443216800689697, "learning_rate": 1.999791773292812e-05, "loss": 2.6129, "step": 692 }, { "epoch": 0.02, "grad_norm": 0.6464120745658875, "learning_rate": 1.9997895986923634e-05, "loss": 2.6489, "step": 693 }, { "epoch": 0.02, "grad_norm": 0.6480265855789185, "learning_rate": 1.9997874127968778e-05, "loss": 2.6174, "step": 694 }, { "epoch": 0.02, "grad_norm": 0.6874150037765503, "learning_rate": 1.99978521560638e-05, "loss": 2.6843, "step": 695 }, { "epoch": 0.02, "grad_norm": 0.6760188937187195, "learning_rate": 1.999783007120895e-05, "loss": 2.6175, "step": 696 }, { "epoch": 0.02, "grad_norm": 0.6596389412879944, "learning_rate": 1.9997807873404477e-05, "loss": 2.6296, "step": 697 }, { "epoch": 0.02, "grad_norm": 0.6853276491165161, "learning_rate": 1.9997785562650626e-05, "loss": 2.616, "step": 698 }, { "epoch": 0.02, "grad_norm": 0.6755525469779968, "learning_rate": 1.9997763138947653e-05, "loss": 2.6266, "step": 699 }, { "epoch": 0.02, "grad_norm": 0.6434488296508789, "learning_rate": 1.999774060229581e-05, "loss": 2.6305, "step": 700 }, { "epoch": 0.02, "grad_norm": 0.634428858757019, "learning_rate": 1.9997717952695357e-05, "loss": 2.5954, "step": 701 }, { "epoch": 0.02, "grad_norm": 0.6757248640060425, "learning_rate": 1.9997695190146543e-05, "loss": 2.6468, "step": 702 }, { "epoch": 0.02, "grad_norm": 0.6874866485595703, "learning_rate": 1.999767231464963e-05, "loss": 2.581, "step": 703 }, { "epoch": 0.02, "grad_norm": 0.6547470092773438, "learning_rate": 1.9997649326204874e-05, "loss": 2.6262, "step": 704 }, { "epoch": 0.02, "grad_norm": 0.6525161862373352, "learning_rate": 1.9997626224812533e-05, "loss": 2.56, "step": 705 }, { "epoch": 0.02, "grad_norm": 0.6906682848930359, "learning_rate": 1.999760301047287e-05, "loss": 2.6286, "step": 706 }, { "epoch": 0.02, "grad_norm": 0.6766973733901978, "learning_rate": 1.999757968318615e-05, "loss": 2.6347, "step": 707 }, { "epoch": 0.02, "grad_norm": 0.6386931538581848, "learning_rate": 1.9997556242952633e-05, "loss": 2.5733, "step": 708 }, { "epoch": 0.02, "grad_norm": 0.6369397640228271, "learning_rate": 1.9997532689772584e-05, "loss": 2.5631, "step": 709 }, { "epoch": 0.02, "grad_norm": 0.6598371267318726, "learning_rate": 1.999750902364627e-05, "loss": 2.6438, "step": 710 }, { "epoch": 0.02, "grad_norm": 0.630191445350647, "learning_rate": 1.999748524457396e-05, "loss": 2.5638, "step": 711 }, { "epoch": 0.02, "grad_norm": 0.6679804921150208, "learning_rate": 1.999746135255592e-05, "loss": 2.6384, "step": 712 }, { "epoch": 0.02, "grad_norm": 0.6520076394081116, "learning_rate": 1.999743734759242e-05, "loss": 2.6141, "step": 713 }, { "epoch": 0.02, "grad_norm": 0.6715118885040283, "learning_rate": 1.999741322968373e-05, "loss": 2.5587, "step": 714 }, { "epoch": 0.02, "grad_norm": 0.683879017829895, "learning_rate": 1.9997388998830125e-05, "loss": 2.6455, "step": 715 }, { "epoch": 0.02, "grad_norm": 0.6658943891525269, "learning_rate": 1.999736465503188e-05, "loss": 2.6418, "step": 716 }, { "epoch": 0.02, "grad_norm": 0.6488131880760193, "learning_rate": 1.9997340198289266e-05, "loss": 2.5955, "step": 717 }, { "epoch": 0.02, "grad_norm": 0.6493475437164307, "learning_rate": 1.9997315628602564e-05, "loss": 2.6433, "step": 718 }, { "epoch": 0.02, "grad_norm": 0.6291093230247498, "learning_rate": 1.999729094597205e-05, "loss": 2.5813, "step": 719 }, { "epoch": 0.02, "grad_norm": 0.6662304997444153, "learning_rate": 1.9997266150398e-05, "loss": 2.6456, "step": 720 }, { "epoch": 0.02, "grad_norm": 0.63725745677948, "learning_rate": 1.9997241241880695e-05, "loss": 2.6204, "step": 721 }, { "epoch": 0.02, "grad_norm": 0.652138888835907, "learning_rate": 1.9997216220420415e-05, "loss": 2.6295, "step": 722 }, { "epoch": 0.02, "grad_norm": 0.654179573059082, "learning_rate": 1.9997191086017454e-05, "loss": 2.6221, "step": 723 }, { "epoch": 0.02, "grad_norm": 0.6462941765785217, "learning_rate": 1.9997165838672078e-05, "loss": 2.603, "step": 724 }, { "epoch": 0.02, "grad_norm": 0.6551197171211243, "learning_rate": 1.9997140478384586e-05, "loss": 2.6124, "step": 725 }, { "epoch": 0.02, "grad_norm": 0.6915521025657654, "learning_rate": 1.999711500515526e-05, "loss": 2.522, "step": 726 }, { "epoch": 0.02, "grad_norm": 0.6308431625366211, "learning_rate": 1.9997089418984385e-05, "loss": 2.5981, "step": 727 }, { "epoch": 0.02, "grad_norm": 0.6642795205116272, "learning_rate": 1.9997063719872252e-05, "loss": 2.5855, "step": 728 }, { "epoch": 0.02, "grad_norm": 0.6863251328468323, "learning_rate": 1.9997037907819155e-05, "loss": 2.5617, "step": 729 }, { "epoch": 0.02, "grad_norm": 0.6366772055625916, "learning_rate": 1.9997011982825382e-05, "loss": 2.6026, "step": 730 }, { "epoch": 0.02, "grad_norm": 0.6680012345314026, "learning_rate": 1.9996985944891223e-05, "loss": 2.5771, "step": 731 }, { "epoch": 0.02, "grad_norm": 0.6940551996231079, "learning_rate": 1.999695979401698e-05, "loss": 2.5927, "step": 732 }, { "epoch": 0.02, "grad_norm": 0.6337440609931946, "learning_rate": 1.999693353020294e-05, "loss": 2.5422, "step": 733 }, { "epoch": 0.02, "grad_norm": 0.7387632131576538, "learning_rate": 1.9996907153449408e-05, "loss": 2.5464, "step": 734 }, { "epoch": 0.02, "grad_norm": 0.654753565788269, "learning_rate": 1.9996880663756678e-05, "loss": 2.5678, "step": 735 }, { "epoch": 0.02, "grad_norm": 0.6943520307540894, "learning_rate": 1.9996854061125044e-05, "loss": 2.6603, "step": 736 }, { "epoch": 0.02, "grad_norm": 0.6481813788414001, "learning_rate": 1.9996827345554814e-05, "loss": 2.5998, "step": 737 }, { "epoch": 0.02, "grad_norm": 0.6417766213417053, "learning_rate": 1.999680051704629e-05, "loss": 2.5979, "step": 738 }, { "epoch": 0.02, "grad_norm": 0.6641683578491211, "learning_rate": 1.999677357559977e-05, "loss": 2.5453, "step": 739 }, { "epoch": 0.02, "grad_norm": 0.6462195515632629, "learning_rate": 1.9996746521215558e-05, "loss": 2.5863, "step": 740 }, { "epoch": 0.02, "grad_norm": 0.6744074821472168, "learning_rate": 1.9996719353893967e-05, "loss": 2.614, "step": 741 }, { "epoch": 0.02, "grad_norm": 0.6678904294967651, "learning_rate": 1.9996692073635297e-05, "loss": 2.6401, "step": 742 }, { "epoch": 0.02, "grad_norm": 0.6701170802116394, "learning_rate": 1.999666468043986e-05, "loss": 2.6479, "step": 743 }, { "epoch": 0.02, "grad_norm": 0.6428682208061218, "learning_rate": 1.999663717430796e-05, "loss": 2.576, "step": 744 }, { "epoch": 0.02, "grad_norm": 0.6440454721450806, "learning_rate": 1.9996609555239918e-05, "loss": 2.6177, "step": 745 }, { "epoch": 0.02, "grad_norm": 0.6575391292572021, "learning_rate": 1.9996581823236037e-05, "loss": 2.5527, "step": 746 }, { "epoch": 0.02, "grad_norm": 0.6564323902130127, "learning_rate": 1.9996553978296632e-05, "loss": 2.6262, "step": 747 }, { "epoch": 0.02, "grad_norm": 0.6306195259094238, "learning_rate": 1.9996526020422024e-05, "loss": 2.613, "step": 748 }, { "epoch": 0.02, "grad_norm": 0.6638990640640259, "learning_rate": 1.9996497949612516e-05, "loss": 2.6113, "step": 749 }, { "epoch": 0.02, "grad_norm": 0.6677066683769226, "learning_rate": 1.9996469765868437e-05, "loss": 2.6854, "step": 750 }, { "epoch": 0.02, "grad_norm": 0.6621503829956055, "learning_rate": 1.99964414691901e-05, "loss": 2.64, "step": 751 }, { "epoch": 0.03, "grad_norm": 0.629335880279541, "learning_rate": 1.9996413059577827e-05, "loss": 2.5173, "step": 752 }, { "epoch": 0.03, "grad_norm": 0.633747398853302, "learning_rate": 1.9996384537031936e-05, "loss": 2.5694, "step": 753 }, { "epoch": 0.03, "grad_norm": 0.6534228324890137, "learning_rate": 1.9996355901552752e-05, "loss": 2.5989, "step": 754 }, { "epoch": 0.03, "grad_norm": 0.6445450186729431, "learning_rate": 1.9996327153140596e-05, "loss": 2.603, "step": 755 }, { "epoch": 0.03, "grad_norm": 0.6484500169754028, "learning_rate": 1.9996298291795798e-05, "loss": 2.6099, "step": 756 }, { "epoch": 0.03, "grad_norm": 0.6291806101799011, "learning_rate": 1.9996269317518675e-05, "loss": 2.6044, "step": 757 }, { "epoch": 0.03, "grad_norm": 0.6487911939620972, "learning_rate": 1.9996240230309562e-05, "loss": 2.5647, "step": 758 }, { "epoch": 0.03, "grad_norm": 0.6223908066749573, "learning_rate": 1.9996211030168786e-05, "loss": 2.5609, "step": 759 }, { "epoch": 0.03, "grad_norm": 0.6909140348434448, "learning_rate": 1.999618171709668e-05, "loss": 2.6373, "step": 760 }, { "epoch": 0.03, "grad_norm": 0.642636239528656, "learning_rate": 1.9996152291093564e-05, "loss": 2.5481, "step": 761 }, { "epoch": 0.03, "grad_norm": 0.6456642150878906, "learning_rate": 1.9996122752159782e-05, "loss": 2.5837, "step": 762 }, { "epoch": 0.03, "grad_norm": 0.6698892116546631, "learning_rate": 1.999609310029566e-05, "loss": 2.5738, "step": 763 }, { "epoch": 0.03, "grad_norm": 0.6802642941474915, "learning_rate": 1.999606333550154e-05, "loss": 2.6074, "step": 764 }, { "epoch": 0.03, "grad_norm": 0.6410853862762451, "learning_rate": 1.9996033457777753e-05, "loss": 2.5666, "step": 765 }, { "epoch": 0.03, "grad_norm": 0.7001330852508545, "learning_rate": 1.9996003467124638e-05, "loss": 2.5943, "step": 766 }, { "epoch": 0.03, "grad_norm": 0.6452129483222961, "learning_rate": 1.9995973363542535e-05, "loss": 2.5296, "step": 767 }, { "epoch": 0.03, "grad_norm": 0.7368355393409729, "learning_rate": 1.9995943147031784e-05, "loss": 2.5956, "step": 768 }, { "epoch": 0.03, "grad_norm": 0.6864637732505798, "learning_rate": 1.9995912817592724e-05, "loss": 2.6749, "step": 769 }, { "epoch": 0.03, "grad_norm": 0.6283546090126038, "learning_rate": 1.99958823752257e-05, "loss": 2.5796, "step": 770 }, { "epoch": 0.03, "grad_norm": 0.6918172240257263, "learning_rate": 1.9995851819931053e-05, "loss": 2.5778, "step": 771 }, { "epoch": 0.03, "grad_norm": 0.6927192211151123, "learning_rate": 1.999582115170913e-05, "loss": 2.5523, "step": 772 }, { "epoch": 0.03, "grad_norm": 0.7102265954017639, "learning_rate": 1.9995790370560284e-05, "loss": 2.6021, "step": 773 }, { "epoch": 0.03, "grad_norm": 0.6565168499946594, "learning_rate": 1.999575947648485e-05, "loss": 2.5271, "step": 774 }, { "epoch": 0.03, "grad_norm": 0.6337746977806091, "learning_rate": 1.9995728469483185e-05, "loss": 2.6003, "step": 775 }, { "epoch": 0.03, "grad_norm": 0.6818331480026245, "learning_rate": 1.999569734955564e-05, "loss": 2.6418, "step": 776 }, { "epoch": 0.03, "grad_norm": 0.6816015839576721, "learning_rate": 1.9995666116702562e-05, "loss": 2.6616, "step": 777 }, { "epoch": 0.03, "grad_norm": 0.6537896990776062, "learning_rate": 1.9995634770924308e-05, "loss": 2.6179, "step": 778 }, { "epoch": 0.03, "grad_norm": 0.6448139548301697, "learning_rate": 1.999560331222123e-05, "loss": 2.5361, "step": 779 }, { "epoch": 0.03, "grad_norm": 0.6397492289543152, "learning_rate": 1.9995571740593682e-05, "loss": 2.5291, "step": 780 }, { "epoch": 0.03, "grad_norm": 0.6577461957931519, "learning_rate": 1.9995540056042025e-05, "loss": 2.5625, "step": 781 }, { "epoch": 0.03, "grad_norm": 0.6541372537612915, "learning_rate": 1.9995508258566613e-05, "loss": 2.5595, "step": 782 }, { "epoch": 0.03, "grad_norm": 0.7245307564735413, "learning_rate": 1.9995476348167807e-05, "loss": 2.6065, "step": 783 }, { "epoch": 0.03, "grad_norm": 0.6526775360107422, "learning_rate": 1.9995444324845967e-05, "loss": 2.5722, "step": 784 }, { "epoch": 0.03, "grad_norm": 0.6915601491928101, "learning_rate": 1.9995412188601455e-05, "loss": 2.5494, "step": 785 }, { "epoch": 0.03, "grad_norm": 0.6616017818450928, "learning_rate": 1.9995379939434634e-05, "loss": 2.5553, "step": 786 }, { "epoch": 0.03, "grad_norm": 0.6422218680381775, "learning_rate": 1.999534757734587e-05, "loss": 2.5838, "step": 787 }, { "epoch": 0.03, "grad_norm": 0.6715728044509888, "learning_rate": 1.999531510233552e-05, "loss": 2.6227, "step": 788 }, { "epoch": 0.03, "grad_norm": 0.6504473686218262, "learning_rate": 1.9995282514403965e-05, "loss": 2.5664, "step": 789 }, { "epoch": 0.03, "grad_norm": 0.649907648563385, "learning_rate": 1.9995249813551566e-05, "loss": 2.5876, "step": 790 }, { "epoch": 0.03, "grad_norm": 0.6391369104385376, "learning_rate": 1.999521699977869e-05, "loss": 2.6225, "step": 791 }, { "epoch": 0.03, "grad_norm": 0.6849904656410217, "learning_rate": 1.999518407308571e-05, "loss": 2.6067, "step": 792 }, { "epoch": 0.03, "grad_norm": 0.6667712926864624, "learning_rate": 1.9995151033472998e-05, "loss": 2.5911, "step": 793 }, { "epoch": 0.03, "grad_norm": 0.6548218727111816, "learning_rate": 1.9995117880940927e-05, "loss": 2.5405, "step": 794 }, { "epoch": 0.03, "grad_norm": 0.6442228555679321, "learning_rate": 1.999508461548987e-05, "loss": 2.5827, "step": 795 }, { "epoch": 0.03, "grad_norm": 0.6406388878822327, "learning_rate": 1.999505123712021e-05, "loss": 2.5889, "step": 796 }, { "epoch": 0.03, "grad_norm": 0.6656167507171631, "learning_rate": 1.9995017745832318e-05, "loss": 2.5114, "step": 797 }, { "epoch": 0.03, "grad_norm": 0.6376672387123108, "learning_rate": 1.9994984141626572e-05, "loss": 2.5279, "step": 798 }, { "epoch": 0.03, "grad_norm": 0.6719859838485718, "learning_rate": 1.999495042450335e-05, "loss": 2.6545, "step": 799 }, { "epoch": 0.03, "grad_norm": 0.6306279897689819, "learning_rate": 1.9994916594463038e-05, "loss": 2.5553, "step": 800 }, { "epoch": 0.03, "grad_norm": 0.6618303656578064, "learning_rate": 1.9994882651506015e-05, "loss": 2.6115, "step": 801 }, { "epoch": 0.03, "grad_norm": 0.6502376794815063, "learning_rate": 1.999484859563267e-05, "loss": 2.5675, "step": 802 }, { "epoch": 0.03, "grad_norm": 0.6250829696655273, "learning_rate": 1.9994814426843377e-05, "loss": 2.575, "step": 803 }, { "epoch": 0.03, "grad_norm": 0.6839468479156494, "learning_rate": 1.999478014513853e-05, "loss": 2.6039, "step": 804 }, { "epoch": 0.03, "grad_norm": 0.6998984813690186, "learning_rate": 1.9994745750518517e-05, "loss": 2.573, "step": 805 }, { "epoch": 0.03, "grad_norm": 0.6413207650184631, "learning_rate": 1.999471124298372e-05, "loss": 2.6052, "step": 806 }, { "epoch": 0.03, "grad_norm": 0.6643601655960083, "learning_rate": 1.9994676622534537e-05, "loss": 2.5425, "step": 807 }, { "epoch": 0.03, "grad_norm": 0.6712691187858582, "learning_rate": 1.999464188917135e-05, "loss": 2.6618, "step": 808 }, { "epoch": 0.03, "grad_norm": 0.7033228874206543, "learning_rate": 1.999460704289456e-05, "loss": 2.6179, "step": 809 }, { "epoch": 0.03, "grad_norm": 0.6693732738494873, "learning_rate": 1.9994572083704558e-05, "loss": 2.5589, "step": 810 }, { "epoch": 0.03, "grad_norm": 0.6438301205635071, "learning_rate": 1.9994537011601737e-05, "loss": 2.561, "step": 811 }, { "epoch": 0.03, "grad_norm": 0.6705179214477539, "learning_rate": 1.9994501826586493e-05, "loss": 2.5712, "step": 812 }, { "epoch": 0.03, "grad_norm": 0.6689388751983643, "learning_rate": 1.9994466528659222e-05, "loss": 2.6007, "step": 813 }, { "epoch": 0.03, "grad_norm": 0.7577647566795349, "learning_rate": 1.999443111782033e-05, "loss": 2.5506, "step": 814 }, { "epoch": 0.03, "grad_norm": 0.6508129239082336, "learning_rate": 1.999439559407021e-05, "loss": 2.5662, "step": 815 }, { "epoch": 0.03, "grad_norm": 0.6562962532043457, "learning_rate": 1.999435995740927e-05, "loss": 2.5491, "step": 816 }, { "epoch": 0.03, "grad_norm": 0.6568298935890198, "learning_rate": 1.9994324207837902e-05, "loss": 2.5852, "step": 817 }, { "epoch": 0.03, "grad_norm": 0.6908994913101196, "learning_rate": 1.9994288345356522e-05, "loss": 2.567, "step": 818 }, { "epoch": 0.03, "grad_norm": 0.6363824009895325, "learning_rate": 1.9994252369965527e-05, "loss": 2.5956, "step": 819 }, { "epoch": 0.03, "grad_norm": 0.6722463369369507, "learning_rate": 1.9994216281665326e-05, "loss": 2.5892, "step": 820 }, { "epoch": 0.03, "grad_norm": 0.6953749060630798, "learning_rate": 1.9994180080456327e-05, "loss": 2.6019, "step": 821 }, { "epoch": 0.03, "grad_norm": 0.6519436836242676, "learning_rate": 1.9994143766338937e-05, "loss": 2.6338, "step": 822 }, { "epoch": 0.03, "grad_norm": 0.6399454474449158, "learning_rate": 1.999410733931357e-05, "loss": 2.5947, "step": 823 }, { "epoch": 0.03, "grad_norm": 0.6465761661529541, "learning_rate": 1.9994070799380636e-05, "loss": 2.5638, "step": 824 }, { "epoch": 0.03, "grad_norm": 0.6767170429229736, "learning_rate": 1.9994034146540544e-05, "loss": 2.5329, "step": 825 }, { "epoch": 0.03, "grad_norm": 0.6768900752067566, "learning_rate": 1.9993997380793714e-05, "loss": 2.5725, "step": 826 }, { "epoch": 0.03, "grad_norm": 0.6659291982650757, "learning_rate": 1.9993960502140558e-05, "loss": 2.5938, "step": 827 }, { "epoch": 0.03, "grad_norm": 0.6561067700386047, "learning_rate": 1.999392351058149e-05, "loss": 2.5791, "step": 828 }, { "epoch": 0.03, "grad_norm": 0.6598148941993713, "learning_rate": 1.9993886406116936e-05, "loss": 2.5582, "step": 829 }, { "epoch": 0.03, "grad_norm": 0.6995514631271362, "learning_rate": 1.9993849188747312e-05, "loss": 2.5651, "step": 830 }, { "epoch": 0.03, "grad_norm": 0.6523492932319641, "learning_rate": 1.999381185847303e-05, "loss": 2.5384, "step": 831 }, { "epoch": 0.03, "grad_norm": 0.661693274974823, "learning_rate": 1.9993774415294525e-05, "loss": 2.6182, "step": 832 }, { "epoch": 0.03, "grad_norm": 0.6545494198799133, "learning_rate": 1.999373685921221e-05, "loss": 2.5564, "step": 833 }, { "epoch": 0.03, "grad_norm": 0.6930851936340332, "learning_rate": 1.9993699190226516e-05, "loss": 2.6188, "step": 834 }, { "epoch": 0.03, "grad_norm": 0.655387818813324, "learning_rate": 1.999366140833786e-05, "loss": 2.5456, "step": 835 }, { "epoch": 0.03, "grad_norm": 0.65291827917099, "learning_rate": 1.9993623513546678e-05, "loss": 2.5705, "step": 836 }, { "epoch": 0.03, "grad_norm": 0.6709448099136353, "learning_rate": 1.9993585505853392e-05, "loss": 2.6113, "step": 837 }, { "epoch": 0.03, "grad_norm": 0.6598061323165894, "learning_rate": 1.9993547385258438e-05, "loss": 2.5802, "step": 838 }, { "epoch": 0.03, "grad_norm": 0.6680628657341003, "learning_rate": 1.999350915176224e-05, "loss": 2.5515, "step": 839 }, { "epoch": 0.03, "grad_norm": 0.7139716744422913, "learning_rate": 1.999347080536523e-05, "loss": 2.534, "step": 840 }, { "epoch": 0.03, "grad_norm": 0.7240101099014282, "learning_rate": 1.9993432346067848e-05, "loss": 2.634, "step": 841 }, { "epoch": 0.03, "grad_norm": 0.6930354237556458, "learning_rate": 1.9993393773870522e-05, "loss": 2.5582, "step": 842 }, { "epoch": 0.03, "grad_norm": 0.6855827569961548, "learning_rate": 1.999335508877369e-05, "loss": 2.604, "step": 843 }, { "epoch": 0.03, "grad_norm": 0.6531574130058289, "learning_rate": 1.9993316290777786e-05, "loss": 2.5969, "step": 844 }, { "epoch": 0.03, "grad_norm": 0.6608261466026306, "learning_rate": 1.9993277379883256e-05, "loss": 2.5502, "step": 845 }, { "epoch": 0.03, "grad_norm": 0.6478691101074219, "learning_rate": 1.999323835609053e-05, "loss": 2.5824, "step": 846 }, { "epoch": 0.03, "grad_norm": 0.6754261255264282, "learning_rate": 1.999319921940006e-05, "loss": 2.5495, "step": 847 }, { "epoch": 0.03, "grad_norm": 0.6677714586257935, "learning_rate": 1.999315996981228e-05, "loss": 2.5857, "step": 848 }, { "epoch": 0.03, "grad_norm": 0.6594604849815369, "learning_rate": 1.9993120607327632e-05, "loss": 2.5494, "step": 849 }, { "epoch": 0.03, "grad_norm": 0.736385703086853, "learning_rate": 1.9993081131946566e-05, "loss": 2.559, "step": 850 }, { "epoch": 0.03, "grad_norm": 0.6718853712081909, "learning_rate": 1.9993041543669522e-05, "loss": 2.6263, "step": 851 }, { "epoch": 0.03, "grad_norm": 0.6250438094139099, "learning_rate": 1.9993001842496956e-05, "loss": 2.5529, "step": 852 }, { "epoch": 0.03, "grad_norm": 0.6653687953948975, "learning_rate": 1.999296202842931e-05, "loss": 2.5763, "step": 853 }, { "epoch": 0.03, "grad_norm": 0.6824021339416504, "learning_rate": 1.9992922101467035e-05, "loss": 2.5404, "step": 854 }, { "epoch": 0.03, "grad_norm": 0.6717216968536377, "learning_rate": 1.9992882061610585e-05, "loss": 2.5815, "step": 855 }, { "epoch": 0.03, "grad_norm": 0.6455835700035095, "learning_rate": 1.9992841908860408e-05, "loss": 2.5123, "step": 856 }, { "epoch": 0.03, "grad_norm": 0.6545624136924744, "learning_rate": 1.9992801643216958e-05, "loss": 2.552, "step": 857 }, { "epoch": 0.03, "grad_norm": 0.6351718306541443, "learning_rate": 1.9992761264680692e-05, "loss": 2.5563, "step": 858 }, { "epoch": 0.03, "grad_norm": 0.6632485389709473, "learning_rate": 1.999272077325207e-05, "loss": 2.5478, "step": 859 }, { "epoch": 0.03, "grad_norm": 0.6408211588859558, "learning_rate": 1.9992680168931537e-05, "loss": 2.5527, "step": 860 }, { "epoch": 0.03, "grad_norm": 0.668522298336029, "learning_rate": 1.9992639451719563e-05, "loss": 2.5704, "step": 861 }, { "epoch": 0.03, "grad_norm": 0.6781651973724365, "learning_rate": 1.999259862161661e-05, "loss": 2.5559, "step": 862 }, { "epoch": 0.03, "grad_norm": 0.687554657459259, "learning_rate": 1.9992557678623127e-05, "loss": 2.528, "step": 863 }, { "epoch": 0.03, "grad_norm": 0.6585277318954468, "learning_rate": 1.9992516622739588e-05, "loss": 2.5532, "step": 864 }, { "epoch": 0.03, "grad_norm": 0.6704707145690918, "learning_rate": 1.9992475453966448e-05, "loss": 2.52, "step": 865 }, { "epoch": 0.03, "grad_norm": 0.6548302173614502, "learning_rate": 1.9992434172304182e-05, "loss": 2.6041, "step": 866 }, { "epoch": 0.03, "grad_norm": 0.6596145629882812, "learning_rate": 1.999239277775325e-05, "loss": 2.5099, "step": 867 }, { "epoch": 0.03, "grad_norm": 0.6465863585472107, "learning_rate": 1.9992351270314114e-05, "loss": 2.5177, "step": 868 }, { "epoch": 0.03, "grad_norm": 0.653800368309021, "learning_rate": 1.9992309649987256e-05, "loss": 2.5483, "step": 869 }, { "epoch": 0.03, "grad_norm": 0.6711927056312561, "learning_rate": 1.999226791677314e-05, "loss": 2.5558, "step": 870 }, { "epoch": 0.03, "grad_norm": 0.7025273442268372, "learning_rate": 1.9992226070672234e-05, "loss": 2.6125, "step": 871 }, { "epoch": 0.03, "grad_norm": 0.6411182284355164, "learning_rate": 1.9992184111685012e-05, "loss": 2.5495, "step": 872 }, { "epoch": 0.03, "grad_norm": 0.6790621876716614, "learning_rate": 1.9992142039811954e-05, "loss": 2.5984, "step": 873 }, { "epoch": 0.03, "grad_norm": 0.633189857006073, "learning_rate": 1.999209985505353e-05, "loss": 2.4997, "step": 874 }, { "epoch": 0.03, "grad_norm": 0.6589198708534241, "learning_rate": 1.9992057557410218e-05, "loss": 2.5168, "step": 875 }, { "epoch": 0.03, "grad_norm": 0.6354584097862244, "learning_rate": 1.9992015146882495e-05, "loss": 2.5859, "step": 876 }, { "epoch": 0.03, "grad_norm": 0.6384302973747253, "learning_rate": 1.999197262347084e-05, "loss": 2.5465, "step": 877 }, { "epoch": 0.03, "grad_norm": 0.6694098711013794, "learning_rate": 1.9991929987175734e-05, "loss": 2.5271, "step": 878 }, { "epoch": 0.03, "grad_norm": 0.658335268497467, "learning_rate": 1.9991887237997663e-05, "loss": 2.5177, "step": 879 }, { "epoch": 0.03, "grad_norm": 0.654478907585144, "learning_rate": 1.9991844375937103e-05, "loss": 2.5239, "step": 880 }, { "epoch": 0.03, "grad_norm": 0.6494770050048828, "learning_rate": 1.999180140099454e-05, "loss": 2.6151, "step": 881 }, { "epoch": 0.03, "grad_norm": 0.6490781307220459, "learning_rate": 1.999175831317046e-05, "loss": 2.5109, "step": 882 }, { "epoch": 0.03, "grad_norm": 0.6607816815376282, "learning_rate": 1.999171511246535e-05, "loss": 2.5087, "step": 883 }, { "epoch": 0.03, "grad_norm": 0.6606966853141785, "learning_rate": 1.9991671798879705e-05, "loss": 2.5071, "step": 884 }, { "epoch": 0.03, "grad_norm": 0.6823313236236572, "learning_rate": 1.9991628372414e-05, "loss": 2.6367, "step": 885 }, { "epoch": 0.03, "grad_norm": 0.6771349906921387, "learning_rate": 1.9991584833068738e-05, "loss": 2.569, "step": 886 }, { "epoch": 0.03, "grad_norm": 0.6409324407577515, "learning_rate": 1.9991541180844403e-05, "loss": 2.5392, "step": 887 }, { "epoch": 0.03, "grad_norm": 0.6323897242546082, "learning_rate": 1.9991497415741492e-05, "loss": 2.5465, "step": 888 }, { "epoch": 0.03, "grad_norm": 0.705708384513855, "learning_rate": 1.9991453537760498e-05, "loss": 2.5695, "step": 889 }, { "epoch": 0.03, "grad_norm": 0.6647756099700928, "learning_rate": 1.999140954690192e-05, "loss": 2.5555, "step": 890 }, { "epoch": 0.03, "grad_norm": 0.6726013422012329, "learning_rate": 1.9991365443166253e-05, "loss": 2.5399, "step": 891 }, { "epoch": 0.03, "grad_norm": 0.6620315909385681, "learning_rate": 1.9991321226553992e-05, "loss": 2.573, "step": 892 }, { "epoch": 0.03, "grad_norm": 0.674985408782959, "learning_rate": 1.9991276897065642e-05, "loss": 2.4754, "step": 893 }, { "epoch": 0.03, "grad_norm": 0.6528021097183228, "learning_rate": 1.99912324547017e-05, "loss": 2.5645, "step": 894 }, { "epoch": 0.03, "grad_norm": 0.6696645021438599, "learning_rate": 1.9991187899462668e-05, "loss": 2.5324, "step": 895 }, { "epoch": 0.03, "grad_norm": 0.6654466390609741, "learning_rate": 1.9991143231349052e-05, "loss": 2.5137, "step": 896 }, { "epoch": 0.03, "grad_norm": 0.7002142667770386, "learning_rate": 1.9991098450361354e-05, "loss": 2.5477, "step": 897 }, { "epoch": 0.03, "grad_norm": 0.6448867321014404, "learning_rate": 1.999105355650008e-05, "loss": 2.5726, "step": 898 }, { "epoch": 0.03, "grad_norm": 0.6595983505249023, "learning_rate": 1.999100854976574e-05, "loss": 2.544, "step": 899 }, { "epoch": 0.03, "grad_norm": 0.6471636295318604, "learning_rate": 1.999096343015884e-05, "loss": 2.5599, "step": 900 }, { "epoch": 0.03, "grad_norm": 0.669156551361084, "learning_rate": 1.9990918197679893e-05, "loss": 2.5373, "step": 901 }, { "epoch": 0.03, "grad_norm": 0.6504368782043457, "learning_rate": 1.9990872852329402e-05, "loss": 2.6072, "step": 902 }, { "epoch": 0.03, "grad_norm": 0.651648759841919, "learning_rate": 1.999082739410789e-05, "loss": 2.5806, "step": 903 }, { "epoch": 0.03, "grad_norm": 0.7247188687324524, "learning_rate": 1.9990781823015863e-05, "loss": 2.557, "step": 904 }, { "epoch": 0.03, "grad_norm": 0.7064061164855957, "learning_rate": 1.9990736139053838e-05, "loss": 2.5846, "step": 905 }, { "epoch": 0.03, "grad_norm": 0.6628711223602295, "learning_rate": 1.999069034222233e-05, "loss": 2.5903, "step": 906 }, { "epoch": 0.03, "grad_norm": 0.6578488349914551, "learning_rate": 1.9990644432521862e-05, "loss": 2.5834, "step": 907 }, { "epoch": 0.03, "grad_norm": 0.6611040234565735, "learning_rate": 1.9990598409952944e-05, "loss": 2.5381, "step": 908 }, { "epoch": 0.03, "grad_norm": 0.6613810062408447, "learning_rate": 1.9990552274516104e-05, "loss": 2.5014, "step": 909 }, { "epoch": 0.03, "grad_norm": 0.6636083722114563, "learning_rate": 1.9990506026211856e-05, "loss": 2.58, "step": 910 }, { "epoch": 0.03, "grad_norm": 0.6571870446205139, "learning_rate": 1.9990459665040728e-05, "loss": 2.5282, "step": 911 }, { "epoch": 0.03, "grad_norm": 0.7053223848342896, "learning_rate": 1.9990413191003243e-05, "loss": 2.5964, "step": 912 }, { "epoch": 0.03, "grad_norm": 0.6731041073799133, "learning_rate": 1.999036660409992e-05, "loss": 2.5569, "step": 913 }, { "epoch": 0.03, "grad_norm": 0.6329122185707092, "learning_rate": 1.9990319904331293e-05, "loss": 2.5011, "step": 914 }, { "epoch": 0.03, "grad_norm": 0.6895326972007751, "learning_rate": 1.9990273091697886e-05, "loss": 2.5158, "step": 915 }, { "epoch": 0.03, "grad_norm": 0.6591411828994751, "learning_rate": 1.999022616620023e-05, "loss": 2.5797, "step": 916 }, { "epoch": 0.03, "grad_norm": 0.637761116027832, "learning_rate": 1.9990179127838854e-05, "loss": 2.5037, "step": 917 }, { "epoch": 0.03, "grad_norm": 0.6519005298614502, "learning_rate": 1.9990131976614285e-05, "loss": 2.558, "step": 918 }, { "epoch": 0.03, "grad_norm": 0.6424410343170166, "learning_rate": 1.9990084712527063e-05, "loss": 2.5329, "step": 919 }, { "epoch": 0.03, "grad_norm": 0.6602208614349365, "learning_rate": 1.9990037335577718e-05, "loss": 2.6004, "step": 920 }, { "epoch": 0.03, "grad_norm": 0.6828067302703857, "learning_rate": 1.9989989845766785e-05, "loss": 2.5192, "step": 921 }, { "epoch": 0.03, "grad_norm": 0.6715533137321472, "learning_rate": 1.99899422430948e-05, "loss": 2.5238, "step": 922 }, { "epoch": 0.03, "grad_norm": 0.6458566784858704, "learning_rate": 1.9989894527562308e-05, "loss": 2.5603, "step": 923 }, { "epoch": 0.03, "grad_norm": 0.6470980048179626, "learning_rate": 1.998984669916984e-05, "loss": 2.5343, "step": 924 }, { "epoch": 0.03, "grad_norm": 0.6514043211936951, "learning_rate": 1.9989798757917936e-05, "loss": 2.5131, "step": 925 }, { "epoch": 0.03, "grad_norm": 0.6701914668083191, "learning_rate": 1.998975070380714e-05, "loss": 2.514, "step": 926 }, { "epoch": 0.03, "grad_norm": 0.6525755524635315, "learning_rate": 1.9989702536837998e-05, "loss": 2.5436, "step": 927 }, { "epoch": 0.03, "grad_norm": 0.6849528551101685, "learning_rate": 1.998965425701105e-05, "loss": 2.5781, "step": 928 }, { "epoch": 0.03, "grad_norm": 0.636716365814209, "learning_rate": 1.9989605864326842e-05, "loss": 2.5617, "step": 929 }, { "epoch": 0.03, "grad_norm": 0.6573632955551147, "learning_rate": 1.9989557358785923e-05, "loss": 2.5828, "step": 930 }, { "epoch": 0.03, "grad_norm": 0.6261988878250122, "learning_rate": 1.998950874038884e-05, "loss": 2.5312, "step": 931 }, { "epoch": 0.03, "grad_norm": 0.6581807732582092, "learning_rate": 1.9989460009136138e-05, "loss": 2.4843, "step": 932 }, { "epoch": 0.03, "grad_norm": 0.6432085037231445, "learning_rate": 1.9989411165028373e-05, "loss": 2.6105, "step": 933 }, { "epoch": 0.03, "grad_norm": 0.6609717011451721, "learning_rate": 1.9989362208066096e-05, "loss": 2.5045, "step": 934 }, { "epoch": 0.03, "grad_norm": 0.6460633277893066, "learning_rate": 1.998931313824986e-05, "loss": 2.5195, "step": 935 }, { "epoch": 0.03, "grad_norm": 0.651472270488739, "learning_rate": 1.9989263955580217e-05, "loss": 2.5869, "step": 936 }, { "epoch": 0.03, "grad_norm": 0.6696816682815552, "learning_rate": 1.9989214660057722e-05, "loss": 2.5588, "step": 937 }, { "epoch": 0.03, "grad_norm": 0.6702761650085449, "learning_rate": 1.9989165251682937e-05, "loss": 2.4629, "step": 938 }, { "epoch": 0.03, "grad_norm": 0.633421778678894, "learning_rate": 1.9989115730456415e-05, "loss": 2.5433, "step": 939 }, { "epoch": 0.03, "grad_norm": 0.6504607200622559, "learning_rate": 1.998906609637872e-05, "loss": 2.5454, "step": 940 }, { "epoch": 0.03, "grad_norm": 0.6157642602920532, "learning_rate": 1.9989016349450413e-05, "loss": 2.4893, "step": 941 }, { "epoch": 0.03, "grad_norm": 0.6734313368797302, "learning_rate": 1.998896648967205e-05, "loss": 2.533, "step": 942 }, { "epoch": 0.03, "grad_norm": 0.7046616673469543, "learning_rate": 1.9988916517044193e-05, "loss": 2.5584, "step": 943 }, { "epoch": 0.03, "grad_norm": 0.6411042809486389, "learning_rate": 1.9988866431567418e-05, "loss": 2.5767, "step": 944 }, { "epoch": 0.03, "grad_norm": 0.7102134227752686, "learning_rate": 1.9988816233242283e-05, "loss": 2.4594, "step": 945 }, { "epoch": 0.03, "grad_norm": 0.6810867190361023, "learning_rate": 1.998876592206935e-05, "loss": 2.5435, "step": 946 }, { "epoch": 0.03, "grad_norm": 0.6689637303352356, "learning_rate": 1.99887154980492e-05, "loss": 2.5045, "step": 947 }, { "epoch": 0.03, "grad_norm": 0.6038787364959717, "learning_rate": 1.99886649611824e-05, "loss": 2.5357, "step": 948 }, { "epoch": 0.03, "grad_norm": 0.6568419337272644, "learning_rate": 1.998861431146951e-05, "loss": 2.5547, "step": 949 }, { "epoch": 0.03, "grad_norm": 0.691541850566864, "learning_rate": 1.998856354891111e-05, "loss": 2.4988, "step": 950 }, { "epoch": 0.03, "grad_norm": 0.6747137904167175, "learning_rate": 1.9988512673507778e-05, "loss": 2.5228, "step": 951 }, { "epoch": 0.03, "grad_norm": 0.6712235808372498, "learning_rate": 1.998846168526008e-05, "loss": 2.5865, "step": 952 }, { "epoch": 0.03, "grad_norm": 0.6347312927246094, "learning_rate": 1.9988410584168595e-05, "loss": 2.5126, "step": 953 }, { "epoch": 0.03, "grad_norm": 0.6735199689865112, "learning_rate": 1.9988359370233904e-05, "loss": 2.5157, "step": 954 }, { "epoch": 0.03, "grad_norm": 0.6382014751434326, "learning_rate": 1.998830804345658e-05, "loss": 2.4868, "step": 955 }, { "epoch": 0.03, "grad_norm": 0.6418236494064331, "learning_rate": 1.9988256603837204e-05, "loss": 2.5458, "step": 956 }, { "epoch": 0.03, "grad_norm": 0.6546878814697266, "learning_rate": 1.9988205051376363e-05, "loss": 2.559, "step": 957 }, { "epoch": 0.03, "grad_norm": 0.6818661093711853, "learning_rate": 1.998815338607463e-05, "loss": 2.5724, "step": 958 }, { "epoch": 0.03, "grad_norm": 0.6850147247314453, "learning_rate": 1.9988101607932597e-05, "loss": 2.5586, "step": 959 }, { "epoch": 0.03, "grad_norm": 0.649859607219696, "learning_rate": 1.9988049716950845e-05, "loss": 2.5217, "step": 960 }, { "epoch": 0.03, "grad_norm": 0.6329843401908875, "learning_rate": 1.998799771312996e-05, "loss": 2.5878, "step": 961 }, { "epoch": 0.03, "grad_norm": 0.6516240239143372, "learning_rate": 1.998794559647053e-05, "loss": 2.5493, "step": 962 }, { "epoch": 0.03, "grad_norm": 0.6538608074188232, "learning_rate": 1.9987893366973145e-05, "loss": 2.5151, "step": 963 }, { "epoch": 0.03, "grad_norm": 0.6739526391029358, "learning_rate": 1.9987841024638397e-05, "loss": 2.5796, "step": 964 }, { "epoch": 0.03, "grad_norm": 0.6733515858650208, "learning_rate": 1.9987788569466873e-05, "loss": 2.53, "step": 965 }, { "epoch": 0.03, "grad_norm": 0.648504912853241, "learning_rate": 1.9987736001459167e-05, "loss": 2.5676, "step": 966 }, { "epoch": 0.03, "grad_norm": 0.6834846138954163, "learning_rate": 1.998768332061587e-05, "loss": 2.5821, "step": 967 }, { "epoch": 0.03, "grad_norm": 0.6586293578147888, "learning_rate": 1.9987630526937584e-05, "loss": 2.5652, "step": 968 }, { "epoch": 0.03, "grad_norm": 0.6561817526817322, "learning_rate": 1.99875776204249e-05, "loss": 2.5752, "step": 969 }, { "epoch": 0.03, "grad_norm": 0.6438115835189819, "learning_rate": 1.9987524601078415e-05, "loss": 2.5822, "step": 970 }, { "epoch": 0.03, "grad_norm": 0.6456990838050842, "learning_rate": 1.9987471468898732e-05, "loss": 2.5854, "step": 971 }, { "epoch": 0.03, "grad_norm": 0.6499293446540833, "learning_rate": 1.998741822388645e-05, "loss": 2.5406, "step": 972 }, { "epoch": 0.03, "grad_norm": 0.6462386846542358, "learning_rate": 1.9987364866042172e-05, "loss": 2.5013, "step": 973 }, { "epoch": 0.03, "grad_norm": 0.6346514821052551, "learning_rate": 1.9987311395366497e-05, "loss": 2.5343, "step": 974 }, { "epoch": 0.03, "grad_norm": 0.6457494497299194, "learning_rate": 1.9987257811860033e-05, "loss": 2.5178, "step": 975 }, { "epoch": 0.03, "grad_norm": 0.6581549644470215, "learning_rate": 1.9987204115523383e-05, "loss": 2.5252, "step": 976 }, { "epoch": 0.03, "grad_norm": 0.6944414973258972, "learning_rate": 1.9987150306357154e-05, "loss": 2.5317, "step": 977 }, { "epoch": 0.03, "grad_norm": 0.6609979271888733, "learning_rate": 1.998709638436195e-05, "loss": 2.4422, "step": 978 }, { "epoch": 0.03, "grad_norm": 0.6353131532669067, "learning_rate": 1.9987042349538386e-05, "loss": 2.5222, "step": 979 }, { "epoch": 0.03, "grad_norm": 0.6307664513587952, "learning_rate": 1.998698820188707e-05, "loss": 2.4825, "step": 980 }, { "epoch": 0.03, "grad_norm": 0.64664226770401, "learning_rate": 1.9986933941408617e-05, "loss": 2.5223, "step": 981 }, { "epoch": 0.03, "grad_norm": 0.6608175039291382, "learning_rate": 1.9986879568103635e-05, "loss": 2.5063, "step": 982 }, { "epoch": 0.03, "grad_norm": 0.6379284262657166, "learning_rate": 1.9986825081972743e-05, "loss": 2.4928, "step": 983 }, { "epoch": 0.03, "grad_norm": 0.649713397026062, "learning_rate": 1.998677048301655e-05, "loss": 2.497, "step": 984 }, { "epoch": 0.03, "grad_norm": 0.7069547772407532, "learning_rate": 1.998671577123568e-05, "loss": 2.5541, "step": 985 }, { "epoch": 0.03, "grad_norm": 0.6581557989120483, "learning_rate": 1.9986660946630747e-05, "loss": 2.5545, "step": 986 }, { "epoch": 0.03, "grad_norm": 0.6670517325401306, "learning_rate": 1.9986606009202372e-05, "loss": 2.5601, "step": 987 }, { "epoch": 0.03, "grad_norm": 0.6529316306114197, "learning_rate": 1.9986550958951173e-05, "loss": 2.5066, "step": 988 }, { "epoch": 0.03, "grad_norm": 0.649377167224884, "learning_rate": 1.9986495795877777e-05, "loss": 2.5401, "step": 989 }, { "epoch": 0.03, "grad_norm": 0.6688002347946167, "learning_rate": 1.99864405199828e-05, "loss": 2.566, "step": 990 }, { "epoch": 0.03, "grad_norm": 0.6878781318664551, "learning_rate": 1.9986385131266876e-05, "loss": 2.5301, "step": 991 }, { "epoch": 0.03, "grad_norm": 0.6727731823921204, "learning_rate": 1.9986329629730624e-05, "loss": 2.551, "step": 992 }, { "epoch": 0.03, "grad_norm": 0.7031287550926208, "learning_rate": 1.998627401537467e-05, "loss": 2.5642, "step": 993 }, { "epoch": 0.03, "grad_norm": 0.6427720189094543, "learning_rate": 1.9986218288199644e-05, "loss": 2.4915, "step": 994 }, { "epoch": 0.03, "grad_norm": 0.673617959022522, "learning_rate": 1.9986162448206177e-05, "loss": 2.5943, "step": 995 }, { "epoch": 0.03, "grad_norm": 0.642113983631134, "learning_rate": 1.9986106495394905e-05, "loss": 2.51, "step": 996 }, { "epoch": 0.03, "grad_norm": 0.7054376602172852, "learning_rate": 1.9986050429766447e-05, "loss": 2.4431, "step": 997 }, { "epoch": 0.03, "grad_norm": 0.6631945967674255, "learning_rate": 1.9985994251321447e-05, "loss": 2.5198, "step": 998 }, { "epoch": 0.03, "grad_norm": 0.6940524578094482, "learning_rate": 1.9985937960060535e-05, "loss": 2.5656, "step": 999 }, { "epoch": 0.03, "grad_norm": 0.654110312461853, "learning_rate": 1.998588155598435e-05, "loss": 2.4896, "step": 1000 }, { "epoch": 0.03, "grad_norm": 0.6796788573265076, "learning_rate": 1.9985825039093524e-05, "loss": 2.5385, "step": 1001 }, { "epoch": 0.03, "grad_norm": 0.662747859954834, "learning_rate": 1.9985768409388703e-05, "loss": 2.567, "step": 1002 }, { "epoch": 0.03, "grad_norm": 0.6902099251747131, "learning_rate": 1.998571166687052e-05, "loss": 2.5387, "step": 1003 }, { "epoch": 0.03, "grad_norm": 0.6432726979255676, "learning_rate": 1.998565481153962e-05, "loss": 2.4743, "step": 1004 }, { "epoch": 0.03, "grad_norm": 0.6323050856590271, "learning_rate": 1.9985597843396644e-05, "loss": 2.4914, "step": 1005 }, { "epoch": 0.03, "grad_norm": 0.6601359844207764, "learning_rate": 1.9985540762442237e-05, "loss": 2.4834, "step": 1006 }, { "epoch": 0.03, "grad_norm": 0.6630439758300781, "learning_rate": 1.998548356867704e-05, "loss": 2.5815, "step": 1007 }, { "epoch": 0.03, "grad_norm": 0.6577600240707397, "learning_rate": 1.9985426262101702e-05, "loss": 2.5185, "step": 1008 }, { "epoch": 0.03, "grad_norm": 0.6772057414054871, "learning_rate": 1.9985368842716873e-05, "loss": 2.49, "step": 1009 }, { "epoch": 0.03, "grad_norm": 0.6612805724143982, "learning_rate": 1.9985311310523193e-05, "loss": 2.5623, "step": 1010 }, { "epoch": 0.03, "grad_norm": 0.6473296880722046, "learning_rate": 1.9985253665521323e-05, "loss": 2.5419, "step": 1011 }, { "epoch": 0.03, "grad_norm": 0.6880604028701782, "learning_rate": 1.9985195907711907e-05, "loss": 2.5163, "step": 1012 }, { "epoch": 0.03, "grad_norm": 0.66536945104599, "learning_rate": 1.9985138037095598e-05, "loss": 2.5087, "step": 1013 }, { "epoch": 0.03, "grad_norm": 0.6414652466773987, "learning_rate": 1.9985080053673057e-05, "loss": 2.5284, "step": 1014 }, { "epoch": 0.03, "grad_norm": 0.6616004109382629, "learning_rate": 1.998502195744493e-05, "loss": 2.5418, "step": 1015 }, { "epoch": 0.03, "grad_norm": 0.6728946566581726, "learning_rate": 1.9984963748411873e-05, "loss": 2.4478, "step": 1016 }, { "epoch": 0.03, "grad_norm": 0.6659784317016602, "learning_rate": 1.9984905426574553e-05, "loss": 2.4466, "step": 1017 }, { "epoch": 0.03, "grad_norm": 0.6631350517272949, "learning_rate": 1.998484699193362e-05, "loss": 2.4974, "step": 1018 }, { "epoch": 0.03, "grad_norm": 0.7058129906654358, "learning_rate": 1.998478844448974e-05, "loss": 2.5615, "step": 1019 }, { "epoch": 0.03, "grad_norm": 0.6937713027000427, "learning_rate": 1.9984729784243572e-05, "loss": 2.5515, "step": 1020 }, { "epoch": 0.03, "grad_norm": 0.6276131272315979, "learning_rate": 1.9984671011195776e-05, "loss": 2.4617, "step": 1021 }, { "epoch": 0.03, "grad_norm": 0.6824151277542114, "learning_rate": 1.9984612125347017e-05, "loss": 2.4982, "step": 1022 }, { "epoch": 0.03, "grad_norm": 0.6697954535484314, "learning_rate": 1.9984553126697965e-05, "loss": 2.5127, "step": 1023 }, { "epoch": 0.03, "grad_norm": 0.6829949617385864, "learning_rate": 1.998449401524928e-05, "loss": 2.5176, "step": 1024 }, { "epoch": 0.03, "grad_norm": 0.6801559329032898, "learning_rate": 1.9984434791001637e-05, "loss": 2.4857, "step": 1025 }, { "epoch": 0.03, "grad_norm": 0.65267413854599, "learning_rate": 1.9984375453955703e-05, "loss": 2.5389, "step": 1026 }, { "epoch": 0.03, "grad_norm": 0.6562937498092651, "learning_rate": 1.998431600411214e-05, "loss": 2.5219, "step": 1027 }, { "epoch": 0.03, "grad_norm": 0.6441564559936523, "learning_rate": 1.998425644147163e-05, "loss": 2.5182, "step": 1028 }, { "epoch": 0.03, "grad_norm": 0.6421415209770203, "learning_rate": 1.9984196766034842e-05, "loss": 2.4975, "step": 1029 }, { "epoch": 0.03, "grad_norm": 0.6659252643585205, "learning_rate": 1.998413697780245e-05, "loss": 2.5263, "step": 1030 }, { "epoch": 0.03, "grad_norm": 0.6721411347389221, "learning_rate": 1.9984077076775132e-05, "loss": 2.4861, "step": 1031 }, { "epoch": 0.03, "grad_norm": 0.6564668416976929, "learning_rate": 1.9984017062953556e-05, "loss": 2.5109, "step": 1032 }, { "epoch": 0.03, "grad_norm": 0.6405675411224365, "learning_rate": 1.9983956936338413e-05, "loss": 2.5029, "step": 1033 }, { "epoch": 0.03, "grad_norm": 0.6317880749702454, "learning_rate": 1.998389669693037e-05, "loss": 2.4935, "step": 1034 }, { "epoch": 0.03, "grad_norm": 0.630470871925354, "learning_rate": 1.9983836344730116e-05, "loss": 2.5577, "step": 1035 }, { "epoch": 0.03, "grad_norm": 0.6827713847160339, "learning_rate": 1.998377587973833e-05, "loss": 2.4935, "step": 1036 }, { "epoch": 0.03, "grad_norm": 0.7336617112159729, "learning_rate": 1.9983715301955696e-05, "loss": 2.5107, "step": 1037 }, { "epoch": 0.03, "grad_norm": 0.6455040574073792, "learning_rate": 1.9983654611382897e-05, "loss": 2.4355, "step": 1038 }, { "epoch": 0.03, "grad_norm": 0.6398971676826477, "learning_rate": 1.998359380802062e-05, "loss": 2.5325, "step": 1039 }, { "epoch": 0.03, "grad_norm": 0.6680201292037964, "learning_rate": 1.998353289186955e-05, "loss": 2.5276, "step": 1040 }, { "epoch": 0.03, "grad_norm": 0.683854877948761, "learning_rate": 1.9983471862930377e-05, "loss": 2.4616, "step": 1041 }, { "epoch": 0.03, "grad_norm": 0.6794975996017456, "learning_rate": 1.998341072120379e-05, "loss": 2.5964, "step": 1042 }, { "epoch": 0.03, "grad_norm": 0.6380020976066589, "learning_rate": 1.9983349466690478e-05, "loss": 2.5364, "step": 1043 }, { "epoch": 0.03, "grad_norm": 0.6510722637176514, "learning_rate": 1.9983288099391137e-05, "loss": 2.5136, "step": 1044 }, { "epoch": 0.03, "grad_norm": 0.6584125757217407, "learning_rate": 1.9983226619306455e-05, "loss": 2.5509, "step": 1045 }, { "epoch": 0.03, "grad_norm": 0.6628699898719788, "learning_rate": 1.998316502643713e-05, "loss": 2.5028, "step": 1046 }, { "epoch": 0.03, "grad_norm": 0.6480531692504883, "learning_rate": 1.998310332078386e-05, "loss": 2.4796, "step": 1047 }, { "epoch": 0.03, "grad_norm": 0.6770830154418945, "learning_rate": 1.9983041502347337e-05, "loss": 2.5208, "step": 1048 }, { "epoch": 0.03, "grad_norm": 0.6712511777877808, "learning_rate": 1.998297957112826e-05, "loss": 2.5165, "step": 1049 }, { "epoch": 0.03, "grad_norm": 0.664597749710083, "learning_rate": 1.9982917527127334e-05, "loss": 2.4918, "step": 1050 }, { "epoch": 0.03, "grad_norm": 0.6505897641181946, "learning_rate": 1.9982855370345256e-05, "loss": 2.5114, "step": 1051 }, { "epoch": 0.04, "grad_norm": 0.6663317084312439, "learning_rate": 1.9982793100782727e-05, "loss": 2.5902, "step": 1052 }, { "epoch": 0.04, "grad_norm": 0.6359817981719971, "learning_rate": 1.998273071844045e-05, "loss": 2.5107, "step": 1053 }, { "epoch": 0.04, "grad_norm": 0.6670275330543518, "learning_rate": 1.9982668223319135e-05, "loss": 2.4613, "step": 1054 }, { "epoch": 0.04, "grad_norm": 0.6699387431144714, "learning_rate": 1.9982605615419483e-05, "loss": 2.5226, "step": 1055 }, { "epoch": 0.04, "grad_norm": 0.6547272801399231, "learning_rate": 1.9982542894742205e-05, "loss": 2.5363, "step": 1056 }, { "epoch": 0.04, "grad_norm": 0.6491037011146545, "learning_rate": 1.9982480061288007e-05, "loss": 2.5274, "step": 1057 }, { "epoch": 0.04, "grad_norm": 0.6636024713516235, "learning_rate": 1.9982417115057598e-05, "loss": 2.5055, "step": 1058 }, { "epoch": 0.04, "grad_norm": 0.7068822979927063, "learning_rate": 1.9982354056051695e-05, "loss": 2.5102, "step": 1059 }, { "epoch": 0.04, "grad_norm": 0.6638553738594055, "learning_rate": 1.9982290884271002e-05, "loss": 2.5082, "step": 1060 }, { "epoch": 0.04, "grad_norm": 0.6805009841918945, "learning_rate": 1.9982227599716237e-05, "loss": 2.5645, "step": 1061 }, { "epoch": 0.04, "grad_norm": 0.6925747990608215, "learning_rate": 1.9982164202388116e-05, "loss": 2.5329, "step": 1062 }, { "epoch": 0.04, "grad_norm": 0.7054527997970581, "learning_rate": 1.9982100692287356e-05, "loss": 2.5673, "step": 1063 }, { "epoch": 0.04, "grad_norm": 0.6304382681846619, "learning_rate": 1.998203706941467e-05, "loss": 2.4827, "step": 1064 }, { "epoch": 0.04, "grad_norm": 0.7037270069122314, "learning_rate": 1.998197333377078e-05, "loss": 2.5371, "step": 1065 }, { "epoch": 0.04, "grad_norm": 0.6439043879508972, "learning_rate": 1.9981909485356405e-05, "loss": 2.4485, "step": 1066 }, { "epoch": 0.04, "grad_norm": 0.6447720527648926, "learning_rate": 1.9981845524172264e-05, "loss": 2.5516, "step": 1067 }, { "epoch": 0.04, "grad_norm": 0.6577944755554199, "learning_rate": 1.9981781450219086e-05, "loss": 2.4957, "step": 1068 }, { "epoch": 0.04, "grad_norm": 0.6646751761436462, "learning_rate": 1.998171726349759e-05, "loss": 2.4641, "step": 1069 }, { "epoch": 0.04, "grad_norm": 0.6744557023048401, "learning_rate": 1.99816529640085e-05, "loss": 2.4677, "step": 1070 }, { "epoch": 0.04, "grad_norm": 0.6579509973526001, "learning_rate": 1.9981588551752548e-05, "loss": 2.4634, "step": 1071 }, { "epoch": 0.04, "grad_norm": 0.6456015706062317, "learning_rate": 1.9981524026730453e-05, "loss": 2.4869, "step": 1072 }, { "epoch": 0.04, "grad_norm": 0.6739247441291809, "learning_rate": 1.9981459388942957e-05, "loss": 2.5445, "step": 1073 }, { "epoch": 0.04, "grad_norm": 0.6222259402275085, "learning_rate": 1.9981394638390777e-05, "loss": 2.5074, "step": 1074 }, { "epoch": 0.04, "grad_norm": 0.6447417140007019, "learning_rate": 1.9981329775074653e-05, "loss": 2.4642, "step": 1075 }, { "epoch": 0.04, "grad_norm": 0.6405796408653259, "learning_rate": 1.9981264798995313e-05, "loss": 2.509, "step": 1076 }, { "epoch": 0.04, "grad_norm": 0.6870517134666443, "learning_rate": 1.9981199710153495e-05, "loss": 2.514, "step": 1077 }, { "epoch": 0.04, "grad_norm": 0.6250279545783997, "learning_rate": 1.998113450854993e-05, "loss": 2.5398, "step": 1078 }, { "epoch": 0.04, "grad_norm": 0.6689534187316895, "learning_rate": 1.9981069194185355e-05, "loss": 2.5051, "step": 1079 }, { "epoch": 0.04, "grad_norm": 0.6547778248786926, "learning_rate": 1.9981003767060513e-05, "loss": 2.4934, "step": 1080 }, { "epoch": 0.04, "grad_norm": 0.6595370769500732, "learning_rate": 1.998093822717614e-05, "loss": 2.4987, "step": 1081 }, { "epoch": 0.04, "grad_norm": 0.6451910138130188, "learning_rate": 1.9980872574532975e-05, "loss": 2.4533, "step": 1082 }, { "epoch": 0.04, "grad_norm": 0.6690686345100403, "learning_rate": 1.998080680913176e-05, "loss": 2.5395, "step": 1083 }, { "epoch": 0.04, "grad_norm": 0.6448755264282227, "learning_rate": 1.998074093097324e-05, "loss": 2.5186, "step": 1084 }, { "epoch": 0.04, "grad_norm": 0.6348246335983276, "learning_rate": 1.9980674940058163e-05, "loss": 2.5081, "step": 1085 }, { "epoch": 0.04, "grad_norm": 0.628466010093689, "learning_rate": 1.9980608836387263e-05, "loss": 2.4754, "step": 1086 }, { "epoch": 0.04, "grad_norm": 0.7128363847732544, "learning_rate": 1.9980542619961298e-05, "loss": 2.5362, "step": 1087 }, { "epoch": 0.04, "grad_norm": 0.6283236145973206, "learning_rate": 1.9980476290781007e-05, "loss": 2.5807, "step": 1088 }, { "epoch": 0.04, "grad_norm": 0.6560370326042175, "learning_rate": 1.998040984884715e-05, "loss": 2.5315, "step": 1089 }, { "epoch": 0.04, "grad_norm": 0.6526508927345276, "learning_rate": 1.998034329416047e-05, "loss": 2.5156, "step": 1090 }, { "epoch": 0.04, "grad_norm": 0.6681846976280212, "learning_rate": 1.9980276626721714e-05, "loss": 2.5672, "step": 1091 }, { "epoch": 0.04, "grad_norm": 0.6755825281143188, "learning_rate": 1.998020984653165e-05, "loss": 2.4959, "step": 1092 }, { "epoch": 0.04, "grad_norm": 0.6869197487831116, "learning_rate": 1.9980142953591017e-05, "loss": 2.4981, "step": 1093 }, { "epoch": 0.04, "grad_norm": 0.6513407826423645, "learning_rate": 1.9980075947900582e-05, "loss": 2.5005, "step": 1094 }, { "epoch": 0.04, "grad_norm": 0.6888585686683655, "learning_rate": 1.9980008829461097e-05, "loss": 2.5683, "step": 1095 }, { "epoch": 0.04, "grad_norm": 0.6763022541999817, "learning_rate": 1.9979941598273323e-05, "loss": 2.5077, "step": 1096 }, { "epoch": 0.04, "grad_norm": 0.6463805437088013, "learning_rate": 1.9979874254338014e-05, "loss": 2.5235, "step": 1097 }, { "epoch": 0.04, "grad_norm": 0.6434422731399536, "learning_rate": 1.997980679765594e-05, "loss": 2.5124, "step": 1098 }, { "epoch": 0.04, "grad_norm": 0.6373206973075867, "learning_rate": 1.9979739228227852e-05, "loss": 2.4345, "step": 1099 }, { "epoch": 0.04, "grad_norm": 0.6587280035018921, "learning_rate": 1.9979671546054523e-05, "loss": 2.5792, "step": 1100 }, { "epoch": 0.04, "grad_norm": 0.7423656582832336, "learning_rate": 1.9979603751136708e-05, "loss": 2.4723, "step": 1101 }, { "epoch": 0.04, "grad_norm": 0.6750621795654297, "learning_rate": 1.9979535843475185e-05, "loss": 2.5118, "step": 1102 }, { "epoch": 0.04, "grad_norm": 0.687773585319519, "learning_rate": 1.997946782307071e-05, "loss": 2.5273, "step": 1103 }, { "epoch": 0.04, "grad_norm": 0.6503051519393921, "learning_rate": 1.9979399689924058e-05, "loss": 2.4664, "step": 1104 }, { "epoch": 0.04, "grad_norm": 0.6419747471809387, "learning_rate": 1.9979331444035998e-05, "loss": 2.535, "step": 1105 }, { "epoch": 0.04, "grad_norm": 0.664393961429596, "learning_rate": 1.9979263085407297e-05, "loss": 2.5013, "step": 1106 }, { "epoch": 0.04, "grad_norm": 0.6789538860321045, "learning_rate": 1.997919461403873e-05, "loss": 2.5463, "step": 1107 }, { "epoch": 0.04, "grad_norm": 0.6544622778892517, "learning_rate": 1.9979126029931072e-05, "loss": 2.4452, "step": 1108 }, { "epoch": 0.04, "grad_norm": 0.6510677337646484, "learning_rate": 1.9979057333085097e-05, "loss": 2.5176, "step": 1109 }, { "epoch": 0.04, "grad_norm": 0.6380664110183716, "learning_rate": 1.9978988523501578e-05, "loss": 2.5324, "step": 1110 }, { "epoch": 0.04, "grad_norm": 0.6380454897880554, "learning_rate": 1.99789196011813e-05, "loss": 2.4898, "step": 1111 }, { "epoch": 0.04, "grad_norm": 0.6302733421325684, "learning_rate": 1.997885056612503e-05, "loss": 2.51, "step": 1112 }, { "epoch": 0.04, "grad_norm": 0.6730328798294067, "learning_rate": 1.997878141833356e-05, "loss": 2.5229, "step": 1113 }, { "epoch": 0.04, "grad_norm": 0.6901289820671082, "learning_rate": 1.997871215780766e-05, "loss": 2.4794, "step": 1114 }, { "epoch": 0.04, "grad_norm": 0.6590227484703064, "learning_rate": 1.9978642784548127e-05, "loss": 2.5325, "step": 1115 }, { "epoch": 0.04, "grad_norm": 0.6847108602523804, "learning_rate": 1.997857329855573e-05, "loss": 2.4953, "step": 1116 }, { "epoch": 0.04, "grad_norm": 0.7078675031661987, "learning_rate": 1.997850369983126e-05, "loss": 2.5027, "step": 1117 }, { "epoch": 0.04, "grad_norm": 0.6547713279724121, "learning_rate": 1.9978433988375504e-05, "loss": 2.4926, "step": 1118 }, { "epoch": 0.04, "grad_norm": 0.6798385977745056, "learning_rate": 1.997836416418925e-05, "loss": 2.4743, "step": 1119 }, { "epoch": 0.04, "grad_norm": 0.6295072436332703, "learning_rate": 1.9978294227273283e-05, "loss": 2.5097, "step": 1120 }, { "epoch": 0.04, "grad_norm": 0.6345159411430359, "learning_rate": 1.9978224177628396e-05, "loss": 2.5039, "step": 1121 }, { "epoch": 0.04, "grad_norm": 0.622097909450531, "learning_rate": 1.997815401525538e-05, "loss": 2.4708, "step": 1122 }, { "epoch": 0.04, "grad_norm": 0.6950234174728394, "learning_rate": 1.997808374015503e-05, "loss": 2.4114, "step": 1123 }, { "epoch": 0.04, "grad_norm": 0.6793590188026428, "learning_rate": 1.9978013352328135e-05, "loss": 2.5618, "step": 1124 }, { "epoch": 0.04, "grad_norm": 0.6684526205062866, "learning_rate": 1.9977942851775497e-05, "loss": 2.5244, "step": 1125 }, { "epoch": 0.04, "grad_norm": 0.6539324522018433, "learning_rate": 1.9977872238497902e-05, "loss": 2.478, "step": 1126 }, { "epoch": 0.04, "grad_norm": 0.6612284779548645, "learning_rate": 1.9977801512496156e-05, "loss": 2.5333, "step": 1127 }, { "epoch": 0.04, "grad_norm": 0.6670842170715332, "learning_rate": 1.997773067377106e-05, "loss": 2.5114, "step": 1128 }, { "epoch": 0.04, "grad_norm": 0.7031385898590088, "learning_rate": 1.9977659722323407e-05, "loss": 2.5545, "step": 1129 }, { "epoch": 0.04, "grad_norm": 0.7332308888435364, "learning_rate": 1.9977588658154003e-05, "loss": 2.4543, "step": 1130 }, { "epoch": 0.04, "grad_norm": 0.6536996960639954, "learning_rate": 1.997751748126365e-05, "loss": 2.5495, "step": 1131 }, { "epoch": 0.04, "grad_norm": 0.6535505652427673, "learning_rate": 1.9977446191653153e-05, "loss": 2.4958, "step": 1132 }, { "epoch": 0.04, "grad_norm": 0.6539435386657715, "learning_rate": 1.997737478932331e-05, "loss": 2.5181, "step": 1133 }, { "epoch": 0.04, "grad_norm": 0.6357453465461731, "learning_rate": 1.997730327427494e-05, "loss": 2.5334, "step": 1134 }, { "epoch": 0.04, "grad_norm": 0.6689437031745911, "learning_rate": 1.9977231646508845e-05, "loss": 2.4705, "step": 1135 }, { "epoch": 0.04, "grad_norm": 0.6831153631210327, "learning_rate": 1.997715990602583e-05, "loss": 2.5026, "step": 1136 }, { "epoch": 0.04, "grad_norm": 0.6624339818954468, "learning_rate": 1.9977088052826713e-05, "loss": 2.4989, "step": 1137 }, { "epoch": 0.04, "grad_norm": 0.6393553018569946, "learning_rate": 1.99770160869123e-05, "loss": 2.5103, "step": 1138 }, { "epoch": 0.04, "grad_norm": 0.6625566482543945, "learning_rate": 1.997694400828341e-05, "loss": 2.501, "step": 1139 }, { "epoch": 0.04, "grad_norm": 0.6858672499656677, "learning_rate": 1.9976871816940854e-05, "loss": 2.5205, "step": 1140 }, { "epoch": 0.04, "grad_norm": 0.7091208100318909, "learning_rate": 1.997679951288544e-05, "loss": 2.4409, "step": 1141 }, { "epoch": 0.04, "grad_norm": 0.6620068550109863, "learning_rate": 1.9976727096117997e-05, "loss": 2.4803, "step": 1142 }, { "epoch": 0.04, "grad_norm": 0.643110990524292, "learning_rate": 1.9976654566639338e-05, "loss": 2.4967, "step": 1143 }, { "epoch": 0.04, "grad_norm": 0.686629593372345, "learning_rate": 1.9976581924450284e-05, "loss": 2.4724, "step": 1144 }, { "epoch": 0.04, "grad_norm": 0.650425136089325, "learning_rate": 1.9976509169551655e-05, "loss": 2.5077, "step": 1145 }, { "epoch": 0.04, "grad_norm": 0.6549974083900452, "learning_rate": 1.997643630194427e-05, "loss": 2.4616, "step": 1146 }, { "epoch": 0.04, "grad_norm": 0.6313915848731995, "learning_rate": 1.9976363321628957e-05, "loss": 2.4944, "step": 1147 }, { "epoch": 0.04, "grad_norm": 0.6537429094314575, "learning_rate": 1.997629022860653e-05, "loss": 2.492, "step": 1148 }, { "epoch": 0.04, "grad_norm": 0.6742879152297974, "learning_rate": 1.997621702287783e-05, "loss": 2.5183, "step": 1149 }, { "epoch": 0.04, "grad_norm": 0.6380449533462524, "learning_rate": 1.9976143704443676e-05, "loss": 2.5207, "step": 1150 }, { "epoch": 0.04, "grad_norm": 0.7004801034927368, "learning_rate": 1.9976070273304896e-05, "loss": 2.5485, "step": 1151 }, { "epoch": 0.04, "grad_norm": 0.7349691390991211, "learning_rate": 1.997599672946232e-05, "loss": 2.4807, "step": 1152 }, { "epoch": 0.04, "grad_norm": 0.650223970413208, "learning_rate": 1.997592307291678e-05, "loss": 2.4691, "step": 1153 }, { "epoch": 0.04, "grad_norm": 0.6400137543678284, "learning_rate": 1.9975849303669104e-05, "loss": 2.4726, "step": 1154 }, { "epoch": 0.04, "grad_norm": 0.6403338313102722, "learning_rate": 1.9975775421720135e-05, "loss": 2.4817, "step": 1155 }, { "epoch": 0.04, "grad_norm": 0.6457794308662415, "learning_rate": 1.99757014270707e-05, "loss": 2.5177, "step": 1156 }, { "epoch": 0.04, "grad_norm": 0.688164472579956, "learning_rate": 1.9975627319721635e-05, "loss": 2.5542, "step": 1157 }, { "epoch": 0.04, "grad_norm": 0.7092720866203308, "learning_rate": 1.997555309967378e-05, "loss": 2.5016, "step": 1158 }, { "epoch": 0.04, "grad_norm": 0.7378236651420593, "learning_rate": 1.9975478766927973e-05, "loss": 2.504, "step": 1159 }, { "epoch": 0.04, "grad_norm": 0.6497578024864197, "learning_rate": 1.997540432148505e-05, "loss": 2.4617, "step": 1160 }, { "epoch": 0.04, "grad_norm": 0.6363285183906555, "learning_rate": 1.997532976334586e-05, "loss": 2.5158, "step": 1161 }, { "epoch": 0.04, "grad_norm": 0.6313586831092834, "learning_rate": 1.9975255092511236e-05, "loss": 2.5007, "step": 1162 }, { "epoch": 0.04, "grad_norm": 0.6521604657173157, "learning_rate": 1.9975180308982027e-05, "loss": 2.5119, "step": 1163 }, { "epoch": 0.04, "grad_norm": 0.6570411324501038, "learning_rate": 1.997510541275908e-05, "loss": 2.4148, "step": 1164 }, { "epoch": 0.04, "grad_norm": 0.6731036305427551, "learning_rate": 1.9975030403843232e-05, "loss": 2.4901, "step": 1165 }, { "epoch": 0.04, "grad_norm": 0.6561059951782227, "learning_rate": 1.9974955282235343e-05, "loss": 2.4606, "step": 1166 }, { "epoch": 0.04, "grad_norm": 0.6508211493492126, "learning_rate": 1.997488004793625e-05, "loss": 2.5522, "step": 1167 }, { "epoch": 0.04, "grad_norm": 0.6997376680374146, "learning_rate": 1.997480470094681e-05, "loss": 2.5891, "step": 1168 }, { "epoch": 0.04, "grad_norm": 0.6416847109794617, "learning_rate": 1.9974729241267875e-05, "loss": 2.5059, "step": 1169 }, { "epoch": 0.04, "grad_norm": 0.6912742257118225, "learning_rate": 1.997465366890029e-05, "loss": 2.5393, "step": 1170 }, { "epoch": 0.04, "grad_norm": 0.74570232629776, "learning_rate": 1.9974577983844917e-05, "loss": 2.438, "step": 1171 }, { "epoch": 0.04, "grad_norm": 0.6463032364845276, "learning_rate": 1.9974502186102604e-05, "loss": 2.5287, "step": 1172 }, { "epoch": 0.04, "grad_norm": 0.6580987572669983, "learning_rate": 1.9974426275674216e-05, "loss": 2.4704, "step": 1173 }, { "epoch": 0.04, "grad_norm": 0.6640790700912476, "learning_rate": 1.9974350252560603e-05, "loss": 2.5309, "step": 1174 }, { "epoch": 0.04, "grad_norm": 0.71222323179245, "learning_rate": 1.9974274116762626e-05, "loss": 2.4359, "step": 1175 }, { "epoch": 0.04, "grad_norm": 0.6606084704399109, "learning_rate": 1.9974197868281146e-05, "loss": 2.4748, "step": 1176 }, { "epoch": 0.04, "grad_norm": 0.6555746793746948, "learning_rate": 1.9974121507117023e-05, "loss": 2.4552, "step": 1177 }, { "epoch": 0.04, "grad_norm": 0.6389197707176208, "learning_rate": 1.997404503327112e-05, "loss": 2.4947, "step": 1178 }, { "epoch": 0.04, "grad_norm": 0.6883360147476196, "learning_rate": 1.9973968446744304e-05, "loss": 2.4779, "step": 1179 }, { "epoch": 0.04, "grad_norm": 0.6988077759742737, "learning_rate": 1.9973891747537436e-05, "loss": 2.4605, "step": 1180 }, { "epoch": 0.04, "grad_norm": 0.6717812418937683, "learning_rate": 1.9973814935651384e-05, "loss": 2.526, "step": 1181 }, { "epoch": 0.04, "grad_norm": 0.686249315738678, "learning_rate": 1.997373801108702e-05, "loss": 2.5249, "step": 1182 }, { "epoch": 0.04, "grad_norm": 0.6526146531105042, "learning_rate": 1.9973660973845206e-05, "loss": 2.5213, "step": 1183 }, { "epoch": 0.04, "grad_norm": 0.6313891410827637, "learning_rate": 1.9973583823926815e-05, "loss": 2.5178, "step": 1184 }, { "epoch": 0.04, "grad_norm": 0.6542050242424011, "learning_rate": 1.997350656133272e-05, "loss": 2.5248, "step": 1185 }, { "epoch": 0.04, "grad_norm": 0.6645975112915039, "learning_rate": 1.9973429186063794e-05, "loss": 2.4744, "step": 1186 }, { "epoch": 0.04, "grad_norm": 0.7057430148124695, "learning_rate": 1.997335169812091e-05, "loss": 2.5526, "step": 1187 }, { "epoch": 0.04, "grad_norm": 0.6480329632759094, "learning_rate": 1.997327409750494e-05, "loss": 2.5371, "step": 1188 }, { "epoch": 0.04, "grad_norm": 0.6526750922203064, "learning_rate": 1.9973196384216767e-05, "loss": 2.489, "step": 1189 }, { "epoch": 0.04, "grad_norm": 0.6777902841567993, "learning_rate": 1.9973118558257267e-05, "loss": 2.511, "step": 1190 }, { "epoch": 0.04, "grad_norm": 0.652764081954956, "learning_rate": 1.9973040619627318e-05, "loss": 2.5249, "step": 1191 }, { "epoch": 0.04, "grad_norm": 0.650433361530304, "learning_rate": 1.9972962568327797e-05, "loss": 2.5095, "step": 1192 }, { "epoch": 0.04, "grad_norm": 0.6494837403297424, "learning_rate": 1.9972884404359593e-05, "loss": 2.5041, "step": 1193 }, { "epoch": 0.04, "grad_norm": 0.6666718125343323, "learning_rate": 1.9972806127723586e-05, "loss": 2.4341, "step": 1194 }, { "epoch": 0.04, "grad_norm": 0.6602668762207031, "learning_rate": 1.997272773842066e-05, "loss": 2.4594, "step": 1195 }, { "epoch": 0.04, "grad_norm": 0.6686780452728271, "learning_rate": 1.9972649236451697e-05, "loss": 2.4198, "step": 1196 }, { "epoch": 0.04, "grad_norm": 0.6318867802619934, "learning_rate": 1.997257062181759e-05, "loss": 2.5143, "step": 1197 }, { "epoch": 0.04, "grad_norm": 0.6604911088943481, "learning_rate": 1.9972491894519225e-05, "loss": 2.483, "step": 1198 }, { "epoch": 0.04, "grad_norm": 0.7131222486495972, "learning_rate": 1.997241305455749e-05, "loss": 2.4482, "step": 1199 }, { "epoch": 0.04, "grad_norm": 0.6666430234909058, "learning_rate": 1.9972334101933278e-05, "loss": 2.5161, "step": 1200 }, { "epoch": 0.04, "grad_norm": 0.6561883091926575, "learning_rate": 1.9972255036647482e-05, "loss": 2.4734, "step": 1201 }, { "epoch": 0.04, "grad_norm": 0.6368906497955322, "learning_rate": 1.997217585870099e-05, "loss": 2.448, "step": 1202 }, { "epoch": 0.04, "grad_norm": 0.6550028324127197, "learning_rate": 1.9972096568094698e-05, "loss": 2.4797, "step": 1203 }, { "epoch": 0.04, "grad_norm": 0.6599278450012207, "learning_rate": 1.9972017164829506e-05, "loss": 2.4667, "step": 1204 }, { "epoch": 0.04, "grad_norm": 0.6668063402175903, "learning_rate": 1.997193764890631e-05, "loss": 2.4909, "step": 1205 }, { "epoch": 0.04, "grad_norm": 0.6499751806259155, "learning_rate": 1.9971858020326002e-05, "loss": 2.5002, "step": 1206 }, { "epoch": 0.04, "grad_norm": 0.6999074220657349, "learning_rate": 1.9971778279089483e-05, "loss": 2.4121, "step": 1207 }, { "epoch": 0.04, "grad_norm": 0.6676874756813049, "learning_rate": 1.9971698425197666e-05, "loss": 2.4775, "step": 1208 }, { "epoch": 0.04, "grad_norm": 0.6871338486671448, "learning_rate": 1.9971618458651437e-05, "loss": 2.4924, "step": 1209 }, { "epoch": 0.04, "grad_norm": 0.6738062500953674, "learning_rate": 1.9971538379451713e-05, "loss": 2.4968, "step": 1210 }, { "epoch": 0.04, "grad_norm": 0.6423949599266052, "learning_rate": 1.9971458187599385e-05, "loss": 2.4529, "step": 1211 }, { "epoch": 0.04, "grad_norm": 0.6390488743782043, "learning_rate": 1.9971377883095372e-05, "loss": 2.4903, "step": 1212 }, { "epoch": 0.04, "grad_norm": 0.6854246258735657, "learning_rate": 1.9971297465940574e-05, "loss": 2.5195, "step": 1213 }, { "epoch": 0.04, "grad_norm": 0.67038893699646, "learning_rate": 1.99712169361359e-05, "loss": 2.5053, "step": 1214 }, { "epoch": 0.04, "grad_norm": 0.67030930519104, "learning_rate": 1.997113629368226e-05, "loss": 2.5108, "step": 1215 }, { "epoch": 0.04, "grad_norm": 0.65314781665802, "learning_rate": 1.997105553858057e-05, "loss": 2.4184, "step": 1216 }, { "epoch": 0.04, "grad_norm": 0.709152102470398, "learning_rate": 1.9970974670831732e-05, "loss": 2.5151, "step": 1217 }, { "epoch": 0.04, "grad_norm": 0.6381149291992188, "learning_rate": 1.997089369043667e-05, "loss": 2.5103, "step": 1218 }, { "epoch": 0.04, "grad_norm": 0.6892956495285034, "learning_rate": 1.9970812597396293e-05, "loss": 2.5183, "step": 1219 }, { "epoch": 0.04, "grad_norm": 0.7551789879798889, "learning_rate": 1.997073139171152e-05, "loss": 2.4912, "step": 1220 }, { "epoch": 0.04, "grad_norm": 0.6651291847229004, "learning_rate": 1.9970650073383267e-05, "loss": 2.4808, "step": 1221 }, { "epoch": 0.04, "grad_norm": 0.6543241739273071, "learning_rate": 1.997056864241245e-05, "loss": 2.5344, "step": 1222 }, { "epoch": 0.04, "grad_norm": 0.6531636118888855, "learning_rate": 1.99704870988e-05, "loss": 2.5213, "step": 1223 }, { "epoch": 0.04, "grad_norm": 0.6759975552558899, "learning_rate": 1.9970405442546822e-05, "loss": 2.4585, "step": 1224 }, { "epoch": 0.04, "grad_norm": 0.6605139374732971, "learning_rate": 1.9970323673653848e-05, "loss": 2.5745, "step": 1225 }, { "epoch": 0.04, "grad_norm": 0.7091869711875916, "learning_rate": 1.9970241792122e-05, "loss": 2.4767, "step": 1226 }, { "epoch": 0.04, "grad_norm": 0.6900755167007446, "learning_rate": 1.9970159797952204e-05, "loss": 2.5013, "step": 1227 }, { "epoch": 0.04, "grad_norm": 0.6499425768852234, "learning_rate": 1.9970077691145387e-05, "loss": 2.4593, "step": 1228 }, { "epoch": 0.04, "grad_norm": 0.6916418671607971, "learning_rate": 1.9969995471702474e-05, "loss": 2.5865, "step": 1229 }, { "epoch": 0.04, "grad_norm": 0.696540117263794, "learning_rate": 1.9969913139624398e-05, "loss": 2.4665, "step": 1230 }, { "epoch": 0.04, "grad_norm": 0.7007367610931396, "learning_rate": 1.9969830694912083e-05, "loss": 2.4568, "step": 1231 }, { "epoch": 0.04, "grad_norm": 0.6523316502571106, "learning_rate": 1.9969748137566466e-05, "loss": 2.5282, "step": 1232 }, { "epoch": 0.04, "grad_norm": 0.6738514304161072, "learning_rate": 1.9969665467588473e-05, "loss": 2.5556, "step": 1233 }, { "epoch": 0.04, "grad_norm": 0.6605009436607361, "learning_rate": 1.9969582684979044e-05, "loss": 2.5033, "step": 1234 }, { "epoch": 0.04, "grad_norm": 0.6775720119476318, "learning_rate": 1.9969499789739117e-05, "loss": 2.4566, "step": 1235 }, { "epoch": 0.04, "grad_norm": 0.6666238307952881, "learning_rate": 1.9969416781869618e-05, "loss": 2.4371, "step": 1236 }, { "epoch": 0.04, "grad_norm": 0.7366918921470642, "learning_rate": 1.9969333661371496e-05, "loss": 2.4109, "step": 1237 }, { "epoch": 0.04, "grad_norm": 0.6864652633666992, "learning_rate": 1.996925042824568e-05, "loss": 2.5461, "step": 1238 }, { "epoch": 0.04, "grad_norm": 0.6476344466209412, "learning_rate": 1.996916708249312e-05, "loss": 2.4926, "step": 1239 }, { "epoch": 0.04, "grad_norm": 0.6725216507911682, "learning_rate": 1.996908362411475e-05, "loss": 2.4423, "step": 1240 }, { "epoch": 0.04, "grad_norm": 0.6774707436561584, "learning_rate": 1.996900005311152e-05, "loss": 2.4367, "step": 1241 }, { "epoch": 0.04, "grad_norm": 0.6803374886512756, "learning_rate": 1.9968916369484362e-05, "loss": 2.3583, "step": 1242 }, { "epoch": 0.04, "grad_norm": 0.6821350455284119, "learning_rate": 1.9968832573234235e-05, "loss": 2.4773, "step": 1243 }, { "epoch": 0.04, "grad_norm": 0.6509692668914795, "learning_rate": 1.9968748664362078e-05, "loss": 2.4623, "step": 1244 }, { "epoch": 0.04, "grad_norm": 0.6849445104598999, "learning_rate": 1.996866464286884e-05, "loss": 2.5288, "step": 1245 }, { "epoch": 0.04, "grad_norm": 0.6803519129753113, "learning_rate": 1.9968580508755472e-05, "loss": 2.5977, "step": 1246 }, { "epoch": 0.04, "grad_norm": 0.6328503489494324, "learning_rate": 1.9968496262022926e-05, "loss": 2.4331, "step": 1247 }, { "epoch": 0.04, "grad_norm": 0.6332231760025024, "learning_rate": 1.996841190267215e-05, "loss": 2.4471, "step": 1248 }, { "epoch": 0.04, "grad_norm": 0.6625400185585022, "learning_rate": 1.9968327430704094e-05, "loss": 2.4662, "step": 1249 }, { "epoch": 0.04, "grad_norm": 0.6836045384407043, "learning_rate": 1.996824284611972e-05, "loss": 2.5067, "step": 1250 }, { "epoch": 0.04, "grad_norm": 0.6243551969528198, "learning_rate": 1.9968158148919984e-05, "loss": 2.4404, "step": 1251 }, { "epoch": 0.04, "grad_norm": 0.6712891459465027, "learning_rate": 1.9968073339105834e-05, "loss": 2.4645, "step": 1252 }, { "epoch": 0.04, "grad_norm": 0.6650150418281555, "learning_rate": 1.9967988416678234e-05, "loss": 2.4724, "step": 1253 }, { "epoch": 0.04, "grad_norm": 0.688060462474823, "learning_rate": 1.9967903381638145e-05, "loss": 2.4722, "step": 1254 }, { "epoch": 0.04, "grad_norm": 0.6574703454971313, "learning_rate": 1.9967818233986524e-05, "loss": 2.4526, "step": 1255 }, { "epoch": 0.04, "grad_norm": 0.642695963382721, "learning_rate": 1.9967732973724335e-05, "loss": 2.4421, "step": 1256 }, { "epoch": 0.04, "grad_norm": 0.6508745551109314, "learning_rate": 1.996764760085254e-05, "loss": 2.492, "step": 1257 }, { "epoch": 0.04, "grad_norm": 0.6607152223587036, "learning_rate": 1.9967562115372102e-05, "loss": 2.5089, "step": 1258 }, { "epoch": 0.04, "grad_norm": 0.6321751475334167, "learning_rate": 1.996747651728399e-05, "loss": 2.5196, "step": 1259 }, { "epoch": 0.04, "grad_norm": 0.6504089832305908, "learning_rate": 1.9967390806589174e-05, "loss": 2.4672, "step": 1260 }, { "epoch": 0.04, "grad_norm": 0.6916740536689758, "learning_rate": 1.9967304983288614e-05, "loss": 2.5157, "step": 1261 }, { "epoch": 0.04, "grad_norm": 0.630699872970581, "learning_rate": 1.9967219047383283e-05, "loss": 2.4376, "step": 1262 }, { "epoch": 0.04, "grad_norm": 0.6660124659538269, "learning_rate": 1.9967132998874152e-05, "loss": 2.4542, "step": 1263 }, { "epoch": 0.04, "grad_norm": 0.6522161960601807, "learning_rate": 1.9967046837762195e-05, "loss": 2.517, "step": 1264 }, { "epoch": 0.04, "grad_norm": 0.6715566515922546, "learning_rate": 1.996696056404839e-05, "loss": 2.4071, "step": 1265 }, { "epoch": 0.04, "grad_norm": 0.6703926920890808, "learning_rate": 1.9966874177733698e-05, "loss": 2.4235, "step": 1266 }, { "epoch": 0.04, "grad_norm": 0.6756909489631653, "learning_rate": 1.9966787678819103e-05, "loss": 2.3735, "step": 1267 }, { "epoch": 0.04, "grad_norm": 0.6688052415847778, "learning_rate": 1.9966701067305586e-05, "loss": 2.3846, "step": 1268 }, { "epoch": 0.04, "grad_norm": 0.684777557849884, "learning_rate": 1.9966614343194116e-05, "loss": 2.525, "step": 1269 }, { "epoch": 0.04, "grad_norm": 0.6482384204864502, "learning_rate": 1.9966527506485685e-05, "loss": 2.4885, "step": 1270 }, { "epoch": 0.04, "grad_norm": 0.6748205423355103, "learning_rate": 1.9966440557181263e-05, "loss": 2.4762, "step": 1271 }, { "epoch": 0.04, "grad_norm": 0.6453205943107605, "learning_rate": 1.996635349528184e-05, "loss": 2.522, "step": 1272 }, { "epoch": 0.04, "grad_norm": 0.663126528263092, "learning_rate": 1.996626632078839e-05, "loss": 2.4957, "step": 1273 }, { "epoch": 0.04, "grad_norm": 0.657171905040741, "learning_rate": 1.9966179033701907e-05, "loss": 2.5044, "step": 1274 }, { "epoch": 0.04, "grad_norm": 0.638403058052063, "learning_rate": 1.9966091634023376e-05, "loss": 2.5283, "step": 1275 }, { "epoch": 0.04, "grad_norm": 0.6649624705314636, "learning_rate": 1.996600412175378e-05, "loss": 2.4927, "step": 1276 }, { "epoch": 0.04, "grad_norm": 0.6398544907569885, "learning_rate": 1.996591649689411e-05, "loss": 2.4976, "step": 1277 }, { "epoch": 0.04, "grad_norm": 0.6636454463005066, "learning_rate": 1.9965828759445357e-05, "loss": 2.4077, "step": 1278 }, { "epoch": 0.04, "grad_norm": 0.7010447978973389, "learning_rate": 1.996574090940851e-05, "loss": 2.4973, "step": 1279 }, { "epoch": 0.04, "grad_norm": 0.6408699154853821, "learning_rate": 1.9965652946784565e-05, "loss": 2.5336, "step": 1280 }, { "epoch": 0.04, "grad_norm": 0.6317563652992249, "learning_rate": 1.9965564871574513e-05, "loss": 2.4476, "step": 1281 }, { "epoch": 0.04, "grad_norm": 0.6677871346473694, "learning_rate": 1.996547668377935e-05, "loss": 2.5296, "step": 1282 }, { "epoch": 0.04, "grad_norm": 0.6846206784248352, "learning_rate": 1.996538838340007e-05, "loss": 2.4617, "step": 1283 }, { "epoch": 0.04, "grad_norm": 0.6472628712654114, "learning_rate": 1.996529997043767e-05, "loss": 2.5552, "step": 1284 }, { "epoch": 0.04, "grad_norm": 0.6841762065887451, "learning_rate": 1.9965211444893156e-05, "loss": 2.4944, "step": 1285 }, { "epoch": 0.04, "grad_norm": 0.6732828617095947, "learning_rate": 1.996512280676752e-05, "loss": 2.515, "step": 1286 }, { "epoch": 0.04, "grad_norm": 0.6832213997840881, "learning_rate": 1.996503405606177e-05, "loss": 2.5194, "step": 1287 }, { "epoch": 0.04, "grad_norm": 0.6445607542991638, "learning_rate": 1.99649451927769e-05, "loss": 2.455, "step": 1288 }, { "epoch": 0.04, "grad_norm": 0.689518928527832, "learning_rate": 1.9964856216913925e-05, "loss": 2.4003, "step": 1289 }, { "epoch": 0.04, "grad_norm": 0.6411250233650208, "learning_rate": 1.996476712847384e-05, "loss": 2.4268, "step": 1290 }, { "epoch": 0.04, "grad_norm": 0.6646576523780823, "learning_rate": 1.9964677927457658e-05, "loss": 2.5256, "step": 1291 }, { "epoch": 0.04, "grad_norm": 0.630534291267395, "learning_rate": 1.9964588613866385e-05, "loss": 2.434, "step": 1292 }, { "epoch": 0.04, "grad_norm": 0.6903734803199768, "learning_rate": 1.996449918770103e-05, "loss": 2.4734, "step": 1293 }, { "epoch": 0.04, "grad_norm": 0.6380398273468018, "learning_rate": 1.9964409648962603e-05, "loss": 2.4945, "step": 1294 }, { "epoch": 0.04, "grad_norm": 0.6500599384307861, "learning_rate": 1.9964319997652112e-05, "loss": 2.473, "step": 1295 }, { "epoch": 0.04, "grad_norm": 0.6738811731338501, "learning_rate": 1.9964230233770576e-05, "loss": 2.5539, "step": 1296 }, { "epoch": 0.04, "grad_norm": 0.6428767442703247, "learning_rate": 1.996414035731901e-05, "loss": 2.4458, "step": 1297 }, { "epoch": 0.04, "grad_norm": 0.6268296837806702, "learning_rate": 1.9964050368298417e-05, "loss": 2.4535, "step": 1298 }, { "epoch": 0.04, "grad_norm": 0.6365980505943298, "learning_rate": 1.996396026670983e-05, "loss": 2.4581, "step": 1299 }, { "epoch": 0.04, "grad_norm": 0.6351314783096313, "learning_rate": 1.9963870052554256e-05, "loss": 2.5259, "step": 1300 }, { "epoch": 0.04, "grad_norm": 0.6886266469955444, "learning_rate": 1.996377972583272e-05, "loss": 2.5028, "step": 1301 }, { "epoch": 0.04, "grad_norm": 0.6512261033058167, "learning_rate": 1.9963689286546238e-05, "loss": 2.5232, "step": 1302 }, { "epoch": 0.04, "grad_norm": 0.6695678234100342, "learning_rate": 1.9963598734695834e-05, "loss": 2.4975, "step": 1303 }, { "epoch": 0.04, "grad_norm": 0.6604126691818237, "learning_rate": 1.996350807028253e-05, "loss": 2.4152, "step": 1304 }, { "epoch": 0.04, "grad_norm": 0.6352373957633972, "learning_rate": 1.9963417293307353e-05, "loss": 2.4842, "step": 1305 }, { "epoch": 0.04, "grad_norm": 0.6646503806114197, "learning_rate": 1.9963326403771326e-05, "loss": 2.4807, "step": 1306 }, { "epoch": 0.04, "grad_norm": 0.6240456104278564, "learning_rate": 1.9963235401675476e-05, "loss": 2.4038, "step": 1307 }, { "epoch": 0.04, "grad_norm": 0.6661893129348755, "learning_rate": 1.996314428702083e-05, "loss": 2.4963, "step": 1308 }, { "epoch": 0.04, "grad_norm": 0.655264675617218, "learning_rate": 1.996305305980842e-05, "loss": 2.4834, "step": 1309 }, { "epoch": 0.04, "grad_norm": 0.696756899356842, "learning_rate": 1.9962961720039278e-05, "loss": 2.4746, "step": 1310 }, { "epoch": 0.04, "grad_norm": 0.7081061601638794, "learning_rate": 1.996287026771443e-05, "loss": 2.4903, "step": 1311 }, { "epoch": 0.04, "grad_norm": 0.657322883605957, "learning_rate": 1.9962778702834913e-05, "loss": 2.4367, "step": 1312 }, { "epoch": 0.04, "grad_norm": 0.7017743587493896, "learning_rate": 1.996268702540176e-05, "loss": 2.4314, "step": 1313 }, { "epoch": 0.04, "grad_norm": 0.6454604864120483, "learning_rate": 1.996259523541601e-05, "loss": 2.4836, "step": 1314 }, { "epoch": 0.04, "grad_norm": 0.6961708068847656, "learning_rate": 1.99625033328787e-05, "loss": 2.438, "step": 1315 }, { "epoch": 0.04, "grad_norm": 0.6980037093162537, "learning_rate": 1.996241131779086e-05, "loss": 2.4708, "step": 1316 }, { "epoch": 0.04, "grad_norm": 0.7056499123573303, "learning_rate": 1.996231919015354e-05, "loss": 2.4033, "step": 1317 }, { "epoch": 0.04, "grad_norm": 0.6699267625808716, "learning_rate": 1.9962226949967774e-05, "loss": 2.4356, "step": 1318 }, { "epoch": 0.04, "grad_norm": 0.6619434952735901, "learning_rate": 1.9962134597234606e-05, "loss": 2.4638, "step": 1319 }, { "epoch": 0.04, "grad_norm": 0.7110757827758789, "learning_rate": 1.9962042131955083e-05, "loss": 2.4656, "step": 1320 }, { "epoch": 0.04, "grad_norm": 0.742756187915802, "learning_rate": 1.9961949554130242e-05, "loss": 2.4659, "step": 1321 }, { "epoch": 0.04, "grad_norm": 0.6969943642616272, "learning_rate": 1.9961856863761135e-05, "loss": 2.4452, "step": 1322 }, { "epoch": 0.04, "grad_norm": 0.6458531022071838, "learning_rate": 1.9961764060848808e-05, "loss": 2.4329, "step": 1323 }, { "epoch": 0.04, "grad_norm": 0.6976578235626221, "learning_rate": 1.996167114539431e-05, "loss": 2.5283, "step": 1324 }, { "epoch": 0.04, "grad_norm": 0.6413078904151917, "learning_rate": 1.9961578117398687e-05, "loss": 2.4435, "step": 1325 }, { "epoch": 0.04, "grad_norm": 0.6445743441581726, "learning_rate": 1.996148497686299e-05, "loss": 2.4572, "step": 1326 }, { "epoch": 0.04, "grad_norm": 0.6960339546203613, "learning_rate": 1.9961391723788278e-05, "loss": 2.4592, "step": 1327 }, { "epoch": 0.04, "grad_norm": 0.6371101140975952, "learning_rate": 1.99612983581756e-05, "loss": 2.4695, "step": 1328 }, { "epoch": 0.04, "grad_norm": 0.6478706002235413, "learning_rate": 1.996120488002601e-05, "loss": 2.4022, "step": 1329 }, { "epoch": 0.04, "grad_norm": 0.6440830826759338, "learning_rate": 1.9961111289340567e-05, "loss": 2.4604, "step": 1330 }, { "epoch": 0.04, "grad_norm": 0.661811888217926, "learning_rate": 1.9961017586120325e-05, "loss": 2.4909, "step": 1331 }, { "epoch": 0.04, "grad_norm": 0.6538639068603516, "learning_rate": 1.9960923770366345e-05, "loss": 2.4333, "step": 1332 }, { "epoch": 0.04, "grad_norm": 0.6719698905944824, "learning_rate": 1.9960829842079685e-05, "loss": 2.5043, "step": 1333 }, { "epoch": 0.04, "grad_norm": 0.640352725982666, "learning_rate": 1.9960735801261407e-05, "loss": 2.5373, "step": 1334 }, { "epoch": 0.04, "grad_norm": 0.673474907875061, "learning_rate": 1.9960641647912573e-05, "loss": 2.4131, "step": 1335 }, { "epoch": 0.04, "grad_norm": 0.6996123790740967, "learning_rate": 1.9960547382034246e-05, "loss": 2.5014, "step": 1336 }, { "epoch": 0.04, "grad_norm": 0.6799312829971313, "learning_rate": 1.9960453003627494e-05, "loss": 2.4825, "step": 1337 }, { "epoch": 0.04, "grad_norm": 0.6481389999389648, "learning_rate": 1.9960358512693384e-05, "loss": 2.4664, "step": 1338 }, { "epoch": 0.04, "grad_norm": 0.6614128947257996, "learning_rate": 1.9960263909232977e-05, "loss": 2.4074, "step": 1339 }, { "epoch": 0.04, "grad_norm": 0.635061502456665, "learning_rate": 1.9960169193247346e-05, "loss": 2.5352, "step": 1340 }, { "epoch": 0.04, "grad_norm": 0.6372793316841125, "learning_rate": 1.9960074364737562e-05, "loss": 2.4957, "step": 1341 }, { "epoch": 0.04, "grad_norm": 0.6712480783462524, "learning_rate": 1.9959979423704692e-05, "loss": 2.4521, "step": 1342 }, { "epoch": 0.04, "grad_norm": 0.6700102686882019, "learning_rate": 1.9959884370149815e-05, "loss": 2.4668, "step": 1343 }, { "epoch": 0.04, "grad_norm": 0.6707836389541626, "learning_rate": 1.9959789204074e-05, "loss": 2.4943, "step": 1344 }, { "epoch": 0.04, "grad_norm": 0.6268095374107361, "learning_rate": 1.9959693925478324e-05, "loss": 2.4197, "step": 1345 }, { "epoch": 0.04, "grad_norm": 0.6622927784919739, "learning_rate": 1.9959598534363863e-05, "loss": 2.4782, "step": 1346 }, { "epoch": 0.04, "grad_norm": 0.6450578570365906, "learning_rate": 1.9959503030731696e-05, "loss": 2.4494, "step": 1347 }, { "epoch": 0.04, "grad_norm": 0.6368165016174316, "learning_rate": 1.9959407414582895e-05, "loss": 2.4482, "step": 1348 }, { "epoch": 0.04, "grad_norm": 0.6570246815681458, "learning_rate": 1.9959311685918553e-05, "loss": 2.4203, "step": 1349 }, { "epoch": 0.04, "grad_norm": 0.6869211196899414, "learning_rate": 1.9959215844739742e-05, "loss": 2.5324, "step": 1350 }, { "epoch": 0.04, "grad_norm": 0.7127034664154053, "learning_rate": 1.9959119891047546e-05, "loss": 2.5216, "step": 1351 }, { "epoch": 0.04, "grad_norm": 0.6510097980499268, "learning_rate": 1.995902382484305e-05, "loss": 2.4695, "step": 1352 }, { "epoch": 0.05, "grad_norm": 0.6794703602790833, "learning_rate": 1.995892764612734e-05, "loss": 2.4854, "step": 1353 }, { "epoch": 0.05, "grad_norm": 0.6507699489593506, "learning_rate": 1.99588313549015e-05, "loss": 2.3988, "step": 1354 }, { "epoch": 0.05, "grad_norm": 0.6657196283340454, "learning_rate": 1.9958734951166624e-05, "loss": 2.4094, "step": 1355 }, { "epoch": 0.05, "grad_norm": 0.6738486289978027, "learning_rate": 1.9958638434923795e-05, "loss": 2.4578, "step": 1356 }, { "epoch": 0.05, "grad_norm": 0.6376387476921082, "learning_rate": 1.99585418061741e-05, "loss": 2.4534, "step": 1357 }, { "epoch": 0.05, "grad_norm": 0.6589769124984741, "learning_rate": 1.9958445064918646e-05, "loss": 2.4137, "step": 1358 }, { "epoch": 0.05, "grad_norm": 0.649523138999939, "learning_rate": 1.995834821115851e-05, "loss": 2.459, "step": 1359 }, { "epoch": 0.05, "grad_norm": 0.6733065247535706, "learning_rate": 1.9958251244894793e-05, "loss": 2.5466, "step": 1360 }, { "epoch": 0.05, "grad_norm": 0.6583726406097412, "learning_rate": 1.9958154166128588e-05, "loss": 2.5025, "step": 1361 }, { "epoch": 0.05, "grad_norm": 0.6689596176147461, "learning_rate": 1.9958056974860994e-05, "loss": 2.3948, "step": 1362 }, { "epoch": 0.05, "grad_norm": 0.6504204273223877, "learning_rate": 1.9957959671093107e-05, "loss": 2.4677, "step": 1363 }, { "epoch": 0.05, "grad_norm": 0.6603761315345764, "learning_rate": 1.9957862254826034e-05, "loss": 2.439, "step": 1364 }, { "epoch": 0.05, "grad_norm": 0.655724048614502, "learning_rate": 1.9957764726060863e-05, "loss": 2.4584, "step": 1365 }, { "epoch": 0.05, "grad_norm": 0.6528634428977966, "learning_rate": 1.99576670847987e-05, "loss": 2.4153, "step": 1366 }, { "epoch": 0.05, "grad_norm": 0.6665694117546082, "learning_rate": 1.9957569331040658e-05, "loss": 2.5118, "step": 1367 }, { "epoch": 0.05, "grad_norm": 0.6297091245651245, "learning_rate": 1.9957471464787826e-05, "loss": 2.4325, "step": 1368 }, { "epoch": 0.05, "grad_norm": 0.7026760578155518, "learning_rate": 1.995737348604132e-05, "loss": 2.4842, "step": 1369 }, { "epoch": 0.05, "grad_norm": 0.6293516755104065, "learning_rate": 1.9957275394802244e-05, "loss": 2.4273, "step": 1370 }, { "epoch": 0.05, "grad_norm": 0.6752887964248657, "learning_rate": 1.995717719107171e-05, "loss": 2.4505, "step": 1371 }, { "epoch": 0.05, "grad_norm": 0.6465798616409302, "learning_rate": 1.9957078874850816e-05, "loss": 2.4412, "step": 1372 }, { "epoch": 0.05, "grad_norm": 0.6404193043708801, "learning_rate": 1.9956980446140687e-05, "loss": 2.5465, "step": 1373 }, { "epoch": 0.05, "grad_norm": 0.6800628304481506, "learning_rate": 1.995688190494242e-05, "loss": 2.4986, "step": 1374 }, { "epoch": 0.05, "grad_norm": 0.6417450904846191, "learning_rate": 1.9956783251257144e-05, "loss": 2.4614, "step": 1375 }, { "epoch": 0.05, "grad_norm": 0.671389102935791, "learning_rate": 1.9956684485085965e-05, "loss": 2.5032, "step": 1376 }, { "epoch": 0.05, "grad_norm": 0.659179151058197, "learning_rate": 1.995658560643e-05, "loss": 2.5366, "step": 1377 }, { "epoch": 0.05, "grad_norm": 0.6766591668128967, "learning_rate": 1.9956486615290363e-05, "loss": 2.5038, "step": 1378 }, { "epoch": 0.05, "grad_norm": 0.6527840495109558, "learning_rate": 1.9956387511668177e-05, "loss": 2.5286, "step": 1379 }, { "epoch": 0.05, "grad_norm": 0.6626148819923401, "learning_rate": 1.9956288295564562e-05, "loss": 2.4467, "step": 1380 }, { "epoch": 0.05, "grad_norm": 0.6345047354698181, "learning_rate": 1.9956188966980633e-05, "loss": 2.5187, "step": 1381 }, { "epoch": 0.05, "grad_norm": 0.6495223641395569, "learning_rate": 1.9956089525917516e-05, "loss": 2.3816, "step": 1382 }, { "epoch": 0.05, "grad_norm": 0.6508678793907166, "learning_rate": 1.9955989972376337e-05, "loss": 2.5325, "step": 1383 }, { "epoch": 0.05, "grad_norm": 0.6658614873886108, "learning_rate": 1.9955890306358216e-05, "loss": 2.4572, "step": 1384 }, { "epoch": 0.05, "grad_norm": 0.6279206275939941, "learning_rate": 1.9955790527864284e-05, "loss": 2.4017, "step": 1385 }, { "epoch": 0.05, "grad_norm": 0.6657488346099854, "learning_rate": 1.9955690636895663e-05, "loss": 2.4689, "step": 1386 }, { "epoch": 0.05, "grad_norm": 0.6805073022842407, "learning_rate": 1.9955590633453483e-05, "loss": 2.4432, "step": 1387 }, { "epoch": 0.05, "grad_norm": 0.6697499752044678, "learning_rate": 1.9955490517538877e-05, "loss": 2.4984, "step": 1388 }, { "epoch": 0.05, "grad_norm": 0.6372087597846985, "learning_rate": 1.995539028915297e-05, "loss": 2.4308, "step": 1389 }, { "epoch": 0.05, "grad_norm": 0.6644550561904907, "learning_rate": 1.99552899482969e-05, "loss": 2.433, "step": 1390 }, { "epoch": 0.05, "grad_norm": 0.6407074332237244, "learning_rate": 1.9955189494971802e-05, "loss": 2.4654, "step": 1391 }, { "epoch": 0.05, "grad_norm": 0.6997047662734985, "learning_rate": 1.9955088929178804e-05, "loss": 2.4656, "step": 1392 }, { "epoch": 0.05, "grad_norm": 0.6186793446540833, "learning_rate": 1.9954988250919045e-05, "loss": 2.3622, "step": 1393 }, { "epoch": 0.05, "grad_norm": 0.6754992604255676, "learning_rate": 1.9954887460193663e-05, "loss": 2.5099, "step": 1394 }, { "epoch": 0.05, "grad_norm": 0.6849030256271362, "learning_rate": 1.9954786557003797e-05, "loss": 2.5153, "step": 1395 }, { "epoch": 0.05, "grad_norm": 0.6449416279792786, "learning_rate": 1.9954685541350587e-05, "loss": 2.4798, "step": 1396 }, { "epoch": 0.05, "grad_norm": 0.6258751153945923, "learning_rate": 1.9954584413235174e-05, "loss": 2.4951, "step": 1397 }, { "epoch": 0.05, "grad_norm": 0.6558632254600525, "learning_rate": 1.9954483172658698e-05, "loss": 2.412, "step": 1398 }, { "epoch": 0.05, "grad_norm": 0.6475643515586853, "learning_rate": 1.995438181962231e-05, "loss": 2.5025, "step": 1399 }, { "epoch": 0.05, "grad_norm": 0.6989474296569824, "learning_rate": 1.9954280354127147e-05, "loss": 2.4506, "step": 1400 }, { "epoch": 0.05, "grad_norm": 0.6567366719245911, "learning_rate": 1.9954178776174358e-05, "loss": 2.4485, "step": 1401 }, { "epoch": 0.05, "grad_norm": 0.6459561586380005, "learning_rate": 1.9954077085765087e-05, "loss": 2.3865, "step": 1402 }, { "epoch": 0.05, "grad_norm": 0.6543406844139099, "learning_rate": 1.9953975282900494e-05, "loss": 2.4356, "step": 1403 }, { "epoch": 0.05, "grad_norm": 0.6842741370201111, "learning_rate": 1.995387336758172e-05, "loss": 2.416, "step": 1404 }, { "epoch": 0.05, "grad_norm": 0.6660739779472351, "learning_rate": 1.995377133980992e-05, "loss": 2.4348, "step": 1405 }, { "epoch": 0.05, "grad_norm": 0.6702792048454285, "learning_rate": 1.9953669199586238e-05, "loss": 2.5076, "step": 1406 }, { "epoch": 0.05, "grad_norm": 0.6631298661231995, "learning_rate": 1.995356694691184e-05, "loss": 2.4301, "step": 1407 }, { "epoch": 0.05, "grad_norm": 0.6334249377250671, "learning_rate": 1.9953464581787873e-05, "loss": 2.4175, "step": 1408 }, { "epoch": 0.05, "grad_norm": 0.6762251257896423, "learning_rate": 1.99533621042155e-05, "loss": 2.4747, "step": 1409 }, { "epoch": 0.05, "grad_norm": 0.6757739186286926, "learning_rate": 1.9953259514195872e-05, "loss": 2.4625, "step": 1410 }, { "epoch": 0.05, "grad_norm": 0.6670404672622681, "learning_rate": 1.9953156811730153e-05, "loss": 2.4713, "step": 1411 }, { "epoch": 0.05, "grad_norm": 0.6381444334983826, "learning_rate": 1.99530539968195e-05, "loss": 2.3485, "step": 1412 }, { "epoch": 0.05, "grad_norm": 0.7113683223724365, "learning_rate": 1.995295106946508e-05, "loss": 2.4354, "step": 1413 }, { "epoch": 0.05, "grad_norm": 0.6402413845062256, "learning_rate": 1.9952848029668045e-05, "loss": 2.4277, "step": 1414 }, { "epoch": 0.05, "grad_norm": 0.6554989218711853, "learning_rate": 1.995274487742957e-05, "loss": 2.4241, "step": 1415 }, { "epoch": 0.05, "grad_norm": 0.6495899558067322, "learning_rate": 1.995264161275082e-05, "loss": 2.4419, "step": 1416 }, { "epoch": 0.05, "grad_norm": 0.6453097462654114, "learning_rate": 1.995253823563295e-05, "loss": 2.4402, "step": 1417 }, { "epoch": 0.05, "grad_norm": 0.6719436049461365, "learning_rate": 1.9952434746077142e-05, "loss": 2.4673, "step": 1418 }, { "epoch": 0.05, "grad_norm": 0.6509451866149902, "learning_rate": 1.9952331144084557e-05, "loss": 2.4369, "step": 1419 }, { "epoch": 0.05, "grad_norm": 0.6536533236503601, "learning_rate": 1.9952227429656367e-05, "loss": 2.4723, "step": 1420 }, { "epoch": 0.05, "grad_norm": 0.6670294404029846, "learning_rate": 1.9952123602793746e-05, "loss": 2.4833, "step": 1421 }, { "epoch": 0.05, "grad_norm": 0.6520291566848755, "learning_rate": 1.995201966349786e-05, "loss": 2.445, "step": 1422 }, { "epoch": 0.05, "grad_norm": 0.6867606043815613, "learning_rate": 1.995191561176989e-05, "loss": 2.4425, "step": 1423 }, { "epoch": 0.05, "grad_norm": 0.6837676763534546, "learning_rate": 1.995181144761101e-05, "loss": 2.5294, "step": 1424 }, { "epoch": 0.05, "grad_norm": 0.6840876340866089, "learning_rate": 1.99517071710224e-05, "loss": 2.4623, "step": 1425 }, { "epoch": 0.05, "grad_norm": 0.6418316960334778, "learning_rate": 1.9951602782005234e-05, "loss": 2.4164, "step": 1426 }, { "epoch": 0.05, "grad_norm": 0.6555109620094299, "learning_rate": 1.995149828056069e-05, "loss": 2.355, "step": 1427 }, { "epoch": 0.05, "grad_norm": 0.697262704372406, "learning_rate": 1.995139366668995e-05, "loss": 2.432, "step": 1428 }, { "epoch": 0.05, "grad_norm": 0.684881865978241, "learning_rate": 1.9951288940394196e-05, "loss": 2.4478, "step": 1429 }, { "epoch": 0.05, "grad_norm": 0.6949005126953125, "learning_rate": 1.9951184101674615e-05, "loss": 2.4227, "step": 1430 }, { "epoch": 0.05, "grad_norm": 0.6596503257751465, "learning_rate": 1.9951079150532387e-05, "loss": 2.4358, "step": 1431 }, { "epoch": 0.05, "grad_norm": 0.6502809524536133, "learning_rate": 1.9950974086968696e-05, "loss": 2.4377, "step": 1432 }, { "epoch": 0.05, "grad_norm": 0.6620749831199646, "learning_rate": 1.9950868910984736e-05, "loss": 2.4739, "step": 1433 }, { "epoch": 0.05, "grad_norm": 0.6707249283790588, "learning_rate": 1.9950763622581685e-05, "loss": 2.478, "step": 1434 }, { "epoch": 0.05, "grad_norm": 0.6480233073234558, "learning_rate": 1.9950658221760744e-05, "loss": 2.4544, "step": 1435 }, { "epoch": 0.05, "grad_norm": 0.7066438794136047, "learning_rate": 1.9950552708523095e-05, "loss": 2.5056, "step": 1436 }, { "epoch": 0.05, "grad_norm": 0.6745548844337463, "learning_rate": 1.9950447082869932e-05, "loss": 2.4512, "step": 1437 }, { "epoch": 0.05, "grad_norm": 0.6837611198425293, "learning_rate": 1.9950341344802454e-05, "loss": 2.5036, "step": 1438 }, { "epoch": 0.05, "grad_norm": 0.7114278674125671, "learning_rate": 1.9950235494321845e-05, "loss": 2.4821, "step": 1439 }, { "epoch": 0.05, "grad_norm": 0.735874593257904, "learning_rate": 1.995012953142931e-05, "loss": 2.4204, "step": 1440 }, { "epoch": 0.05, "grad_norm": 0.704858124256134, "learning_rate": 1.9950023456126043e-05, "loss": 2.4549, "step": 1441 }, { "epoch": 0.05, "grad_norm": 0.6343608498573303, "learning_rate": 1.9949917268413245e-05, "loss": 2.3616, "step": 1442 }, { "epoch": 0.05, "grad_norm": 0.6664688587188721, "learning_rate": 1.9949810968292108e-05, "loss": 2.4674, "step": 1443 }, { "epoch": 0.05, "grad_norm": 0.6743026375770569, "learning_rate": 1.9949704555763837e-05, "loss": 2.448, "step": 1444 }, { "epoch": 0.05, "grad_norm": 0.6959954500198364, "learning_rate": 1.994959803082964e-05, "loss": 2.4971, "step": 1445 }, { "epoch": 0.05, "grad_norm": 0.6768322587013245, "learning_rate": 1.9949491393490712e-05, "loss": 2.4193, "step": 1446 }, { "epoch": 0.05, "grad_norm": 0.6748964190483093, "learning_rate": 1.9949384643748264e-05, "loss": 2.4642, "step": 1447 }, { "epoch": 0.05, "grad_norm": 0.6769373416900635, "learning_rate": 1.9949277781603497e-05, "loss": 2.4381, "step": 1448 }, { "epoch": 0.05, "grad_norm": 0.6413769125938416, "learning_rate": 1.9949170807057624e-05, "loss": 2.5033, "step": 1449 }, { "epoch": 0.05, "grad_norm": 0.6787489056587219, "learning_rate": 1.9949063720111844e-05, "loss": 2.4863, "step": 1450 }, { "epoch": 0.05, "grad_norm": 0.6813258528709412, "learning_rate": 1.9948956520767377e-05, "loss": 2.4412, "step": 1451 }, { "epoch": 0.05, "grad_norm": 0.6843349933624268, "learning_rate": 1.994884920902543e-05, "loss": 2.4557, "step": 1452 }, { "epoch": 0.05, "grad_norm": 0.6443434953689575, "learning_rate": 1.9948741784887214e-05, "loss": 2.4452, "step": 1453 }, { "epoch": 0.05, "grad_norm": 0.632203221321106, "learning_rate": 1.9948634248353943e-05, "loss": 2.4124, "step": 1454 }, { "epoch": 0.05, "grad_norm": 0.6675472259521484, "learning_rate": 1.9948526599426834e-05, "loss": 2.4227, "step": 1455 }, { "epoch": 0.05, "grad_norm": 0.6401450037956238, "learning_rate": 1.9948418838107103e-05, "loss": 2.364, "step": 1456 }, { "epoch": 0.05, "grad_norm": 0.6327877044677734, "learning_rate": 1.9948310964395963e-05, "loss": 2.4603, "step": 1457 }, { "epoch": 0.05, "grad_norm": 0.6701040267944336, "learning_rate": 1.9948202978294636e-05, "loss": 2.4826, "step": 1458 }, { "epoch": 0.05, "grad_norm": 0.7000370621681213, "learning_rate": 1.9948094879804344e-05, "loss": 2.4118, "step": 1459 }, { "epoch": 0.05, "grad_norm": 0.6528130173683167, "learning_rate": 1.994798666892631e-05, "loss": 2.4789, "step": 1460 }, { "epoch": 0.05, "grad_norm": 0.6773544549942017, "learning_rate": 1.9947878345661748e-05, "loss": 2.4532, "step": 1461 }, { "epoch": 0.05, "grad_norm": 0.6997448801994324, "learning_rate": 1.9947769910011887e-05, "loss": 2.4173, "step": 1462 }, { "epoch": 0.05, "grad_norm": 0.6456718444824219, "learning_rate": 1.994766136197795e-05, "loss": 2.4131, "step": 1463 }, { "epoch": 0.05, "grad_norm": 0.6528633832931519, "learning_rate": 1.9947552701561168e-05, "loss": 2.4492, "step": 1464 }, { "epoch": 0.05, "grad_norm": 0.6766114234924316, "learning_rate": 1.9947443928762766e-05, "loss": 2.4406, "step": 1465 }, { "epoch": 0.05, "grad_norm": 0.6753530502319336, "learning_rate": 1.9947335043583965e-05, "loss": 2.4842, "step": 1466 }, { "epoch": 0.05, "grad_norm": 0.6650490760803223, "learning_rate": 1.994722604602601e-05, "loss": 2.44, "step": 1467 }, { "epoch": 0.05, "grad_norm": 0.6383075714111328, "learning_rate": 1.994711693609012e-05, "loss": 2.4848, "step": 1468 }, { "epoch": 0.05, "grad_norm": 0.6608891487121582, "learning_rate": 1.9947007713777537e-05, "loss": 2.3905, "step": 1469 }, { "epoch": 0.05, "grad_norm": 0.7179611325263977, "learning_rate": 1.994689837908949e-05, "loss": 2.4612, "step": 1470 }, { "epoch": 0.05, "grad_norm": 0.6353431940078735, "learning_rate": 1.9946788932027212e-05, "loss": 2.366, "step": 1471 }, { "epoch": 0.05, "grad_norm": 0.6647564768791199, "learning_rate": 1.994667937259194e-05, "loss": 2.4345, "step": 1472 }, { "epoch": 0.05, "grad_norm": 0.6502447724342346, "learning_rate": 1.9946569700784918e-05, "loss": 2.4329, "step": 1473 }, { "epoch": 0.05, "grad_norm": 0.6676260828971863, "learning_rate": 1.994645991660738e-05, "loss": 2.4561, "step": 1474 }, { "epoch": 0.05, "grad_norm": 0.6598013043403625, "learning_rate": 1.9946350020060565e-05, "loss": 2.4406, "step": 1475 }, { "epoch": 0.05, "grad_norm": 0.64496248960495, "learning_rate": 1.9946240011145722e-05, "loss": 2.4118, "step": 1476 }, { "epoch": 0.05, "grad_norm": 0.6442719101905823, "learning_rate": 1.994612988986408e-05, "loss": 2.4802, "step": 1477 }, { "epoch": 0.05, "grad_norm": 0.6791317462921143, "learning_rate": 1.9946019656216896e-05, "loss": 2.4789, "step": 1478 }, { "epoch": 0.05, "grad_norm": 0.6613801717758179, "learning_rate": 1.9945909310205413e-05, "loss": 2.3991, "step": 1479 }, { "epoch": 0.05, "grad_norm": 0.6486839652061462, "learning_rate": 1.9945798851830873e-05, "loss": 2.4855, "step": 1480 }, { "epoch": 0.05, "grad_norm": 0.6682941317558289, "learning_rate": 1.994568828109453e-05, "loss": 2.5033, "step": 1481 }, { "epoch": 0.05, "grad_norm": 0.6302229762077332, "learning_rate": 1.9945577597997622e-05, "loss": 2.4025, "step": 1482 }, { "epoch": 0.05, "grad_norm": 0.6426867842674255, "learning_rate": 1.9945466802541415e-05, "loss": 2.4209, "step": 1483 }, { "epoch": 0.05, "grad_norm": 0.6459939479827881, "learning_rate": 1.9945355894727147e-05, "loss": 2.4595, "step": 1484 }, { "epoch": 0.05, "grad_norm": 0.7037394642829895, "learning_rate": 1.9945244874556077e-05, "loss": 2.4495, "step": 1485 }, { "epoch": 0.05, "grad_norm": 0.6541789174079895, "learning_rate": 1.994513374202946e-05, "loss": 2.4435, "step": 1486 }, { "epoch": 0.05, "grad_norm": 0.6893953084945679, "learning_rate": 1.9945022497148553e-05, "loss": 2.4323, "step": 1487 }, { "epoch": 0.05, "grad_norm": 0.6722946166992188, "learning_rate": 1.994491113991461e-05, "loss": 2.4677, "step": 1488 }, { "epoch": 0.05, "grad_norm": 0.7074438333511353, "learning_rate": 1.9944799670328888e-05, "loss": 2.4202, "step": 1489 }, { "epoch": 0.05, "grad_norm": 0.6615106463432312, "learning_rate": 1.9944688088392644e-05, "loss": 2.4234, "step": 1490 }, { "epoch": 0.05, "grad_norm": 0.6668723821640015, "learning_rate": 1.9944576394107144e-05, "loss": 2.3832, "step": 1491 }, { "epoch": 0.05, "grad_norm": 0.6496140956878662, "learning_rate": 1.994446458747365e-05, "loss": 2.4246, "step": 1492 }, { "epoch": 0.05, "grad_norm": 0.6784415245056152, "learning_rate": 1.9944352668493422e-05, "loss": 2.3942, "step": 1493 }, { "epoch": 0.05, "grad_norm": 0.697881817817688, "learning_rate": 1.9944240637167728e-05, "loss": 2.4847, "step": 1494 }, { "epoch": 0.05, "grad_norm": 0.6393730044364929, "learning_rate": 1.994412849349783e-05, "loss": 2.4267, "step": 1495 }, { "epoch": 0.05, "grad_norm": 0.6433627605438232, "learning_rate": 1.994401623748499e-05, "loss": 2.4117, "step": 1496 }, { "epoch": 0.05, "grad_norm": 0.7175366878509521, "learning_rate": 1.994390386913049e-05, "loss": 2.3979, "step": 1497 }, { "epoch": 0.05, "grad_norm": 0.6627487540245056, "learning_rate": 1.994379138843559e-05, "loss": 2.482, "step": 1498 }, { "epoch": 0.05, "grad_norm": 0.6880819201469421, "learning_rate": 1.994367879540156e-05, "loss": 2.4718, "step": 1499 }, { "epoch": 0.05, "grad_norm": 0.6599110960960388, "learning_rate": 1.9943566090029675e-05, "loss": 2.3975, "step": 1500 }, { "epoch": 0.05, "grad_norm": 0.66238933801651, "learning_rate": 1.994345327232121e-05, "loss": 2.4052, "step": 1501 }, { "epoch": 0.05, "grad_norm": 0.6406200528144836, "learning_rate": 1.9943340342277437e-05, "loss": 2.4193, "step": 1502 }, { "epoch": 0.05, "grad_norm": 0.643899142742157, "learning_rate": 1.994322729989963e-05, "loss": 2.4207, "step": 1503 }, { "epoch": 0.05, "grad_norm": 0.6523613929748535, "learning_rate": 1.994311414518907e-05, "loss": 2.3813, "step": 1504 }, { "epoch": 0.05, "grad_norm": 0.6831004023551941, "learning_rate": 1.9943000878147034e-05, "loss": 2.4456, "step": 1505 }, { "epoch": 0.05, "grad_norm": 0.6590916514396667, "learning_rate": 1.9942887498774798e-05, "loss": 2.4683, "step": 1506 }, { "epoch": 0.05, "grad_norm": 0.6561903953552246, "learning_rate": 1.994277400707365e-05, "loss": 2.4258, "step": 1507 }, { "epoch": 0.05, "grad_norm": 0.6884208917617798, "learning_rate": 1.994266040304487e-05, "loss": 2.4078, "step": 1508 }, { "epoch": 0.05, "grad_norm": 0.6622353792190552, "learning_rate": 1.9942546686689736e-05, "loss": 2.4797, "step": 1509 }, { "epoch": 0.05, "grad_norm": 0.6397408246994019, "learning_rate": 1.9942432858009537e-05, "loss": 2.4671, "step": 1510 }, { "epoch": 0.05, "grad_norm": 0.6506167650222778, "learning_rate": 1.994231891700556e-05, "loss": 2.4709, "step": 1511 }, { "epoch": 0.05, "grad_norm": 0.67271488904953, "learning_rate": 1.9942204863679087e-05, "loss": 2.4528, "step": 1512 }, { "epoch": 0.05, "grad_norm": 0.6726418137550354, "learning_rate": 1.9942090698031415e-05, "loss": 2.448, "step": 1513 }, { "epoch": 0.05, "grad_norm": 0.6408865451812744, "learning_rate": 1.9941976420063826e-05, "loss": 2.4428, "step": 1514 }, { "epoch": 0.05, "grad_norm": 0.6632372140884399, "learning_rate": 1.994186202977761e-05, "loss": 2.3638, "step": 1515 }, { "epoch": 0.05, "grad_norm": 0.6702383756637573, "learning_rate": 1.994174752717407e-05, "loss": 2.4671, "step": 1516 }, { "epoch": 0.05, "grad_norm": 0.6440345048904419, "learning_rate": 1.994163291225449e-05, "loss": 2.4225, "step": 1517 }, { "epoch": 0.05, "grad_norm": 0.6779958009719849, "learning_rate": 1.994151818502017e-05, "loss": 2.4266, "step": 1518 }, { "epoch": 0.05, "grad_norm": 0.6389851570129395, "learning_rate": 1.99414033454724e-05, "loss": 2.4749, "step": 1519 }, { "epoch": 0.05, "grad_norm": 0.6462379693984985, "learning_rate": 1.9941288393612482e-05, "loss": 2.4849, "step": 1520 }, { "epoch": 0.05, "grad_norm": 0.6789087653160095, "learning_rate": 1.9941173329441716e-05, "loss": 2.4594, "step": 1521 }, { "epoch": 0.05, "grad_norm": 0.7006807923316956, "learning_rate": 1.99410581529614e-05, "loss": 2.3854, "step": 1522 }, { "epoch": 0.05, "grad_norm": 0.6482889652252197, "learning_rate": 1.9940942864172834e-05, "loss": 2.4475, "step": 1523 }, { "epoch": 0.05, "grad_norm": 0.6787514686584473, "learning_rate": 1.9940827463077325e-05, "loss": 2.4828, "step": 1524 }, { "epoch": 0.05, "grad_norm": 0.6578748226165771, "learning_rate": 1.9940711949676166e-05, "loss": 2.4834, "step": 1525 }, { "epoch": 0.05, "grad_norm": 0.6645954847335815, "learning_rate": 1.9940596323970674e-05, "loss": 2.422, "step": 1526 }, { "epoch": 0.05, "grad_norm": 0.6895474195480347, "learning_rate": 1.9940480585962148e-05, "loss": 2.5313, "step": 1527 }, { "epoch": 0.05, "grad_norm": 0.6712403297424316, "learning_rate": 1.99403647356519e-05, "loss": 2.4299, "step": 1528 }, { "epoch": 0.05, "grad_norm": 0.655525267124176, "learning_rate": 1.994024877304124e-05, "loss": 2.3436, "step": 1529 }, { "epoch": 0.05, "grad_norm": 0.6689746379852295, "learning_rate": 1.994013269813147e-05, "loss": 2.4294, "step": 1530 }, { "epoch": 0.05, "grad_norm": 0.6445267200469971, "learning_rate": 1.9940016510923907e-05, "loss": 2.4486, "step": 1531 }, { "epoch": 0.05, "grad_norm": 0.6358187794685364, "learning_rate": 1.9939900211419863e-05, "loss": 2.423, "step": 1532 }, { "epoch": 0.05, "grad_norm": 0.6302741765975952, "learning_rate": 1.9939783799620653e-05, "loss": 2.3782, "step": 1533 }, { "epoch": 0.05, "grad_norm": 0.6615095138549805, "learning_rate": 1.993966727552759e-05, "loss": 2.4817, "step": 1534 }, { "epoch": 0.05, "grad_norm": 0.6813662052154541, "learning_rate": 1.993955063914199e-05, "loss": 2.3707, "step": 1535 }, { "epoch": 0.05, "grad_norm": 0.7147194743156433, "learning_rate": 1.9939433890465173e-05, "loss": 2.3969, "step": 1536 }, { "epoch": 0.05, "grad_norm": 0.6699428558349609, "learning_rate": 1.9939317029498458e-05, "loss": 2.4376, "step": 1537 }, { "epoch": 0.05, "grad_norm": 0.6649152040481567, "learning_rate": 1.9939200056243164e-05, "loss": 2.4177, "step": 1538 }, { "epoch": 0.05, "grad_norm": 0.6570271253585815, "learning_rate": 1.9939082970700612e-05, "loss": 2.3663, "step": 1539 }, { "epoch": 0.05, "grad_norm": 0.6725113987922668, "learning_rate": 1.9938965772872124e-05, "loss": 2.3496, "step": 1540 }, { "epoch": 0.05, "grad_norm": 0.6776495575904846, "learning_rate": 1.993884846275903e-05, "loss": 2.4243, "step": 1541 }, { "epoch": 0.05, "grad_norm": 0.6712349057197571, "learning_rate": 1.993873104036265e-05, "loss": 2.4676, "step": 1542 }, { "epoch": 0.05, "grad_norm": 0.7130674123764038, "learning_rate": 1.993861350568431e-05, "loss": 2.4445, "step": 1543 }, { "epoch": 0.05, "grad_norm": 0.7261738181114197, "learning_rate": 1.9938495858725334e-05, "loss": 2.4102, "step": 1544 }, { "epoch": 0.05, "grad_norm": 0.6606306433677673, "learning_rate": 1.993837809948706e-05, "loss": 2.458, "step": 1545 }, { "epoch": 0.05, "grad_norm": 0.7028478384017944, "learning_rate": 1.993826022797082e-05, "loss": 2.4384, "step": 1546 }, { "epoch": 0.05, "grad_norm": 0.6371980905532837, "learning_rate": 1.9938142244177935e-05, "loss": 2.4642, "step": 1547 }, { "epoch": 0.05, "grad_norm": 0.6604588627815247, "learning_rate": 1.9938024148109742e-05, "loss": 2.3999, "step": 1548 }, { "epoch": 0.05, "grad_norm": 0.6582524180412292, "learning_rate": 1.9937905939767583e-05, "loss": 2.422, "step": 1549 }, { "epoch": 0.05, "grad_norm": 0.6685971617698669, "learning_rate": 1.993778761915278e-05, "loss": 2.4648, "step": 1550 }, { "epoch": 0.05, "grad_norm": 0.6981533169746399, "learning_rate": 1.9937669186266678e-05, "loss": 2.4648, "step": 1551 }, { "epoch": 0.05, "grad_norm": 0.6834406852722168, "learning_rate": 1.9937550641110615e-05, "loss": 2.429, "step": 1552 }, { "epoch": 0.05, "grad_norm": 0.6810020804405212, "learning_rate": 1.993743198368593e-05, "loss": 2.4485, "step": 1553 }, { "epoch": 0.05, "grad_norm": 0.6478970646858215, "learning_rate": 1.9937313213993962e-05, "loss": 2.4009, "step": 1554 }, { "epoch": 0.05, "grad_norm": 0.6841230392456055, "learning_rate": 1.993719433203605e-05, "loss": 2.4762, "step": 1555 }, { "epoch": 0.05, "grad_norm": 0.6615786552429199, "learning_rate": 1.9937075337813545e-05, "loss": 2.4232, "step": 1556 }, { "epoch": 0.05, "grad_norm": 0.6891390085220337, "learning_rate": 1.9936956231327783e-05, "loss": 2.4517, "step": 1557 }, { "epoch": 0.05, "grad_norm": 0.6467688679695129, "learning_rate": 1.9936837012580113e-05, "loss": 2.4618, "step": 1558 }, { "epoch": 0.05, "grad_norm": 0.6353906393051147, "learning_rate": 1.9936717681571883e-05, "loss": 2.3032, "step": 1559 }, { "epoch": 0.05, "grad_norm": 0.662642240524292, "learning_rate": 1.9936598238304442e-05, "loss": 2.3919, "step": 1560 }, { "epoch": 0.05, "grad_norm": 0.6605132818222046, "learning_rate": 1.9936478682779135e-05, "loss": 2.4362, "step": 1561 }, { "epoch": 0.05, "grad_norm": 0.6424047350883484, "learning_rate": 1.9936359014997314e-05, "loss": 2.4628, "step": 1562 }, { "epoch": 0.05, "grad_norm": 0.676135241985321, "learning_rate": 1.9936239234960337e-05, "loss": 2.4219, "step": 1563 }, { "epoch": 0.05, "grad_norm": 0.6838515996932983, "learning_rate": 1.9936119342669546e-05, "loss": 2.4443, "step": 1564 }, { "epoch": 0.05, "grad_norm": 0.6867080330848694, "learning_rate": 1.9935999338126307e-05, "loss": 2.4302, "step": 1565 }, { "epoch": 0.05, "grad_norm": 0.6572315692901611, "learning_rate": 1.9935879221331968e-05, "loss": 2.442, "step": 1566 }, { "epoch": 0.05, "grad_norm": 0.6354489326477051, "learning_rate": 1.9935758992287888e-05, "loss": 2.4566, "step": 1567 }, { "epoch": 0.05, "grad_norm": 0.6707313656806946, "learning_rate": 1.993563865099543e-05, "loss": 2.4622, "step": 1568 }, { "epoch": 0.05, "grad_norm": 0.6528519988059998, "learning_rate": 1.9935518197455945e-05, "loss": 2.3938, "step": 1569 }, { "epoch": 0.05, "grad_norm": 0.7114832401275635, "learning_rate": 1.99353976316708e-05, "loss": 2.4286, "step": 1570 }, { "epoch": 0.05, "grad_norm": 0.6804948449134827, "learning_rate": 1.9935276953641357e-05, "loss": 2.3996, "step": 1571 }, { "epoch": 0.05, "grad_norm": 0.6786565184593201, "learning_rate": 1.993515616336897e-05, "loss": 2.3967, "step": 1572 }, { "epoch": 0.05, "grad_norm": 0.6636152863502502, "learning_rate": 1.993503526085502e-05, "loss": 2.4333, "step": 1573 }, { "epoch": 0.05, "grad_norm": 0.6386958956718445, "learning_rate": 1.9934914246100862e-05, "loss": 2.3645, "step": 1574 }, { "epoch": 0.05, "grad_norm": 0.6662698984146118, "learning_rate": 1.9934793119107864e-05, "loss": 2.4286, "step": 1575 }, { "epoch": 0.05, "grad_norm": 0.6887092590332031, "learning_rate": 1.9934671879877393e-05, "loss": 2.4271, "step": 1576 }, { "epoch": 0.05, "grad_norm": 0.6771765351295471, "learning_rate": 1.9934550528410825e-05, "loss": 2.5199, "step": 1577 }, { "epoch": 0.05, "grad_norm": 0.6455226540565491, "learning_rate": 1.9934429064709525e-05, "loss": 2.4054, "step": 1578 }, { "epoch": 0.05, "grad_norm": 0.7230437397956848, "learning_rate": 1.9934307488774872e-05, "loss": 2.5009, "step": 1579 }, { "epoch": 0.05, "grad_norm": 0.6783396601676941, "learning_rate": 1.9934185800608232e-05, "loss": 2.472, "step": 1580 }, { "epoch": 0.05, "grad_norm": 0.6692841053009033, "learning_rate": 1.9934064000210983e-05, "loss": 2.4002, "step": 1581 }, { "epoch": 0.05, "grad_norm": 0.6551260352134705, "learning_rate": 1.99339420875845e-05, "loss": 2.4324, "step": 1582 }, { "epoch": 0.05, "grad_norm": 0.672709047794342, "learning_rate": 1.9933820062730162e-05, "loss": 2.4216, "step": 1583 }, { "epoch": 0.05, "grad_norm": 0.6607254147529602, "learning_rate": 1.993369792564935e-05, "loss": 2.4349, "step": 1584 }, { "epoch": 0.05, "grad_norm": 0.6395419239997864, "learning_rate": 1.9933575676343435e-05, "loss": 2.3873, "step": 1585 }, { "epoch": 0.05, "grad_norm": 0.6873798370361328, "learning_rate": 1.9933453314813808e-05, "loss": 2.4809, "step": 1586 }, { "epoch": 0.05, "grad_norm": 0.734438955783844, "learning_rate": 1.9933330841061842e-05, "loss": 2.4436, "step": 1587 }, { "epoch": 0.05, "grad_norm": 0.67962646484375, "learning_rate": 1.9933208255088928e-05, "loss": 2.4159, "step": 1588 }, { "epoch": 0.05, "grad_norm": 0.6698024272918701, "learning_rate": 1.993308555689645e-05, "loss": 2.4169, "step": 1589 }, { "epoch": 0.05, "grad_norm": 0.6622918248176575, "learning_rate": 1.9932962746485793e-05, "loss": 2.4357, "step": 1590 }, { "epoch": 0.05, "grad_norm": 0.6909894347190857, "learning_rate": 1.9932839823858343e-05, "loss": 2.4753, "step": 1591 }, { "epoch": 0.05, "grad_norm": 0.6382946968078613, "learning_rate": 1.993271678901549e-05, "loss": 2.4403, "step": 1592 }, { "epoch": 0.05, "grad_norm": 0.6618476510047913, "learning_rate": 1.9932593641958624e-05, "loss": 2.3979, "step": 1593 }, { "epoch": 0.05, "grad_norm": 0.6698614954948425, "learning_rate": 1.9932470382689137e-05, "loss": 2.3737, "step": 1594 }, { "epoch": 0.05, "grad_norm": 0.6669596433639526, "learning_rate": 1.993234701120842e-05, "loss": 2.3625, "step": 1595 }, { "epoch": 0.05, "grad_norm": 0.7005311250686646, "learning_rate": 1.9932223527517864e-05, "loss": 2.4255, "step": 1596 }, { "epoch": 0.05, "grad_norm": 0.6587526798248291, "learning_rate": 1.9932099931618873e-05, "loss": 2.4389, "step": 1597 }, { "epoch": 0.05, "grad_norm": 0.6763669848442078, "learning_rate": 1.9931976223512834e-05, "loss": 2.4017, "step": 1598 }, { "epoch": 0.05, "grad_norm": 0.645672619342804, "learning_rate": 1.993185240320115e-05, "loss": 2.3812, "step": 1599 }, { "epoch": 0.05, "grad_norm": 0.6708835363388062, "learning_rate": 1.9931728470685215e-05, "loss": 2.4076, "step": 1600 }, { "epoch": 0.05, "grad_norm": 0.6925601959228516, "learning_rate": 1.9931604425966437e-05, "loss": 2.4903, "step": 1601 }, { "epoch": 0.05, "grad_norm": 0.6661214828491211, "learning_rate": 1.993148026904621e-05, "loss": 2.4037, "step": 1602 }, { "epoch": 0.05, "grad_norm": 0.6573379635810852, "learning_rate": 1.9931355999925942e-05, "loss": 2.4497, "step": 1603 }, { "epoch": 0.05, "grad_norm": 0.6751570105552673, "learning_rate": 1.993123161860703e-05, "loss": 2.4477, "step": 1604 }, { "epoch": 0.05, "grad_norm": 0.6991583704948425, "learning_rate": 1.9931107125090888e-05, "loss": 2.4284, "step": 1605 }, { "epoch": 0.05, "grad_norm": 0.7075982689857483, "learning_rate": 1.9930982519378913e-05, "loss": 2.4594, "step": 1606 }, { "epoch": 0.05, "grad_norm": 0.6452618837356567, "learning_rate": 1.9930857801472524e-05, "loss": 2.4403, "step": 1607 }, { "epoch": 0.05, "grad_norm": 0.684503972530365, "learning_rate": 1.9930732971373118e-05, "loss": 2.414, "step": 1608 }, { "epoch": 0.05, "grad_norm": 0.6694889068603516, "learning_rate": 1.9930608029082114e-05, "loss": 2.537, "step": 1609 }, { "epoch": 0.05, "grad_norm": 0.6663505434989929, "learning_rate": 1.9930482974600916e-05, "loss": 2.4709, "step": 1610 }, { "epoch": 0.05, "grad_norm": 0.6709225177764893, "learning_rate": 1.9930357807930946e-05, "loss": 2.4, "step": 1611 }, { "epoch": 0.05, "grad_norm": 0.6499970555305481, "learning_rate": 1.9930232529073613e-05, "loss": 2.4283, "step": 1612 }, { "epoch": 0.05, "grad_norm": 0.6813496351242065, "learning_rate": 1.993010713803033e-05, "loss": 2.4253, "step": 1613 }, { "epoch": 0.05, "grad_norm": 0.6806023716926575, "learning_rate": 1.992998163480252e-05, "loss": 2.3823, "step": 1614 }, { "epoch": 0.05, "grad_norm": 0.6823223829269409, "learning_rate": 1.9929856019391593e-05, "loss": 2.3888, "step": 1615 }, { "epoch": 0.05, "grad_norm": 0.6910129189491272, "learning_rate": 1.9929730291798973e-05, "loss": 2.3644, "step": 1616 }, { "epoch": 0.05, "grad_norm": 0.6463114619255066, "learning_rate": 1.992960445202608e-05, "loss": 2.4517, "step": 1617 }, { "epoch": 0.05, "grad_norm": 0.6912835240364075, "learning_rate": 1.9929478500074336e-05, "loss": 2.4715, "step": 1618 }, { "epoch": 0.05, "grad_norm": 0.6860828995704651, "learning_rate": 1.9929352435945166e-05, "loss": 2.4235, "step": 1619 }, { "epoch": 0.05, "grad_norm": 0.7106069922447205, "learning_rate": 1.9929226259639983e-05, "loss": 2.482, "step": 1620 }, { "epoch": 0.05, "grad_norm": 0.6664543747901917, "learning_rate": 1.9929099971160228e-05, "loss": 2.4067, "step": 1621 }, { "epoch": 0.05, "grad_norm": 0.6521303653717041, "learning_rate": 1.9928973570507317e-05, "loss": 2.3995, "step": 1622 }, { "epoch": 0.05, "grad_norm": 0.660266637802124, "learning_rate": 1.9928847057682683e-05, "loss": 2.4643, "step": 1623 }, { "epoch": 0.05, "grad_norm": 0.643988847732544, "learning_rate": 1.9928720432687753e-05, "loss": 2.4582, "step": 1624 }, { "epoch": 0.05, "grad_norm": 0.684067964553833, "learning_rate": 1.992859369552396e-05, "loss": 2.4127, "step": 1625 }, { "epoch": 0.05, "grad_norm": 0.665391743183136, "learning_rate": 1.9928466846192732e-05, "loss": 2.4157, "step": 1626 }, { "epoch": 0.05, "grad_norm": 0.6780904531478882, "learning_rate": 1.9928339884695505e-05, "loss": 2.4072, "step": 1627 }, { "epoch": 0.05, "grad_norm": 0.6330990791320801, "learning_rate": 1.9928212811033714e-05, "loss": 2.3887, "step": 1628 }, { "epoch": 0.05, "grad_norm": 0.667547345161438, "learning_rate": 1.992808562520879e-05, "loss": 2.4574, "step": 1629 }, { "epoch": 0.05, "grad_norm": 0.6760832071304321, "learning_rate": 1.9927958327222178e-05, "loss": 2.3935, "step": 1630 }, { "epoch": 0.05, "grad_norm": 0.6605042815208435, "learning_rate": 1.992783091707531e-05, "loss": 2.4841, "step": 1631 }, { "epoch": 0.05, "grad_norm": 0.6503846645355225, "learning_rate": 1.9927703394769623e-05, "loss": 2.422, "step": 1632 }, { "epoch": 0.05, "grad_norm": 0.6339687705039978, "learning_rate": 1.9927575760306562e-05, "loss": 2.455, "step": 1633 }, { "epoch": 0.05, "grad_norm": 0.6454290747642517, "learning_rate": 1.9927448013687568e-05, "loss": 2.3573, "step": 1634 }, { "epoch": 0.05, "grad_norm": 0.6633464694023132, "learning_rate": 1.9927320154914086e-05, "loss": 2.4031, "step": 1635 }, { "epoch": 0.05, "grad_norm": 0.6876009702682495, "learning_rate": 1.992719218398756e-05, "loss": 2.4137, "step": 1636 }, { "epoch": 0.05, "grad_norm": 0.6541686058044434, "learning_rate": 1.9927064100909433e-05, "loss": 2.3556, "step": 1637 }, { "epoch": 0.05, "grad_norm": 0.6572750806808472, "learning_rate": 1.9926935905681152e-05, "loss": 2.4638, "step": 1638 }, { "epoch": 0.05, "grad_norm": 0.6526206731796265, "learning_rate": 1.992680759830417e-05, "loss": 2.4229, "step": 1639 }, { "epoch": 0.05, "grad_norm": 0.6827916502952576, "learning_rate": 1.9926679178779933e-05, "loss": 2.3862, "step": 1640 }, { "epoch": 0.05, "grad_norm": 0.6964197158813477, "learning_rate": 1.992655064710989e-05, "loss": 2.4201, "step": 1641 }, { "epoch": 0.05, "grad_norm": 0.6807335615158081, "learning_rate": 1.9926422003295497e-05, "loss": 2.4586, "step": 1642 }, { "epoch": 0.05, "grad_norm": 0.6591370105743408, "learning_rate": 1.9926293247338205e-05, "loss": 2.3595, "step": 1643 }, { "epoch": 0.05, "grad_norm": 0.6882135272026062, "learning_rate": 1.992616437923947e-05, "loss": 2.4286, "step": 1644 }, { "epoch": 0.05, "grad_norm": 0.6745463013648987, "learning_rate": 1.9926035399000746e-05, "loss": 2.4701, "step": 1645 }, { "epoch": 0.05, "grad_norm": 0.664196789264679, "learning_rate": 1.9925906306623492e-05, "loss": 2.4409, "step": 1646 }, { "epoch": 0.05, "grad_norm": 0.6802324056625366, "learning_rate": 1.9925777102109166e-05, "loss": 2.4362, "step": 1647 }, { "epoch": 0.05, "grad_norm": 0.6947612762451172, "learning_rate": 1.992564778545923e-05, "loss": 2.3822, "step": 1648 }, { "epoch": 0.05, "grad_norm": 0.705377459526062, "learning_rate": 1.992551835667514e-05, "loss": 2.4534, "step": 1649 }, { "epoch": 0.05, "grad_norm": 0.6672513484954834, "learning_rate": 1.9925388815758358e-05, "loss": 2.4627, "step": 1650 }, { "epoch": 0.05, "grad_norm": 0.6844388246536255, "learning_rate": 1.9925259162710352e-05, "loss": 2.4271, "step": 1651 }, { "epoch": 0.05, "grad_norm": 0.6461808681488037, "learning_rate": 1.9925129397532582e-05, "loss": 2.4519, "step": 1652 }, { "epoch": 0.05, "grad_norm": 0.6839893460273743, "learning_rate": 1.992499952022652e-05, "loss": 2.4025, "step": 1653 }, { "epoch": 0.06, "grad_norm": 0.6754345893859863, "learning_rate": 1.992486953079363e-05, "loss": 2.4176, "step": 1654 }, { "epoch": 0.06, "grad_norm": 0.6423801779747009, "learning_rate": 1.9924739429235382e-05, "loss": 2.4293, "step": 1655 }, { "epoch": 0.06, "grad_norm": 0.6501866579055786, "learning_rate": 1.9924609215553243e-05, "loss": 2.3941, "step": 1656 }, { "epoch": 0.06, "grad_norm": 0.655415952205658, "learning_rate": 1.9924478889748685e-05, "loss": 2.366, "step": 1657 }, { "epoch": 0.06, "grad_norm": 0.6413949131965637, "learning_rate": 1.992434845182318e-05, "loss": 2.3983, "step": 1658 }, { "epoch": 0.06, "grad_norm": 0.689906895160675, "learning_rate": 1.9924217901778202e-05, "loss": 2.4081, "step": 1659 }, { "epoch": 0.06, "grad_norm": 0.6907313466072083, "learning_rate": 1.9924087239615225e-05, "loss": 2.507, "step": 1660 }, { "epoch": 0.06, "grad_norm": 0.6539212465286255, "learning_rate": 1.992395646533573e-05, "loss": 2.4434, "step": 1661 }, { "epoch": 0.06, "grad_norm": 0.7002682685852051, "learning_rate": 1.992382557894119e-05, "loss": 2.4123, "step": 1662 }, { "epoch": 0.06, "grad_norm": 0.6133070588111877, "learning_rate": 1.9923694580433085e-05, "loss": 2.4207, "step": 1663 }, { "epoch": 0.06, "grad_norm": 0.6680501103401184, "learning_rate": 1.9923563469812898e-05, "loss": 2.3983, "step": 1664 }, { "epoch": 0.06, "grad_norm": 0.7163813710212708, "learning_rate": 1.99234322470821e-05, "loss": 2.414, "step": 1665 }, { "epoch": 0.06, "grad_norm": 0.6384507417678833, "learning_rate": 1.992330091224218e-05, "loss": 2.4251, "step": 1666 }, { "epoch": 0.06, "grad_norm": 0.653640627861023, "learning_rate": 1.9923169465294627e-05, "loss": 2.4372, "step": 1667 }, { "epoch": 0.06, "grad_norm": 0.6543007493019104, "learning_rate": 1.992303790624092e-05, "loss": 2.4155, "step": 1668 }, { "epoch": 0.06, "grad_norm": 0.6777818202972412, "learning_rate": 1.992290623508254e-05, "loss": 2.3688, "step": 1669 }, { "epoch": 0.06, "grad_norm": 0.673274040222168, "learning_rate": 1.9922774451820988e-05, "loss": 2.427, "step": 1670 }, { "epoch": 0.06, "grad_norm": 0.6461830139160156, "learning_rate": 1.9922642556457745e-05, "loss": 2.3944, "step": 1671 }, { "epoch": 0.06, "grad_norm": 0.6684697270393372, "learning_rate": 1.99225105489943e-05, "loss": 2.4229, "step": 1672 }, { "epoch": 0.06, "grad_norm": 0.6868581771850586, "learning_rate": 1.9922378429432142e-05, "loss": 2.4115, "step": 1673 }, { "epoch": 0.06, "grad_norm": 0.7135509848594666, "learning_rate": 1.9922246197772774e-05, "loss": 2.3571, "step": 1674 }, { "epoch": 0.06, "grad_norm": 0.6706013083457947, "learning_rate": 1.992211385401768e-05, "loss": 2.4206, "step": 1675 }, { "epoch": 0.06, "grad_norm": 0.6538805961608887, "learning_rate": 1.992198139816836e-05, "loss": 2.4177, "step": 1676 }, { "epoch": 0.06, "grad_norm": 0.6860239505767822, "learning_rate": 1.992184883022631e-05, "loss": 2.5078, "step": 1677 }, { "epoch": 0.06, "grad_norm": 0.6961548924446106, "learning_rate": 1.9921716150193022e-05, "loss": 2.379, "step": 1678 }, { "epoch": 0.06, "grad_norm": 0.6848573684692383, "learning_rate": 1.9921583358070005e-05, "loss": 2.3842, "step": 1679 }, { "epoch": 0.06, "grad_norm": 0.6472194790840149, "learning_rate": 1.992145045385875e-05, "loss": 2.391, "step": 1680 }, { "epoch": 0.06, "grad_norm": 0.6962064504623413, "learning_rate": 1.9921317437560766e-05, "loss": 2.5019, "step": 1681 }, { "epoch": 0.06, "grad_norm": 0.6933851838111877, "learning_rate": 1.992118430917755e-05, "loss": 2.4058, "step": 1682 }, { "epoch": 0.06, "grad_norm": 0.6392589211463928, "learning_rate": 1.9921051068710605e-05, "loss": 2.4153, "step": 1683 }, { "epoch": 0.06, "grad_norm": 0.6323282718658447, "learning_rate": 1.992091771616144e-05, "loss": 2.3463, "step": 1684 }, { "epoch": 0.06, "grad_norm": 0.6988240480422974, "learning_rate": 1.9920784251531567e-05, "loss": 2.4045, "step": 1685 }, { "epoch": 0.06, "grad_norm": 0.6445634365081787, "learning_rate": 1.9920650674822486e-05, "loss": 2.4675, "step": 1686 }, { "epoch": 0.06, "grad_norm": 0.6991953253746033, "learning_rate": 1.9920516986035703e-05, "loss": 2.4553, "step": 1687 }, { "epoch": 0.06, "grad_norm": 0.6528216004371643, "learning_rate": 1.9920383185172736e-05, "loss": 2.4123, "step": 1688 }, { "epoch": 0.06, "grad_norm": 0.6391637921333313, "learning_rate": 1.9920249272235095e-05, "loss": 2.3931, "step": 1689 }, { "epoch": 0.06, "grad_norm": 0.6762369871139526, "learning_rate": 1.992011524722429e-05, "loss": 2.4311, "step": 1690 }, { "epoch": 0.06, "grad_norm": 0.6571438312530518, "learning_rate": 1.9919981110141836e-05, "loss": 2.4362, "step": 1691 }, { "epoch": 0.06, "grad_norm": 0.6934826374053955, "learning_rate": 1.991984686098925e-05, "loss": 2.4657, "step": 1692 }, { "epoch": 0.06, "grad_norm": 0.6816678643226624, "learning_rate": 1.9919712499768048e-05, "loss": 2.4073, "step": 1693 }, { "epoch": 0.06, "grad_norm": 0.6610366702079773, "learning_rate": 1.9919578026479745e-05, "loss": 2.4363, "step": 1694 }, { "epoch": 0.06, "grad_norm": 0.6828557848930359, "learning_rate": 1.9919443441125867e-05, "loss": 2.4359, "step": 1695 }, { "epoch": 0.06, "grad_norm": 0.6646338105201721, "learning_rate": 1.9919308743707927e-05, "loss": 2.4003, "step": 1696 }, { "epoch": 0.06, "grad_norm": 0.68185955286026, "learning_rate": 1.991917393422745e-05, "loss": 2.4012, "step": 1697 }, { "epoch": 0.06, "grad_norm": 0.6393457055091858, "learning_rate": 1.9919039012685962e-05, "loss": 2.3605, "step": 1698 }, { "epoch": 0.06, "grad_norm": 0.6696728467941284, "learning_rate": 1.9918903979084985e-05, "loss": 2.4643, "step": 1699 }, { "epoch": 0.06, "grad_norm": 0.7348231077194214, "learning_rate": 1.991876883342604e-05, "loss": 2.3806, "step": 1700 }, { "epoch": 0.06, "grad_norm": 0.7297537326812744, "learning_rate": 1.9918633575710662e-05, "loss": 2.4512, "step": 1701 }, { "epoch": 0.06, "grad_norm": 0.6754732728004456, "learning_rate": 1.991849820594037e-05, "loss": 2.4335, "step": 1702 }, { "epoch": 0.06, "grad_norm": 0.6498231291770935, "learning_rate": 1.99183627241167e-05, "loss": 2.4156, "step": 1703 }, { "epoch": 0.06, "grad_norm": 0.6874418258666992, "learning_rate": 1.991822713024118e-05, "loss": 2.3507, "step": 1704 }, { "epoch": 0.06, "grad_norm": 0.6841293573379517, "learning_rate": 1.9918091424315348e-05, "loss": 2.3088, "step": 1705 }, { "epoch": 0.06, "grad_norm": 0.6642603278160095, "learning_rate": 1.9917955606340724e-05, "loss": 2.4436, "step": 1706 }, { "epoch": 0.06, "grad_norm": 0.6520339250564575, "learning_rate": 1.9917819676318854e-05, "loss": 2.4135, "step": 1707 }, { "epoch": 0.06, "grad_norm": 0.6486449837684631, "learning_rate": 1.9917683634251272e-05, "loss": 2.483, "step": 1708 }, { "epoch": 0.06, "grad_norm": 0.6944218873977661, "learning_rate": 1.9917547480139507e-05, "loss": 2.4216, "step": 1709 }, { "epoch": 0.06, "grad_norm": 0.6373897790908813, "learning_rate": 1.9917411213985107e-05, "loss": 2.4388, "step": 1710 }, { "epoch": 0.06, "grad_norm": 0.6460530757904053, "learning_rate": 1.9917274835789607e-05, "loss": 2.4096, "step": 1711 }, { "epoch": 0.06, "grad_norm": 0.6609753966331482, "learning_rate": 1.9917138345554545e-05, "loss": 2.4114, "step": 1712 }, { "epoch": 0.06, "grad_norm": 0.6893213987350464, "learning_rate": 1.991700174328147e-05, "loss": 2.3611, "step": 1713 }, { "epoch": 0.06, "grad_norm": 0.6588318943977356, "learning_rate": 1.9916865028971918e-05, "loss": 2.4215, "step": 1714 }, { "epoch": 0.06, "grad_norm": 0.6442292928695679, "learning_rate": 1.9916728202627437e-05, "loss": 2.4183, "step": 1715 }, { "epoch": 0.06, "grad_norm": 0.6383470892906189, "learning_rate": 1.991659126424957e-05, "loss": 2.4195, "step": 1716 }, { "epoch": 0.06, "grad_norm": 0.648501455783844, "learning_rate": 1.9916454213839873e-05, "loss": 2.5091, "step": 1717 }, { "epoch": 0.06, "grad_norm": 0.6657336354255676, "learning_rate": 1.9916317051399885e-05, "loss": 2.4844, "step": 1718 }, { "epoch": 0.06, "grad_norm": 0.6767327785491943, "learning_rate": 1.9916179776931156e-05, "loss": 2.3965, "step": 1719 }, { "epoch": 0.06, "grad_norm": 0.6539074778556824, "learning_rate": 1.9916042390435245e-05, "loss": 2.3497, "step": 1720 }, { "epoch": 0.06, "grad_norm": 0.6573300957679749, "learning_rate": 1.9915904891913694e-05, "loss": 2.399, "step": 1721 }, { "epoch": 0.06, "grad_norm": 0.6630330085754395, "learning_rate": 1.9915767281368065e-05, "loss": 2.3945, "step": 1722 }, { "epoch": 0.06, "grad_norm": 0.6443758010864258, "learning_rate": 1.9915629558799903e-05, "loss": 2.3415, "step": 1723 }, { "epoch": 0.06, "grad_norm": 0.673997700214386, "learning_rate": 1.9915491724210772e-05, "loss": 2.3922, "step": 1724 }, { "epoch": 0.06, "grad_norm": 0.6867017149925232, "learning_rate": 1.9915353777602226e-05, "loss": 2.4566, "step": 1725 }, { "epoch": 0.06, "grad_norm": 0.6897754073143005, "learning_rate": 1.9915215718975826e-05, "loss": 2.403, "step": 1726 }, { "epoch": 0.06, "grad_norm": 0.6551098823547363, "learning_rate": 1.9915077548333127e-05, "loss": 2.425, "step": 1727 }, { "epoch": 0.06, "grad_norm": 0.6676865816116333, "learning_rate": 1.9914939265675694e-05, "loss": 2.3723, "step": 1728 }, { "epoch": 0.06, "grad_norm": 0.6359845995903015, "learning_rate": 1.9914800871005085e-05, "loss": 2.3827, "step": 1729 }, { "epoch": 0.06, "grad_norm": 0.652776300907135, "learning_rate": 1.991466236432287e-05, "loss": 2.4373, "step": 1730 }, { "epoch": 0.06, "grad_norm": 0.6557121872901917, "learning_rate": 1.9914523745630608e-05, "loss": 2.4005, "step": 1731 }, { "epoch": 0.06, "grad_norm": 0.654423713684082, "learning_rate": 1.991438501492987e-05, "loss": 2.3411, "step": 1732 }, { "epoch": 0.06, "grad_norm": 0.6976038217544556, "learning_rate": 1.9914246172222217e-05, "loss": 2.3755, "step": 1733 }, { "epoch": 0.06, "grad_norm": 0.6480667591094971, "learning_rate": 1.9914107217509223e-05, "loss": 2.3473, "step": 1734 }, { "epoch": 0.06, "grad_norm": 0.6538426280021667, "learning_rate": 1.9913968150792456e-05, "loss": 2.4296, "step": 1735 }, { "epoch": 0.06, "grad_norm": 0.6696330308914185, "learning_rate": 1.9913828972073488e-05, "loss": 2.4355, "step": 1736 }, { "epoch": 0.06, "grad_norm": 0.6323601603507996, "learning_rate": 1.991368968135389e-05, "loss": 2.3574, "step": 1737 }, { "epoch": 0.06, "grad_norm": 0.658532977104187, "learning_rate": 1.9913550278635234e-05, "loss": 2.3955, "step": 1738 }, { "epoch": 0.06, "grad_norm": 0.7092984914779663, "learning_rate": 1.99134107639191e-05, "loss": 2.3814, "step": 1739 }, { "epoch": 0.06, "grad_norm": 0.6931276321411133, "learning_rate": 1.9913271137207057e-05, "loss": 2.395, "step": 1740 }, { "epoch": 0.06, "grad_norm": 0.641856849193573, "learning_rate": 1.991313139850069e-05, "loss": 2.3926, "step": 1741 }, { "epoch": 0.06, "grad_norm": 0.6465002298355103, "learning_rate": 1.991299154780157e-05, "loss": 2.3829, "step": 1742 }, { "epoch": 0.06, "grad_norm": 0.7714751958847046, "learning_rate": 1.9912851585111284e-05, "loss": 2.4126, "step": 1743 }, { "epoch": 0.06, "grad_norm": 0.6506043672561646, "learning_rate": 1.9912711510431412e-05, "loss": 2.4933, "step": 1744 }, { "epoch": 0.06, "grad_norm": 0.6575149893760681, "learning_rate": 1.991257132376353e-05, "loss": 2.4109, "step": 1745 }, { "epoch": 0.06, "grad_norm": 0.6435120105743408, "learning_rate": 1.9912431025109232e-05, "loss": 2.3785, "step": 1746 }, { "epoch": 0.06, "grad_norm": 0.653723955154419, "learning_rate": 1.9912290614470092e-05, "loss": 2.4823, "step": 1747 }, { "epoch": 0.06, "grad_norm": 0.6825549602508545, "learning_rate": 1.9912150091847705e-05, "loss": 2.4414, "step": 1748 }, { "epoch": 0.06, "grad_norm": 0.6390737891197205, "learning_rate": 1.991200945724366e-05, "loss": 2.4204, "step": 1749 }, { "epoch": 0.06, "grad_norm": 0.6617243885993958, "learning_rate": 1.9911868710659532e-05, "loss": 2.3667, "step": 1750 }, { "epoch": 0.06, "grad_norm": 0.6704902052879333, "learning_rate": 1.9911727852096925e-05, "loss": 2.393, "step": 1751 }, { "epoch": 0.06, "grad_norm": 0.6647295355796814, "learning_rate": 1.9911586881557423e-05, "loss": 2.434, "step": 1752 }, { "epoch": 0.06, "grad_norm": 0.6292001605033875, "learning_rate": 1.9911445799042622e-05, "loss": 2.3912, "step": 1753 }, { "epoch": 0.06, "grad_norm": 0.6650229692459106, "learning_rate": 1.9911304604554118e-05, "loss": 2.4618, "step": 1754 }, { "epoch": 0.06, "grad_norm": 0.6842104196548462, "learning_rate": 1.9911163298093502e-05, "loss": 2.3811, "step": 1755 }, { "epoch": 0.06, "grad_norm": 0.6654988527297974, "learning_rate": 1.991102187966237e-05, "loss": 2.4264, "step": 1756 }, { "epoch": 0.06, "grad_norm": 0.6416323781013489, "learning_rate": 1.991088034926232e-05, "loss": 2.4401, "step": 1757 }, { "epoch": 0.06, "grad_norm": 0.6998594999313354, "learning_rate": 1.9910738706894957e-05, "loss": 2.4209, "step": 1758 }, { "epoch": 0.06, "grad_norm": 0.6584749817848206, "learning_rate": 1.9910596952561873e-05, "loss": 2.4313, "step": 1759 }, { "epoch": 0.06, "grad_norm": 0.6843457818031311, "learning_rate": 1.9910455086264673e-05, "loss": 2.3678, "step": 1760 }, { "epoch": 0.06, "grad_norm": 0.6927817463874817, "learning_rate": 1.991031310800496e-05, "loss": 2.4414, "step": 1761 }, { "epoch": 0.06, "grad_norm": 0.6730621457099915, "learning_rate": 1.991017101778434e-05, "loss": 2.3603, "step": 1762 }, { "epoch": 0.06, "grad_norm": 0.6471081972122192, "learning_rate": 1.991002881560441e-05, "loss": 2.4396, "step": 1763 }, { "epoch": 0.06, "grad_norm": 0.6782976984977722, "learning_rate": 1.9909886501466787e-05, "loss": 2.3799, "step": 1764 }, { "epoch": 0.06, "grad_norm": 0.6571404337882996, "learning_rate": 1.990974407537307e-05, "loss": 2.447, "step": 1765 }, { "epoch": 0.06, "grad_norm": 0.6603661775588989, "learning_rate": 1.9909601537324877e-05, "loss": 2.3868, "step": 1766 }, { "epoch": 0.06, "grad_norm": 0.6608498096466064, "learning_rate": 1.9909458887323812e-05, "loss": 2.4492, "step": 1767 }, { "epoch": 0.06, "grad_norm": 0.6713616847991943, "learning_rate": 1.9909316125371488e-05, "loss": 2.4248, "step": 1768 }, { "epoch": 0.06, "grad_norm": 0.6860651969909668, "learning_rate": 1.9909173251469515e-05, "loss": 2.3522, "step": 1769 }, { "epoch": 0.06, "grad_norm": 0.7090104818344116, "learning_rate": 1.9909030265619514e-05, "loss": 2.4264, "step": 1770 }, { "epoch": 0.06, "grad_norm": 0.6741381883621216, "learning_rate": 1.990888716782309e-05, "loss": 2.3504, "step": 1771 }, { "epoch": 0.06, "grad_norm": 0.6544789671897888, "learning_rate": 1.9908743958081873e-05, "loss": 2.3994, "step": 1772 }, { "epoch": 0.06, "grad_norm": 0.6701503992080688, "learning_rate": 1.990860063639747e-05, "loss": 2.3968, "step": 1773 }, { "epoch": 0.06, "grad_norm": 0.680515468120575, "learning_rate": 1.990845720277151e-05, "loss": 2.3581, "step": 1774 }, { "epoch": 0.06, "grad_norm": 0.6981581449508667, "learning_rate": 1.99083136572056e-05, "loss": 2.46, "step": 1775 }, { "epoch": 0.06, "grad_norm": 0.6608291864395142, "learning_rate": 1.990816999970137e-05, "loss": 2.4174, "step": 1776 }, { "epoch": 0.06, "grad_norm": 0.700094997882843, "learning_rate": 1.9908026230260446e-05, "loss": 2.4271, "step": 1777 }, { "epoch": 0.06, "grad_norm": 0.6515207290649414, "learning_rate": 1.9907882348884445e-05, "loss": 2.4263, "step": 1778 }, { "epoch": 0.06, "grad_norm": 0.6882498860359192, "learning_rate": 1.9907738355575e-05, "loss": 2.458, "step": 1779 }, { "epoch": 0.06, "grad_norm": 0.6586911082267761, "learning_rate": 1.990759425033373e-05, "loss": 2.3839, "step": 1780 }, { "epoch": 0.06, "grad_norm": 0.6677483916282654, "learning_rate": 1.9907450033162267e-05, "loss": 2.3727, "step": 1781 }, { "epoch": 0.06, "grad_norm": 0.6829531192779541, "learning_rate": 1.9907305704062238e-05, "loss": 2.3717, "step": 1782 }, { "epoch": 0.06, "grad_norm": 0.6769689321517944, "learning_rate": 1.990716126303528e-05, "loss": 2.3915, "step": 1783 }, { "epoch": 0.06, "grad_norm": 0.7131335735321045, "learning_rate": 1.9907016710083015e-05, "loss": 2.3727, "step": 1784 }, { "epoch": 0.06, "grad_norm": 0.6324228644371033, "learning_rate": 1.9906872045207084e-05, "loss": 2.3224, "step": 1785 }, { "epoch": 0.06, "grad_norm": 0.6493406891822815, "learning_rate": 1.9906727268409116e-05, "loss": 2.3651, "step": 1786 }, { "epoch": 0.06, "grad_norm": 0.64736407995224, "learning_rate": 1.9906582379690753e-05, "loss": 2.4296, "step": 1787 }, { "epoch": 0.06, "grad_norm": 0.6784501075744629, "learning_rate": 1.9906437379053628e-05, "loss": 2.4315, "step": 1788 }, { "epoch": 0.06, "grad_norm": 0.677405059337616, "learning_rate": 1.9906292266499375e-05, "loss": 2.3455, "step": 1789 }, { "epoch": 0.06, "grad_norm": 0.6765218377113342, "learning_rate": 1.9906147042029644e-05, "loss": 2.4255, "step": 1790 }, { "epoch": 0.06, "grad_norm": 0.6785590052604675, "learning_rate": 1.9906001705646064e-05, "loss": 2.3912, "step": 1791 }, { "epoch": 0.06, "grad_norm": 0.6327767372131348, "learning_rate": 1.9905856257350285e-05, "loss": 2.356, "step": 1792 }, { "epoch": 0.06, "grad_norm": 0.7202600836753845, "learning_rate": 1.9905710697143944e-05, "loss": 2.4297, "step": 1793 }, { "epoch": 0.06, "grad_norm": 0.6425220966339111, "learning_rate": 1.9905565025028694e-05, "loss": 2.4092, "step": 1794 }, { "epoch": 0.06, "grad_norm": 0.6445935964584351, "learning_rate": 1.9905419241006172e-05, "loss": 2.4094, "step": 1795 }, { "epoch": 0.06, "grad_norm": 0.687806248664856, "learning_rate": 1.990527334507803e-05, "loss": 2.3995, "step": 1796 }, { "epoch": 0.06, "grad_norm": 0.6905810236930847, "learning_rate": 1.9905127337245915e-05, "loss": 2.4002, "step": 1797 }, { "epoch": 0.06, "grad_norm": 0.6583490371704102, "learning_rate": 1.9904981217511476e-05, "loss": 2.3864, "step": 1798 }, { "epoch": 0.06, "grad_norm": 0.6532671451568604, "learning_rate": 1.9904834985876365e-05, "loss": 2.3931, "step": 1799 }, { "epoch": 0.06, "grad_norm": 0.6690351963043213, "learning_rate": 1.9904688642342228e-05, "loss": 2.41, "step": 1800 }, { "epoch": 0.06, "grad_norm": 0.6628871560096741, "learning_rate": 1.990454218691073e-05, "loss": 2.4035, "step": 1801 }, { "epoch": 0.06, "grad_norm": 0.6618131995201111, "learning_rate": 1.9904395619583515e-05, "loss": 2.3896, "step": 1802 }, { "epoch": 0.06, "grad_norm": 0.6890316009521484, "learning_rate": 1.9904248940362246e-05, "loss": 2.3503, "step": 1803 }, { "epoch": 0.06, "grad_norm": 0.639540433883667, "learning_rate": 1.9904102149248574e-05, "loss": 2.4145, "step": 1804 }, { "epoch": 0.06, "grad_norm": 0.6708518862724304, "learning_rate": 1.990395524624416e-05, "loss": 2.401, "step": 1805 }, { "epoch": 0.06, "grad_norm": 0.66914963722229, "learning_rate": 1.9903808231350664e-05, "loss": 2.4246, "step": 1806 }, { "epoch": 0.06, "grad_norm": 0.6464137434959412, "learning_rate": 1.990366110456975e-05, "loss": 2.3966, "step": 1807 }, { "epoch": 0.06, "grad_norm": 0.7029147148132324, "learning_rate": 1.9903513865903075e-05, "loss": 2.3999, "step": 1808 }, { "epoch": 0.06, "grad_norm": 0.6588590145111084, "learning_rate": 1.9903366515352304e-05, "loss": 2.432, "step": 1809 }, { "epoch": 0.06, "grad_norm": 0.6760753989219666, "learning_rate": 1.9903219052919103e-05, "loss": 2.3811, "step": 1810 }, { "epoch": 0.06, "grad_norm": 0.6614328026771545, "learning_rate": 1.9903071478605138e-05, "loss": 2.4046, "step": 1811 }, { "epoch": 0.06, "grad_norm": 0.7194113731384277, "learning_rate": 1.9902923792412073e-05, "loss": 2.3518, "step": 1812 }, { "epoch": 0.06, "grad_norm": 0.6925607919692993, "learning_rate": 1.990277599434158e-05, "loss": 2.4237, "step": 1813 }, { "epoch": 0.06, "grad_norm": 0.646809995174408, "learning_rate": 1.990262808439533e-05, "loss": 2.4628, "step": 1814 }, { "epoch": 0.06, "grad_norm": 0.6585157513618469, "learning_rate": 1.9902480062574986e-05, "loss": 2.3831, "step": 1815 }, { "epoch": 0.06, "grad_norm": 0.6462484002113342, "learning_rate": 1.9902331928882228e-05, "loss": 2.3346, "step": 1816 }, { "epoch": 0.06, "grad_norm": 0.6405508518218994, "learning_rate": 1.9902183683318725e-05, "loss": 2.3453, "step": 1817 }, { "epoch": 0.06, "grad_norm": 0.6426177024841309, "learning_rate": 1.9902035325886158e-05, "loss": 2.4001, "step": 1818 }, { "epoch": 0.06, "grad_norm": 0.692316472530365, "learning_rate": 1.9901886856586197e-05, "loss": 2.408, "step": 1819 }, { "epoch": 0.06, "grad_norm": 0.6814116835594177, "learning_rate": 1.990173827542052e-05, "loss": 2.414, "step": 1820 }, { "epoch": 0.06, "grad_norm": 0.7283808588981628, "learning_rate": 1.9901589582390807e-05, "loss": 2.3991, "step": 1821 }, { "epoch": 0.06, "grad_norm": 0.6647890210151672, "learning_rate": 1.990144077749874e-05, "loss": 2.3838, "step": 1822 }, { "epoch": 0.06, "grad_norm": 0.6502346396446228, "learning_rate": 1.9901291860745998e-05, "loss": 2.383, "step": 1823 }, { "epoch": 0.06, "grad_norm": 0.6670609712600708, "learning_rate": 1.9901142832134264e-05, "loss": 2.3836, "step": 1824 }, { "epoch": 0.06, "grad_norm": 0.6736798286437988, "learning_rate": 1.990099369166522e-05, "loss": 2.4128, "step": 1825 }, { "epoch": 0.06, "grad_norm": 0.6703789234161377, "learning_rate": 1.990084443934055e-05, "loss": 2.3482, "step": 1826 }, { "epoch": 0.06, "grad_norm": 0.6589123606681824, "learning_rate": 1.9900695075161943e-05, "loss": 2.4003, "step": 1827 }, { "epoch": 0.06, "grad_norm": 0.6856886744499207, "learning_rate": 1.9900545599131086e-05, "loss": 2.3682, "step": 1828 }, { "epoch": 0.06, "grad_norm": 0.6985341310501099, "learning_rate": 1.9900396011249667e-05, "loss": 2.3766, "step": 1829 }, { "epoch": 0.06, "grad_norm": 0.6431722044944763, "learning_rate": 1.9900246311519374e-05, "loss": 2.4331, "step": 1830 }, { "epoch": 0.06, "grad_norm": 0.652043879032135, "learning_rate": 1.9900096499941904e-05, "loss": 2.3688, "step": 1831 }, { "epoch": 0.06, "grad_norm": 0.6494725942611694, "learning_rate": 1.9899946576518943e-05, "loss": 2.394, "step": 1832 }, { "epoch": 0.06, "grad_norm": 0.6854714751243591, "learning_rate": 1.989979654125219e-05, "loss": 2.4537, "step": 1833 }, { "epoch": 0.06, "grad_norm": 0.6890335083007812, "learning_rate": 1.9899646394143334e-05, "loss": 2.405, "step": 1834 }, { "epoch": 0.06, "grad_norm": 0.6724991798400879, "learning_rate": 1.9899496135194075e-05, "loss": 2.4273, "step": 1835 }, { "epoch": 0.06, "grad_norm": 0.6392744183540344, "learning_rate": 1.9899345764406114e-05, "loss": 2.3527, "step": 1836 }, { "epoch": 0.06, "grad_norm": 0.637627363204956, "learning_rate": 1.9899195281781143e-05, "loss": 2.4107, "step": 1837 }, { "epoch": 0.06, "grad_norm": 0.6806239485740662, "learning_rate": 1.9899044687320867e-05, "loss": 2.4054, "step": 1838 }, { "epoch": 0.06, "grad_norm": 0.7183214426040649, "learning_rate": 1.9898893981026984e-05, "loss": 2.4416, "step": 1839 }, { "epoch": 0.06, "grad_norm": 0.6456092000007629, "learning_rate": 1.98987431629012e-05, "loss": 2.4358, "step": 1840 }, { "epoch": 0.06, "grad_norm": 0.6367050409317017, "learning_rate": 1.989859223294522e-05, "loss": 2.3597, "step": 1841 }, { "epoch": 0.06, "grad_norm": 0.6637138724327087, "learning_rate": 1.9898441191160738e-05, "loss": 2.3776, "step": 1842 }, { "epoch": 0.06, "grad_norm": 0.7038841247558594, "learning_rate": 1.9898290037549474e-05, "loss": 2.4314, "step": 1843 }, { "epoch": 0.06, "grad_norm": 0.6421082019805908, "learning_rate": 1.989813877211313e-05, "loss": 2.35, "step": 1844 }, { "epoch": 0.06, "grad_norm": 0.6555576324462891, "learning_rate": 1.989798739485341e-05, "loss": 2.4033, "step": 1845 }, { "epoch": 0.06, "grad_norm": 0.6614798307418823, "learning_rate": 1.9897835905772033e-05, "loss": 2.359, "step": 1846 }, { "epoch": 0.06, "grad_norm": 0.6740425825119019, "learning_rate": 1.989768430487071e-05, "loss": 2.3709, "step": 1847 }, { "epoch": 0.06, "grad_norm": 0.6593112349510193, "learning_rate": 1.9897532592151146e-05, "loss": 2.3757, "step": 1848 }, { "epoch": 0.06, "grad_norm": 0.645943820476532, "learning_rate": 1.989738076761506e-05, "loss": 2.3386, "step": 1849 }, { "epoch": 0.06, "grad_norm": 0.6590797901153564, "learning_rate": 1.9897228831264165e-05, "loss": 2.3417, "step": 1850 }, { "epoch": 0.06, "grad_norm": 0.6435642838478088, "learning_rate": 1.9897076783100182e-05, "loss": 2.3346, "step": 1851 }, { "epoch": 0.06, "grad_norm": 0.6621133089065552, "learning_rate": 1.9896924623124824e-05, "loss": 2.4472, "step": 1852 }, { "epoch": 0.06, "grad_norm": 0.6617000102996826, "learning_rate": 1.9896772351339813e-05, "loss": 2.3989, "step": 1853 }, { "epoch": 0.06, "grad_norm": 0.6556550860404968, "learning_rate": 1.989661996774687e-05, "loss": 2.4081, "step": 1854 }, { "epoch": 0.06, "grad_norm": 0.6734105348587036, "learning_rate": 1.9896467472347708e-05, "loss": 2.374, "step": 1855 }, { "epoch": 0.06, "grad_norm": 0.6760770678520203, "learning_rate": 1.9896314865144063e-05, "loss": 2.3653, "step": 1856 }, { "epoch": 0.06, "grad_norm": 0.6664807200431824, "learning_rate": 1.989616214613765e-05, "loss": 2.4349, "step": 1857 }, { "epoch": 0.06, "grad_norm": 0.6725401282310486, "learning_rate": 1.9896009315330195e-05, "loss": 2.3643, "step": 1858 }, { "epoch": 0.06, "grad_norm": 0.6584186553955078, "learning_rate": 1.9895856372723428e-05, "loss": 2.4042, "step": 1859 }, { "epoch": 0.06, "grad_norm": 0.6646932363510132, "learning_rate": 1.9895703318319076e-05, "loss": 2.4155, "step": 1860 }, { "epoch": 0.06, "grad_norm": 0.6928250193595886, "learning_rate": 1.9895550152118867e-05, "loss": 2.4126, "step": 1861 }, { "epoch": 0.06, "grad_norm": 0.6571219563484192, "learning_rate": 1.9895396874124532e-05, "loss": 2.3967, "step": 1862 }, { "epoch": 0.06, "grad_norm": 0.6730003952980042, "learning_rate": 1.98952434843378e-05, "loss": 2.3757, "step": 1863 }, { "epoch": 0.06, "grad_norm": 0.7038864493370056, "learning_rate": 1.989508998276041e-05, "loss": 2.3982, "step": 1864 }, { "epoch": 0.06, "grad_norm": 0.7045702338218689, "learning_rate": 1.989493636939409e-05, "loss": 2.3419, "step": 1865 }, { "epoch": 0.06, "grad_norm": 0.6351613402366638, "learning_rate": 1.9894782644240577e-05, "loss": 2.3944, "step": 1866 }, { "epoch": 0.06, "grad_norm": 0.643223226070404, "learning_rate": 1.989462880730161e-05, "loss": 2.3195, "step": 1867 }, { "epoch": 0.06, "grad_norm": 0.7200911641120911, "learning_rate": 1.9894474858578924e-05, "loss": 2.4814, "step": 1868 }, { "epoch": 0.06, "grad_norm": 0.6807271242141724, "learning_rate": 1.989432079807426e-05, "loss": 2.4219, "step": 1869 }, { "epoch": 0.06, "grad_norm": 0.7036699056625366, "learning_rate": 1.989416662578936e-05, "loss": 2.3488, "step": 1870 }, { "epoch": 0.06, "grad_norm": 0.6819643378257751, "learning_rate": 1.9894012341725965e-05, "loss": 2.4499, "step": 1871 }, { "epoch": 0.06, "grad_norm": 0.66123366355896, "learning_rate": 1.9893857945885817e-05, "loss": 2.3581, "step": 1872 }, { "epoch": 0.06, "grad_norm": 0.6614993810653687, "learning_rate": 1.9893703438270656e-05, "loss": 2.395, "step": 1873 }, { "epoch": 0.06, "grad_norm": 0.6651284098625183, "learning_rate": 1.9893548818882234e-05, "loss": 2.4768, "step": 1874 }, { "epoch": 0.06, "grad_norm": 0.7005410194396973, "learning_rate": 1.9893394087722298e-05, "loss": 2.3912, "step": 1875 }, { "epoch": 0.06, "grad_norm": 0.6791648268699646, "learning_rate": 1.9893239244792594e-05, "loss": 2.4434, "step": 1876 }, { "epoch": 0.06, "grad_norm": 0.6413533687591553, "learning_rate": 1.989308429009487e-05, "loss": 2.4116, "step": 1877 }, { "epoch": 0.06, "grad_norm": 0.6797276735305786, "learning_rate": 1.9892929223630877e-05, "loss": 2.4005, "step": 1878 }, { "epoch": 0.06, "grad_norm": 0.6793906092643738, "learning_rate": 1.9892774045402364e-05, "loss": 2.3685, "step": 1879 }, { "epoch": 0.06, "grad_norm": 0.6800909638404846, "learning_rate": 1.9892618755411093e-05, "loss": 2.3956, "step": 1880 }, { "epoch": 0.06, "grad_norm": 0.6815364956855774, "learning_rate": 1.9892463353658812e-05, "loss": 2.3914, "step": 1881 }, { "epoch": 0.06, "grad_norm": 0.6469994187355042, "learning_rate": 1.9892307840147276e-05, "loss": 2.4199, "step": 1882 }, { "epoch": 0.06, "grad_norm": 0.7118571400642395, "learning_rate": 1.9892152214878243e-05, "loss": 2.4018, "step": 1883 }, { "epoch": 0.06, "grad_norm": 0.6700411438941956, "learning_rate": 1.989199647785347e-05, "loss": 2.3576, "step": 1884 }, { "epoch": 0.06, "grad_norm": 0.6585733294487, "learning_rate": 1.989184062907472e-05, "loss": 2.3376, "step": 1885 }, { "epoch": 0.06, "grad_norm": 0.6642573475837708, "learning_rate": 1.9891684668543757e-05, "loss": 2.4196, "step": 1886 }, { "epoch": 0.06, "grad_norm": 0.6474754214286804, "learning_rate": 1.9891528596262332e-05, "loss": 2.3563, "step": 1887 }, { "epoch": 0.06, "grad_norm": 0.6807147860527039, "learning_rate": 1.9891372412232213e-05, "loss": 2.3796, "step": 1888 }, { "epoch": 0.06, "grad_norm": 0.6752489805221558, "learning_rate": 1.989121611645517e-05, "loss": 2.3887, "step": 1889 }, { "epoch": 0.06, "grad_norm": 0.6685085892677307, "learning_rate": 1.989105970893296e-05, "loss": 2.3934, "step": 1890 }, { "epoch": 0.06, "grad_norm": 0.6582318544387817, "learning_rate": 1.9890903189667354e-05, "loss": 2.3518, "step": 1891 }, { "epoch": 0.06, "grad_norm": 0.7093725204467773, "learning_rate": 1.9890746558660125e-05, "loss": 2.3663, "step": 1892 }, { "epoch": 0.06, "grad_norm": 0.6949980854988098, "learning_rate": 1.989058981591303e-05, "loss": 2.3679, "step": 1893 }, { "epoch": 0.06, "grad_norm": 0.693029522895813, "learning_rate": 1.9890432961427853e-05, "loss": 2.4068, "step": 1894 }, { "epoch": 0.06, "grad_norm": 0.6516783237457275, "learning_rate": 1.9890275995206362e-05, "loss": 2.3933, "step": 1895 }, { "epoch": 0.06, "grad_norm": 0.6877561211585999, "learning_rate": 1.9890118917250326e-05, "loss": 2.4009, "step": 1896 }, { "epoch": 0.06, "grad_norm": 0.6436954140663147, "learning_rate": 1.9889961727561523e-05, "loss": 2.3639, "step": 1897 }, { "epoch": 0.06, "grad_norm": 0.6731047630310059, "learning_rate": 1.9889804426141732e-05, "loss": 2.4678, "step": 1898 }, { "epoch": 0.06, "grad_norm": 0.7072241306304932, "learning_rate": 1.9889647012992722e-05, "loss": 2.3922, "step": 1899 }, { "epoch": 0.06, "grad_norm": 0.6704971194267273, "learning_rate": 1.988948948811628e-05, "loss": 2.3587, "step": 1900 }, { "epoch": 0.06, "grad_norm": 0.6448014974594116, "learning_rate": 1.988933185151418e-05, "loss": 2.3384, "step": 1901 }, { "epoch": 0.06, "grad_norm": 0.6710502505302429, "learning_rate": 1.9889174103188205e-05, "loss": 2.3918, "step": 1902 }, { "epoch": 0.06, "grad_norm": 0.6166720390319824, "learning_rate": 1.9889016243140133e-05, "loss": 2.4035, "step": 1903 }, { "epoch": 0.06, "grad_norm": 0.6438337564468384, "learning_rate": 1.9888858271371755e-05, "loss": 2.3678, "step": 1904 }, { "epoch": 0.06, "grad_norm": 0.6769886612892151, "learning_rate": 1.9888700187884852e-05, "loss": 2.3884, "step": 1905 }, { "epoch": 0.06, "grad_norm": 0.7140762209892273, "learning_rate": 1.9888541992681208e-05, "loss": 2.446, "step": 1906 }, { "epoch": 0.06, "grad_norm": 0.7310478091239929, "learning_rate": 1.9888383685762612e-05, "loss": 2.3836, "step": 1907 }, { "epoch": 0.06, "grad_norm": 0.726624608039856, "learning_rate": 1.9888225267130853e-05, "loss": 2.4926, "step": 1908 }, { "epoch": 0.06, "grad_norm": 0.6651246547698975, "learning_rate": 1.988806673678772e-05, "loss": 2.347, "step": 1909 }, { "epoch": 0.06, "grad_norm": 0.68213951587677, "learning_rate": 1.9887908094735002e-05, "loss": 2.4096, "step": 1910 }, { "epoch": 0.06, "grad_norm": 0.6780643463134766, "learning_rate": 1.9887749340974495e-05, "loss": 2.4552, "step": 1911 }, { "epoch": 0.06, "grad_norm": 0.6292219161987305, "learning_rate": 1.9887590475507988e-05, "loss": 2.3672, "step": 1912 }, { "epoch": 0.06, "grad_norm": 0.6707997918128967, "learning_rate": 1.9887431498337283e-05, "loss": 2.4766, "step": 1913 }, { "epoch": 0.06, "grad_norm": 0.6433650255203247, "learning_rate": 1.9887272409464165e-05, "loss": 2.4221, "step": 1914 }, { "epoch": 0.06, "grad_norm": 0.6633206009864807, "learning_rate": 1.9887113208890444e-05, "loss": 2.4195, "step": 1915 }, { "epoch": 0.06, "grad_norm": 0.6569668054580688, "learning_rate": 1.988695389661791e-05, "loss": 2.3693, "step": 1916 }, { "epoch": 0.06, "grad_norm": 0.6609025001525879, "learning_rate": 1.9886794472648367e-05, "loss": 2.376, "step": 1917 }, { "epoch": 0.06, "grad_norm": 0.6585745215415955, "learning_rate": 1.9886634936983614e-05, "loss": 2.386, "step": 1918 }, { "epoch": 0.06, "grad_norm": 0.6758257746696472, "learning_rate": 1.9886475289625452e-05, "loss": 2.3482, "step": 1919 }, { "epoch": 0.06, "grad_norm": 0.6775721311569214, "learning_rate": 1.9886315530575686e-05, "loss": 2.3974, "step": 1920 }, { "epoch": 0.06, "grad_norm": 0.6903815865516663, "learning_rate": 1.9886155659836118e-05, "loss": 2.3932, "step": 1921 }, { "epoch": 0.06, "grad_norm": 0.71864253282547, "learning_rate": 1.9885995677408564e-05, "loss": 2.3851, "step": 1922 }, { "epoch": 0.06, "grad_norm": 0.6632323265075684, "learning_rate": 1.988583558329482e-05, "loss": 2.3965, "step": 1923 }, { "epoch": 0.06, "grad_norm": 0.6756615042686462, "learning_rate": 1.9885675377496703e-05, "loss": 2.3585, "step": 1924 }, { "epoch": 0.06, "grad_norm": 0.6681207418441772, "learning_rate": 1.9885515060016017e-05, "loss": 2.299, "step": 1925 }, { "epoch": 0.06, "grad_norm": 0.6748582720756531, "learning_rate": 1.9885354630854573e-05, "loss": 2.4134, "step": 1926 }, { "epoch": 0.06, "grad_norm": 0.6963973045349121, "learning_rate": 1.988519409001419e-05, "loss": 2.4039, "step": 1927 }, { "epoch": 0.06, "grad_norm": 0.6478883624076843, "learning_rate": 1.988503343749668e-05, "loss": 2.3677, "step": 1928 }, { "epoch": 0.06, "grad_norm": 0.6691045761108398, "learning_rate": 1.9884872673303847e-05, "loss": 2.3676, "step": 1929 }, { "epoch": 0.06, "grad_norm": 0.6562191247940063, "learning_rate": 1.9884711797437518e-05, "loss": 2.4296, "step": 1930 }, { "epoch": 0.06, "grad_norm": 0.68257075548172, "learning_rate": 1.988455080989951e-05, "loss": 2.3776, "step": 1931 }, { "epoch": 0.06, "grad_norm": 0.6754991412162781, "learning_rate": 1.988438971069164e-05, "loss": 2.3978, "step": 1932 }, { "epoch": 0.06, "grad_norm": 0.702732503414154, "learning_rate": 1.9884228499815726e-05, "loss": 2.3875, "step": 1933 }, { "epoch": 0.06, "grad_norm": 0.67500239610672, "learning_rate": 1.9884067177273592e-05, "loss": 2.4485, "step": 1934 }, { "epoch": 0.06, "grad_norm": 0.718306303024292, "learning_rate": 1.988390574306706e-05, "loss": 2.3784, "step": 1935 }, { "epoch": 0.06, "grad_norm": 0.6888105273246765, "learning_rate": 1.988374419719796e-05, "loss": 2.3815, "step": 1936 }, { "epoch": 0.06, "grad_norm": 0.678712785243988, "learning_rate": 1.9883582539668104e-05, "loss": 2.4398, "step": 1937 }, { "epoch": 0.06, "grad_norm": 0.6974777579307556, "learning_rate": 1.9883420770479324e-05, "loss": 2.4074, "step": 1938 }, { "epoch": 0.06, "grad_norm": 0.6886699199676514, "learning_rate": 1.9883258889633448e-05, "loss": 2.3408, "step": 1939 }, { "epoch": 0.06, "grad_norm": 0.6882629990577698, "learning_rate": 1.9883096897132305e-05, "loss": 2.3495, "step": 1940 }, { "epoch": 0.06, "grad_norm": 0.6942282319068909, "learning_rate": 1.988293479297773e-05, "loss": 2.3103, "step": 1941 }, { "epoch": 0.06, "grad_norm": 0.6663061380386353, "learning_rate": 1.9882772577171546e-05, "loss": 2.394, "step": 1942 }, { "epoch": 0.06, "grad_norm": 0.646624743938446, "learning_rate": 1.9882610249715588e-05, "loss": 2.3551, "step": 1943 }, { "epoch": 0.06, "grad_norm": 0.7030312418937683, "learning_rate": 1.9882447810611692e-05, "loss": 2.3462, "step": 1944 }, { "epoch": 0.06, "grad_norm": 0.6428135633468628, "learning_rate": 1.9882285259861695e-05, "loss": 2.3417, "step": 1945 }, { "epoch": 0.06, "grad_norm": 0.6615855097770691, "learning_rate": 1.988212259746743e-05, "loss": 2.3529, "step": 1946 }, { "epoch": 0.06, "grad_norm": 0.6686455011367798, "learning_rate": 1.9881959823430735e-05, "loss": 2.3863, "step": 1947 }, { "epoch": 0.06, "grad_norm": 0.6516574025154114, "learning_rate": 1.9881796937753448e-05, "loss": 2.3393, "step": 1948 }, { "epoch": 0.06, "grad_norm": 0.690444827079773, "learning_rate": 1.988163394043741e-05, "loss": 2.4368, "step": 1949 }, { "epoch": 0.06, "grad_norm": 0.7215588688850403, "learning_rate": 1.9881470831484465e-05, "loss": 2.4541, "step": 1950 }, { "epoch": 0.06, "grad_norm": 0.6870972514152527, "learning_rate": 1.9881307610896453e-05, "loss": 2.3809, "step": 1951 }, { "epoch": 0.06, "grad_norm": 0.683038592338562, "learning_rate": 1.988114427867522e-05, "loss": 2.4195, "step": 1952 }, { "epoch": 0.06, "grad_norm": 0.6629478335380554, "learning_rate": 1.988098083482261e-05, "loss": 2.3159, "step": 1953 }, { "epoch": 0.07, "grad_norm": 0.6993348598480225, "learning_rate": 1.988081727934047e-05, "loss": 2.4497, "step": 1954 }, { "epoch": 0.07, "grad_norm": 0.6564309000968933, "learning_rate": 1.988065361223064e-05, "loss": 2.3749, "step": 1955 }, { "epoch": 0.07, "grad_norm": 0.6449017524719238, "learning_rate": 1.988048983349498e-05, "loss": 2.3683, "step": 1956 }, { "epoch": 0.07, "grad_norm": 0.692620038986206, "learning_rate": 1.988032594313534e-05, "loss": 2.3219, "step": 1957 }, { "epoch": 0.07, "grad_norm": 0.6503558158874512, "learning_rate": 1.9880161941153564e-05, "loss": 2.4109, "step": 1958 }, { "epoch": 0.07, "grad_norm": 0.6510140895843506, "learning_rate": 1.987999782755151e-05, "loss": 2.2771, "step": 1959 }, { "epoch": 0.07, "grad_norm": 0.6401605606079102, "learning_rate": 1.987983360233103e-05, "loss": 2.4024, "step": 1960 }, { "epoch": 0.07, "grad_norm": 0.6664703488349915, "learning_rate": 1.9879669265493984e-05, "loss": 2.3935, "step": 1961 }, { "epoch": 0.07, "grad_norm": 0.7180161476135254, "learning_rate": 1.987950481704222e-05, "loss": 2.3931, "step": 1962 }, { "epoch": 0.07, "grad_norm": 0.6274951100349426, "learning_rate": 1.9879340256977603e-05, "loss": 2.3548, "step": 1963 }, { "epoch": 0.07, "grad_norm": 0.6671566367149353, "learning_rate": 1.987917558530199e-05, "loss": 2.3166, "step": 1964 }, { "epoch": 0.07, "grad_norm": 0.6738818287849426, "learning_rate": 1.9879010802017237e-05, "loss": 2.4312, "step": 1965 }, { "epoch": 0.07, "grad_norm": 0.6993537545204163, "learning_rate": 1.9878845907125215e-05, "loss": 2.4354, "step": 1966 }, { "epoch": 0.07, "grad_norm": 0.6515253782272339, "learning_rate": 1.9878680900627776e-05, "loss": 2.3889, "step": 1967 }, { "epoch": 0.07, "grad_norm": 0.6461590528488159, "learning_rate": 1.9878515782526795e-05, "loss": 2.3306, "step": 1968 }, { "epoch": 0.07, "grad_norm": 0.64005047082901, "learning_rate": 1.987835055282413e-05, "loss": 2.3448, "step": 1969 }, { "epoch": 0.07, "grad_norm": 0.7339893579483032, "learning_rate": 1.987818521152165e-05, "loss": 2.5097, "step": 1970 }, { "epoch": 0.07, "grad_norm": 0.6623417735099792, "learning_rate": 1.987801975862122e-05, "loss": 2.3238, "step": 1971 }, { "epoch": 0.07, "grad_norm": 0.70616215467453, "learning_rate": 1.9877854194124714e-05, "loss": 2.4033, "step": 1972 }, { "epoch": 0.07, "grad_norm": 0.6597620248794556, "learning_rate": 1.9877688518034002e-05, "loss": 2.3966, "step": 1973 }, { "epoch": 0.07, "grad_norm": 0.633905827999115, "learning_rate": 1.987752273035095e-05, "loss": 2.3723, "step": 1974 }, { "epoch": 0.07, "grad_norm": 0.6773349046707153, "learning_rate": 1.987735683107744e-05, "loss": 2.3748, "step": 1975 }, { "epoch": 0.07, "grad_norm": 0.6817188858985901, "learning_rate": 1.9877190820215338e-05, "loss": 2.3489, "step": 1976 }, { "epoch": 0.07, "grad_norm": 0.6746551990509033, "learning_rate": 1.9877024697766523e-05, "loss": 2.3408, "step": 1977 }, { "epoch": 0.07, "grad_norm": 0.7102041244506836, "learning_rate": 1.987685846373287e-05, "loss": 2.3338, "step": 1978 }, { "epoch": 0.07, "grad_norm": 0.6619156002998352, "learning_rate": 1.987669211811626e-05, "loss": 2.4134, "step": 1979 }, { "epoch": 0.07, "grad_norm": 0.6872266530990601, "learning_rate": 1.9876525660918572e-05, "loss": 2.395, "step": 1980 }, { "epoch": 0.07, "grad_norm": 0.6938050389289856, "learning_rate": 1.9876359092141687e-05, "loss": 2.3238, "step": 1981 }, { "epoch": 0.07, "grad_norm": 0.6569597125053406, "learning_rate": 1.987619241178748e-05, "loss": 2.3686, "step": 1982 }, { "epoch": 0.07, "grad_norm": 0.6240250468254089, "learning_rate": 1.9876025619857843e-05, "loss": 2.3393, "step": 1983 }, { "epoch": 0.07, "grad_norm": 0.6742483377456665, "learning_rate": 1.9875858716354655e-05, "loss": 2.3847, "step": 1984 }, { "epoch": 0.07, "grad_norm": 0.6481521129608154, "learning_rate": 1.9875691701279803e-05, "loss": 2.3429, "step": 1985 }, { "epoch": 0.07, "grad_norm": 0.6890261769294739, "learning_rate": 1.9875524574635173e-05, "loss": 2.3927, "step": 1986 }, { "epoch": 0.07, "grad_norm": 0.7430751919746399, "learning_rate": 1.9875357336422657e-05, "loss": 2.3593, "step": 1987 }, { "epoch": 0.07, "grad_norm": 0.6601622700691223, "learning_rate": 1.9875189986644135e-05, "loss": 2.423, "step": 1988 }, { "epoch": 0.07, "grad_norm": 0.6857054233551025, "learning_rate": 1.987502252530151e-05, "loss": 2.3402, "step": 1989 }, { "epoch": 0.07, "grad_norm": 0.663474977016449, "learning_rate": 1.9874854952396666e-05, "loss": 2.308, "step": 1990 }, { "epoch": 0.07, "grad_norm": 0.6763882637023926, "learning_rate": 1.98746872679315e-05, "loss": 2.3502, "step": 1991 }, { "epoch": 0.07, "grad_norm": 0.6983333230018616, "learning_rate": 1.98745194719079e-05, "loss": 2.368, "step": 1992 }, { "epoch": 0.07, "grad_norm": 0.700821042060852, "learning_rate": 1.987435156432777e-05, "loss": 2.4557, "step": 1993 }, { "epoch": 0.07, "grad_norm": 0.6614567637443542, "learning_rate": 1.9874183545193e-05, "loss": 2.4166, "step": 1994 }, { "epoch": 0.07, "grad_norm": 0.6725916862487793, "learning_rate": 1.987401541450549e-05, "loss": 2.4023, "step": 1995 }, { "epoch": 0.07, "grad_norm": 0.6512778997421265, "learning_rate": 1.9873847172267146e-05, "loss": 2.3874, "step": 1996 }, { "epoch": 0.07, "grad_norm": 0.6550699472427368, "learning_rate": 1.9873678818479862e-05, "loss": 2.3988, "step": 1997 }, { "epoch": 0.07, "grad_norm": 0.6466767191886902, "learning_rate": 1.9873510353145538e-05, "loss": 2.3601, "step": 1998 }, { "epoch": 0.07, "grad_norm": 0.6573099493980408, "learning_rate": 1.9873341776266083e-05, "loss": 2.3733, "step": 1999 }, { "epoch": 0.07, "grad_norm": 0.6988899111747742, "learning_rate": 1.98731730878434e-05, "loss": 2.3703, "step": 2000 }, { "epoch": 0.07, "grad_norm": 0.6830779314041138, "learning_rate": 1.987300428787939e-05, "loss": 2.3948, "step": 2001 }, { "epoch": 0.07, "grad_norm": 0.6640918254852295, "learning_rate": 1.9872835376375966e-05, "loss": 2.3053, "step": 2002 }, { "epoch": 0.07, "grad_norm": 0.6555066108703613, "learning_rate": 1.9872666353335034e-05, "loss": 2.4064, "step": 2003 }, { "epoch": 0.07, "grad_norm": 0.6405621767044067, "learning_rate": 1.98724972187585e-05, "loss": 2.3567, "step": 2004 }, { "epoch": 0.07, "grad_norm": 0.6533565521240234, "learning_rate": 1.9872327972648282e-05, "loss": 2.3832, "step": 2005 }, { "epoch": 0.07, "grad_norm": 0.6543545126914978, "learning_rate": 1.9872158615006287e-05, "loss": 2.3483, "step": 2006 }, { "epoch": 0.07, "grad_norm": 0.7175511717796326, "learning_rate": 1.987198914583443e-05, "loss": 2.3405, "step": 2007 }, { "epoch": 0.07, "grad_norm": 0.6671443581581116, "learning_rate": 1.9871819565134626e-05, "loss": 2.3703, "step": 2008 }, { "epoch": 0.07, "grad_norm": 0.7089993357658386, "learning_rate": 1.987164987290879e-05, "loss": 2.3992, "step": 2009 }, { "epoch": 0.07, "grad_norm": 0.6659372448921204, "learning_rate": 1.987148006915884e-05, "loss": 2.3296, "step": 2010 }, { "epoch": 0.07, "grad_norm": 0.7050767540931702, "learning_rate": 1.987131015388669e-05, "loss": 2.4049, "step": 2011 }, { "epoch": 0.07, "grad_norm": 0.6699216365814209, "learning_rate": 1.9871140127094268e-05, "loss": 2.3962, "step": 2012 }, { "epoch": 0.07, "grad_norm": 0.6618857979774475, "learning_rate": 1.9870969988783483e-05, "loss": 2.387, "step": 2013 }, { "epoch": 0.07, "grad_norm": 0.7021678686141968, "learning_rate": 1.987079973895627e-05, "loss": 2.437, "step": 2014 }, { "epoch": 0.07, "grad_norm": 0.6738669872283936, "learning_rate": 1.9870629377614544e-05, "loss": 2.4115, "step": 2015 }, { "epoch": 0.07, "grad_norm": 0.674420177936554, "learning_rate": 1.9870458904760232e-05, "loss": 2.4134, "step": 2016 }, { "epoch": 0.07, "grad_norm": 0.6766080856323242, "learning_rate": 1.987028832039526e-05, "loss": 2.3895, "step": 2017 }, { "epoch": 0.07, "grad_norm": 0.6816107034683228, "learning_rate": 1.9870117624521557e-05, "loss": 2.3732, "step": 2018 }, { "epoch": 0.07, "grad_norm": 0.6705644130706787, "learning_rate": 1.9869946817141046e-05, "loss": 2.4621, "step": 2019 }, { "epoch": 0.07, "grad_norm": 0.7104842066764832, "learning_rate": 1.9869775898255665e-05, "loss": 2.4223, "step": 2020 }, { "epoch": 0.07, "grad_norm": 0.6839065551757812, "learning_rate": 1.9869604867867337e-05, "loss": 2.4159, "step": 2021 }, { "epoch": 0.07, "grad_norm": 0.7369197607040405, "learning_rate": 1.9869433725977997e-05, "loss": 2.3866, "step": 2022 }, { "epoch": 0.07, "grad_norm": 0.7267538905143738, "learning_rate": 1.9869262472589577e-05, "loss": 2.4151, "step": 2023 }, { "epoch": 0.07, "grad_norm": 0.6522248387336731, "learning_rate": 1.9869091107704018e-05, "loss": 2.3232, "step": 2024 }, { "epoch": 0.07, "grad_norm": 0.6542271375656128, "learning_rate": 1.986891963132325e-05, "loss": 2.3985, "step": 2025 }, { "epoch": 0.07, "grad_norm": 0.6832755208015442, "learning_rate": 1.986874804344921e-05, "loss": 2.428, "step": 2026 }, { "epoch": 0.07, "grad_norm": 0.6650073528289795, "learning_rate": 1.9868576344083837e-05, "loss": 2.4005, "step": 2027 }, { "epoch": 0.07, "grad_norm": 0.6918550729751587, "learning_rate": 1.986840453322908e-05, "loss": 2.3573, "step": 2028 }, { "epoch": 0.07, "grad_norm": 0.6809049248695374, "learning_rate": 1.9868232610886865e-05, "loss": 2.3346, "step": 2029 }, { "epoch": 0.07, "grad_norm": 0.6784788966178894, "learning_rate": 1.986806057705914e-05, "loss": 2.3566, "step": 2030 }, { "epoch": 0.07, "grad_norm": 0.66108638048172, "learning_rate": 1.9867888431747856e-05, "loss": 2.3627, "step": 2031 }, { "epoch": 0.07, "grad_norm": 0.6386533379554749, "learning_rate": 1.9867716174954947e-05, "loss": 2.3714, "step": 2032 }, { "epoch": 0.07, "grad_norm": 0.6785694360733032, "learning_rate": 1.9867543806682364e-05, "loss": 2.3925, "step": 2033 }, { "epoch": 0.07, "grad_norm": 0.6885417699813843, "learning_rate": 1.9867371326932057e-05, "loss": 2.4287, "step": 2034 }, { "epoch": 0.07, "grad_norm": 0.6983519196510315, "learning_rate": 1.9867198735705967e-05, "loss": 2.3495, "step": 2035 }, { "epoch": 0.07, "grad_norm": 0.673380970954895, "learning_rate": 1.9867026033006054e-05, "loss": 2.4587, "step": 2036 }, { "epoch": 0.07, "grad_norm": 0.6670863032341003, "learning_rate": 1.9866853218834258e-05, "loss": 2.3746, "step": 2037 }, { "epoch": 0.07, "grad_norm": 0.6862033605575562, "learning_rate": 1.9866680293192543e-05, "loss": 2.3832, "step": 2038 }, { "epoch": 0.07, "grad_norm": 0.6466721296310425, "learning_rate": 1.986650725608285e-05, "loss": 2.3868, "step": 2039 }, { "epoch": 0.07, "grad_norm": 0.663447380065918, "learning_rate": 1.9866334107507142e-05, "loss": 2.4216, "step": 2040 }, { "epoch": 0.07, "grad_norm": 0.6897183656692505, "learning_rate": 1.986616084746738e-05, "loss": 2.3383, "step": 2041 }, { "epoch": 0.07, "grad_norm": 0.6826854944229126, "learning_rate": 1.986598747596551e-05, "loss": 2.3601, "step": 2042 }, { "epoch": 0.07, "grad_norm": 0.6641092300415039, "learning_rate": 1.9865813993003495e-05, "loss": 2.3777, "step": 2043 }, { "epoch": 0.07, "grad_norm": 0.6822092533111572, "learning_rate": 1.986564039858329e-05, "loss": 2.3741, "step": 2044 }, { "epoch": 0.07, "grad_norm": 0.6672011613845825, "learning_rate": 1.9865466692706873e-05, "loss": 2.3549, "step": 2045 }, { "epoch": 0.07, "grad_norm": 0.6960858702659607, "learning_rate": 1.9865292875376187e-05, "loss": 2.3543, "step": 2046 }, { "epoch": 0.07, "grad_norm": 0.6726285815238953, "learning_rate": 1.9865118946593206e-05, "loss": 2.4135, "step": 2047 }, { "epoch": 0.07, "grad_norm": 0.6630895733833313, "learning_rate": 1.9864944906359894e-05, "loss": 2.398, "step": 2048 }, { "epoch": 0.07, "grad_norm": 0.6435898542404175, "learning_rate": 1.9864770754678212e-05, "loss": 2.3481, "step": 2049 }, { "epoch": 0.07, "grad_norm": 0.6689082384109497, "learning_rate": 1.9864596491550138e-05, "loss": 2.4276, "step": 2050 }, { "epoch": 0.07, "grad_norm": 0.6663751602172852, "learning_rate": 1.9864422116977628e-05, "loss": 2.4089, "step": 2051 }, { "epoch": 0.07, "grad_norm": 0.7422425150871277, "learning_rate": 1.986424763096266e-05, "loss": 2.4257, "step": 2052 }, { "epoch": 0.07, "grad_norm": 0.6590396761894226, "learning_rate": 1.9864073033507205e-05, "loss": 2.3919, "step": 2053 }, { "epoch": 0.07, "grad_norm": 0.6558582782745361, "learning_rate": 1.986389832461323e-05, "loss": 2.3787, "step": 2054 }, { "epoch": 0.07, "grad_norm": 0.6467647552490234, "learning_rate": 1.9863723504282716e-05, "loss": 2.3168, "step": 2055 }, { "epoch": 0.07, "grad_norm": 0.6688591241836548, "learning_rate": 1.9863548572517635e-05, "loss": 2.3702, "step": 2056 }, { "epoch": 0.07, "grad_norm": 0.6677448749542236, "learning_rate": 1.9863373529319962e-05, "loss": 2.3814, "step": 2057 }, { "epoch": 0.07, "grad_norm": 0.6953178644180298, "learning_rate": 1.9863198374691673e-05, "loss": 2.4052, "step": 2058 }, { "epoch": 0.07, "grad_norm": 0.6931257247924805, "learning_rate": 1.986302310863475e-05, "loss": 2.4291, "step": 2059 }, { "epoch": 0.07, "grad_norm": 0.6836228966712952, "learning_rate": 1.9862847731151174e-05, "loss": 2.3587, "step": 2060 }, { "epoch": 0.07, "grad_norm": 0.6498184204101562, "learning_rate": 1.986267224224292e-05, "loss": 2.3757, "step": 2061 }, { "epoch": 0.07, "grad_norm": 0.6620586514472961, "learning_rate": 1.986249664191198e-05, "loss": 2.4577, "step": 2062 }, { "epoch": 0.07, "grad_norm": 0.6652518510818481, "learning_rate": 1.9862320930160333e-05, "loss": 2.4013, "step": 2063 }, { "epoch": 0.07, "grad_norm": 0.6732465624809265, "learning_rate": 1.9862145106989962e-05, "loss": 2.3901, "step": 2064 }, { "epoch": 0.07, "grad_norm": 0.6599177122116089, "learning_rate": 1.9861969172402856e-05, "loss": 2.3518, "step": 2065 }, { "epoch": 0.07, "grad_norm": 0.6582318544387817, "learning_rate": 1.9861793126401e-05, "loss": 2.4471, "step": 2066 }, { "epoch": 0.07, "grad_norm": 0.6476503610610962, "learning_rate": 1.986161696898639e-05, "loss": 2.3543, "step": 2067 }, { "epoch": 0.07, "grad_norm": 0.652982234954834, "learning_rate": 1.9861440700161004e-05, "loss": 2.384, "step": 2068 }, { "epoch": 0.07, "grad_norm": 0.6581761240959167, "learning_rate": 1.9861264319926845e-05, "loss": 2.4123, "step": 2069 }, { "epoch": 0.07, "grad_norm": 0.6495481133460999, "learning_rate": 1.98610878282859e-05, "loss": 2.3554, "step": 2070 }, { "epoch": 0.07, "grad_norm": 0.6670634746551514, "learning_rate": 1.9860911225240164e-05, "loss": 2.3854, "step": 2071 }, { "epoch": 0.07, "grad_norm": 0.6399278044700623, "learning_rate": 1.9860734510791632e-05, "loss": 2.4069, "step": 2072 }, { "epoch": 0.07, "grad_norm": 0.6676241755485535, "learning_rate": 1.98605576849423e-05, "loss": 2.4118, "step": 2073 }, { "epoch": 0.07, "grad_norm": 0.6826092600822449, "learning_rate": 1.986038074769417e-05, "loss": 2.3657, "step": 2074 }, { "epoch": 0.07, "grad_norm": 0.7534124255180359, "learning_rate": 1.986020369904923e-05, "loss": 2.4276, "step": 2075 }, { "epoch": 0.07, "grad_norm": 0.6724449396133423, "learning_rate": 1.986002653900949e-05, "loss": 2.4011, "step": 2076 }, { "epoch": 0.07, "grad_norm": 0.6664377450942993, "learning_rate": 1.985984926757695e-05, "loss": 2.4104, "step": 2077 }, { "epoch": 0.07, "grad_norm": 0.6607882976531982, "learning_rate": 1.985967188475361e-05, "loss": 2.4033, "step": 2078 }, { "epoch": 0.07, "grad_norm": 0.7024486064910889, "learning_rate": 1.9859494390541476e-05, "loss": 2.4296, "step": 2079 }, { "epoch": 0.07, "grad_norm": 0.6828149557113647, "learning_rate": 1.9859316784942554e-05, "loss": 2.3237, "step": 2080 }, { "epoch": 0.07, "grad_norm": 0.6808659434318542, "learning_rate": 1.9859139067958847e-05, "loss": 2.424, "step": 2081 }, { "epoch": 0.07, "grad_norm": 0.6621743440628052, "learning_rate": 1.9858961239592367e-05, "loss": 2.3853, "step": 2082 }, { "epoch": 0.07, "grad_norm": 0.68265300989151, "learning_rate": 1.985878329984512e-05, "loss": 2.3204, "step": 2083 }, { "epoch": 0.07, "grad_norm": 0.6555649042129517, "learning_rate": 1.9858605248719116e-05, "loss": 2.3968, "step": 2084 }, { "epoch": 0.07, "grad_norm": 0.6771050095558167, "learning_rate": 1.985842708621637e-05, "loss": 2.3691, "step": 2085 }, { "epoch": 0.07, "grad_norm": 0.6494032144546509, "learning_rate": 1.9858248812338888e-05, "loss": 2.4183, "step": 2086 }, { "epoch": 0.07, "grad_norm": 0.669026792049408, "learning_rate": 1.9858070427088693e-05, "loss": 2.3452, "step": 2087 }, { "epoch": 0.07, "grad_norm": 0.7020699381828308, "learning_rate": 1.985789193046779e-05, "loss": 2.3691, "step": 2088 }, { "epoch": 0.07, "grad_norm": 0.6524288654327393, "learning_rate": 1.9857713322478206e-05, "loss": 2.3178, "step": 2089 }, { "epoch": 0.07, "grad_norm": 0.6542246341705322, "learning_rate": 1.9857534603121954e-05, "loss": 2.3738, "step": 2090 }, { "epoch": 0.07, "grad_norm": 0.668902575969696, "learning_rate": 1.985735577240105e-05, "loss": 2.3369, "step": 2091 }, { "epoch": 0.07, "grad_norm": 0.6768938899040222, "learning_rate": 1.9857176830317522e-05, "loss": 2.3875, "step": 2092 }, { "epoch": 0.07, "grad_norm": 0.6657976508140564, "learning_rate": 1.9856997776873386e-05, "loss": 2.367, "step": 2093 }, { "epoch": 0.07, "grad_norm": 0.6555323004722595, "learning_rate": 1.9856818612070665e-05, "loss": 2.3022, "step": 2094 }, { "epoch": 0.07, "grad_norm": 0.6780571937561035, "learning_rate": 1.9856639335911385e-05, "loss": 2.3329, "step": 2095 }, { "epoch": 0.07, "grad_norm": 0.6511105298995972, "learning_rate": 1.985645994839757e-05, "loss": 2.3687, "step": 2096 }, { "epoch": 0.07, "grad_norm": 0.6600292921066284, "learning_rate": 1.9856280449531244e-05, "loss": 2.349, "step": 2097 }, { "epoch": 0.07, "grad_norm": 0.6861180067062378, "learning_rate": 1.9856100839314442e-05, "loss": 2.4087, "step": 2098 }, { "epoch": 0.07, "grad_norm": 0.6916261911392212, "learning_rate": 1.9855921117749186e-05, "loss": 2.3968, "step": 2099 }, { "epoch": 0.07, "grad_norm": 0.6579020619392395, "learning_rate": 1.9855741284837508e-05, "loss": 2.3103, "step": 2100 }, { "epoch": 0.07, "grad_norm": 0.6711440682411194, "learning_rate": 1.9855561340581445e-05, "loss": 2.419, "step": 2101 }, { "epoch": 0.07, "grad_norm": 0.6653145551681519, "learning_rate": 1.9855381284983023e-05, "loss": 2.393, "step": 2102 }, { "epoch": 0.07, "grad_norm": 0.6877423524856567, "learning_rate": 1.985520111804428e-05, "loss": 2.4116, "step": 2103 }, { "epoch": 0.07, "grad_norm": 0.6493449807167053, "learning_rate": 1.985502083976725e-05, "loss": 2.3635, "step": 2104 }, { "epoch": 0.07, "grad_norm": 0.6679942607879639, "learning_rate": 1.985484045015397e-05, "loss": 2.4139, "step": 2105 }, { "epoch": 0.07, "grad_norm": 0.6926138401031494, "learning_rate": 1.985465994920648e-05, "loss": 2.3995, "step": 2106 }, { "epoch": 0.07, "grad_norm": 0.659649133682251, "learning_rate": 1.9854479336926816e-05, "loss": 2.3948, "step": 2107 }, { "epoch": 0.07, "grad_norm": 0.6499054431915283, "learning_rate": 1.985429861331702e-05, "loss": 2.3598, "step": 2108 }, { "epoch": 0.07, "grad_norm": 0.6990717649459839, "learning_rate": 1.9854117778379135e-05, "loss": 2.3509, "step": 2109 }, { "epoch": 0.07, "grad_norm": 0.715269148349762, "learning_rate": 1.9853936832115198e-05, "loss": 2.3407, "step": 2110 }, { "epoch": 0.07, "grad_norm": 0.7020423412322998, "learning_rate": 1.9853755774527262e-05, "loss": 2.2913, "step": 2111 }, { "epoch": 0.07, "grad_norm": 0.716228723526001, "learning_rate": 1.9853574605617364e-05, "loss": 2.3399, "step": 2112 }, { "epoch": 0.07, "grad_norm": 0.6657246351242065, "learning_rate": 1.985339332538756e-05, "loss": 2.3581, "step": 2113 }, { "epoch": 0.07, "grad_norm": 0.6320322155952454, "learning_rate": 1.9853211933839887e-05, "loss": 2.4297, "step": 2114 }, { "epoch": 0.07, "grad_norm": 0.6691752076148987, "learning_rate": 1.9853030430976404e-05, "loss": 2.3932, "step": 2115 }, { "epoch": 0.07, "grad_norm": 0.7007383704185486, "learning_rate": 1.985284881679916e-05, "loss": 2.3776, "step": 2116 }, { "epoch": 0.07, "grad_norm": 0.700365424156189, "learning_rate": 1.98526670913102e-05, "loss": 2.3643, "step": 2117 }, { "epoch": 0.07, "grad_norm": 0.6492392420768738, "learning_rate": 1.9852485254511584e-05, "loss": 2.3954, "step": 2118 }, { "epoch": 0.07, "grad_norm": 0.6809250116348267, "learning_rate": 1.985230330640536e-05, "loss": 2.3443, "step": 2119 }, { "epoch": 0.07, "grad_norm": 0.7044557332992554, "learning_rate": 1.985212124699359e-05, "loss": 2.4108, "step": 2120 }, { "epoch": 0.07, "grad_norm": 0.660711944103241, "learning_rate": 1.9851939076278328e-05, "loss": 2.4054, "step": 2121 }, { "epoch": 0.07, "grad_norm": 0.7104321122169495, "learning_rate": 1.9851756794261634e-05, "loss": 2.3376, "step": 2122 }, { "epoch": 0.07, "grad_norm": 0.6935300230979919, "learning_rate": 1.985157440094556e-05, "loss": 2.3494, "step": 2123 }, { "epoch": 0.07, "grad_norm": 0.6529771685600281, "learning_rate": 1.985139189633218e-05, "loss": 2.3632, "step": 2124 }, { "epoch": 0.07, "grad_norm": 0.6651171445846558, "learning_rate": 1.985120928042354e-05, "loss": 2.3846, "step": 2125 }, { "epoch": 0.07, "grad_norm": 0.683975875377655, "learning_rate": 1.9851026553221715e-05, "loss": 2.3916, "step": 2126 }, { "epoch": 0.07, "grad_norm": 0.6484038829803467, "learning_rate": 1.9850843714728763e-05, "loss": 2.3413, "step": 2127 }, { "epoch": 0.07, "grad_norm": 0.6678999066352844, "learning_rate": 1.9850660764946754e-05, "loss": 2.3454, "step": 2128 }, { "epoch": 0.07, "grad_norm": 0.6546140909194946, "learning_rate": 1.985047770387775e-05, "loss": 2.4254, "step": 2129 }, { "epoch": 0.07, "grad_norm": 0.6998836994171143, "learning_rate": 1.985029453152383e-05, "loss": 2.4132, "step": 2130 }, { "epoch": 0.07, "grad_norm": 0.7189781069755554, "learning_rate": 1.9850111247887047e-05, "loss": 2.3623, "step": 2131 }, { "epoch": 0.07, "grad_norm": 0.6607388257980347, "learning_rate": 1.984992785296948e-05, "loss": 2.3734, "step": 2132 }, { "epoch": 0.07, "grad_norm": 0.6751648187637329, "learning_rate": 1.9849744346773203e-05, "loss": 2.3753, "step": 2133 }, { "epoch": 0.07, "grad_norm": 0.6989012956619263, "learning_rate": 1.9849560729300288e-05, "loss": 2.3938, "step": 2134 }, { "epoch": 0.07, "grad_norm": 0.6420818567276001, "learning_rate": 1.9849377000552805e-05, "loss": 2.3443, "step": 2135 }, { "epoch": 0.07, "grad_norm": 0.6902161240577698, "learning_rate": 1.9849193160532836e-05, "loss": 2.3801, "step": 2136 }, { "epoch": 0.07, "grad_norm": 0.692310631275177, "learning_rate": 1.9849009209242455e-05, "loss": 2.3207, "step": 2137 }, { "epoch": 0.07, "grad_norm": 0.6986352801322937, "learning_rate": 1.9848825146683735e-05, "loss": 2.3127, "step": 2138 }, { "epoch": 0.07, "grad_norm": 0.7159571051597595, "learning_rate": 1.9848640972858768e-05, "loss": 2.372, "step": 2139 }, { "epoch": 0.07, "grad_norm": 0.7222824096679688, "learning_rate": 1.9848456687769623e-05, "loss": 2.3334, "step": 2140 }, { "epoch": 0.07, "grad_norm": 0.6626449823379517, "learning_rate": 1.9848272291418387e-05, "loss": 2.3185, "step": 2141 }, { "epoch": 0.07, "grad_norm": 0.6679739952087402, "learning_rate": 1.984808778380714e-05, "loss": 2.3761, "step": 2142 }, { "epoch": 0.07, "grad_norm": 0.682604193687439, "learning_rate": 1.984790316493797e-05, "loss": 2.3264, "step": 2143 }, { "epoch": 0.07, "grad_norm": 0.7066798806190491, "learning_rate": 1.9847718434812965e-05, "loss": 2.4053, "step": 2144 }, { "epoch": 0.07, "grad_norm": 0.6840493679046631, "learning_rate": 1.9847533593434206e-05, "loss": 2.4115, "step": 2145 }, { "epoch": 0.07, "grad_norm": 0.6960777640342712, "learning_rate": 1.984734864080378e-05, "loss": 2.3578, "step": 2146 }, { "epoch": 0.07, "grad_norm": 0.6708642244338989, "learning_rate": 1.9847163576923784e-05, "loss": 2.3484, "step": 2147 }, { "epoch": 0.07, "grad_norm": 0.655567467212677, "learning_rate": 1.9846978401796304e-05, "loss": 2.3435, "step": 2148 }, { "epoch": 0.07, "grad_norm": 0.670125424861908, "learning_rate": 1.9846793115423433e-05, "loss": 2.3615, "step": 2149 }, { "epoch": 0.07, "grad_norm": 0.7002980709075928, "learning_rate": 1.9846607717807266e-05, "loss": 2.3947, "step": 2150 }, { "epoch": 0.07, "grad_norm": 0.6629713177680969, "learning_rate": 1.9846422208949893e-05, "loss": 2.4121, "step": 2151 }, { "epoch": 0.07, "grad_norm": 0.7053492069244385, "learning_rate": 1.9846236588853414e-05, "loss": 2.4199, "step": 2152 }, { "epoch": 0.07, "grad_norm": 0.6962704062461853, "learning_rate": 1.9846050857519926e-05, "loss": 2.324, "step": 2153 }, { "epoch": 0.07, "grad_norm": 0.6892321705818176, "learning_rate": 1.984586501495152e-05, "loss": 2.4088, "step": 2154 }, { "epoch": 0.07, "grad_norm": 0.6752204298973083, "learning_rate": 1.9845679061150305e-05, "loss": 2.3328, "step": 2155 }, { "epoch": 0.07, "grad_norm": 0.6548035144805908, "learning_rate": 1.984549299611838e-05, "loss": 2.3186, "step": 2156 }, { "epoch": 0.07, "grad_norm": 0.6625932455062866, "learning_rate": 1.9845306819857844e-05, "loss": 2.3366, "step": 2157 }, { "epoch": 0.07, "grad_norm": 0.734879732131958, "learning_rate": 1.9845120532370797e-05, "loss": 2.3609, "step": 2158 }, { "epoch": 0.07, "grad_norm": 0.6960425972938538, "learning_rate": 1.9844934133659356e-05, "loss": 2.3509, "step": 2159 }, { "epoch": 0.07, "grad_norm": 0.6719722747802734, "learning_rate": 1.9844747623725615e-05, "loss": 2.3544, "step": 2160 }, { "epoch": 0.07, "grad_norm": 0.6537691354751587, "learning_rate": 1.9844561002571683e-05, "loss": 2.3312, "step": 2161 }, { "epoch": 0.07, "grad_norm": 0.6795572638511658, "learning_rate": 1.984437427019967e-05, "loss": 2.3576, "step": 2162 }, { "epoch": 0.07, "grad_norm": 0.6786391735076904, "learning_rate": 1.984418742661169e-05, "loss": 2.3089, "step": 2163 }, { "epoch": 0.07, "grad_norm": 0.6689738035202026, "learning_rate": 1.9844000471809845e-05, "loss": 2.3998, "step": 2164 }, { "epoch": 0.07, "grad_norm": 0.752162754535675, "learning_rate": 1.9843813405796257e-05, "loss": 2.4445, "step": 2165 }, { "epoch": 0.07, "grad_norm": 0.6791106462478638, "learning_rate": 1.984362622857303e-05, "loss": 2.3781, "step": 2166 }, { "epoch": 0.07, "grad_norm": 0.6738051772117615, "learning_rate": 1.9843438940142286e-05, "loss": 2.3065, "step": 2167 }, { "epoch": 0.07, "grad_norm": 0.7057000994682312, "learning_rate": 1.9843251540506133e-05, "loss": 2.4463, "step": 2168 }, { "epoch": 0.07, "grad_norm": 0.6772225499153137, "learning_rate": 1.9843064029666697e-05, "loss": 2.4504, "step": 2169 }, { "epoch": 0.07, "grad_norm": 0.677797794342041, "learning_rate": 1.9842876407626092e-05, "loss": 2.3421, "step": 2170 }, { "epoch": 0.07, "grad_norm": 0.6764008402824402, "learning_rate": 1.9842688674386436e-05, "loss": 2.3668, "step": 2171 }, { "epoch": 0.07, "grad_norm": 0.7085667848587036, "learning_rate": 1.984250082994985e-05, "loss": 2.4127, "step": 2172 }, { "epoch": 0.07, "grad_norm": 0.6663535237312317, "learning_rate": 1.9842312874318465e-05, "loss": 2.4099, "step": 2173 }, { "epoch": 0.07, "grad_norm": 0.682090699672699, "learning_rate": 1.984212480749439e-05, "loss": 2.4213, "step": 2174 }, { "epoch": 0.07, "grad_norm": 0.6791077256202698, "learning_rate": 1.984193662947976e-05, "loss": 2.387, "step": 2175 }, { "epoch": 0.07, "grad_norm": 0.6651291251182556, "learning_rate": 1.9841748340276697e-05, "loss": 2.3421, "step": 2176 }, { "epoch": 0.07, "grad_norm": 0.6477990746498108, "learning_rate": 1.9841559939887333e-05, "loss": 2.3968, "step": 2177 }, { "epoch": 0.07, "grad_norm": 0.6812819242477417, "learning_rate": 1.9841371428313784e-05, "loss": 2.3848, "step": 2178 }, { "epoch": 0.07, "grad_norm": 0.6822513341903687, "learning_rate": 1.9841182805558196e-05, "loss": 2.2675, "step": 2179 }, { "epoch": 0.07, "grad_norm": 0.6849010586738586, "learning_rate": 1.984099407162269e-05, "loss": 2.3486, "step": 2180 }, { "epoch": 0.07, "grad_norm": 0.6623086333274841, "learning_rate": 1.98408052265094e-05, "loss": 2.3179, "step": 2181 }, { "epoch": 0.07, "grad_norm": 0.6471244692802429, "learning_rate": 1.984061627022046e-05, "loss": 2.3481, "step": 2182 }, { "epoch": 0.07, "grad_norm": 0.6550038456916809, "learning_rate": 1.9840427202758005e-05, "loss": 2.3348, "step": 2183 }, { "epoch": 0.07, "grad_norm": 0.664729118347168, "learning_rate": 1.9840238024124167e-05, "loss": 2.3733, "step": 2184 }, { "epoch": 0.07, "grad_norm": 0.6552519202232361, "learning_rate": 1.984004873432109e-05, "loss": 2.3487, "step": 2185 }, { "epoch": 0.07, "grad_norm": 0.67540442943573, "learning_rate": 1.9839859333350908e-05, "loss": 2.3756, "step": 2186 }, { "epoch": 0.07, "grad_norm": 0.6946542263031006, "learning_rate": 1.9839669821215764e-05, "loss": 2.3425, "step": 2187 }, { "epoch": 0.07, "grad_norm": 0.6732617020606995, "learning_rate": 1.9839480197917797e-05, "loss": 2.3929, "step": 2188 }, { "epoch": 0.07, "grad_norm": 0.6694601774215698, "learning_rate": 1.9839290463459147e-05, "loss": 2.4033, "step": 2189 }, { "epoch": 0.07, "grad_norm": 0.6610518097877502, "learning_rate": 1.983910061784196e-05, "loss": 2.4387, "step": 2190 }, { "epoch": 0.07, "grad_norm": 0.6607776284217834, "learning_rate": 1.9838910661068383e-05, "loss": 2.3341, "step": 2191 }, { "epoch": 0.07, "grad_norm": 0.6444172859191895, "learning_rate": 1.9838720593140556e-05, "loss": 2.3956, "step": 2192 }, { "epoch": 0.07, "grad_norm": 0.680614173412323, "learning_rate": 1.983853041406063e-05, "loss": 2.3562, "step": 2193 }, { "epoch": 0.07, "grad_norm": 0.7059701681137085, "learning_rate": 1.9838340123830757e-05, "loss": 2.3543, "step": 2194 }, { "epoch": 0.07, "grad_norm": 0.6627543568611145, "learning_rate": 1.9838149722453083e-05, "loss": 2.345, "step": 2195 }, { "epoch": 0.07, "grad_norm": 0.6775937080383301, "learning_rate": 1.9837959209929757e-05, "loss": 2.3323, "step": 2196 }, { "epoch": 0.07, "grad_norm": 0.6425820589065552, "learning_rate": 1.9837768586262932e-05, "loss": 2.359, "step": 2197 }, { "epoch": 0.07, "grad_norm": 0.6489399671554565, "learning_rate": 1.9837577851454766e-05, "loss": 2.3938, "step": 2198 }, { "epoch": 0.07, "grad_norm": 0.6738085746765137, "learning_rate": 1.983738700550741e-05, "loss": 2.3928, "step": 2199 }, { "epoch": 0.07, "grad_norm": 0.7129176259040833, "learning_rate": 1.9837196048423016e-05, "loss": 2.2998, "step": 2200 }, { "epoch": 0.07, "grad_norm": 0.6612347364425659, "learning_rate": 1.9837004980203753e-05, "loss": 2.3982, "step": 2201 }, { "epoch": 0.07, "grad_norm": 0.6673231720924377, "learning_rate": 1.983681380085177e-05, "loss": 2.4097, "step": 2202 }, { "epoch": 0.07, "grad_norm": 0.6592409014701843, "learning_rate": 1.983662251036923e-05, "loss": 2.3826, "step": 2203 }, { "epoch": 0.07, "grad_norm": 0.6936072111129761, "learning_rate": 1.9836431108758295e-05, "loss": 2.3897, "step": 2204 }, { "epoch": 0.07, "grad_norm": 0.7123696208000183, "learning_rate": 1.983623959602112e-05, "loss": 2.3228, "step": 2205 }, { "epoch": 0.07, "grad_norm": 0.6704083681106567, "learning_rate": 1.9836047972159883e-05, "loss": 2.3384, "step": 2206 }, { "epoch": 0.07, "grad_norm": 0.6490381360054016, "learning_rate": 1.9835856237176737e-05, "loss": 2.3899, "step": 2207 }, { "epoch": 0.07, "grad_norm": 0.6442617774009705, "learning_rate": 1.983566439107385e-05, "loss": 2.2996, "step": 2208 }, { "epoch": 0.07, "grad_norm": 0.6636687517166138, "learning_rate": 1.9835472433853393e-05, "loss": 2.3065, "step": 2209 }, { "epoch": 0.07, "grad_norm": 0.6941110491752625, "learning_rate": 1.983528036551753e-05, "loss": 2.3928, "step": 2210 }, { "epoch": 0.07, "grad_norm": 0.6904152631759644, "learning_rate": 1.983508818606844e-05, "loss": 2.3905, "step": 2211 }, { "epoch": 0.07, "grad_norm": 0.6666905879974365, "learning_rate": 1.983489589550828e-05, "loss": 2.4326, "step": 2212 }, { "epoch": 0.07, "grad_norm": 0.6616180539131165, "learning_rate": 1.9834703493839232e-05, "loss": 2.3421, "step": 2213 }, { "epoch": 0.07, "grad_norm": 0.6803056597709656, "learning_rate": 1.983451098106347e-05, "loss": 2.3051, "step": 2214 }, { "epoch": 0.07, "grad_norm": 0.6685052514076233, "learning_rate": 1.9834318357183165e-05, "loss": 2.2906, "step": 2215 }, { "epoch": 0.07, "grad_norm": 0.6381955742835999, "learning_rate": 1.9834125622200495e-05, "loss": 2.3177, "step": 2216 }, { "epoch": 0.07, "grad_norm": 0.6845715641975403, "learning_rate": 1.9833932776117635e-05, "loss": 2.4243, "step": 2217 }, { "epoch": 0.07, "grad_norm": 0.6919439435005188, "learning_rate": 1.983373981893677e-05, "loss": 2.4294, "step": 2218 }, { "epoch": 0.07, "grad_norm": 0.6709433197975159, "learning_rate": 1.9833546750660074e-05, "loss": 2.3393, "step": 2219 }, { "epoch": 0.07, "grad_norm": 0.6831055283546448, "learning_rate": 1.983335357128973e-05, "loss": 2.3991, "step": 2220 }, { "epoch": 0.07, "grad_norm": 0.6682264804840088, "learning_rate": 1.9833160280827918e-05, "loss": 2.4037, "step": 2221 }, { "epoch": 0.07, "grad_norm": 0.6652941107749939, "learning_rate": 1.9832966879276823e-05, "loss": 2.2745, "step": 2222 }, { "epoch": 0.07, "grad_norm": 0.7240835428237915, "learning_rate": 1.9832773366638635e-05, "loss": 2.3751, "step": 2223 }, { "epoch": 0.07, "grad_norm": 0.6648626923561096, "learning_rate": 1.9832579742915535e-05, "loss": 2.3702, "step": 2224 }, { "epoch": 0.07, "grad_norm": 0.7103949189186096, "learning_rate": 1.983238600810971e-05, "loss": 2.422, "step": 2225 }, { "epoch": 0.07, "grad_norm": 0.6544563174247742, "learning_rate": 1.983219216222335e-05, "loss": 2.4306, "step": 2226 }, { "epoch": 0.07, "grad_norm": 0.7084912061691284, "learning_rate": 1.983199820525865e-05, "loss": 2.4458, "step": 2227 }, { "epoch": 0.07, "grad_norm": 0.6707584857940674, "learning_rate": 1.9831804137217792e-05, "loss": 2.3604, "step": 2228 }, { "epoch": 0.07, "grad_norm": 0.6936107277870178, "learning_rate": 1.9831609958102975e-05, "loss": 2.3785, "step": 2229 }, { "epoch": 0.07, "grad_norm": 0.6739218235015869, "learning_rate": 1.9831415667916384e-05, "loss": 2.3709, "step": 2230 }, { "epoch": 0.07, "grad_norm": 0.6616268754005432, "learning_rate": 1.9831221266660227e-05, "loss": 2.2716, "step": 2231 }, { "epoch": 0.07, "grad_norm": 0.6548470258712769, "learning_rate": 1.9831026754336692e-05, "loss": 2.3323, "step": 2232 }, { "epoch": 0.07, "grad_norm": 0.6888502836227417, "learning_rate": 1.983083213094798e-05, "loss": 2.3388, "step": 2233 }, { "epoch": 0.07, "grad_norm": 0.6601564288139343, "learning_rate": 1.983063739649629e-05, "loss": 2.3674, "step": 2234 }, { "epoch": 0.07, "grad_norm": 0.6550270318984985, "learning_rate": 1.9830442550983813e-05, "loss": 2.3844, "step": 2235 }, { "epoch": 0.07, "grad_norm": 0.6713321208953857, "learning_rate": 1.983024759441276e-05, "loss": 2.3543, "step": 2236 }, { "epoch": 0.07, "grad_norm": 0.6701964735984802, "learning_rate": 1.9830052526785332e-05, "loss": 2.4103, "step": 2237 }, { "epoch": 0.07, "grad_norm": 0.6644104719161987, "learning_rate": 1.9829857348103735e-05, "loss": 2.3459, "step": 2238 }, { "epoch": 0.07, "grad_norm": 0.627583920955658, "learning_rate": 1.9829662058370166e-05, "loss": 2.3467, "step": 2239 }, { "epoch": 0.07, "grad_norm": 0.6823845505714417, "learning_rate": 1.9829466657586836e-05, "loss": 2.3181, "step": 2240 }, { "epoch": 0.07, "grad_norm": 0.7115896344184875, "learning_rate": 1.9829271145755953e-05, "loss": 2.3261, "step": 2241 }, { "epoch": 0.07, "grad_norm": 0.6602494716644287, "learning_rate": 1.9829075522879725e-05, "loss": 2.3696, "step": 2242 }, { "epoch": 0.07, "grad_norm": 0.6751396059989929, "learning_rate": 1.9828879788960363e-05, "loss": 2.3747, "step": 2243 }, { "epoch": 0.07, "grad_norm": 0.666413426399231, "learning_rate": 1.9828683944000075e-05, "loss": 2.3838, "step": 2244 }, { "epoch": 0.07, "grad_norm": 0.687860369682312, "learning_rate": 1.9828487988001076e-05, "loss": 2.361, "step": 2245 }, { "epoch": 0.07, "grad_norm": 0.7031977772712708, "learning_rate": 1.9828291920965584e-05, "loss": 2.3181, "step": 2246 }, { "epoch": 0.07, "grad_norm": 0.725185215473175, "learning_rate": 1.9828095742895807e-05, "loss": 2.3702, "step": 2247 }, { "epoch": 0.07, "grad_norm": 0.6503520607948303, "learning_rate": 1.9827899453793965e-05, "loss": 2.4002, "step": 2248 }, { "epoch": 0.07, "grad_norm": 0.6763826608657837, "learning_rate": 1.9827703053662276e-05, "loss": 2.3354, "step": 2249 }, { "epoch": 0.07, "grad_norm": 0.676348865032196, "learning_rate": 1.9827506542502952e-05, "loss": 2.3694, "step": 2250 }, { "epoch": 0.07, "grad_norm": 0.6666421890258789, "learning_rate": 1.982730992031822e-05, "loss": 2.3091, "step": 2251 }, { "epoch": 0.07, "grad_norm": 0.7023469805717468, "learning_rate": 1.9827113187110307e-05, "loss": 2.3515, "step": 2252 }, { "epoch": 0.07, "grad_norm": 0.6576856374740601, "learning_rate": 1.9826916342881423e-05, "loss": 2.3297, "step": 2253 }, { "epoch": 0.07, "grad_norm": 0.7065723538398743, "learning_rate": 1.9826719387633796e-05, "loss": 2.3503, "step": 2254 }, { "epoch": 0.08, "grad_norm": 0.6622753143310547, "learning_rate": 1.9826522321369658e-05, "loss": 2.3695, "step": 2255 }, { "epoch": 0.08, "grad_norm": 0.6447135806083679, "learning_rate": 1.9826325144091223e-05, "loss": 2.3309, "step": 2256 }, { "epoch": 0.08, "grad_norm": 0.6541101336479187, "learning_rate": 1.9826127855800728e-05, "loss": 2.3419, "step": 2257 }, { "epoch": 0.08, "grad_norm": 0.6862423419952393, "learning_rate": 1.98259304565004e-05, "loss": 2.3309, "step": 2258 }, { "epoch": 0.08, "grad_norm": 0.6656540632247925, "learning_rate": 1.982573294619247e-05, "loss": 2.3244, "step": 2259 }, { "epoch": 0.08, "grad_norm": 0.6669071912765503, "learning_rate": 1.9825535324879163e-05, "loss": 2.2975, "step": 2260 }, { "epoch": 0.08, "grad_norm": 0.6684110760688782, "learning_rate": 1.9825337592562718e-05, "loss": 2.4088, "step": 2261 }, { "epoch": 0.08, "grad_norm": 0.6939542293548584, "learning_rate": 1.9825139749245366e-05, "loss": 2.4086, "step": 2262 }, { "epoch": 0.08, "grad_norm": 0.6607959270477295, "learning_rate": 1.9824941794929348e-05, "loss": 2.379, "step": 2263 }, { "epoch": 0.08, "grad_norm": 0.6760181188583374, "learning_rate": 1.9824743729616892e-05, "loss": 2.3516, "step": 2264 }, { "epoch": 0.08, "grad_norm": 0.671470046043396, "learning_rate": 1.982454555331024e-05, "loss": 2.285, "step": 2265 }, { "epoch": 0.08, "grad_norm": 0.6935726404190063, "learning_rate": 1.982434726601163e-05, "loss": 2.3662, "step": 2266 }, { "epoch": 0.08, "grad_norm": 0.6683235764503479, "learning_rate": 1.98241488677233e-05, "loss": 2.3068, "step": 2267 }, { "epoch": 0.08, "grad_norm": 0.6576142907142639, "learning_rate": 1.9823950358447496e-05, "loss": 2.3303, "step": 2268 }, { "epoch": 0.08, "grad_norm": 0.68270343542099, "learning_rate": 1.982375173818646e-05, "loss": 2.3367, "step": 2269 }, { "epoch": 0.08, "grad_norm": 0.6778784990310669, "learning_rate": 1.982355300694243e-05, "loss": 2.3381, "step": 2270 }, { "epoch": 0.08, "grad_norm": 0.6466098427772522, "learning_rate": 1.9823354164717657e-05, "loss": 2.3546, "step": 2271 }, { "epoch": 0.08, "grad_norm": 0.6838887333869934, "learning_rate": 1.9823155211514387e-05, "loss": 2.3914, "step": 2272 }, { "epoch": 0.08, "grad_norm": 0.6879265904426575, "learning_rate": 1.9822956147334865e-05, "loss": 2.3555, "step": 2273 }, { "epoch": 0.08, "grad_norm": 0.7021540403366089, "learning_rate": 1.982275697218134e-05, "loss": 2.3519, "step": 2274 }, { "epoch": 0.08, "grad_norm": 0.684629499912262, "learning_rate": 1.9822557686056068e-05, "loss": 2.2958, "step": 2275 }, { "epoch": 0.08, "grad_norm": 0.6549875736236572, "learning_rate": 1.982235828896129e-05, "loss": 2.3237, "step": 2276 }, { "epoch": 0.08, "grad_norm": 0.6954808235168457, "learning_rate": 1.9822158780899267e-05, "loss": 2.3542, "step": 2277 }, { "epoch": 0.08, "grad_norm": 0.6480743288993835, "learning_rate": 1.9821959161872254e-05, "loss": 2.3431, "step": 2278 }, { "epoch": 0.08, "grad_norm": 0.6996781229972839, "learning_rate": 1.9821759431882498e-05, "loss": 2.4046, "step": 2279 }, { "epoch": 0.08, "grad_norm": 0.6610148549079895, "learning_rate": 1.982155959093226e-05, "loss": 2.3877, "step": 2280 }, { "epoch": 0.08, "grad_norm": 0.6795777082443237, "learning_rate": 1.98213596390238e-05, "loss": 2.3228, "step": 2281 }, { "epoch": 0.08, "grad_norm": 0.6659331321716309, "learning_rate": 1.9821159576159373e-05, "loss": 2.3286, "step": 2282 }, { "epoch": 0.08, "grad_norm": 0.6794893145561218, "learning_rate": 1.9820959402341243e-05, "loss": 2.3709, "step": 2283 }, { "epoch": 0.08, "grad_norm": 0.6683352589607239, "learning_rate": 1.982075911757167e-05, "loss": 2.425, "step": 2284 }, { "epoch": 0.08, "grad_norm": 0.6822823882102966, "learning_rate": 1.9820558721852912e-05, "loss": 2.3288, "step": 2285 }, { "epoch": 0.08, "grad_norm": 0.7015583515167236, "learning_rate": 1.982035821518724e-05, "loss": 2.3452, "step": 2286 }, { "epoch": 0.08, "grad_norm": 0.7066489458084106, "learning_rate": 1.9820157597576916e-05, "loss": 2.418, "step": 2287 }, { "epoch": 0.08, "grad_norm": 0.7535952925682068, "learning_rate": 1.9819956869024202e-05, "loss": 2.3598, "step": 2288 }, { "epoch": 0.08, "grad_norm": 0.6653067469596863, "learning_rate": 1.9819756029531376e-05, "loss": 2.3433, "step": 2289 }, { "epoch": 0.08, "grad_norm": 0.7046130299568176, "learning_rate": 1.98195550791007e-05, "loss": 2.3948, "step": 2290 }, { "epoch": 0.08, "grad_norm": 0.7025448083877563, "learning_rate": 1.9819354017734444e-05, "loss": 2.3742, "step": 2291 }, { "epoch": 0.08, "grad_norm": 0.7082207798957825, "learning_rate": 1.9819152845434884e-05, "loss": 2.3808, "step": 2292 }, { "epoch": 0.08, "grad_norm": 0.6819546222686768, "learning_rate": 1.981895156220429e-05, "loss": 2.41, "step": 2293 }, { "epoch": 0.08, "grad_norm": 0.669355571269989, "learning_rate": 1.981875016804493e-05, "loss": 2.3912, "step": 2294 }, { "epoch": 0.08, "grad_norm": 0.6663204431533813, "learning_rate": 1.981854866295909e-05, "loss": 2.3569, "step": 2295 }, { "epoch": 0.08, "grad_norm": 0.636420488357544, "learning_rate": 1.981834704694904e-05, "loss": 2.3258, "step": 2296 }, { "epoch": 0.08, "grad_norm": 0.6867048144340515, "learning_rate": 1.981814532001706e-05, "loss": 2.3886, "step": 2297 }, { "epoch": 0.08, "grad_norm": 0.7038825750350952, "learning_rate": 1.9817943482165428e-05, "loss": 2.384, "step": 2298 }, { "epoch": 0.08, "grad_norm": 0.7206969857215881, "learning_rate": 1.981774153339642e-05, "loss": 2.4174, "step": 2299 }, { "epoch": 0.08, "grad_norm": 0.6829017996788025, "learning_rate": 1.9817539473712325e-05, "loss": 2.3915, "step": 2300 }, { "epoch": 0.08, "grad_norm": 0.6383287906646729, "learning_rate": 1.9817337303115428e-05, "loss": 2.3093, "step": 2301 }, { "epoch": 0.08, "grad_norm": 0.6981441974639893, "learning_rate": 1.9817135021608e-05, "loss": 2.3901, "step": 2302 }, { "epoch": 0.08, "grad_norm": 0.6976889371871948, "learning_rate": 1.9816932629192337e-05, "loss": 2.3326, "step": 2303 }, { "epoch": 0.08, "grad_norm": 0.6695263385772705, "learning_rate": 1.9816730125870724e-05, "loss": 2.336, "step": 2304 }, { "epoch": 0.08, "grad_norm": 0.6563782095909119, "learning_rate": 1.9816527511645444e-05, "loss": 2.2674, "step": 2305 }, { "epoch": 0.08, "grad_norm": 0.6686438918113708, "learning_rate": 1.981632478651879e-05, "loss": 2.382, "step": 2306 }, { "epoch": 0.08, "grad_norm": 0.655463695526123, "learning_rate": 1.9816121950493054e-05, "loss": 2.3732, "step": 2307 }, { "epoch": 0.08, "grad_norm": 0.6731521487236023, "learning_rate": 1.981591900357052e-05, "loss": 2.4068, "step": 2308 }, { "epoch": 0.08, "grad_norm": 0.6871774792671204, "learning_rate": 1.9815715945753494e-05, "loss": 2.3312, "step": 2309 }, { "epoch": 0.08, "grad_norm": 0.710421085357666, "learning_rate": 1.9815512777044254e-05, "loss": 2.3715, "step": 2310 }, { "epoch": 0.08, "grad_norm": 0.6659837961196899, "learning_rate": 1.9815309497445104e-05, "loss": 2.3593, "step": 2311 }, { "epoch": 0.08, "grad_norm": 0.7032288908958435, "learning_rate": 1.9815106106958346e-05, "loss": 2.4283, "step": 2312 }, { "epoch": 0.08, "grad_norm": 0.7084277868270874, "learning_rate": 1.9814902605586264e-05, "loss": 2.424, "step": 2313 }, { "epoch": 0.08, "grad_norm": 0.6944140195846558, "learning_rate": 1.981469899333117e-05, "loss": 2.4113, "step": 2314 }, { "epoch": 0.08, "grad_norm": 0.6629042029380798, "learning_rate": 1.9814495270195356e-05, "loss": 2.3446, "step": 2315 }, { "epoch": 0.08, "grad_norm": 0.6665487885475159, "learning_rate": 1.9814291436181127e-05, "loss": 2.3898, "step": 2316 }, { "epoch": 0.08, "grad_norm": 0.742047131061554, "learning_rate": 1.9814087491290787e-05, "loss": 2.3705, "step": 2317 }, { "epoch": 0.08, "grad_norm": 0.6905255913734436, "learning_rate": 1.9813883435526632e-05, "loss": 2.3752, "step": 2318 }, { "epoch": 0.08, "grad_norm": 0.65250164270401, "learning_rate": 1.9813679268890977e-05, "loss": 2.2976, "step": 2319 }, { "epoch": 0.08, "grad_norm": 0.6600178480148315, "learning_rate": 1.9813474991386126e-05, "loss": 2.3313, "step": 2320 }, { "epoch": 0.08, "grad_norm": 0.6407349705696106, "learning_rate": 1.9813270603014387e-05, "loss": 2.3562, "step": 2321 }, { "epoch": 0.08, "grad_norm": 0.6514217257499695, "learning_rate": 1.9813066103778065e-05, "loss": 2.3724, "step": 2322 }, { "epoch": 0.08, "grad_norm": 0.6543853282928467, "learning_rate": 1.9812861493679473e-05, "loss": 2.3668, "step": 2323 }, { "epoch": 0.08, "grad_norm": 0.6927496790885925, "learning_rate": 1.9812656772720925e-05, "loss": 2.3391, "step": 2324 }, { "epoch": 0.08, "grad_norm": 0.6647445559501648, "learning_rate": 1.981245194090473e-05, "loss": 2.3347, "step": 2325 }, { "epoch": 0.08, "grad_norm": 0.65156090259552, "learning_rate": 1.9812246998233203e-05, "loss": 2.3256, "step": 2326 }, { "epoch": 0.08, "grad_norm": 0.6763613224029541, "learning_rate": 1.981204194470866e-05, "loss": 2.3799, "step": 2327 }, { "epoch": 0.08, "grad_norm": 0.6693637371063232, "learning_rate": 1.9811836780333423e-05, "loss": 2.3025, "step": 2328 }, { "epoch": 0.08, "grad_norm": 0.6679176688194275, "learning_rate": 1.98116315051098e-05, "loss": 2.2816, "step": 2329 }, { "epoch": 0.08, "grad_norm": 0.6952856779098511, "learning_rate": 1.9811426119040112e-05, "loss": 2.3168, "step": 2330 }, { "epoch": 0.08, "grad_norm": 0.6682324409484863, "learning_rate": 1.9811220622126685e-05, "loss": 2.3869, "step": 2331 }, { "epoch": 0.08, "grad_norm": 0.688409686088562, "learning_rate": 1.9811015014371833e-05, "loss": 2.2884, "step": 2332 }, { "epoch": 0.08, "grad_norm": 0.6618613600730896, "learning_rate": 1.981080929577789e-05, "loss": 2.3327, "step": 2333 }, { "epoch": 0.08, "grad_norm": 0.6743007898330688, "learning_rate": 1.981060346634717e-05, "loss": 2.4113, "step": 2334 }, { "epoch": 0.08, "grad_norm": 0.6952850818634033, "learning_rate": 1.9810397526082e-05, "loss": 2.3446, "step": 2335 }, { "epoch": 0.08, "grad_norm": 0.6999621391296387, "learning_rate": 1.981019147498471e-05, "loss": 2.3243, "step": 2336 }, { "epoch": 0.08, "grad_norm": 0.6548631191253662, "learning_rate": 1.980998531305763e-05, "loss": 2.2809, "step": 2337 }, { "epoch": 0.08, "grad_norm": 0.6674143671989441, "learning_rate": 1.980977904030308e-05, "loss": 2.3572, "step": 2338 }, { "epoch": 0.08, "grad_norm": 0.6914337277412415, "learning_rate": 1.9809572656723395e-05, "loss": 2.3094, "step": 2339 }, { "epoch": 0.08, "grad_norm": 0.6847976446151733, "learning_rate": 1.9809366162320906e-05, "loss": 2.3424, "step": 2340 }, { "epoch": 0.08, "grad_norm": 0.6632505655288696, "learning_rate": 1.980915955709795e-05, "loss": 2.3189, "step": 2341 }, { "epoch": 0.08, "grad_norm": 0.7025483250617981, "learning_rate": 1.980895284105686e-05, "loss": 2.3234, "step": 2342 }, { "epoch": 0.08, "grad_norm": 0.6602748036384583, "learning_rate": 1.9808746014199967e-05, "loss": 2.3485, "step": 2343 }, { "epoch": 0.08, "grad_norm": 0.6879835724830627, "learning_rate": 1.9808539076529608e-05, "loss": 2.3745, "step": 2344 }, { "epoch": 0.08, "grad_norm": 0.6636591553688049, "learning_rate": 1.9808332028048126e-05, "loss": 2.3354, "step": 2345 }, { "epoch": 0.08, "grad_norm": 0.675483763217926, "learning_rate": 1.9808124868757855e-05, "loss": 2.3341, "step": 2346 }, { "epoch": 0.08, "grad_norm": 0.6626991629600525, "learning_rate": 1.9807917598661136e-05, "loss": 2.3837, "step": 2347 }, { "epoch": 0.08, "grad_norm": 0.6520759463310242, "learning_rate": 1.9807710217760316e-05, "loss": 2.3212, "step": 2348 }, { "epoch": 0.08, "grad_norm": 0.6476108431816101, "learning_rate": 1.980750272605773e-05, "loss": 2.2884, "step": 2349 }, { "epoch": 0.08, "grad_norm": 0.6741753816604614, "learning_rate": 1.980729512355573e-05, "loss": 2.399, "step": 2350 }, { "epoch": 0.08, "grad_norm": 0.6582635641098022, "learning_rate": 1.9807087410256657e-05, "loss": 2.3465, "step": 2351 }, { "epoch": 0.08, "grad_norm": 0.6374813318252563, "learning_rate": 1.9806879586162853e-05, "loss": 2.3512, "step": 2352 }, { "epoch": 0.08, "grad_norm": 0.6761130094528198, "learning_rate": 1.9806671651276676e-05, "loss": 2.311, "step": 2353 }, { "epoch": 0.08, "grad_norm": 0.6651429533958435, "learning_rate": 1.980646360560047e-05, "loss": 2.3808, "step": 2354 }, { "epoch": 0.08, "grad_norm": 0.7057443857192993, "learning_rate": 1.980625544913658e-05, "loss": 2.3341, "step": 2355 }, { "epoch": 0.08, "grad_norm": 0.66246098279953, "learning_rate": 1.9806047181887368e-05, "loss": 2.2941, "step": 2356 }, { "epoch": 0.08, "grad_norm": 0.7019712924957275, "learning_rate": 1.9805838803855182e-05, "loss": 2.3611, "step": 2357 }, { "epoch": 0.08, "grad_norm": 0.6965950131416321, "learning_rate": 1.9805630315042373e-05, "loss": 2.3089, "step": 2358 }, { "epoch": 0.08, "grad_norm": 0.6560185551643372, "learning_rate": 1.98054217154513e-05, "loss": 2.3538, "step": 2359 }, { "epoch": 0.08, "grad_norm": 0.7050288319587708, "learning_rate": 1.980521300508432e-05, "loss": 2.3845, "step": 2360 }, { "epoch": 0.08, "grad_norm": 0.6816173791885376, "learning_rate": 1.980500418394379e-05, "loss": 2.3627, "step": 2361 }, { "epoch": 0.08, "grad_norm": 0.6425358653068542, "learning_rate": 1.980479525203207e-05, "loss": 2.2805, "step": 2362 }, { "epoch": 0.08, "grad_norm": 0.698404848575592, "learning_rate": 1.9804586209351516e-05, "loss": 2.332, "step": 2363 }, { "epoch": 0.08, "grad_norm": 0.672673761844635, "learning_rate": 1.9804377055904496e-05, "loss": 2.3349, "step": 2364 }, { "epoch": 0.08, "grad_norm": 0.6692365407943726, "learning_rate": 1.9804167791693367e-05, "loss": 2.362, "step": 2365 }, { "epoch": 0.08, "grad_norm": 0.659717321395874, "learning_rate": 1.98039584167205e-05, "loss": 2.3975, "step": 2366 }, { "epoch": 0.08, "grad_norm": 0.6474272608757019, "learning_rate": 1.9803748930988254e-05, "loss": 2.2985, "step": 2367 }, { "epoch": 0.08, "grad_norm": 0.6747021079063416, "learning_rate": 1.9803539334498997e-05, "loss": 2.2719, "step": 2368 }, { "epoch": 0.08, "grad_norm": 0.6841328740119934, "learning_rate": 1.98033296272551e-05, "loss": 2.3732, "step": 2369 }, { "epoch": 0.08, "grad_norm": 0.7138396501541138, "learning_rate": 1.980311980925893e-05, "loss": 2.3756, "step": 2370 }, { "epoch": 0.08, "grad_norm": 0.6870377063751221, "learning_rate": 1.980290988051286e-05, "loss": 2.3642, "step": 2371 }, { "epoch": 0.08, "grad_norm": 0.6847071647644043, "learning_rate": 1.9802699841019254e-05, "loss": 2.3311, "step": 2372 }, { "epoch": 0.08, "grad_norm": 0.6537051796913147, "learning_rate": 1.9802489690780494e-05, "loss": 2.3416, "step": 2373 }, { "epoch": 0.08, "grad_norm": 0.685537576675415, "learning_rate": 1.980227942979895e-05, "loss": 2.3635, "step": 2374 }, { "epoch": 0.08, "grad_norm": 0.6559857726097107, "learning_rate": 1.9802069058076997e-05, "loss": 2.2923, "step": 2375 }, { "epoch": 0.08, "grad_norm": 0.6780763864517212, "learning_rate": 1.980185857561701e-05, "loss": 2.3519, "step": 2376 }, { "epoch": 0.08, "grad_norm": 0.6728874444961548, "learning_rate": 1.980164798242137e-05, "loss": 2.3395, "step": 2377 }, { "epoch": 0.08, "grad_norm": 0.697409451007843, "learning_rate": 1.980143727849246e-05, "loss": 2.377, "step": 2378 }, { "epoch": 0.08, "grad_norm": 0.7174190878868103, "learning_rate": 1.9801226463832654e-05, "loss": 2.3417, "step": 2379 }, { "epoch": 0.08, "grad_norm": 0.7185983061790466, "learning_rate": 1.9801015538444333e-05, "loss": 2.2838, "step": 2380 }, { "epoch": 0.08, "grad_norm": 0.7024831771850586, "learning_rate": 1.9800804502329884e-05, "loss": 2.3891, "step": 2381 }, { "epoch": 0.08, "grad_norm": 0.6656056642532349, "learning_rate": 1.9800593355491687e-05, "loss": 2.3812, "step": 2382 }, { "epoch": 0.08, "grad_norm": 0.6568113565444946, "learning_rate": 1.9800382097932135e-05, "loss": 2.3372, "step": 2383 }, { "epoch": 0.08, "grad_norm": 0.6643727421760559, "learning_rate": 1.9800170729653603e-05, "loss": 2.4087, "step": 2384 }, { "epoch": 0.08, "grad_norm": 0.6439428925514221, "learning_rate": 1.979995925065849e-05, "loss": 2.3234, "step": 2385 }, { "epoch": 0.08, "grad_norm": 0.6895545125007629, "learning_rate": 1.9799747660949182e-05, "loss": 2.4139, "step": 2386 }, { "epoch": 0.08, "grad_norm": 0.6769569516181946, "learning_rate": 1.9799535960528066e-05, "loss": 2.3634, "step": 2387 }, { "epoch": 0.08, "grad_norm": 0.7185633778572083, "learning_rate": 1.9799324149397538e-05, "loss": 2.3182, "step": 2388 }, { "epoch": 0.08, "grad_norm": 0.6852063536643982, "learning_rate": 1.9799112227559983e-05, "loss": 2.3522, "step": 2389 }, { "epoch": 0.08, "grad_norm": 0.6807916164398193, "learning_rate": 1.9798900195017804e-05, "loss": 2.3247, "step": 2390 }, { "epoch": 0.08, "grad_norm": 0.6746793985366821, "learning_rate": 1.97986880517734e-05, "loss": 2.3947, "step": 2391 }, { "epoch": 0.08, "grad_norm": 0.658911943435669, "learning_rate": 1.979847579782915e-05, "loss": 2.3334, "step": 2392 }, { "epoch": 0.08, "grad_norm": 0.6673645377159119, "learning_rate": 1.979826343318747e-05, "loss": 2.3625, "step": 2393 }, { "epoch": 0.08, "grad_norm": 0.6755536794662476, "learning_rate": 1.9798050957850748e-05, "loss": 2.3165, "step": 2394 }, { "epoch": 0.08, "grad_norm": 0.6686786413192749, "learning_rate": 1.979783837182139e-05, "loss": 2.3413, "step": 2395 }, { "epoch": 0.08, "grad_norm": 0.6808382868766785, "learning_rate": 1.97976256751018e-05, "loss": 2.3751, "step": 2396 }, { "epoch": 0.08, "grad_norm": 0.6608921885490417, "learning_rate": 1.9797412867694372e-05, "loss": 2.2984, "step": 2397 }, { "epoch": 0.08, "grad_norm": 0.6757985353469849, "learning_rate": 1.9797199949601517e-05, "loss": 2.3168, "step": 2398 }, { "epoch": 0.08, "grad_norm": 0.6643825769424438, "learning_rate": 1.979698692082564e-05, "loss": 2.2871, "step": 2399 }, { "epoch": 0.08, "grad_norm": 0.6880276799201965, "learning_rate": 1.9796773781369147e-05, "loss": 2.371, "step": 2400 }, { "epoch": 0.08, "grad_norm": 0.6854760646820068, "learning_rate": 1.979656053123444e-05, "loss": 2.3488, "step": 2401 }, { "epoch": 0.08, "grad_norm": 0.6247063875198364, "learning_rate": 1.9796347170423938e-05, "loss": 2.305, "step": 2402 }, { "epoch": 0.08, "grad_norm": 0.7004512548446655, "learning_rate": 1.9796133698940047e-05, "loss": 2.3131, "step": 2403 }, { "epoch": 0.08, "grad_norm": 0.6864848732948303, "learning_rate": 1.9795920116785175e-05, "loss": 2.3258, "step": 2404 }, { "epoch": 0.08, "grad_norm": 0.7278434634208679, "learning_rate": 1.979570642396174e-05, "loss": 2.3947, "step": 2405 }, { "epoch": 0.08, "grad_norm": 0.7325437664985657, "learning_rate": 1.9795492620472158e-05, "loss": 2.3156, "step": 2406 }, { "epoch": 0.08, "grad_norm": 0.7307333946228027, "learning_rate": 1.9795278706318836e-05, "loss": 2.3246, "step": 2407 }, { "epoch": 0.08, "grad_norm": 0.6175476312637329, "learning_rate": 1.9795064681504198e-05, "loss": 2.3598, "step": 2408 }, { "epoch": 0.08, "grad_norm": 0.6618334054946899, "learning_rate": 1.979485054603066e-05, "loss": 2.3681, "step": 2409 }, { "epoch": 0.08, "grad_norm": 0.6762998700141907, "learning_rate": 1.979463629990064e-05, "loss": 2.2989, "step": 2410 }, { "epoch": 0.08, "grad_norm": 0.6708942651748657, "learning_rate": 1.9794421943116566e-05, "loss": 2.3587, "step": 2411 }, { "epoch": 0.08, "grad_norm": 0.7319777011871338, "learning_rate": 1.9794207475680842e-05, "loss": 2.3366, "step": 2412 }, { "epoch": 0.08, "grad_norm": 0.7017005681991577, "learning_rate": 1.9793992897595907e-05, "loss": 2.4201, "step": 2413 }, { "epoch": 0.08, "grad_norm": 0.6522051692008972, "learning_rate": 1.979377820886418e-05, "loss": 2.3404, "step": 2414 }, { "epoch": 0.08, "grad_norm": 0.6675101518630981, "learning_rate": 1.979356340948809e-05, "loss": 2.4141, "step": 2415 }, { "epoch": 0.08, "grad_norm": 0.6809315085411072, "learning_rate": 1.9793348499470054e-05, "loss": 2.4456, "step": 2416 }, { "epoch": 0.08, "grad_norm": 0.6461268067359924, "learning_rate": 1.979313347881251e-05, "loss": 2.3205, "step": 2417 }, { "epoch": 0.08, "grad_norm": 0.6469259262084961, "learning_rate": 1.9792918347517883e-05, "loss": 2.3083, "step": 2418 }, { "epoch": 0.08, "grad_norm": 0.7118104696273804, "learning_rate": 1.9792703105588602e-05, "loss": 2.4228, "step": 2419 }, { "epoch": 0.08, "grad_norm": 0.6664142608642578, "learning_rate": 1.9792487753027105e-05, "loss": 2.3281, "step": 2420 }, { "epoch": 0.08, "grad_norm": 0.7148756980895996, "learning_rate": 1.9792272289835813e-05, "loss": 2.3269, "step": 2421 }, { "epoch": 0.08, "grad_norm": 0.6392363905906677, "learning_rate": 1.9792056716017173e-05, "loss": 2.3326, "step": 2422 }, { "epoch": 0.08, "grad_norm": 0.6856620907783508, "learning_rate": 1.9791841031573612e-05, "loss": 2.3162, "step": 2423 }, { "epoch": 0.08, "grad_norm": 0.6761419177055359, "learning_rate": 1.979162523650757e-05, "loss": 2.3402, "step": 2424 }, { "epoch": 0.08, "grad_norm": 0.686103105545044, "learning_rate": 1.9791409330821487e-05, "loss": 2.3455, "step": 2425 }, { "epoch": 0.08, "grad_norm": 0.6756161451339722, "learning_rate": 1.97911933145178e-05, "loss": 2.3883, "step": 2426 }, { "epoch": 0.08, "grad_norm": 0.6594185829162598, "learning_rate": 1.9790977187598944e-05, "loss": 2.3497, "step": 2427 }, { "epoch": 0.08, "grad_norm": 0.6902406215667725, "learning_rate": 1.979076095006737e-05, "loss": 2.3433, "step": 2428 }, { "epoch": 0.08, "grad_norm": 0.6560533046722412, "learning_rate": 1.9790544601925516e-05, "loss": 2.33, "step": 2429 }, { "epoch": 0.08, "grad_norm": 0.7036490440368652, "learning_rate": 1.9790328143175825e-05, "loss": 2.3057, "step": 2430 }, { "epoch": 0.08, "grad_norm": 0.7172425985336304, "learning_rate": 1.9790111573820748e-05, "loss": 2.3128, "step": 2431 }, { "epoch": 0.08, "grad_norm": 0.6538001298904419, "learning_rate": 1.9789894893862724e-05, "loss": 2.3007, "step": 2432 }, { "epoch": 0.08, "grad_norm": 0.746077299118042, "learning_rate": 1.9789678103304207e-05, "loss": 2.3522, "step": 2433 }, { "epoch": 0.08, "grad_norm": 0.641230046749115, "learning_rate": 1.9789461202147646e-05, "loss": 2.3442, "step": 2434 }, { "epoch": 0.08, "grad_norm": 0.6417468190193176, "learning_rate": 1.9789244190395487e-05, "loss": 2.3624, "step": 2435 }, { "epoch": 0.08, "grad_norm": 0.6666562557220459, "learning_rate": 1.9789027068050183e-05, "loss": 2.3395, "step": 2436 }, { "epoch": 0.08, "grad_norm": 0.6717875599861145, "learning_rate": 1.978880983511419e-05, "loss": 2.3655, "step": 2437 }, { "epoch": 0.08, "grad_norm": 0.6529743671417236, "learning_rate": 1.978859249158996e-05, "loss": 2.3807, "step": 2438 }, { "epoch": 0.08, "grad_norm": 0.6670441031455994, "learning_rate": 1.978837503747995e-05, "loss": 2.3199, "step": 2439 }, { "epoch": 0.08, "grad_norm": 0.6656957268714905, "learning_rate": 1.9788157472786612e-05, "loss": 2.3606, "step": 2440 }, { "epoch": 0.08, "grad_norm": 0.6989373564720154, "learning_rate": 1.9787939797512414e-05, "loss": 2.3375, "step": 2441 }, { "epoch": 0.08, "grad_norm": 0.6827592849731445, "learning_rate": 1.9787722011659802e-05, "loss": 2.3123, "step": 2442 }, { "epoch": 0.08, "grad_norm": 0.6697636246681213, "learning_rate": 1.9787504115231244e-05, "loss": 2.4168, "step": 2443 }, { "epoch": 0.08, "grad_norm": 0.686275839805603, "learning_rate": 1.9787286108229202e-05, "loss": 2.3603, "step": 2444 }, { "epoch": 0.08, "grad_norm": 0.6657013893127441, "learning_rate": 1.9787067990656137e-05, "loss": 2.3509, "step": 2445 }, { "epoch": 0.08, "grad_norm": 0.7105251550674438, "learning_rate": 1.978684976251451e-05, "loss": 2.3474, "step": 2446 }, { "epoch": 0.08, "grad_norm": 0.7280749082565308, "learning_rate": 1.9786631423806795e-05, "loss": 2.3462, "step": 2447 }, { "epoch": 0.08, "grad_norm": 0.6626918315887451, "learning_rate": 1.9786412974535455e-05, "loss": 2.3222, "step": 2448 }, { "epoch": 0.08, "grad_norm": 0.6755833029747009, "learning_rate": 1.9786194414702954e-05, "loss": 2.2975, "step": 2449 }, { "epoch": 0.08, "grad_norm": 0.6483421325683594, "learning_rate": 1.9785975744311762e-05, "loss": 2.3571, "step": 2450 }, { "epoch": 0.08, "grad_norm": 0.6710434556007385, "learning_rate": 1.9785756963364357e-05, "loss": 2.376, "step": 2451 }, { "epoch": 0.08, "grad_norm": 0.719467282295227, "learning_rate": 1.9785538071863196e-05, "loss": 2.3948, "step": 2452 }, { "epoch": 0.08, "grad_norm": 0.6863904595375061, "learning_rate": 1.9785319069810765e-05, "loss": 2.3777, "step": 2453 }, { "epoch": 0.08, "grad_norm": 0.6910497546195984, "learning_rate": 1.9785099957209537e-05, "loss": 2.3985, "step": 2454 }, { "epoch": 0.08, "grad_norm": 0.6693934798240662, "learning_rate": 1.978488073406198e-05, "loss": 2.3428, "step": 2455 }, { "epoch": 0.08, "grad_norm": 0.6567925214767456, "learning_rate": 1.9784661400370577e-05, "loss": 2.3877, "step": 2456 }, { "epoch": 0.08, "grad_norm": 0.6569461226463318, "learning_rate": 1.9784441956137806e-05, "loss": 2.3467, "step": 2457 }, { "epoch": 0.08, "grad_norm": 0.6679854393005371, "learning_rate": 1.978422240136614e-05, "loss": 2.3632, "step": 2458 }, { "epoch": 0.08, "grad_norm": 0.7015809416770935, "learning_rate": 1.9784002736058063e-05, "loss": 2.327, "step": 2459 }, { "epoch": 0.08, "grad_norm": 0.6517996191978455, "learning_rate": 1.978378296021606e-05, "loss": 2.3622, "step": 2460 }, { "epoch": 0.08, "grad_norm": 0.7029489278793335, "learning_rate": 1.978356307384261e-05, "loss": 2.3368, "step": 2461 }, { "epoch": 0.08, "grad_norm": 0.6573650240898132, "learning_rate": 1.97833430769402e-05, "loss": 2.335, "step": 2462 }, { "epoch": 0.08, "grad_norm": 0.6775755286216736, "learning_rate": 1.978312296951131e-05, "loss": 2.3194, "step": 2463 }, { "epoch": 0.08, "grad_norm": 0.680691123008728, "learning_rate": 1.978290275155843e-05, "loss": 2.4107, "step": 2464 }, { "epoch": 0.08, "grad_norm": 0.6899623274803162, "learning_rate": 1.9782682423084053e-05, "loss": 2.3359, "step": 2465 }, { "epoch": 0.08, "grad_norm": 0.669139564037323, "learning_rate": 1.978246198409066e-05, "loss": 2.3315, "step": 2466 }, { "epoch": 0.08, "grad_norm": 0.6696339249610901, "learning_rate": 1.9782241434580744e-05, "loss": 2.2645, "step": 2467 }, { "epoch": 0.08, "grad_norm": 0.6642321944236755, "learning_rate": 1.97820207745568e-05, "loss": 2.2725, "step": 2468 }, { "epoch": 0.08, "grad_norm": 0.7508084177970886, "learning_rate": 1.9781800004021314e-05, "loss": 2.3355, "step": 2469 }, { "epoch": 0.08, "grad_norm": 0.6830573678016663, "learning_rate": 1.9781579122976786e-05, "loss": 2.3611, "step": 2470 }, { "epoch": 0.08, "grad_norm": 0.663981020450592, "learning_rate": 1.978135813142571e-05, "loss": 2.2829, "step": 2471 }, { "epoch": 0.08, "grad_norm": 0.6628773808479309, "learning_rate": 1.978113702937058e-05, "loss": 2.3523, "step": 2472 }, { "epoch": 0.08, "grad_norm": 0.6705053448677063, "learning_rate": 1.9780915816813898e-05, "loss": 2.3284, "step": 2473 }, { "epoch": 0.08, "grad_norm": 0.7338024973869324, "learning_rate": 1.9780694493758164e-05, "loss": 2.3616, "step": 2474 }, { "epoch": 0.08, "grad_norm": 0.6585288643836975, "learning_rate": 1.9780473060205873e-05, "loss": 2.3061, "step": 2475 }, { "epoch": 0.08, "grad_norm": 0.6750043630599976, "learning_rate": 1.978025151615953e-05, "loss": 2.3577, "step": 2476 }, { "epoch": 0.08, "grad_norm": 0.7103081941604614, "learning_rate": 1.9780029861621636e-05, "loss": 2.3343, "step": 2477 }, { "epoch": 0.08, "grad_norm": 0.6971740126609802, "learning_rate": 1.9779808096594697e-05, "loss": 2.3729, "step": 2478 }, { "epoch": 0.08, "grad_norm": 0.6948923468589783, "learning_rate": 1.9779586221081218e-05, "loss": 2.3973, "step": 2479 }, { "epoch": 0.08, "grad_norm": 0.6590224504470825, "learning_rate": 1.9779364235083705e-05, "loss": 2.342, "step": 2480 }, { "epoch": 0.08, "grad_norm": 0.6810263991355896, "learning_rate": 1.9779142138604667e-05, "loss": 2.3886, "step": 2481 }, { "epoch": 0.08, "grad_norm": 0.677933931350708, "learning_rate": 1.977891993164661e-05, "loss": 2.3141, "step": 2482 }, { "epoch": 0.08, "grad_norm": 0.7265552878379822, "learning_rate": 1.9778697614212046e-05, "loss": 2.3418, "step": 2483 }, { "epoch": 0.08, "grad_norm": 0.7075479030609131, "learning_rate": 1.9778475186303493e-05, "loss": 2.3563, "step": 2484 }, { "epoch": 0.08, "grad_norm": 0.6601793766021729, "learning_rate": 1.9778252647923452e-05, "loss": 2.3481, "step": 2485 }, { "epoch": 0.08, "grad_norm": 0.6419060826301575, "learning_rate": 1.9778029999074445e-05, "loss": 2.258, "step": 2486 }, { "epoch": 0.08, "grad_norm": 0.6921791434288025, "learning_rate": 1.9777807239758986e-05, "loss": 2.2663, "step": 2487 }, { "epoch": 0.08, "grad_norm": 0.6691721677780151, "learning_rate": 1.977758436997959e-05, "loss": 2.3167, "step": 2488 }, { "epoch": 0.08, "grad_norm": 0.7009128332138062, "learning_rate": 1.977736138973878e-05, "loss": 2.3695, "step": 2489 }, { "epoch": 0.08, "grad_norm": 0.6716076731681824, "learning_rate": 1.9777138299039068e-05, "loss": 2.3607, "step": 2490 }, { "epoch": 0.08, "grad_norm": 0.6918885707855225, "learning_rate": 1.9776915097882976e-05, "loss": 2.3011, "step": 2491 }, { "epoch": 0.08, "grad_norm": 0.653325080871582, "learning_rate": 1.977669178627303e-05, "loss": 2.3295, "step": 2492 }, { "epoch": 0.08, "grad_norm": 0.7150638699531555, "learning_rate": 1.977646836421175e-05, "loss": 2.3531, "step": 2493 }, { "epoch": 0.08, "grad_norm": 0.6699737906455994, "learning_rate": 1.977624483170166e-05, "loss": 2.2952, "step": 2494 }, { "epoch": 0.08, "grad_norm": 0.6769757270812988, "learning_rate": 1.9776021188745283e-05, "loss": 2.3511, "step": 2495 }, { "epoch": 0.08, "grad_norm": 0.6839399337768555, "learning_rate": 1.9775797435345146e-05, "loss": 2.3964, "step": 2496 }, { "epoch": 0.08, "grad_norm": 0.6775975227355957, "learning_rate": 1.9775573571503782e-05, "loss": 2.307, "step": 2497 }, { "epoch": 0.08, "grad_norm": 0.6734044551849365, "learning_rate": 1.9775349597223718e-05, "loss": 2.2954, "step": 2498 }, { "epoch": 0.08, "grad_norm": 0.6867079138755798, "learning_rate": 1.977512551250748e-05, "loss": 2.3962, "step": 2499 }, { "epoch": 0.08, "grad_norm": 0.6623310446739197, "learning_rate": 1.9774901317357606e-05, "loss": 2.3888, "step": 2500 }, { "epoch": 0.08, "grad_norm": 0.6749738454818726, "learning_rate": 1.9774677011776627e-05, "loss": 2.2942, "step": 2501 }, { "epoch": 0.08, "grad_norm": 0.7118020057678223, "learning_rate": 1.977445259576707e-05, "loss": 2.3604, "step": 2502 }, { "epoch": 0.08, "grad_norm": 0.7161041498184204, "learning_rate": 1.9774228069331477e-05, "loss": 2.3489, "step": 2503 }, { "epoch": 0.08, "grad_norm": 0.6589542031288147, "learning_rate": 1.9774003432472386e-05, "loss": 2.3941, "step": 2504 }, { "epoch": 0.08, "grad_norm": 0.6774342656135559, "learning_rate": 1.9773778685192334e-05, "loss": 2.361, "step": 2505 }, { "epoch": 0.08, "grad_norm": 0.6852116584777832, "learning_rate": 1.9773553827493853e-05, "loss": 2.3543, "step": 2506 }, { "epoch": 0.08, "grad_norm": 0.6582016348838806, "learning_rate": 1.9773328859379495e-05, "loss": 2.293, "step": 2507 }, { "epoch": 0.08, "grad_norm": 0.66592937707901, "learning_rate": 1.977310378085179e-05, "loss": 2.3754, "step": 2508 }, { "epoch": 0.08, "grad_norm": 0.6909178495407104, "learning_rate": 1.977287859191329e-05, "loss": 2.2913, "step": 2509 }, { "epoch": 0.08, "grad_norm": 0.7298354506492615, "learning_rate": 1.977265329256653e-05, "loss": 2.3705, "step": 2510 }, { "epoch": 0.08, "grad_norm": 0.6798087954521179, "learning_rate": 1.9772427882814066e-05, "loss": 2.3607, "step": 2511 }, { "epoch": 0.08, "grad_norm": 0.6613409519195557, "learning_rate": 1.9772202362658436e-05, "loss": 2.3717, "step": 2512 }, { "epoch": 0.08, "grad_norm": 0.7095069885253906, "learning_rate": 1.977197673210219e-05, "loss": 2.3206, "step": 2513 }, { "epoch": 0.08, "grad_norm": 0.7083219885826111, "learning_rate": 1.977175099114788e-05, "loss": 2.3383, "step": 2514 }, { "epoch": 0.08, "grad_norm": 0.7079398036003113, "learning_rate": 1.9771525139798052e-05, "loss": 2.3359, "step": 2515 }, { "epoch": 0.08, "grad_norm": 0.7500843405723572, "learning_rate": 1.9771299178055262e-05, "loss": 2.2748, "step": 2516 }, { "epoch": 0.08, "grad_norm": 0.6314077377319336, "learning_rate": 1.977107310592206e-05, "loss": 2.3019, "step": 2517 }, { "epoch": 0.08, "grad_norm": 0.7129048109054565, "learning_rate": 1.9770846923401e-05, "loss": 2.3916, "step": 2518 }, { "epoch": 0.08, "grad_norm": 0.6690525412559509, "learning_rate": 1.9770620630494637e-05, "loss": 2.3426, "step": 2519 }, { "epoch": 0.08, "grad_norm": 0.6771161556243896, "learning_rate": 1.977039422720553e-05, "loss": 2.4115, "step": 2520 }, { "epoch": 0.08, "grad_norm": 0.6748583316802979, "learning_rate": 1.9770167713536234e-05, "loss": 2.3244, "step": 2521 }, { "epoch": 0.08, "grad_norm": 0.6719194054603577, "learning_rate": 1.976994108948931e-05, "loss": 2.309, "step": 2522 }, { "epoch": 0.08, "grad_norm": 0.6624894738197327, "learning_rate": 1.9769714355067314e-05, "loss": 2.3283, "step": 2523 }, { "epoch": 0.08, "grad_norm": 0.6610791087150574, "learning_rate": 1.9769487510272812e-05, "loss": 2.3539, "step": 2524 }, { "epoch": 0.08, "grad_norm": 0.710705041885376, "learning_rate": 1.9769260555108366e-05, "loss": 2.4005, "step": 2525 }, { "epoch": 0.08, "grad_norm": 0.6553685665130615, "learning_rate": 1.9769033489576543e-05, "loss": 2.3185, "step": 2526 }, { "epoch": 0.08, "grad_norm": 0.6929029822349548, "learning_rate": 1.97688063136799e-05, "loss": 2.3661, "step": 2527 }, { "epoch": 0.08, "grad_norm": 0.6664001941680908, "learning_rate": 1.976857902742101e-05, "loss": 2.2775, "step": 2528 }, { "epoch": 0.08, "grad_norm": 0.6698111295700073, "learning_rate": 1.976835163080244e-05, "loss": 2.3137, "step": 2529 }, { "epoch": 0.08, "grad_norm": 0.6514168977737427, "learning_rate": 1.976812412382676e-05, "loss": 2.3823, "step": 2530 }, { "epoch": 0.08, "grad_norm": 0.661277174949646, "learning_rate": 1.9767896506496534e-05, "loss": 2.29, "step": 2531 }, { "epoch": 0.08, "grad_norm": 0.6575627326965332, "learning_rate": 1.9767668778814343e-05, "loss": 2.3572, "step": 2532 }, { "epoch": 0.08, "grad_norm": 0.6911303400993347, "learning_rate": 1.976744094078275e-05, "loss": 2.3405, "step": 2533 }, { "epoch": 0.08, "grad_norm": 0.6842401027679443, "learning_rate": 1.9767212992404335e-05, "loss": 2.4345, "step": 2534 }, { "epoch": 0.08, "grad_norm": 0.6876300573348999, "learning_rate": 1.9766984933681672e-05, "loss": 2.3806, "step": 2535 }, { "epoch": 0.08, "grad_norm": 0.681178629398346, "learning_rate": 1.9766756764617338e-05, "loss": 2.2977, "step": 2536 }, { "epoch": 0.08, "grad_norm": 0.6752604246139526, "learning_rate": 1.9766528485213907e-05, "loss": 2.3436, "step": 2537 }, { "epoch": 0.08, "grad_norm": 0.6857795715332031, "learning_rate": 1.9766300095473963e-05, "loss": 2.3354, "step": 2538 }, { "epoch": 0.08, "grad_norm": 0.6532720923423767, "learning_rate": 1.9766071595400083e-05, "loss": 2.3791, "step": 2539 }, { "epoch": 0.08, "grad_norm": 0.6762163043022156, "learning_rate": 1.9765842984994853e-05, "loss": 2.3965, "step": 2540 }, { "epoch": 0.08, "grad_norm": 0.6776533126831055, "learning_rate": 1.976561426426085e-05, "loss": 2.3515, "step": 2541 }, { "epoch": 0.08, "grad_norm": 0.6732799410820007, "learning_rate": 1.976538543320066e-05, "loss": 2.3031, "step": 2542 }, { "epoch": 0.08, "grad_norm": 0.6601924300193787, "learning_rate": 1.976515649181687e-05, "loss": 2.3455, "step": 2543 }, { "epoch": 0.08, "grad_norm": 0.6769899129867554, "learning_rate": 1.976492744011206e-05, "loss": 2.3406, "step": 2544 }, { "epoch": 0.08, "grad_norm": 0.6701977252960205, "learning_rate": 1.9764698278088826e-05, "loss": 2.3313, "step": 2545 }, { "epoch": 0.08, "grad_norm": 0.6765927672386169, "learning_rate": 1.9764469005749753e-05, "loss": 2.3199, "step": 2546 }, { "epoch": 0.08, "grad_norm": 0.6569625735282898, "learning_rate": 1.9764239623097432e-05, "loss": 2.3749, "step": 2547 }, { "epoch": 0.08, "grad_norm": 0.6878440976142883, "learning_rate": 1.9764010130134456e-05, "loss": 2.3246, "step": 2548 }, { "epoch": 0.08, "grad_norm": 0.7374963164329529, "learning_rate": 1.9763780526863415e-05, "loss": 2.3752, "step": 2549 }, { "epoch": 0.08, "grad_norm": 0.6553363800048828, "learning_rate": 1.97635508132869e-05, "loss": 2.3942, "step": 2550 }, { "epoch": 0.08, "grad_norm": 0.6632681488990784, "learning_rate": 1.9763320989407516e-05, "loss": 2.3239, "step": 2551 }, { "epoch": 0.08, "grad_norm": 0.742710530757904, "learning_rate": 1.976309105522785e-05, "loss": 2.2165, "step": 2552 }, { "epoch": 0.08, "grad_norm": 0.6711102724075317, "learning_rate": 1.97628610107505e-05, "loss": 2.3137, "step": 2553 }, { "epoch": 0.08, "grad_norm": 0.6705657243728638, "learning_rate": 1.9762630855978074e-05, "loss": 2.3955, "step": 2554 }, { "epoch": 0.09, "grad_norm": 0.6922289133071899, "learning_rate": 1.976240059091316e-05, "loss": 2.3154, "step": 2555 }, { "epoch": 0.09, "grad_norm": 0.6572542190551758, "learning_rate": 1.976217021555837e-05, "loss": 2.2869, "step": 2556 }, { "epoch": 0.09, "grad_norm": 0.7043604254722595, "learning_rate": 1.97619397299163e-05, "loss": 2.3116, "step": 2557 }, { "epoch": 0.09, "grad_norm": 0.6992303729057312, "learning_rate": 1.976170913398956e-05, "loss": 2.3722, "step": 2558 }, { "epoch": 0.09, "grad_norm": 0.6854923367500305, "learning_rate": 1.9761478427780746e-05, "loss": 2.3498, "step": 2559 }, { "epoch": 0.09, "grad_norm": 0.7188247442245483, "learning_rate": 1.9761247611292472e-05, "loss": 2.3239, "step": 2560 }, { "epoch": 0.09, "grad_norm": 0.670086145401001, "learning_rate": 1.976101668452734e-05, "loss": 2.3215, "step": 2561 }, { "epoch": 0.09, "grad_norm": 0.6621863842010498, "learning_rate": 1.9760785647487965e-05, "loss": 2.2859, "step": 2562 }, { "epoch": 0.09, "grad_norm": 0.6977325677871704, "learning_rate": 1.9760554500176953e-05, "loss": 2.3643, "step": 2563 }, { "epoch": 0.09, "grad_norm": 0.6692652106285095, "learning_rate": 1.9760323242596918e-05, "loss": 2.326, "step": 2564 }, { "epoch": 0.09, "grad_norm": 0.6879720091819763, "learning_rate": 1.976009187475047e-05, "loss": 2.3995, "step": 2565 }, { "epoch": 0.09, "grad_norm": 0.7025189399719238, "learning_rate": 1.9759860396640228e-05, "loss": 2.3461, "step": 2566 }, { "epoch": 0.09, "grad_norm": 0.7029894590377808, "learning_rate": 1.9759628808268796e-05, "loss": 2.287, "step": 2567 }, { "epoch": 0.09, "grad_norm": 0.7145413160324097, "learning_rate": 1.9759397109638804e-05, "loss": 2.325, "step": 2568 }, { "epoch": 0.09, "grad_norm": 0.6734700202941895, "learning_rate": 1.9759165300752858e-05, "loss": 2.3274, "step": 2569 }, { "epoch": 0.09, "grad_norm": 0.6818303465843201, "learning_rate": 1.9758933381613583e-05, "loss": 2.4053, "step": 2570 }, { "epoch": 0.09, "grad_norm": 0.6740102767944336, "learning_rate": 1.9758701352223598e-05, "loss": 2.3826, "step": 2571 }, { "epoch": 0.09, "grad_norm": 0.6677603721618652, "learning_rate": 1.9758469212585526e-05, "loss": 2.3814, "step": 2572 }, { "epoch": 0.09, "grad_norm": 0.7409994006156921, "learning_rate": 1.9758236962701986e-05, "loss": 2.3438, "step": 2573 }, { "epoch": 0.09, "grad_norm": 0.6682050228118896, "learning_rate": 1.9758004602575604e-05, "loss": 2.3731, "step": 2574 }, { "epoch": 0.09, "grad_norm": 0.6983799934387207, "learning_rate": 1.9757772132209005e-05, "loss": 2.3769, "step": 2575 }, { "epoch": 0.09, "grad_norm": 0.667367696762085, "learning_rate": 1.9757539551604815e-05, "loss": 2.4061, "step": 2576 }, { "epoch": 0.09, "grad_norm": 0.6818742156028748, "learning_rate": 1.9757306860765663e-05, "loss": 2.2342, "step": 2577 }, { "epoch": 0.09, "grad_norm": 0.6677143573760986, "learning_rate": 1.9757074059694176e-05, "loss": 2.3649, "step": 2578 }, { "epoch": 0.09, "grad_norm": 0.6930766105651855, "learning_rate": 1.9756841148392985e-05, "loss": 2.3522, "step": 2579 }, { "epoch": 0.09, "grad_norm": 0.6598162055015564, "learning_rate": 1.9756608126864717e-05, "loss": 2.3653, "step": 2580 }, { "epoch": 0.09, "grad_norm": 0.6312028765678406, "learning_rate": 1.9756374995112014e-05, "loss": 2.3636, "step": 2581 }, { "epoch": 0.09, "grad_norm": 0.6659961342811584, "learning_rate": 1.97561417531375e-05, "loss": 2.3344, "step": 2582 }, { "epoch": 0.09, "grad_norm": 0.6427549719810486, "learning_rate": 1.975590840094381e-05, "loss": 2.3591, "step": 2583 }, { "epoch": 0.09, "grad_norm": 0.6754583716392517, "learning_rate": 1.9755674938533593e-05, "loss": 2.3293, "step": 2584 }, { "epoch": 0.09, "grad_norm": 0.6722887754440308, "learning_rate": 1.9755441365909474e-05, "loss": 2.3619, "step": 2585 }, { "epoch": 0.09, "grad_norm": 0.6678455471992493, "learning_rate": 1.9755207683074097e-05, "loss": 2.3705, "step": 2586 }, { "epoch": 0.09, "grad_norm": 0.6525406241416931, "learning_rate": 1.9754973890030097e-05, "loss": 2.3724, "step": 2587 }, { "epoch": 0.09, "grad_norm": 0.6728523373603821, "learning_rate": 1.9754739986780125e-05, "loss": 2.41, "step": 2588 }, { "epoch": 0.09, "grad_norm": 0.6756922006607056, "learning_rate": 1.9754505973326816e-05, "loss": 2.3362, "step": 2589 }, { "epoch": 0.09, "grad_norm": 0.6535354256629944, "learning_rate": 1.975427184967281e-05, "loss": 2.3035, "step": 2590 }, { "epoch": 0.09, "grad_norm": 0.6603698134422302, "learning_rate": 1.975403761582076e-05, "loss": 2.3197, "step": 2591 }, { "epoch": 0.09, "grad_norm": 0.6813551783561707, "learning_rate": 1.975380327177331e-05, "loss": 2.3345, "step": 2592 }, { "epoch": 0.09, "grad_norm": 0.6722380518913269, "learning_rate": 1.975356881753311e-05, "loss": 2.3617, "step": 2593 }, { "epoch": 0.09, "grad_norm": 0.6549121737480164, "learning_rate": 1.9753334253102802e-05, "loss": 2.3345, "step": 2594 }, { "epoch": 0.09, "grad_norm": 0.7044191360473633, "learning_rate": 1.975309957848504e-05, "loss": 2.3977, "step": 2595 }, { "epoch": 0.09, "grad_norm": 0.6986442804336548, "learning_rate": 1.9752864793682476e-05, "loss": 2.4535, "step": 2596 }, { "epoch": 0.09, "grad_norm": 0.7342276573181152, "learning_rate": 1.975262989869776e-05, "loss": 2.3951, "step": 2597 }, { "epoch": 0.09, "grad_norm": 0.644694447517395, "learning_rate": 1.9752394893533546e-05, "loss": 2.3238, "step": 2598 }, { "epoch": 0.09, "grad_norm": 0.6654594540596008, "learning_rate": 1.975215977819249e-05, "loss": 2.3113, "step": 2599 }, { "epoch": 0.09, "grad_norm": 0.6604269742965698, "learning_rate": 1.9751924552677254e-05, "loss": 2.3596, "step": 2600 }, { "epoch": 0.09, "grad_norm": 0.675030529499054, "learning_rate": 1.9751689216990484e-05, "loss": 2.2923, "step": 2601 }, { "epoch": 0.09, "grad_norm": 0.7175413370132446, "learning_rate": 1.9751453771134845e-05, "loss": 2.3172, "step": 2602 }, { "epoch": 0.09, "grad_norm": 0.6582487225532532, "learning_rate": 1.9751218215112996e-05, "loss": 2.3568, "step": 2603 }, { "epoch": 0.09, "grad_norm": 0.6553653478622437, "learning_rate": 1.97509825489276e-05, "loss": 2.3252, "step": 2604 }, { "epoch": 0.09, "grad_norm": 0.6520278453826904, "learning_rate": 1.975074677258132e-05, "loss": 2.2719, "step": 2605 }, { "epoch": 0.09, "grad_norm": 0.6835995316505432, "learning_rate": 1.975051088607681e-05, "loss": 2.2891, "step": 2606 }, { "epoch": 0.09, "grad_norm": 0.6756531596183777, "learning_rate": 1.9750274889416746e-05, "loss": 2.2676, "step": 2607 }, { "epoch": 0.09, "grad_norm": 0.6604465842247009, "learning_rate": 1.9750038782603792e-05, "loss": 2.3499, "step": 2608 }, { "epoch": 0.09, "grad_norm": 0.6575184464454651, "learning_rate": 1.9749802565640614e-05, "loss": 2.336, "step": 2609 }, { "epoch": 0.09, "grad_norm": 0.6695541143417358, "learning_rate": 1.9749566238529877e-05, "loss": 2.3244, "step": 2610 }, { "epoch": 0.09, "grad_norm": 0.6648885011672974, "learning_rate": 1.974932980127426e-05, "loss": 2.3452, "step": 2611 }, { "epoch": 0.09, "grad_norm": 0.6662595868110657, "learning_rate": 1.9749093253876426e-05, "loss": 2.2842, "step": 2612 }, { "epoch": 0.09, "grad_norm": 0.6614744663238525, "learning_rate": 1.9748856596339048e-05, "loss": 2.3319, "step": 2613 }, { "epoch": 0.09, "grad_norm": 0.6691503524780273, "learning_rate": 1.9748619828664805e-05, "loss": 2.2617, "step": 2614 }, { "epoch": 0.09, "grad_norm": 0.6675350666046143, "learning_rate": 1.9748382950856367e-05, "loss": 2.2923, "step": 2615 }, { "epoch": 0.09, "grad_norm": 0.6864911913871765, "learning_rate": 1.974814596291641e-05, "loss": 2.3235, "step": 2616 }, { "epoch": 0.09, "grad_norm": 0.6606498956680298, "learning_rate": 1.9747908864847615e-05, "loss": 2.3297, "step": 2617 }, { "epoch": 0.09, "grad_norm": 0.6963382363319397, "learning_rate": 1.9747671656652654e-05, "loss": 2.3068, "step": 2618 }, { "epoch": 0.09, "grad_norm": 0.7013143301010132, "learning_rate": 1.974743433833422e-05, "loss": 2.3221, "step": 2619 }, { "epoch": 0.09, "grad_norm": 0.6671189665794373, "learning_rate": 1.9747196909894978e-05, "loss": 2.2847, "step": 2620 }, { "epoch": 0.09, "grad_norm": 0.7117622494697571, "learning_rate": 1.9746959371337618e-05, "loss": 2.2751, "step": 2621 }, { "epoch": 0.09, "grad_norm": 0.6652538180351257, "learning_rate": 1.9746721722664827e-05, "loss": 2.3045, "step": 2622 }, { "epoch": 0.09, "grad_norm": 0.692873477935791, "learning_rate": 1.9746483963879287e-05, "loss": 2.3351, "step": 2623 }, { "epoch": 0.09, "grad_norm": 0.6478108167648315, "learning_rate": 1.974624609498368e-05, "loss": 2.306, "step": 2624 }, { "epoch": 0.09, "grad_norm": 0.6797033548355103, "learning_rate": 1.9746008115980693e-05, "loss": 2.268, "step": 2625 }, { "epoch": 0.09, "grad_norm": 0.7153365015983582, "learning_rate": 1.9745770026873026e-05, "loss": 2.429, "step": 2626 }, { "epoch": 0.09, "grad_norm": 0.6668415665626526, "learning_rate": 1.9745531827663354e-05, "loss": 2.3347, "step": 2627 }, { "epoch": 0.09, "grad_norm": 0.6554449200630188, "learning_rate": 1.974529351835438e-05, "loss": 2.3302, "step": 2628 }, { "epoch": 0.09, "grad_norm": 0.68616783618927, "learning_rate": 1.9745055098948785e-05, "loss": 2.3398, "step": 2629 }, { "epoch": 0.09, "grad_norm": 0.6684591770172119, "learning_rate": 1.9744816569449274e-05, "loss": 2.3457, "step": 2630 }, { "epoch": 0.09, "grad_norm": 0.6995184421539307, "learning_rate": 1.9744577929858537e-05, "loss": 2.4529, "step": 2631 }, { "epoch": 0.09, "grad_norm": 0.6782556772232056, "learning_rate": 1.9744339180179266e-05, "loss": 2.3126, "step": 2632 }, { "epoch": 0.09, "grad_norm": 0.6802340149879456, "learning_rate": 1.9744100320414166e-05, "loss": 2.3574, "step": 2633 }, { "epoch": 0.09, "grad_norm": 0.6853451132774353, "learning_rate": 1.9743861350565926e-05, "loss": 2.3394, "step": 2634 }, { "epoch": 0.09, "grad_norm": 0.6779966354370117, "learning_rate": 1.9743622270637254e-05, "loss": 2.3204, "step": 2635 }, { "epoch": 0.09, "grad_norm": 0.6626641154289246, "learning_rate": 1.9743383080630847e-05, "loss": 2.3263, "step": 2636 }, { "epoch": 0.09, "grad_norm": 0.6955687403678894, "learning_rate": 1.974314378054941e-05, "loss": 2.3328, "step": 2637 }, { "epoch": 0.09, "grad_norm": 0.688758134841919, "learning_rate": 1.974290437039564e-05, "loss": 2.3672, "step": 2638 }, { "epoch": 0.09, "grad_norm": 0.6622840166091919, "learning_rate": 1.974266485017225e-05, "loss": 2.317, "step": 2639 }, { "epoch": 0.09, "grad_norm": 0.693135142326355, "learning_rate": 1.974242521988194e-05, "loss": 2.3454, "step": 2640 }, { "epoch": 0.09, "grad_norm": 0.6840419173240662, "learning_rate": 1.9742185479527423e-05, "loss": 2.4075, "step": 2641 }, { "epoch": 0.09, "grad_norm": 0.673745334148407, "learning_rate": 1.9741945629111402e-05, "loss": 2.3096, "step": 2642 }, { "epoch": 0.09, "grad_norm": 0.6978880167007446, "learning_rate": 1.974170566863659e-05, "loss": 2.331, "step": 2643 }, { "epoch": 0.09, "grad_norm": 0.6693536043167114, "learning_rate": 1.9741465598105693e-05, "loss": 2.3699, "step": 2644 }, { "epoch": 0.09, "grad_norm": 0.7083197832107544, "learning_rate": 1.9741225417521427e-05, "loss": 2.3074, "step": 2645 }, { "epoch": 0.09, "grad_norm": 0.660619854927063, "learning_rate": 1.974098512688651e-05, "loss": 2.3456, "step": 2646 }, { "epoch": 0.09, "grad_norm": 0.6895096898078918, "learning_rate": 1.9740744726203646e-05, "loss": 2.3467, "step": 2647 }, { "epoch": 0.09, "grad_norm": 0.6936542391777039, "learning_rate": 1.9740504215475562e-05, "loss": 2.3278, "step": 2648 }, { "epoch": 0.09, "grad_norm": 0.6921387314796448, "learning_rate": 1.9740263594704966e-05, "loss": 2.3043, "step": 2649 }, { "epoch": 0.09, "grad_norm": 0.686497688293457, "learning_rate": 1.9740022863894582e-05, "loss": 2.2841, "step": 2650 }, { "epoch": 0.09, "grad_norm": 0.6849023103713989, "learning_rate": 1.973978202304713e-05, "loss": 2.3904, "step": 2651 }, { "epoch": 0.09, "grad_norm": 0.6534825563430786, "learning_rate": 1.9739541072165325e-05, "loss": 2.3281, "step": 2652 }, { "epoch": 0.09, "grad_norm": 0.684870183467865, "learning_rate": 1.9739300011251893e-05, "loss": 2.2653, "step": 2653 }, { "epoch": 0.09, "grad_norm": 0.6593223214149475, "learning_rate": 1.973905884030956e-05, "loss": 2.2932, "step": 2654 }, { "epoch": 0.09, "grad_norm": 0.7248606085777283, "learning_rate": 1.9738817559341044e-05, "loss": 2.3455, "step": 2655 }, { "epoch": 0.09, "grad_norm": 0.6495844721794128, "learning_rate": 1.973857616834908e-05, "loss": 2.3143, "step": 2656 }, { "epoch": 0.09, "grad_norm": 0.6471594572067261, "learning_rate": 1.9738334667336385e-05, "loss": 2.3243, "step": 2657 }, { "epoch": 0.09, "grad_norm": 0.7020214200019836, "learning_rate": 1.9738093056305693e-05, "loss": 2.3331, "step": 2658 }, { "epoch": 0.09, "grad_norm": 0.6698110699653625, "learning_rate": 1.9737851335259736e-05, "loss": 2.3643, "step": 2659 }, { "epoch": 0.09, "grad_norm": 0.6480512022972107, "learning_rate": 1.9737609504201238e-05, "loss": 2.284, "step": 2660 }, { "epoch": 0.09, "grad_norm": 0.6656991243362427, "learning_rate": 1.9737367563132936e-05, "loss": 2.2823, "step": 2661 }, { "epoch": 0.09, "grad_norm": 0.7015044689178467, "learning_rate": 1.9737125512057562e-05, "loss": 2.2916, "step": 2662 }, { "epoch": 0.09, "grad_norm": 0.692518949508667, "learning_rate": 1.973688335097785e-05, "loss": 2.3405, "step": 2663 }, { "epoch": 0.09, "grad_norm": 0.6857185959815979, "learning_rate": 1.9736641079896537e-05, "loss": 2.2824, "step": 2664 }, { "epoch": 0.09, "grad_norm": 0.6690720319747925, "learning_rate": 1.9736398698816358e-05, "loss": 2.274, "step": 2665 }, { "epoch": 0.09, "grad_norm": 0.6870160698890686, "learning_rate": 1.9736156207740054e-05, "loss": 2.4095, "step": 2666 }, { "epoch": 0.09, "grad_norm": 0.680374026298523, "learning_rate": 1.973591360667036e-05, "loss": 2.3454, "step": 2667 }, { "epoch": 0.09, "grad_norm": 0.7228624820709229, "learning_rate": 1.9735670895610025e-05, "loss": 2.4323, "step": 2668 }, { "epoch": 0.09, "grad_norm": 0.6820529103279114, "learning_rate": 1.9735428074561784e-05, "loss": 2.3229, "step": 2669 }, { "epoch": 0.09, "grad_norm": 0.6576327681541443, "learning_rate": 1.973518514352838e-05, "loss": 2.3514, "step": 2670 }, { "epoch": 0.09, "grad_norm": 0.73012375831604, "learning_rate": 1.9734942102512564e-05, "loss": 2.2817, "step": 2671 }, { "epoch": 0.09, "grad_norm": 0.7029129266738892, "learning_rate": 1.9734698951517074e-05, "loss": 2.2584, "step": 2672 }, { "epoch": 0.09, "grad_norm": 0.6998592615127563, "learning_rate": 1.973445569054466e-05, "loss": 2.366, "step": 2673 }, { "epoch": 0.09, "grad_norm": 0.6375428438186646, "learning_rate": 1.973421231959807e-05, "loss": 2.3533, "step": 2674 }, { "epoch": 0.09, "grad_norm": 0.6980307102203369, "learning_rate": 1.9733968838680057e-05, "loss": 2.3354, "step": 2675 }, { "epoch": 0.09, "grad_norm": 0.6800624132156372, "learning_rate": 1.973372524779337e-05, "loss": 2.3186, "step": 2676 }, { "epoch": 0.09, "grad_norm": 0.6651679873466492, "learning_rate": 1.9733481546940753e-05, "loss": 2.313, "step": 2677 }, { "epoch": 0.09, "grad_norm": 0.6906301975250244, "learning_rate": 1.973323773612497e-05, "loss": 2.4072, "step": 2678 }, { "epoch": 0.09, "grad_norm": 0.6820117831230164, "learning_rate": 1.973299381534877e-05, "loss": 2.3682, "step": 2679 }, { "epoch": 0.09, "grad_norm": 0.6804512143135071, "learning_rate": 1.9732749784614912e-05, "loss": 2.3221, "step": 2680 }, { "epoch": 0.09, "grad_norm": 0.6502261757850647, "learning_rate": 1.973250564392615e-05, "loss": 2.3421, "step": 2681 }, { "epoch": 0.09, "grad_norm": 0.6732829809188843, "learning_rate": 1.9732261393285245e-05, "loss": 2.3534, "step": 2682 }, { "epoch": 0.09, "grad_norm": 0.6769009828567505, "learning_rate": 1.9732017032694953e-05, "loss": 2.349, "step": 2683 }, { "epoch": 0.09, "grad_norm": 0.7115375995635986, "learning_rate": 1.9731772562158033e-05, "loss": 2.3782, "step": 2684 }, { "epoch": 0.09, "grad_norm": 0.702756941318512, "learning_rate": 1.9731527981677253e-05, "loss": 2.4245, "step": 2685 }, { "epoch": 0.09, "grad_norm": 0.6708562970161438, "learning_rate": 1.973128329125537e-05, "loss": 2.3797, "step": 2686 }, { "epoch": 0.09, "grad_norm": 0.6474363207817078, "learning_rate": 1.9731038490895155e-05, "loss": 2.337, "step": 2687 }, { "epoch": 0.09, "grad_norm": 0.7877047061920166, "learning_rate": 1.973079358059937e-05, "loss": 2.313, "step": 2688 }, { "epoch": 0.09, "grad_norm": 0.6766756176948547, "learning_rate": 1.9730548560370782e-05, "loss": 2.2859, "step": 2689 }, { "epoch": 0.09, "grad_norm": 0.6642561554908752, "learning_rate": 1.9730303430212155e-05, "loss": 2.2982, "step": 2690 }, { "epoch": 0.09, "grad_norm": 0.6470698118209839, "learning_rate": 1.973005819012627e-05, "loss": 2.3217, "step": 2691 }, { "epoch": 0.09, "grad_norm": 0.6641038060188293, "learning_rate": 1.972981284011588e-05, "loss": 2.2844, "step": 2692 }, { "epoch": 0.09, "grad_norm": 0.7244064211845398, "learning_rate": 1.9729567380183777e-05, "loss": 2.2948, "step": 2693 }, { "epoch": 0.09, "grad_norm": 0.6843678951263428, "learning_rate": 1.972932181033272e-05, "loss": 2.363, "step": 2694 }, { "epoch": 0.09, "grad_norm": 0.6606845259666443, "learning_rate": 1.9729076130565482e-05, "loss": 2.3669, "step": 2695 }, { "epoch": 0.09, "grad_norm": 0.6558694839477539, "learning_rate": 1.972883034088485e-05, "loss": 2.4103, "step": 2696 }, { "epoch": 0.09, "grad_norm": 0.6615455746650696, "learning_rate": 1.9728584441293594e-05, "loss": 2.3222, "step": 2697 }, { "epoch": 0.09, "grad_norm": 0.6603631377220154, "learning_rate": 1.9728338431794485e-05, "loss": 2.3586, "step": 2698 }, { "epoch": 0.09, "grad_norm": 0.6658846139907837, "learning_rate": 1.9728092312390318e-05, "loss": 2.2742, "step": 2699 }, { "epoch": 0.09, "grad_norm": 0.6689095497131348, "learning_rate": 1.9727846083083863e-05, "loss": 2.2687, "step": 2700 }, { "epoch": 0.09, "grad_norm": 0.6557187438011169, "learning_rate": 1.97275997438779e-05, "loss": 2.318, "step": 2701 }, { "epoch": 0.09, "grad_norm": 0.6632029414176941, "learning_rate": 1.9727353294775223e-05, "loss": 2.3758, "step": 2702 }, { "epoch": 0.09, "grad_norm": 0.6535089612007141, "learning_rate": 1.9727106735778604e-05, "loss": 2.3527, "step": 2703 }, { "epoch": 0.09, "grad_norm": 0.6508504152297974, "learning_rate": 1.972686006689084e-05, "loss": 2.3167, "step": 2704 }, { "epoch": 0.09, "grad_norm": 0.6619318723678589, "learning_rate": 1.9726613288114707e-05, "loss": 2.3414, "step": 2705 }, { "epoch": 0.09, "grad_norm": 0.6698011755943298, "learning_rate": 1.9726366399452998e-05, "loss": 2.3451, "step": 2706 }, { "epoch": 0.09, "grad_norm": 0.6627507209777832, "learning_rate": 1.97261194009085e-05, "loss": 2.3602, "step": 2707 }, { "epoch": 0.09, "grad_norm": 0.6537807583808899, "learning_rate": 1.9725872292484004e-05, "loss": 2.313, "step": 2708 }, { "epoch": 0.09, "grad_norm": 0.6808216571807861, "learning_rate": 1.9725625074182307e-05, "loss": 2.3337, "step": 2709 }, { "epoch": 0.09, "grad_norm": 0.7131608724594116, "learning_rate": 1.9725377746006196e-05, "loss": 2.3667, "step": 2710 }, { "epoch": 0.09, "grad_norm": 0.6554555296897888, "learning_rate": 1.9725130307958466e-05, "loss": 2.3328, "step": 2711 }, { "epoch": 0.09, "grad_norm": 0.6939631104469299, "learning_rate": 1.9724882760041914e-05, "loss": 2.2983, "step": 2712 }, { "epoch": 0.09, "grad_norm": 0.6824575066566467, "learning_rate": 1.9724635102259337e-05, "loss": 2.3407, "step": 2713 }, { "epoch": 0.09, "grad_norm": 0.6878491044044495, "learning_rate": 1.972438733461353e-05, "loss": 2.3289, "step": 2714 }, { "epoch": 0.09, "grad_norm": 0.7263282537460327, "learning_rate": 1.9724139457107293e-05, "loss": 2.378, "step": 2715 }, { "epoch": 0.09, "grad_norm": 0.6850353479385376, "learning_rate": 1.972389146974343e-05, "loss": 2.317, "step": 2716 }, { "epoch": 0.09, "grad_norm": 0.6950661540031433, "learning_rate": 1.9723643372524737e-05, "loss": 2.3665, "step": 2717 }, { "epoch": 0.09, "grad_norm": 0.6544163823127747, "learning_rate": 1.9723395165454023e-05, "loss": 2.2455, "step": 2718 }, { "epoch": 0.09, "grad_norm": 0.6603690981864929, "learning_rate": 1.9723146848534086e-05, "loss": 2.3369, "step": 2719 }, { "epoch": 0.09, "grad_norm": 0.7105359435081482, "learning_rate": 1.972289842176774e-05, "loss": 2.2781, "step": 2720 }, { "epoch": 0.09, "grad_norm": 0.6735163927078247, "learning_rate": 1.972264988515778e-05, "loss": 2.3244, "step": 2721 }, { "epoch": 0.09, "grad_norm": 0.679388701915741, "learning_rate": 1.972240123870702e-05, "loss": 2.3253, "step": 2722 }, { "epoch": 0.09, "grad_norm": 0.7158270478248596, "learning_rate": 1.972215248241827e-05, "loss": 2.3268, "step": 2723 }, { "epoch": 0.09, "grad_norm": 0.6961209177970886, "learning_rate": 1.972190361629434e-05, "loss": 2.3232, "step": 2724 }, { "epoch": 0.09, "grad_norm": 0.6575139164924622, "learning_rate": 1.972165464033804e-05, "loss": 2.3428, "step": 2725 }, { "epoch": 0.09, "grad_norm": 0.6724652051925659, "learning_rate": 1.9721405554552184e-05, "loss": 2.3171, "step": 2726 }, { "epoch": 0.09, "grad_norm": 0.7010536789894104, "learning_rate": 1.9721156358939583e-05, "loss": 2.3046, "step": 2727 }, { "epoch": 0.09, "grad_norm": 0.676328718662262, "learning_rate": 1.9720907053503055e-05, "loss": 2.325, "step": 2728 }, { "epoch": 0.09, "grad_norm": 0.6754179000854492, "learning_rate": 1.9720657638245416e-05, "loss": 2.4289, "step": 2729 }, { "epoch": 0.09, "grad_norm": 0.6611103415489197, "learning_rate": 1.9720408113169484e-05, "loss": 2.2179, "step": 2730 }, { "epoch": 0.09, "grad_norm": 0.6659634709358215, "learning_rate": 1.972015847827808e-05, "loss": 2.2713, "step": 2731 }, { "epoch": 0.09, "grad_norm": 0.658402144908905, "learning_rate": 1.971990873357402e-05, "loss": 2.3174, "step": 2732 }, { "epoch": 0.09, "grad_norm": 0.6764083504676819, "learning_rate": 1.971965887906013e-05, "loss": 2.2581, "step": 2733 }, { "epoch": 0.09, "grad_norm": 0.6838564872741699, "learning_rate": 1.971940891473923e-05, "loss": 2.3845, "step": 2734 }, { "epoch": 0.09, "grad_norm": 0.664307177066803, "learning_rate": 1.971915884061414e-05, "loss": 2.3254, "step": 2735 }, { "epoch": 0.09, "grad_norm": 0.6847516298294067, "learning_rate": 1.9718908656687694e-05, "loss": 2.3191, "step": 2736 }, { "epoch": 0.09, "grad_norm": 0.6980885863304138, "learning_rate": 1.9718658362962708e-05, "loss": 2.3384, "step": 2737 }, { "epoch": 0.09, "grad_norm": 0.7042725682258606, "learning_rate": 1.9718407959442022e-05, "loss": 2.3456, "step": 2738 }, { "epoch": 0.09, "grad_norm": 0.684991717338562, "learning_rate": 1.9718157446128458e-05, "loss": 2.3108, "step": 2739 }, { "epoch": 0.09, "grad_norm": 0.64825439453125, "learning_rate": 1.971790682302484e-05, "loss": 2.2857, "step": 2740 }, { "epoch": 0.09, "grad_norm": 0.6802736520767212, "learning_rate": 1.971765609013401e-05, "loss": 2.2591, "step": 2741 }, { "epoch": 0.09, "grad_norm": 0.6619553565979004, "learning_rate": 1.9717405247458797e-05, "loss": 2.3043, "step": 2742 }, { "epoch": 0.09, "grad_norm": 0.7146410942077637, "learning_rate": 1.9717154295002035e-05, "loss": 2.3555, "step": 2743 }, { "epoch": 0.09, "grad_norm": 0.6785635948181152, "learning_rate": 1.971690323276656e-05, "loss": 2.2608, "step": 2744 }, { "epoch": 0.09, "grad_norm": 0.6665732860565186, "learning_rate": 1.9716652060755206e-05, "loss": 2.4102, "step": 2745 }, { "epoch": 0.09, "grad_norm": 0.6612725257873535, "learning_rate": 1.9716400778970807e-05, "loss": 2.3051, "step": 2746 }, { "epoch": 0.09, "grad_norm": 0.6648546457290649, "learning_rate": 1.9716149387416214e-05, "loss": 2.3456, "step": 2747 }, { "epoch": 0.09, "grad_norm": 0.6508253812789917, "learning_rate": 1.971589788609425e-05, "loss": 2.3343, "step": 2748 }, { "epoch": 0.09, "grad_norm": 0.7162386775016785, "learning_rate": 1.9715646275007773e-05, "loss": 2.3702, "step": 2749 }, { "epoch": 0.09, "grad_norm": 0.6513245105743408, "learning_rate": 1.9715394554159618e-05, "loss": 2.3236, "step": 2750 }, { "epoch": 0.09, "grad_norm": 0.7057669758796692, "learning_rate": 1.971514272355263e-05, "loss": 2.2712, "step": 2751 }, { "epoch": 0.09, "grad_norm": 0.6696560382843018, "learning_rate": 1.9714890783189647e-05, "loss": 2.3212, "step": 2752 }, { "epoch": 0.09, "grad_norm": 0.6656703948974609, "learning_rate": 1.9714638733073524e-05, "loss": 2.2859, "step": 2753 }, { "epoch": 0.09, "grad_norm": 0.6675062775611877, "learning_rate": 1.9714386573207108e-05, "loss": 2.3799, "step": 2754 }, { "epoch": 0.09, "grad_norm": 0.6664295196533203, "learning_rate": 1.9714134303593245e-05, "loss": 2.3152, "step": 2755 }, { "epoch": 0.09, "grad_norm": 0.6889835000038147, "learning_rate": 1.9713881924234785e-05, "loss": 2.2728, "step": 2756 }, { "epoch": 0.09, "grad_norm": 0.713476836681366, "learning_rate": 1.9713629435134584e-05, "loss": 2.318, "step": 2757 }, { "epoch": 0.09, "grad_norm": 0.6531257033348083, "learning_rate": 1.9713376836295484e-05, "loss": 2.2585, "step": 2758 }, { "epoch": 0.09, "grad_norm": 0.693151593208313, "learning_rate": 1.9713124127720346e-05, "loss": 2.3006, "step": 2759 }, { "epoch": 0.09, "grad_norm": 0.6975042819976807, "learning_rate": 1.9712871309412028e-05, "loss": 2.2839, "step": 2760 }, { "epoch": 0.09, "grad_norm": 0.6617156267166138, "learning_rate": 1.971261838137338e-05, "loss": 2.3373, "step": 2761 }, { "epoch": 0.09, "grad_norm": 0.7235597968101501, "learning_rate": 1.9712365343607262e-05, "loss": 2.3684, "step": 2762 }, { "epoch": 0.09, "grad_norm": 0.710724949836731, "learning_rate": 1.9712112196116533e-05, "loss": 2.3078, "step": 2763 }, { "epoch": 0.09, "grad_norm": 0.67494797706604, "learning_rate": 1.971185893890405e-05, "loss": 2.337, "step": 2764 }, { "epoch": 0.09, "grad_norm": 0.6771137118339539, "learning_rate": 1.971160557197268e-05, "loss": 2.3543, "step": 2765 }, { "epoch": 0.09, "grad_norm": 0.6714797616004944, "learning_rate": 1.971135209532528e-05, "loss": 2.2936, "step": 2766 }, { "epoch": 0.09, "grad_norm": 0.6499322652816772, "learning_rate": 1.9711098508964713e-05, "loss": 2.2439, "step": 2767 }, { "epoch": 0.09, "grad_norm": 0.686851441860199, "learning_rate": 1.9710844812893848e-05, "loss": 2.2578, "step": 2768 }, { "epoch": 0.09, "grad_norm": 0.6743660569190979, "learning_rate": 1.971059100711555e-05, "loss": 2.3012, "step": 2769 }, { "epoch": 0.09, "grad_norm": 0.6434255242347717, "learning_rate": 1.9710337091632686e-05, "loss": 2.3013, "step": 2770 }, { "epoch": 0.09, "grad_norm": 0.692061185836792, "learning_rate": 1.971008306644812e-05, "loss": 2.2622, "step": 2771 }, { "epoch": 0.09, "grad_norm": 0.6664226055145264, "learning_rate": 1.970982893156473e-05, "loss": 2.3594, "step": 2772 }, { "epoch": 0.09, "grad_norm": 0.6570258140563965, "learning_rate": 1.9709574686985388e-05, "loss": 2.2674, "step": 2773 }, { "epoch": 0.09, "grad_norm": 0.6971666812896729, "learning_rate": 1.9709320332712955e-05, "loss": 2.3696, "step": 2774 }, { "epoch": 0.09, "grad_norm": 0.6769872903823853, "learning_rate": 1.970906586875031e-05, "loss": 2.3101, "step": 2775 }, { "epoch": 0.09, "grad_norm": 0.6789970993995667, "learning_rate": 1.9708811295100333e-05, "loss": 2.3187, "step": 2776 }, { "epoch": 0.09, "grad_norm": 0.6811920404434204, "learning_rate": 1.9708556611765896e-05, "loss": 2.2682, "step": 2777 }, { "epoch": 0.09, "grad_norm": 0.6698407530784607, "learning_rate": 1.9708301818749875e-05, "loss": 2.2877, "step": 2778 }, { "epoch": 0.09, "grad_norm": 0.666793167591095, "learning_rate": 1.970804691605515e-05, "loss": 2.3332, "step": 2779 }, { "epoch": 0.09, "grad_norm": 0.697039008140564, "learning_rate": 1.97077919036846e-05, "loss": 2.3361, "step": 2780 }, { "epoch": 0.09, "grad_norm": 0.6642515063285828, "learning_rate": 1.9707536781641105e-05, "loss": 2.2603, "step": 2781 }, { "epoch": 0.09, "grad_norm": 0.6724225878715515, "learning_rate": 1.970728154992755e-05, "loss": 2.3526, "step": 2782 }, { "epoch": 0.09, "grad_norm": 0.6646852493286133, "learning_rate": 1.9707026208546817e-05, "loss": 2.3927, "step": 2783 }, { "epoch": 0.09, "grad_norm": 0.6520638465881348, "learning_rate": 1.9706770757501796e-05, "loss": 2.3084, "step": 2784 }, { "epoch": 0.09, "grad_norm": 0.6691730618476868, "learning_rate": 1.970651519679536e-05, "loss": 2.3428, "step": 2785 }, { "epoch": 0.09, "grad_norm": 0.6513739824295044, "learning_rate": 1.9706259526430407e-05, "loss": 2.2991, "step": 2786 }, { "epoch": 0.09, "grad_norm": 0.7030876278877258, "learning_rate": 1.970600374640982e-05, "loss": 2.3405, "step": 2787 }, { "epoch": 0.09, "grad_norm": 0.7199677228927612, "learning_rate": 1.9705747856736494e-05, "loss": 2.3145, "step": 2788 }, { "epoch": 0.09, "grad_norm": 0.6951326131820679, "learning_rate": 1.9705491857413314e-05, "loss": 2.3813, "step": 2789 }, { "epoch": 0.09, "grad_norm": 0.6960110664367676, "learning_rate": 1.9705235748443176e-05, "loss": 2.3517, "step": 2790 }, { "epoch": 0.09, "grad_norm": 0.6430880427360535, "learning_rate": 1.9704979529828976e-05, "loss": 2.335, "step": 2791 }, { "epoch": 0.09, "grad_norm": 0.6669966578483582, "learning_rate": 1.9704723201573598e-05, "loss": 2.3209, "step": 2792 }, { "epoch": 0.09, "grad_norm": 0.7018842697143555, "learning_rate": 1.970446676367995e-05, "loss": 2.2517, "step": 2793 }, { "epoch": 0.09, "grad_norm": 0.7080121040344238, "learning_rate": 1.9704210216150927e-05, "loss": 2.3463, "step": 2794 }, { "epoch": 0.09, "grad_norm": 0.6576233506202698, "learning_rate": 1.9703953558989413e-05, "loss": 2.3411, "step": 2795 }, { "epoch": 0.09, "grad_norm": 0.7154086828231812, "learning_rate": 1.970369679219833e-05, "loss": 2.3039, "step": 2796 }, { "epoch": 0.09, "grad_norm": 0.6594984531402588, "learning_rate": 1.9703439915780562e-05, "loss": 2.3591, "step": 2797 }, { "epoch": 0.09, "grad_norm": 0.6829261779785156, "learning_rate": 1.970318292973902e-05, "loss": 2.3764, "step": 2798 }, { "epoch": 0.09, "grad_norm": 0.6986505389213562, "learning_rate": 1.9702925834076598e-05, "loss": 2.3564, "step": 2799 }, { "epoch": 0.09, "grad_norm": 0.7279203534126282, "learning_rate": 1.9702668628796212e-05, "loss": 2.2904, "step": 2800 }, { "epoch": 0.09, "grad_norm": 0.6733790636062622, "learning_rate": 1.970241131390076e-05, "loss": 2.2949, "step": 2801 }, { "epoch": 0.09, "grad_norm": 0.6831963658332825, "learning_rate": 1.970215388939315e-05, "loss": 2.3072, "step": 2802 }, { "epoch": 0.09, "grad_norm": 0.6766725778579712, "learning_rate": 1.9701896355276292e-05, "loss": 2.2128, "step": 2803 }, { "epoch": 0.09, "grad_norm": 0.6662180423736572, "learning_rate": 1.9701638711553095e-05, "loss": 2.3418, "step": 2804 }, { "epoch": 0.09, "grad_norm": 0.6496817469596863, "learning_rate": 1.9701380958226472e-05, "loss": 2.3203, "step": 2805 }, { "epoch": 0.09, "grad_norm": 0.6867324709892273, "learning_rate": 1.970112309529933e-05, "loss": 2.3713, "step": 2806 }, { "epoch": 0.09, "grad_norm": 0.665769636631012, "learning_rate": 1.9700865122774587e-05, "loss": 2.3942, "step": 2807 }, { "epoch": 0.09, "grad_norm": 0.6567417979240417, "learning_rate": 1.970060704065515e-05, "loss": 2.2627, "step": 2808 }, { "epoch": 0.09, "grad_norm": 0.6605551838874817, "learning_rate": 1.9700348848943945e-05, "loss": 2.3088, "step": 2809 }, { "epoch": 0.09, "grad_norm": 0.6862800717353821, "learning_rate": 1.970009054764388e-05, "loss": 2.3394, "step": 2810 }, { "epoch": 0.09, "grad_norm": 0.6355836987495422, "learning_rate": 1.969983213675788e-05, "loss": 2.3071, "step": 2811 }, { "epoch": 0.09, "grad_norm": 0.7063869833946228, "learning_rate": 1.969957361628886e-05, "loss": 2.3235, "step": 2812 }, { "epoch": 0.09, "grad_norm": 0.6928921341896057, "learning_rate": 1.969931498623974e-05, "loss": 2.2941, "step": 2813 }, { "epoch": 0.09, "grad_norm": 0.6836203932762146, "learning_rate": 1.9699056246613445e-05, "loss": 2.3272, "step": 2814 }, { "epoch": 0.09, "grad_norm": 0.7501394152641296, "learning_rate": 1.9698797397412897e-05, "loss": 2.3011, "step": 2815 }, { "epoch": 0.09, "grad_norm": 0.6688721179962158, "learning_rate": 1.969853843864102e-05, "loss": 2.2575, "step": 2816 }, { "epoch": 0.09, "grad_norm": 0.703423261642456, "learning_rate": 1.969827937030074e-05, "loss": 2.3742, "step": 2817 }, { "epoch": 0.09, "grad_norm": 0.6901835799217224, "learning_rate": 1.9698020192394984e-05, "loss": 2.258, "step": 2818 }, { "epoch": 0.09, "grad_norm": 0.6559231877326965, "learning_rate": 1.9697760904926677e-05, "loss": 2.2929, "step": 2819 }, { "epoch": 0.09, "grad_norm": 0.6842488646507263, "learning_rate": 1.969750150789875e-05, "loss": 2.32, "step": 2820 }, { "epoch": 0.09, "grad_norm": 0.7518985271453857, "learning_rate": 1.969724200131414e-05, "loss": 2.3606, "step": 2821 }, { "epoch": 0.09, "grad_norm": 0.6831926703453064, "learning_rate": 1.969698238517577e-05, "loss": 2.2733, "step": 2822 }, { "epoch": 0.09, "grad_norm": 0.7015331387519836, "learning_rate": 1.9696722659486575e-05, "loss": 2.3697, "step": 2823 }, { "epoch": 0.09, "grad_norm": 0.664523184299469, "learning_rate": 1.969646282424949e-05, "loss": 2.39, "step": 2824 }, { "epoch": 0.09, "grad_norm": 0.67743319272995, "learning_rate": 1.9696202879467453e-05, "loss": 2.3874, "step": 2825 }, { "epoch": 0.09, "grad_norm": 0.6681963205337524, "learning_rate": 1.9695942825143394e-05, "loss": 2.352, "step": 2826 }, { "epoch": 0.09, "grad_norm": 0.6769992113113403, "learning_rate": 1.969568266128026e-05, "loss": 2.3296, "step": 2827 }, { "epoch": 0.09, "grad_norm": 0.6578978300094604, "learning_rate": 1.9695422387880986e-05, "loss": 2.298, "step": 2828 }, { "epoch": 0.09, "grad_norm": 0.6683707237243652, "learning_rate": 1.9695162004948506e-05, "loss": 2.3166, "step": 2829 }, { "epoch": 0.09, "grad_norm": 0.6699895262718201, "learning_rate": 1.9694901512485775e-05, "loss": 2.3068, "step": 2830 }, { "epoch": 0.09, "grad_norm": 0.6741325259208679, "learning_rate": 1.9694640910495724e-05, "loss": 2.3837, "step": 2831 }, { "epoch": 0.09, "grad_norm": 0.6590512990951538, "learning_rate": 1.96943801989813e-05, "loss": 2.41, "step": 2832 }, { "epoch": 0.09, "grad_norm": 0.6686561703681946, "learning_rate": 1.9694119377945454e-05, "loss": 2.3824, "step": 2833 }, { "epoch": 0.09, "grad_norm": 0.6950041055679321, "learning_rate": 1.9693858447391127e-05, "loss": 2.3005, "step": 2834 }, { "epoch": 0.09, "grad_norm": 0.7246048450469971, "learning_rate": 1.9693597407321267e-05, "loss": 2.3192, "step": 2835 }, { "epoch": 0.09, "grad_norm": 0.6865792870521545, "learning_rate": 1.9693336257738826e-05, "loss": 2.4299, "step": 2836 }, { "epoch": 0.09, "grad_norm": 0.6889941692352295, "learning_rate": 1.9693074998646754e-05, "loss": 2.3421, "step": 2837 }, { "epoch": 0.09, "grad_norm": 0.6586092710494995, "learning_rate": 1.9692813630047997e-05, "loss": 2.2776, "step": 2838 }, { "epoch": 0.09, "grad_norm": 0.6731042861938477, "learning_rate": 1.9692552151945517e-05, "loss": 2.3016, "step": 2839 }, { "epoch": 0.09, "grad_norm": 0.6686066389083862, "learning_rate": 1.969229056434226e-05, "loss": 2.3174, "step": 2840 }, { "epoch": 0.09, "grad_norm": 0.6902161836624146, "learning_rate": 1.9692028867241185e-05, "loss": 2.3257, "step": 2841 }, { "epoch": 0.09, "grad_norm": 0.6358218193054199, "learning_rate": 1.969176706064525e-05, "loss": 2.2772, "step": 2842 }, { "epoch": 0.09, "grad_norm": 0.6587114334106445, "learning_rate": 1.969150514455741e-05, "loss": 2.3158, "step": 2843 }, { "epoch": 0.09, "grad_norm": 0.6809013485908508, "learning_rate": 1.9691243118980624e-05, "loss": 2.2681, "step": 2844 }, { "epoch": 0.09, "grad_norm": 0.6586540341377258, "learning_rate": 1.9690980983917853e-05, "loss": 2.3074, "step": 2845 }, { "epoch": 0.09, "grad_norm": 0.6920683979988098, "learning_rate": 1.9690718739372058e-05, "loss": 2.405, "step": 2846 }, { "epoch": 0.09, "grad_norm": 0.689683198928833, "learning_rate": 1.96904563853462e-05, "loss": 2.3508, "step": 2847 }, { "epoch": 0.09, "grad_norm": 0.6630246043205261, "learning_rate": 1.969019392184325e-05, "loss": 2.3226, "step": 2848 }, { "epoch": 0.09, "grad_norm": 0.6757833957672119, "learning_rate": 1.9689931348866163e-05, "loss": 2.3491, "step": 2849 }, { "epoch": 0.09, "grad_norm": 0.6617912650108337, "learning_rate": 1.9689668666417914e-05, "loss": 2.322, "step": 2850 }, { "epoch": 0.09, "grad_norm": 0.6862687468528748, "learning_rate": 1.9689405874501465e-05, "loss": 2.2738, "step": 2851 }, { "epoch": 0.09, "grad_norm": 0.697131335735321, "learning_rate": 1.9689142973119787e-05, "loss": 2.3246, "step": 2852 }, { "epoch": 0.09, "grad_norm": 0.7109963297843933, "learning_rate": 1.9688879962275852e-05, "loss": 2.3636, "step": 2853 }, { "epoch": 0.09, "grad_norm": 0.6603410840034485, "learning_rate": 1.968861684197263e-05, "loss": 2.3731, "step": 2854 }, { "epoch": 0.09, "grad_norm": 0.6609280109405518, "learning_rate": 1.968835361221309e-05, "loss": 2.3883, "step": 2855 }, { "epoch": 0.1, "grad_norm": 0.6524820923805237, "learning_rate": 1.9688090273000208e-05, "loss": 2.3379, "step": 2856 }, { "epoch": 0.1, "grad_norm": 0.7151212096214294, "learning_rate": 1.9687826824336963e-05, "loss": 2.344, "step": 2857 }, { "epoch": 0.1, "grad_norm": 0.7164432406425476, "learning_rate": 1.9687563266226328e-05, "loss": 2.3213, "step": 2858 }, { "epoch": 0.1, "grad_norm": 0.6699766516685486, "learning_rate": 1.968729959867128e-05, "loss": 2.3085, "step": 2859 }, { "epoch": 0.1, "grad_norm": 0.6621629595756531, "learning_rate": 1.96870358216748e-05, "loss": 2.3541, "step": 2860 }, { "epoch": 0.1, "grad_norm": 0.7014157772064209, "learning_rate": 1.9686771935239865e-05, "loss": 2.2916, "step": 2861 }, { "epoch": 0.1, "grad_norm": 0.6799154281616211, "learning_rate": 1.9686507939369453e-05, "loss": 2.2846, "step": 2862 }, { "epoch": 0.1, "grad_norm": 0.7110755443572998, "learning_rate": 1.968624383406656e-05, "loss": 2.2864, "step": 2863 }, { "epoch": 0.1, "grad_norm": 0.7277179956436157, "learning_rate": 1.9685979619334154e-05, "loss": 2.3314, "step": 2864 }, { "epoch": 0.1, "grad_norm": 0.7831695079803467, "learning_rate": 1.968571529517523e-05, "loss": 2.3018, "step": 2865 }, { "epoch": 0.1, "grad_norm": 0.6849926710128784, "learning_rate": 1.968545086159277e-05, "loss": 2.3282, "step": 2866 }, { "epoch": 0.1, "grad_norm": 0.6711964011192322, "learning_rate": 1.9685186318589763e-05, "loss": 2.3194, "step": 2867 }, { "epoch": 0.1, "grad_norm": 0.7245537042617798, "learning_rate": 1.9684921666169196e-05, "loss": 2.2753, "step": 2868 }, { "epoch": 0.1, "grad_norm": 0.7223377227783203, "learning_rate": 1.968465690433406e-05, "loss": 2.3601, "step": 2869 }, { "epoch": 0.1, "grad_norm": 0.6900066137313843, "learning_rate": 1.968439203308735e-05, "loss": 2.2858, "step": 2870 }, { "epoch": 0.1, "grad_norm": 0.6660450100898743, "learning_rate": 1.9684127052432048e-05, "loss": 2.352, "step": 2871 }, { "epoch": 0.1, "grad_norm": 0.6990333199501038, "learning_rate": 1.9683861962371157e-05, "loss": 2.3597, "step": 2872 }, { "epoch": 0.1, "grad_norm": 0.706770122051239, "learning_rate": 1.968359676290767e-05, "loss": 2.3662, "step": 2873 }, { "epoch": 0.1, "grad_norm": 0.6692976355552673, "learning_rate": 1.968333145404458e-05, "loss": 2.3545, "step": 2874 }, { "epoch": 0.1, "grad_norm": 0.6634491682052612, "learning_rate": 1.9683066035784887e-05, "loss": 2.3419, "step": 2875 }, { "epoch": 0.1, "grad_norm": 0.6848682165145874, "learning_rate": 1.968280050813159e-05, "loss": 2.2765, "step": 2876 }, { "epoch": 0.1, "grad_norm": 0.6789926290512085, "learning_rate": 1.9682534871087687e-05, "loss": 2.3227, "step": 2877 }, { "epoch": 0.1, "grad_norm": 0.7044233679771423, "learning_rate": 1.968226912465618e-05, "loss": 2.3168, "step": 2878 }, { "epoch": 0.1, "grad_norm": 0.6985860466957092, "learning_rate": 1.9682003268840065e-05, "loss": 2.3249, "step": 2879 }, { "epoch": 0.1, "grad_norm": 0.6989080309867859, "learning_rate": 1.9681737303642358e-05, "loss": 2.3902, "step": 2880 }, { "epoch": 0.1, "grad_norm": 0.6801905632019043, "learning_rate": 1.9681471229066056e-05, "loss": 2.2602, "step": 2881 }, { "epoch": 0.1, "grad_norm": 0.6810482740402222, "learning_rate": 1.9681205045114165e-05, "loss": 2.3372, "step": 2882 }, { "epoch": 0.1, "grad_norm": 0.690697431564331, "learning_rate": 1.968093875178969e-05, "loss": 2.3505, "step": 2883 }, { "epoch": 0.1, "grad_norm": 0.688677191734314, "learning_rate": 1.968067234909565e-05, "loss": 2.3376, "step": 2884 }, { "epoch": 0.1, "grad_norm": 0.6849868297576904, "learning_rate": 1.968040583703504e-05, "loss": 2.3899, "step": 2885 }, { "epoch": 0.1, "grad_norm": 0.6812607049942017, "learning_rate": 1.9680139215610883e-05, "loss": 2.3214, "step": 2886 }, { "epoch": 0.1, "grad_norm": 0.6999800205230713, "learning_rate": 1.9679872484826183e-05, "loss": 2.3885, "step": 2887 }, { "epoch": 0.1, "grad_norm": 0.6741604804992676, "learning_rate": 1.967960564468396e-05, "loss": 2.3095, "step": 2888 }, { "epoch": 0.1, "grad_norm": 0.68731290102005, "learning_rate": 1.967933869518722e-05, "loss": 2.3032, "step": 2889 }, { "epoch": 0.1, "grad_norm": 0.7445862889289856, "learning_rate": 1.967907163633899e-05, "loss": 2.3417, "step": 2890 }, { "epoch": 0.1, "grad_norm": 0.6830973029136658, "learning_rate": 1.9678804468142275e-05, "loss": 2.3185, "step": 2891 }, { "epoch": 0.1, "grad_norm": 0.6858906149864197, "learning_rate": 1.9678537190600105e-05, "loss": 2.369, "step": 2892 }, { "epoch": 0.1, "grad_norm": 0.6888479590415955, "learning_rate": 1.9678269803715492e-05, "loss": 2.364, "step": 2893 }, { "epoch": 0.1, "grad_norm": 0.6803169846534729, "learning_rate": 1.9678002307491458e-05, "loss": 2.3447, "step": 2894 }, { "epoch": 0.1, "grad_norm": 0.7076132297515869, "learning_rate": 1.9677734701931026e-05, "loss": 2.2798, "step": 2895 }, { "epoch": 0.1, "grad_norm": 0.6628857254981995, "learning_rate": 1.967746698703722e-05, "loss": 2.2807, "step": 2896 }, { "epoch": 0.1, "grad_norm": 0.6583648920059204, "learning_rate": 1.9677199162813064e-05, "loss": 2.3475, "step": 2897 }, { "epoch": 0.1, "grad_norm": 0.6615450978279114, "learning_rate": 1.9676931229261583e-05, "loss": 2.2934, "step": 2898 }, { "epoch": 0.1, "grad_norm": 0.6884331107139587, "learning_rate": 1.96766631863858e-05, "loss": 2.3462, "step": 2899 }, { "epoch": 0.1, "grad_norm": 0.6690109968185425, "learning_rate": 1.9676395034188753e-05, "loss": 2.3161, "step": 2900 }, { "epoch": 0.1, "grad_norm": 0.693662166595459, "learning_rate": 1.9676126772673464e-05, "loss": 2.2918, "step": 2901 }, { "epoch": 0.1, "grad_norm": 0.6729449033737183, "learning_rate": 1.9675858401842965e-05, "loss": 2.3063, "step": 2902 }, { "epoch": 0.1, "grad_norm": 0.72861647605896, "learning_rate": 1.967558992170029e-05, "loss": 2.2797, "step": 2903 }, { "epoch": 0.1, "grad_norm": 0.692997932434082, "learning_rate": 1.9675321332248465e-05, "loss": 2.3049, "step": 2904 }, { "epoch": 0.1, "grad_norm": 0.6852849721908569, "learning_rate": 1.9675052633490537e-05, "loss": 2.3263, "step": 2905 }, { "epoch": 0.1, "grad_norm": 0.6654704213142395, "learning_rate": 1.967478382542953e-05, "loss": 2.3684, "step": 2906 }, { "epoch": 0.1, "grad_norm": 0.6582489013671875, "learning_rate": 1.9674514908068486e-05, "loss": 2.3281, "step": 2907 }, { "epoch": 0.1, "grad_norm": 0.6841027736663818, "learning_rate": 1.9674245881410445e-05, "loss": 2.3741, "step": 2908 }, { "epoch": 0.1, "grad_norm": 0.6436604261398315, "learning_rate": 1.9673976745458443e-05, "loss": 2.3055, "step": 2909 }, { "epoch": 0.1, "grad_norm": 0.6485874056816101, "learning_rate": 1.967370750021552e-05, "loss": 2.3059, "step": 2910 }, { "epoch": 0.1, "grad_norm": 0.6804980039596558, "learning_rate": 1.9673438145684718e-05, "loss": 2.2776, "step": 2911 }, { "epoch": 0.1, "grad_norm": 0.6365377902984619, "learning_rate": 1.967316868186908e-05, "loss": 2.2342, "step": 2912 }, { "epoch": 0.1, "grad_norm": 0.6652557849884033, "learning_rate": 1.9672899108771655e-05, "loss": 2.3008, "step": 2913 }, { "epoch": 0.1, "grad_norm": 0.6715561747550964, "learning_rate": 1.9672629426395482e-05, "loss": 2.2901, "step": 2914 }, { "epoch": 0.1, "grad_norm": 0.6965147256851196, "learning_rate": 1.9672359634743613e-05, "loss": 2.3019, "step": 2915 }, { "epoch": 0.1, "grad_norm": 0.6825271844863892, "learning_rate": 1.9672089733819094e-05, "loss": 2.3214, "step": 2916 }, { "epoch": 0.1, "grad_norm": 0.7176769375801086, "learning_rate": 1.967181972362497e-05, "loss": 2.3139, "step": 2917 }, { "epoch": 0.1, "grad_norm": 0.677059531211853, "learning_rate": 1.9671549604164295e-05, "loss": 2.3367, "step": 2918 }, { "epoch": 0.1, "grad_norm": 0.6437870860099792, "learning_rate": 1.9671279375440125e-05, "loss": 2.3398, "step": 2919 }, { "epoch": 0.1, "grad_norm": 0.6716718077659607, "learning_rate": 1.9671009037455505e-05, "loss": 2.3098, "step": 2920 }, { "epoch": 0.1, "grad_norm": 0.7423004508018494, "learning_rate": 1.9670738590213495e-05, "loss": 2.3374, "step": 2921 }, { "epoch": 0.1, "grad_norm": 0.6660673022270203, "learning_rate": 1.9670468033717146e-05, "loss": 2.2926, "step": 2922 }, { "epoch": 0.1, "grad_norm": 0.7292910814285278, "learning_rate": 1.9670197367969515e-05, "loss": 2.3283, "step": 2923 }, { "epoch": 0.1, "grad_norm": 0.683182954788208, "learning_rate": 1.9669926592973665e-05, "loss": 2.3062, "step": 2924 }, { "epoch": 0.1, "grad_norm": 0.65863037109375, "learning_rate": 1.966965570873265e-05, "loss": 2.324, "step": 2925 }, { "epoch": 0.1, "grad_norm": 0.6742762327194214, "learning_rate": 1.966938471524953e-05, "loss": 2.2788, "step": 2926 }, { "epoch": 0.1, "grad_norm": 0.6800342202186584, "learning_rate": 1.966911361252737e-05, "loss": 2.2504, "step": 2927 }, { "epoch": 0.1, "grad_norm": 0.6733680367469788, "learning_rate": 1.966884240056923e-05, "loss": 2.3576, "step": 2928 }, { "epoch": 0.1, "grad_norm": 0.6734517216682434, "learning_rate": 1.966857107937818e-05, "loss": 2.3624, "step": 2929 }, { "epoch": 0.1, "grad_norm": 0.6791380643844604, "learning_rate": 1.9668299648957274e-05, "loss": 2.2488, "step": 2930 }, { "epoch": 0.1, "grad_norm": 0.6863561868667603, "learning_rate": 1.966802810930959e-05, "loss": 2.2741, "step": 2931 }, { "epoch": 0.1, "grad_norm": 0.7177913784980774, "learning_rate": 1.9667756460438187e-05, "loss": 2.3396, "step": 2932 }, { "epoch": 0.1, "grad_norm": 0.7329105734825134, "learning_rate": 1.9667484702346137e-05, "loss": 2.3455, "step": 2933 }, { "epoch": 0.1, "grad_norm": 0.6803003549575806, "learning_rate": 1.966721283503651e-05, "loss": 2.2926, "step": 2934 }, { "epoch": 0.1, "grad_norm": 0.6709240078926086, "learning_rate": 1.966694085851238e-05, "loss": 2.2645, "step": 2935 }, { "epoch": 0.1, "grad_norm": 0.6546924114227295, "learning_rate": 1.9666668772776816e-05, "loss": 2.2866, "step": 2936 }, { "epoch": 0.1, "grad_norm": 0.6571659445762634, "learning_rate": 1.9666396577832894e-05, "loss": 2.2654, "step": 2937 }, { "epoch": 0.1, "grad_norm": 0.6824886798858643, "learning_rate": 1.966612427368369e-05, "loss": 2.3668, "step": 2938 }, { "epoch": 0.1, "grad_norm": 0.6801427602767944, "learning_rate": 1.966585186033228e-05, "loss": 2.3443, "step": 2939 }, { "epoch": 0.1, "grad_norm": 0.6745694875717163, "learning_rate": 1.9665579337781736e-05, "loss": 2.3563, "step": 2940 }, { "epoch": 0.1, "grad_norm": 0.6681062579154968, "learning_rate": 1.9665306706035144e-05, "loss": 2.309, "step": 2941 }, { "epoch": 0.1, "grad_norm": 0.6749881505966187, "learning_rate": 1.966503396509558e-05, "loss": 2.2378, "step": 2942 }, { "epoch": 0.1, "grad_norm": 0.7094277143478394, "learning_rate": 1.966476111496613e-05, "loss": 2.3236, "step": 2943 }, { "epoch": 0.1, "grad_norm": 0.7237303853034973, "learning_rate": 1.966448815564987e-05, "loss": 2.321, "step": 2944 }, { "epoch": 0.1, "grad_norm": 0.6517361998558044, "learning_rate": 1.9664215087149887e-05, "loss": 2.3316, "step": 2945 }, { "epoch": 0.1, "grad_norm": 0.7197785973548889, "learning_rate": 1.9663941909469266e-05, "loss": 2.3227, "step": 2946 }, { "epoch": 0.1, "grad_norm": 0.6674231290817261, "learning_rate": 1.9663668622611092e-05, "loss": 2.3273, "step": 2947 }, { "epoch": 0.1, "grad_norm": 0.6753541827201843, "learning_rate": 1.9663395226578456e-05, "loss": 2.3204, "step": 2948 }, { "epoch": 0.1, "grad_norm": 0.6835004091262817, "learning_rate": 1.966312172137444e-05, "loss": 2.2465, "step": 2949 }, { "epoch": 0.1, "grad_norm": 0.6827998161315918, "learning_rate": 1.966284810700214e-05, "loss": 2.3224, "step": 2950 }, { "epoch": 0.1, "grad_norm": 0.6671438813209534, "learning_rate": 1.9662574383464645e-05, "loss": 2.2785, "step": 2951 }, { "epoch": 0.1, "grad_norm": 0.6737426519393921, "learning_rate": 1.9662300550765047e-05, "loss": 2.3119, "step": 2952 }, { "epoch": 0.1, "grad_norm": 0.6862102150917053, "learning_rate": 1.9662026608906443e-05, "loss": 2.2767, "step": 2953 }, { "epoch": 0.1, "grad_norm": 0.6574042439460754, "learning_rate": 1.9661752557891922e-05, "loss": 2.2886, "step": 2954 }, { "epoch": 0.1, "grad_norm": 0.6658502817153931, "learning_rate": 1.9661478397724582e-05, "loss": 2.3101, "step": 2955 }, { "epoch": 0.1, "grad_norm": 0.6832183599472046, "learning_rate": 1.9661204128407527e-05, "loss": 2.2869, "step": 2956 }, { "epoch": 0.1, "grad_norm": 0.6775646209716797, "learning_rate": 1.966092974994384e-05, "loss": 2.2984, "step": 2957 }, { "epoch": 0.1, "grad_norm": 0.6615846753120422, "learning_rate": 1.9660655262336637e-05, "loss": 2.2624, "step": 2958 }, { "epoch": 0.1, "grad_norm": 0.6634668111801147, "learning_rate": 1.9660380665589014e-05, "loss": 2.3543, "step": 2959 }, { "epoch": 0.1, "grad_norm": 0.6529388427734375, "learning_rate": 1.966010595970407e-05, "loss": 2.3351, "step": 2960 }, { "epoch": 0.1, "grad_norm": 0.6666375398635864, "learning_rate": 1.965983114468491e-05, "loss": 2.2789, "step": 2961 }, { "epoch": 0.1, "grad_norm": 0.643934965133667, "learning_rate": 1.9659556220534637e-05, "loss": 2.2641, "step": 2962 }, { "epoch": 0.1, "grad_norm": 0.6774589419364929, "learning_rate": 1.965928118725636e-05, "loss": 2.3048, "step": 2963 }, { "epoch": 0.1, "grad_norm": 0.6716755032539368, "learning_rate": 1.9659006044853185e-05, "loss": 2.2985, "step": 2964 }, { "epoch": 0.1, "grad_norm": 0.6758524775505066, "learning_rate": 1.9658730793328223e-05, "loss": 2.2921, "step": 2965 }, { "epoch": 0.1, "grad_norm": 0.6557595133781433, "learning_rate": 1.9658455432684577e-05, "loss": 2.3303, "step": 2966 }, { "epoch": 0.1, "grad_norm": 0.7137976288795471, "learning_rate": 1.9658179962925364e-05, "loss": 2.3339, "step": 2967 }, { "epoch": 0.1, "grad_norm": 0.6865337491035461, "learning_rate": 1.9657904384053694e-05, "loss": 2.2815, "step": 2968 }, { "epoch": 0.1, "grad_norm": 0.7109874486923218, "learning_rate": 1.9657628696072682e-05, "loss": 2.2711, "step": 2969 }, { "epoch": 0.1, "grad_norm": 0.7054360508918762, "learning_rate": 1.9657352898985437e-05, "loss": 2.287, "step": 2970 }, { "epoch": 0.1, "grad_norm": 0.6785338521003723, "learning_rate": 1.9657076992795082e-05, "loss": 2.3105, "step": 2971 }, { "epoch": 0.1, "grad_norm": 0.6620676517486572, "learning_rate": 1.965680097750473e-05, "loss": 2.3705, "step": 2972 }, { "epoch": 0.1, "grad_norm": 0.7038158178329468, "learning_rate": 1.96565248531175e-05, "loss": 2.3203, "step": 2973 }, { "epoch": 0.1, "grad_norm": 0.6762446761131287, "learning_rate": 1.965624861963651e-05, "loss": 2.3627, "step": 2974 }, { "epoch": 0.1, "grad_norm": 0.6897306442260742, "learning_rate": 1.9655972277064883e-05, "loss": 2.3561, "step": 2975 }, { "epoch": 0.1, "grad_norm": 0.7097106575965881, "learning_rate": 1.965569582540574e-05, "loss": 2.3221, "step": 2976 }, { "epoch": 0.1, "grad_norm": 0.6775929927825928, "learning_rate": 1.9655419264662207e-05, "loss": 2.29, "step": 2977 }, { "epoch": 0.1, "grad_norm": 0.6804125905036926, "learning_rate": 1.96551425948374e-05, "loss": 2.3053, "step": 2978 }, { "epoch": 0.1, "grad_norm": 0.7170947194099426, "learning_rate": 1.965486581593446e-05, "loss": 2.3726, "step": 2979 }, { "epoch": 0.1, "grad_norm": 0.6982647180557251, "learning_rate": 1.9654588927956493e-05, "loss": 2.3388, "step": 2980 }, { "epoch": 0.1, "grad_norm": 0.6818144917488098, "learning_rate": 1.9654311930906647e-05, "loss": 2.3887, "step": 2981 }, { "epoch": 0.1, "grad_norm": 0.6808184385299683, "learning_rate": 1.965403482478804e-05, "loss": 2.2705, "step": 2982 }, { "epoch": 0.1, "grad_norm": 0.7061893939971924, "learning_rate": 1.9653757609603805e-05, "loss": 2.3364, "step": 2983 }, { "epoch": 0.1, "grad_norm": 0.6770671010017395, "learning_rate": 1.9653480285357075e-05, "loss": 2.3129, "step": 2984 }, { "epoch": 0.1, "grad_norm": 0.6462687849998474, "learning_rate": 1.965320285205098e-05, "loss": 2.3095, "step": 2985 }, { "epoch": 0.1, "grad_norm": 0.6441843509674072, "learning_rate": 1.9652925309688657e-05, "loss": 2.3125, "step": 2986 }, { "epoch": 0.1, "grad_norm": 0.6661598086357117, "learning_rate": 1.965264765827324e-05, "loss": 2.305, "step": 2987 }, { "epoch": 0.1, "grad_norm": 0.6770630478858948, "learning_rate": 1.9652369897807873e-05, "loss": 2.3224, "step": 2988 }, { "epoch": 0.1, "grad_norm": 0.6821475625038147, "learning_rate": 1.965209202829568e-05, "loss": 2.2615, "step": 2989 }, { "epoch": 0.1, "grad_norm": 0.6795044541358948, "learning_rate": 1.9651814049739813e-05, "loss": 2.2746, "step": 2990 }, { "epoch": 0.1, "grad_norm": 0.6581634283065796, "learning_rate": 1.9651535962143406e-05, "loss": 2.3292, "step": 2991 }, { "epoch": 0.1, "grad_norm": 0.7273206114768982, "learning_rate": 1.9651257765509602e-05, "loss": 2.243, "step": 2992 }, { "epoch": 0.1, "grad_norm": 0.6700400710105896, "learning_rate": 1.9650979459841544e-05, "loss": 2.2106, "step": 2993 }, { "epoch": 0.1, "grad_norm": 0.6745424866676331, "learning_rate": 1.9650701045142378e-05, "loss": 2.2631, "step": 2994 }, { "epoch": 0.1, "grad_norm": 0.6996797919273376, "learning_rate": 1.9650422521415245e-05, "loss": 2.3256, "step": 2995 }, { "epoch": 0.1, "grad_norm": 0.6645322442054749, "learning_rate": 1.9650143888663293e-05, "loss": 2.2706, "step": 2996 }, { "epoch": 0.1, "grad_norm": 0.6626133322715759, "learning_rate": 1.9649865146889673e-05, "loss": 2.3176, "step": 2997 }, { "epoch": 0.1, "grad_norm": 0.7161187529563904, "learning_rate": 1.9649586296097532e-05, "loss": 2.3507, "step": 2998 }, { "epoch": 0.1, "grad_norm": 0.7267447113990784, "learning_rate": 1.964930733629002e-05, "loss": 2.3298, "step": 2999 }, { "epoch": 0.1, "grad_norm": 0.6988711357116699, "learning_rate": 1.964902826747029e-05, "loss": 2.3242, "step": 3000 }, { "epoch": 0.1, "grad_norm": 0.6781612038612366, "learning_rate": 1.964874908964149e-05, "loss": 2.3394, "step": 3001 }, { "epoch": 0.1, "grad_norm": 0.663555383682251, "learning_rate": 1.9648469802806777e-05, "loss": 2.2873, "step": 3002 }, { "epoch": 0.1, "grad_norm": 0.6918426156044006, "learning_rate": 1.964819040696931e-05, "loss": 2.243, "step": 3003 }, { "epoch": 0.1, "grad_norm": 0.6672970056533813, "learning_rate": 1.964791090213224e-05, "loss": 2.2704, "step": 3004 }, { "epoch": 0.1, "grad_norm": 0.766591489315033, "learning_rate": 1.964763128829873e-05, "loss": 2.3628, "step": 3005 }, { "epoch": 0.1, "grad_norm": 0.694460391998291, "learning_rate": 1.9647351565471932e-05, "loss": 2.2729, "step": 3006 }, { "epoch": 0.1, "grad_norm": 0.7073972225189209, "learning_rate": 1.9647071733655013e-05, "loss": 2.3376, "step": 3007 }, { "epoch": 0.1, "grad_norm": 0.6735703349113464, "learning_rate": 1.964679179285113e-05, "loss": 2.2827, "step": 3008 }, { "epoch": 0.1, "grad_norm": 0.6921026110649109, "learning_rate": 1.9646511743063447e-05, "loss": 2.3041, "step": 3009 }, { "epoch": 0.1, "grad_norm": 0.7243918776512146, "learning_rate": 1.9646231584295128e-05, "loss": 2.2428, "step": 3010 }, { "epoch": 0.1, "grad_norm": 0.7014862298965454, "learning_rate": 1.964595131654934e-05, "loss": 2.2854, "step": 3011 }, { "epoch": 0.1, "grad_norm": 0.7269584536552429, "learning_rate": 1.964567093982924e-05, "loss": 2.3257, "step": 3012 }, { "epoch": 0.1, "grad_norm": 0.7346311211585999, "learning_rate": 1.9645390454138008e-05, "loss": 2.3521, "step": 3013 }, { "epoch": 0.1, "grad_norm": 0.6559798717498779, "learning_rate": 1.964510985947881e-05, "loss": 2.3009, "step": 3014 }, { "epoch": 0.1, "grad_norm": 0.7006489038467407, "learning_rate": 1.964482915585481e-05, "loss": 2.3408, "step": 3015 }, { "epoch": 0.1, "grad_norm": 0.7058645486831665, "learning_rate": 1.964454834326918e-05, "loss": 2.3534, "step": 3016 }, { "epoch": 0.1, "grad_norm": 0.7061296701431274, "learning_rate": 1.9644267421725098e-05, "loss": 2.318, "step": 3017 }, { "epoch": 0.1, "grad_norm": 0.710426390171051, "learning_rate": 1.9643986391225733e-05, "loss": 2.351, "step": 3018 }, { "epoch": 0.1, "grad_norm": 0.664726197719574, "learning_rate": 1.9643705251774265e-05, "loss": 2.3205, "step": 3019 }, { "epoch": 0.1, "grad_norm": 0.7297730445861816, "learning_rate": 1.9643424003373866e-05, "loss": 2.286, "step": 3020 }, { "epoch": 0.1, "grad_norm": 0.6999923586845398, "learning_rate": 1.9643142646027712e-05, "loss": 2.3199, "step": 3021 }, { "epoch": 0.1, "grad_norm": 0.7477583885192871, "learning_rate": 1.9642861179738987e-05, "loss": 2.2819, "step": 3022 }, { "epoch": 0.1, "grad_norm": 0.685806393623352, "learning_rate": 1.9642579604510863e-05, "loss": 2.3025, "step": 3023 }, { "epoch": 0.1, "grad_norm": 0.6749387383460999, "learning_rate": 1.964229792034653e-05, "loss": 2.3002, "step": 3024 }, { "epoch": 0.1, "grad_norm": 0.6618485450744629, "learning_rate": 1.9642016127249163e-05, "loss": 2.2874, "step": 3025 }, { "epoch": 0.1, "grad_norm": 0.7302093505859375, "learning_rate": 1.9641734225221953e-05, "loss": 2.2887, "step": 3026 }, { "epoch": 0.1, "grad_norm": 0.6852867007255554, "learning_rate": 1.9641452214268074e-05, "loss": 2.3125, "step": 3027 }, { "epoch": 0.1, "grad_norm": 0.65642911195755, "learning_rate": 1.964117009439072e-05, "loss": 2.3186, "step": 3028 }, { "epoch": 0.1, "grad_norm": 0.6707789301872253, "learning_rate": 1.964088786559308e-05, "loss": 2.3398, "step": 3029 }, { "epoch": 0.1, "grad_norm": 0.6963332891464233, "learning_rate": 1.964060552787834e-05, "loss": 2.3412, "step": 3030 }, { "epoch": 0.1, "grad_norm": 0.6832951903343201, "learning_rate": 1.9640323081249682e-05, "loss": 2.3255, "step": 3031 }, { "epoch": 0.1, "grad_norm": 0.6812306642532349, "learning_rate": 1.9640040525710312e-05, "loss": 2.262, "step": 3032 }, { "epoch": 0.1, "grad_norm": 0.6663896441459656, "learning_rate": 1.9639757861263406e-05, "loss": 2.3401, "step": 3033 }, { "epoch": 0.1, "grad_norm": 0.6947149038314819, "learning_rate": 1.963947508791217e-05, "loss": 2.3147, "step": 3034 }, { "epoch": 0.1, "grad_norm": 0.6737911105155945, "learning_rate": 1.963919220565979e-05, "loss": 2.2706, "step": 3035 }, { "epoch": 0.1, "grad_norm": 0.6445004940032959, "learning_rate": 1.9638909214509468e-05, "loss": 2.3945, "step": 3036 }, { "epoch": 0.1, "grad_norm": 0.6594206094741821, "learning_rate": 1.96386261144644e-05, "loss": 2.2973, "step": 3037 }, { "epoch": 0.1, "grad_norm": 0.6794245839118958, "learning_rate": 1.9638342905527782e-05, "loss": 2.3288, "step": 3038 }, { "epoch": 0.1, "grad_norm": 0.6590719819068909, "learning_rate": 1.9638059587702815e-05, "loss": 2.3301, "step": 3039 }, { "epoch": 0.1, "grad_norm": 0.7198504209518433, "learning_rate": 1.9637776160992697e-05, "loss": 2.2887, "step": 3040 }, { "epoch": 0.1, "grad_norm": 0.6979372501373291, "learning_rate": 1.9637492625400635e-05, "loss": 2.3052, "step": 3041 }, { "epoch": 0.1, "grad_norm": 0.6512684226036072, "learning_rate": 1.9637208980929826e-05, "loss": 2.3156, "step": 3042 }, { "epoch": 0.1, "grad_norm": 0.704622745513916, "learning_rate": 1.963692522758348e-05, "loss": 2.3174, "step": 3043 }, { "epoch": 0.1, "grad_norm": 0.6765683889389038, "learning_rate": 1.9636641365364802e-05, "loss": 2.264, "step": 3044 }, { "epoch": 0.1, "grad_norm": 0.6656168699264526, "learning_rate": 1.9636357394276994e-05, "loss": 2.2789, "step": 3045 }, { "epoch": 0.1, "grad_norm": 0.6795988082885742, "learning_rate": 1.963607331432327e-05, "loss": 2.3539, "step": 3046 }, { "epoch": 0.1, "grad_norm": 0.6894536018371582, "learning_rate": 1.9635789125506836e-05, "loss": 2.2465, "step": 3047 }, { "epoch": 0.1, "grad_norm": 0.6951778531074524, "learning_rate": 1.9635504827830903e-05, "loss": 2.3085, "step": 3048 }, { "epoch": 0.1, "grad_norm": 0.677876353263855, "learning_rate": 1.9635220421298683e-05, "loss": 2.2011, "step": 3049 }, { "epoch": 0.1, "grad_norm": 0.6561124920845032, "learning_rate": 1.9634935905913392e-05, "loss": 2.3303, "step": 3050 }, { "epoch": 0.1, "grad_norm": 0.7039267420768738, "learning_rate": 1.9634651281678245e-05, "loss": 2.3445, "step": 3051 }, { "epoch": 0.1, "grad_norm": 0.6769395470619202, "learning_rate": 1.9634366548596447e-05, "loss": 2.3078, "step": 3052 }, { "epoch": 0.1, "grad_norm": 0.6986508965492249, "learning_rate": 1.9634081706671226e-05, "loss": 2.2988, "step": 3053 }, { "epoch": 0.1, "grad_norm": 0.6775603890419006, "learning_rate": 1.9633796755905793e-05, "loss": 2.3272, "step": 3054 }, { "epoch": 0.1, "grad_norm": 0.6805962920188904, "learning_rate": 1.9633511696303374e-05, "loss": 2.354, "step": 3055 }, { "epoch": 0.1, "grad_norm": 0.6692181825637817, "learning_rate": 1.9633226527867184e-05, "loss": 2.2114, "step": 3056 }, { "epoch": 0.1, "grad_norm": 0.6714400053024292, "learning_rate": 1.9632941250600446e-05, "loss": 2.3039, "step": 3057 }, { "epoch": 0.1, "grad_norm": 0.6624983549118042, "learning_rate": 1.9632655864506383e-05, "loss": 2.2455, "step": 3058 }, { "epoch": 0.1, "grad_norm": 0.6828032732009888, "learning_rate": 1.9632370369588217e-05, "loss": 2.3183, "step": 3059 }, { "epoch": 0.1, "grad_norm": 0.6705331206321716, "learning_rate": 1.963208476584918e-05, "loss": 2.2456, "step": 3060 }, { "epoch": 0.1, "grad_norm": 0.673492431640625, "learning_rate": 1.9631799053292492e-05, "loss": 2.2594, "step": 3061 }, { "epoch": 0.1, "grad_norm": 0.7021549344062805, "learning_rate": 1.9631513231921384e-05, "loss": 2.3157, "step": 3062 }, { "epoch": 0.1, "grad_norm": 0.655586302280426, "learning_rate": 1.9631227301739085e-05, "loss": 2.2824, "step": 3063 }, { "epoch": 0.1, "grad_norm": 0.6719132661819458, "learning_rate": 1.963094126274882e-05, "loss": 2.265, "step": 3064 }, { "epoch": 0.1, "grad_norm": 0.6862486600875854, "learning_rate": 1.9630655114953828e-05, "loss": 2.2409, "step": 3065 }, { "epoch": 0.1, "grad_norm": 0.6536693572998047, "learning_rate": 1.963036885835734e-05, "loss": 2.212, "step": 3066 }, { "epoch": 0.1, "grad_norm": 0.7304664850234985, "learning_rate": 1.963008249296259e-05, "loss": 2.2872, "step": 3067 }, { "epoch": 0.1, "grad_norm": 0.6861785650253296, "learning_rate": 1.962979601877281e-05, "loss": 2.3103, "step": 3068 }, { "epoch": 0.1, "grad_norm": 0.6688429117202759, "learning_rate": 1.9629509435791235e-05, "loss": 2.2763, "step": 3069 }, { "epoch": 0.1, "grad_norm": 0.6977278590202332, "learning_rate": 1.962922274402111e-05, "loss": 2.2802, "step": 3070 }, { "epoch": 0.1, "grad_norm": 0.7347684502601624, "learning_rate": 1.9628935943465666e-05, "loss": 2.3477, "step": 3071 }, { "epoch": 0.1, "grad_norm": 0.7178388833999634, "learning_rate": 1.9628649034128148e-05, "loss": 2.267, "step": 3072 }, { "epoch": 0.1, "grad_norm": 0.6722525358200073, "learning_rate": 1.96283620160118e-05, "loss": 2.3399, "step": 3073 }, { "epoch": 0.1, "grad_norm": 0.6516978740692139, "learning_rate": 1.9628074889119854e-05, "loss": 2.3217, "step": 3074 }, { "epoch": 0.1, "grad_norm": 0.6769207119941711, "learning_rate": 1.9627787653455564e-05, "loss": 2.2535, "step": 3075 }, { "epoch": 0.1, "grad_norm": 0.7423757314682007, "learning_rate": 1.962750030902217e-05, "loss": 2.2908, "step": 3076 }, { "epoch": 0.1, "grad_norm": 0.6747810244560242, "learning_rate": 1.9627212855822923e-05, "loss": 2.2945, "step": 3077 }, { "epoch": 0.1, "grad_norm": 0.6609032154083252, "learning_rate": 1.9626925293861068e-05, "loss": 2.2892, "step": 3078 }, { "epoch": 0.1, "grad_norm": 0.7531595230102539, "learning_rate": 1.962663762313985e-05, "loss": 2.3213, "step": 3079 }, { "epoch": 0.1, "grad_norm": 0.658375084400177, "learning_rate": 1.962634984366252e-05, "loss": 2.3033, "step": 3080 }, { "epoch": 0.1, "grad_norm": 0.6509714126586914, "learning_rate": 1.9626061955432333e-05, "loss": 2.3249, "step": 3081 }, { "epoch": 0.1, "grad_norm": 0.6989246606826782, "learning_rate": 1.962577395845254e-05, "loss": 2.3516, "step": 3082 }, { "epoch": 0.1, "grad_norm": 0.6992824077606201, "learning_rate": 1.9625485852726397e-05, "loss": 2.2787, "step": 3083 }, { "epoch": 0.1, "grad_norm": 0.7299829721450806, "learning_rate": 1.9625197638257152e-05, "loss": 2.3116, "step": 3084 }, { "epoch": 0.1, "grad_norm": 0.6939265131950378, "learning_rate": 1.9624909315048067e-05, "loss": 2.3574, "step": 3085 }, { "epoch": 0.1, "grad_norm": 0.6580144166946411, "learning_rate": 1.9624620883102395e-05, "loss": 2.3364, "step": 3086 }, { "epoch": 0.1, "grad_norm": 0.6714300513267517, "learning_rate": 1.9624332342423398e-05, "loss": 2.2868, "step": 3087 }, { "epoch": 0.1, "grad_norm": 0.6902357339859009, "learning_rate": 1.9624043693014336e-05, "loss": 2.2624, "step": 3088 }, { "epoch": 0.1, "grad_norm": 0.6722714304924011, "learning_rate": 1.9623754934878464e-05, "loss": 2.3488, "step": 3089 }, { "epoch": 0.1, "grad_norm": 0.6773586273193359, "learning_rate": 1.9623466068019056e-05, "loss": 2.2837, "step": 3090 }, { "epoch": 0.1, "grad_norm": 0.6732198596000671, "learning_rate": 1.9623177092439364e-05, "loss": 2.2584, "step": 3091 }, { "epoch": 0.1, "grad_norm": 0.6610260009765625, "learning_rate": 1.962288800814266e-05, "loss": 2.346, "step": 3092 }, { "epoch": 0.1, "grad_norm": 0.6437327265739441, "learning_rate": 1.9622598815132198e-05, "loss": 2.3689, "step": 3093 }, { "epoch": 0.1, "grad_norm": 0.6843645572662354, "learning_rate": 1.9622309513411265e-05, "loss": 2.2683, "step": 3094 }, { "epoch": 0.1, "grad_norm": 0.6770291328430176, "learning_rate": 1.9622020102983114e-05, "loss": 2.2661, "step": 3095 }, { "epoch": 0.1, "grad_norm": 0.6504688262939453, "learning_rate": 1.9621730583851016e-05, "loss": 2.3082, "step": 3096 }, { "epoch": 0.1, "grad_norm": 0.6647862792015076, "learning_rate": 1.9621440956018248e-05, "loss": 2.3046, "step": 3097 }, { "epoch": 0.1, "grad_norm": 0.6618741154670715, "learning_rate": 1.9621151219488077e-05, "loss": 2.287, "step": 3098 }, { "epoch": 0.1, "grad_norm": 0.6801279783248901, "learning_rate": 1.9620861374263778e-05, "loss": 2.33, "step": 3099 }, { "epoch": 0.1, "grad_norm": 0.7018862366676331, "learning_rate": 1.962057142034863e-05, "loss": 2.2983, "step": 3100 }, { "epoch": 0.1, "grad_norm": 0.6851409077644348, "learning_rate": 1.96202813577459e-05, "loss": 2.335, "step": 3101 }, { "epoch": 0.1, "grad_norm": 0.6796402931213379, "learning_rate": 1.9619991186458868e-05, "loss": 2.2785, "step": 3102 }, { "epoch": 0.1, "grad_norm": 0.6904879808425903, "learning_rate": 1.9619700906490816e-05, "loss": 2.3951, "step": 3103 }, { "epoch": 0.1, "grad_norm": 0.6954065561294556, "learning_rate": 1.9619410517845022e-05, "loss": 2.3086, "step": 3104 }, { "epoch": 0.1, "grad_norm": 0.7203652262687683, "learning_rate": 1.9619120020524765e-05, "loss": 2.3378, "step": 3105 }, { "epoch": 0.1, "grad_norm": 0.6783618927001953, "learning_rate": 1.9618829414533325e-05, "loss": 2.2792, "step": 3106 }, { "epoch": 0.1, "grad_norm": 0.681893527507782, "learning_rate": 1.961853869987399e-05, "loss": 2.2767, "step": 3107 }, { "epoch": 0.1, "grad_norm": 0.6805277466773987, "learning_rate": 1.961824787655004e-05, "loss": 2.3361, "step": 3108 }, { "epoch": 0.1, "grad_norm": 0.6789894700050354, "learning_rate": 1.9617956944564763e-05, "loss": 2.3144, "step": 3109 }, { "epoch": 0.1, "grad_norm": 0.6904398202896118, "learning_rate": 1.9617665903921446e-05, "loss": 2.3166, "step": 3110 }, { "epoch": 0.1, "grad_norm": 0.6732697486877441, "learning_rate": 1.9617374754623376e-05, "loss": 2.312, "step": 3111 }, { "epoch": 0.1, "grad_norm": 0.662837564945221, "learning_rate": 1.9617083496673838e-05, "loss": 2.4164, "step": 3112 }, { "epoch": 0.1, "grad_norm": 0.6572659611701965, "learning_rate": 1.9616792130076132e-05, "loss": 2.2993, "step": 3113 }, { "epoch": 0.1, "grad_norm": 0.6960311532020569, "learning_rate": 1.9616500654833542e-05, "loss": 2.2621, "step": 3114 }, { "epoch": 0.1, "grad_norm": 0.6972876191139221, "learning_rate": 1.9616209070949365e-05, "loss": 2.2946, "step": 3115 }, { "epoch": 0.1, "grad_norm": 0.6960448622703552, "learning_rate": 1.9615917378426893e-05, "loss": 2.2674, "step": 3116 }, { "epoch": 0.1, "grad_norm": 0.6815236806869507, "learning_rate": 1.961562557726942e-05, "loss": 2.2426, "step": 3117 }, { "epoch": 0.1, "grad_norm": 0.6605592370033264, "learning_rate": 1.9615333667480247e-05, "loss": 2.2673, "step": 3118 }, { "epoch": 0.1, "grad_norm": 0.7008052468299866, "learning_rate": 1.961504164906267e-05, "loss": 2.3058, "step": 3119 }, { "epoch": 0.1, "grad_norm": 0.7082712054252625, "learning_rate": 1.9614749522019986e-05, "loss": 2.2543, "step": 3120 }, { "epoch": 0.1, "grad_norm": 0.7027872204780579, "learning_rate": 1.9614457286355496e-05, "loss": 2.2355, "step": 3121 }, { "epoch": 0.1, "grad_norm": 0.6617802977561951, "learning_rate": 1.9614164942072505e-05, "loss": 2.2881, "step": 3122 }, { "epoch": 0.1, "grad_norm": 0.6827096939086914, "learning_rate": 1.961387248917431e-05, "loss": 2.3263, "step": 3123 }, { "epoch": 0.1, "grad_norm": 0.6676981449127197, "learning_rate": 1.961357992766422e-05, "loss": 2.33, "step": 3124 }, { "epoch": 0.1, "grad_norm": 0.670794665813446, "learning_rate": 1.9613287257545533e-05, "loss": 2.2564, "step": 3125 }, { "epoch": 0.1, "grad_norm": 0.7067950367927551, "learning_rate": 1.961299447882157e-05, "loss": 2.3435, "step": 3126 }, { "epoch": 0.1, "grad_norm": 0.664048969745636, "learning_rate": 1.9612701591495618e-05, "loss": 2.2498, "step": 3127 }, { "epoch": 0.1, "grad_norm": 0.661981463432312, "learning_rate": 1.9612408595571007e-05, "loss": 2.315, "step": 3128 }, { "epoch": 0.1, "grad_norm": 0.6529367566108704, "learning_rate": 1.9612115491051033e-05, "loss": 2.1863, "step": 3129 }, { "epoch": 0.1, "grad_norm": 0.6726460456848145, "learning_rate": 1.961182227793901e-05, "loss": 2.3114, "step": 3130 }, { "epoch": 0.1, "grad_norm": 0.6730542778968811, "learning_rate": 1.9611528956238252e-05, "loss": 2.3233, "step": 3131 }, { "epoch": 0.1, "grad_norm": 0.6809111833572388, "learning_rate": 1.9611235525952076e-05, "loss": 2.2634, "step": 3132 }, { "epoch": 0.1, "grad_norm": 0.6963350176811218, "learning_rate": 1.9610941987083788e-05, "loss": 2.2942, "step": 3133 }, { "epoch": 0.1, "grad_norm": 0.6986297369003296, "learning_rate": 1.9610648339636715e-05, "loss": 2.2303, "step": 3134 }, { "epoch": 0.1, "grad_norm": 0.6576271057128906, "learning_rate": 1.961035458361417e-05, "loss": 2.212, "step": 3135 }, { "epoch": 0.1, "grad_norm": 0.6566599607467651, "learning_rate": 1.961006071901947e-05, "loss": 2.3151, "step": 3136 }, { "epoch": 0.1, "grad_norm": 0.6749888062477112, "learning_rate": 1.9609766745855933e-05, "loss": 2.3049, "step": 3137 }, { "epoch": 0.1, "grad_norm": 0.6851335167884827, "learning_rate": 1.9609472664126885e-05, "loss": 2.3199, "step": 3138 }, { "epoch": 0.1, "grad_norm": 0.7235228419303894, "learning_rate": 1.9609178473835647e-05, "loss": 2.3395, "step": 3139 }, { "epoch": 0.1, "grad_norm": 0.7386735677719116, "learning_rate": 1.9608884174985542e-05, "loss": 2.3669, "step": 3140 }, { "epoch": 0.1, "grad_norm": 0.6980429887771606, "learning_rate": 1.96085897675799e-05, "loss": 2.3397, "step": 3141 }, { "epoch": 0.1, "grad_norm": 0.6962876319885254, "learning_rate": 1.9608295251622036e-05, "loss": 2.3555, "step": 3142 }, { "epoch": 0.1, "grad_norm": 0.682605504989624, "learning_rate": 1.9608000627115282e-05, "loss": 2.2796, "step": 3143 }, { "epoch": 0.1, "grad_norm": 0.6582570672035217, "learning_rate": 1.960770589406297e-05, "loss": 2.2466, "step": 3144 }, { "epoch": 0.1, "grad_norm": 0.7072826623916626, "learning_rate": 1.9607411052468427e-05, "loss": 2.3273, "step": 3145 }, { "epoch": 0.1, "grad_norm": 0.6734939813613892, "learning_rate": 1.9607116102334988e-05, "loss": 2.2719, "step": 3146 }, { "epoch": 0.1, "grad_norm": 0.6844536066055298, "learning_rate": 1.960682104366598e-05, "loss": 2.2639, "step": 3147 }, { "epoch": 0.1, "grad_norm": 0.7324599027633667, "learning_rate": 1.9606525876464734e-05, "loss": 2.3159, "step": 3148 }, { "epoch": 0.1, "grad_norm": 0.7275071740150452, "learning_rate": 1.960623060073459e-05, "loss": 2.3568, "step": 3149 }, { "epoch": 0.1, "grad_norm": 0.6729439496994019, "learning_rate": 1.9605935216478884e-05, "loss": 2.3132, "step": 3150 }, { "epoch": 0.1, "grad_norm": 0.65671706199646, "learning_rate": 1.9605639723700953e-05, "loss": 2.2849, "step": 3151 }, { "epoch": 0.1, "grad_norm": 0.6889495849609375, "learning_rate": 1.9605344122404132e-05, "loss": 2.3272, "step": 3152 }, { "epoch": 0.1, "grad_norm": 0.6683332920074463, "learning_rate": 1.9605048412591762e-05, "loss": 2.287, "step": 3153 }, { "epoch": 0.1, "grad_norm": 0.6705678105354309, "learning_rate": 1.9604752594267184e-05, "loss": 2.2859, "step": 3154 }, { "epoch": 0.1, "grad_norm": 0.6564631462097168, "learning_rate": 1.960445666743374e-05, "loss": 2.2636, "step": 3155 }, { "epoch": 0.11, "grad_norm": 0.7232211828231812, "learning_rate": 1.9604160632094778e-05, "loss": 2.3698, "step": 3156 }, { "epoch": 0.11, "grad_norm": 0.6949573159217834, "learning_rate": 1.9603864488253632e-05, "loss": 2.2941, "step": 3157 }, { "epoch": 0.11, "grad_norm": 0.7094296216964722, "learning_rate": 1.9603568235913654e-05, "loss": 2.2363, "step": 3158 }, { "epoch": 0.11, "grad_norm": 0.6770161986351013, "learning_rate": 1.9603271875078194e-05, "loss": 2.2922, "step": 3159 }, { "epoch": 0.11, "grad_norm": 0.6891723871231079, "learning_rate": 1.960297540575059e-05, "loss": 2.2343, "step": 3160 }, { "epoch": 0.11, "grad_norm": 0.6961461901664734, "learning_rate": 1.9602678827934205e-05, "loss": 2.2409, "step": 3161 }, { "epoch": 0.11, "grad_norm": 0.6995030045509338, "learning_rate": 1.960238214163238e-05, "loss": 2.2295, "step": 3162 }, { "epoch": 0.11, "grad_norm": 0.6646366119384766, "learning_rate": 1.9602085346848468e-05, "loss": 2.2904, "step": 3163 }, { "epoch": 0.11, "grad_norm": 0.6777051687240601, "learning_rate": 1.960178844358582e-05, "loss": 2.2175, "step": 3164 }, { "epoch": 0.11, "grad_norm": 0.6713035106658936, "learning_rate": 1.9601491431847802e-05, "loss": 2.284, "step": 3165 }, { "epoch": 0.11, "grad_norm": 0.6949702501296997, "learning_rate": 1.960119431163775e-05, "loss": 2.3241, "step": 3166 }, { "epoch": 0.11, "grad_norm": 0.6910507082939148, "learning_rate": 1.960089708295904e-05, "loss": 2.3641, "step": 3167 }, { "epoch": 0.11, "grad_norm": 0.6850019097328186, "learning_rate": 1.960059974581502e-05, "loss": 2.3064, "step": 3168 }, { "epoch": 0.11, "grad_norm": 0.6518955230712891, "learning_rate": 1.9600302300209047e-05, "loss": 2.3273, "step": 3169 }, { "epoch": 0.11, "grad_norm": 0.7129683494567871, "learning_rate": 1.960000474614449e-05, "loss": 2.3703, "step": 3170 }, { "epoch": 0.11, "grad_norm": 0.6644880771636963, "learning_rate": 1.95997070836247e-05, "loss": 2.294, "step": 3171 }, { "epoch": 0.11, "grad_norm": 0.6693429350852966, "learning_rate": 1.959940931265305e-05, "loss": 2.2669, "step": 3172 }, { "epoch": 0.11, "grad_norm": 0.6777902245521545, "learning_rate": 1.9599111433232897e-05, "loss": 2.2397, "step": 3173 }, { "epoch": 0.11, "grad_norm": 0.6584378480911255, "learning_rate": 1.9598813445367608e-05, "loss": 2.2918, "step": 3174 }, { "epoch": 0.11, "grad_norm": 0.6797993183135986, "learning_rate": 1.9598515349060553e-05, "loss": 2.308, "step": 3175 }, { "epoch": 0.11, "grad_norm": 0.668185293674469, "learning_rate": 1.9598217144315096e-05, "loss": 2.2994, "step": 3176 }, { "epoch": 0.11, "grad_norm": 0.6579134464263916, "learning_rate": 1.9597918831134603e-05, "loss": 2.2443, "step": 3177 }, { "epoch": 0.11, "grad_norm": 0.6813787221908569, "learning_rate": 1.9597620409522454e-05, "loss": 2.2887, "step": 3178 }, { "epoch": 0.11, "grad_norm": 0.6628063917160034, "learning_rate": 1.959732187948201e-05, "loss": 2.3068, "step": 3179 }, { "epoch": 0.11, "grad_norm": 0.6778666377067566, "learning_rate": 1.9597023241016647e-05, "loss": 2.2634, "step": 3180 }, { "epoch": 0.11, "grad_norm": 0.666515588760376, "learning_rate": 1.9596724494129745e-05, "loss": 2.272, "step": 3181 }, { "epoch": 0.11, "grad_norm": 0.6696109175682068, "learning_rate": 1.959642563882467e-05, "loss": 2.3331, "step": 3182 }, { "epoch": 0.11, "grad_norm": 0.6769323945045471, "learning_rate": 1.9596126675104803e-05, "loss": 2.2684, "step": 3183 }, { "epoch": 0.11, "grad_norm": 0.671610414981842, "learning_rate": 1.959582760297352e-05, "loss": 2.2981, "step": 3184 }, { "epoch": 0.11, "grad_norm": 0.6878125071525574, "learning_rate": 1.95955284224342e-05, "loss": 2.297, "step": 3185 }, { "epoch": 0.11, "grad_norm": 0.7316403388977051, "learning_rate": 1.9595229133490225e-05, "loss": 2.2497, "step": 3186 }, { "epoch": 0.11, "grad_norm": 0.7361128330230713, "learning_rate": 1.9594929736144978e-05, "loss": 2.3559, "step": 3187 }, { "epoch": 0.11, "grad_norm": 0.6558111906051636, "learning_rate": 1.959463023040183e-05, "loss": 2.2835, "step": 3188 }, { "epoch": 0.11, "grad_norm": 0.6527162790298462, "learning_rate": 1.959433061626418e-05, "loss": 2.2099, "step": 3189 }, { "epoch": 0.11, "grad_norm": 0.6706349849700928, "learning_rate": 1.9594030893735404e-05, "loss": 2.3187, "step": 3190 }, { "epoch": 0.11, "grad_norm": 0.6911870241165161, "learning_rate": 1.9593731062818887e-05, "loss": 2.3388, "step": 3191 }, { "epoch": 0.11, "grad_norm": 0.6891999244689941, "learning_rate": 1.959343112351802e-05, "loss": 2.3054, "step": 3192 }, { "epoch": 0.11, "grad_norm": 0.6826380491256714, "learning_rate": 1.959313107583619e-05, "loss": 2.2617, "step": 3193 }, { "epoch": 0.11, "grad_norm": 0.7053672075271606, "learning_rate": 1.9592830919776786e-05, "loss": 2.299, "step": 3194 }, { "epoch": 0.11, "grad_norm": 0.7277592420578003, "learning_rate": 1.9592530655343202e-05, "loss": 2.2939, "step": 3195 }, { "epoch": 0.11, "grad_norm": 0.6787527799606323, "learning_rate": 1.9592230282538828e-05, "loss": 2.292, "step": 3196 }, { "epoch": 0.11, "grad_norm": 0.6701645255088806, "learning_rate": 1.959192980136706e-05, "loss": 2.311, "step": 3197 }, { "epoch": 0.11, "grad_norm": 0.6556048393249512, "learning_rate": 1.9591629211831288e-05, "loss": 2.2946, "step": 3198 }, { "epoch": 0.11, "grad_norm": 0.6999236941337585, "learning_rate": 1.9591328513934913e-05, "loss": 2.3142, "step": 3199 }, { "epoch": 0.11, "grad_norm": 0.7091421484947205, "learning_rate": 1.9591027707681326e-05, "loss": 2.2679, "step": 3200 }, { "epoch": 0.11, "grad_norm": 0.696406900882721, "learning_rate": 1.959072679307393e-05, "loss": 2.3135, "step": 3201 }, { "epoch": 0.11, "grad_norm": 0.7025538086891174, "learning_rate": 1.9590425770116125e-05, "loss": 2.3131, "step": 3202 }, { "epoch": 0.11, "grad_norm": 0.6604133248329163, "learning_rate": 1.959012463881131e-05, "loss": 2.3091, "step": 3203 }, { "epoch": 0.11, "grad_norm": 0.7291641235351562, "learning_rate": 1.9589823399162887e-05, "loss": 2.2842, "step": 3204 }, { "epoch": 0.11, "grad_norm": 0.6853616833686829, "learning_rate": 1.9589522051174257e-05, "loss": 2.3785, "step": 3205 }, { "epoch": 0.11, "grad_norm": 0.7002339959144592, "learning_rate": 1.9589220594848826e-05, "loss": 2.3104, "step": 3206 }, { "epoch": 0.11, "grad_norm": 0.6704463958740234, "learning_rate": 1.9588919030190006e-05, "loss": 2.2511, "step": 3207 }, { "epoch": 0.11, "grad_norm": 0.7219144701957703, "learning_rate": 1.9588617357201198e-05, "loss": 2.2918, "step": 3208 }, { "epoch": 0.11, "grad_norm": 0.6766985058784485, "learning_rate": 1.9588315575885806e-05, "loss": 2.3221, "step": 3209 }, { "epoch": 0.11, "grad_norm": 0.697449266910553, "learning_rate": 1.9588013686247247e-05, "loss": 2.2893, "step": 3210 }, { "epoch": 0.11, "grad_norm": 0.6685243844985962, "learning_rate": 1.958771168828893e-05, "loss": 2.2684, "step": 3211 }, { "epoch": 0.11, "grad_norm": 0.6941660046577454, "learning_rate": 1.958740958201426e-05, "loss": 2.2944, "step": 3212 }, { "epoch": 0.11, "grad_norm": 0.6850084066390991, "learning_rate": 1.958710736742666e-05, "loss": 2.2579, "step": 3213 }, { "epoch": 0.11, "grad_norm": 0.6781442165374756, "learning_rate": 1.9586805044529536e-05, "loss": 2.284, "step": 3214 }, { "epoch": 0.11, "grad_norm": 0.6558775305747986, "learning_rate": 1.958650261332631e-05, "loss": 2.3083, "step": 3215 }, { "epoch": 0.11, "grad_norm": 0.7022719979286194, "learning_rate": 1.9586200073820394e-05, "loss": 2.3518, "step": 3216 }, { "epoch": 0.11, "grad_norm": 0.6738336682319641, "learning_rate": 1.9585897426015207e-05, "loss": 2.2363, "step": 3217 }, { "epoch": 0.11, "grad_norm": 0.6709434986114502, "learning_rate": 1.958559466991417e-05, "loss": 2.2673, "step": 3218 }, { "epoch": 0.11, "grad_norm": 0.6536407470703125, "learning_rate": 1.9585291805520702e-05, "loss": 2.3066, "step": 3219 }, { "epoch": 0.11, "grad_norm": 0.6876931190490723, "learning_rate": 1.9584988832838227e-05, "loss": 2.3874, "step": 3220 }, { "epoch": 0.11, "grad_norm": 0.6653323173522949, "learning_rate": 1.9584685751870162e-05, "loss": 2.3723, "step": 3221 }, { "epoch": 0.11, "grad_norm": 0.6823737025260925, "learning_rate": 1.9584382562619937e-05, "loss": 2.3453, "step": 3222 }, { "epoch": 0.11, "grad_norm": 0.6831386685371399, "learning_rate": 1.9584079265090975e-05, "loss": 2.2667, "step": 3223 }, { "epoch": 0.11, "grad_norm": 0.6659426093101501, "learning_rate": 1.95837758592867e-05, "loss": 2.233, "step": 3224 }, { "epoch": 0.11, "grad_norm": 0.6855709552764893, "learning_rate": 1.9583472345210544e-05, "loss": 2.2367, "step": 3225 }, { "epoch": 0.11, "grad_norm": 0.6779314875602722, "learning_rate": 1.9583168722865932e-05, "loss": 2.3187, "step": 3226 }, { "epoch": 0.11, "grad_norm": 0.7188411951065063, "learning_rate": 1.9582864992256295e-05, "loss": 2.2545, "step": 3227 }, { "epoch": 0.11, "grad_norm": 0.6842631101608276, "learning_rate": 1.9582561153385067e-05, "loss": 2.2459, "step": 3228 }, { "epoch": 0.11, "grad_norm": 0.7029978632926941, "learning_rate": 1.958225720625568e-05, "loss": 2.2771, "step": 3229 }, { "epoch": 0.11, "grad_norm": 0.7499919533729553, "learning_rate": 1.958195315087157e-05, "loss": 2.3098, "step": 3230 }, { "epoch": 0.11, "grad_norm": 0.6602301597595215, "learning_rate": 1.9581648987236165e-05, "loss": 2.2927, "step": 3231 }, { "epoch": 0.11, "grad_norm": 0.6747449040412903, "learning_rate": 1.9581344715352902e-05, "loss": 2.2869, "step": 3232 }, { "epoch": 0.11, "grad_norm": 0.6745195388793945, "learning_rate": 1.958104033522523e-05, "loss": 2.3307, "step": 3233 }, { "epoch": 0.11, "grad_norm": 0.7078105211257935, "learning_rate": 1.958073584685657e-05, "loss": 2.2831, "step": 3234 }, { "epoch": 0.11, "grad_norm": 0.6723508834838867, "learning_rate": 1.9580431250250376e-05, "loss": 2.2534, "step": 3235 }, { "epoch": 0.11, "grad_norm": 0.681725800037384, "learning_rate": 1.958012654541008e-05, "loss": 2.3029, "step": 3236 }, { "epoch": 0.11, "grad_norm": 0.6951661109924316, "learning_rate": 1.9579821732339136e-05, "loss": 2.3438, "step": 3237 }, { "epoch": 0.11, "grad_norm": 0.6803429126739502, "learning_rate": 1.9579516811040977e-05, "loss": 2.2932, "step": 3238 }, { "epoch": 0.11, "grad_norm": 0.6646954417228699, "learning_rate": 1.9579211781519052e-05, "loss": 2.2873, "step": 3239 }, { "epoch": 0.11, "grad_norm": 0.6637921333312988, "learning_rate": 1.9578906643776807e-05, "loss": 2.269, "step": 3240 }, { "epoch": 0.11, "grad_norm": 0.6860204339027405, "learning_rate": 1.9578601397817686e-05, "loss": 2.2794, "step": 3241 }, { "epoch": 0.11, "grad_norm": 0.6754475831985474, "learning_rate": 1.9578296043645142e-05, "loss": 2.3017, "step": 3242 }, { "epoch": 0.11, "grad_norm": 0.6865530610084534, "learning_rate": 1.9577990581262622e-05, "loss": 2.3192, "step": 3243 }, { "epoch": 0.11, "grad_norm": 0.6550263166427612, "learning_rate": 1.9577685010673577e-05, "loss": 2.2488, "step": 3244 }, { "epoch": 0.11, "grad_norm": 0.6935075521469116, "learning_rate": 1.957737933188146e-05, "loss": 2.2327, "step": 3245 }, { "epoch": 0.11, "grad_norm": 0.6526719331741333, "learning_rate": 1.9577073544889728e-05, "loss": 2.307, "step": 3246 }, { "epoch": 0.11, "grad_norm": 0.6713154315948486, "learning_rate": 1.9576767649701828e-05, "loss": 2.2876, "step": 3247 }, { "epoch": 0.11, "grad_norm": 0.7009166479110718, "learning_rate": 1.9576461646321217e-05, "loss": 2.3952, "step": 3248 }, { "epoch": 0.11, "grad_norm": 0.6870133280754089, "learning_rate": 1.957615553475136e-05, "loss": 2.3105, "step": 3249 }, { "epoch": 0.11, "grad_norm": 0.6648132801055908, "learning_rate": 1.9575849314995707e-05, "loss": 2.29, "step": 3250 }, { "epoch": 0.11, "grad_norm": 0.6671931147575378, "learning_rate": 1.957554298705772e-05, "loss": 2.2932, "step": 3251 }, { "epoch": 0.11, "grad_norm": 0.7125139236450195, "learning_rate": 1.957523655094086e-05, "loss": 2.3547, "step": 3252 }, { "epoch": 0.11, "grad_norm": 0.6626330018043518, "learning_rate": 1.9574930006648592e-05, "loss": 2.3173, "step": 3253 }, { "epoch": 0.11, "grad_norm": 0.6882072687149048, "learning_rate": 1.9574623354184374e-05, "loss": 2.2967, "step": 3254 }, { "epoch": 0.11, "grad_norm": 0.6339793801307678, "learning_rate": 1.9574316593551674e-05, "loss": 2.2545, "step": 3255 }, { "epoch": 0.11, "grad_norm": 0.6647969484329224, "learning_rate": 1.9574009724753954e-05, "loss": 2.3258, "step": 3256 }, { "epoch": 0.11, "grad_norm": 0.6963188648223877, "learning_rate": 1.9573702747794687e-05, "loss": 2.3536, "step": 3257 }, { "epoch": 0.11, "grad_norm": 0.6629931926727295, "learning_rate": 1.9573395662677332e-05, "loss": 2.3059, "step": 3258 }, { "epoch": 0.11, "grad_norm": 0.6581913232803345, "learning_rate": 1.957308846940537e-05, "loss": 2.341, "step": 3259 }, { "epoch": 0.11, "grad_norm": 0.6464890241622925, "learning_rate": 1.9572781167982258e-05, "loss": 2.2698, "step": 3260 }, { "epoch": 0.11, "grad_norm": 0.6877791285514832, "learning_rate": 1.9572473758411477e-05, "loss": 2.3588, "step": 3261 }, { "epoch": 0.11, "grad_norm": 0.6642687916755676, "learning_rate": 1.9572166240696496e-05, "loss": 2.2444, "step": 3262 }, { "epoch": 0.11, "grad_norm": 0.6501506567001343, "learning_rate": 1.9571858614840793e-05, "loss": 2.2833, "step": 3263 }, { "epoch": 0.11, "grad_norm": 0.6657009720802307, "learning_rate": 1.957155088084784e-05, "loss": 2.2363, "step": 3264 }, { "epoch": 0.11, "grad_norm": 0.6632678508758545, "learning_rate": 1.9571243038721116e-05, "loss": 2.3072, "step": 3265 }, { "epoch": 0.11, "grad_norm": 0.7015467882156372, "learning_rate": 1.95709350884641e-05, "loss": 2.2846, "step": 3266 }, { "epoch": 0.11, "grad_norm": 0.7435239553451538, "learning_rate": 1.9570627030080263e-05, "loss": 2.3322, "step": 3267 }, { "epoch": 0.11, "grad_norm": 0.7427944540977478, "learning_rate": 1.9570318863573092e-05, "loss": 2.2779, "step": 3268 }, { "epoch": 0.11, "grad_norm": 0.6989432573318481, "learning_rate": 1.957001058894607e-05, "loss": 2.2088, "step": 3269 }, { "epoch": 0.11, "grad_norm": 0.6986260414123535, "learning_rate": 1.9569702206202675e-05, "loss": 2.3343, "step": 3270 }, { "epoch": 0.11, "grad_norm": 0.6584373712539673, "learning_rate": 1.9569393715346392e-05, "loss": 2.3144, "step": 3271 }, { "epoch": 0.11, "grad_norm": 0.6659768223762512, "learning_rate": 1.9569085116380705e-05, "loss": 2.2224, "step": 3272 }, { "epoch": 0.11, "grad_norm": 0.7299159169197083, "learning_rate": 1.9568776409309108e-05, "loss": 2.3413, "step": 3273 }, { "epoch": 0.11, "grad_norm": 0.6683459877967834, "learning_rate": 1.956846759413508e-05, "loss": 2.231, "step": 3274 }, { "epoch": 0.11, "grad_norm": 0.6688826084136963, "learning_rate": 1.956815867086211e-05, "loss": 2.2433, "step": 3275 }, { "epoch": 0.11, "grad_norm": 0.675812304019928, "learning_rate": 1.9567849639493697e-05, "loss": 2.3089, "step": 3276 }, { "epoch": 0.11, "grad_norm": 0.6878451704978943, "learning_rate": 1.9567540500033325e-05, "loss": 2.3657, "step": 3277 }, { "epoch": 0.11, "grad_norm": 0.6912396550178528, "learning_rate": 1.9567231252484485e-05, "loss": 2.313, "step": 3278 }, { "epoch": 0.11, "grad_norm": 0.6846947073936462, "learning_rate": 1.9566921896850673e-05, "loss": 2.2733, "step": 3279 }, { "epoch": 0.11, "grad_norm": 0.6598116755485535, "learning_rate": 1.9566612433135383e-05, "loss": 2.2762, "step": 3280 }, { "epoch": 0.11, "grad_norm": 0.6648226380348206, "learning_rate": 1.9566302861342117e-05, "loss": 2.3119, "step": 3281 }, { "epoch": 0.11, "grad_norm": 0.6883125305175781, "learning_rate": 1.9565993181474362e-05, "loss": 2.2338, "step": 3282 }, { "epoch": 0.11, "grad_norm": 0.6678287982940674, "learning_rate": 1.9565683393535625e-05, "loss": 2.3436, "step": 3283 }, { "epoch": 0.11, "grad_norm": 0.6640600562095642, "learning_rate": 1.9565373497529406e-05, "loss": 2.278, "step": 3284 }, { "epoch": 0.11, "grad_norm": 0.7145614624023438, "learning_rate": 1.9565063493459198e-05, "loss": 2.3214, "step": 3285 }, { "epoch": 0.11, "grad_norm": 0.6901586651802063, "learning_rate": 1.9564753381328515e-05, "loss": 2.2573, "step": 3286 }, { "epoch": 0.11, "grad_norm": 0.7153841853141785, "learning_rate": 1.956444316114085e-05, "loss": 2.2895, "step": 3287 }, { "epoch": 0.11, "grad_norm": 0.6618996262550354, "learning_rate": 1.9564132832899707e-05, "loss": 2.281, "step": 3288 }, { "epoch": 0.11, "grad_norm": 0.6791805028915405, "learning_rate": 1.9563822396608603e-05, "loss": 2.2662, "step": 3289 }, { "epoch": 0.11, "grad_norm": 0.6669474244117737, "learning_rate": 1.9563511852271033e-05, "loss": 2.2253, "step": 3290 }, { "epoch": 0.11, "grad_norm": 0.659118115901947, "learning_rate": 1.9563201199890514e-05, "loss": 2.2917, "step": 3291 }, { "epoch": 0.11, "grad_norm": 0.6741810441017151, "learning_rate": 1.9562890439470554e-05, "loss": 2.3494, "step": 3292 }, { "epoch": 0.11, "grad_norm": 0.7016229033470154, "learning_rate": 1.956257957101466e-05, "loss": 2.3065, "step": 3293 }, { "epoch": 0.11, "grad_norm": 0.6853245496749878, "learning_rate": 1.9562268594526347e-05, "loss": 2.3762, "step": 3294 }, { "epoch": 0.11, "grad_norm": 0.6615698933601379, "learning_rate": 1.9561957510009128e-05, "loss": 2.2544, "step": 3295 }, { "epoch": 0.11, "grad_norm": 0.7177711129188538, "learning_rate": 1.9561646317466514e-05, "loss": 2.3078, "step": 3296 }, { "epoch": 0.11, "grad_norm": 0.6854174137115479, "learning_rate": 1.956133501690203e-05, "loss": 2.3099, "step": 3297 }, { "epoch": 0.11, "grad_norm": 0.6666294932365417, "learning_rate": 1.956102360831918e-05, "loss": 2.312, "step": 3298 }, { "epoch": 0.11, "grad_norm": 0.6575854420661926, "learning_rate": 1.9560712091721488e-05, "loss": 2.2546, "step": 3299 }, { "epoch": 0.11, "grad_norm": 0.695037305355072, "learning_rate": 1.9560400467112477e-05, "loss": 2.266, "step": 3300 }, { "epoch": 0.11, "grad_norm": 0.6751512289047241, "learning_rate": 1.9560088734495665e-05, "loss": 2.2428, "step": 3301 }, { "epoch": 0.11, "grad_norm": 0.7023912668228149, "learning_rate": 1.955977689387457e-05, "loss": 2.2193, "step": 3302 }, { "epoch": 0.11, "grad_norm": 0.6935909390449524, "learning_rate": 1.9559464945252722e-05, "loss": 2.3496, "step": 3303 }, { "epoch": 0.11, "grad_norm": 0.7226423025131226, "learning_rate": 1.955915288863364e-05, "loss": 2.3311, "step": 3304 }, { "epoch": 0.11, "grad_norm": 0.688186526298523, "learning_rate": 1.9558840724020852e-05, "loss": 2.2579, "step": 3305 }, { "epoch": 0.11, "grad_norm": 0.7643429040908813, "learning_rate": 1.955852845141788e-05, "loss": 2.3113, "step": 3306 }, { "epoch": 0.11, "grad_norm": 0.6757791638374329, "learning_rate": 1.9558216070828257e-05, "loss": 2.3065, "step": 3307 }, { "epoch": 0.11, "grad_norm": 0.6753725409507751, "learning_rate": 1.9557903582255513e-05, "loss": 2.3026, "step": 3308 }, { "epoch": 0.11, "grad_norm": 0.6865319013595581, "learning_rate": 1.9557590985703174e-05, "loss": 2.3241, "step": 3309 }, { "epoch": 0.11, "grad_norm": 0.6682195067405701, "learning_rate": 1.9557278281174775e-05, "loss": 2.2624, "step": 3310 }, { "epoch": 0.11, "grad_norm": 0.6861039996147156, "learning_rate": 1.9556965468673847e-05, "loss": 2.2788, "step": 3311 }, { "epoch": 0.11, "grad_norm": 0.6809138059616089, "learning_rate": 1.9556652548203922e-05, "loss": 2.2352, "step": 3312 }, { "epoch": 0.11, "grad_norm": 0.6755217909812927, "learning_rate": 1.9556339519768535e-05, "loss": 2.2645, "step": 3313 }, { "epoch": 0.11, "grad_norm": 0.6843271255493164, "learning_rate": 1.9556026383371227e-05, "loss": 2.3389, "step": 3314 }, { "epoch": 0.11, "grad_norm": 0.6822518110275269, "learning_rate": 1.9555713139015534e-05, "loss": 2.2334, "step": 3315 }, { "epoch": 0.11, "grad_norm": 0.6699937582015991, "learning_rate": 1.9555399786704994e-05, "loss": 2.2655, "step": 3316 }, { "epoch": 0.11, "grad_norm": 0.6926649808883667, "learning_rate": 1.955508632644315e-05, "loss": 2.2775, "step": 3317 }, { "epoch": 0.11, "grad_norm": 0.6768621206283569, "learning_rate": 1.9554772758233535e-05, "loss": 2.2864, "step": 3318 }, { "epoch": 0.11, "grad_norm": 0.6527018547058105, "learning_rate": 1.95544590820797e-05, "loss": 2.3011, "step": 3319 }, { "epoch": 0.11, "grad_norm": 0.6702751517295837, "learning_rate": 1.9554145297985187e-05, "loss": 2.3541, "step": 3320 }, { "epoch": 0.11, "grad_norm": 0.6950775384902954, "learning_rate": 1.9553831405953537e-05, "loss": 2.3011, "step": 3321 }, { "epoch": 0.11, "grad_norm": 0.6607711911201477, "learning_rate": 1.95535174059883e-05, "loss": 2.2887, "step": 3322 }, { "epoch": 0.11, "grad_norm": 0.6950490474700928, "learning_rate": 1.955320329809302e-05, "loss": 2.3431, "step": 3323 }, { "epoch": 0.11, "grad_norm": 0.6859994530677795, "learning_rate": 1.955288908227125e-05, "loss": 2.27, "step": 3324 }, { "epoch": 0.11, "grad_norm": 0.6716843247413635, "learning_rate": 1.9552574758526538e-05, "loss": 2.1834, "step": 3325 }, { "epoch": 0.11, "grad_norm": 0.700272262096405, "learning_rate": 1.9552260326862438e-05, "loss": 2.228, "step": 3326 }, { "epoch": 0.11, "grad_norm": 0.6992161870002747, "learning_rate": 1.9551945787282496e-05, "loss": 2.3421, "step": 3327 }, { "epoch": 0.11, "grad_norm": 0.6473497152328491, "learning_rate": 1.9551631139790264e-05, "loss": 2.2662, "step": 3328 }, { "epoch": 0.11, "grad_norm": 0.6414010524749756, "learning_rate": 1.955131638438931e-05, "loss": 2.2617, "step": 3329 }, { "epoch": 0.11, "grad_norm": 0.6958712339401245, "learning_rate": 1.9551001521083176e-05, "loss": 2.2968, "step": 3330 }, { "epoch": 0.11, "grad_norm": 0.6657890677452087, "learning_rate": 1.9550686549875423e-05, "loss": 2.2757, "step": 3331 }, { "epoch": 0.11, "grad_norm": 0.7069322466850281, "learning_rate": 1.9550371470769615e-05, "loss": 2.3166, "step": 3332 }, { "epoch": 0.11, "grad_norm": 0.7049700617790222, "learning_rate": 1.95500562837693e-05, "loss": 2.3427, "step": 3333 }, { "epoch": 0.11, "grad_norm": 0.6740976572036743, "learning_rate": 1.9549740988878053e-05, "loss": 2.2906, "step": 3334 }, { "epoch": 0.11, "grad_norm": 0.6662238240242004, "learning_rate": 1.9549425586099425e-05, "loss": 2.3191, "step": 3335 }, { "epoch": 0.11, "grad_norm": 0.6581270694732666, "learning_rate": 1.9549110075436984e-05, "loss": 2.2896, "step": 3336 }, { "epoch": 0.11, "grad_norm": 0.6574767827987671, "learning_rate": 1.9548794456894297e-05, "loss": 2.3268, "step": 3337 }, { "epoch": 0.11, "grad_norm": 0.7186704874038696, "learning_rate": 1.9548478730474923e-05, "loss": 2.3037, "step": 3338 }, { "epoch": 0.11, "grad_norm": 0.759251058101654, "learning_rate": 1.9548162896182433e-05, "loss": 2.2765, "step": 3339 }, { "epoch": 0.11, "grad_norm": 0.6931256055831909, "learning_rate": 1.9547846954020393e-05, "loss": 2.2978, "step": 3340 }, { "epoch": 0.11, "grad_norm": 0.6591857075691223, "learning_rate": 1.9547530903992377e-05, "loss": 2.3768, "step": 3341 }, { "epoch": 0.11, "grad_norm": 0.710225522518158, "learning_rate": 1.954721474610195e-05, "loss": 2.2768, "step": 3342 }, { "epoch": 0.11, "grad_norm": 0.7074884176254272, "learning_rate": 1.9546898480352685e-05, "loss": 2.3369, "step": 3343 }, { "epoch": 0.11, "grad_norm": 0.7213272452354431, "learning_rate": 1.9546582106748158e-05, "loss": 2.3301, "step": 3344 }, { "epoch": 0.11, "grad_norm": 0.6728893518447876, "learning_rate": 1.954626562529194e-05, "loss": 2.3361, "step": 3345 }, { "epoch": 0.11, "grad_norm": 0.6933442950248718, "learning_rate": 1.9545949035987607e-05, "loss": 2.2059, "step": 3346 }, { "epoch": 0.11, "grad_norm": 0.6774097084999084, "learning_rate": 1.954563233883874e-05, "loss": 2.3017, "step": 3347 }, { "epoch": 0.11, "grad_norm": 0.6783869862556458, "learning_rate": 1.954531553384891e-05, "loss": 2.2787, "step": 3348 }, { "epoch": 0.11, "grad_norm": 0.6795644164085388, "learning_rate": 1.9544998621021702e-05, "loss": 2.2513, "step": 3349 }, { "epoch": 0.11, "grad_norm": 0.6820704936981201, "learning_rate": 1.9544681600360687e-05, "loss": 2.2568, "step": 3350 }, { "epoch": 0.11, "grad_norm": 0.7162044644355774, "learning_rate": 1.9544364471869458e-05, "loss": 2.3018, "step": 3351 }, { "epoch": 0.11, "grad_norm": 0.6596421003341675, "learning_rate": 1.9544047235551592e-05, "loss": 2.2835, "step": 3352 }, { "epoch": 0.11, "grad_norm": 0.6737750768661499, "learning_rate": 1.9543729891410674e-05, "loss": 2.2869, "step": 3353 }, { "epoch": 0.11, "grad_norm": 0.6758458018302917, "learning_rate": 1.9543412439450288e-05, "loss": 2.2493, "step": 3354 }, { "epoch": 0.11, "grad_norm": 0.6500502228736877, "learning_rate": 1.9543094879674022e-05, "loss": 2.2356, "step": 3355 }, { "epoch": 0.11, "grad_norm": 0.6987592577934265, "learning_rate": 1.954277721208546e-05, "loss": 2.3173, "step": 3356 }, { "epoch": 0.11, "grad_norm": 0.698062002658844, "learning_rate": 1.9542459436688198e-05, "loss": 2.3035, "step": 3357 }, { "epoch": 0.11, "grad_norm": 0.7098769545555115, "learning_rate": 1.954214155348582e-05, "loss": 2.2808, "step": 3358 }, { "epoch": 0.11, "grad_norm": 0.7098121643066406, "learning_rate": 1.954182356248192e-05, "loss": 2.2999, "step": 3359 }, { "epoch": 0.11, "grad_norm": 0.7207874655723572, "learning_rate": 1.9541505463680092e-05, "loss": 2.2884, "step": 3360 }, { "epoch": 0.11, "grad_norm": 0.6703974604606628, "learning_rate": 1.954118725708392e-05, "loss": 2.2689, "step": 3361 }, { "epoch": 0.11, "grad_norm": 0.6942721009254456, "learning_rate": 1.954086894269701e-05, "loss": 2.3201, "step": 3362 }, { "epoch": 0.11, "grad_norm": 0.6930775046348572, "learning_rate": 1.9540550520522953e-05, "loss": 2.285, "step": 3363 }, { "epoch": 0.11, "grad_norm": 0.6868101358413696, "learning_rate": 1.954023199056535e-05, "loss": 2.2863, "step": 3364 }, { "epoch": 0.11, "grad_norm": 0.6652754545211792, "learning_rate": 1.9539913352827794e-05, "loss": 2.206, "step": 3365 }, { "epoch": 0.11, "grad_norm": 0.6693503260612488, "learning_rate": 1.953959460731389e-05, "loss": 2.1681, "step": 3366 }, { "epoch": 0.11, "grad_norm": 0.6639902591705322, "learning_rate": 1.9539275754027235e-05, "loss": 2.2197, "step": 3367 }, { "epoch": 0.11, "grad_norm": 0.6737970113754272, "learning_rate": 1.953895679297144e-05, "loss": 2.3076, "step": 3368 }, { "epoch": 0.11, "grad_norm": 0.6923253536224365, "learning_rate": 1.953863772415009e-05, "loss": 2.3116, "step": 3369 }, { "epoch": 0.11, "grad_norm": 0.7079081535339355, "learning_rate": 1.953831854756681e-05, "loss": 2.3126, "step": 3370 }, { "epoch": 0.11, "grad_norm": 0.6975604891777039, "learning_rate": 1.9537999263225194e-05, "loss": 2.2651, "step": 3371 }, { "epoch": 0.11, "grad_norm": 0.6655405163764954, "learning_rate": 1.9537679871128853e-05, "loss": 2.3007, "step": 3372 }, { "epoch": 0.11, "grad_norm": 0.6903324127197266, "learning_rate": 1.953736037128139e-05, "loss": 2.3234, "step": 3373 }, { "epoch": 0.11, "grad_norm": 0.6861885190010071, "learning_rate": 1.9537040763686422e-05, "loss": 2.3528, "step": 3374 }, { "epoch": 0.11, "grad_norm": 0.6907678246498108, "learning_rate": 1.953672104834756e-05, "loss": 2.3082, "step": 3375 }, { "epoch": 0.11, "grad_norm": 0.6754046082496643, "learning_rate": 1.953640122526841e-05, "loss": 2.233, "step": 3376 }, { "epoch": 0.11, "grad_norm": 0.6609771847724915, "learning_rate": 1.953608129445259e-05, "loss": 2.3472, "step": 3377 }, { "epoch": 0.11, "grad_norm": 0.6661567687988281, "learning_rate": 1.953576125590371e-05, "loss": 2.2698, "step": 3378 }, { "epoch": 0.11, "grad_norm": 0.681016206741333, "learning_rate": 1.9535441109625387e-05, "loss": 2.2685, "step": 3379 }, { "epoch": 0.11, "grad_norm": 0.6722439527511597, "learning_rate": 1.9535120855621238e-05, "loss": 2.2453, "step": 3380 }, { "epoch": 0.11, "grad_norm": 0.6440585255622864, "learning_rate": 1.9534800493894884e-05, "loss": 2.3155, "step": 3381 }, { "epoch": 0.11, "grad_norm": 0.6670352220535278, "learning_rate": 1.953448002444994e-05, "loss": 2.3069, "step": 3382 }, { "epoch": 0.11, "grad_norm": 0.6978661417961121, "learning_rate": 1.953415944729003e-05, "loss": 2.3057, "step": 3383 }, { "epoch": 0.11, "grad_norm": 0.6492437720298767, "learning_rate": 1.9533838762418774e-05, "loss": 2.335, "step": 3384 }, { "epoch": 0.11, "grad_norm": 0.6747755408287048, "learning_rate": 1.9533517969839794e-05, "loss": 2.2508, "step": 3385 }, { "epoch": 0.11, "grad_norm": 0.6762723922729492, "learning_rate": 1.953319706955672e-05, "loss": 2.3085, "step": 3386 }, { "epoch": 0.11, "grad_norm": 0.6577224731445312, "learning_rate": 1.9532876061573164e-05, "loss": 2.232, "step": 3387 }, { "epoch": 0.11, "grad_norm": 0.6669553518295288, "learning_rate": 1.953255494589277e-05, "loss": 2.267, "step": 3388 }, { "epoch": 0.11, "grad_norm": 0.7034344673156738, "learning_rate": 1.953223372251915e-05, "loss": 2.3281, "step": 3389 }, { "epoch": 0.11, "grad_norm": 0.712338924407959, "learning_rate": 1.9531912391455942e-05, "loss": 2.2683, "step": 3390 }, { "epoch": 0.11, "grad_norm": 0.6566740274429321, "learning_rate": 1.9531590952706776e-05, "loss": 2.2568, "step": 3391 }, { "epoch": 0.11, "grad_norm": 0.7083081007003784, "learning_rate": 1.953126940627528e-05, "loss": 2.3304, "step": 3392 }, { "epoch": 0.11, "grad_norm": 0.6720786690711975, "learning_rate": 1.9530947752165086e-05, "loss": 2.2513, "step": 3393 }, { "epoch": 0.11, "grad_norm": 0.7112762331962585, "learning_rate": 1.9530625990379834e-05, "loss": 2.3022, "step": 3394 }, { "epoch": 0.11, "grad_norm": 0.6996901631355286, "learning_rate": 1.953030412092315e-05, "loss": 2.2459, "step": 3395 }, { "epoch": 0.11, "grad_norm": 0.7056000828742981, "learning_rate": 1.952998214379868e-05, "loss": 2.2842, "step": 3396 }, { "epoch": 0.11, "grad_norm": 0.7128042578697205, "learning_rate": 1.9529660059010056e-05, "loss": 2.2868, "step": 3397 }, { "epoch": 0.11, "grad_norm": 0.6723177433013916, "learning_rate": 1.9529337866560917e-05, "loss": 2.3254, "step": 3398 }, { "epoch": 0.11, "grad_norm": 0.6917299628257751, "learning_rate": 1.95290155664549e-05, "loss": 2.3654, "step": 3399 }, { "epoch": 0.11, "grad_norm": 0.6795265078544617, "learning_rate": 1.9528693158695654e-05, "loss": 2.2848, "step": 3400 }, { "epoch": 0.11, "grad_norm": 0.6936113834381104, "learning_rate": 1.9528370643286818e-05, "loss": 2.2222, "step": 3401 }, { "epoch": 0.11, "grad_norm": 0.7027393579483032, "learning_rate": 1.952804802023203e-05, "loss": 2.364, "step": 3402 }, { "epoch": 0.11, "grad_norm": 0.7291425466537476, "learning_rate": 1.9527725289534944e-05, "loss": 2.2969, "step": 3403 }, { "epoch": 0.11, "grad_norm": 0.6756417751312256, "learning_rate": 1.95274024511992e-05, "loss": 2.3043, "step": 3404 }, { "epoch": 0.11, "grad_norm": 0.6620833873748779, "learning_rate": 1.9527079505228445e-05, "loss": 2.307, "step": 3405 }, { "epoch": 0.11, "grad_norm": 0.6577024459838867, "learning_rate": 1.9526756451626333e-05, "loss": 2.3146, "step": 3406 }, { "epoch": 0.11, "grad_norm": 0.6745875477790833, "learning_rate": 1.9526433290396505e-05, "loss": 2.2838, "step": 3407 }, { "epoch": 0.11, "grad_norm": 0.6589615345001221, "learning_rate": 1.952611002154262e-05, "loss": 2.1902, "step": 3408 }, { "epoch": 0.11, "grad_norm": 0.7146021723747253, "learning_rate": 1.9525786645068326e-05, "loss": 2.2918, "step": 3409 }, { "epoch": 0.11, "grad_norm": 0.6813265681266785, "learning_rate": 1.9525463160977277e-05, "loss": 2.3083, "step": 3410 }, { "epoch": 0.11, "grad_norm": 0.6737810373306274, "learning_rate": 1.9525139569273128e-05, "loss": 2.3204, "step": 3411 }, { "epoch": 0.11, "grad_norm": 0.6791371703147888, "learning_rate": 1.9524815869959535e-05, "loss": 2.2571, "step": 3412 }, { "epoch": 0.11, "grad_norm": 0.6639089584350586, "learning_rate": 1.9524492063040153e-05, "loss": 2.2866, "step": 3413 }, { "epoch": 0.11, "grad_norm": 0.6872113943099976, "learning_rate": 1.9524168148518643e-05, "loss": 2.2505, "step": 3414 }, { "epoch": 0.11, "grad_norm": 0.6669588088989258, "learning_rate": 1.952384412639866e-05, "loss": 2.312, "step": 3415 }, { "epoch": 0.11, "grad_norm": 0.6643083691596985, "learning_rate": 1.952351999668387e-05, "loss": 2.3176, "step": 3416 }, { "epoch": 0.11, "grad_norm": 0.6882132291793823, "learning_rate": 1.9523195759377932e-05, "loss": 2.2089, "step": 3417 }, { "epoch": 0.11, "grad_norm": 0.6898518204689026, "learning_rate": 1.952287141448451e-05, "loss": 2.2675, "step": 3418 }, { "epoch": 0.11, "grad_norm": 0.7204471230506897, "learning_rate": 1.9522546962007266e-05, "loss": 2.3435, "step": 3419 }, { "epoch": 0.11, "grad_norm": 0.7124329209327698, "learning_rate": 1.9522222401949867e-05, "loss": 2.312, "step": 3420 }, { "epoch": 0.11, "grad_norm": 0.6496767401695251, "learning_rate": 1.952189773431598e-05, "loss": 2.2707, "step": 3421 }, { "epoch": 0.11, "grad_norm": 0.6923325657844543, "learning_rate": 1.9521572959109277e-05, "loss": 2.3195, "step": 3422 }, { "epoch": 0.11, "grad_norm": 0.6800845265388489, "learning_rate": 1.952124807633342e-05, "loss": 2.2783, "step": 3423 }, { "epoch": 0.11, "grad_norm": 0.7181073427200317, "learning_rate": 1.9520923085992083e-05, "loss": 2.2602, "step": 3424 }, { "epoch": 0.11, "grad_norm": 0.6994168758392334, "learning_rate": 1.9520597988088937e-05, "loss": 2.2398, "step": 3425 }, { "epoch": 0.11, "grad_norm": 0.6955270171165466, "learning_rate": 1.9520272782627652e-05, "loss": 2.2968, "step": 3426 }, { "epoch": 0.11, "grad_norm": 0.6842566728591919, "learning_rate": 1.9519947469611906e-05, "loss": 2.333, "step": 3427 }, { "epoch": 0.11, "grad_norm": 0.6695115566253662, "learning_rate": 1.951962204904537e-05, "loss": 2.3024, "step": 3428 }, { "epoch": 0.11, "grad_norm": 0.6924493312835693, "learning_rate": 1.9519296520931727e-05, "loss": 2.3159, "step": 3429 }, { "epoch": 0.11, "grad_norm": 0.6888512969017029, "learning_rate": 1.9518970885274654e-05, "loss": 2.3097, "step": 3430 }, { "epoch": 0.11, "grad_norm": 0.699921727180481, "learning_rate": 1.951864514207782e-05, "loss": 2.2789, "step": 3431 }, { "epoch": 0.11, "grad_norm": 0.6825907230377197, "learning_rate": 1.9518319291344915e-05, "loss": 2.2531, "step": 3432 }, { "epoch": 0.11, "grad_norm": 0.6498616337776184, "learning_rate": 1.9517993333079616e-05, "loss": 2.2994, "step": 3433 }, { "epoch": 0.11, "grad_norm": 0.6959145665168762, "learning_rate": 1.9517667267285605e-05, "loss": 2.3102, "step": 3434 }, { "epoch": 0.11, "grad_norm": 0.6896790266036987, "learning_rate": 1.9517341093966568e-05, "loss": 2.3168, "step": 3435 }, { "epoch": 0.11, "grad_norm": 0.6853213906288147, "learning_rate": 1.951701481312619e-05, "loss": 2.2811, "step": 3436 }, { "epoch": 0.11, "grad_norm": 0.6857879161834717, "learning_rate": 1.9516688424768154e-05, "loss": 2.2829, "step": 3437 }, { "epoch": 0.11, "grad_norm": 0.670282244682312, "learning_rate": 1.9516361928896152e-05, "loss": 2.311, "step": 3438 }, { "epoch": 0.11, "grad_norm": 0.6659310460090637, "learning_rate": 1.951603532551387e-05, "loss": 2.2545, "step": 3439 }, { "epoch": 0.11, "grad_norm": 0.6962447166442871, "learning_rate": 1.9515708614624995e-05, "loss": 2.2005, "step": 3440 }, { "epoch": 0.11, "grad_norm": 0.6639426946640015, "learning_rate": 1.9515381796233225e-05, "loss": 2.2276, "step": 3441 }, { "epoch": 0.11, "grad_norm": 0.6607621908187866, "learning_rate": 1.9515054870342243e-05, "loss": 2.3179, "step": 3442 }, { "epoch": 0.11, "grad_norm": 0.6547995209693909, "learning_rate": 1.951472783695575e-05, "loss": 2.2112, "step": 3443 }, { "epoch": 0.11, "grad_norm": 0.6812206506729126, "learning_rate": 1.951440069607744e-05, "loss": 2.2735, "step": 3444 }, { "epoch": 0.11, "grad_norm": 0.6637999415397644, "learning_rate": 1.9514073447711007e-05, "loss": 2.2831, "step": 3445 }, { "epoch": 0.11, "grad_norm": 0.67435622215271, "learning_rate": 1.9513746091860145e-05, "loss": 2.2981, "step": 3446 }, { "epoch": 0.11, "grad_norm": 0.6635550856590271, "learning_rate": 1.951341862852856e-05, "loss": 2.2676, "step": 3447 }, { "epoch": 0.11, "grad_norm": 0.6529548764228821, "learning_rate": 1.951309105771994e-05, "loss": 2.3021, "step": 3448 }, { "epoch": 0.11, "grad_norm": 0.6742626428604126, "learning_rate": 1.9512763379437997e-05, "loss": 2.2425, "step": 3449 }, { "epoch": 0.11, "grad_norm": 0.6873909831047058, "learning_rate": 1.9512435593686425e-05, "loss": 2.3165, "step": 3450 }, { "epoch": 0.11, "grad_norm": 0.6663831472396851, "learning_rate": 1.951210770046893e-05, "loss": 2.3146, "step": 3451 }, { "epoch": 0.11, "grad_norm": 0.6812635064125061, "learning_rate": 1.9511779699789223e-05, "loss": 2.286, "step": 3452 }, { "epoch": 0.11, "grad_norm": 0.6595397591590881, "learning_rate": 1.9511451591651e-05, "loss": 2.236, "step": 3453 }, { "epoch": 0.11, "grad_norm": 0.6729317903518677, "learning_rate": 1.951112337605797e-05, "loss": 2.2351, "step": 3454 }, { "epoch": 0.11, "grad_norm": 0.6776654124259949, "learning_rate": 1.9510795053013843e-05, "loss": 2.2852, "step": 3455 }, { "epoch": 0.11, "grad_norm": 0.6912583112716675, "learning_rate": 1.9510466622522327e-05, "loss": 2.2225, "step": 3456 }, { "epoch": 0.12, "grad_norm": 0.6785014867782593, "learning_rate": 1.9510138084587135e-05, "loss": 2.2637, "step": 3457 }, { "epoch": 0.12, "grad_norm": 0.7126516103744507, "learning_rate": 1.9509809439211973e-05, "loss": 2.2875, "step": 3458 }, { "epoch": 0.12, "grad_norm": 0.6923148036003113, "learning_rate": 1.950948068640056e-05, "loss": 2.2884, "step": 3459 }, { "epoch": 0.12, "grad_norm": 0.6734636425971985, "learning_rate": 1.9509151826156606e-05, "loss": 2.2916, "step": 3460 }, { "epoch": 0.12, "grad_norm": 0.6748021841049194, "learning_rate": 1.950882285848383e-05, "loss": 2.2819, "step": 3461 }, { "epoch": 0.12, "grad_norm": 0.6771445870399475, "learning_rate": 1.9508493783385942e-05, "loss": 2.2334, "step": 3462 }, { "epoch": 0.12, "grad_norm": 0.6622663140296936, "learning_rate": 1.9508164600866662e-05, "loss": 2.2817, "step": 3463 }, { "epoch": 0.12, "grad_norm": 0.7023286819458008, "learning_rate": 1.950783531092972e-05, "loss": 2.3262, "step": 3464 }, { "epoch": 0.12, "grad_norm": 0.6604294776916504, "learning_rate": 1.950750591357882e-05, "loss": 2.2929, "step": 3465 }, { "epoch": 0.12, "grad_norm": 0.7139497399330139, "learning_rate": 1.950717640881769e-05, "loss": 2.2682, "step": 3466 }, { "epoch": 0.12, "grad_norm": 0.6750184893608093, "learning_rate": 1.9506846796650056e-05, "loss": 2.3418, "step": 3467 }, { "epoch": 0.12, "grad_norm": 0.6928868889808655, "learning_rate": 1.9506517077079632e-05, "loss": 2.2963, "step": 3468 }, { "epoch": 0.12, "grad_norm": 0.6937859058380127, "learning_rate": 1.9506187250110155e-05, "loss": 2.3062, "step": 3469 }, { "epoch": 0.12, "grad_norm": 0.6828649044036865, "learning_rate": 1.9505857315745346e-05, "loss": 2.2334, "step": 3470 }, { "epoch": 0.12, "grad_norm": 0.6752848029136658, "learning_rate": 1.950552727398893e-05, "loss": 2.3374, "step": 3471 }, { "epoch": 0.12, "grad_norm": 0.6865189075469971, "learning_rate": 1.950519712484464e-05, "loss": 2.3397, "step": 3472 }, { "epoch": 0.12, "grad_norm": 0.6818467378616333, "learning_rate": 1.95048668683162e-05, "loss": 2.247, "step": 3473 }, { "epoch": 0.12, "grad_norm": 0.6738606691360474, "learning_rate": 1.9504536504407345e-05, "loss": 2.2814, "step": 3474 }, { "epoch": 0.12, "grad_norm": 0.6744799017906189, "learning_rate": 1.950420603312181e-05, "loss": 2.2357, "step": 3475 }, { "epoch": 0.12, "grad_norm": 0.6895490884780884, "learning_rate": 1.950387545446332e-05, "loss": 2.3056, "step": 3476 }, { "epoch": 0.12, "grad_norm": 0.6931930184364319, "learning_rate": 1.950354476843562e-05, "loss": 2.2138, "step": 3477 }, { "epoch": 0.12, "grad_norm": 0.6776548624038696, "learning_rate": 1.950321397504244e-05, "loss": 2.2876, "step": 3478 }, { "epoch": 0.12, "grad_norm": 0.6878694891929626, "learning_rate": 1.9502883074287516e-05, "loss": 2.333, "step": 3479 }, { "epoch": 0.12, "grad_norm": 0.6741803288459778, "learning_rate": 1.950255206617459e-05, "loss": 2.2588, "step": 3480 }, { "epoch": 0.12, "grad_norm": 0.6665170788764954, "learning_rate": 1.9502220950707397e-05, "loss": 2.2819, "step": 3481 }, { "epoch": 0.12, "grad_norm": 0.6764044761657715, "learning_rate": 1.9501889727889686e-05, "loss": 2.3122, "step": 3482 }, { "epoch": 0.12, "grad_norm": 0.6899176836013794, "learning_rate": 1.9501558397725186e-05, "loss": 2.3786, "step": 3483 }, { "epoch": 0.12, "grad_norm": 0.6570963859558105, "learning_rate": 1.950122696021765e-05, "loss": 2.2498, "step": 3484 }, { "epoch": 0.12, "grad_norm": 0.6669249534606934, "learning_rate": 1.9500895415370823e-05, "loss": 2.3338, "step": 3485 }, { "epoch": 0.12, "grad_norm": 0.6689350605010986, "learning_rate": 1.9500563763188445e-05, "loss": 2.3312, "step": 3486 }, { "epoch": 0.12, "grad_norm": 0.6737769246101379, "learning_rate": 1.950023200367427e-05, "loss": 2.2852, "step": 3487 }, { "epoch": 0.12, "grad_norm": 0.7012941241264343, "learning_rate": 1.9499900136832036e-05, "loss": 2.2931, "step": 3488 }, { "epoch": 0.12, "grad_norm": 0.6590933799743652, "learning_rate": 1.9499568162665503e-05, "loss": 2.2476, "step": 3489 }, { "epoch": 0.12, "grad_norm": 0.6714367270469666, "learning_rate": 1.949923608117841e-05, "loss": 2.2868, "step": 3490 }, { "epoch": 0.12, "grad_norm": 0.6752423048019409, "learning_rate": 1.949890389237452e-05, "loss": 2.2997, "step": 3491 }, { "epoch": 0.12, "grad_norm": 0.7171645164489746, "learning_rate": 1.949857159625758e-05, "loss": 2.325, "step": 3492 }, { "epoch": 0.12, "grad_norm": 0.6709889769554138, "learning_rate": 1.9498239192831342e-05, "loss": 2.213, "step": 3493 }, { "epoch": 0.12, "grad_norm": 0.6939496994018555, "learning_rate": 1.9497906682099564e-05, "loss": 2.3505, "step": 3494 }, { "epoch": 0.12, "grad_norm": 0.6679947376251221, "learning_rate": 1.9497574064066008e-05, "loss": 2.3483, "step": 3495 }, { "epoch": 0.12, "grad_norm": 0.685558021068573, "learning_rate": 1.9497241338734424e-05, "loss": 2.3125, "step": 3496 }, { "epoch": 0.12, "grad_norm": 0.6740711331367493, "learning_rate": 1.949690850610857e-05, "loss": 2.3581, "step": 3497 }, { "epoch": 0.12, "grad_norm": 0.6664581894874573, "learning_rate": 1.949657556619221e-05, "loss": 2.2713, "step": 3498 }, { "epoch": 0.12, "grad_norm": 0.6670740842819214, "learning_rate": 1.9496242518989108e-05, "loss": 2.2952, "step": 3499 }, { "epoch": 0.12, "grad_norm": 0.6904124021530151, "learning_rate": 1.949590936450302e-05, "loss": 2.288, "step": 3500 }, { "epoch": 0.12, "grad_norm": 0.7095031142234802, "learning_rate": 1.9495576102737715e-05, "loss": 2.2638, "step": 3501 }, { "epoch": 0.12, "grad_norm": 0.6857951283454895, "learning_rate": 1.9495242733696958e-05, "loss": 2.3004, "step": 3502 }, { "epoch": 0.12, "grad_norm": 0.7024117112159729, "learning_rate": 1.949490925738451e-05, "loss": 2.2699, "step": 3503 }, { "epoch": 0.12, "grad_norm": 0.6955640316009521, "learning_rate": 1.9494575673804145e-05, "loss": 2.3566, "step": 3504 }, { "epoch": 0.12, "grad_norm": 0.671813428401947, "learning_rate": 1.9494241982959624e-05, "loss": 2.2246, "step": 3505 }, { "epoch": 0.12, "grad_norm": 0.6745615601539612, "learning_rate": 1.9493908184854727e-05, "loss": 2.3297, "step": 3506 }, { "epoch": 0.12, "grad_norm": 0.7011864185333252, "learning_rate": 1.9493574279493213e-05, "loss": 2.2687, "step": 3507 }, { "epoch": 0.12, "grad_norm": 0.6664074659347534, "learning_rate": 1.9493240266878866e-05, "loss": 2.3133, "step": 3508 }, { "epoch": 0.12, "grad_norm": 0.6848646998405457, "learning_rate": 1.949290614701545e-05, "loss": 2.2742, "step": 3509 }, { "epoch": 0.12, "grad_norm": 0.7012177109718323, "learning_rate": 1.9492571919906747e-05, "loss": 2.2487, "step": 3510 }, { "epoch": 0.12, "grad_norm": 0.6619529724121094, "learning_rate": 1.9492237585556527e-05, "loss": 2.3242, "step": 3511 }, { "epoch": 0.12, "grad_norm": 0.6916337609291077, "learning_rate": 1.949190314396857e-05, "loss": 2.2456, "step": 3512 }, { "epoch": 0.12, "grad_norm": 0.674776554107666, "learning_rate": 1.9491568595146657e-05, "loss": 2.2046, "step": 3513 }, { "epoch": 0.12, "grad_norm": 0.6817277669906616, "learning_rate": 1.949123393909456e-05, "loss": 2.3114, "step": 3514 }, { "epoch": 0.12, "grad_norm": 0.6720808148384094, "learning_rate": 1.9490899175816068e-05, "loss": 2.3064, "step": 3515 }, { "epoch": 0.12, "grad_norm": 0.6769091486930847, "learning_rate": 1.9490564305314958e-05, "loss": 2.3276, "step": 3516 }, { "epoch": 0.12, "grad_norm": 0.686509907245636, "learning_rate": 1.9490229327595015e-05, "loss": 2.2504, "step": 3517 }, { "epoch": 0.12, "grad_norm": 0.696013331413269, "learning_rate": 1.9489894242660023e-05, "loss": 2.245, "step": 3518 }, { "epoch": 0.12, "grad_norm": 0.6892712712287903, "learning_rate": 1.9489559050513767e-05, "loss": 2.2722, "step": 3519 }, { "epoch": 0.12, "grad_norm": 0.6955429315567017, "learning_rate": 1.9489223751160035e-05, "loss": 2.2573, "step": 3520 }, { "epoch": 0.12, "grad_norm": 0.6766269207000732, "learning_rate": 1.948888834460261e-05, "loss": 2.349, "step": 3521 }, { "epoch": 0.12, "grad_norm": 0.7339362502098083, "learning_rate": 1.9488552830845294e-05, "loss": 2.2768, "step": 3522 }, { "epoch": 0.12, "grad_norm": 0.7005716562271118, "learning_rate": 1.948821720989186e-05, "loss": 2.3168, "step": 3523 }, { "epoch": 0.12, "grad_norm": 0.6667356491088867, "learning_rate": 1.9487881481746114e-05, "loss": 2.3052, "step": 3524 }, { "epoch": 0.12, "grad_norm": 0.7022951245307922, "learning_rate": 1.9487545646411844e-05, "loss": 2.3252, "step": 3525 }, { "epoch": 0.12, "grad_norm": 0.71285480260849, "learning_rate": 1.948720970389284e-05, "loss": 2.2532, "step": 3526 }, { "epoch": 0.12, "grad_norm": 0.7096241116523743, "learning_rate": 1.94868736541929e-05, "loss": 2.297, "step": 3527 }, { "epoch": 0.12, "grad_norm": 0.7043609023094177, "learning_rate": 1.9486537497315824e-05, "loss": 2.3178, "step": 3528 }, { "epoch": 0.12, "grad_norm": 0.6503707766532898, "learning_rate": 1.948620123326541e-05, "loss": 2.2669, "step": 3529 }, { "epoch": 0.12, "grad_norm": 0.6767188310623169, "learning_rate": 1.9485864862045448e-05, "loss": 2.2865, "step": 3530 }, { "epoch": 0.12, "grad_norm": 0.6910713911056519, "learning_rate": 1.948552838365975e-05, "loss": 2.2707, "step": 3531 }, { "epoch": 0.12, "grad_norm": 0.685518741607666, "learning_rate": 1.9485191798112105e-05, "loss": 2.2775, "step": 3532 }, { "epoch": 0.12, "grad_norm": 0.7035486102104187, "learning_rate": 1.948485510540633e-05, "loss": 2.3289, "step": 3533 }, { "epoch": 0.12, "grad_norm": 0.6760866045951843, "learning_rate": 1.9484518305546213e-05, "loss": 2.2802, "step": 3534 }, { "epoch": 0.12, "grad_norm": 0.6866413354873657, "learning_rate": 1.9484181398535568e-05, "loss": 2.3452, "step": 3535 }, { "epoch": 0.12, "grad_norm": 0.6818528771400452, "learning_rate": 1.9483844384378203e-05, "loss": 2.2618, "step": 3536 }, { "epoch": 0.12, "grad_norm": 0.6678909659385681, "learning_rate": 1.948350726307792e-05, "loss": 2.2818, "step": 3537 }, { "epoch": 0.12, "grad_norm": 0.6868831515312195, "learning_rate": 1.9483170034638533e-05, "loss": 2.2854, "step": 3538 }, { "epoch": 0.12, "grad_norm": 0.6847174167633057, "learning_rate": 1.9482832699063844e-05, "loss": 2.1954, "step": 3539 }, { "epoch": 0.12, "grad_norm": 0.6939400434494019, "learning_rate": 1.948249525635767e-05, "loss": 2.2947, "step": 3540 }, { "epoch": 0.12, "grad_norm": 0.7136675119400024, "learning_rate": 1.9482157706523822e-05, "loss": 2.2846, "step": 3541 }, { "epoch": 0.12, "grad_norm": 0.6572127938270569, "learning_rate": 1.9481820049566113e-05, "loss": 2.2469, "step": 3542 }, { "epoch": 0.12, "grad_norm": 0.6830535531044006, "learning_rate": 1.948148228548836e-05, "loss": 2.2468, "step": 3543 }, { "epoch": 0.12, "grad_norm": 0.6657615900039673, "learning_rate": 1.9481144414294375e-05, "loss": 2.2803, "step": 3544 }, { "epoch": 0.12, "grad_norm": 0.7039142847061157, "learning_rate": 1.948080643598798e-05, "loss": 2.355, "step": 3545 }, { "epoch": 0.12, "grad_norm": 0.7190167903900146, "learning_rate": 1.9480468350572988e-05, "loss": 2.2113, "step": 3546 }, { "epoch": 0.12, "grad_norm": 0.6970165371894836, "learning_rate": 1.948013015805322e-05, "loss": 2.2803, "step": 3547 }, { "epoch": 0.12, "grad_norm": 0.6995370388031006, "learning_rate": 1.9479791858432494e-05, "loss": 2.2955, "step": 3548 }, { "epoch": 0.12, "grad_norm": 0.7019973397254944, "learning_rate": 1.947945345171464e-05, "loss": 2.2102, "step": 3549 }, { "epoch": 0.12, "grad_norm": 0.6501315832138062, "learning_rate": 1.9479114937903478e-05, "loss": 2.2773, "step": 3550 }, { "epoch": 0.12, "grad_norm": 0.6660248637199402, "learning_rate": 1.9478776317002824e-05, "loss": 2.2877, "step": 3551 }, { "epoch": 0.12, "grad_norm": 0.6723596453666687, "learning_rate": 1.9478437589016518e-05, "loss": 2.2659, "step": 3552 }, { "epoch": 0.12, "grad_norm": 0.6939621567726135, "learning_rate": 1.9478098753948377e-05, "loss": 2.2179, "step": 3553 }, { "epoch": 0.12, "grad_norm": 0.6738900542259216, "learning_rate": 1.9477759811802228e-05, "loss": 2.2073, "step": 3554 }, { "epoch": 0.12, "grad_norm": 0.7160170674324036, "learning_rate": 1.9477420762581905e-05, "loss": 2.2912, "step": 3555 }, { "epoch": 0.12, "grad_norm": 0.6936720609664917, "learning_rate": 1.9477081606291233e-05, "loss": 2.2782, "step": 3556 }, { "epoch": 0.12, "grad_norm": 0.7159092426300049, "learning_rate": 1.9476742342934053e-05, "loss": 2.3297, "step": 3557 }, { "epoch": 0.12, "grad_norm": 0.669593870639801, "learning_rate": 1.947640297251419e-05, "loss": 2.2583, "step": 3558 }, { "epoch": 0.12, "grad_norm": 0.6970084309577942, "learning_rate": 1.947606349503548e-05, "loss": 2.2254, "step": 3559 }, { "epoch": 0.12, "grad_norm": 0.6627599596977234, "learning_rate": 1.9475723910501756e-05, "loss": 2.2442, "step": 3560 }, { "epoch": 0.12, "grad_norm": 0.6634886264801025, "learning_rate": 1.9475384218916857e-05, "loss": 2.3363, "step": 3561 }, { "epoch": 0.12, "grad_norm": 0.6702818870544434, "learning_rate": 1.9475044420284622e-05, "loss": 2.2577, "step": 3562 }, { "epoch": 0.12, "grad_norm": 0.7019290328025818, "learning_rate": 1.9474704514608886e-05, "loss": 2.2377, "step": 3563 }, { "epoch": 0.12, "grad_norm": 0.7041156888008118, "learning_rate": 1.9474364501893492e-05, "loss": 2.2244, "step": 3564 }, { "epoch": 0.12, "grad_norm": 0.6737179160118103, "learning_rate": 1.947402438214228e-05, "loss": 2.3119, "step": 3565 }, { "epoch": 0.12, "grad_norm": 0.6735750436782837, "learning_rate": 1.9473684155359093e-05, "loss": 2.2836, "step": 3566 }, { "epoch": 0.12, "grad_norm": 0.7283354997634888, "learning_rate": 1.947334382154778e-05, "loss": 2.3148, "step": 3567 }, { "epoch": 0.12, "grad_norm": 0.7039659023284912, "learning_rate": 1.947300338071217e-05, "loss": 2.252, "step": 3568 }, { "epoch": 0.12, "grad_norm": 0.6613754034042358, "learning_rate": 1.947266283285613e-05, "loss": 2.2106, "step": 3569 }, { "epoch": 0.12, "grad_norm": 0.6886136531829834, "learning_rate": 1.947232217798349e-05, "loss": 2.3261, "step": 3570 }, { "epoch": 0.12, "grad_norm": 0.6867454051971436, "learning_rate": 1.9471981416098105e-05, "loss": 2.2806, "step": 3571 }, { "epoch": 0.12, "grad_norm": 0.6932241320610046, "learning_rate": 1.947164054720383e-05, "loss": 2.3267, "step": 3572 }, { "epoch": 0.12, "grad_norm": 0.685605525970459, "learning_rate": 1.947129957130451e-05, "loss": 2.3084, "step": 3573 }, { "epoch": 0.12, "grad_norm": 0.668707013130188, "learning_rate": 1.9470958488403994e-05, "loss": 2.3182, "step": 3574 }, { "epoch": 0.12, "grad_norm": 0.6734517812728882, "learning_rate": 1.9470617298506143e-05, "loss": 2.295, "step": 3575 }, { "epoch": 0.12, "grad_norm": 0.6585891842842102, "learning_rate": 1.9470276001614804e-05, "loss": 2.283, "step": 3576 }, { "epoch": 0.12, "grad_norm": 0.6893803477287292, "learning_rate": 1.9469934597733845e-05, "loss": 2.2767, "step": 3577 }, { "epoch": 0.12, "grad_norm": 0.713064432144165, "learning_rate": 1.9469593086867106e-05, "loss": 2.3564, "step": 3578 }, { "epoch": 0.12, "grad_norm": 0.7094035744667053, "learning_rate": 1.946925146901846e-05, "loss": 2.284, "step": 3579 }, { "epoch": 0.12, "grad_norm": 0.6580082178115845, "learning_rate": 1.9468909744191757e-05, "loss": 2.2991, "step": 3580 }, { "epoch": 0.12, "grad_norm": 0.6811079978942871, "learning_rate": 1.9468567912390865e-05, "loss": 2.2529, "step": 3581 }, { "epoch": 0.12, "grad_norm": 0.6906937956809998, "learning_rate": 1.9468225973619635e-05, "loss": 2.3296, "step": 3582 }, { "epoch": 0.12, "grad_norm": 0.690157949924469, "learning_rate": 1.9467883927881944e-05, "loss": 2.2139, "step": 3583 }, { "epoch": 0.12, "grad_norm": 0.7017017602920532, "learning_rate": 1.9467541775181648e-05, "loss": 2.2057, "step": 3584 }, { "epoch": 0.12, "grad_norm": 0.6760733127593994, "learning_rate": 1.946719951552261e-05, "loss": 2.2971, "step": 3585 }, { "epoch": 0.12, "grad_norm": 0.6878550052642822, "learning_rate": 1.94668571489087e-05, "loss": 2.3081, "step": 3586 }, { "epoch": 0.12, "grad_norm": 0.6624292135238647, "learning_rate": 1.946651467534379e-05, "loss": 2.298, "step": 3587 }, { "epoch": 0.12, "grad_norm": 0.7125348448753357, "learning_rate": 1.9466172094831742e-05, "loss": 2.3015, "step": 3588 }, { "epoch": 0.12, "grad_norm": 0.689313530921936, "learning_rate": 1.9465829407376432e-05, "loss": 2.31, "step": 3589 }, { "epoch": 0.12, "grad_norm": 0.7020419239997864, "learning_rate": 1.9465486612981725e-05, "loss": 2.3112, "step": 3590 }, { "epoch": 0.12, "grad_norm": 0.7126964926719666, "learning_rate": 1.94651437116515e-05, "loss": 2.2397, "step": 3591 }, { "epoch": 0.12, "grad_norm": 0.6662706136703491, "learning_rate": 1.946480070338963e-05, "loss": 2.2541, "step": 3592 }, { "epoch": 0.12, "grad_norm": 0.6958052515983582, "learning_rate": 1.946445758819999e-05, "loss": 2.3148, "step": 3593 }, { "epoch": 0.12, "grad_norm": 0.6823972463607788, "learning_rate": 1.9464114366086448e-05, "loss": 2.3385, "step": 3594 }, { "epoch": 0.12, "grad_norm": 0.6693614721298218, "learning_rate": 1.9463771037052893e-05, "loss": 2.2784, "step": 3595 }, { "epoch": 0.12, "grad_norm": 0.6864728331565857, "learning_rate": 1.9463427601103197e-05, "loss": 2.2771, "step": 3596 }, { "epoch": 0.12, "grad_norm": 0.7275521159172058, "learning_rate": 1.9463084058241243e-05, "loss": 2.3019, "step": 3597 }, { "epoch": 0.12, "grad_norm": 0.6874843239784241, "learning_rate": 1.9462740408470914e-05, "loss": 2.2823, "step": 3598 }, { "epoch": 0.12, "grad_norm": 0.6993808746337891, "learning_rate": 1.9462396651796086e-05, "loss": 2.2229, "step": 3599 }, { "epoch": 0.12, "grad_norm": 0.6955780982971191, "learning_rate": 1.9462052788220648e-05, "loss": 2.2508, "step": 3600 }, { "epoch": 0.12, "grad_norm": 0.6660264730453491, "learning_rate": 1.9461708817748483e-05, "loss": 2.2563, "step": 3601 }, { "epoch": 0.12, "grad_norm": 0.7100155353546143, "learning_rate": 1.9461364740383474e-05, "loss": 2.2477, "step": 3602 }, { "epoch": 0.12, "grad_norm": 0.6789041757583618, "learning_rate": 1.9461020556129514e-05, "loss": 2.3007, "step": 3603 }, { "epoch": 0.12, "grad_norm": 0.706390380859375, "learning_rate": 1.946067626499049e-05, "loss": 2.3153, "step": 3604 }, { "epoch": 0.12, "grad_norm": 0.6905703544616699, "learning_rate": 1.9460331866970286e-05, "loss": 2.2993, "step": 3605 }, { "epoch": 0.12, "grad_norm": 0.7201916575431824, "learning_rate": 1.94599873620728e-05, "loss": 2.2688, "step": 3606 }, { "epoch": 0.12, "grad_norm": 0.7137526273727417, "learning_rate": 1.9459642750301918e-05, "loss": 2.2158, "step": 3607 }, { "epoch": 0.12, "grad_norm": 0.7265249490737915, "learning_rate": 1.945929803166154e-05, "loss": 2.3062, "step": 3608 }, { "epoch": 0.12, "grad_norm": 0.6788699626922607, "learning_rate": 1.9458953206155554e-05, "loss": 2.2542, "step": 3609 }, { "epoch": 0.12, "grad_norm": 0.7030156850814819, "learning_rate": 1.9458608273787854e-05, "loss": 2.2353, "step": 3610 }, { "epoch": 0.12, "grad_norm": 0.6762740015983582, "learning_rate": 1.9458263234562348e-05, "loss": 2.3069, "step": 3611 }, { "epoch": 0.12, "grad_norm": 0.7022709250450134, "learning_rate": 1.9457918088482923e-05, "loss": 2.3291, "step": 3612 }, { "epoch": 0.12, "grad_norm": 0.6675629019737244, "learning_rate": 1.9457572835553484e-05, "loss": 2.2963, "step": 3613 }, { "epoch": 0.12, "grad_norm": 0.6842197179794312, "learning_rate": 1.945722747577793e-05, "loss": 2.258, "step": 3614 }, { "epoch": 0.12, "grad_norm": 0.6655317544937134, "learning_rate": 1.945688200916016e-05, "loss": 2.2494, "step": 3615 }, { "epoch": 0.12, "grad_norm": 0.6737083792686462, "learning_rate": 1.9456536435704083e-05, "loss": 2.3036, "step": 3616 }, { "epoch": 0.12, "grad_norm": 0.6749952435493469, "learning_rate": 1.94561907554136e-05, "loss": 2.2809, "step": 3617 }, { "epoch": 0.12, "grad_norm": 0.7076355814933777, "learning_rate": 1.9455844968292613e-05, "loss": 2.2699, "step": 3618 }, { "epoch": 0.12, "grad_norm": 0.7070693969726562, "learning_rate": 1.945549907434503e-05, "loss": 2.312, "step": 3619 }, { "epoch": 0.12, "grad_norm": 0.7362203598022461, "learning_rate": 1.945515307357476e-05, "loss": 2.2756, "step": 3620 }, { "epoch": 0.12, "grad_norm": 0.6687037944793701, "learning_rate": 1.9454806965985716e-05, "loss": 2.2226, "step": 3621 }, { "epoch": 0.12, "grad_norm": 0.6770171523094177, "learning_rate": 1.94544607515818e-05, "loss": 2.2967, "step": 3622 }, { "epoch": 0.12, "grad_norm": 0.6660905480384827, "learning_rate": 1.945411443036693e-05, "loss": 2.303, "step": 3623 }, { "epoch": 0.12, "grad_norm": 0.6492541432380676, "learning_rate": 1.9453768002345013e-05, "loss": 2.2959, "step": 3624 }, { "epoch": 0.12, "grad_norm": 0.7097062468528748, "learning_rate": 1.9453421467519967e-05, "loss": 2.3442, "step": 3625 }, { "epoch": 0.12, "grad_norm": 0.6908991932868958, "learning_rate": 1.945307482589571e-05, "loss": 2.3162, "step": 3626 }, { "epoch": 0.12, "grad_norm": 0.7540254592895508, "learning_rate": 1.9452728077476146e-05, "loss": 2.2021, "step": 3627 }, { "epoch": 0.12, "grad_norm": 0.6622290015220642, "learning_rate": 1.9452381222265204e-05, "loss": 2.2724, "step": 3628 }, { "epoch": 0.12, "grad_norm": 0.6925186514854431, "learning_rate": 1.9452034260266796e-05, "loss": 2.2525, "step": 3629 }, { "epoch": 0.12, "grad_norm": 0.6948437094688416, "learning_rate": 1.945168719148485e-05, "loss": 2.3201, "step": 3630 }, { "epoch": 0.12, "grad_norm": 0.6924076080322266, "learning_rate": 1.9451340015923275e-05, "loss": 2.2381, "step": 3631 }, { "epoch": 0.12, "grad_norm": 0.7558528780937195, "learning_rate": 1.9450992733586e-05, "loss": 2.2655, "step": 3632 }, { "epoch": 0.12, "grad_norm": 0.6910993456840515, "learning_rate": 1.945064534447695e-05, "loss": 2.3398, "step": 3633 }, { "epoch": 0.12, "grad_norm": 0.7183420658111572, "learning_rate": 1.945029784860005e-05, "loss": 2.3202, "step": 3634 }, { "epoch": 0.12, "grad_norm": 0.6935462951660156, "learning_rate": 1.944995024595922e-05, "loss": 2.2775, "step": 3635 }, { "epoch": 0.12, "grad_norm": 0.6807013750076294, "learning_rate": 1.944960253655839e-05, "loss": 2.236, "step": 3636 }, { "epoch": 0.12, "grad_norm": 0.7202365398406982, "learning_rate": 1.9449254720401492e-05, "loss": 2.256, "step": 3637 }, { "epoch": 0.12, "grad_norm": 0.7282215356826782, "learning_rate": 1.944890679749245e-05, "loss": 2.1769, "step": 3638 }, { "epoch": 0.12, "grad_norm": 0.6830108165740967, "learning_rate": 1.9448558767835194e-05, "loss": 2.283, "step": 3639 }, { "epoch": 0.12, "grad_norm": 0.7019248008728027, "learning_rate": 1.9448210631433662e-05, "loss": 2.2922, "step": 3640 }, { "epoch": 0.12, "grad_norm": 0.7160152792930603, "learning_rate": 1.9447862388291782e-05, "loss": 2.2644, "step": 3641 }, { "epoch": 0.12, "grad_norm": 0.681394100189209, "learning_rate": 1.944751403841349e-05, "loss": 2.3206, "step": 3642 }, { "epoch": 0.12, "grad_norm": 0.6696096658706665, "learning_rate": 1.944716558180272e-05, "loss": 2.2646, "step": 3643 }, { "epoch": 0.12, "grad_norm": 0.6756918430328369, "learning_rate": 1.9446817018463412e-05, "loss": 2.2412, "step": 3644 }, { "epoch": 0.12, "grad_norm": 0.6465837359428406, "learning_rate": 1.94464683483995e-05, "loss": 2.3048, "step": 3645 }, { "epoch": 0.12, "grad_norm": 0.6895812749862671, "learning_rate": 1.9446119571614925e-05, "loss": 2.297, "step": 3646 }, { "epoch": 0.12, "grad_norm": 0.6861203908920288, "learning_rate": 1.944577068811363e-05, "loss": 2.3233, "step": 3647 }, { "epoch": 0.12, "grad_norm": 0.6962707042694092, "learning_rate": 1.944542169789955e-05, "loss": 2.2271, "step": 3648 }, { "epoch": 0.12, "grad_norm": 0.660060465335846, "learning_rate": 1.9445072600976633e-05, "loss": 2.2757, "step": 3649 }, { "epoch": 0.12, "grad_norm": 0.692184567451477, "learning_rate": 1.944472339734882e-05, "loss": 2.2959, "step": 3650 }, { "epoch": 0.12, "grad_norm": 0.7133738398551941, "learning_rate": 1.9444374087020057e-05, "loss": 2.3268, "step": 3651 }, { "epoch": 0.12, "grad_norm": 0.6811637282371521, "learning_rate": 1.9444024669994294e-05, "loss": 2.2814, "step": 3652 }, { "epoch": 0.12, "grad_norm": 0.7280363440513611, "learning_rate": 1.9443675146275468e-05, "loss": 2.2434, "step": 3653 }, { "epoch": 0.12, "grad_norm": 0.7025060057640076, "learning_rate": 1.944332551586754e-05, "loss": 2.3152, "step": 3654 }, { "epoch": 0.12, "grad_norm": 0.6796720027923584, "learning_rate": 1.9442975778774453e-05, "loss": 2.3378, "step": 3655 }, { "epoch": 0.12, "grad_norm": 0.6745999455451965, "learning_rate": 1.9442625935000162e-05, "loss": 2.331, "step": 3656 }, { "epoch": 0.12, "grad_norm": 0.6987726092338562, "learning_rate": 1.9442275984548614e-05, "loss": 2.3179, "step": 3657 }, { "epoch": 0.12, "grad_norm": 0.6735572218894958, "learning_rate": 1.944192592742377e-05, "loss": 2.2812, "step": 3658 }, { "epoch": 0.12, "grad_norm": 0.671812891960144, "learning_rate": 1.9441575763629576e-05, "loss": 2.2404, "step": 3659 }, { "epoch": 0.12, "grad_norm": 0.6835404634475708, "learning_rate": 1.9441225493169993e-05, "loss": 2.2787, "step": 3660 }, { "epoch": 0.12, "grad_norm": 0.6952942609786987, "learning_rate": 1.944087511604898e-05, "loss": 2.335, "step": 3661 }, { "epoch": 0.12, "grad_norm": 0.6576671004295349, "learning_rate": 1.944052463227049e-05, "loss": 2.2742, "step": 3662 }, { "epoch": 0.12, "grad_norm": 0.6631172895431519, "learning_rate": 1.9440174041838484e-05, "loss": 2.2914, "step": 3663 }, { "epoch": 0.12, "grad_norm": 0.6596168279647827, "learning_rate": 1.9439823344756927e-05, "loss": 2.2755, "step": 3664 }, { "epoch": 0.12, "grad_norm": 0.6673133373260498, "learning_rate": 1.943947254102978e-05, "loss": 2.3057, "step": 3665 }, { "epoch": 0.12, "grad_norm": 0.6609194874763489, "learning_rate": 1.9439121630661e-05, "loss": 2.252, "step": 3666 }, { "epoch": 0.12, "grad_norm": 0.6546292304992676, "learning_rate": 1.943877061365456e-05, "loss": 2.2646, "step": 3667 }, { "epoch": 0.12, "grad_norm": 0.7106362581253052, "learning_rate": 1.9438419490014417e-05, "loss": 2.2382, "step": 3668 }, { "epoch": 0.12, "grad_norm": 0.7512779831886292, "learning_rate": 1.9438068259744546e-05, "loss": 2.2603, "step": 3669 }, { "epoch": 0.12, "grad_norm": 0.6715754270553589, "learning_rate": 1.9437716922848907e-05, "loss": 2.2388, "step": 3670 }, { "epoch": 0.12, "grad_norm": 0.6882107853889465, "learning_rate": 1.9437365479331475e-05, "loss": 2.309, "step": 3671 }, { "epoch": 0.12, "grad_norm": 0.6753911375999451, "learning_rate": 1.943701392919622e-05, "loss": 2.2215, "step": 3672 }, { "epoch": 0.12, "grad_norm": 0.7017413973808289, "learning_rate": 1.943666227244711e-05, "loss": 2.331, "step": 3673 }, { "epoch": 0.12, "grad_norm": 0.6687679290771484, "learning_rate": 1.9436310509088122e-05, "loss": 2.3083, "step": 3674 }, { "epoch": 0.12, "grad_norm": 0.7172769904136658, "learning_rate": 1.943595863912323e-05, "loss": 2.2197, "step": 3675 }, { "epoch": 0.12, "grad_norm": 0.7076760530471802, "learning_rate": 1.9435606662556402e-05, "loss": 2.3206, "step": 3676 }, { "epoch": 0.12, "grad_norm": 0.6666626334190369, "learning_rate": 1.9435254579391625e-05, "loss": 2.2212, "step": 3677 }, { "epoch": 0.12, "grad_norm": 0.6660637259483337, "learning_rate": 1.9434902389632867e-05, "loss": 2.352, "step": 3678 }, { "epoch": 0.12, "grad_norm": 0.7116628289222717, "learning_rate": 1.9434550093284113e-05, "loss": 2.3049, "step": 3679 }, { "epoch": 0.12, "grad_norm": 0.6952300071716309, "learning_rate": 1.943419769034934e-05, "loss": 2.301, "step": 3680 }, { "epoch": 0.12, "grad_norm": 0.6677801012992859, "learning_rate": 1.9433845180832532e-05, "loss": 2.184, "step": 3681 }, { "epoch": 0.12, "grad_norm": 0.671719491481781, "learning_rate": 1.9433492564737673e-05, "loss": 2.229, "step": 3682 }, { "epoch": 0.12, "grad_norm": 0.6637355089187622, "learning_rate": 1.9433139842068737e-05, "loss": 2.2418, "step": 3683 }, { "epoch": 0.12, "grad_norm": 0.7171868085861206, "learning_rate": 1.9432787012829723e-05, "loss": 2.2693, "step": 3684 }, { "epoch": 0.12, "grad_norm": 0.6579193472862244, "learning_rate": 1.9432434077024602e-05, "loss": 2.2586, "step": 3685 }, { "epoch": 0.12, "grad_norm": 0.6844008564949036, "learning_rate": 1.9432081034657374e-05, "loss": 2.2166, "step": 3686 }, { "epoch": 0.12, "grad_norm": 0.6631815433502197, "learning_rate": 1.9431727885732015e-05, "loss": 2.2513, "step": 3687 }, { "epoch": 0.12, "grad_norm": 0.6815577149391174, "learning_rate": 1.943137463025253e-05, "loss": 2.2373, "step": 3688 }, { "epoch": 0.12, "grad_norm": 0.704023540019989, "learning_rate": 1.9431021268222898e-05, "loss": 2.31, "step": 3689 }, { "epoch": 0.12, "grad_norm": 0.7080705165863037, "learning_rate": 1.9430667799647115e-05, "loss": 2.1978, "step": 3690 }, { "epoch": 0.12, "grad_norm": 0.6579564213752747, "learning_rate": 1.9430314224529177e-05, "loss": 2.284, "step": 3691 }, { "epoch": 0.12, "grad_norm": 0.69828861951828, "learning_rate": 1.942996054287307e-05, "loss": 2.3091, "step": 3692 }, { "epoch": 0.12, "grad_norm": 0.7169203758239746, "learning_rate": 1.9429606754682803e-05, "loss": 2.2944, "step": 3693 }, { "epoch": 0.12, "grad_norm": 0.6707363724708557, "learning_rate": 1.942925285996236e-05, "loss": 2.2127, "step": 3694 }, { "epoch": 0.12, "grad_norm": 0.6921399831771851, "learning_rate": 1.9428898858715745e-05, "loss": 2.2407, "step": 3695 }, { "epoch": 0.12, "grad_norm": 0.6841446161270142, "learning_rate": 1.9428544750946955e-05, "loss": 2.2568, "step": 3696 }, { "epoch": 0.12, "grad_norm": 0.6716071963310242, "learning_rate": 1.9428190536659995e-05, "loss": 2.2963, "step": 3697 }, { "epoch": 0.12, "grad_norm": 0.6964879035949707, "learning_rate": 1.942783621585886e-05, "loss": 2.2276, "step": 3698 }, { "epoch": 0.12, "grad_norm": 0.6463219523429871, "learning_rate": 1.942748178854756e-05, "loss": 2.2197, "step": 3699 }, { "epoch": 0.12, "grad_norm": 0.6674463748931885, "learning_rate": 1.9427127254730095e-05, "loss": 2.276, "step": 3700 }, { "epoch": 0.12, "grad_norm": 0.6658323407173157, "learning_rate": 1.942677261441047e-05, "loss": 2.2693, "step": 3701 }, { "epoch": 0.12, "grad_norm": 0.664180338382721, "learning_rate": 1.9426417867592694e-05, "loss": 2.2563, "step": 3702 }, { "epoch": 0.12, "grad_norm": 0.6704079508781433, "learning_rate": 1.9426063014280775e-05, "loss": 2.2813, "step": 3703 }, { "epoch": 0.12, "grad_norm": 0.7194498777389526, "learning_rate": 1.9425708054478718e-05, "loss": 2.2816, "step": 3704 }, { "epoch": 0.12, "grad_norm": 0.7084808349609375, "learning_rate": 1.9425352988190535e-05, "loss": 2.3381, "step": 3705 }, { "epoch": 0.12, "grad_norm": 0.7209370732307434, "learning_rate": 1.9424997815420237e-05, "loss": 2.2332, "step": 3706 }, { "epoch": 0.12, "grad_norm": 0.6653528809547424, "learning_rate": 1.942464253617184e-05, "loss": 2.2177, "step": 3707 }, { "epoch": 0.12, "grad_norm": 0.7053827047348022, "learning_rate": 1.9424287150449356e-05, "loss": 2.2957, "step": 3708 }, { "epoch": 0.12, "grad_norm": 0.6653017997741699, "learning_rate": 1.9423931658256795e-05, "loss": 2.2521, "step": 3709 }, { "epoch": 0.12, "grad_norm": 0.6525475382804871, "learning_rate": 1.942357605959818e-05, "loss": 2.2293, "step": 3710 }, { "epoch": 0.12, "grad_norm": 0.6921650171279907, "learning_rate": 1.9423220354477524e-05, "loss": 2.2676, "step": 3711 }, { "epoch": 0.12, "grad_norm": 0.6810005307197571, "learning_rate": 1.9422864542898847e-05, "loss": 2.2119, "step": 3712 }, { "epoch": 0.12, "grad_norm": 0.670204222202301, "learning_rate": 1.942250862486617e-05, "loss": 2.2884, "step": 3713 }, { "epoch": 0.12, "grad_norm": 0.6999047994613647, "learning_rate": 1.942215260038351e-05, "loss": 2.3229, "step": 3714 }, { "epoch": 0.12, "grad_norm": 0.6989960074424744, "learning_rate": 1.9421796469454896e-05, "loss": 2.3461, "step": 3715 }, { "epoch": 0.12, "grad_norm": 0.6834724545478821, "learning_rate": 1.9421440232084344e-05, "loss": 2.2876, "step": 3716 }, { "epoch": 0.12, "grad_norm": 0.6775102019309998, "learning_rate": 1.9421083888275882e-05, "loss": 2.221, "step": 3717 }, { "epoch": 0.12, "grad_norm": 0.7152907252311707, "learning_rate": 1.9420727438033537e-05, "loss": 2.252, "step": 3718 }, { "epoch": 0.12, "grad_norm": 0.6460477709770203, "learning_rate": 1.9420370881361332e-05, "loss": 2.2655, "step": 3719 }, { "epoch": 0.12, "grad_norm": 0.7120924592018127, "learning_rate": 1.94200142182633e-05, "loss": 2.2999, "step": 3720 }, { "epoch": 0.12, "grad_norm": 0.677557110786438, "learning_rate": 1.9419657448743465e-05, "loss": 2.2966, "step": 3721 }, { "epoch": 0.12, "grad_norm": 0.7043353915214539, "learning_rate": 1.9419300572805863e-05, "loss": 2.3866, "step": 3722 }, { "epoch": 0.12, "grad_norm": 0.7141336798667908, "learning_rate": 1.9418943590454523e-05, "loss": 2.2002, "step": 3723 }, { "epoch": 0.12, "grad_norm": 0.6903342604637146, "learning_rate": 1.941858650169348e-05, "loss": 2.2939, "step": 3724 }, { "epoch": 0.12, "grad_norm": 0.6966872811317444, "learning_rate": 1.9418229306526766e-05, "loss": 2.2765, "step": 3725 }, { "epoch": 0.12, "grad_norm": 0.6794267296791077, "learning_rate": 1.9417872004958415e-05, "loss": 2.2519, "step": 3726 }, { "epoch": 0.12, "grad_norm": 0.6904814839363098, "learning_rate": 1.9417514596992467e-05, "loss": 2.2037, "step": 3727 }, { "epoch": 0.12, "grad_norm": 0.6799295544624329, "learning_rate": 1.9417157082632957e-05, "loss": 2.2426, "step": 3728 }, { "epoch": 0.12, "grad_norm": 0.687701404094696, "learning_rate": 1.9416799461883926e-05, "loss": 2.282, "step": 3729 }, { "epoch": 0.12, "grad_norm": 0.6887182593345642, "learning_rate": 1.9416441734749414e-05, "loss": 2.2245, "step": 3730 }, { "epoch": 0.12, "grad_norm": 0.6488282084465027, "learning_rate": 1.9416083901233463e-05, "loss": 2.3164, "step": 3731 }, { "epoch": 0.12, "grad_norm": 0.6747837066650391, "learning_rate": 1.941572596134011e-05, "loss": 2.2615, "step": 3732 }, { "epoch": 0.12, "grad_norm": 0.7180113792419434, "learning_rate": 1.941536791507341e-05, "loss": 2.3417, "step": 3733 }, { "epoch": 0.12, "grad_norm": 0.6641101837158203, "learning_rate": 1.9415009762437393e-05, "loss": 2.2425, "step": 3734 }, { "epoch": 0.12, "grad_norm": 0.7113267183303833, "learning_rate": 1.941465150343612e-05, "loss": 2.3495, "step": 3735 }, { "epoch": 0.12, "grad_norm": 0.6928016543388367, "learning_rate": 1.9414293138073627e-05, "loss": 2.2755, "step": 3736 }, { "epoch": 0.12, "grad_norm": 0.6808182001113892, "learning_rate": 1.9413934666353972e-05, "loss": 2.211, "step": 3737 }, { "epoch": 0.12, "grad_norm": 0.659949779510498, "learning_rate": 1.94135760882812e-05, "loss": 2.3518, "step": 3738 }, { "epoch": 0.12, "grad_norm": 0.6785452365875244, "learning_rate": 1.941321740385936e-05, "loss": 2.2474, "step": 3739 }, { "epoch": 0.12, "grad_norm": 0.7094192504882812, "learning_rate": 1.9412858613092503e-05, "loss": 2.2156, "step": 3740 }, { "epoch": 0.12, "grad_norm": 0.6787962317466736, "learning_rate": 1.9412499715984694e-05, "loss": 2.2738, "step": 3741 }, { "epoch": 0.12, "grad_norm": 0.6623697876930237, "learning_rate": 1.9412140712539975e-05, "loss": 2.2391, "step": 3742 }, { "epoch": 0.12, "grad_norm": 0.6972416639328003, "learning_rate": 1.941178160276241e-05, "loss": 2.2584, "step": 3743 }, { "epoch": 0.12, "grad_norm": 0.6648272275924683, "learning_rate": 1.9411422386656045e-05, "loss": 2.2689, "step": 3744 }, { "epoch": 0.12, "grad_norm": 0.7274272441864014, "learning_rate": 1.9411063064224952e-05, "loss": 2.1682, "step": 3745 }, { "epoch": 0.12, "grad_norm": 0.6804381608963013, "learning_rate": 1.941070363547318e-05, "loss": 2.2258, "step": 3746 }, { "epoch": 0.12, "grad_norm": 0.6874633431434631, "learning_rate": 1.9410344100404792e-05, "loss": 2.273, "step": 3747 }, { "epoch": 0.12, "grad_norm": 0.6696643829345703, "learning_rate": 1.9409984459023856e-05, "loss": 2.2466, "step": 3748 }, { "epoch": 0.12, "grad_norm": 0.7038395404815674, "learning_rate": 1.940962471133443e-05, "loss": 2.3261, "step": 3749 }, { "epoch": 0.12, "grad_norm": 0.6701029539108276, "learning_rate": 1.9409264857340578e-05, "loss": 2.2746, "step": 3750 }, { "epoch": 0.12, "grad_norm": 0.695885181427002, "learning_rate": 1.9408904897046364e-05, "loss": 2.2239, "step": 3751 }, { "epoch": 0.12, "grad_norm": 0.6909551024436951, "learning_rate": 1.9408544830455858e-05, "loss": 2.3368, "step": 3752 }, { "epoch": 0.12, "grad_norm": 0.686220645904541, "learning_rate": 1.9408184657573125e-05, "loss": 2.2643, "step": 3753 }, { "epoch": 0.12, "grad_norm": 0.7020294070243835, "learning_rate": 1.9407824378402238e-05, "loss": 2.2943, "step": 3754 }, { "epoch": 0.12, "grad_norm": 0.6743779182434082, "learning_rate": 1.9407463992947263e-05, "loss": 2.2428, "step": 3755 }, { "epoch": 0.12, "grad_norm": 0.6766365170478821, "learning_rate": 1.9407103501212276e-05, "loss": 2.2272, "step": 3756 }, { "epoch": 0.12, "grad_norm": 0.6825518012046814, "learning_rate": 1.9406742903201342e-05, "loss": 2.2346, "step": 3757 }, { "epoch": 0.13, "grad_norm": 0.7209994792938232, "learning_rate": 1.940638219891854e-05, "loss": 2.2656, "step": 3758 }, { "epoch": 0.13, "grad_norm": 0.6528053283691406, "learning_rate": 1.9406021388367948e-05, "loss": 2.2748, "step": 3759 }, { "epoch": 0.13, "grad_norm": 0.6639633774757385, "learning_rate": 1.9405660471553637e-05, "loss": 2.2343, "step": 3760 }, { "epoch": 0.13, "grad_norm": 0.6886638402938843, "learning_rate": 1.9405299448479686e-05, "loss": 2.2773, "step": 3761 }, { "epoch": 0.13, "grad_norm": 0.7195329666137695, "learning_rate": 1.9404938319150175e-05, "loss": 2.3398, "step": 3762 }, { "epoch": 0.13, "grad_norm": 0.6680470705032349, "learning_rate": 1.9404577083569183e-05, "loss": 2.2494, "step": 3763 }, { "epoch": 0.13, "grad_norm": 0.7430095076560974, "learning_rate": 1.940421574174079e-05, "loss": 2.2654, "step": 3764 }, { "epoch": 0.13, "grad_norm": 0.680685818195343, "learning_rate": 1.940385429366908e-05, "loss": 2.2734, "step": 3765 }, { "epoch": 0.13, "grad_norm": 0.7081979513168335, "learning_rate": 1.9403492739358132e-05, "loss": 2.2889, "step": 3766 }, { "epoch": 0.13, "grad_norm": 0.6792062520980835, "learning_rate": 1.9403131078812037e-05, "loss": 2.2726, "step": 3767 }, { "epoch": 0.13, "grad_norm": 0.6896951794624329, "learning_rate": 1.9402769312034878e-05, "loss": 2.249, "step": 3768 }, { "epoch": 0.13, "grad_norm": 0.6813924908638, "learning_rate": 1.940240743903074e-05, "loss": 2.2295, "step": 3769 }, { "epoch": 0.13, "grad_norm": 0.7176504135131836, "learning_rate": 1.9402045459803712e-05, "loss": 2.2056, "step": 3770 }, { "epoch": 0.13, "grad_norm": 0.6598018407821655, "learning_rate": 1.9401683374357888e-05, "loss": 2.2767, "step": 3771 }, { "epoch": 0.13, "grad_norm": 0.6950193643569946, "learning_rate": 1.9401321182697354e-05, "loss": 2.2728, "step": 3772 }, { "epoch": 0.13, "grad_norm": 0.6810510754585266, "learning_rate": 1.9400958884826203e-05, "loss": 2.3052, "step": 3773 }, { "epoch": 0.13, "grad_norm": 0.6701185703277588, "learning_rate": 1.940059648074853e-05, "loss": 2.2383, "step": 3774 }, { "epoch": 0.13, "grad_norm": 0.6869978308677673, "learning_rate": 1.9400233970468424e-05, "loss": 2.2488, "step": 3775 }, { "epoch": 0.13, "grad_norm": 0.6976146697998047, "learning_rate": 1.9399871353989984e-05, "loss": 2.2684, "step": 3776 }, { "epoch": 0.13, "grad_norm": 0.671232283115387, "learning_rate": 1.9399508631317306e-05, "loss": 2.2699, "step": 3777 }, { "epoch": 0.13, "grad_norm": 0.6673446893692017, "learning_rate": 1.939914580245449e-05, "loss": 2.2968, "step": 3778 }, { "epoch": 0.13, "grad_norm": 0.6780042052268982, "learning_rate": 1.9398782867405633e-05, "loss": 2.257, "step": 3779 }, { "epoch": 0.13, "grad_norm": 0.6714012622833252, "learning_rate": 1.9398419826174835e-05, "loss": 2.3533, "step": 3780 }, { "epoch": 0.13, "grad_norm": 0.6834983825683594, "learning_rate": 1.9398056678766196e-05, "loss": 2.3048, "step": 3781 }, { "epoch": 0.13, "grad_norm": 0.6877976059913635, "learning_rate": 1.9397693425183824e-05, "loss": 2.2664, "step": 3782 }, { "epoch": 0.13, "grad_norm": 0.6947190165519714, "learning_rate": 1.9397330065431815e-05, "loss": 2.2917, "step": 3783 }, { "epoch": 0.13, "grad_norm": 0.6546889543533325, "learning_rate": 1.9396966599514285e-05, "loss": 2.2027, "step": 3784 }, { "epoch": 0.13, "grad_norm": 0.6743900775909424, "learning_rate": 1.9396603027435324e-05, "loss": 2.2568, "step": 3785 }, { "epoch": 0.13, "grad_norm": 0.6828441023826599, "learning_rate": 1.939623934919906e-05, "loss": 2.246, "step": 3786 }, { "epoch": 0.13, "grad_norm": 0.648501455783844, "learning_rate": 1.9395875564809582e-05, "loss": 2.2794, "step": 3787 }, { "epoch": 0.13, "grad_norm": 0.6960654258728027, "learning_rate": 1.939551167427101e-05, "loss": 2.2531, "step": 3788 }, { "epoch": 0.13, "grad_norm": 0.741593599319458, "learning_rate": 1.9395147677587457e-05, "loss": 2.312, "step": 3789 }, { "epoch": 0.13, "grad_norm": 0.671448826789856, "learning_rate": 1.939478357476303e-05, "loss": 2.2683, "step": 3790 }, { "epoch": 0.13, "grad_norm": 0.6526780724525452, "learning_rate": 1.939441936580184e-05, "loss": 2.246, "step": 3791 }, { "epoch": 0.13, "grad_norm": 0.6915876269340515, "learning_rate": 1.939405505070801e-05, "loss": 2.2602, "step": 3792 }, { "epoch": 0.13, "grad_norm": 0.6606250405311584, "learning_rate": 1.939369062948565e-05, "loss": 2.2105, "step": 3793 }, { "epoch": 0.13, "grad_norm": 0.6585690975189209, "learning_rate": 1.939332610213888e-05, "loss": 2.3095, "step": 3794 }, { "epoch": 0.13, "grad_norm": 0.7024334073066711, "learning_rate": 1.9392961468671812e-05, "loss": 2.2228, "step": 3795 }, { "epoch": 0.13, "grad_norm": 0.6917774677276611, "learning_rate": 1.9392596729088577e-05, "loss": 2.2104, "step": 3796 }, { "epoch": 0.13, "grad_norm": 0.6826688647270203, "learning_rate": 1.9392231883393285e-05, "loss": 2.2854, "step": 3797 }, { "epoch": 0.13, "grad_norm": 0.6910467147827148, "learning_rate": 1.939186693159006e-05, "loss": 2.1931, "step": 3798 }, { "epoch": 0.13, "grad_norm": 0.7044627070426941, "learning_rate": 1.9391501873683028e-05, "loss": 2.1717, "step": 3799 }, { "epoch": 0.13, "grad_norm": 0.6685564517974854, "learning_rate": 1.9391136709676316e-05, "loss": 2.313, "step": 3800 }, { "epoch": 0.13, "grad_norm": 0.6969269514083862, "learning_rate": 1.939077143957404e-05, "loss": 2.2872, "step": 3801 }, { "epoch": 0.13, "grad_norm": 0.6813797950744629, "learning_rate": 1.9390406063380334e-05, "loss": 2.2175, "step": 3802 }, { "epoch": 0.13, "grad_norm": 0.7131717205047607, "learning_rate": 1.9390040581099322e-05, "loss": 2.296, "step": 3803 }, { "epoch": 0.13, "grad_norm": 0.685228168964386, "learning_rate": 1.9389674992735137e-05, "loss": 2.2951, "step": 3804 }, { "epoch": 0.13, "grad_norm": 0.6646665930747986, "learning_rate": 1.9389309298291904e-05, "loss": 2.2439, "step": 3805 }, { "epoch": 0.13, "grad_norm": 0.6834672689437866, "learning_rate": 1.938894349777376e-05, "loss": 2.2997, "step": 3806 }, { "epoch": 0.13, "grad_norm": 0.6647322773933411, "learning_rate": 1.9388577591184833e-05, "loss": 2.3175, "step": 3807 }, { "epoch": 0.13, "grad_norm": 0.6993784308433533, "learning_rate": 1.9388211578529258e-05, "loss": 2.2611, "step": 3808 }, { "epoch": 0.13, "grad_norm": 0.7068907618522644, "learning_rate": 1.9387845459811175e-05, "loss": 2.3137, "step": 3809 }, { "epoch": 0.13, "grad_norm": 0.7464045882225037, "learning_rate": 1.938747923503471e-05, "loss": 2.367, "step": 3810 }, { "epoch": 0.13, "grad_norm": 0.6496166586875916, "learning_rate": 1.9387112904204008e-05, "loss": 2.224, "step": 3811 }, { "epoch": 0.13, "grad_norm": 0.711633026599884, "learning_rate": 1.9386746467323206e-05, "loss": 2.293, "step": 3812 }, { "epoch": 0.13, "grad_norm": 0.7147133350372314, "learning_rate": 1.9386379924396448e-05, "loss": 2.2877, "step": 3813 }, { "epoch": 0.13, "grad_norm": 0.7064916491508484, "learning_rate": 1.9386013275427866e-05, "loss": 2.2539, "step": 3814 }, { "epoch": 0.13, "grad_norm": 0.6703269481658936, "learning_rate": 1.9385646520421605e-05, "loss": 2.2243, "step": 3815 }, { "epoch": 0.13, "grad_norm": 0.6712427139282227, "learning_rate": 1.938527965938181e-05, "loss": 2.2781, "step": 3816 }, { "epoch": 0.13, "grad_norm": 0.6613171100616455, "learning_rate": 1.9384912692312633e-05, "loss": 2.2297, "step": 3817 }, { "epoch": 0.13, "grad_norm": 0.6942279934883118, "learning_rate": 1.938454561921821e-05, "loss": 2.303, "step": 3818 }, { "epoch": 0.13, "grad_norm": 0.6588547825813293, "learning_rate": 1.9384178440102686e-05, "loss": 2.2316, "step": 3819 }, { "epoch": 0.13, "grad_norm": 0.676581621170044, "learning_rate": 1.9383811154970216e-05, "loss": 2.2574, "step": 3820 }, { "epoch": 0.13, "grad_norm": 0.6870656609535217, "learning_rate": 1.9383443763824946e-05, "loss": 2.3091, "step": 3821 }, { "epoch": 0.13, "grad_norm": 0.6999729871749878, "learning_rate": 1.938307626667103e-05, "loss": 2.2618, "step": 3822 }, { "epoch": 0.13, "grad_norm": 0.6895442008972168, "learning_rate": 1.9382708663512616e-05, "loss": 2.3144, "step": 3823 }, { "epoch": 0.13, "grad_norm": 0.6573551893234253, "learning_rate": 1.9382340954353857e-05, "loss": 2.2283, "step": 3824 }, { "epoch": 0.13, "grad_norm": 0.6985926628112793, "learning_rate": 1.9381973139198912e-05, "loss": 2.3384, "step": 3825 }, { "epoch": 0.13, "grad_norm": 0.6900296211242676, "learning_rate": 1.938160521805193e-05, "loss": 2.239, "step": 3826 }, { "epoch": 0.13, "grad_norm": 0.6675920486450195, "learning_rate": 1.938123719091707e-05, "loss": 2.2255, "step": 3827 }, { "epoch": 0.13, "grad_norm": 0.692578136920929, "learning_rate": 1.9380869057798494e-05, "loss": 2.2696, "step": 3828 }, { "epoch": 0.13, "grad_norm": 0.6923479437828064, "learning_rate": 1.938050081870035e-05, "loss": 2.2553, "step": 3829 }, { "epoch": 0.13, "grad_norm": 0.7357146739959717, "learning_rate": 1.9380132473626813e-05, "loss": 2.2535, "step": 3830 }, { "epoch": 0.13, "grad_norm": 0.6821557879447937, "learning_rate": 1.9379764022582032e-05, "loss": 2.2379, "step": 3831 }, { "epoch": 0.13, "grad_norm": 0.7115961909294128, "learning_rate": 1.9379395465570175e-05, "loss": 2.2285, "step": 3832 }, { "epoch": 0.13, "grad_norm": 0.6817278265953064, "learning_rate": 1.9379026802595406e-05, "loss": 2.2827, "step": 3833 }, { "epoch": 0.13, "grad_norm": 0.7151566743850708, "learning_rate": 1.937865803366189e-05, "loss": 2.2156, "step": 3834 }, { "epoch": 0.13, "grad_norm": 0.6646065711975098, "learning_rate": 1.9378289158773785e-05, "loss": 2.3028, "step": 3835 }, { "epoch": 0.13, "grad_norm": 0.751550555229187, "learning_rate": 1.9377920177935274e-05, "loss": 2.2381, "step": 3836 }, { "epoch": 0.13, "grad_norm": 0.6581366658210754, "learning_rate": 1.937755109115051e-05, "loss": 2.2975, "step": 3837 }, { "epoch": 0.13, "grad_norm": 0.6811825037002563, "learning_rate": 1.9377181898423676e-05, "loss": 2.2861, "step": 3838 }, { "epoch": 0.13, "grad_norm": 0.6856463551521301, "learning_rate": 1.937681259975893e-05, "loss": 2.2393, "step": 3839 }, { "epoch": 0.13, "grad_norm": 0.6743162274360657, "learning_rate": 1.9376443195160454e-05, "loss": 2.2287, "step": 3840 }, { "epoch": 0.13, "grad_norm": 0.6996175050735474, "learning_rate": 1.937607368463242e-05, "loss": 2.2938, "step": 3841 }, { "epoch": 0.13, "grad_norm": 0.6857704520225525, "learning_rate": 1.9375704068179e-05, "loss": 2.2518, "step": 3842 }, { "epoch": 0.13, "grad_norm": 0.6750434637069702, "learning_rate": 1.9375334345804366e-05, "loss": 2.2987, "step": 3843 }, { "epoch": 0.13, "grad_norm": 0.661083459854126, "learning_rate": 1.93749645175127e-05, "loss": 2.3061, "step": 3844 }, { "epoch": 0.13, "grad_norm": 0.6785575747489929, "learning_rate": 1.937459458330818e-05, "loss": 2.2249, "step": 3845 }, { "epoch": 0.13, "grad_norm": 0.6781976222991943, "learning_rate": 1.9374224543194986e-05, "loss": 2.2666, "step": 3846 }, { "epoch": 0.13, "grad_norm": 0.6765331625938416, "learning_rate": 1.9373854397177295e-05, "loss": 2.2515, "step": 3847 }, { "epoch": 0.13, "grad_norm": 0.6972099542617798, "learning_rate": 1.9373484145259293e-05, "loss": 2.26, "step": 3848 }, { "epoch": 0.13, "grad_norm": 0.7114779353141785, "learning_rate": 1.937311378744516e-05, "loss": 2.174, "step": 3849 }, { "epoch": 0.13, "grad_norm": 0.6999061107635498, "learning_rate": 1.937274332373908e-05, "loss": 2.2336, "step": 3850 }, { "epoch": 0.13, "grad_norm": 0.6958664059638977, "learning_rate": 1.937237275414524e-05, "loss": 2.2772, "step": 3851 }, { "epoch": 0.13, "grad_norm": 0.7307547926902771, "learning_rate": 1.937200207866782e-05, "loss": 2.2527, "step": 3852 }, { "epoch": 0.13, "grad_norm": 0.6892645955085754, "learning_rate": 1.9371631297311018e-05, "loss": 2.2756, "step": 3853 }, { "epoch": 0.13, "grad_norm": 0.7035792469978333, "learning_rate": 1.9371260410079017e-05, "loss": 2.2895, "step": 3854 }, { "epoch": 0.13, "grad_norm": 0.6636065244674683, "learning_rate": 1.9370889416976005e-05, "loss": 2.2186, "step": 3855 }, { "epoch": 0.13, "grad_norm": 0.6809833645820618, "learning_rate": 1.9370518318006183e-05, "loss": 2.2541, "step": 3856 }, { "epoch": 0.13, "grad_norm": 0.691703200340271, "learning_rate": 1.937014711317373e-05, "loss": 2.2346, "step": 3857 }, { "epoch": 0.13, "grad_norm": 0.6827625632286072, "learning_rate": 1.936977580248285e-05, "loss": 2.2599, "step": 3858 }, { "epoch": 0.13, "grad_norm": 0.68923419713974, "learning_rate": 1.9369404385937734e-05, "loss": 2.3339, "step": 3859 }, { "epoch": 0.13, "grad_norm": 0.707530677318573, "learning_rate": 1.936903286354258e-05, "loss": 2.3143, "step": 3860 }, { "epoch": 0.13, "grad_norm": 0.6839113831520081, "learning_rate": 1.936866123530158e-05, "loss": 2.3177, "step": 3861 }, { "epoch": 0.13, "grad_norm": 0.6864945888519287, "learning_rate": 1.9368289501218935e-05, "loss": 2.2573, "step": 3862 }, { "epoch": 0.13, "grad_norm": 0.6917358040809631, "learning_rate": 1.936791766129885e-05, "loss": 2.3434, "step": 3863 }, { "epoch": 0.13, "grad_norm": 0.6996966600418091, "learning_rate": 1.936754571554552e-05, "loss": 2.3309, "step": 3864 }, { "epoch": 0.13, "grad_norm": 0.6747732162475586, "learning_rate": 1.9367173663963146e-05, "loss": 2.2537, "step": 3865 }, { "epoch": 0.13, "grad_norm": 0.6763821244239807, "learning_rate": 1.9366801506555933e-05, "loss": 2.1774, "step": 3866 }, { "epoch": 0.13, "grad_norm": 0.6778748631477356, "learning_rate": 1.936642924332809e-05, "loss": 2.2048, "step": 3867 }, { "epoch": 0.13, "grad_norm": 0.6992117166519165, "learning_rate": 1.9366056874283817e-05, "loss": 2.2658, "step": 3868 }, { "epoch": 0.13, "grad_norm": 0.667715847492218, "learning_rate": 1.9365684399427324e-05, "loss": 2.2399, "step": 3869 }, { "epoch": 0.13, "grad_norm": 0.6567389369010925, "learning_rate": 1.9365311818762812e-05, "loss": 2.2115, "step": 3870 }, { "epoch": 0.13, "grad_norm": 0.6794754862785339, "learning_rate": 1.9364939132294503e-05, "loss": 2.2936, "step": 3871 }, { "epoch": 0.13, "grad_norm": 0.7116652727127075, "learning_rate": 1.9364566340026595e-05, "loss": 2.2988, "step": 3872 }, { "epoch": 0.13, "grad_norm": 0.6668806076049805, "learning_rate": 1.9364193441963307e-05, "loss": 2.2648, "step": 3873 }, { "epoch": 0.13, "grad_norm": 0.6928072571754456, "learning_rate": 1.936382043810885e-05, "loss": 2.2542, "step": 3874 }, { "epoch": 0.13, "grad_norm": 0.6819534301757812, "learning_rate": 1.9363447328467434e-05, "loss": 2.271, "step": 3875 }, { "epoch": 0.13, "grad_norm": 0.6918515563011169, "learning_rate": 1.9363074113043282e-05, "loss": 2.3231, "step": 3876 }, { "epoch": 0.13, "grad_norm": 0.7092213034629822, "learning_rate": 1.9362700791840602e-05, "loss": 2.2577, "step": 3877 }, { "epoch": 0.13, "grad_norm": 0.6970034241676331, "learning_rate": 1.936232736486362e-05, "loss": 2.2668, "step": 3878 }, { "epoch": 0.13, "grad_norm": 0.7085751891136169, "learning_rate": 1.9361953832116548e-05, "loss": 2.2622, "step": 3879 }, { "epoch": 0.13, "grad_norm": 0.6737750172615051, "learning_rate": 1.9361580193603603e-05, "loss": 2.2725, "step": 3880 }, { "epoch": 0.13, "grad_norm": 0.6979121565818787, "learning_rate": 1.9361206449329017e-05, "loss": 2.1956, "step": 3881 }, { "epoch": 0.13, "grad_norm": 0.7040246725082397, "learning_rate": 1.9360832599297007e-05, "loss": 2.2627, "step": 3882 }, { "epoch": 0.13, "grad_norm": 0.6824359893798828, "learning_rate": 1.9360458643511798e-05, "loss": 2.2686, "step": 3883 }, { "epoch": 0.13, "grad_norm": 0.6732771396636963, "learning_rate": 1.936008458197761e-05, "loss": 2.2638, "step": 3884 }, { "epoch": 0.13, "grad_norm": 0.6961280703544617, "learning_rate": 1.9359710414698672e-05, "loss": 2.2877, "step": 3885 }, { "epoch": 0.13, "grad_norm": 0.6671313047409058, "learning_rate": 1.935933614167921e-05, "loss": 2.2342, "step": 3886 }, { "epoch": 0.13, "grad_norm": 0.6848142743110657, "learning_rate": 1.9358961762923455e-05, "loss": 2.2807, "step": 3887 }, { "epoch": 0.13, "grad_norm": 0.7128828763961792, "learning_rate": 1.9358587278435633e-05, "loss": 2.3302, "step": 3888 }, { "epoch": 0.13, "grad_norm": 0.6834088563919067, "learning_rate": 1.935821268821998e-05, "loss": 2.2734, "step": 3889 }, { "epoch": 0.13, "grad_norm": 0.7087829113006592, "learning_rate": 1.935783799228072e-05, "loss": 2.2717, "step": 3890 }, { "epoch": 0.13, "grad_norm": 0.6643744707107544, "learning_rate": 1.9357463190622096e-05, "loss": 2.2913, "step": 3891 }, { "epoch": 0.13, "grad_norm": 0.6617563366889954, "learning_rate": 1.9357088283248334e-05, "loss": 2.2531, "step": 3892 }, { "epoch": 0.13, "grad_norm": 0.6796502470970154, "learning_rate": 1.9356713270163672e-05, "loss": 2.2258, "step": 3893 }, { "epoch": 0.13, "grad_norm": 0.6705867052078247, "learning_rate": 1.9356338151372347e-05, "loss": 2.2279, "step": 3894 }, { "epoch": 0.13, "grad_norm": 0.6770344376564026, "learning_rate": 1.9355962926878598e-05, "loss": 2.2395, "step": 3895 }, { "epoch": 0.13, "grad_norm": 0.6612032651901245, "learning_rate": 1.9355587596686663e-05, "loss": 2.2633, "step": 3896 }, { "epoch": 0.13, "grad_norm": 0.6406837701797485, "learning_rate": 1.935521216080078e-05, "loss": 2.2691, "step": 3897 }, { "epoch": 0.13, "grad_norm": 0.664910614490509, "learning_rate": 1.9354836619225196e-05, "loss": 2.336, "step": 3898 }, { "epoch": 0.13, "grad_norm": 0.6791231036186218, "learning_rate": 1.9354460971964147e-05, "loss": 2.2131, "step": 3899 }, { "epoch": 0.13, "grad_norm": 0.7030355334281921, "learning_rate": 1.935408521902188e-05, "loss": 2.2883, "step": 3900 }, { "epoch": 0.13, "grad_norm": 0.6841225624084473, "learning_rate": 1.9353709360402645e-05, "loss": 2.2596, "step": 3901 }, { "epoch": 0.13, "grad_norm": 0.6789695024490356, "learning_rate": 1.9353333396110682e-05, "loss": 2.299, "step": 3902 }, { "epoch": 0.13, "grad_norm": 0.6934884786605835, "learning_rate": 1.9352957326150238e-05, "loss": 2.2427, "step": 3903 }, { "epoch": 0.13, "grad_norm": 0.6843969821929932, "learning_rate": 1.935258115052556e-05, "loss": 2.2395, "step": 3904 }, { "epoch": 0.13, "grad_norm": 0.6676246523857117, "learning_rate": 1.9352204869240906e-05, "loss": 2.206, "step": 3905 }, { "epoch": 0.13, "grad_norm": 0.6708856225013733, "learning_rate": 1.9351828482300523e-05, "loss": 2.2368, "step": 3906 }, { "epoch": 0.13, "grad_norm": 0.6857244372367859, "learning_rate": 1.935145198970866e-05, "loss": 2.272, "step": 3907 }, { "epoch": 0.13, "grad_norm": 0.7019046545028687, "learning_rate": 1.9351075391469575e-05, "loss": 2.2947, "step": 3908 }, { "epoch": 0.13, "grad_norm": 0.6780718564987183, "learning_rate": 1.935069868758752e-05, "loss": 2.273, "step": 3909 }, { "epoch": 0.13, "grad_norm": 0.6682991981506348, "learning_rate": 1.935032187806675e-05, "loss": 2.1827, "step": 3910 }, { "epoch": 0.13, "grad_norm": 0.6956418752670288, "learning_rate": 1.9349944962911523e-05, "loss": 2.2479, "step": 3911 }, { "epoch": 0.13, "grad_norm": 0.6831377148628235, "learning_rate": 1.9349567942126102e-05, "loss": 2.2517, "step": 3912 }, { "epoch": 0.13, "grad_norm": 0.6706723570823669, "learning_rate": 1.9349190815714734e-05, "loss": 2.3225, "step": 3913 }, { "epoch": 0.13, "grad_norm": 0.6868249773979187, "learning_rate": 1.9348813583681692e-05, "loss": 2.2743, "step": 3914 }, { "epoch": 0.13, "grad_norm": 0.6771519184112549, "learning_rate": 1.9348436246031234e-05, "loss": 2.2711, "step": 3915 }, { "epoch": 0.13, "grad_norm": 0.6883339881896973, "learning_rate": 1.9348058802767622e-05, "loss": 2.2709, "step": 3916 }, { "epoch": 0.13, "grad_norm": 0.6665377616882324, "learning_rate": 1.934768125389512e-05, "loss": 2.2763, "step": 3917 }, { "epoch": 0.13, "grad_norm": 0.7197356224060059, "learning_rate": 1.934730359941799e-05, "loss": 2.2944, "step": 3918 }, { "epoch": 0.13, "grad_norm": 0.698290228843689, "learning_rate": 1.9346925839340507e-05, "loss": 2.2297, "step": 3919 }, { "epoch": 0.13, "grad_norm": 0.7223041653633118, "learning_rate": 1.9346547973666928e-05, "loss": 2.2834, "step": 3920 }, { "epoch": 0.13, "grad_norm": 0.6898424029350281, "learning_rate": 1.934617000240153e-05, "loss": 2.3536, "step": 3921 }, { "epoch": 0.13, "grad_norm": 0.688073992729187, "learning_rate": 1.9345791925548584e-05, "loss": 2.2874, "step": 3922 }, { "epoch": 0.13, "grad_norm": 0.6813004016876221, "learning_rate": 1.9345413743112354e-05, "loss": 2.23, "step": 3923 }, { "epoch": 0.13, "grad_norm": 0.6877011656761169, "learning_rate": 1.934503545509712e-05, "loss": 2.2894, "step": 3924 }, { "epoch": 0.13, "grad_norm": 0.6924586296081543, "learning_rate": 1.934465706150715e-05, "loss": 2.3253, "step": 3925 }, { "epoch": 0.13, "grad_norm": 0.8447633981704712, "learning_rate": 1.934427856234672e-05, "loss": 2.2928, "step": 3926 }, { "epoch": 0.13, "grad_norm": 0.6965543627738953, "learning_rate": 1.9343899957620105e-05, "loss": 2.2453, "step": 3927 }, { "epoch": 0.13, "grad_norm": 0.7002049684524536, "learning_rate": 1.934352124733159e-05, "loss": 2.2676, "step": 3928 }, { "epoch": 0.13, "grad_norm": 0.6983843445777893, "learning_rate": 1.9343142431485447e-05, "loss": 2.2615, "step": 3929 }, { "epoch": 0.13, "grad_norm": 0.6810771226882935, "learning_rate": 1.9342763510085954e-05, "loss": 2.2556, "step": 3930 }, { "epoch": 0.13, "grad_norm": 0.6740642786026001, "learning_rate": 1.9342384483137394e-05, "loss": 2.2308, "step": 3931 }, { "epoch": 0.13, "grad_norm": 0.687989354133606, "learning_rate": 1.934200535064405e-05, "loss": 2.3078, "step": 3932 }, { "epoch": 0.13, "grad_norm": 0.6699447631835938, "learning_rate": 1.9341626112610204e-05, "loss": 2.197, "step": 3933 }, { "epoch": 0.13, "grad_norm": 0.6842352747917175, "learning_rate": 1.9341246769040142e-05, "loss": 2.2704, "step": 3934 }, { "epoch": 0.13, "grad_norm": 0.6888436675071716, "learning_rate": 1.9340867319938147e-05, "loss": 2.2767, "step": 3935 }, { "epoch": 0.13, "grad_norm": 0.6800437569618225, "learning_rate": 1.9340487765308508e-05, "loss": 2.3061, "step": 3936 }, { "epoch": 0.13, "grad_norm": 0.6544216275215149, "learning_rate": 1.9340108105155515e-05, "loss": 2.2451, "step": 3937 }, { "epoch": 0.13, "grad_norm": 0.6744993329048157, "learning_rate": 1.933972833948345e-05, "loss": 2.2858, "step": 3938 }, { "epoch": 0.13, "grad_norm": 0.680762529373169, "learning_rate": 1.933934846829661e-05, "loss": 2.2925, "step": 3939 }, { "epoch": 0.13, "grad_norm": 0.6617317199707031, "learning_rate": 1.9338968491599286e-05, "loss": 2.2356, "step": 3940 }, { "epoch": 0.13, "grad_norm": 0.6594517230987549, "learning_rate": 1.9338588409395766e-05, "loss": 2.2268, "step": 3941 }, { "epoch": 0.13, "grad_norm": 0.6944791078567505, "learning_rate": 1.933820822169035e-05, "loss": 2.2893, "step": 3942 }, { "epoch": 0.13, "grad_norm": 0.687288224697113, "learning_rate": 1.933782792848733e-05, "loss": 2.2566, "step": 3943 }, { "epoch": 0.13, "grad_norm": 0.6864408254623413, "learning_rate": 1.9337447529791e-05, "loss": 2.2672, "step": 3944 }, { "epoch": 0.13, "grad_norm": 0.677907407283783, "learning_rate": 1.933706702560566e-05, "loss": 2.2708, "step": 3945 }, { "epoch": 0.13, "grad_norm": 0.6765162944793701, "learning_rate": 1.933668641593561e-05, "loss": 2.2501, "step": 3946 }, { "epoch": 0.13, "grad_norm": 0.6796731352806091, "learning_rate": 1.933630570078515e-05, "loss": 2.2774, "step": 3947 }, { "epoch": 0.13, "grad_norm": 0.6950958371162415, "learning_rate": 1.933592488015858e-05, "loss": 2.2606, "step": 3948 }, { "epoch": 0.13, "grad_norm": 0.6761336326599121, "learning_rate": 1.93355439540602e-05, "loss": 2.2602, "step": 3949 }, { "epoch": 0.13, "grad_norm": 0.669391393661499, "learning_rate": 1.9335162922494317e-05, "loss": 2.2477, "step": 3950 }, { "epoch": 0.13, "grad_norm": 0.6777642965316772, "learning_rate": 1.9334781785465234e-05, "loss": 2.2023, "step": 3951 }, { "epoch": 0.13, "grad_norm": 0.6737990975379944, "learning_rate": 1.9334400542977256e-05, "loss": 2.2923, "step": 3952 }, { "epoch": 0.13, "grad_norm": 0.695613443851471, "learning_rate": 1.9334019195034693e-05, "loss": 2.2825, "step": 3953 }, { "epoch": 0.13, "grad_norm": 0.6728472709655762, "learning_rate": 1.933363774164185e-05, "loss": 2.2328, "step": 3954 }, { "epoch": 0.13, "grad_norm": 0.70237797498703, "learning_rate": 1.933325618280304e-05, "loss": 2.2637, "step": 3955 }, { "epoch": 0.13, "grad_norm": 0.7053419947624207, "learning_rate": 1.9332874518522567e-05, "loss": 2.2701, "step": 3956 }, { "epoch": 0.13, "grad_norm": 0.6708536744117737, "learning_rate": 1.933249274880475e-05, "loss": 2.2638, "step": 3957 }, { "epoch": 0.13, "grad_norm": 0.6735857129096985, "learning_rate": 1.9332110873653903e-05, "loss": 2.2613, "step": 3958 }, { "epoch": 0.13, "grad_norm": 0.6780750751495361, "learning_rate": 1.933172889307433e-05, "loss": 2.2484, "step": 3959 }, { "epoch": 0.13, "grad_norm": 0.6781023144721985, "learning_rate": 1.9331346807070358e-05, "loss": 2.2538, "step": 3960 }, { "epoch": 0.13, "grad_norm": 0.6940311789512634, "learning_rate": 1.9330964615646294e-05, "loss": 2.243, "step": 3961 }, { "epoch": 0.13, "grad_norm": 0.680412232875824, "learning_rate": 1.9330582318806462e-05, "loss": 2.2727, "step": 3962 }, { "epoch": 0.13, "grad_norm": 0.6722524166107178, "learning_rate": 1.9330199916555183e-05, "loss": 2.2579, "step": 3963 }, { "epoch": 0.13, "grad_norm": 0.7014780640602112, "learning_rate": 1.932981740889677e-05, "loss": 2.2792, "step": 3964 }, { "epoch": 0.13, "grad_norm": 0.6936694383621216, "learning_rate": 1.9329434795835546e-05, "loss": 2.191, "step": 3965 }, { "epoch": 0.13, "grad_norm": 0.6772468686103821, "learning_rate": 1.9329052077375836e-05, "loss": 2.2207, "step": 3966 }, { "epoch": 0.13, "grad_norm": 0.6639229655265808, "learning_rate": 1.9328669253521964e-05, "loss": 2.3123, "step": 3967 }, { "epoch": 0.13, "grad_norm": 0.6710674166679382, "learning_rate": 1.9328286324278257e-05, "loss": 2.2683, "step": 3968 }, { "epoch": 0.13, "grad_norm": 0.6889562010765076, "learning_rate": 1.9327903289649032e-05, "loss": 2.2424, "step": 3969 }, { "epoch": 0.13, "grad_norm": 0.6798684597015381, "learning_rate": 1.9327520149638625e-05, "loss": 2.3013, "step": 3970 }, { "epoch": 0.13, "grad_norm": 0.713754415512085, "learning_rate": 1.9327136904251363e-05, "loss": 2.2461, "step": 3971 }, { "epoch": 0.13, "grad_norm": 0.6701642274856567, "learning_rate": 1.9326753553491576e-05, "loss": 2.215, "step": 3972 }, { "epoch": 0.13, "grad_norm": 0.6795248985290527, "learning_rate": 1.9326370097363588e-05, "loss": 2.2822, "step": 3973 }, { "epoch": 0.13, "grad_norm": 0.6594623923301697, "learning_rate": 1.9325986535871738e-05, "loss": 2.2373, "step": 3974 }, { "epoch": 0.13, "grad_norm": 0.6895204782485962, "learning_rate": 1.932560286902036e-05, "loss": 2.2973, "step": 3975 }, { "epoch": 0.13, "grad_norm": 0.7123169898986816, "learning_rate": 1.9325219096813787e-05, "loss": 2.2842, "step": 3976 }, { "epoch": 0.13, "grad_norm": 0.6550269722938538, "learning_rate": 1.9324835219256353e-05, "loss": 2.2312, "step": 3977 }, { "epoch": 0.13, "grad_norm": 0.7204231023788452, "learning_rate": 1.9324451236352394e-05, "loss": 2.25, "step": 3978 }, { "epoch": 0.13, "grad_norm": 0.6728067994117737, "learning_rate": 1.932406714810625e-05, "loss": 2.2935, "step": 3979 }, { "epoch": 0.13, "grad_norm": 0.6936350464820862, "learning_rate": 1.932368295452226e-05, "loss": 2.3103, "step": 3980 }, { "epoch": 0.13, "grad_norm": 0.7079327702522278, "learning_rate": 1.9323298655604765e-05, "loss": 2.2153, "step": 3981 }, { "epoch": 0.13, "grad_norm": 0.6720169186592102, "learning_rate": 1.9322914251358104e-05, "loss": 2.2262, "step": 3982 }, { "epoch": 0.13, "grad_norm": 0.696619987487793, "learning_rate": 1.9322529741786623e-05, "loss": 2.3156, "step": 3983 }, { "epoch": 0.13, "grad_norm": 0.6902371644973755, "learning_rate": 1.9322145126894662e-05, "loss": 2.2503, "step": 3984 }, { "epoch": 0.13, "grad_norm": 0.6832977533340454, "learning_rate": 1.9321760406686572e-05, "loss": 2.3031, "step": 3985 }, { "epoch": 0.13, "grad_norm": 0.7059169411659241, "learning_rate": 1.9321375581166696e-05, "loss": 2.3153, "step": 3986 }, { "epoch": 0.13, "grad_norm": 0.6833273768424988, "learning_rate": 1.932099065033938e-05, "loss": 2.2597, "step": 3987 }, { "epoch": 0.13, "grad_norm": 0.6970521211624146, "learning_rate": 1.9320605614208974e-05, "loss": 2.2588, "step": 3988 }, { "epoch": 0.13, "grad_norm": 0.6857945322990417, "learning_rate": 1.932022047277983e-05, "loss": 2.3074, "step": 3989 }, { "epoch": 0.13, "grad_norm": 0.7248327136039734, "learning_rate": 1.9319835226056295e-05, "loss": 2.2426, "step": 3990 }, { "epoch": 0.13, "grad_norm": 0.6732500195503235, "learning_rate": 1.9319449874042725e-05, "loss": 2.2786, "step": 3991 }, { "epoch": 0.13, "grad_norm": 0.7115939259529114, "learning_rate": 1.931906441674347e-05, "loss": 2.2653, "step": 3992 }, { "epoch": 0.13, "grad_norm": 0.6777035593986511, "learning_rate": 1.931867885416289e-05, "loss": 2.2099, "step": 3993 }, { "epoch": 0.13, "grad_norm": 0.6582109928131104, "learning_rate": 1.9318293186305336e-05, "loss": 2.2221, "step": 3994 }, { "epoch": 0.13, "grad_norm": 0.7182148098945618, "learning_rate": 1.9317907413175163e-05, "loss": 2.2377, "step": 3995 }, { "epoch": 0.13, "grad_norm": 0.6935954689979553, "learning_rate": 1.9317521534776738e-05, "loss": 2.2432, "step": 3996 }, { "epoch": 0.13, "grad_norm": 0.7065118551254272, "learning_rate": 1.9317135551114416e-05, "loss": 2.286, "step": 3997 }, { "epoch": 0.13, "grad_norm": 0.7009655237197876, "learning_rate": 1.9316749462192552e-05, "loss": 2.2385, "step": 3998 }, { "epoch": 0.13, "grad_norm": 0.6878575086593628, "learning_rate": 1.9316363268015515e-05, "loss": 2.2227, "step": 3999 }, { "epoch": 0.13, "grad_norm": 0.7239075899124146, "learning_rate": 1.9315976968587668e-05, "loss": 2.3003, "step": 4000 }, { "epoch": 0.13, "grad_norm": 0.6808204650878906, "learning_rate": 1.931559056391337e-05, "loss": 2.1639, "step": 4001 }, { "epoch": 0.13, "grad_norm": 0.704873263835907, "learning_rate": 1.9315204053996994e-05, "loss": 2.2154, "step": 4002 }, { "epoch": 0.13, "grad_norm": 0.7178678512573242, "learning_rate": 1.9314817438842894e-05, "loss": 2.2235, "step": 4003 }, { "epoch": 0.13, "grad_norm": 0.6878161430358887, "learning_rate": 1.931443071845545e-05, "loss": 2.2823, "step": 4004 }, { "epoch": 0.13, "grad_norm": 0.7085456252098083, "learning_rate": 1.9314043892839028e-05, "loss": 2.2742, "step": 4005 }, { "epoch": 0.13, "grad_norm": 0.6980879306793213, "learning_rate": 1.9313656961997992e-05, "loss": 2.2868, "step": 4006 }, { "epoch": 0.13, "grad_norm": 0.7101640701293945, "learning_rate": 1.931326992593672e-05, "loss": 2.2735, "step": 4007 }, { "epoch": 0.13, "grad_norm": 0.667428731918335, "learning_rate": 1.9312882784659586e-05, "loss": 2.2479, "step": 4008 }, { "epoch": 0.13, "grad_norm": 0.6823713183403015, "learning_rate": 1.9312495538170957e-05, "loss": 2.1994, "step": 4009 }, { "epoch": 0.13, "grad_norm": 0.6577711701393127, "learning_rate": 1.931210818647521e-05, "loss": 2.2359, "step": 4010 }, { "epoch": 0.13, "grad_norm": 0.7209216356277466, "learning_rate": 1.9311720729576726e-05, "loss": 2.283, "step": 4011 }, { "epoch": 0.13, "grad_norm": 0.6786279678344727, "learning_rate": 1.9311333167479877e-05, "loss": 2.2328, "step": 4012 }, { "epoch": 0.13, "grad_norm": 0.6739886403083801, "learning_rate": 1.9310945500189044e-05, "loss": 2.2308, "step": 4013 }, { "epoch": 0.13, "grad_norm": 0.6918519139289856, "learning_rate": 1.93105577277086e-05, "loss": 2.2332, "step": 4014 }, { "epoch": 0.13, "grad_norm": 0.6787145733833313, "learning_rate": 1.9310169850042935e-05, "loss": 2.3042, "step": 4015 }, { "epoch": 0.13, "grad_norm": 0.6656612753868103, "learning_rate": 1.9309781867196425e-05, "loss": 2.1578, "step": 4016 }, { "epoch": 0.13, "grad_norm": 0.7095340490341187, "learning_rate": 1.930939377917346e-05, "loss": 2.3214, "step": 4017 }, { "epoch": 0.13, "grad_norm": 0.670860767364502, "learning_rate": 1.9309005585978417e-05, "loss": 2.2514, "step": 4018 }, { "epoch": 0.13, "grad_norm": 0.6796691417694092, "learning_rate": 1.930861728761569e-05, "loss": 2.217, "step": 4019 }, { "epoch": 0.13, "grad_norm": 0.6609011888504028, "learning_rate": 1.9308228884089652e-05, "loss": 2.2554, "step": 4020 }, { "epoch": 0.13, "grad_norm": 0.6706184148788452, "learning_rate": 1.9307840375404702e-05, "loss": 2.2334, "step": 4021 }, { "epoch": 0.13, "grad_norm": 0.6741407513618469, "learning_rate": 1.930745176156523e-05, "loss": 2.2528, "step": 4022 }, { "epoch": 0.13, "grad_norm": 0.7067764401435852, "learning_rate": 1.930706304257562e-05, "loss": 2.2892, "step": 4023 }, { "epoch": 0.13, "grad_norm": 0.6657218933105469, "learning_rate": 1.930667421844026e-05, "loss": 2.2829, "step": 4024 }, { "epoch": 0.13, "grad_norm": 0.69483882188797, "learning_rate": 1.9306285289163557e-05, "loss": 2.2651, "step": 4025 }, { "epoch": 0.13, "grad_norm": 0.7377609610557556, "learning_rate": 1.9305896254749895e-05, "loss": 2.2366, "step": 4026 }, { "epoch": 0.13, "grad_norm": 0.6977409720420837, "learning_rate": 1.930550711520367e-05, "loss": 2.2245, "step": 4027 }, { "epoch": 0.13, "grad_norm": 0.6938289999961853, "learning_rate": 1.930511787052928e-05, "loss": 2.2253, "step": 4028 }, { "epoch": 0.13, "grad_norm": 0.738755464553833, "learning_rate": 1.930472852073112e-05, "loss": 2.3212, "step": 4029 }, { "epoch": 0.13, "grad_norm": 0.6851395964622498, "learning_rate": 1.9304339065813593e-05, "loss": 2.2855, "step": 4030 }, { "epoch": 0.13, "grad_norm": 0.7024518847465515, "learning_rate": 1.9303949505781093e-05, "loss": 2.2453, "step": 4031 }, { "epoch": 0.13, "grad_norm": 0.714691698551178, "learning_rate": 1.930355984063803e-05, "loss": 2.2441, "step": 4032 }, { "epoch": 0.13, "grad_norm": 0.6691733598709106, "learning_rate": 1.9303170070388793e-05, "loss": 2.1669, "step": 4033 }, { "epoch": 0.13, "grad_norm": 0.7234676480293274, "learning_rate": 1.93027801950378e-05, "loss": 2.183, "step": 4034 }, { "epoch": 0.13, "grad_norm": 0.6671250462532043, "learning_rate": 1.9302390214589444e-05, "loss": 2.3147, "step": 4035 }, { "epoch": 0.13, "grad_norm": 0.7062347531318665, "learning_rate": 1.9302000129048135e-05, "loss": 2.2399, "step": 4036 }, { "epoch": 0.13, "grad_norm": 0.7137826681137085, "learning_rate": 1.930160993841828e-05, "loss": 2.3784, "step": 4037 }, { "epoch": 0.13, "grad_norm": 0.6843861937522888, "learning_rate": 1.9301219642704287e-05, "loss": 2.3105, "step": 4038 }, { "epoch": 0.13, "grad_norm": 0.7326799035072327, "learning_rate": 1.9300829241910566e-05, "loss": 2.2966, "step": 4039 }, { "epoch": 0.13, "grad_norm": 0.7401648163795471, "learning_rate": 1.9300438736041527e-05, "loss": 2.2887, "step": 4040 }, { "epoch": 0.13, "grad_norm": 0.7100834846496582, "learning_rate": 1.9300048125101582e-05, "loss": 2.2705, "step": 4041 }, { "epoch": 0.13, "grad_norm": 0.6748153567314148, "learning_rate": 1.9299657409095145e-05, "loss": 2.2105, "step": 4042 }, { "epoch": 0.13, "grad_norm": 0.6559632420539856, "learning_rate": 1.9299266588026627e-05, "loss": 2.2251, "step": 4043 }, { "epoch": 0.13, "grad_norm": 0.7120890021324158, "learning_rate": 1.9298875661900443e-05, "loss": 2.2803, "step": 4044 }, { "epoch": 0.13, "grad_norm": 0.6809351444244385, "learning_rate": 1.9298484630721015e-05, "loss": 2.2034, "step": 4045 }, { "epoch": 0.13, "grad_norm": 0.7116170525550842, "learning_rate": 1.9298093494492754e-05, "loss": 2.245, "step": 4046 }, { "epoch": 0.13, "grad_norm": 0.692582905292511, "learning_rate": 1.929770225322009e-05, "loss": 2.298, "step": 4047 }, { "epoch": 0.13, "grad_norm": 0.717008650302887, "learning_rate": 1.9297310906907426e-05, "loss": 2.2476, "step": 4048 }, { "epoch": 0.13, "grad_norm": 0.6927548050880432, "learning_rate": 1.9296919455559195e-05, "loss": 2.3081, "step": 4049 }, { "epoch": 0.13, "grad_norm": 0.6683803200721741, "learning_rate": 1.9296527899179817e-05, "loss": 2.2315, "step": 4050 }, { "epoch": 0.13, "grad_norm": 0.6744758486747742, "learning_rate": 1.9296136237773714e-05, "loss": 2.2669, "step": 4051 }, { "epoch": 0.13, "grad_norm": 0.7054483294487, "learning_rate": 1.929574447134531e-05, "loss": 2.1422, "step": 4052 }, { "epoch": 0.13, "grad_norm": 0.6705911755561829, "learning_rate": 1.9295352599899037e-05, "loss": 2.2704, "step": 4053 }, { "epoch": 0.13, "grad_norm": 0.6922420859336853, "learning_rate": 1.9294960623439314e-05, "loss": 2.2424, "step": 4054 }, { "epoch": 0.13, "grad_norm": 0.6809048056602478, "learning_rate": 1.9294568541970578e-05, "loss": 2.2833, "step": 4055 }, { "epoch": 0.13, "grad_norm": 0.6752691864967346, "learning_rate": 1.9294176355497248e-05, "loss": 2.3028, "step": 4056 }, { "epoch": 0.13, "grad_norm": 0.6868904829025269, "learning_rate": 1.9293784064023766e-05, "loss": 2.3387, "step": 4057 }, { "epoch": 0.14, "grad_norm": 0.6761431694030762, "learning_rate": 1.9293391667554556e-05, "loss": 2.3075, "step": 4058 }, { "epoch": 0.14, "grad_norm": 0.6953502893447876, "learning_rate": 1.9292999166094054e-05, "loss": 2.2698, "step": 4059 }, { "epoch": 0.14, "grad_norm": 0.6974455714225769, "learning_rate": 1.929260655964669e-05, "loss": 2.2439, "step": 4060 }, { "epoch": 0.14, "grad_norm": 0.6760315299034119, "learning_rate": 1.9292213848216906e-05, "loss": 2.2518, "step": 4061 }, { "epoch": 0.14, "grad_norm": 0.658374011516571, "learning_rate": 1.9291821031809138e-05, "loss": 2.2063, "step": 4062 }, { "epoch": 0.14, "grad_norm": 0.6942732930183411, "learning_rate": 1.929142811042782e-05, "loss": 2.245, "step": 4063 }, { "epoch": 0.14, "grad_norm": 0.6837006211280823, "learning_rate": 1.9291035084077393e-05, "loss": 2.2986, "step": 4064 }, { "epoch": 0.14, "grad_norm": 0.7031409740447998, "learning_rate": 1.9290641952762293e-05, "loss": 2.2378, "step": 4065 }, { "epoch": 0.14, "grad_norm": 0.6463398337364197, "learning_rate": 1.9290248716486967e-05, "loss": 2.254, "step": 4066 }, { "epoch": 0.14, "grad_norm": 0.6659468412399292, "learning_rate": 1.9289855375255857e-05, "loss": 2.2422, "step": 4067 }, { "epoch": 0.14, "grad_norm": 0.7240059971809387, "learning_rate": 1.9289461929073403e-05, "loss": 2.2301, "step": 4068 }, { "epoch": 0.14, "grad_norm": 0.6983621716499329, "learning_rate": 1.9289068377944055e-05, "loss": 2.2544, "step": 4069 }, { "epoch": 0.14, "grad_norm": 0.6898089051246643, "learning_rate": 1.9288674721872255e-05, "loss": 2.3016, "step": 4070 }, { "epoch": 0.14, "grad_norm": 0.6957117319107056, "learning_rate": 1.928828096086245e-05, "loss": 2.2343, "step": 4071 }, { "epoch": 0.14, "grad_norm": 0.7072228789329529, "learning_rate": 1.9287887094919095e-05, "loss": 2.2243, "step": 4072 }, { "epoch": 0.14, "grad_norm": 0.6874589920043945, "learning_rate": 1.928749312404663e-05, "loss": 2.185, "step": 4073 }, { "epoch": 0.14, "grad_norm": 0.6967061758041382, "learning_rate": 1.9287099048249515e-05, "loss": 2.2442, "step": 4074 }, { "epoch": 0.14, "grad_norm": 0.6740591526031494, "learning_rate": 1.9286704867532195e-05, "loss": 2.2356, "step": 4075 }, { "epoch": 0.14, "grad_norm": 0.6667296886444092, "learning_rate": 1.9286310581899125e-05, "loss": 2.269, "step": 4076 }, { "epoch": 0.14, "grad_norm": 0.7137822508811951, "learning_rate": 1.9285916191354763e-05, "loss": 2.3062, "step": 4077 }, { "epoch": 0.14, "grad_norm": 0.6585429906845093, "learning_rate": 1.928552169590356e-05, "loss": 2.2543, "step": 4078 }, { "epoch": 0.14, "grad_norm": 0.694356381893158, "learning_rate": 1.9285127095549975e-05, "loss": 2.2893, "step": 4079 }, { "epoch": 0.14, "grad_norm": 0.6740075945854187, "learning_rate": 1.9284732390298467e-05, "loss": 2.252, "step": 4080 }, { "epoch": 0.14, "grad_norm": 0.6916618943214417, "learning_rate": 1.9284337580153495e-05, "loss": 2.2878, "step": 4081 }, { "epoch": 0.14, "grad_norm": 0.6934636235237122, "learning_rate": 1.928394266511951e-05, "loss": 2.1903, "step": 4082 }, { "epoch": 0.14, "grad_norm": 0.6917687058448792, "learning_rate": 1.928354764520099e-05, "loss": 2.1913, "step": 4083 }, { "epoch": 0.14, "grad_norm": 0.6849561929702759, "learning_rate": 1.928315252040239e-05, "loss": 2.2753, "step": 4084 }, { "epoch": 0.14, "grad_norm": 0.6985245943069458, "learning_rate": 1.928275729072817e-05, "loss": 2.2794, "step": 4085 }, { "epoch": 0.14, "grad_norm": 0.6691417098045349, "learning_rate": 1.9282361956182796e-05, "loss": 2.1862, "step": 4086 }, { "epoch": 0.14, "grad_norm": 0.7120523452758789, "learning_rate": 1.9281966516770742e-05, "loss": 2.2543, "step": 4087 }, { "epoch": 0.14, "grad_norm": 0.6947247982025146, "learning_rate": 1.9281570972496467e-05, "loss": 2.2446, "step": 4088 }, { "epoch": 0.14, "grad_norm": 0.689336895942688, "learning_rate": 1.9281175323364442e-05, "loss": 2.2132, "step": 4089 }, { "epoch": 0.14, "grad_norm": 0.6751183271408081, "learning_rate": 1.928077956937914e-05, "loss": 2.2913, "step": 4090 }, { "epoch": 0.14, "grad_norm": 0.7047361135482788, "learning_rate": 1.928038371054503e-05, "loss": 2.2762, "step": 4091 }, { "epoch": 0.14, "grad_norm": 0.6893208622932434, "learning_rate": 1.9279987746866578e-05, "loss": 2.2295, "step": 4092 }, { "epoch": 0.14, "grad_norm": 0.6904289722442627, "learning_rate": 1.927959167834827e-05, "loss": 2.2475, "step": 4093 }, { "epoch": 0.14, "grad_norm": 0.6864316463470459, "learning_rate": 1.927919550499457e-05, "loss": 2.1996, "step": 4094 }, { "epoch": 0.14, "grad_norm": 0.7316529750823975, "learning_rate": 1.9278799226809958e-05, "loss": 2.337, "step": 4095 }, { "epoch": 0.14, "grad_norm": 0.6959969401359558, "learning_rate": 1.9278402843798908e-05, "loss": 2.2408, "step": 4096 }, { "epoch": 0.14, "grad_norm": 0.7061643600463867, "learning_rate": 1.92780063559659e-05, "loss": 2.283, "step": 4097 }, { "epoch": 0.14, "grad_norm": 0.6846803426742554, "learning_rate": 1.927760976331542e-05, "loss": 2.2315, "step": 4098 }, { "epoch": 0.14, "grad_norm": 0.6590558290481567, "learning_rate": 1.9277213065851937e-05, "loss": 2.1848, "step": 4099 }, { "epoch": 0.14, "grad_norm": 0.71205073595047, "learning_rate": 1.9276816263579938e-05, "loss": 2.2495, "step": 4100 }, { "epoch": 0.14, "grad_norm": 0.6840975284576416, "learning_rate": 1.9276419356503905e-05, "loss": 2.2749, "step": 4101 }, { "epoch": 0.14, "grad_norm": 0.7232087850570679, "learning_rate": 1.9276022344628328e-05, "loss": 2.1806, "step": 4102 }, { "epoch": 0.14, "grad_norm": 0.7220843434333801, "learning_rate": 1.927562522795768e-05, "loss": 2.2749, "step": 4103 }, { "epoch": 0.14, "grad_norm": 0.6793683767318726, "learning_rate": 1.927522800649646e-05, "loss": 2.2893, "step": 4104 }, { "epoch": 0.14, "grad_norm": 0.719146728515625, "learning_rate": 1.9274830680249147e-05, "loss": 2.191, "step": 4105 }, { "epoch": 0.14, "grad_norm": 0.6640661358833313, "learning_rate": 1.927443324922023e-05, "loss": 2.2322, "step": 4106 }, { "epoch": 0.14, "grad_norm": 0.6741136312484741, "learning_rate": 1.9274035713414206e-05, "loss": 2.2247, "step": 4107 }, { "epoch": 0.14, "grad_norm": 0.6907027959823608, "learning_rate": 1.927363807283556e-05, "loss": 2.2309, "step": 4108 }, { "epoch": 0.14, "grad_norm": 0.6727420687675476, "learning_rate": 1.9273240327488785e-05, "loss": 2.2236, "step": 4109 }, { "epoch": 0.14, "grad_norm": 0.6931125521659851, "learning_rate": 1.9272842477378375e-05, "loss": 2.2684, "step": 4110 }, { "epoch": 0.14, "grad_norm": 0.6975182890892029, "learning_rate": 1.9272444522508827e-05, "loss": 2.2534, "step": 4111 }, { "epoch": 0.14, "grad_norm": 0.6834473609924316, "learning_rate": 1.9272046462884634e-05, "loss": 2.297, "step": 4112 }, { "epoch": 0.14, "grad_norm": 0.6884832978248596, "learning_rate": 1.9271648298510292e-05, "loss": 2.2195, "step": 4113 }, { "epoch": 0.14, "grad_norm": 0.7182244658470154, "learning_rate": 1.9271250029390305e-05, "loss": 2.2656, "step": 4114 }, { "epoch": 0.14, "grad_norm": 0.676688551902771, "learning_rate": 1.9270851655529167e-05, "loss": 2.2906, "step": 4115 }, { "epoch": 0.14, "grad_norm": 0.6723467111587524, "learning_rate": 1.9270453176931382e-05, "loss": 2.2452, "step": 4116 }, { "epoch": 0.14, "grad_norm": 0.6708421111106873, "learning_rate": 1.9270054593601446e-05, "loss": 2.245, "step": 4117 }, { "epoch": 0.14, "grad_norm": 0.664937436580658, "learning_rate": 1.926965590554387e-05, "loss": 2.262, "step": 4118 }, { "epoch": 0.14, "grad_norm": 0.7137066721916199, "learning_rate": 1.9269257112763153e-05, "loss": 2.206, "step": 4119 }, { "epoch": 0.14, "grad_norm": 0.7133108377456665, "learning_rate": 1.92688582152638e-05, "loss": 2.2702, "step": 4120 }, { "epoch": 0.14, "grad_norm": 0.7143247127532959, "learning_rate": 1.926845921305032e-05, "loss": 2.2699, "step": 4121 }, { "epoch": 0.14, "grad_norm": 0.6612210273742676, "learning_rate": 1.926806010612722e-05, "loss": 2.2304, "step": 4122 }, { "epoch": 0.14, "grad_norm": 0.6612570285797119, "learning_rate": 1.9267660894499006e-05, "loss": 2.2376, "step": 4123 }, { "epoch": 0.14, "grad_norm": 0.7004397511482239, "learning_rate": 1.9267261578170193e-05, "loss": 2.2443, "step": 4124 }, { "epoch": 0.14, "grad_norm": 0.6992260217666626, "learning_rate": 1.926686215714529e-05, "loss": 2.1572, "step": 4125 }, { "epoch": 0.14, "grad_norm": 0.6960259079933167, "learning_rate": 1.9266462631428807e-05, "loss": 2.2872, "step": 4126 }, { "epoch": 0.14, "grad_norm": 0.769133985042572, "learning_rate": 1.9266063001025263e-05, "loss": 2.2139, "step": 4127 }, { "epoch": 0.14, "grad_norm": 0.7451737523078918, "learning_rate": 1.9265663265939167e-05, "loss": 2.2873, "step": 4128 }, { "epoch": 0.14, "grad_norm": 0.6848505139350891, "learning_rate": 1.926526342617504e-05, "loss": 2.2294, "step": 4129 }, { "epoch": 0.14, "grad_norm": 0.6998298764228821, "learning_rate": 1.9264863481737396e-05, "loss": 2.1783, "step": 4130 }, { "epoch": 0.14, "grad_norm": 0.7073127627372742, "learning_rate": 1.9264463432630752e-05, "loss": 2.3522, "step": 4131 }, { "epoch": 0.14, "grad_norm": 0.6959075927734375, "learning_rate": 1.9264063278859634e-05, "loss": 2.1921, "step": 4132 }, { "epoch": 0.14, "grad_norm": 0.7207573056221008, "learning_rate": 1.9263663020428556e-05, "loss": 2.2664, "step": 4133 }, { "epoch": 0.14, "grad_norm": 0.6904211640357971, "learning_rate": 1.926326265734204e-05, "loss": 2.2408, "step": 4134 }, { "epoch": 0.14, "grad_norm": 0.6853381991386414, "learning_rate": 1.9262862189604616e-05, "loss": 2.2246, "step": 4135 }, { "epoch": 0.14, "grad_norm": 0.6879850029945374, "learning_rate": 1.92624616172208e-05, "loss": 2.2129, "step": 4136 }, { "epoch": 0.14, "grad_norm": 0.6997902989387512, "learning_rate": 1.9262060940195123e-05, "loss": 2.2817, "step": 4137 }, { "epoch": 0.14, "grad_norm": 0.6840111613273621, "learning_rate": 1.926166015853211e-05, "loss": 2.2462, "step": 4138 }, { "epoch": 0.14, "grad_norm": 0.6782870292663574, "learning_rate": 1.9261259272236287e-05, "loss": 2.254, "step": 4139 }, { "epoch": 0.14, "grad_norm": 0.6812632083892822, "learning_rate": 1.926085828131218e-05, "loss": 2.3096, "step": 4140 }, { "epoch": 0.14, "grad_norm": 0.6874100565910339, "learning_rate": 1.926045718576433e-05, "loss": 2.2337, "step": 4141 }, { "epoch": 0.14, "grad_norm": 0.6933589577674866, "learning_rate": 1.926005598559726e-05, "loss": 2.2611, "step": 4142 }, { "epoch": 0.14, "grad_norm": 0.660602331161499, "learning_rate": 1.9259654680815503e-05, "loss": 2.223, "step": 4143 }, { "epoch": 0.14, "grad_norm": 0.6787921190261841, "learning_rate": 1.9259253271423595e-05, "loss": 2.2034, "step": 4144 }, { "epoch": 0.14, "grad_norm": 0.6807339191436768, "learning_rate": 1.925885175742607e-05, "loss": 2.2636, "step": 4145 }, { "epoch": 0.14, "grad_norm": 0.7023966908454895, "learning_rate": 1.9258450138827465e-05, "loss": 2.2757, "step": 4146 }, { "epoch": 0.14, "grad_norm": 0.6914066672325134, "learning_rate": 1.9258048415632317e-05, "loss": 2.2947, "step": 4147 }, { "epoch": 0.14, "grad_norm": 0.715107798576355, "learning_rate": 1.925764658784516e-05, "loss": 2.2492, "step": 4148 }, { "epoch": 0.14, "grad_norm": 0.6742933988571167, "learning_rate": 1.925724465547054e-05, "loss": 2.2126, "step": 4149 }, { "epoch": 0.14, "grad_norm": 0.6703738570213318, "learning_rate": 1.9256842618512996e-05, "loss": 2.2814, "step": 4150 }, { "epoch": 0.14, "grad_norm": 0.6775171160697937, "learning_rate": 1.9256440476977067e-05, "loss": 2.2763, "step": 4151 }, { "epoch": 0.14, "grad_norm": 0.6648553013801575, "learning_rate": 1.92560382308673e-05, "loss": 2.2145, "step": 4152 }, { "epoch": 0.14, "grad_norm": 0.6665297150611877, "learning_rate": 1.9255635880188234e-05, "loss": 2.1968, "step": 4153 }, { "epoch": 0.14, "grad_norm": 0.7342318296432495, "learning_rate": 1.9255233424944424e-05, "loss": 2.3158, "step": 4154 }, { "epoch": 0.14, "grad_norm": 0.7199332118034363, "learning_rate": 1.9254830865140407e-05, "loss": 2.2776, "step": 4155 }, { "epoch": 0.14, "grad_norm": 0.709397554397583, "learning_rate": 1.9254428200780734e-05, "loss": 2.2482, "step": 4156 }, { "epoch": 0.14, "grad_norm": 0.6641029715538025, "learning_rate": 1.9254025431869957e-05, "loss": 2.2156, "step": 4157 }, { "epoch": 0.14, "grad_norm": 0.6642153263092041, "learning_rate": 1.9253622558412625e-05, "loss": 2.2505, "step": 4158 }, { "epoch": 0.14, "grad_norm": 0.7343358993530273, "learning_rate": 1.9253219580413287e-05, "loss": 2.2497, "step": 4159 }, { "epoch": 0.14, "grad_norm": 0.6918613314628601, "learning_rate": 1.9252816497876497e-05, "loss": 2.251, "step": 4160 }, { "epoch": 0.14, "grad_norm": 0.6823561191558838, "learning_rate": 1.925241331080681e-05, "loss": 2.2629, "step": 4161 }, { "epoch": 0.14, "grad_norm": 0.7178806066513062, "learning_rate": 1.9252010019208778e-05, "loss": 2.2447, "step": 4162 }, { "epoch": 0.14, "grad_norm": 0.67769855260849, "learning_rate": 1.925160662308696e-05, "loss": 2.2854, "step": 4163 }, { "epoch": 0.14, "grad_norm": 0.6748905181884766, "learning_rate": 1.9251203122445915e-05, "loss": 2.2027, "step": 4164 }, { "epoch": 0.14, "grad_norm": 0.6750346422195435, "learning_rate": 1.9250799517290196e-05, "loss": 2.2551, "step": 4165 }, { "epoch": 0.14, "grad_norm": 0.7078080773353577, "learning_rate": 1.9250395807624364e-05, "loss": 2.2747, "step": 4166 }, { "epoch": 0.14, "grad_norm": 0.6961045265197754, "learning_rate": 1.9249991993452983e-05, "loss": 2.2219, "step": 4167 }, { "epoch": 0.14, "grad_norm": 0.677168607711792, "learning_rate": 1.9249588074780612e-05, "loss": 2.2163, "step": 4168 }, { "epoch": 0.14, "grad_norm": 0.7004295587539673, "learning_rate": 1.924918405161182e-05, "loss": 2.2627, "step": 4169 }, { "epoch": 0.14, "grad_norm": 0.6706784963607788, "learning_rate": 1.9248779923951162e-05, "loss": 2.207, "step": 4170 }, { "epoch": 0.14, "grad_norm": 0.745366096496582, "learning_rate": 1.924837569180321e-05, "loss": 2.2778, "step": 4171 }, { "epoch": 0.14, "grad_norm": 0.7028800249099731, "learning_rate": 1.9247971355172533e-05, "loss": 2.2395, "step": 4172 }, { "epoch": 0.14, "grad_norm": 0.6924868226051331, "learning_rate": 1.9247566914063695e-05, "loss": 2.1954, "step": 4173 }, { "epoch": 0.14, "grad_norm": 0.6988398432731628, "learning_rate": 1.9247162368481264e-05, "loss": 2.2969, "step": 4174 }, { "epoch": 0.14, "grad_norm": 0.6819460391998291, "learning_rate": 1.9246757718429808e-05, "loss": 2.214, "step": 4175 }, { "epoch": 0.14, "grad_norm": 0.6714036464691162, "learning_rate": 1.9246352963913907e-05, "loss": 2.2242, "step": 4176 }, { "epoch": 0.14, "grad_norm": 0.699151337146759, "learning_rate": 1.924594810493813e-05, "loss": 2.2486, "step": 4177 }, { "epoch": 0.14, "grad_norm": 0.6854993104934692, "learning_rate": 1.9245543141507047e-05, "loss": 2.2521, "step": 4178 }, { "epoch": 0.14, "grad_norm": 0.743733823299408, "learning_rate": 1.924513807362524e-05, "loss": 2.314, "step": 4179 }, { "epoch": 0.14, "grad_norm": 0.7106902003288269, "learning_rate": 1.9244732901297276e-05, "loss": 2.295, "step": 4180 }, { "epoch": 0.14, "grad_norm": 0.6810404658317566, "learning_rate": 1.9244327624527738e-05, "loss": 2.2485, "step": 4181 }, { "epoch": 0.14, "grad_norm": 0.7209069132804871, "learning_rate": 1.9243922243321206e-05, "loss": 2.2621, "step": 4182 }, { "epoch": 0.14, "grad_norm": 0.6818249225616455, "learning_rate": 1.924351675768226e-05, "loss": 2.2309, "step": 4183 }, { "epoch": 0.14, "grad_norm": 0.7056018710136414, "learning_rate": 1.924311116761548e-05, "loss": 2.2297, "step": 4184 }, { "epoch": 0.14, "grad_norm": 0.6919586062431335, "learning_rate": 1.9242705473125443e-05, "loss": 2.3078, "step": 4185 }, { "epoch": 0.14, "grad_norm": 0.6827554702758789, "learning_rate": 1.9242299674216736e-05, "loss": 2.2194, "step": 4186 }, { "epoch": 0.14, "grad_norm": 0.6794097423553467, "learning_rate": 1.9241893770893945e-05, "loss": 2.2373, "step": 4187 }, { "epoch": 0.14, "grad_norm": 0.6981142163276672, "learning_rate": 1.9241487763161655e-05, "loss": 2.3126, "step": 4188 }, { "epoch": 0.14, "grad_norm": 0.6815127730369568, "learning_rate": 1.924108165102445e-05, "loss": 2.1562, "step": 4189 }, { "epoch": 0.14, "grad_norm": 0.7028631567955017, "learning_rate": 1.9240675434486924e-05, "loss": 2.2575, "step": 4190 }, { "epoch": 0.14, "grad_norm": 0.6733468770980835, "learning_rate": 1.9240269113553662e-05, "loss": 2.2176, "step": 4191 }, { "epoch": 0.14, "grad_norm": 0.6907353401184082, "learning_rate": 1.9239862688229253e-05, "loss": 2.2068, "step": 4192 }, { "epoch": 0.14, "grad_norm": 0.7049823999404907, "learning_rate": 1.923945615851829e-05, "loss": 2.2611, "step": 4193 }, { "epoch": 0.14, "grad_norm": 0.6889074444770813, "learning_rate": 1.923904952442537e-05, "loss": 2.243, "step": 4194 }, { "epoch": 0.14, "grad_norm": 0.6630429625511169, "learning_rate": 1.923864278595508e-05, "loss": 2.2565, "step": 4195 }, { "epoch": 0.14, "grad_norm": 0.7224636673927307, "learning_rate": 1.9238235943112017e-05, "loss": 2.2782, "step": 4196 }, { "epoch": 0.14, "grad_norm": 0.6806182265281677, "learning_rate": 1.9237828995900784e-05, "loss": 2.264, "step": 4197 }, { "epoch": 0.14, "grad_norm": 0.6724852323532104, "learning_rate": 1.9237421944325968e-05, "loss": 2.2331, "step": 4198 }, { "epoch": 0.14, "grad_norm": 0.6790256500244141, "learning_rate": 1.9237014788392173e-05, "loss": 2.2826, "step": 4199 }, { "epoch": 0.14, "grad_norm": 0.66058349609375, "learning_rate": 1.9236607528104e-05, "loss": 2.2221, "step": 4200 }, { "epoch": 0.14, "grad_norm": 0.6907892823219299, "learning_rate": 1.9236200163466046e-05, "loss": 2.2802, "step": 4201 }, { "epoch": 0.14, "grad_norm": 0.7072100043296814, "learning_rate": 1.9235792694482914e-05, "loss": 2.2423, "step": 4202 }, { "epoch": 0.14, "grad_norm": 0.6915808320045471, "learning_rate": 1.9235385121159214e-05, "loss": 2.2589, "step": 4203 }, { "epoch": 0.14, "grad_norm": 0.6821367740631104, "learning_rate": 1.923497744349954e-05, "loss": 2.2619, "step": 4204 }, { "epoch": 0.14, "grad_norm": 0.7339404821395874, "learning_rate": 1.923456966150851e-05, "loss": 2.2702, "step": 4205 }, { "epoch": 0.14, "grad_norm": 0.6708696484565735, "learning_rate": 1.923416177519072e-05, "loss": 2.2652, "step": 4206 }, { "epoch": 0.14, "grad_norm": 0.6846692562103271, "learning_rate": 1.9233753784550783e-05, "loss": 2.227, "step": 4207 }, { "epoch": 0.14, "grad_norm": 0.7005242705345154, "learning_rate": 1.9233345689593304e-05, "loss": 2.2362, "step": 4208 }, { "epoch": 0.14, "grad_norm": 0.6648873090744019, "learning_rate": 1.92329374903229e-05, "loss": 2.1961, "step": 4209 }, { "epoch": 0.14, "grad_norm": 0.7171528339385986, "learning_rate": 1.9232529186744177e-05, "loss": 2.2767, "step": 4210 }, { "epoch": 0.14, "grad_norm": 0.7055249214172363, "learning_rate": 1.923212077886175e-05, "loss": 2.3218, "step": 4211 }, { "epoch": 0.14, "grad_norm": 0.699110746383667, "learning_rate": 1.9231712266680236e-05, "loss": 2.2461, "step": 4212 }, { "epoch": 0.14, "grad_norm": 0.676840603351593, "learning_rate": 1.9231303650204244e-05, "loss": 2.1944, "step": 4213 }, { "epoch": 0.14, "grad_norm": 0.6827298998832703, "learning_rate": 1.9230894929438393e-05, "loss": 2.3278, "step": 4214 }, { "epoch": 0.14, "grad_norm": 0.7176477909088135, "learning_rate": 1.9230486104387304e-05, "loss": 2.2611, "step": 4215 }, { "epoch": 0.14, "grad_norm": 0.7108551263809204, "learning_rate": 1.9230077175055592e-05, "loss": 2.2883, "step": 4216 }, { "epoch": 0.14, "grad_norm": 0.7139759659767151, "learning_rate": 1.9229668141447877e-05, "loss": 2.2279, "step": 4217 }, { "epoch": 0.14, "grad_norm": 0.7175858020782471, "learning_rate": 1.922925900356878e-05, "loss": 2.2631, "step": 4218 }, { "epoch": 0.14, "grad_norm": 0.7114773392677307, "learning_rate": 1.9228849761422923e-05, "loss": 2.2262, "step": 4219 }, { "epoch": 0.14, "grad_norm": 0.708707332611084, "learning_rate": 1.922844041501493e-05, "loss": 2.2704, "step": 4220 }, { "epoch": 0.14, "grad_norm": 0.7688212394714355, "learning_rate": 1.9228030964349428e-05, "loss": 2.2478, "step": 4221 }, { "epoch": 0.14, "grad_norm": 0.6771520972251892, "learning_rate": 1.922762140943104e-05, "loss": 2.1879, "step": 4222 }, { "epoch": 0.14, "grad_norm": 0.7011240124702454, "learning_rate": 1.922721175026439e-05, "loss": 2.2365, "step": 4223 }, { "epoch": 0.14, "grad_norm": 0.6894611716270447, "learning_rate": 1.9226801986854112e-05, "loss": 2.3041, "step": 4224 }, { "epoch": 0.14, "grad_norm": 0.7119976282119751, "learning_rate": 1.9226392119204835e-05, "loss": 2.2309, "step": 4225 }, { "epoch": 0.14, "grad_norm": 0.7273371815681458, "learning_rate": 1.922598214732118e-05, "loss": 2.2803, "step": 4226 }, { "epoch": 0.14, "grad_norm": 0.6832113265991211, "learning_rate": 1.922557207120779e-05, "loss": 2.2929, "step": 4227 }, { "epoch": 0.14, "grad_norm": 0.6855977773666382, "learning_rate": 1.922516189086929e-05, "loss": 2.2334, "step": 4228 }, { "epoch": 0.14, "grad_norm": 0.7750351428985596, "learning_rate": 1.9224751606310323e-05, "loss": 2.2476, "step": 4229 }, { "epoch": 0.14, "grad_norm": 0.6686546206474304, "learning_rate": 1.9224341217535517e-05, "loss": 2.2335, "step": 4230 }, { "epoch": 0.14, "grad_norm": 0.6697055101394653, "learning_rate": 1.922393072454951e-05, "loss": 2.1985, "step": 4231 }, { "epoch": 0.14, "grad_norm": 0.6736751198768616, "learning_rate": 1.9223520127356938e-05, "loss": 2.2272, "step": 4232 }, { "epoch": 0.14, "grad_norm": 0.6789987683296204, "learning_rate": 1.922310942596244e-05, "loss": 2.233, "step": 4233 }, { "epoch": 0.14, "grad_norm": 0.7310657501220703, "learning_rate": 1.922269862037066e-05, "loss": 2.273, "step": 4234 }, { "epoch": 0.14, "grad_norm": 0.7140824198722839, "learning_rate": 1.9222287710586234e-05, "loss": 2.2605, "step": 4235 }, { "epoch": 0.14, "grad_norm": 0.6683670878410339, "learning_rate": 1.9221876696613808e-05, "loss": 2.2078, "step": 4236 }, { "epoch": 0.14, "grad_norm": 0.7233315110206604, "learning_rate": 1.922146557845802e-05, "loss": 2.269, "step": 4237 }, { "epoch": 0.14, "grad_norm": 0.6739272475242615, "learning_rate": 1.9221054356123522e-05, "loss": 2.1626, "step": 4238 }, { "epoch": 0.14, "grad_norm": 0.6815820336341858, "learning_rate": 1.9220643029614953e-05, "loss": 2.3263, "step": 4239 }, { "epoch": 0.14, "grad_norm": 0.6762769818305969, "learning_rate": 1.9220231598936964e-05, "loss": 2.2301, "step": 4240 }, { "epoch": 0.14, "grad_norm": 0.6702861785888672, "learning_rate": 1.9219820064094207e-05, "loss": 2.2011, "step": 4241 }, { "epoch": 0.14, "grad_norm": 0.6802589297294617, "learning_rate": 1.921940842509132e-05, "loss": 2.2924, "step": 4242 }, { "epoch": 0.14, "grad_norm": 0.6847351789474487, "learning_rate": 1.921899668193296e-05, "loss": 2.1822, "step": 4243 }, { "epoch": 0.14, "grad_norm": 0.6704094409942627, "learning_rate": 1.921858483462378e-05, "loss": 2.2552, "step": 4244 }, { "epoch": 0.14, "grad_norm": 0.6904206275939941, "learning_rate": 1.9218172883168432e-05, "loss": 2.2928, "step": 4245 }, { "epoch": 0.14, "grad_norm": 0.7047081589698792, "learning_rate": 1.9217760827571567e-05, "loss": 2.227, "step": 4246 }, { "epoch": 0.14, "grad_norm": 0.6851109862327576, "learning_rate": 1.9217348667837843e-05, "loss": 2.2906, "step": 4247 }, { "epoch": 0.14, "grad_norm": 0.671583354473114, "learning_rate": 1.9216936403971918e-05, "loss": 2.1968, "step": 4248 }, { "epoch": 0.14, "grad_norm": 0.6658949851989746, "learning_rate": 1.9216524035978443e-05, "loss": 2.2784, "step": 4249 }, { "epoch": 0.14, "grad_norm": 0.6806211471557617, "learning_rate": 1.921611156386208e-05, "loss": 2.1715, "step": 4250 }, { "epoch": 0.14, "grad_norm": 0.674152135848999, "learning_rate": 1.9215698987627495e-05, "loss": 2.2108, "step": 4251 }, { "epoch": 0.14, "grad_norm": 0.728184700012207, "learning_rate": 1.9215286307279342e-05, "loss": 2.2149, "step": 4252 }, { "epoch": 0.14, "grad_norm": 0.6897283792495728, "learning_rate": 1.9214873522822285e-05, "loss": 2.2659, "step": 4253 }, { "epoch": 0.14, "grad_norm": 0.6815371513366699, "learning_rate": 1.9214460634260986e-05, "loss": 2.2255, "step": 4254 }, { "epoch": 0.14, "grad_norm": 0.6435858607292175, "learning_rate": 1.9214047641600113e-05, "loss": 2.2543, "step": 4255 }, { "epoch": 0.14, "grad_norm": 0.6967017650604248, "learning_rate": 1.921363454484433e-05, "loss": 2.219, "step": 4256 }, { "epoch": 0.14, "grad_norm": 0.6708928942680359, "learning_rate": 1.92132213439983e-05, "loss": 2.2233, "step": 4257 }, { "epoch": 0.14, "grad_norm": 0.7591630816459656, "learning_rate": 1.9212808039066696e-05, "loss": 2.2053, "step": 4258 }, { "epoch": 0.14, "grad_norm": 0.7051583528518677, "learning_rate": 1.921239463005419e-05, "loss": 2.1823, "step": 4259 }, { "epoch": 0.14, "grad_norm": 0.6815659403800964, "learning_rate": 1.9211981116965444e-05, "loss": 2.2357, "step": 4260 }, { "epoch": 0.14, "grad_norm": 0.699821949005127, "learning_rate": 1.9211567499805134e-05, "loss": 2.318, "step": 4261 }, { "epoch": 0.14, "grad_norm": 0.7074738144874573, "learning_rate": 1.9211153778577935e-05, "loss": 2.2506, "step": 4262 }, { "epoch": 0.14, "grad_norm": 0.6815426349639893, "learning_rate": 1.9210739953288516e-05, "loss": 2.2001, "step": 4263 }, { "epoch": 0.14, "grad_norm": 0.740431547164917, "learning_rate": 1.9210326023941558e-05, "loss": 2.2312, "step": 4264 }, { "epoch": 0.14, "grad_norm": 0.6804113984107971, "learning_rate": 1.9209911990541735e-05, "loss": 2.1852, "step": 4265 }, { "epoch": 0.14, "grad_norm": 0.6760491132736206, "learning_rate": 1.9209497853093724e-05, "loss": 2.2152, "step": 4266 }, { "epoch": 0.14, "grad_norm": 0.7127466201782227, "learning_rate": 1.9209083611602202e-05, "loss": 2.2115, "step": 4267 }, { "epoch": 0.14, "grad_norm": 0.6811696887016296, "learning_rate": 1.9208669266071853e-05, "loss": 2.203, "step": 4268 }, { "epoch": 0.14, "grad_norm": 0.6816546320915222, "learning_rate": 1.920825481650735e-05, "loss": 2.1733, "step": 4269 }, { "epoch": 0.14, "grad_norm": 0.6877427697181702, "learning_rate": 1.9207840262913384e-05, "loss": 2.2356, "step": 4270 }, { "epoch": 0.14, "grad_norm": 0.6752818822860718, "learning_rate": 1.9207425605294633e-05, "loss": 2.289, "step": 4271 }, { "epoch": 0.14, "grad_norm": 0.6741052269935608, "learning_rate": 1.9207010843655788e-05, "loss": 2.2376, "step": 4272 }, { "epoch": 0.14, "grad_norm": 0.6986745595932007, "learning_rate": 1.9206595978001527e-05, "loss": 2.2519, "step": 4273 }, { "epoch": 0.14, "grad_norm": 0.6718072891235352, "learning_rate": 1.920618100833654e-05, "loss": 2.2707, "step": 4274 }, { "epoch": 0.14, "grad_norm": 0.6851564049720764, "learning_rate": 1.920576593466552e-05, "loss": 2.1898, "step": 4275 }, { "epoch": 0.14, "grad_norm": 0.6624534130096436, "learning_rate": 1.9205350756993146e-05, "loss": 2.2259, "step": 4276 }, { "epoch": 0.14, "grad_norm": 0.6701390147209167, "learning_rate": 1.9204935475324114e-05, "loss": 2.1864, "step": 4277 }, { "epoch": 0.14, "grad_norm": 0.7018690705299377, "learning_rate": 1.9204520089663117e-05, "loss": 2.232, "step": 4278 }, { "epoch": 0.14, "grad_norm": 0.7081454396247864, "learning_rate": 1.9204104600014845e-05, "loss": 2.2151, "step": 4279 }, { "epoch": 0.14, "grad_norm": 0.6908605694770813, "learning_rate": 1.9203689006383996e-05, "loss": 2.2727, "step": 4280 }, { "epoch": 0.14, "grad_norm": 0.7174037098884583, "learning_rate": 1.9203273308775262e-05, "loss": 2.2352, "step": 4281 }, { "epoch": 0.14, "grad_norm": 0.6773051023483276, "learning_rate": 1.920285750719334e-05, "loss": 2.2262, "step": 4282 }, { "epoch": 0.14, "grad_norm": 0.7071585059165955, "learning_rate": 1.9202441601642925e-05, "loss": 2.2373, "step": 4283 }, { "epoch": 0.14, "grad_norm": 0.7034968137741089, "learning_rate": 1.9202025592128717e-05, "loss": 2.2253, "step": 4284 }, { "epoch": 0.14, "grad_norm": 0.6774274110794067, "learning_rate": 1.9201609478655422e-05, "loss": 2.2604, "step": 4285 }, { "epoch": 0.14, "grad_norm": 0.7059097290039062, "learning_rate": 1.9201193261227735e-05, "loss": 2.2413, "step": 4286 }, { "epoch": 0.14, "grad_norm": 0.7126817107200623, "learning_rate": 1.9200776939850355e-05, "loss": 2.2493, "step": 4287 }, { "epoch": 0.14, "grad_norm": 0.7232714891433716, "learning_rate": 1.9200360514527993e-05, "loss": 2.2543, "step": 4288 }, { "epoch": 0.14, "grad_norm": 0.689019501209259, "learning_rate": 1.919994398526535e-05, "loss": 2.2236, "step": 4289 }, { "epoch": 0.14, "grad_norm": 0.6858633756637573, "learning_rate": 1.919952735206713e-05, "loss": 2.2171, "step": 4290 }, { "epoch": 0.14, "grad_norm": 0.6815646290779114, "learning_rate": 1.919911061493804e-05, "loss": 2.2148, "step": 4291 }, { "epoch": 0.14, "grad_norm": 0.6734101176261902, "learning_rate": 1.919869377388279e-05, "loss": 2.2289, "step": 4292 }, { "epoch": 0.14, "grad_norm": 0.6812942624092102, "learning_rate": 1.919827682890609e-05, "loss": 2.2405, "step": 4293 }, { "epoch": 0.14, "grad_norm": 0.686515212059021, "learning_rate": 1.9197859780012647e-05, "loss": 2.1884, "step": 4294 }, { "epoch": 0.14, "grad_norm": 0.6849203109741211, "learning_rate": 1.9197442627207177e-05, "loss": 2.2359, "step": 4295 }, { "epoch": 0.14, "grad_norm": 0.7028157114982605, "learning_rate": 1.9197025370494387e-05, "loss": 2.2941, "step": 4296 }, { "epoch": 0.14, "grad_norm": 0.686337411403656, "learning_rate": 1.9196608009879e-05, "loss": 2.2146, "step": 4297 }, { "epoch": 0.14, "grad_norm": 0.6850748658180237, "learning_rate": 1.9196190545365724e-05, "loss": 2.2756, "step": 4298 }, { "epoch": 0.14, "grad_norm": 0.6942947506904602, "learning_rate": 1.9195772976959274e-05, "loss": 2.242, "step": 4299 }, { "epoch": 0.14, "grad_norm": 0.6923968195915222, "learning_rate": 1.919535530466437e-05, "loss": 2.2302, "step": 4300 }, { "epoch": 0.14, "grad_norm": 0.7147257328033447, "learning_rate": 1.9194937528485732e-05, "loss": 2.3098, "step": 4301 }, { "epoch": 0.14, "grad_norm": 0.6557518839836121, "learning_rate": 1.919451964842808e-05, "loss": 2.1527, "step": 4302 }, { "epoch": 0.14, "grad_norm": 0.6924035549163818, "learning_rate": 1.9194101664496133e-05, "loss": 2.2917, "step": 4303 }, { "epoch": 0.14, "grad_norm": 0.716222882270813, "learning_rate": 1.9193683576694612e-05, "loss": 2.2452, "step": 4304 }, { "epoch": 0.14, "grad_norm": 0.6650006175041199, "learning_rate": 1.919326538502824e-05, "loss": 2.2292, "step": 4305 }, { "epoch": 0.14, "grad_norm": 0.675167977809906, "learning_rate": 1.919284708950175e-05, "loss": 2.2074, "step": 4306 }, { "epoch": 0.14, "grad_norm": 0.733742356300354, "learning_rate": 1.9192428690119856e-05, "loss": 2.2724, "step": 4307 }, { "epoch": 0.14, "grad_norm": 0.7422173023223877, "learning_rate": 1.9192010186887292e-05, "loss": 2.2882, "step": 4308 }, { "epoch": 0.14, "grad_norm": 0.8102651834487915, "learning_rate": 1.9191591579808784e-05, "loss": 2.241, "step": 4309 }, { "epoch": 0.14, "grad_norm": 0.7037965655326843, "learning_rate": 1.919117286888906e-05, "loss": 2.2687, "step": 4310 }, { "epoch": 0.14, "grad_norm": 0.7065612077713013, "learning_rate": 1.9190754054132853e-05, "loss": 2.2605, "step": 4311 }, { "epoch": 0.14, "grad_norm": 0.6892729997634888, "learning_rate": 1.919033513554489e-05, "loss": 2.1547, "step": 4312 }, { "epoch": 0.14, "grad_norm": 0.6618356108665466, "learning_rate": 1.9189916113129908e-05, "loss": 2.2217, "step": 4313 }, { "epoch": 0.14, "grad_norm": 0.6700673699378967, "learning_rate": 1.918949698689264e-05, "loss": 2.2688, "step": 4314 }, { "epoch": 0.14, "grad_norm": 0.6773180961608887, "learning_rate": 1.9189077756837822e-05, "loss": 2.2506, "step": 4315 }, { "epoch": 0.14, "grad_norm": 0.6946173906326294, "learning_rate": 1.9188658422970188e-05, "loss": 2.2832, "step": 4316 }, { "epoch": 0.14, "grad_norm": 0.6928938627243042, "learning_rate": 1.9188238985294475e-05, "loss": 2.2512, "step": 4317 }, { "epoch": 0.14, "grad_norm": 0.70658940076828, "learning_rate": 1.9187819443815427e-05, "loss": 2.2258, "step": 4318 }, { "epoch": 0.14, "grad_norm": 0.7347752451896667, "learning_rate": 1.9187399798537773e-05, "loss": 2.1972, "step": 4319 }, { "epoch": 0.14, "grad_norm": 0.6671786308288574, "learning_rate": 1.9186980049466263e-05, "loss": 2.3089, "step": 4320 }, { "epoch": 0.14, "grad_norm": 0.6883679032325745, "learning_rate": 1.9186560196605638e-05, "loss": 2.2869, "step": 4321 }, { "epoch": 0.14, "grad_norm": 0.7399175763130188, "learning_rate": 1.9186140239960638e-05, "loss": 2.2112, "step": 4322 }, { "epoch": 0.14, "grad_norm": 0.7386508584022522, "learning_rate": 1.9185720179536012e-05, "loss": 2.2078, "step": 4323 }, { "epoch": 0.14, "grad_norm": 0.7095656394958496, "learning_rate": 1.9185300015336498e-05, "loss": 2.2287, "step": 4324 }, { "epoch": 0.14, "grad_norm": 0.7059575319290161, "learning_rate": 1.9184879747366852e-05, "loss": 2.2832, "step": 4325 }, { "epoch": 0.14, "grad_norm": 0.6887425184249878, "learning_rate": 1.9184459375631818e-05, "loss": 2.2058, "step": 4326 }, { "epoch": 0.14, "grad_norm": 0.6827843189239502, "learning_rate": 1.918403890013614e-05, "loss": 2.2277, "step": 4327 }, { "epoch": 0.14, "grad_norm": 0.7170089483261108, "learning_rate": 1.9183618320884578e-05, "loss": 2.2751, "step": 4328 }, { "epoch": 0.14, "grad_norm": 0.7188481092453003, "learning_rate": 1.9183197637881872e-05, "loss": 2.2946, "step": 4329 }, { "epoch": 0.14, "grad_norm": 0.7036263942718506, "learning_rate": 1.9182776851132786e-05, "loss": 2.2952, "step": 4330 }, { "epoch": 0.14, "grad_norm": 0.6741549372673035, "learning_rate": 1.9182355960642066e-05, "loss": 2.3494, "step": 4331 }, { "epoch": 0.14, "grad_norm": 0.6946344971656799, "learning_rate": 1.9181934966414472e-05, "loss": 2.2812, "step": 4332 }, { "epoch": 0.14, "grad_norm": 0.7252484560012817, "learning_rate": 1.9181513868454758e-05, "loss": 2.2825, "step": 4333 }, { "epoch": 0.14, "grad_norm": 0.6909781098365784, "learning_rate": 1.9181092666767683e-05, "loss": 2.2108, "step": 4334 }, { "epoch": 0.14, "grad_norm": 0.6733206510543823, "learning_rate": 1.9180671361358e-05, "loss": 2.2751, "step": 4335 }, { "epoch": 0.14, "grad_norm": 0.6813592314720154, "learning_rate": 1.9180249952230472e-05, "loss": 2.2511, "step": 4336 }, { "epoch": 0.14, "grad_norm": 0.7085376381874084, "learning_rate": 1.9179828439389863e-05, "loss": 2.2474, "step": 4337 }, { "epoch": 0.14, "grad_norm": 0.6810333728790283, "learning_rate": 1.917940682284093e-05, "loss": 2.2976, "step": 4338 }, { "epoch": 0.14, "grad_norm": 0.7013625502586365, "learning_rate": 1.917898510258844e-05, "loss": 2.239, "step": 4339 }, { "epoch": 0.14, "grad_norm": 0.6949984431266785, "learning_rate": 1.9178563278637155e-05, "loss": 2.2295, "step": 4340 }, { "epoch": 0.14, "grad_norm": 0.6863375306129456, "learning_rate": 1.917814135099184e-05, "loss": 2.2274, "step": 4341 }, { "epoch": 0.14, "grad_norm": 0.7012094855308533, "learning_rate": 1.917771931965726e-05, "loss": 2.2489, "step": 4342 }, { "epoch": 0.14, "grad_norm": 0.6757533550262451, "learning_rate": 1.9177297184638192e-05, "loss": 2.2683, "step": 4343 }, { "epoch": 0.14, "grad_norm": 0.677887499332428, "learning_rate": 1.9176874945939397e-05, "loss": 2.2398, "step": 4344 }, { "epoch": 0.14, "grad_norm": 0.7272711992263794, "learning_rate": 1.9176452603565646e-05, "loss": 2.2495, "step": 4345 }, { "epoch": 0.14, "grad_norm": 0.675494372844696, "learning_rate": 1.9176030157521714e-05, "loss": 2.2869, "step": 4346 }, { "epoch": 0.14, "grad_norm": 0.6765179634094238, "learning_rate": 1.9175607607812366e-05, "loss": 2.2472, "step": 4347 }, { "epoch": 0.14, "grad_norm": 0.6921008825302124, "learning_rate": 1.9175184954442385e-05, "loss": 2.2219, "step": 4348 }, { "epoch": 0.14, "grad_norm": 0.7094331979751587, "learning_rate": 1.917476219741654e-05, "loss": 2.2319, "step": 4349 }, { "epoch": 0.14, "grad_norm": 0.6870075464248657, "learning_rate": 1.917433933673961e-05, "loss": 2.2843, "step": 4350 }, { "epoch": 0.14, "grad_norm": 0.692391574382782, "learning_rate": 1.9173916372416372e-05, "loss": 2.3004, "step": 4351 }, { "epoch": 0.14, "grad_norm": 0.6720705628395081, "learning_rate": 1.91734933044516e-05, "loss": 2.2298, "step": 4352 }, { "epoch": 0.14, "grad_norm": 0.7211714386940002, "learning_rate": 1.9173070132850076e-05, "loss": 2.222, "step": 4353 }, { "epoch": 0.14, "grad_norm": 0.6883718967437744, "learning_rate": 1.9172646857616586e-05, "loss": 2.2426, "step": 4354 }, { "epoch": 0.14, "grad_norm": 0.6840395927429199, "learning_rate": 1.9172223478755906e-05, "loss": 2.2383, "step": 4355 }, { "epoch": 0.14, "grad_norm": 0.6846573948860168, "learning_rate": 1.9171799996272816e-05, "loss": 2.3109, "step": 4356 }, { "epoch": 0.14, "grad_norm": 0.6816583871841431, "learning_rate": 1.917137641017211e-05, "loss": 2.305, "step": 4357 }, { "epoch": 0.14, "grad_norm": 0.7049252986907959, "learning_rate": 1.9170952720458566e-05, "loss": 2.23, "step": 4358 }, { "epoch": 0.15, "grad_norm": 0.6837392449378967, "learning_rate": 1.9170528927136974e-05, "loss": 2.2405, "step": 4359 }, { "epoch": 0.15, "grad_norm": 0.7084900140762329, "learning_rate": 1.9170105030212122e-05, "loss": 2.2255, "step": 4360 }, { "epoch": 0.15, "grad_norm": 0.696904182434082, "learning_rate": 1.9169681029688795e-05, "loss": 2.3168, "step": 4361 }, { "epoch": 0.15, "grad_norm": 0.7154303193092346, "learning_rate": 1.9169256925571783e-05, "loss": 2.2789, "step": 4362 }, { "epoch": 0.15, "grad_norm": 0.710085391998291, "learning_rate": 1.9168832717865887e-05, "loss": 2.3226, "step": 4363 }, { "epoch": 0.15, "grad_norm": 0.6970433592796326, "learning_rate": 1.9168408406575885e-05, "loss": 2.2416, "step": 4364 }, { "epoch": 0.15, "grad_norm": 0.7164191007614136, "learning_rate": 1.9167983991706585e-05, "loss": 2.2341, "step": 4365 }, { "epoch": 0.15, "grad_norm": 0.6675049066543579, "learning_rate": 1.916755947326277e-05, "loss": 2.261, "step": 4366 }, { "epoch": 0.15, "grad_norm": 0.6755507588386536, "learning_rate": 1.9167134851249245e-05, "loss": 2.3228, "step": 4367 }, { "epoch": 0.15, "grad_norm": 0.6516239643096924, "learning_rate": 1.91667101256708e-05, "loss": 2.2168, "step": 4368 }, { "epoch": 0.15, "grad_norm": 0.6708970665931702, "learning_rate": 1.9166285296532237e-05, "loss": 2.2842, "step": 4369 }, { "epoch": 0.15, "grad_norm": 0.6642171740531921, "learning_rate": 1.9165860363838353e-05, "loss": 2.2259, "step": 4370 }, { "epoch": 0.15, "grad_norm": 0.7030302286148071, "learning_rate": 1.9165435327593953e-05, "loss": 2.2444, "step": 4371 }, { "epoch": 0.15, "grad_norm": 0.6991947293281555, "learning_rate": 1.9165010187803834e-05, "loss": 2.2601, "step": 4372 }, { "epoch": 0.15, "grad_norm": 0.7127227187156677, "learning_rate": 1.91645849444728e-05, "loss": 2.2409, "step": 4373 }, { "epoch": 0.15, "grad_norm": 0.7084012031555176, "learning_rate": 1.916415959760566e-05, "loss": 2.2792, "step": 4374 }, { "epoch": 0.15, "grad_norm": 0.6790396571159363, "learning_rate": 1.9163734147207215e-05, "loss": 2.2507, "step": 4375 }, { "epoch": 0.15, "grad_norm": 0.6784519553184509, "learning_rate": 1.9163308593282268e-05, "loss": 2.2235, "step": 4376 }, { "epoch": 0.15, "grad_norm": 0.694807231426239, "learning_rate": 1.9162882935835637e-05, "loss": 2.2071, "step": 4377 }, { "epoch": 0.15, "grad_norm": 0.7259537577629089, "learning_rate": 1.916245717487212e-05, "loss": 2.2639, "step": 4378 }, { "epoch": 0.15, "grad_norm": 0.7170383334159851, "learning_rate": 1.9162031310396533e-05, "loss": 2.2854, "step": 4379 }, { "epoch": 0.15, "grad_norm": 0.6712615489959717, "learning_rate": 1.9161605342413686e-05, "loss": 2.3089, "step": 4380 }, { "epoch": 0.15, "grad_norm": 0.6805903911590576, "learning_rate": 1.916117927092839e-05, "loss": 2.239, "step": 4381 }, { "epoch": 0.15, "grad_norm": 0.6861051321029663, "learning_rate": 1.9160753095945458e-05, "loss": 2.2394, "step": 4382 }, { "epoch": 0.15, "grad_norm": 0.6917824149131775, "learning_rate": 1.916032681746971e-05, "loss": 2.2403, "step": 4383 }, { "epoch": 0.15, "grad_norm": 0.6860147714614868, "learning_rate": 1.9159900435505957e-05, "loss": 2.1552, "step": 4384 }, { "epoch": 0.15, "grad_norm": 0.6734533309936523, "learning_rate": 1.9159473950059016e-05, "loss": 2.2103, "step": 4385 }, { "epoch": 0.15, "grad_norm": 0.6711817979812622, "learning_rate": 1.9159047361133706e-05, "loss": 2.2561, "step": 4386 }, { "epoch": 0.15, "grad_norm": 0.7137919664382935, "learning_rate": 1.9158620668734848e-05, "loss": 2.2224, "step": 4387 }, { "epoch": 0.15, "grad_norm": 0.7050636410713196, "learning_rate": 1.9158193872867262e-05, "loss": 2.2368, "step": 4388 }, { "epoch": 0.15, "grad_norm": 0.7166649699211121, "learning_rate": 1.9157766973535765e-05, "loss": 2.2685, "step": 4389 }, { "epoch": 0.15, "grad_norm": 0.6831756830215454, "learning_rate": 1.915733997074519e-05, "loss": 2.1879, "step": 4390 }, { "epoch": 0.15, "grad_norm": 0.7025697827339172, "learning_rate": 1.9156912864500348e-05, "loss": 2.2422, "step": 4391 }, { "epoch": 0.15, "grad_norm": 0.7169339060783386, "learning_rate": 1.9156485654806075e-05, "loss": 2.2367, "step": 4392 }, { "epoch": 0.15, "grad_norm": 0.6627646088600159, "learning_rate": 1.915605834166719e-05, "loss": 2.2418, "step": 4393 }, { "epoch": 0.15, "grad_norm": 0.6652949452400208, "learning_rate": 1.915563092508853e-05, "loss": 2.1737, "step": 4394 }, { "epoch": 0.15, "grad_norm": 0.6702134013175964, "learning_rate": 1.915520340507491e-05, "loss": 2.2029, "step": 4395 }, { "epoch": 0.15, "grad_norm": 0.6901026368141174, "learning_rate": 1.9154775781631172e-05, "loss": 2.2851, "step": 4396 }, { "epoch": 0.15, "grad_norm": 0.6956287026405334, "learning_rate": 1.9154348054762142e-05, "loss": 2.2261, "step": 4397 }, { "epoch": 0.15, "grad_norm": 0.7035785913467407, "learning_rate": 1.9153920224472653e-05, "loss": 2.1871, "step": 4398 }, { "epoch": 0.15, "grad_norm": 0.662171483039856, "learning_rate": 1.9153492290767536e-05, "loss": 2.2421, "step": 4399 }, { "epoch": 0.15, "grad_norm": 0.7039633393287659, "learning_rate": 1.915306425365163e-05, "loss": 2.2784, "step": 4400 }, { "epoch": 0.15, "grad_norm": 0.694678008556366, "learning_rate": 1.9152636113129767e-05, "loss": 2.1918, "step": 4401 }, { "epoch": 0.15, "grad_norm": 0.7037451863288879, "learning_rate": 1.915220786920678e-05, "loss": 2.256, "step": 4402 }, { "epoch": 0.15, "grad_norm": 0.6861623525619507, "learning_rate": 1.915177952188752e-05, "loss": 2.2929, "step": 4403 }, { "epoch": 0.15, "grad_norm": 0.6903137564659119, "learning_rate": 1.915135107117682e-05, "loss": 2.2495, "step": 4404 }, { "epoch": 0.15, "grad_norm": 0.6748040318489075, "learning_rate": 1.915092251707951e-05, "loss": 2.2264, "step": 4405 }, { "epoch": 0.15, "grad_norm": 0.7340372204780579, "learning_rate": 1.9150493859600446e-05, "loss": 2.3174, "step": 4406 }, { "epoch": 0.15, "grad_norm": 0.6770610213279724, "learning_rate": 1.9150065098744464e-05, "loss": 2.2245, "step": 4407 }, { "epoch": 0.15, "grad_norm": 0.6855124235153198, "learning_rate": 1.914963623451641e-05, "loss": 2.2128, "step": 4408 }, { "epoch": 0.15, "grad_norm": 0.6983487606048584, "learning_rate": 1.9149207266921127e-05, "loss": 2.338, "step": 4409 }, { "epoch": 0.15, "grad_norm": 0.6790350079536438, "learning_rate": 1.9148778195963463e-05, "loss": 2.2326, "step": 4410 }, { "epoch": 0.15, "grad_norm": 0.7224295735359192, "learning_rate": 1.9148349021648266e-05, "loss": 2.2603, "step": 4411 }, { "epoch": 0.15, "grad_norm": 0.7385583519935608, "learning_rate": 1.914791974398038e-05, "loss": 2.2597, "step": 4412 }, { "epoch": 0.15, "grad_norm": 0.6818671226501465, "learning_rate": 1.9147490362964655e-05, "loss": 2.2276, "step": 4413 }, { "epoch": 0.15, "grad_norm": 0.6700279712677002, "learning_rate": 1.9147060878605954e-05, "loss": 2.2412, "step": 4414 }, { "epoch": 0.15, "grad_norm": 0.6668648719787598, "learning_rate": 1.914663129090911e-05, "loss": 2.2163, "step": 4415 }, { "epoch": 0.15, "grad_norm": 0.6888675689697266, "learning_rate": 1.914620159987899e-05, "loss": 2.2743, "step": 4416 }, { "epoch": 0.15, "grad_norm": 0.709429144859314, "learning_rate": 1.914577180552045e-05, "loss": 2.2469, "step": 4417 }, { "epoch": 0.15, "grad_norm": 0.6794667840003967, "learning_rate": 1.9145341907838336e-05, "loss": 2.1758, "step": 4418 }, { "epoch": 0.15, "grad_norm": 0.6865988373756409, "learning_rate": 1.9144911906837506e-05, "loss": 2.1894, "step": 4419 }, { "epoch": 0.15, "grad_norm": 0.7329025864601135, "learning_rate": 1.9144481802522824e-05, "loss": 2.258, "step": 4420 }, { "epoch": 0.15, "grad_norm": 0.6853049993515015, "learning_rate": 1.9144051594899148e-05, "loss": 2.2387, "step": 4421 }, { "epoch": 0.15, "grad_norm": 0.6635901927947998, "learning_rate": 1.914362128397133e-05, "loss": 2.2063, "step": 4422 }, { "epoch": 0.15, "grad_norm": 0.6731832027435303, "learning_rate": 1.914319086974424e-05, "loss": 2.2055, "step": 4423 }, { "epoch": 0.15, "grad_norm": 0.6986168622970581, "learning_rate": 1.9142760352222737e-05, "loss": 2.2159, "step": 4424 }, { "epoch": 0.15, "grad_norm": 0.6876347661018372, "learning_rate": 1.9142329731411687e-05, "loss": 2.2883, "step": 4425 }, { "epoch": 0.15, "grad_norm": 0.6784365773200989, "learning_rate": 1.9141899007315957e-05, "loss": 2.2402, "step": 4426 }, { "epoch": 0.15, "grad_norm": 0.7190592288970947, "learning_rate": 1.9141468179940405e-05, "loss": 2.286, "step": 4427 }, { "epoch": 0.15, "grad_norm": 0.6770738363265991, "learning_rate": 1.9141037249289904e-05, "loss": 2.2378, "step": 4428 }, { "epoch": 0.15, "grad_norm": 0.693450927734375, "learning_rate": 1.9140606215369325e-05, "loss": 2.2621, "step": 4429 }, { "epoch": 0.15, "grad_norm": 0.6916965842247009, "learning_rate": 1.9140175078183528e-05, "loss": 2.2435, "step": 4430 }, { "epoch": 0.15, "grad_norm": 0.7128582000732422, "learning_rate": 1.913974383773739e-05, "loss": 2.2977, "step": 4431 }, { "epoch": 0.15, "grad_norm": 0.694035530090332, "learning_rate": 1.9139312494035786e-05, "loss": 2.2156, "step": 4432 }, { "epoch": 0.15, "grad_norm": 0.711816668510437, "learning_rate": 1.9138881047083585e-05, "loss": 2.2057, "step": 4433 }, { "epoch": 0.15, "grad_norm": 0.6962132453918457, "learning_rate": 1.913844949688566e-05, "loss": 2.2035, "step": 4434 }, { "epoch": 0.15, "grad_norm": 0.6823022365570068, "learning_rate": 1.9138017843446893e-05, "loss": 2.2626, "step": 4435 }, { "epoch": 0.15, "grad_norm": 0.6944918632507324, "learning_rate": 1.9137586086772152e-05, "loss": 2.2931, "step": 4436 }, { "epoch": 0.15, "grad_norm": 0.6743447780609131, "learning_rate": 1.9137154226866317e-05, "loss": 2.2444, "step": 4437 }, { "epoch": 0.15, "grad_norm": 0.6706382036209106, "learning_rate": 1.913672226373427e-05, "loss": 2.2335, "step": 4438 }, { "epoch": 0.15, "grad_norm": 0.675216794013977, "learning_rate": 1.913629019738089e-05, "loss": 2.2347, "step": 4439 }, { "epoch": 0.15, "grad_norm": 0.6797457933425903, "learning_rate": 1.9135858027811056e-05, "loss": 2.1319, "step": 4440 }, { "epoch": 0.15, "grad_norm": 0.7135501503944397, "learning_rate": 1.9135425755029656e-05, "loss": 2.2661, "step": 4441 }, { "epoch": 0.15, "grad_norm": 0.7354063391685486, "learning_rate": 1.9134993379041565e-05, "loss": 2.2869, "step": 4442 }, { "epoch": 0.15, "grad_norm": 0.6962104439735413, "learning_rate": 1.9134560899851674e-05, "loss": 2.2949, "step": 4443 }, { "epoch": 0.15, "grad_norm": 0.677567183971405, "learning_rate": 1.913412831746487e-05, "loss": 2.2623, "step": 4444 }, { "epoch": 0.15, "grad_norm": 0.6790776252746582, "learning_rate": 1.9133695631886036e-05, "loss": 2.2064, "step": 4445 }, { "epoch": 0.15, "grad_norm": 0.6918368339538574, "learning_rate": 1.9133262843120063e-05, "loss": 2.2275, "step": 4446 }, { "epoch": 0.15, "grad_norm": 0.6921852231025696, "learning_rate": 1.9132829951171837e-05, "loss": 2.2545, "step": 4447 }, { "epoch": 0.15, "grad_norm": 0.7122212648391724, "learning_rate": 1.913239695604625e-05, "loss": 2.2442, "step": 4448 }, { "epoch": 0.15, "grad_norm": 0.688454806804657, "learning_rate": 1.9131963857748193e-05, "loss": 2.2662, "step": 4449 }, { "epoch": 0.15, "grad_norm": 0.6824939250946045, "learning_rate": 1.913153065628256e-05, "loss": 2.2404, "step": 4450 }, { "epoch": 0.15, "grad_norm": 0.6846230626106262, "learning_rate": 1.913109735165425e-05, "loss": 2.2551, "step": 4451 }, { "epoch": 0.15, "grad_norm": 0.669251024723053, "learning_rate": 1.913066394386815e-05, "loss": 2.2341, "step": 4452 }, { "epoch": 0.15, "grad_norm": 0.7283945083618164, "learning_rate": 1.9130230432929162e-05, "loss": 2.215, "step": 4453 }, { "epoch": 0.15, "grad_norm": 0.7121866345405579, "learning_rate": 1.912979681884218e-05, "loss": 2.2714, "step": 4454 }, { "epoch": 0.15, "grad_norm": 0.6670955419540405, "learning_rate": 1.9129363101612104e-05, "loss": 2.232, "step": 4455 }, { "epoch": 0.15, "grad_norm": 0.6937485933303833, "learning_rate": 1.9128929281243834e-05, "loss": 2.2648, "step": 4456 }, { "epoch": 0.15, "grad_norm": 0.6939454078674316, "learning_rate": 1.9128495357742273e-05, "loss": 2.2119, "step": 4457 }, { "epoch": 0.15, "grad_norm": 0.7153201699256897, "learning_rate": 1.912806133111232e-05, "loss": 2.2215, "step": 4458 }, { "epoch": 0.15, "grad_norm": 0.7199572324752808, "learning_rate": 1.9127627201358878e-05, "loss": 2.2619, "step": 4459 }, { "epoch": 0.15, "grad_norm": 0.711908221244812, "learning_rate": 1.9127192968486854e-05, "loss": 2.1738, "step": 4460 }, { "epoch": 0.15, "grad_norm": 0.6910556554794312, "learning_rate": 1.9126758632501155e-05, "loss": 2.2213, "step": 4461 }, { "epoch": 0.15, "grad_norm": 0.6971352100372314, "learning_rate": 1.9126324193406684e-05, "loss": 2.2507, "step": 4462 }, { "epoch": 0.15, "grad_norm": 0.6779584288597107, "learning_rate": 1.9125889651208348e-05, "loss": 2.2271, "step": 4463 }, { "epoch": 0.15, "grad_norm": 0.6746313571929932, "learning_rate": 1.9125455005911066e-05, "loss": 2.2429, "step": 4464 }, { "epoch": 0.15, "grad_norm": 0.7001540064811707, "learning_rate": 1.9125020257519736e-05, "loss": 2.2535, "step": 4465 }, { "epoch": 0.15, "grad_norm": 0.7001059651374817, "learning_rate": 1.9124585406039276e-05, "loss": 2.1943, "step": 4466 }, { "epoch": 0.15, "grad_norm": 0.6591464281082153, "learning_rate": 1.9124150451474597e-05, "loss": 2.1906, "step": 4467 }, { "epoch": 0.15, "grad_norm": 0.7008257508277893, "learning_rate": 1.9123715393830614e-05, "loss": 2.3508, "step": 4468 }, { "epoch": 0.15, "grad_norm": 0.7082772850990295, "learning_rate": 1.912328023311224e-05, "loss": 2.2319, "step": 4469 }, { "epoch": 0.15, "grad_norm": 0.7263653874397278, "learning_rate": 1.91228449693244e-05, "loss": 2.2375, "step": 4470 }, { "epoch": 0.15, "grad_norm": 0.7051621079444885, "learning_rate": 1.9122409602471997e-05, "loss": 2.2804, "step": 4471 }, { "epoch": 0.15, "grad_norm": 0.7339425683021545, "learning_rate": 1.9121974132559958e-05, "loss": 2.2079, "step": 4472 }, { "epoch": 0.15, "grad_norm": 0.6864722967147827, "learning_rate": 1.91215385595932e-05, "loss": 2.2082, "step": 4473 }, { "epoch": 0.15, "grad_norm": 0.7400726675987244, "learning_rate": 1.9121102883576647e-05, "loss": 2.2807, "step": 4474 }, { "epoch": 0.15, "grad_norm": 0.7143449783325195, "learning_rate": 1.912066710451522e-05, "loss": 2.2416, "step": 4475 }, { "epoch": 0.15, "grad_norm": 0.7170915007591248, "learning_rate": 1.912023122241384e-05, "loss": 2.2746, "step": 4476 }, { "epoch": 0.15, "grad_norm": 0.6703060865402222, "learning_rate": 1.911979523727743e-05, "loss": 2.1689, "step": 4477 }, { "epoch": 0.15, "grad_norm": 0.6909302473068237, "learning_rate": 1.9119359149110924e-05, "loss": 2.1541, "step": 4478 }, { "epoch": 0.15, "grad_norm": 0.6457582712173462, "learning_rate": 1.9118922957919238e-05, "loss": 2.2244, "step": 4479 }, { "epoch": 0.15, "grad_norm": 0.6873852610588074, "learning_rate": 1.9118486663707308e-05, "loss": 2.2121, "step": 4480 }, { "epoch": 0.15, "grad_norm": 0.6986265778541565, "learning_rate": 1.9118050266480057e-05, "loss": 2.2899, "step": 4481 }, { "epoch": 0.15, "grad_norm": 0.7102159261703491, "learning_rate": 1.9117613766242416e-05, "loss": 2.198, "step": 4482 }, { "epoch": 0.15, "grad_norm": 0.7048208117485046, "learning_rate": 1.911717716299932e-05, "loss": 2.2178, "step": 4483 }, { "epoch": 0.15, "grad_norm": 0.691589891910553, "learning_rate": 1.9116740456755702e-05, "loss": 2.2098, "step": 4484 }, { "epoch": 0.15, "grad_norm": 0.6619166731834412, "learning_rate": 1.911630364751649e-05, "loss": 2.2175, "step": 4485 }, { "epoch": 0.15, "grad_norm": 0.6970120072364807, "learning_rate": 1.9115866735286626e-05, "loss": 2.2132, "step": 4486 }, { "epoch": 0.15, "grad_norm": 0.6771944165229797, "learning_rate": 1.9115429720071043e-05, "loss": 2.2163, "step": 4487 }, { "epoch": 0.15, "grad_norm": 0.6751306056976318, "learning_rate": 1.911499260187467e-05, "loss": 2.1743, "step": 4488 }, { "epoch": 0.15, "grad_norm": 0.7012993097305298, "learning_rate": 1.9114555380702457e-05, "loss": 2.2428, "step": 4489 }, { "epoch": 0.15, "grad_norm": 0.6847450137138367, "learning_rate": 1.9114118056559337e-05, "loss": 2.3057, "step": 4490 }, { "epoch": 0.15, "grad_norm": 0.6945045590400696, "learning_rate": 1.9113680629450256e-05, "loss": 2.2047, "step": 4491 }, { "epoch": 0.15, "grad_norm": 0.6948922276496887, "learning_rate": 1.911324309938015e-05, "loss": 2.2938, "step": 4492 }, { "epoch": 0.15, "grad_norm": 0.7318688035011292, "learning_rate": 1.9112805466353964e-05, "loss": 2.2778, "step": 4493 }, { "epoch": 0.15, "grad_norm": 0.6989498138427734, "learning_rate": 1.9112367730376646e-05, "loss": 2.193, "step": 4494 }, { "epoch": 0.15, "grad_norm": 0.7335171699523926, "learning_rate": 1.9111929891453132e-05, "loss": 2.3043, "step": 4495 }, { "epoch": 0.15, "grad_norm": 0.7089048624038696, "learning_rate": 1.9111491949588376e-05, "loss": 2.2643, "step": 4496 }, { "epoch": 0.15, "grad_norm": 0.6977636218070984, "learning_rate": 1.9111053904787325e-05, "loss": 2.2011, "step": 4497 }, { "epoch": 0.15, "grad_norm": 0.6723150610923767, "learning_rate": 1.911061575705493e-05, "loss": 2.2092, "step": 4498 }, { "epoch": 0.15, "grad_norm": 0.6890773177146912, "learning_rate": 1.911017750639613e-05, "loss": 2.1875, "step": 4499 }, { "epoch": 0.15, "grad_norm": 0.6538310647010803, "learning_rate": 1.9109739152815888e-05, "loss": 2.1812, "step": 4500 }, { "epoch": 0.15, "grad_norm": 0.702815055847168, "learning_rate": 1.9109300696319152e-05, "loss": 2.2143, "step": 4501 }, { "epoch": 0.15, "grad_norm": 0.6854480504989624, "learning_rate": 1.9108862136910877e-05, "loss": 2.2009, "step": 4502 }, { "epoch": 0.15, "grad_norm": 0.6895673274993896, "learning_rate": 1.9108423474596014e-05, "loss": 2.2761, "step": 4503 }, { "epoch": 0.15, "grad_norm": 0.6680657863616943, "learning_rate": 1.910798470937952e-05, "loss": 2.2378, "step": 4504 }, { "epoch": 0.15, "grad_norm": 0.6699315309524536, "learning_rate": 1.910754584126635e-05, "loss": 2.1556, "step": 4505 }, { "epoch": 0.15, "grad_norm": 0.7083072662353516, "learning_rate": 1.910710687026147e-05, "loss": 2.3596, "step": 4506 }, { "epoch": 0.15, "grad_norm": 0.6675357818603516, "learning_rate": 1.910666779636983e-05, "loss": 2.2646, "step": 4507 }, { "epoch": 0.15, "grad_norm": 0.6957983374595642, "learning_rate": 1.9106228619596395e-05, "loss": 2.2206, "step": 4508 }, { "epoch": 0.15, "grad_norm": 0.6860612630844116, "learning_rate": 1.910578933994613e-05, "loss": 2.2531, "step": 4509 }, { "epoch": 0.15, "grad_norm": 0.6830098628997803, "learning_rate": 1.910534995742399e-05, "loss": 2.2069, "step": 4510 }, { "epoch": 0.15, "grad_norm": 0.7457085251808167, "learning_rate": 1.9104910472034942e-05, "loss": 2.1973, "step": 4511 }, { "epoch": 0.15, "grad_norm": 0.6901485323905945, "learning_rate": 1.910447088378395e-05, "loss": 2.2808, "step": 4512 }, { "epoch": 0.15, "grad_norm": 0.6666480898857117, "learning_rate": 1.9104031192675984e-05, "loss": 2.2288, "step": 4513 }, { "epoch": 0.15, "grad_norm": 0.720283031463623, "learning_rate": 1.910359139871601e-05, "loss": 2.2478, "step": 4514 }, { "epoch": 0.15, "grad_norm": 0.7002878785133362, "learning_rate": 1.9103151501908993e-05, "loss": 2.246, "step": 4515 }, { "epoch": 0.15, "grad_norm": 0.6835904121398926, "learning_rate": 1.910271150225991e-05, "loss": 2.2377, "step": 4516 }, { "epoch": 0.15, "grad_norm": 0.7141403555870056, "learning_rate": 1.910227139977372e-05, "loss": 2.2755, "step": 4517 }, { "epoch": 0.15, "grad_norm": 0.6972172856330872, "learning_rate": 1.9101831194455406e-05, "loss": 2.267, "step": 4518 }, { "epoch": 0.15, "grad_norm": 0.666835606098175, "learning_rate": 1.910139088630994e-05, "loss": 2.2295, "step": 4519 }, { "epoch": 0.15, "grad_norm": 0.6905609369277954, "learning_rate": 1.9100950475342292e-05, "loss": 2.2884, "step": 4520 }, { "epoch": 0.15, "grad_norm": 0.7138857841491699, "learning_rate": 1.9100509961557435e-05, "loss": 2.2752, "step": 4521 }, { "epoch": 0.15, "grad_norm": 0.6792369484901428, "learning_rate": 1.9100069344960353e-05, "loss": 2.3247, "step": 4522 }, { "epoch": 0.15, "grad_norm": 0.6814497709274292, "learning_rate": 1.9099628625556023e-05, "loss": 2.2487, "step": 4523 }, { "epoch": 0.15, "grad_norm": 0.6874086856842041, "learning_rate": 1.9099187803349418e-05, "loss": 2.2425, "step": 4524 }, { "epoch": 0.15, "grad_norm": 0.7241448760032654, "learning_rate": 1.9098746878345526e-05, "loss": 2.2633, "step": 4525 }, { "epoch": 0.15, "grad_norm": 0.6647095680236816, "learning_rate": 1.909830585054932e-05, "loss": 2.1892, "step": 4526 }, { "epoch": 0.15, "grad_norm": 0.6803613901138306, "learning_rate": 1.9097864719965788e-05, "loss": 2.228, "step": 4527 }, { "epoch": 0.15, "grad_norm": 0.6743932962417603, "learning_rate": 1.9097423486599914e-05, "loss": 2.2926, "step": 4528 }, { "epoch": 0.15, "grad_norm": 0.667098879814148, "learning_rate": 1.9096982150456678e-05, "loss": 2.163, "step": 4529 }, { "epoch": 0.15, "grad_norm": 0.7247840762138367, "learning_rate": 1.909654071154107e-05, "loss": 2.1927, "step": 4530 }, { "epoch": 0.15, "grad_norm": 0.6938689947128296, "learning_rate": 1.9096099169858077e-05, "loss": 2.256, "step": 4531 }, { "epoch": 0.15, "grad_norm": 0.6706234812736511, "learning_rate": 1.9095657525412687e-05, "loss": 2.1942, "step": 4532 }, { "epoch": 0.15, "grad_norm": 0.6712916493415833, "learning_rate": 1.9095215778209888e-05, "loss": 2.1464, "step": 4533 }, { "epoch": 0.15, "grad_norm": 0.6955000162124634, "learning_rate": 1.909477392825467e-05, "loss": 2.2807, "step": 4534 }, { "epoch": 0.15, "grad_norm": 0.7521129250526428, "learning_rate": 1.909433197555203e-05, "loss": 2.1938, "step": 4535 }, { "epoch": 0.15, "grad_norm": 0.6938499808311462, "learning_rate": 1.9093889920106954e-05, "loss": 2.2998, "step": 4536 }, { "epoch": 0.15, "grad_norm": 0.6937317848205566, "learning_rate": 1.909344776192444e-05, "loss": 2.2573, "step": 4537 }, { "epoch": 0.15, "grad_norm": 0.6733994483947754, "learning_rate": 1.909300550100948e-05, "loss": 2.2581, "step": 4538 }, { "epoch": 0.15, "grad_norm": 0.6996141076087952, "learning_rate": 1.9092563137367077e-05, "loss": 2.2948, "step": 4539 }, { "epoch": 0.15, "grad_norm": 0.6986212730407715, "learning_rate": 1.9092120671002222e-05, "loss": 2.2509, "step": 4540 }, { "epoch": 0.15, "grad_norm": 0.6827740669250488, "learning_rate": 1.909167810191992e-05, "loss": 2.2315, "step": 4541 }, { "epoch": 0.15, "grad_norm": 0.7064968347549438, "learning_rate": 1.909123543012516e-05, "loss": 2.1918, "step": 4542 }, { "epoch": 0.15, "grad_norm": 0.6934424042701721, "learning_rate": 1.9090792655622958e-05, "loss": 2.2324, "step": 4543 }, { "epoch": 0.15, "grad_norm": 0.7085209488868713, "learning_rate": 1.90903497784183e-05, "loss": 2.2602, "step": 4544 }, { "epoch": 0.15, "grad_norm": 0.7089028358459473, "learning_rate": 1.9089906798516206e-05, "loss": 2.2524, "step": 4545 }, { "epoch": 0.15, "grad_norm": 0.6986806392669678, "learning_rate": 1.908946371592167e-05, "loss": 2.2122, "step": 4546 }, { "epoch": 0.15, "grad_norm": 0.6926932334899902, "learning_rate": 1.9089020530639695e-05, "loss": 2.231, "step": 4547 }, { "epoch": 0.15, "grad_norm": 0.7113991975784302, "learning_rate": 1.9088577242675294e-05, "loss": 2.1531, "step": 4548 }, { "epoch": 0.15, "grad_norm": 0.682905912399292, "learning_rate": 1.9088133852033475e-05, "loss": 2.2802, "step": 4549 }, { "epoch": 0.15, "grad_norm": 0.7404407262802124, "learning_rate": 1.908769035871925e-05, "loss": 2.2866, "step": 4550 }, { "epoch": 0.15, "grad_norm": 0.6578441858291626, "learning_rate": 1.908724676273762e-05, "loss": 2.1688, "step": 4551 }, { "epoch": 0.15, "grad_norm": 0.6865326166152954, "learning_rate": 1.9086803064093604e-05, "loss": 2.2487, "step": 4552 }, { "epoch": 0.15, "grad_norm": 0.6928645968437195, "learning_rate": 1.9086359262792214e-05, "loss": 2.2492, "step": 4553 }, { "epoch": 0.15, "grad_norm": 0.6785802245140076, "learning_rate": 1.9085915358838458e-05, "loss": 2.2255, "step": 4554 }, { "epoch": 0.15, "grad_norm": 0.6708192229270935, "learning_rate": 1.9085471352237356e-05, "loss": 2.2274, "step": 4555 }, { "epoch": 0.15, "grad_norm": 0.6953854560852051, "learning_rate": 1.9085027242993927e-05, "loss": 2.2109, "step": 4556 }, { "epoch": 0.15, "grad_norm": 0.7028687000274658, "learning_rate": 1.908458303111318e-05, "loss": 2.3107, "step": 4557 }, { "epoch": 0.15, "grad_norm": 0.6851649880409241, "learning_rate": 1.908413871660014e-05, "loss": 2.1954, "step": 4558 }, { "epoch": 0.15, "grad_norm": 0.7201548218727112, "learning_rate": 1.9083694299459827e-05, "loss": 2.3498, "step": 4559 }, { "epoch": 0.15, "grad_norm": 0.7277721762657166, "learning_rate": 1.9083249779697258e-05, "loss": 2.1507, "step": 4560 }, { "epoch": 0.15, "grad_norm": 0.694873571395874, "learning_rate": 1.9082805157317454e-05, "loss": 2.2682, "step": 4561 }, { "epoch": 0.15, "grad_norm": 0.7165713310241699, "learning_rate": 1.908236043232544e-05, "loss": 2.2574, "step": 4562 }, { "epoch": 0.15, "grad_norm": 0.7065345048904419, "learning_rate": 1.9081915604726246e-05, "loss": 2.2643, "step": 4563 }, { "epoch": 0.15, "grad_norm": 0.6978501677513123, "learning_rate": 1.9081470674524887e-05, "loss": 2.1966, "step": 4564 }, { "epoch": 0.15, "grad_norm": 0.6929413676261902, "learning_rate": 1.9081025641726395e-05, "loss": 2.2575, "step": 4565 }, { "epoch": 0.15, "grad_norm": 0.710230827331543, "learning_rate": 1.9080580506335798e-05, "loss": 2.189, "step": 4566 }, { "epoch": 0.15, "grad_norm": 0.7041822671890259, "learning_rate": 1.9080135268358123e-05, "loss": 2.1717, "step": 4567 }, { "epoch": 0.15, "grad_norm": 0.712323009967804, "learning_rate": 1.9079689927798402e-05, "loss": 2.3079, "step": 4568 }, { "epoch": 0.15, "grad_norm": 0.6946340203285217, "learning_rate": 1.9079244484661667e-05, "loss": 2.1584, "step": 4569 }, { "epoch": 0.15, "grad_norm": 0.7263460755348206, "learning_rate": 1.9078798938952948e-05, "loss": 2.2643, "step": 4570 }, { "epoch": 0.15, "grad_norm": 0.6815439462661743, "learning_rate": 1.9078353290677277e-05, "loss": 2.2765, "step": 4571 }, { "epoch": 0.15, "grad_norm": 0.6965142488479614, "learning_rate": 1.9077907539839696e-05, "loss": 2.236, "step": 4572 }, { "epoch": 0.15, "grad_norm": 0.7325807809829712, "learning_rate": 1.9077461686445233e-05, "loss": 2.2505, "step": 4573 }, { "epoch": 0.15, "grad_norm": 0.6771026253700256, "learning_rate": 1.907701573049893e-05, "loss": 2.1882, "step": 4574 }, { "epoch": 0.15, "grad_norm": 0.7427154183387756, "learning_rate": 1.9076569672005818e-05, "loss": 2.2702, "step": 4575 }, { "epoch": 0.15, "grad_norm": 2.0212326049804688, "learning_rate": 1.9076123510970946e-05, "loss": 2.2655, "step": 4576 }, { "epoch": 0.15, "grad_norm": 0.7426022291183472, "learning_rate": 1.907567724739935e-05, "loss": 2.2891, "step": 4577 }, { "epoch": 0.15, "grad_norm": 0.6935437321662903, "learning_rate": 1.9075230881296062e-05, "loss": 2.2058, "step": 4578 }, { "epoch": 0.15, "grad_norm": 0.692090630531311, "learning_rate": 1.9074784412666143e-05, "loss": 2.2189, "step": 4579 }, { "epoch": 0.15, "grad_norm": 0.6593171954154968, "learning_rate": 1.9074337841514625e-05, "loss": 2.1989, "step": 4580 }, { "epoch": 0.15, "grad_norm": 0.7352481484413147, "learning_rate": 1.9073891167846557e-05, "loss": 2.2326, "step": 4581 }, { "epoch": 0.15, "grad_norm": 0.6694567799568176, "learning_rate": 1.9073444391666984e-05, "loss": 2.2396, "step": 4582 }, { "epoch": 0.15, "grad_norm": 0.6737841367721558, "learning_rate": 1.9072997512980954e-05, "loss": 2.2062, "step": 4583 }, { "epoch": 0.15, "grad_norm": 0.6751940846443176, "learning_rate": 1.907255053179352e-05, "loss": 2.2713, "step": 4584 }, { "epoch": 0.15, "grad_norm": 0.7192975878715515, "learning_rate": 1.907210344810972e-05, "loss": 2.3455, "step": 4585 }, { "epoch": 0.15, "grad_norm": 0.6888840794563293, "learning_rate": 1.9071656261934617e-05, "loss": 2.2298, "step": 4586 }, { "epoch": 0.15, "grad_norm": 0.6834296584129333, "learning_rate": 1.9071208973273254e-05, "loss": 2.1835, "step": 4587 }, { "epoch": 0.15, "grad_norm": 0.7026612162590027, "learning_rate": 1.907076158213069e-05, "loss": 2.194, "step": 4588 }, { "epoch": 0.15, "grad_norm": 0.6923760771751404, "learning_rate": 1.907031408851198e-05, "loss": 2.2319, "step": 4589 }, { "epoch": 0.15, "grad_norm": 0.6889351010322571, "learning_rate": 1.9069866492422172e-05, "loss": 2.281, "step": 4590 }, { "epoch": 0.15, "grad_norm": 0.7010775804519653, "learning_rate": 1.9069418793866332e-05, "loss": 2.1759, "step": 4591 }, { "epoch": 0.15, "grad_norm": 0.6823865175247192, "learning_rate": 1.9068970992849514e-05, "loss": 2.1999, "step": 4592 }, { "epoch": 0.15, "grad_norm": 0.6949930787086487, "learning_rate": 1.9068523089376777e-05, "loss": 2.168, "step": 4593 }, { "epoch": 0.15, "grad_norm": 0.7002421617507935, "learning_rate": 1.9068075083453175e-05, "loss": 2.2158, "step": 4594 }, { "epoch": 0.15, "grad_norm": 0.6876270771026611, "learning_rate": 1.9067626975083778e-05, "loss": 2.209, "step": 4595 }, { "epoch": 0.15, "grad_norm": 0.6715017557144165, "learning_rate": 1.906717876427365e-05, "loss": 2.2091, "step": 4596 }, { "epoch": 0.15, "grad_norm": 0.6766876578330994, "learning_rate": 1.9066730451027847e-05, "loss": 2.2066, "step": 4597 }, { "epoch": 0.15, "grad_norm": 0.7371006011962891, "learning_rate": 1.9066282035351437e-05, "loss": 2.2863, "step": 4598 }, { "epoch": 0.15, "grad_norm": 0.7380681037902832, "learning_rate": 1.9065833517249485e-05, "loss": 2.1779, "step": 4599 }, { "epoch": 0.15, "grad_norm": 0.7549762725830078, "learning_rate": 1.906538489672706e-05, "loss": 2.3302, "step": 4600 }, { "epoch": 0.15, "grad_norm": 0.6860675811767578, "learning_rate": 1.906493617378923e-05, "loss": 2.2591, "step": 4601 }, { "epoch": 0.15, "grad_norm": 0.6979547142982483, "learning_rate": 1.906448734844106e-05, "loss": 2.2395, "step": 4602 }, { "epoch": 0.15, "grad_norm": 0.6913871765136719, "learning_rate": 1.906403842068763e-05, "loss": 2.3294, "step": 4603 }, { "epoch": 0.15, "grad_norm": 0.6853166222572327, "learning_rate": 1.9063589390534e-05, "loss": 2.1914, "step": 4604 }, { "epoch": 0.15, "grad_norm": 0.7007079720497131, "learning_rate": 1.9063140257985253e-05, "loss": 2.2107, "step": 4605 }, { "epoch": 0.15, "grad_norm": 0.6780895590782166, "learning_rate": 1.9062691023046457e-05, "loss": 2.2244, "step": 4606 }, { "epoch": 0.15, "grad_norm": 0.7327328324317932, "learning_rate": 1.9062241685722692e-05, "loss": 2.2269, "step": 4607 }, { "epoch": 0.15, "grad_norm": 0.6897546052932739, "learning_rate": 1.9061792246019028e-05, "loss": 2.2254, "step": 4608 }, { "epoch": 0.15, "grad_norm": 0.6947484612464905, "learning_rate": 1.906134270394055e-05, "loss": 2.3009, "step": 4609 }, { "epoch": 0.15, "grad_norm": 0.6870993971824646, "learning_rate": 1.9060893059492328e-05, "loss": 2.0749, "step": 4610 }, { "epoch": 0.15, "grad_norm": 0.7048745155334473, "learning_rate": 1.906044331267945e-05, "loss": 2.262, "step": 4611 }, { "epoch": 0.15, "grad_norm": 0.6902496814727783, "learning_rate": 1.905999346350699e-05, "loss": 2.2667, "step": 4612 }, { "epoch": 0.15, "grad_norm": 0.6931498050689697, "learning_rate": 1.9059543511980036e-05, "loss": 2.243, "step": 4613 }, { "epoch": 0.15, "grad_norm": 0.6988371014595032, "learning_rate": 1.9059093458103664e-05, "loss": 2.2357, "step": 4614 }, { "epoch": 0.15, "grad_norm": 0.7046211957931519, "learning_rate": 1.905864330188297e-05, "loss": 2.2414, "step": 4615 }, { "epoch": 0.15, "grad_norm": 0.6742013096809387, "learning_rate": 1.9058193043323032e-05, "loss": 2.2386, "step": 4616 }, { "epoch": 0.15, "grad_norm": 0.6920917630195618, "learning_rate": 1.9057742682428933e-05, "loss": 2.2034, "step": 4617 }, { "epoch": 0.15, "grad_norm": 0.688748836517334, "learning_rate": 1.905729221920577e-05, "loss": 2.2308, "step": 4618 }, { "epoch": 0.15, "grad_norm": 0.6944236159324646, "learning_rate": 1.9056841653658624e-05, "loss": 2.2351, "step": 4619 }, { "epoch": 0.15, "grad_norm": 0.6904944181442261, "learning_rate": 1.9056390985792592e-05, "loss": 2.2596, "step": 4620 }, { "epoch": 0.15, "grad_norm": 0.7856355905532837, "learning_rate": 1.9055940215612763e-05, "loss": 2.2648, "step": 4621 }, { "epoch": 0.15, "grad_norm": 0.6989129185676575, "learning_rate": 1.9055489343124225e-05, "loss": 2.2286, "step": 4622 }, { "epoch": 0.15, "grad_norm": 0.6861041784286499, "learning_rate": 1.9055038368332078e-05, "loss": 2.1994, "step": 4623 }, { "epoch": 0.15, "grad_norm": 0.6962595582008362, "learning_rate": 1.905458729124141e-05, "loss": 2.2713, "step": 4624 }, { "epoch": 0.15, "grad_norm": 0.7135487198829651, "learning_rate": 1.9054136111857327e-05, "loss": 2.2302, "step": 4625 }, { "epoch": 0.15, "grad_norm": 0.7174801826477051, "learning_rate": 1.9053684830184916e-05, "loss": 2.2462, "step": 4626 }, { "epoch": 0.15, "grad_norm": 0.7136462926864624, "learning_rate": 1.9053233446229285e-05, "loss": 2.3219, "step": 4627 }, { "epoch": 0.15, "grad_norm": 0.7102366089820862, "learning_rate": 1.9052781959995524e-05, "loss": 2.2153, "step": 4628 }, { "epoch": 0.15, "grad_norm": 0.6911572813987732, "learning_rate": 1.9052330371488738e-05, "loss": 2.2451, "step": 4629 }, { "epoch": 0.15, "grad_norm": 0.6709862351417542, "learning_rate": 1.905187868071403e-05, "loss": 2.263, "step": 4630 }, { "epoch": 0.15, "grad_norm": 0.6938295960426331, "learning_rate": 1.90514268876765e-05, "loss": 2.2877, "step": 4631 }, { "epoch": 0.15, "grad_norm": 0.7085367441177368, "learning_rate": 1.9050974992381256e-05, "loss": 2.205, "step": 4632 }, { "epoch": 0.15, "grad_norm": 0.707690954208374, "learning_rate": 1.90505229948334e-05, "loss": 2.2603, "step": 4633 }, { "epoch": 0.15, "grad_norm": 0.666207492351532, "learning_rate": 1.905007089503804e-05, "loss": 2.295, "step": 4634 }, { "epoch": 0.15, "grad_norm": 0.722149133682251, "learning_rate": 1.904961869300028e-05, "loss": 2.2134, "step": 4635 }, { "epoch": 0.15, "grad_norm": 0.6758798956871033, "learning_rate": 1.9049166388725237e-05, "loss": 2.2543, "step": 4636 }, { "epoch": 0.15, "grad_norm": 0.7259350419044495, "learning_rate": 1.904871398221801e-05, "loss": 2.2901, "step": 4637 }, { "epoch": 0.15, "grad_norm": 0.6832470893859863, "learning_rate": 1.9048261473483718e-05, "loss": 2.2933, "step": 4638 }, { "epoch": 0.15, "grad_norm": 0.7110921740531921, "learning_rate": 1.904780886252747e-05, "loss": 2.2172, "step": 4639 }, { "epoch": 0.15, "grad_norm": 0.7228556275367737, "learning_rate": 1.904735614935438e-05, "loss": 2.2116, "step": 4640 }, { "epoch": 0.15, "grad_norm": 0.6668363809585571, "learning_rate": 1.9046903333969564e-05, "loss": 2.2295, "step": 4641 }, { "epoch": 0.15, "grad_norm": 0.7395142316818237, "learning_rate": 1.9046450416378135e-05, "loss": 2.2783, "step": 4642 }, { "epoch": 0.15, "grad_norm": 0.6834067702293396, "learning_rate": 1.904599739658521e-05, "loss": 2.2767, "step": 4643 }, { "epoch": 0.15, "grad_norm": 0.6739209294319153, "learning_rate": 1.904554427459591e-05, "loss": 2.1694, "step": 4644 }, { "epoch": 0.15, "grad_norm": 0.6868010759353638, "learning_rate": 1.904509105041535e-05, "loss": 2.2444, "step": 4645 }, { "epoch": 0.15, "grad_norm": 0.6862638592720032, "learning_rate": 1.9044637724048654e-05, "loss": 2.2702, "step": 4646 }, { "epoch": 0.15, "grad_norm": 0.694426953792572, "learning_rate": 1.904418429550094e-05, "loss": 2.2437, "step": 4647 }, { "epoch": 0.15, "grad_norm": 0.7224493026733398, "learning_rate": 1.9043730764777335e-05, "loss": 2.2029, "step": 4648 }, { "epoch": 0.15, "grad_norm": 0.6839559674263, "learning_rate": 1.9043277131882957e-05, "loss": 2.2095, "step": 4649 }, { "epoch": 0.15, "grad_norm": 0.6855702996253967, "learning_rate": 1.9042823396822935e-05, "loss": 2.2297, "step": 4650 }, { "epoch": 0.15, "grad_norm": 0.7167149782180786, "learning_rate": 1.904236955960239e-05, "loss": 2.2956, "step": 4651 }, { "epoch": 0.15, "grad_norm": 0.697726845741272, "learning_rate": 1.9041915620226458e-05, "loss": 2.2994, "step": 4652 }, { "epoch": 0.15, "grad_norm": 0.7095721960067749, "learning_rate": 1.9041461578700262e-05, "loss": 2.2747, "step": 4653 }, { "epoch": 0.15, "grad_norm": 0.6605771780014038, "learning_rate": 1.9041007435028934e-05, "loss": 2.1758, "step": 4654 }, { "epoch": 0.15, "grad_norm": 0.6793345808982849, "learning_rate": 1.90405531892176e-05, "loss": 2.2372, "step": 4655 }, { "epoch": 0.15, "grad_norm": 0.6980547308921814, "learning_rate": 1.904009884127139e-05, "loss": 2.2648, "step": 4656 }, { "epoch": 0.15, "grad_norm": 0.6922035217285156, "learning_rate": 1.9039644391195448e-05, "loss": 2.178, "step": 4657 }, { "epoch": 0.15, "grad_norm": 0.7471354007720947, "learning_rate": 1.9039189838994895e-05, "loss": 2.2931, "step": 4658 }, { "epoch": 0.16, "grad_norm": 0.7050546407699585, "learning_rate": 1.9038735184674877e-05, "loss": 2.2606, "step": 4659 }, { "epoch": 0.16, "grad_norm": 0.6797491312026978, "learning_rate": 1.9038280428240528e-05, "loss": 2.1881, "step": 4660 }, { "epoch": 0.16, "grad_norm": 0.6849299669265747, "learning_rate": 1.903782556969698e-05, "loss": 2.2772, "step": 4661 }, { "epoch": 0.16, "grad_norm": 0.6662061810493469, "learning_rate": 1.9037370609049377e-05, "loss": 2.1805, "step": 4662 }, { "epoch": 0.16, "grad_norm": 0.6830024719238281, "learning_rate": 1.9036915546302856e-05, "loss": 2.1578, "step": 4663 }, { "epoch": 0.16, "grad_norm": 0.6732627749443054, "learning_rate": 1.903646038146256e-05, "loss": 2.1806, "step": 4664 }, { "epoch": 0.16, "grad_norm": 0.6966559886932373, "learning_rate": 1.9036005114533633e-05, "loss": 2.2785, "step": 4665 }, { "epoch": 0.16, "grad_norm": 0.7576621174812317, "learning_rate": 1.903554974552121e-05, "loss": 2.2234, "step": 4666 }, { "epoch": 0.16, "grad_norm": 0.69753497838974, "learning_rate": 1.903509427443045e-05, "loss": 2.2634, "step": 4667 }, { "epoch": 0.16, "grad_norm": 0.691307783126831, "learning_rate": 1.903463870126648e-05, "loss": 2.1989, "step": 4668 }, { "epoch": 0.16, "grad_norm": 0.6854883432388306, "learning_rate": 1.9034183026034464e-05, "loss": 2.2481, "step": 4669 }, { "epoch": 0.16, "grad_norm": 0.7330436706542969, "learning_rate": 1.903372724873954e-05, "loss": 2.3033, "step": 4670 }, { "epoch": 0.16, "grad_norm": 0.6856635808944702, "learning_rate": 1.9033271369386857e-05, "loss": 2.2615, "step": 4671 }, { "epoch": 0.16, "grad_norm": 0.7164098620414734, "learning_rate": 1.9032815387981574e-05, "loss": 2.1888, "step": 4672 }, { "epoch": 0.16, "grad_norm": 0.7534990906715393, "learning_rate": 1.9032359304528835e-05, "loss": 2.206, "step": 4673 }, { "epoch": 0.16, "grad_norm": 0.6944230794906616, "learning_rate": 1.903190311903379e-05, "loss": 2.2532, "step": 4674 }, { "epoch": 0.16, "grad_norm": 0.6585599780082703, "learning_rate": 1.90314468315016e-05, "loss": 2.266, "step": 4675 }, { "epoch": 0.16, "grad_norm": 0.6750780344009399, "learning_rate": 1.9030990441937415e-05, "loss": 2.2157, "step": 4676 }, { "epoch": 0.16, "grad_norm": 0.6668248176574707, "learning_rate": 1.9030533950346397e-05, "loss": 2.2001, "step": 4677 }, { "epoch": 0.16, "grad_norm": 0.676856279373169, "learning_rate": 1.9030077356733695e-05, "loss": 2.2057, "step": 4678 }, { "epoch": 0.16, "grad_norm": 0.7172597050666809, "learning_rate": 1.902962066110447e-05, "loss": 2.2366, "step": 4679 }, { "epoch": 0.16, "grad_norm": 0.6745300889015198, "learning_rate": 1.9029163863463885e-05, "loss": 2.2293, "step": 4680 }, { "epoch": 0.16, "grad_norm": 0.6895262002944946, "learning_rate": 1.90287069638171e-05, "loss": 2.2089, "step": 4681 }, { "epoch": 0.16, "grad_norm": 0.6986544728279114, "learning_rate": 1.902824996216927e-05, "loss": 2.1799, "step": 4682 }, { "epoch": 0.16, "grad_norm": 0.6920831799507141, "learning_rate": 1.9027792858525567e-05, "loss": 2.2806, "step": 4683 }, { "epoch": 0.16, "grad_norm": 0.7168166041374207, "learning_rate": 1.9027335652891148e-05, "loss": 2.2487, "step": 4684 }, { "epoch": 0.16, "grad_norm": 0.6964851021766663, "learning_rate": 1.9026878345271184e-05, "loss": 2.1965, "step": 4685 }, { "epoch": 0.16, "grad_norm": 0.6743322014808655, "learning_rate": 1.9026420935670838e-05, "loss": 2.2135, "step": 4686 }, { "epoch": 0.16, "grad_norm": 0.7139073610305786, "learning_rate": 1.902596342409528e-05, "loss": 2.2628, "step": 4687 }, { "epoch": 0.16, "grad_norm": 0.6876122355461121, "learning_rate": 1.9025505810549673e-05, "loss": 2.2583, "step": 4688 }, { "epoch": 0.16, "grad_norm": 0.7109939455986023, "learning_rate": 1.9025048095039194e-05, "loss": 2.2455, "step": 4689 }, { "epoch": 0.16, "grad_norm": 0.6654071807861328, "learning_rate": 1.9024590277569007e-05, "loss": 2.261, "step": 4690 }, { "epoch": 0.16, "grad_norm": 0.6604311466217041, "learning_rate": 1.902413235814429e-05, "loss": 2.1672, "step": 4691 }, { "epoch": 0.16, "grad_norm": 0.681565523147583, "learning_rate": 1.9023674336770218e-05, "loss": 2.2215, "step": 4692 }, { "epoch": 0.16, "grad_norm": 0.6992550492286682, "learning_rate": 1.9023216213451962e-05, "loss": 2.2648, "step": 4693 }, { "epoch": 0.16, "grad_norm": 0.6947644948959351, "learning_rate": 1.902275798819469e-05, "loss": 2.2481, "step": 4694 }, { "epoch": 0.16, "grad_norm": 0.7007181644439697, "learning_rate": 1.902229966100359e-05, "loss": 2.2614, "step": 4695 }, { "epoch": 0.16, "grad_norm": 0.6869601011276245, "learning_rate": 1.9021841231883838e-05, "loss": 2.2218, "step": 4696 }, { "epoch": 0.16, "grad_norm": 0.6765796542167664, "learning_rate": 1.9021382700840608e-05, "loss": 2.187, "step": 4697 }, { "epoch": 0.16, "grad_norm": 0.7050783038139343, "learning_rate": 1.9020924067879084e-05, "loss": 2.252, "step": 4698 }, { "epoch": 0.16, "grad_norm": 0.689795196056366, "learning_rate": 1.9020465333004447e-05, "loss": 2.2411, "step": 4699 }, { "epoch": 0.16, "grad_norm": 0.7019768357276917, "learning_rate": 1.9020006496221883e-05, "loss": 2.1898, "step": 4700 }, { "epoch": 0.16, "grad_norm": 0.7195585370063782, "learning_rate": 1.9019547557536566e-05, "loss": 2.2732, "step": 4701 }, { "epoch": 0.16, "grad_norm": 0.6751661896705627, "learning_rate": 1.901908851695369e-05, "loss": 2.1515, "step": 4702 }, { "epoch": 0.16, "grad_norm": 0.7056312561035156, "learning_rate": 1.9018629374478437e-05, "loss": 2.127, "step": 4703 }, { "epoch": 0.16, "grad_norm": 0.7532898187637329, "learning_rate": 1.9018170130115995e-05, "loss": 2.2179, "step": 4704 }, { "epoch": 0.16, "grad_norm": 0.7017446160316467, "learning_rate": 1.901771078387155e-05, "loss": 2.2784, "step": 4705 }, { "epoch": 0.16, "grad_norm": 0.682104766368866, "learning_rate": 1.9017251335750295e-05, "loss": 2.1998, "step": 4706 }, { "epoch": 0.16, "grad_norm": 0.6823832392692566, "learning_rate": 1.901679178575742e-05, "loss": 2.2293, "step": 4707 }, { "epoch": 0.16, "grad_norm": 0.7049028873443604, "learning_rate": 1.901633213389811e-05, "loss": 2.2029, "step": 4708 }, { "epoch": 0.16, "grad_norm": 0.7099496126174927, "learning_rate": 1.9015872380177574e-05, "loss": 2.2673, "step": 4709 }, { "epoch": 0.16, "grad_norm": 0.7724513411521912, "learning_rate": 1.9015412524600986e-05, "loss": 2.2185, "step": 4710 }, { "epoch": 0.16, "grad_norm": 0.7045884132385254, "learning_rate": 1.901495256717356e-05, "loss": 2.2328, "step": 4711 }, { "epoch": 0.16, "grad_norm": 0.6682022213935852, "learning_rate": 1.9014492507900475e-05, "loss": 2.2218, "step": 4712 }, { "epoch": 0.16, "grad_norm": 0.6990941166877747, "learning_rate": 1.901403234678694e-05, "loss": 2.27, "step": 4713 }, { "epoch": 0.16, "grad_norm": 0.7037197351455688, "learning_rate": 1.901357208383815e-05, "loss": 2.1981, "step": 4714 }, { "epoch": 0.16, "grad_norm": 0.6634253263473511, "learning_rate": 1.9013111719059306e-05, "loss": 2.246, "step": 4715 }, { "epoch": 0.16, "grad_norm": 0.7547180652618408, "learning_rate": 1.9012651252455606e-05, "loss": 2.2939, "step": 4716 }, { "epoch": 0.16, "grad_norm": 0.7243127822875977, "learning_rate": 1.9012190684032255e-05, "loss": 2.3154, "step": 4717 }, { "epoch": 0.16, "grad_norm": 0.7080589532852173, "learning_rate": 1.9011730013794455e-05, "loss": 2.2295, "step": 4718 }, { "epoch": 0.16, "grad_norm": 0.6812245845794678, "learning_rate": 1.9011269241747412e-05, "loss": 2.2276, "step": 4719 }, { "epoch": 0.16, "grad_norm": 0.6937214136123657, "learning_rate": 1.9010808367896326e-05, "loss": 2.2944, "step": 4720 }, { "epoch": 0.16, "grad_norm": 0.7501614093780518, "learning_rate": 1.9010347392246414e-05, "loss": 2.2433, "step": 4721 }, { "epoch": 0.16, "grad_norm": 0.6915141940116882, "learning_rate": 1.9009886314802875e-05, "loss": 2.2183, "step": 4722 }, { "epoch": 0.16, "grad_norm": 0.6897430419921875, "learning_rate": 1.9009425135570923e-05, "loss": 2.1968, "step": 4723 }, { "epoch": 0.16, "grad_norm": 0.7018092274665833, "learning_rate": 1.900896385455576e-05, "loss": 2.2467, "step": 4724 }, { "epoch": 0.16, "grad_norm": 0.6967020034790039, "learning_rate": 1.9008502471762608e-05, "loss": 2.1736, "step": 4725 }, { "epoch": 0.16, "grad_norm": 0.6835813522338867, "learning_rate": 1.900804098719667e-05, "loss": 2.127, "step": 4726 }, { "epoch": 0.16, "grad_norm": 0.7201195955276489, "learning_rate": 1.900757940086317e-05, "loss": 2.2164, "step": 4727 }, { "epoch": 0.16, "grad_norm": 0.7513042688369751, "learning_rate": 1.9007117712767315e-05, "loss": 2.2299, "step": 4728 }, { "epoch": 0.16, "grad_norm": 0.6663387417793274, "learning_rate": 1.9006655922914322e-05, "loss": 2.2264, "step": 4729 }, { "epoch": 0.16, "grad_norm": 0.7091827392578125, "learning_rate": 1.9006194031309412e-05, "loss": 2.2036, "step": 4730 }, { "epoch": 0.16, "grad_norm": 0.6978490352630615, "learning_rate": 1.9005732037957797e-05, "loss": 2.2253, "step": 4731 }, { "epoch": 0.16, "grad_norm": 0.6767582297325134, "learning_rate": 1.9005269942864697e-05, "loss": 2.2531, "step": 4732 }, { "epoch": 0.16, "grad_norm": 0.7303970456123352, "learning_rate": 1.9004807746035338e-05, "loss": 2.2603, "step": 4733 }, { "epoch": 0.16, "grad_norm": 0.7422457337379456, "learning_rate": 1.9004345447474936e-05, "loss": 2.2301, "step": 4734 }, { "epoch": 0.16, "grad_norm": 0.719879150390625, "learning_rate": 1.9003883047188717e-05, "loss": 2.2006, "step": 4735 }, { "epoch": 0.16, "grad_norm": 0.7116043567657471, "learning_rate": 1.9003420545181904e-05, "loss": 2.2767, "step": 4736 }, { "epoch": 0.16, "grad_norm": 0.6684793829917908, "learning_rate": 1.9002957941459726e-05, "loss": 2.2467, "step": 4737 }, { "epoch": 0.16, "grad_norm": 0.7001445293426514, "learning_rate": 1.90024952360274e-05, "loss": 2.2611, "step": 4738 }, { "epoch": 0.16, "grad_norm": 0.7057059407234192, "learning_rate": 1.9002032428890162e-05, "loss": 2.2866, "step": 4739 }, { "epoch": 0.16, "grad_norm": 0.7199113965034485, "learning_rate": 1.9001569520053235e-05, "loss": 2.2214, "step": 4740 }, { "epoch": 0.16, "grad_norm": 0.6772589087486267, "learning_rate": 1.9001106509521854e-05, "loss": 2.2293, "step": 4741 }, { "epoch": 0.16, "grad_norm": 0.703598141670227, "learning_rate": 1.9000643397301248e-05, "loss": 2.2667, "step": 4742 }, { "epoch": 0.16, "grad_norm": 0.7413385510444641, "learning_rate": 1.9000180183396643e-05, "loss": 2.2312, "step": 4743 }, { "epoch": 0.16, "grad_norm": 0.6944820284843445, "learning_rate": 1.899971686781328e-05, "loss": 2.1681, "step": 4744 }, { "epoch": 0.16, "grad_norm": 0.6855244636535645, "learning_rate": 1.899925345055639e-05, "loss": 2.2931, "step": 4745 }, { "epoch": 0.16, "grad_norm": 0.6820714473724365, "learning_rate": 1.8998789931631205e-05, "loss": 2.3185, "step": 4746 }, { "epoch": 0.16, "grad_norm": 0.6639246344566345, "learning_rate": 1.899832631104297e-05, "loss": 2.2587, "step": 4747 }, { "epoch": 0.16, "grad_norm": 0.7517090439796448, "learning_rate": 1.8997862588796914e-05, "loss": 2.222, "step": 4748 }, { "epoch": 0.16, "grad_norm": 0.7011862993240356, "learning_rate": 1.8997398764898283e-05, "loss": 2.256, "step": 4749 }, { "epoch": 0.16, "grad_norm": 0.689825713634491, "learning_rate": 1.899693483935231e-05, "loss": 2.1859, "step": 4750 }, { "epoch": 0.16, "grad_norm": 0.7003384828567505, "learning_rate": 1.8996470812164244e-05, "loss": 2.2672, "step": 4751 }, { "epoch": 0.16, "grad_norm": 0.6527851819992065, "learning_rate": 1.8996006683339323e-05, "loss": 2.2509, "step": 4752 }, { "epoch": 0.16, "grad_norm": 0.7032826542854309, "learning_rate": 1.899554245288279e-05, "loss": 2.2519, "step": 4753 }, { "epoch": 0.16, "grad_norm": 0.6933138966560364, "learning_rate": 1.899507812079989e-05, "loss": 2.2572, "step": 4754 }, { "epoch": 0.16, "grad_norm": 0.6778034567832947, "learning_rate": 1.8994613687095866e-05, "loss": 2.1596, "step": 4755 }, { "epoch": 0.16, "grad_norm": 0.7171981334686279, "learning_rate": 1.899414915177597e-05, "loss": 2.2819, "step": 4756 }, { "epoch": 0.16, "grad_norm": 0.681999921798706, "learning_rate": 1.899368451484545e-05, "loss": 2.2006, "step": 4757 }, { "epoch": 0.16, "grad_norm": 0.7144233584403992, "learning_rate": 1.899321977630955e-05, "loss": 2.3002, "step": 4758 }, { "epoch": 0.16, "grad_norm": 0.7501667737960815, "learning_rate": 1.8992754936173525e-05, "loss": 2.2388, "step": 4759 }, { "epoch": 0.16, "grad_norm": 0.7081283330917358, "learning_rate": 1.8992289994442624e-05, "loss": 2.2208, "step": 4760 }, { "epoch": 0.16, "grad_norm": 0.7322137951850891, "learning_rate": 1.89918249511221e-05, "loss": 2.203, "step": 4761 }, { "epoch": 0.16, "grad_norm": 0.6836585998535156, "learning_rate": 1.899135980621721e-05, "loss": 2.2255, "step": 4762 }, { "epoch": 0.16, "grad_norm": 0.6888043284416199, "learning_rate": 1.8990894559733207e-05, "loss": 2.2524, "step": 4763 }, { "epoch": 0.16, "grad_norm": 0.666958749294281, "learning_rate": 1.8990429211675346e-05, "loss": 2.1743, "step": 4764 }, { "epoch": 0.16, "grad_norm": 0.6876826882362366, "learning_rate": 1.8989963762048883e-05, "loss": 2.2047, "step": 4765 }, { "epoch": 0.16, "grad_norm": 0.7631163001060486, "learning_rate": 1.8989498210859077e-05, "loss": 2.2205, "step": 4766 }, { "epoch": 0.16, "grad_norm": 0.7583569288253784, "learning_rate": 1.8989032558111193e-05, "loss": 2.2258, "step": 4767 }, { "epoch": 0.16, "grad_norm": 0.7457662224769592, "learning_rate": 1.8988566803810486e-05, "loss": 2.2954, "step": 4768 }, { "epoch": 0.16, "grad_norm": 0.7219420075416565, "learning_rate": 1.8988100947962214e-05, "loss": 2.2765, "step": 4769 }, { "epoch": 0.16, "grad_norm": 0.6786173582077026, "learning_rate": 1.898763499057165e-05, "loss": 2.1854, "step": 4770 }, { "epoch": 0.16, "grad_norm": 0.6997213959693909, "learning_rate": 1.898716893164405e-05, "loss": 2.2406, "step": 4771 }, { "epoch": 0.16, "grad_norm": 0.6940840482711792, "learning_rate": 1.8986702771184685e-05, "loss": 2.222, "step": 4772 }, { "epoch": 0.16, "grad_norm": 0.659393310546875, "learning_rate": 1.898623650919882e-05, "loss": 2.2632, "step": 4773 }, { "epoch": 0.16, "grad_norm": 0.6857128739356995, "learning_rate": 1.898577014569172e-05, "loss": 2.2292, "step": 4774 }, { "epoch": 0.16, "grad_norm": 0.6805195212364197, "learning_rate": 1.898530368066865e-05, "loss": 2.23, "step": 4775 }, { "epoch": 0.16, "grad_norm": 0.6657445430755615, "learning_rate": 1.8984837114134894e-05, "loss": 2.2716, "step": 4776 }, { "epoch": 0.16, "grad_norm": 0.6844924092292786, "learning_rate": 1.8984370446095708e-05, "loss": 2.2397, "step": 4777 }, { "epoch": 0.16, "grad_norm": 0.7198789119720459, "learning_rate": 1.898390367655637e-05, "loss": 2.2229, "step": 4778 }, { "epoch": 0.16, "grad_norm": 0.7239323258399963, "learning_rate": 1.8983436805522157e-05, "loss": 2.2944, "step": 4779 }, { "epoch": 0.16, "grad_norm": 0.709490954875946, "learning_rate": 1.8982969832998336e-05, "loss": 2.2577, "step": 4780 }, { "epoch": 0.16, "grad_norm": 0.6862909197807312, "learning_rate": 1.8982502758990187e-05, "loss": 2.1898, "step": 4781 }, { "epoch": 0.16, "grad_norm": 0.6763822436332703, "learning_rate": 1.8982035583502987e-05, "loss": 2.1829, "step": 4782 }, { "epoch": 0.16, "grad_norm": 0.7010238170623779, "learning_rate": 1.8981568306542014e-05, "loss": 2.2036, "step": 4783 }, { "epoch": 0.16, "grad_norm": 0.6869719624519348, "learning_rate": 1.8981100928112544e-05, "loss": 2.2641, "step": 4784 }, { "epoch": 0.16, "grad_norm": 0.6763965487480164, "learning_rate": 1.8980633448219857e-05, "loss": 2.2742, "step": 4785 }, { "epoch": 0.16, "grad_norm": 0.6951785683631897, "learning_rate": 1.8980165866869242e-05, "loss": 2.2348, "step": 4786 }, { "epoch": 0.16, "grad_norm": 0.7116746306419373, "learning_rate": 1.8979698184065974e-05, "loss": 2.229, "step": 4787 }, { "epoch": 0.16, "grad_norm": 0.6824199557304382, "learning_rate": 1.8979230399815338e-05, "loss": 2.1917, "step": 4788 }, { "epoch": 0.16, "grad_norm": 0.7191827297210693, "learning_rate": 1.897876251412262e-05, "loss": 2.2197, "step": 4789 }, { "epoch": 0.16, "grad_norm": 0.7147292494773865, "learning_rate": 1.8978294526993103e-05, "loss": 2.2748, "step": 4790 }, { "epoch": 0.16, "grad_norm": 0.7100715637207031, "learning_rate": 1.8977826438432078e-05, "loss": 2.1955, "step": 4791 }, { "epoch": 0.16, "grad_norm": 0.7220259308815002, "learning_rate": 1.897735824844483e-05, "loss": 2.2544, "step": 4792 }, { "epoch": 0.16, "grad_norm": 0.706153154373169, "learning_rate": 1.897688995703665e-05, "loss": 2.2283, "step": 4793 }, { "epoch": 0.16, "grad_norm": 0.7048121690750122, "learning_rate": 1.897642156421283e-05, "loss": 2.2328, "step": 4794 }, { "epoch": 0.16, "grad_norm": 0.7308088541030884, "learning_rate": 1.8975953069978658e-05, "loss": 2.2408, "step": 4795 }, { "epoch": 0.16, "grad_norm": 0.6967409253120422, "learning_rate": 1.897548447433943e-05, "loss": 2.1815, "step": 4796 }, { "epoch": 0.16, "grad_norm": 0.7581714987754822, "learning_rate": 1.897501577730044e-05, "loss": 2.1975, "step": 4797 }, { "epoch": 0.16, "grad_norm": 0.6834259629249573, "learning_rate": 1.897454697886698e-05, "loss": 2.2592, "step": 4798 }, { "epoch": 0.16, "grad_norm": 0.7299399971961975, "learning_rate": 1.8974078079044347e-05, "loss": 2.2586, "step": 4799 }, { "epoch": 0.16, "grad_norm": 0.7129775881767273, "learning_rate": 1.8973609077837837e-05, "loss": 2.2026, "step": 4800 }, { "epoch": 0.16, "grad_norm": 0.6958067417144775, "learning_rate": 1.8973139975252756e-05, "loss": 2.2811, "step": 4801 }, { "epoch": 0.16, "grad_norm": 0.7096580266952515, "learning_rate": 1.89726707712944e-05, "loss": 2.1941, "step": 4802 }, { "epoch": 0.16, "grad_norm": 0.6954323053359985, "learning_rate": 1.8972201465968058e-05, "loss": 2.1811, "step": 4803 }, { "epoch": 0.16, "grad_norm": 0.7187135219573975, "learning_rate": 1.897173205927905e-05, "loss": 2.1819, "step": 4804 }, { "epoch": 0.16, "grad_norm": 0.6987690925598145, "learning_rate": 1.897126255123267e-05, "loss": 2.1495, "step": 4805 }, { "epoch": 0.16, "grad_norm": 0.6783400177955627, "learning_rate": 1.897079294183422e-05, "loss": 2.2867, "step": 4806 }, { "epoch": 0.16, "grad_norm": 0.7092986702919006, "learning_rate": 1.8970323231089013e-05, "loss": 2.2398, "step": 4807 }, { "epoch": 0.16, "grad_norm": 0.7376054525375366, "learning_rate": 1.8969853419002348e-05, "loss": 2.285, "step": 4808 }, { "epoch": 0.16, "grad_norm": 0.7156516313552856, "learning_rate": 1.8969383505579538e-05, "loss": 2.1289, "step": 4809 }, { "epoch": 0.16, "grad_norm": 0.6990901231765747, "learning_rate": 1.896891349082589e-05, "loss": 2.3434, "step": 4810 }, { "epoch": 0.16, "grad_norm": 0.6845041513442993, "learning_rate": 1.8968443374746712e-05, "loss": 2.219, "step": 4811 }, { "epoch": 0.16, "grad_norm": 0.7466919422149658, "learning_rate": 1.896797315734732e-05, "loss": 2.2279, "step": 4812 }, { "epoch": 0.16, "grad_norm": 0.685339629650116, "learning_rate": 1.896750283863302e-05, "loss": 2.1961, "step": 4813 }, { "epoch": 0.16, "grad_norm": 0.7060288786888123, "learning_rate": 1.8967032418609128e-05, "loss": 2.1464, "step": 4814 }, { "epoch": 0.16, "grad_norm": 0.7408494353294373, "learning_rate": 1.896656189728096e-05, "loss": 2.2185, "step": 4815 }, { "epoch": 0.16, "grad_norm": 0.7651981711387634, "learning_rate": 1.896609127465383e-05, "loss": 2.2012, "step": 4816 }, { "epoch": 0.16, "grad_norm": 0.7010645866394043, "learning_rate": 1.8965620550733055e-05, "loss": 2.2667, "step": 4817 }, { "epoch": 0.16, "grad_norm": 0.7302684187889099, "learning_rate": 1.8965149725523953e-05, "loss": 2.2955, "step": 4818 }, { "epoch": 0.16, "grad_norm": 0.6990823745727539, "learning_rate": 1.8964678799031846e-05, "loss": 2.2571, "step": 4819 }, { "epoch": 0.16, "grad_norm": 0.7016177773475647, "learning_rate": 1.896420777126205e-05, "loss": 2.2137, "step": 4820 }, { "epoch": 0.16, "grad_norm": 0.7371472716331482, "learning_rate": 1.896373664221989e-05, "loss": 2.1897, "step": 4821 }, { "epoch": 0.16, "grad_norm": 0.7481779456138611, "learning_rate": 1.896326541191069e-05, "loss": 2.2554, "step": 4822 }, { "epoch": 0.16, "grad_norm": 0.6907321810722351, "learning_rate": 1.8962794080339765e-05, "loss": 2.1911, "step": 4823 }, { "epoch": 0.16, "grad_norm": 0.733113706111908, "learning_rate": 1.8962322647512442e-05, "loss": 2.2483, "step": 4824 }, { "epoch": 0.16, "grad_norm": 0.6961241364479065, "learning_rate": 1.8961851113434054e-05, "loss": 2.2809, "step": 4825 }, { "epoch": 0.16, "grad_norm": 0.7328378558158875, "learning_rate": 1.8961379478109926e-05, "loss": 2.1666, "step": 4826 }, { "epoch": 0.16, "grad_norm": 0.7186113595962524, "learning_rate": 1.8960907741545384e-05, "loss": 2.2388, "step": 4827 }, { "epoch": 0.16, "grad_norm": 0.696742594242096, "learning_rate": 1.896043590374576e-05, "loss": 2.2255, "step": 4828 }, { "epoch": 0.16, "grad_norm": 0.7496445178985596, "learning_rate": 1.8959963964716375e-05, "loss": 2.1754, "step": 4829 }, { "epoch": 0.16, "grad_norm": 0.7089710235595703, "learning_rate": 1.8959491924462573e-05, "loss": 2.2434, "step": 4830 }, { "epoch": 0.16, "grad_norm": 0.7051609754562378, "learning_rate": 1.8959019782989682e-05, "loss": 2.2, "step": 4831 }, { "epoch": 0.16, "grad_norm": 0.7148686647415161, "learning_rate": 1.8958547540303035e-05, "loss": 2.2332, "step": 4832 }, { "epoch": 0.16, "grad_norm": 0.6751871705055237, "learning_rate": 1.895807519640797e-05, "loss": 2.3279, "step": 4833 }, { "epoch": 0.16, "grad_norm": 0.6686996817588806, "learning_rate": 1.8957602751309817e-05, "loss": 2.2654, "step": 4834 }, { "epoch": 0.16, "grad_norm": 0.6863821148872375, "learning_rate": 1.8957130205013924e-05, "loss": 2.2506, "step": 4835 }, { "epoch": 0.16, "grad_norm": 0.7030407786369324, "learning_rate": 1.895665755752562e-05, "loss": 2.2566, "step": 4836 }, { "epoch": 0.16, "grad_norm": 0.7643577456474304, "learning_rate": 1.8956184808850246e-05, "loss": 2.2525, "step": 4837 }, { "epoch": 0.16, "grad_norm": 0.6815815567970276, "learning_rate": 1.8955711958993148e-05, "loss": 2.2123, "step": 4838 }, { "epoch": 0.16, "grad_norm": 0.7061907052993774, "learning_rate": 1.8955239007959665e-05, "loss": 2.2259, "step": 4839 }, { "epoch": 0.16, "grad_norm": 0.7244454622268677, "learning_rate": 1.895476595575514e-05, "loss": 2.1651, "step": 4840 }, { "epoch": 0.16, "grad_norm": 0.6943740248680115, "learning_rate": 1.8954292802384916e-05, "loss": 2.2313, "step": 4841 }, { "epoch": 0.16, "grad_norm": 0.6884348392486572, "learning_rate": 1.895381954785434e-05, "loss": 2.2947, "step": 4842 }, { "epoch": 0.16, "grad_norm": 0.693078875541687, "learning_rate": 1.8953346192168756e-05, "loss": 2.2086, "step": 4843 }, { "epoch": 0.16, "grad_norm": 0.6925577521324158, "learning_rate": 1.8952872735333516e-05, "loss": 2.2012, "step": 4844 }, { "epoch": 0.16, "grad_norm": 0.6842532753944397, "learning_rate": 1.895239917735397e-05, "loss": 2.2253, "step": 4845 }, { "epoch": 0.16, "grad_norm": 0.6860787868499756, "learning_rate": 1.8951925518235458e-05, "loss": 2.2174, "step": 4846 }, { "epoch": 0.16, "grad_norm": 0.7333983778953552, "learning_rate": 1.8951451757983343e-05, "loss": 2.2143, "step": 4847 }, { "epoch": 0.16, "grad_norm": 0.6927983164787292, "learning_rate": 1.8950977896602968e-05, "loss": 2.271, "step": 4848 }, { "epoch": 0.16, "grad_norm": 0.6859998106956482, "learning_rate": 1.8950503934099697e-05, "loss": 2.2854, "step": 4849 }, { "epoch": 0.16, "grad_norm": 0.7233844995498657, "learning_rate": 1.895002987047887e-05, "loss": 2.2643, "step": 4850 }, { "epoch": 0.16, "grad_norm": 0.6846117377281189, "learning_rate": 1.8949555705745855e-05, "loss": 2.275, "step": 4851 }, { "epoch": 0.16, "grad_norm": 0.6728076934814453, "learning_rate": 1.8949081439906005e-05, "loss": 2.2382, "step": 4852 }, { "epoch": 0.16, "grad_norm": 0.715869665145874, "learning_rate": 1.8948607072964678e-05, "loss": 2.2466, "step": 4853 }, { "epoch": 0.16, "grad_norm": 0.6812397837638855, "learning_rate": 1.894813260492723e-05, "loss": 2.2279, "step": 4854 }, { "epoch": 0.16, "grad_norm": 0.7054184675216675, "learning_rate": 1.8947658035799024e-05, "loss": 2.2156, "step": 4855 }, { "epoch": 0.16, "grad_norm": 0.6722304224967957, "learning_rate": 1.8947183365585424e-05, "loss": 2.2488, "step": 4856 }, { "epoch": 0.16, "grad_norm": 0.6748642921447754, "learning_rate": 1.8946708594291788e-05, "loss": 2.2098, "step": 4857 }, { "epoch": 0.16, "grad_norm": 0.6665319204330444, "learning_rate": 1.8946233721923483e-05, "loss": 2.3029, "step": 4858 }, { "epoch": 0.16, "grad_norm": 0.6844080090522766, "learning_rate": 1.894575874848587e-05, "loss": 2.2601, "step": 4859 }, { "epoch": 0.16, "grad_norm": 0.7068097591400146, "learning_rate": 1.894528367398432e-05, "loss": 2.1822, "step": 4860 }, { "epoch": 0.16, "grad_norm": 0.6819680333137512, "learning_rate": 1.89448084984242e-05, "loss": 2.2628, "step": 4861 }, { "epoch": 0.16, "grad_norm": 0.7051294445991516, "learning_rate": 1.8944333221810872e-05, "loss": 2.271, "step": 4862 }, { "epoch": 0.16, "grad_norm": 0.6991629600524902, "learning_rate": 1.8943857844149707e-05, "loss": 2.3393, "step": 4863 }, { "epoch": 0.16, "grad_norm": 0.7286848425865173, "learning_rate": 1.894338236544608e-05, "loss": 2.2781, "step": 4864 }, { "epoch": 0.16, "grad_norm": 0.7007122039794922, "learning_rate": 1.894290678570536e-05, "loss": 2.181, "step": 4865 }, { "epoch": 0.16, "grad_norm": 0.7174482345581055, "learning_rate": 1.8942431104932923e-05, "loss": 2.1817, "step": 4866 }, { "epoch": 0.16, "grad_norm": 0.6742419600486755, "learning_rate": 1.8941955323134138e-05, "loss": 2.3149, "step": 4867 }, { "epoch": 0.16, "grad_norm": 0.6836929321289062, "learning_rate": 1.8941479440314385e-05, "loss": 2.2309, "step": 4868 }, { "epoch": 0.16, "grad_norm": 0.6904343366622925, "learning_rate": 1.8941003456479034e-05, "loss": 2.2103, "step": 4869 }, { "epoch": 0.16, "grad_norm": 0.6752385497093201, "learning_rate": 1.8940527371633463e-05, "loss": 2.1965, "step": 4870 }, { "epoch": 0.16, "grad_norm": 0.6568467020988464, "learning_rate": 1.8940051185783058e-05, "loss": 2.161, "step": 4871 }, { "epoch": 0.16, "grad_norm": 0.6638485193252563, "learning_rate": 1.8939574898933193e-05, "loss": 2.2665, "step": 4872 }, { "epoch": 0.16, "grad_norm": 0.6979497075080872, "learning_rate": 1.8939098511089253e-05, "loss": 2.2586, "step": 4873 }, { "epoch": 0.16, "grad_norm": 0.7235592007637024, "learning_rate": 1.8938622022256613e-05, "loss": 2.2256, "step": 4874 }, { "epoch": 0.16, "grad_norm": 0.7090440392494202, "learning_rate": 1.8938145432440658e-05, "loss": 2.2613, "step": 4875 }, { "epoch": 0.16, "grad_norm": 0.6963095664978027, "learning_rate": 1.8937668741646777e-05, "loss": 2.2741, "step": 4876 }, { "epoch": 0.16, "grad_norm": 0.6985675096511841, "learning_rate": 1.8937191949880353e-05, "loss": 2.2226, "step": 4877 }, { "epoch": 0.16, "grad_norm": 0.7113465070724487, "learning_rate": 1.8936715057146774e-05, "loss": 2.2518, "step": 4878 }, { "epoch": 0.16, "grad_norm": 0.6837702393531799, "learning_rate": 1.8936238063451422e-05, "loss": 2.2059, "step": 4879 }, { "epoch": 0.16, "grad_norm": 0.6932180523872375, "learning_rate": 1.893576096879969e-05, "loss": 2.2262, "step": 4880 }, { "epoch": 0.16, "grad_norm": 0.6973762512207031, "learning_rate": 1.8935283773196968e-05, "loss": 2.2418, "step": 4881 }, { "epoch": 0.16, "grad_norm": 0.6797553896903992, "learning_rate": 1.8934806476648648e-05, "loss": 2.2563, "step": 4882 }, { "epoch": 0.16, "grad_norm": 0.6827948093414307, "learning_rate": 1.893432907916012e-05, "loss": 2.1967, "step": 4883 }, { "epoch": 0.16, "grad_norm": 0.6905988454818726, "learning_rate": 1.8933851580736777e-05, "loss": 2.2012, "step": 4884 }, { "epoch": 0.16, "grad_norm": 0.6837722659111023, "learning_rate": 1.8933373981384014e-05, "loss": 2.2146, "step": 4885 }, { "epoch": 0.16, "grad_norm": 0.6837047934532166, "learning_rate": 1.893289628110723e-05, "loss": 2.2211, "step": 4886 }, { "epoch": 0.16, "grad_norm": 0.6742048859596252, "learning_rate": 1.8932418479911817e-05, "loss": 2.2403, "step": 4887 }, { "epoch": 0.16, "grad_norm": 0.7212556004524231, "learning_rate": 1.8931940577803173e-05, "loss": 2.2334, "step": 4888 }, { "epoch": 0.16, "grad_norm": 0.7001677751541138, "learning_rate": 1.8931462574786705e-05, "loss": 2.2099, "step": 4889 }, { "epoch": 0.16, "grad_norm": 0.6931729912757874, "learning_rate": 1.8930984470867802e-05, "loss": 2.2813, "step": 4890 }, { "epoch": 0.16, "grad_norm": 0.7120679020881653, "learning_rate": 1.893050626605187e-05, "loss": 2.2449, "step": 4891 }, { "epoch": 0.16, "grad_norm": 0.7054172158241272, "learning_rate": 1.8930027960344316e-05, "loss": 2.1826, "step": 4892 }, { "epoch": 0.16, "grad_norm": 0.758216917514801, "learning_rate": 1.8929549553750537e-05, "loss": 2.2148, "step": 4893 }, { "epoch": 0.16, "grad_norm": 0.7101566791534424, "learning_rate": 1.892907104627594e-05, "loss": 2.1885, "step": 4894 }, { "epoch": 0.16, "grad_norm": 0.6897265911102295, "learning_rate": 1.8928592437925936e-05, "loss": 2.2315, "step": 4895 }, { "epoch": 0.16, "grad_norm": 0.7192511558532715, "learning_rate": 1.8928113728705922e-05, "loss": 2.258, "step": 4896 }, { "epoch": 0.16, "grad_norm": 0.6910582780838013, "learning_rate": 1.892763491862131e-05, "loss": 2.1909, "step": 4897 }, { "epoch": 0.16, "grad_norm": 0.6786988973617554, "learning_rate": 1.8927156007677517e-05, "loss": 2.14, "step": 4898 }, { "epoch": 0.16, "grad_norm": 0.760339617729187, "learning_rate": 1.8926676995879944e-05, "loss": 2.1893, "step": 4899 }, { "epoch": 0.16, "grad_norm": 0.6589984893798828, "learning_rate": 1.8926197883234004e-05, "loss": 2.2477, "step": 4900 }, { "epoch": 0.16, "grad_norm": 0.705695629119873, "learning_rate": 1.8925718669745116e-05, "loss": 2.1911, "step": 4901 }, { "epoch": 0.16, "grad_norm": 0.6967737674713135, "learning_rate": 1.8925239355418687e-05, "loss": 2.1795, "step": 4902 }, { "epoch": 0.16, "grad_norm": 0.679197371006012, "learning_rate": 1.8924759940260134e-05, "loss": 2.187, "step": 4903 }, { "epoch": 0.16, "grad_norm": 0.698905885219574, "learning_rate": 1.8924280424274873e-05, "loss": 2.2716, "step": 4904 }, { "epoch": 0.16, "grad_norm": 0.6894533634185791, "learning_rate": 1.8923800807468323e-05, "loss": 2.2946, "step": 4905 }, { "epoch": 0.16, "grad_norm": 0.727260172367096, "learning_rate": 1.89233210898459e-05, "loss": 2.1636, "step": 4906 }, { "epoch": 0.16, "grad_norm": 0.6923341155052185, "learning_rate": 1.892284127141303e-05, "loss": 2.2301, "step": 4907 }, { "epoch": 0.16, "grad_norm": 0.7053573727607727, "learning_rate": 1.8922361352175124e-05, "loss": 2.2577, "step": 4908 }, { "epoch": 0.16, "grad_norm": 0.687829315662384, "learning_rate": 1.8921881332137608e-05, "loss": 2.2556, "step": 4909 }, { "epoch": 0.16, "grad_norm": 0.7025995850563049, "learning_rate": 1.8921401211305905e-05, "loss": 2.2183, "step": 4910 }, { "epoch": 0.16, "grad_norm": 0.7422037720680237, "learning_rate": 1.8920920989685444e-05, "loss": 2.3328, "step": 4911 }, { "epoch": 0.16, "grad_norm": 0.6935421824455261, "learning_rate": 1.8920440667281645e-05, "loss": 2.2012, "step": 4912 }, { "epoch": 0.16, "grad_norm": 0.7041882276535034, "learning_rate": 1.8919960244099932e-05, "loss": 2.2565, "step": 4913 }, { "epoch": 0.16, "grad_norm": 0.6862683892250061, "learning_rate": 1.8919479720145735e-05, "loss": 2.2158, "step": 4914 }, { "epoch": 0.16, "grad_norm": 0.6875916123390198, "learning_rate": 1.8918999095424486e-05, "loss": 2.1983, "step": 4915 }, { "epoch": 0.16, "grad_norm": 0.6591494679450989, "learning_rate": 1.891851836994161e-05, "loss": 2.1778, "step": 4916 }, { "epoch": 0.16, "grad_norm": 0.7151937484741211, "learning_rate": 1.8918037543702543e-05, "loss": 2.2424, "step": 4917 }, { "epoch": 0.16, "grad_norm": 0.6980819702148438, "learning_rate": 1.8917556616712715e-05, "loss": 2.284, "step": 4918 }, { "epoch": 0.16, "grad_norm": 0.6850806474685669, "learning_rate": 1.8917075588977557e-05, "loss": 2.2329, "step": 4919 }, { "epoch": 0.16, "grad_norm": 0.7290889620780945, "learning_rate": 1.8916594460502504e-05, "loss": 2.2209, "step": 4920 }, { "epoch": 0.16, "grad_norm": 0.7004249095916748, "learning_rate": 1.8916113231292994e-05, "loss": 2.1932, "step": 4921 }, { "epoch": 0.16, "grad_norm": 0.6988550424575806, "learning_rate": 1.891563190135446e-05, "loss": 2.2506, "step": 4922 }, { "epoch": 0.16, "grad_norm": 0.6866981387138367, "learning_rate": 1.8915150470692344e-05, "loss": 2.3055, "step": 4923 }, { "epoch": 0.16, "grad_norm": 0.6938983201980591, "learning_rate": 1.891466893931208e-05, "loss": 2.2401, "step": 4924 }, { "epoch": 0.16, "grad_norm": 0.6911777853965759, "learning_rate": 1.8914187307219115e-05, "loss": 2.2385, "step": 4925 }, { "epoch": 0.16, "grad_norm": 0.6610969305038452, "learning_rate": 1.8913705574418885e-05, "loss": 2.2253, "step": 4926 }, { "epoch": 0.16, "grad_norm": 0.6792849898338318, "learning_rate": 1.8913223740916832e-05, "loss": 2.2774, "step": 4927 }, { "epoch": 0.16, "grad_norm": 0.6893838047981262, "learning_rate": 1.89127418067184e-05, "loss": 2.2601, "step": 4928 }, { "epoch": 0.16, "grad_norm": 0.6990026831626892, "learning_rate": 1.8912259771829035e-05, "loss": 2.2078, "step": 4929 }, { "epoch": 0.16, "grad_norm": 0.6758476495742798, "learning_rate": 1.8911777636254183e-05, "loss": 2.1951, "step": 4930 }, { "epoch": 0.16, "grad_norm": 0.6893577575683594, "learning_rate": 1.891129539999929e-05, "loss": 2.2568, "step": 4931 }, { "epoch": 0.16, "grad_norm": 0.7033409476280212, "learning_rate": 1.8910813063069806e-05, "loss": 2.1711, "step": 4932 }, { "epoch": 0.16, "grad_norm": 0.6908968687057495, "learning_rate": 1.8910330625471174e-05, "loss": 2.2832, "step": 4933 }, { "epoch": 0.16, "grad_norm": 0.699252188205719, "learning_rate": 1.890984808720885e-05, "loss": 2.2938, "step": 4934 }, { "epoch": 0.16, "grad_norm": 0.665632963180542, "learning_rate": 1.8909365448288287e-05, "loss": 2.2688, "step": 4935 }, { "epoch": 0.16, "grad_norm": 0.6963040232658386, "learning_rate": 1.8908882708714932e-05, "loss": 2.2039, "step": 4936 }, { "epoch": 0.16, "grad_norm": 0.6532336473464966, "learning_rate": 1.8908399868494242e-05, "loss": 2.2121, "step": 4937 }, { "epoch": 0.16, "grad_norm": 0.7373966574668884, "learning_rate": 1.8907916927631672e-05, "loss": 2.2348, "step": 4938 }, { "epoch": 0.16, "grad_norm": 0.6986955404281616, "learning_rate": 1.8907433886132674e-05, "loss": 2.2722, "step": 4939 }, { "epoch": 0.16, "grad_norm": 0.6973358988761902, "learning_rate": 1.890695074400271e-05, "loss": 2.2467, "step": 4940 }, { "epoch": 0.16, "grad_norm": 0.7383548617362976, "learning_rate": 1.8906467501247236e-05, "loss": 2.2426, "step": 4941 }, { "epoch": 0.16, "grad_norm": 0.7176355123519897, "learning_rate": 1.8905984157871713e-05, "loss": 2.1795, "step": 4942 }, { "epoch": 0.16, "grad_norm": 0.6596736907958984, "learning_rate": 1.8905500713881598e-05, "loss": 2.2155, "step": 4943 }, { "epoch": 0.16, "grad_norm": 0.7085241079330444, "learning_rate": 1.8905017169282356e-05, "loss": 2.2032, "step": 4944 }, { "epoch": 0.16, "grad_norm": 0.6802390217781067, "learning_rate": 1.8904533524079453e-05, "loss": 2.2547, "step": 4945 }, { "epoch": 0.16, "grad_norm": 0.6536100506782532, "learning_rate": 1.8904049778278342e-05, "loss": 2.21, "step": 4946 }, { "epoch": 0.16, "grad_norm": 0.6878189444541931, "learning_rate": 1.89035659318845e-05, "loss": 2.2463, "step": 4947 }, { "epoch": 0.16, "grad_norm": 0.70465087890625, "learning_rate": 1.8903081984903385e-05, "loss": 2.2097, "step": 4948 }, { "epoch": 0.16, "grad_norm": 0.7641710638999939, "learning_rate": 1.890259793734047e-05, "loss": 2.2942, "step": 4949 }, { "epoch": 0.16, "grad_norm": 0.6710432171821594, "learning_rate": 1.890211378920122e-05, "loss": 2.2391, "step": 4950 }, { "epoch": 0.16, "grad_norm": 0.6708301305770874, "learning_rate": 1.8901629540491105e-05, "loss": 2.1809, "step": 4951 }, { "epoch": 0.16, "grad_norm": 0.6742755174636841, "learning_rate": 1.8901145191215598e-05, "loss": 2.2419, "step": 4952 }, { "epoch": 0.16, "grad_norm": 0.6981996893882751, "learning_rate": 1.8900660741380167e-05, "loss": 2.1894, "step": 4953 }, { "epoch": 0.16, "grad_norm": 0.6808575391769409, "learning_rate": 1.890017619099029e-05, "loss": 2.2263, "step": 4954 }, { "epoch": 0.16, "grad_norm": 0.683233380317688, "learning_rate": 1.8899691540051436e-05, "loss": 2.2211, "step": 4955 }, { "epoch": 0.16, "grad_norm": 0.6842895746231079, "learning_rate": 1.8899206788569083e-05, "loss": 2.2631, "step": 4956 }, { "epoch": 0.16, "grad_norm": 0.6878306865692139, "learning_rate": 1.8898721936548707e-05, "loss": 2.324, "step": 4957 }, { "epoch": 0.16, "grad_norm": 0.6985573172569275, "learning_rate": 1.8898236983995786e-05, "loss": 2.2618, "step": 4958 }, { "epoch": 0.16, "grad_norm": 0.6800970435142517, "learning_rate": 1.88977519309158e-05, "loss": 2.2108, "step": 4959 }, { "epoch": 0.17, "grad_norm": 0.7035930156707764, "learning_rate": 1.8897266777314224e-05, "loss": 2.2586, "step": 4960 }, { "epoch": 0.17, "grad_norm": 0.6819291710853577, "learning_rate": 1.8896781523196547e-05, "loss": 2.2496, "step": 4961 }, { "epoch": 0.17, "grad_norm": 0.7000607848167419, "learning_rate": 1.8896296168568243e-05, "loss": 2.253, "step": 4962 }, { "epoch": 0.17, "grad_norm": 0.7050279974937439, "learning_rate": 1.8895810713434798e-05, "loss": 2.261, "step": 4963 }, { "epoch": 0.17, "grad_norm": 0.6799076199531555, "learning_rate": 1.88953251578017e-05, "loss": 2.2571, "step": 4964 }, { "epoch": 0.17, "grad_norm": 0.6625675559043884, "learning_rate": 1.889483950167443e-05, "loss": 2.2051, "step": 4965 }, { "epoch": 0.17, "grad_norm": 0.6812843680381775, "learning_rate": 1.8894353745058476e-05, "loss": 2.1567, "step": 4966 }, { "epoch": 0.17, "grad_norm": 0.6905362010002136, "learning_rate": 1.889386788795932e-05, "loss": 2.2659, "step": 4967 }, { "epoch": 0.17, "grad_norm": 0.7396480441093445, "learning_rate": 1.889338193038247e-05, "loss": 2.2381, "step": 4968 }, { "epoch": 0.17, "grad_norm": 0.6694372296333313, "learning_rate": 1.889289587233339e-05, "loss": 2.2594, "step": 4969 }, { "epoch": 0.17, "grad_norm": 0.6820674538612366, "learning_rate": 1.8892409713817592e-05, "loss": 2.2359, "step": 4970 }, { "epoch": 0.17, "grad_norm": 0.6703478693962097, "learning_rate": 1.889192345484056e-05, "loss": 2.2191, "step": 4971 }, { "epoch": 0.17, "grad_norm": 0.6847573518753052, "learning_rate": 1.8891437095407787e-05, "loss": 2.2643, "step": 4972 }, { "epoch": 0.17, "grad_norm": 0.6730780601501465, "learning_rate": 1.8890950635524767e-05, "loss": 2.246, "step": 4973 }, { "epoch": 0.17, "grad_norm": 0.7108911275863647, "learning_rate": 1.8890464075197e-05, "loss": 2.2301, "step": 4974 }, { "epoch": 0.17, "grad_norm": 0.6786690950393677, "learning_rate": 1.8889977414429976e-05, "loss": 2.2601, "step": 4975 }, { "epoch": 0.17, "grad_norm": 0.7251121401786804, "learning_rate": 1.8889490653229202e-05, "loss": 2.2282, "step": 4976 }, { "epoch": 0.17, "grad_norm": 0.6847441792488098, "learning_rate": 1.888900379160017e-05, "loss": 2.2492, "step": 4977 }, { "epoch": 0.17, "grad_norm": 0.6911371946334839, "learning_rate": 1.8888516829548382e-05, "loss": 2.2787, "step": 4978 }, { "epoch": 0.17, "grad_norm": 0.6721246242523193, "learning_rate": 1.888802976707934e-05, "loss": 2.2541, "step": 4979 }, { "epoch": 0.17, "grad_norm": 0.6804863214492798, "learning_rate": 1.888754260419855e-05, "loss": 2.2274, "step": 4980 }, { "epoch": 0.17, "grad_norm": 0.7050630450248718, "learning_rate": 1.8887055340911508e-05, "loss": 2.2396, "step": 4981 }, { "epoch": 0.17, "grad_norm": 0.7062733769416809, "learning_rate": 1.8886567977223723e-05, "loss": 2.25, "step": 4982 }, { "epoch": 0.17, "grad_norm": 0.688262939453125, "learning_rate": 1.8886080513140705e-05, "loss": 2.2226, "step": 4983 }, { "epoch": 0.17, "grad_norm": 0.7089536190032959, "learning_rate": 1.8885592948667955e-05, "loss": 2.2186, "step": 4984 }, { "epoch": 0.17, "grad_norm": 0.6549676060676575, "learning_rate": 1.8885105283810983e-05, "loss": 2.2122, "step": 4985 }, { "epoch": 0.17, "grad_norm": 0.6723291873931885, "learning_rate": 1.88846175185753e-05, "loss": 2.2285, "step": 4986 }, { "epoch": 0.17, "grad_norm": 0.7110216021537781, "learning_rate": 1.8884129652966414e-05, "loss": 2.2326, "step": 4987 }, { "epoch": 0.17, "grad_norm": 0.724730372428894, "learning_rate": 1.8883641686989838e-05, "loss": 2.2437, "step": 4988 }, { "epoch": 0.17, "grad_norm": 0.6912781000137329, "learning_rate": 1.8883153620651084e-05, "loss": 2.3385, "step": 4989 }, { "epoch": 0.17, "grad_norm": 0.7131420969963074, "learning_rate": 1.888266545395567e-05, "loss": 2.2051, "step": 4990 }, { "epoch": 0.17, "grad_norm": 0.7040233612060547, "learning_rate": 1.88821771869091e-05, "loss": 2.2559, "step": 4991 }, { "epoch": 0.17, "grad_norm": 0.7079867720603943, "learning_rate": 1.8881688819516902e-05, "loss": 2.2228, "step": 4992 }, { "epoch": 0.17, "grad_norm": 0.7042294144630432, "learning_rate": 1.8881200351784592e-05, "loss": 2.2568, "step": 4993 }, { "epoch": 0.17, "grad_norm": 0.7099297642707825, "learning_rate": 1.8880711783717682e-05, "loss": 2.2235, "step": 4994 }, { "epoch": 0.17, "grad_norm": 0.6637887358665466, "learning_rate": 1.8880223115321695e-05, "loss": 2.1899, "step": 4995 }, { "epoch": 0.17, "grad_norm": 0.666069746017456, "learning_rate": 1.8879734346602153e-05, "loss": 2.2155, "step": 4996 }, { "epoch": 0.17, "grad_norm": 0.6963674426078796, "learning_rate": 1.8879245477564572e-05, "loss": 2.2441, "step": 4997 }, { "epoch": 0.17, "grad_norm": 0.7048020362854004, "learning_rate": 1.8878756508214482e-05, "loss": 2.1783, "step": 4998 }, { "epoch": 0.17, "grad_norm": 0.7003149390220642, "learning_rate": 1.887826743855741e-05, "loss": 2.1917, "step": 4999 }, { "epoch": 0.17, "grad_norm": 0.667918860912323, "learning_rate": 1.8877778268598868e-05, "loss": 2.1887, "step": 5000 }, { "epoch": 0.17, "grad_norm": 0.7057336568832397, "learning_rate": 1.8877288998344392e-05, "loss": 2.2062, "step": 5001 }, { "epoch": 0.17, "grad_norm": 0.6759325861930847, "learning_rate": 1.887679962779951e-05, "loss": 2.2066, "step": 5002 }, { "epoch": 0.17, "grad_norm": 0.6874157190322876, "learning_rate": 1.8876310156969745e-05, "loss": 2.2504, "step": 5003 }, { "epoch": 0.17, "grad_norm": 0.6488717794418335, "learning_rate": 1.887582058586063e-05, "loss": 2.2011, "step": 5004 }, { "epoch": 0.17, "grad_norm": 0.6775034070014954, "learning_rate": 1.8875330914477696e-05, "loss": 2.1922, "step": 5005 }, { "epoch": 0.17, "grad_norm": 0.7067666053771973, "learning_rate": 1.8874841142826475e-05, "loss": 2.2438, "step": 5006 }, { "epoch": 0.17, "grad_norm": 0.6979759931564331, "learning_rate": 1.88743512709125e-05, "loss": 2.3106, "step": 5007 }, { "epoch": 0.17, "grad_norm": 0.6776329278945923, "learning_rate": 1.8873861298741306e-05, "loss": 2.1763, "step": 5008 }, { "epoch": 0.17, "grad_norm": 0.6810246109962463, "learning_rate": 1.8873371226318427e-05, "loss": 2.1903, "step": 5009 }, { "epoch": 0.17, "grad_norm": 0.6592420935630798, "learning_rate": 1.88728810536494e-05, "loss": 2.2192, "step": 5010 }, { "epoch": 0.17, "grad_norm": 0.7082527279853821, "learning_rate": 1.8872390780739763e-05, "loss": 2.2234, "step": 5011 }, { "epoch": 0.17, "grad_norm": 0.6939054727554321, "learning_rate": 1.8871900407595058e-05, "loss": 2.1776, "step": 5012 }, { "epoch": 0.17, "grad_norm": 0.6793426871299744, "learning_rate": 1.8871409934220818e-05, "loss": 2.1277, "step": 5013 }, { "epoch": 0.17, "grad_norm": 0.6657713651657104, "learning_rate": 1.8870919360622588e-05, "loss": 2.1616, "step": 5014 }, { "epoch": 0.17, "grad_norm": 0.6900374293327332, "learning_rate": 1.887042868680591e-05, "loss": 2.286, "step": 5015 }, { "epoch": 0.17, "grad_norm": 0.7074781060218811, "learning_rate": 1.886993791277633e-05, "loss": 2.2437, "step": 5016 }, { "epoch": 0.17, "grad_norm": 0.6935451626777649, "learning_rate": 1.8869447038539387e-05, "loss": 2.2164, "step": 5017 }, { "epoch": 0.17, "grad_norm": 0.7203739881515503, "learning_rate": 1.886895606410063e-05, "loss": 2.2355, "step": 5018 }, { "epoch": 0.17, "grad_norm": 0.7050142288208008, "learning_rate": 1.8868464989465605e-05, "loss": 2.2589, "step": 5019 }, { "epoch": 0.17, "grad_norm": 0.7077071070671082, "learning_rate": 1.886797381463986e-05, "loss": 2.2414, "step": 5020 }, { "epoch": 0.17, "grad_norm": 0.6927554607391357, "learning_rate": 1.8867482539628942e-05, "loss": 2.1831, "step": 5021 }, { "epoch": 0.17, "grad_norm": 0.6928524374961853, "learning_rate": 1.8866991164438405e-05, "loss": 2.2028, "step": 5022 }, { "epoch": 0.17, "grad_norm": 0.6950771808624268, "learning_rate": 1.88664996890738e-05, "loss": 2.1945, "step": 5023 }, { "epoch": 0.17, "grad_norm": 0.7099668383598328, "learning_rate": 1.8866008113540674e-05, "loss": 2.3015, "step": 5024 }, { "epoch": 0.17, "grad_norm": 0.6755173802375793, "learning_rate": 1.8865516437844586e-05, "loss": 2.1791, "step": 5025 }, { "epoch": 0.17, "grad_norm": 0.6789655685424805, "learning_rate": 1.886502466199109e-05, "loss": 2.2237, "step": 5026 }, { "epoch": 0.17, "grad_norm": 0.7095639109611511, "learning_rate": 1.886453278598574e-05, "loss": 2.2423, "step": 5027 }, { "epoch": 0.17, "grad_norm": 0.7211741209030151, "learning_rate": 1.8864040809834093e-05, "loss": 2.1976, "step": 5028 }, { "epoch": 0.17, "grad_norm": 0.6962361931800842, "learning_rate": 1.886354873354171e-05, "loss": 2.238, "step": 5029 }, { "epoch": 0.17, "grad_norm": 0.6994633674621582, "learning_rate": 1.8863056557114148e-05, "loss": 2.2196, "step": 5030 }, { "epoch": 0.17, "grad_norm": 0.6872656941413879, "learning_rate": 1.8862564280556966e-05, "loss": 2.2344, "step": 5031 }, { "epoch": 0.17, "grad_norm": 0.7347181439399719, "learning_rate": 1.8862071903875727e-05, "loss": 2.2577, "step": 5032 }, { "epoch": 0.17, "grad_norm": 0.7118401527404785, "learning_rate": 1.8861579427075992e-05, "loss": 2.1704, "step": 5033 }, { "epoch": 0.17, "grad_norm": 0.689094603061676, "learning_rate": 1.8861086850163327e-05, "loss": 2.2785, "step": 5034 }, { "epoch": 0.17, "grad_norm": 0.7315661311149597, "learning_rate": 1.8860594173143296e-05, "loss": 2.2297, "step": 5035 }, { "epoch": 0.17, "grad_norm": 0.7047004699707031, "learning_rate": 1.8860101396021468e-05, "loss": 2.2395, "step": 5036 }, { "epoch": 0.17, "grad_norm": 0.6841112971305847, "learning_rate": 1.88596085188034e-05, "loss": 2.2449, "step": 5037 }, { "epoch": 0.17, "grad_norm": 0.7024719715118408, "learning_rate": 1.885911554149467e-05, "loss": 2.1848, "step": 5038 }, { "epoch": 0.17, "grad_norm": 0.7408850789070129, "learning_rate": 1.8858622464100847e-05, "loss": 2.2881, "step": 5039 }, { "epoch": 0.17, "grad_norm": 0.6529345512390137, "learning_rate": 1.8858129286627497e-05, "loss": 2.1309, "step": 5040 }, { "epoch": 0.17, "grad_norm": 0.7167838215827942, "learning_rate": 1.8857636009080192e-05, "loss": 2.2678, "step": 5041 }, { "epoch": 0.17, "grad_norm": 0.7037912607192993, "learning_rate": 1.885714263146451e-05, "loss": 2.2042, "step": 5042 }, { "epoch": 0.17, "grad_norm": 0.7449973225593567, "learning_rate": 1.885664915378602e-05, "loss": 2.2383, "step": 5043 }, { "epoch": 0.17, "grad_norm": 0.7064663171768188, "learning_rate": 1.88561555760503e-05, "loss": 2.2093, "step": 5044 }, { "epoch": 0.17, "grad_norm": 0.7145691514015198, "learning_rate": 1.8855661898262926e-05, "loss": 2.2091, "step": 5045 }, { "epoch": 0.17, "grad_norm": 0.6990336179733276, "learning_rate": 1.885516812042947e-05, "loss": 2.2251, "step": 5046 }, { "epoch": 0.17, "grad_norm": 0.6995840668678284, "learning_rate": 1.8854674242555514e-05, "loss": 2.1769, "step": 5047 }, { "epoch": 0.17, "grad_norm": 0.6863860487937927, "learning_rate": 1.8854180264646637e-05, "loss": 2.2144, "step": 5048 }, { "epoch": 0.17, "grad_norm": 0.7096532583236694, "learning_rate": 1.885368618670842e-05, "loss": 2.2029, "step": 5049 }, { "epoch": 0.17, "grad_norm": 0.7184292674064636, "learning_rate": 1.885319200874645e-05, "loss": 2.2367, "step": 5050 }, { "epoch": 0.17, "grad_norm": 0.6989868879318237, "learning_rate": 1.8852697730766303e-05, "loss": 2.2247, "step": 5051 }, { "epoch": 0.17, "grad_norm": 0.7400781512260437, "learning_rate": 1.8852203352773566e-05, "loss": 2.2633, "step": 5052 }, { "epoch": 0.17, "grad_norm": 0.6930588483810425, "learning_rate": 1.8851708874773818e-05, "loss": 2.2579, "step": 5053 }, { "epoch": 0.17, "grad_norm": 0.7328749299049377, "learning_rate": 1.885121429677266e-05, "loss": 2.31, "step": 5054 }, { "epoch": 0.17, "grad_norm": 0.7348461151123047, "learning_rate": 1.885071961877566e-05, "loss": 2.252, "step": 5055 }, { "epoch": 0.17, "grad_norm": 0.7306812405586243, "learning_rate": 1.8850224840788424e-05, "loss": 2.2259, "step": 5056 }, { "epoch": 0.17, "grad_norm": 0.7326768636703491, "learning_rate": 1.8849729962816533e-05, "loss": 2.2677, "step": 5057 }, { "epoch": 0.17, "grad_norm": 0.7767086625099182, "learning_rate": 1.884923498486558e-05, "loss": 2.2038, "step": 5058 }, { "epoch": 0.17, "grad_norm": 0.707169234752655, "learning_rate": 1.8848739906941153e-05, "loss": 2.2909, "step": 5059 }, { "epoch": 0.17, "grad_norm": 0.669387936592102, "learning_rate": 1.884824472904885e-05, "loss": 2.1259, "step": 5060 }, { "epoch": 0.17, "grad_norm": 0.688892662525177, "learning_rate": 1.8847749451194264e-05, "loss": 2.191, "step": 5061 }, { "epoch": 0.17, "grad_norm": 0.7167370319366455, "learning_rate": 1.8847254073382993e-05, "loss": 2.2376, "step": 5062 }, { "epoch": 0.17, "grad_norm": 0.6911334991455078, "learning_rate": 1.8846758595620628e-05, "loss": 2.2385, "step": 5063 }, { "epoch": 0.17, "grad_norm": 0.7398827075958252, "learning_rate": 1.8846263017912766e-05, "loss": 2.2651, "step": 5064 }, { "epoch": 0.17, "grad_norm": 0.7097740769386292, "learning_rate": 1.8845767340265012e-05, "loss": 2.1988, "step": 5065 }, { "epoch": 0.17, "grad_norm": 0.7139015197753906, "learning_rate": 1.884527156268296e-05, "loss": 2.2769, "step": 5066 }, { "epoch": 0.17, "grad_norm": 0.71262127161026, "learning_rate": 1.884477568517222e-05, "loss": 2.2398, "step": 5067 }, { "epoch": 0.17, "grad_norm": 0.6858075261116028, "learning_rate": 1.8844279707738384e-05, "loss": 2.2017, "step": 5068 }, { "epoch": 0.17, "grad_norm": 0.7736190557479858, "learning_rate": 1.8843783630387057e-05, "loss": 2.1817, "step": 5069 }, { "epoch": 0.17, "grad_norm": 0.7048096060752869, "learning_rate": 1.8843287453123847e-05, "loss": 2.1457, "step": 5070 }, { "epoch": 0.17, "grad_norm": 0.7217193245887756, "learning_rate": 1.8842791175954358e-05, "loss": 2.2319, "step": 5071 }, { "epoch": 0.17, "grad_norm": 0.6749775409698486, "learning_rate": 1.8842294798884197e-05, "loss": 2.1967, "step": 5072 }, { "epoch": 0.17, "grad_norm": 0.7348529100418091, "learning_rate": 1.8841798321918972e-05, "loss": 2.164, "step": 5073 }, { "epoch": 0.17, "grad_norm": 0.7659627199172974, "learning_rate": 1.884130174506429e-05, "loss": 2.2006, "step": 5074 }, { "epoch": 0.17, "grad_norm": 0.756077229976654, "learning_rate": 1.8840805068325765e-05, "loss": 2.2635, "step": 5075 }, { "epoch": 0.17, "grad_norm": 0.6964266300201416, "learning_rate": 1.8840308291709e-05, "loss": 2.1826, "step": 5076 }, { "epoch": 0.17, "grad_norm": 0.6656002998352051, "learning_rate": 1.883981141521962e-05, "loss": 2.2388, "step": 5077 }, { "epoch": 0.17, "grad_norm": 0.7073518633842468, "learning_rate": 1.883931443886323e-05, "loss": 2.2141, "step": 5078 }, { "epoch": 0.17, "grad_norm": 0.7134633660316467, "learning_rate": 1.8838817362645443e-05, "loss": 2.3162, "step": 5079 }, { "epoch": 0.17, "grad_norm": 0.7177674770355225, "learning_rate": 1.883832018657188e-05, "loss": 2.243, "step": 5080 }, { "epoch": 0.17, "grad_norm": 0.7090542912483215, "learning_rate": 1.8837822910648152e-05, "loss": 2.2455, "step": 5081 }, { "epoch": 0.17, "grad_norm": 0.672967791557312, "learning_rate": 1.883732553487988e-05, "loss": 2.2182, "step": 5082 }, { "epoch": 0.17, "grad_norm": 0.6998160481452942, "learning_rate": 1.8836828059272685e-05, "loss": 2.2016, "step": 5083 }, { "epoch": 0.17, "grad_norm": 0.6857835650444031, "learning_rate": 1.8836330483832185e-05, "loss": 2.2118, "step": 5084 }, { "epoch": 0.17, "grad_norm": 0.6978532671928406, "learning_rate": 1.8835832808564002e-05, "loss": 2.2585, "step": 5085 }, { "epoch": 0.17, "grad_norm": 0.7195155024528503, "learning_rate": 1.883533503347376e-05, "loss": 2.2588, "step": 5086 }, { "epoch": 0.17, "grad_norm": 0.7492415904998779, "learning_rate": 1.8834837158567078e-05, "loss": 2.2017, "step": 5087 }, { "epoch": 0.17, "grad_norm": 0.7003429532051086, "learning_rate": 1.8834339183849586e-05, "loss": 2.2221, "step": 5088 }, { "epoch": 0.17, "grad_norm": 0.7068492770195007, "learning_rate": 1.8833841109326906e-05, "loss": 2.2284, "step": 5089 }, { "epoch": 0.17, "grad_norm": 0.7309505343437195, "learning_rate": 1.8833342935004667e-05, "loss": 2.1664, "step": 5090 }, { "epoch": 0.17, "grad_norm": 0.6891161203384399, "learning_rate": 1.8832844660888496e-05, "loss": 2.2346, "step": 5091 }, { "epoch": 0.17, "grad_norm": 0.6830641627311707, "learning_rate": 1.883234628698402e-05, "loss": 2.1392, "step": 5092 }, { "epoch": 0.17, "grad_norm": 0.692571222782135, "learning_rate": 1.883184781329688e-05, "loss": 2.1848, "step": 5093 }, { "epoch": 0.17, "grad_norm": 0.7061823606491089, "learning_rate": 1.883134923983269e-05, "loss": 2.2209, "step": 5094 }, { "epoch": 0.17, "grad_norm": 0.6857052445411682, "learning_rate": 1.8830850566597096e-05, "loss": 2.1984, "step": 5095 }, { "epoch": 0.17, "grad_norm": 0.7192384004592896, "learning_rate": 1.8830351793595727e-05, "loss": 2.2937, "step": 5096 }, { "epoch": 0.17, "grad_norm": 0.6932501792907715, "learning_rate": 1.8829852920834223e-05, "loss": 2.2218, "step": 5097 }, { "epoch": 0.17, "grad_norm": 0.7089884877204895, "learning_rate": 1.882935394831821e-05, "loss": 2.2552, "step": 5098 }, { "epoch": 0.17, "grad_norm": 0.6818573474884033, "learning_rate": 1.8828854876053332e-05, "loss": 2.1774, "step": 5099 }, { "epoch": 0.17, "grad_norm": 0.7147440314292908, "learning_rate": 1.8828355704045225e-05, "loss": 2.1969, "step": 5100 }, { "epoch": 0.17, "grad_norm": 0.7260103225708008, "learning_rate": 1.882785643229953e-05, "loss": 2.2024, "step": 5101 }, { "epoch": 0.17, "grad_norm": 0.7020487189292908, "learning_rate": 1.8827357060821886e-05, "loss": 2.1687, "step": 5102 }, { "epoch": 0.17, "grad_norm": 0.708466649055481, "learning_rate": 1.8826857589617934e-05, "loss": 2.2057, "step": 5103 }, { "epoch": 0.17, "grad_norm": 0.6802229881286621, "learning_rate": 1.8826358018693324e-05, "loss": 2.0251, "step": 5104 }, { "epoch": 0.17, "grad_norm": 0.689306378364563, "learning_rate": 1.8825858348053686e-05, "loss": 2.2905, "step": 5105 }, { "epoch": 0.17, "grad_norm": 0.6987339854240417, "learning_rate": 1.882535857770468e-05, "loss": 2.2342, "step": 5106 }, { "epoch": 0.17, "grad_norm": 0.7029455304145813, "learning_rate": 1.882485870765194e-05, "loss": 2.2232, "step": 5107 }, { "epoch": 0.17, "grad_norm": 0.6918255686759949, "learning_rate": 1.882435873790112e-05, "loss": 2.2525, "step": 5108 }, { "epoch": 0.17, "grad_norm": 0.6963185667991638, "learning_rate": 1.8823858668457866e-05, "loss": 2.2526, "step": 5109 }, { "epoch": 0.17, "grad_norm": 0.6784640550613403, "learning_rate": 1.882335849932783e-05, "loss": 2.1899, "step": 5110 }, { "epoch": 0.17, "grad_norm": 0.6808115243911743, "learning_rate": 1.882285823051666e-05, "loss": 2.2027, "step": 5111 }, { "epoch": 0.17, "grad_norm": 0.7175215482711792, "learning_rate": 1.8822357862030008e-05, "loss": 2.191, "step": 5112 }, { "epoch": 0.17, "grad_norm": 0.7055568695068359, "learning_rate": 1.8821857393873525e-05, "loss": 2.2004, "step": 5113 }, { "epoch": 0.17, "grad_norm": 0.6805019378662109, "learning_rate": 1.882135682605287e-05, "loss": 2.2352, "step": 5114 }, { "epoch": 0.17, "grad_norm": 0.7093477249145508, "learning_rate": 1.8820856158573693e-05, "loss": 2.2168, "step": 5115 }, { "epoch": 0.17, "grad_norm": 0.7215244174003601, "learning_rate": 1.8820355391441657e-05, "loss": 2.224, "step": 5116 }, { "epoch": 0.17, "grad_norm": 0.7366530895233154, "learning_rate": 1.8819854524662413e-05, "loss": 2.2683, "step": 5117 }, { "epoch": 0.17, "grad_norm": 0.6769137382507324, "learning_rate": 1.8819353558241617e-05, "loss": 2.2585, "step": 5118 }, { "epoch": 0.17, "grad_norm": 0.6813618540763855, "learning_rate": 1.881885249218494e-05, "loss": 2.1645, "step": 5119 }, { "epoch": 0.17, "grad_norm": 0.6905884146690369, "learning_rate": 1.881835132649803e-05, "loss": 2.2181, "step": 5120 }, { "epoch": 0.17, "grad_norm": 0.7158372402191162, "learning_rate": 1.8817850061186558e-05, "loss": 2.2497, "step": 5121 }, { "epoch": 0.17, "grad_norm": 0.9966598749160767, "learning_rate": 1.8817348696256185e-05, "loss": 2.2706, "step": 5122 }, { "epoch": 0.17, "grad_norm": 0.6946061253547668, "learning_rate": 1.8816847231712572e-05, "loss": 2.2492, "step": 5123 }, { "epoch": 0.17, "grad_norm": 0.7460147142410278, "learning_rate": 1.8816345667561385e-05, "loss": 2.3094, "step": 5124 }, { "epoch": 0.17, "grad_norm": 0.7010629177093506, "learning_rate": 1.881584400380829e-05, "loss": 2.204, "step": 5125 }, { "epoch": 0.17, "grad_norm": 0.6772447824478149, "learning_rate": 1.8815342240458963e-05, "loss": 2.2181, "step": 5126 }, { "epoch": 0.17, "grad_norm": 0.728985607624054, "learning_rate": 1.8814840377519062e-05, "loss": 2.2205, "step": 5127 }, { "epoch": 0.17, "grad_norm": 0.6857736706733704, "learning_rate": 1.8814338414994256e-05, "loss": 2.2202, "step": 5128 }, { "epoch": 0.17, "grad_norm": 0.6764331459999084, "learning_rate": 1.8813836352890227e-05, "loss": 2.1848, "step": 5129 }, { "epoch": 0.17, "grad_norm": 0.6858397126197815, "learning_rate": 1.8813334191212637e-05, "loss": 2.2129, "step": 5130 }, { "epoch": 0.17, "grad_norm": 0.7191885113716125, "learning_rate": 1.8812831929967165e-05, "loss": 2.1414, "step": 5131 }, { "epoch": 0.17, "grad_norm": 0.6899157762527466, "learning_rate": 1.881232956915948e-05, "loss": 2.1246, "step": 5132 }, { "epoch": 0.17, "grad_norm": 0.6869111657142639, "learning_rate": 1.881182710879526e-05, "loss": 2.2016, "step": 5133 }, { "epoch": 0.17, "grad_norm": 0.6878247857093811, "learning_rate": 1.8811324548880182e-05, "loss": 2.2824, "step": 5134 }, { "epoch": 0.17, "grad_norm": 0.6816338300704956, "learning_rate": 1.8810821889419926e-05, "loss": 2.221, "step": 5135 }, { "epoch": 0.17, "grad_norm": 0.7088598012924194, "learning_rate": 1.8810319130420164e-05, "loss": 2.2069, "step": 5136 }, { "epoch": 0.17, "grad_norm": 0.7191385626792908, "learning_rate": 1.880981627188658e-05, "loss": 2.2419, "step": 5137 }, { "epoch": 0.17, "grad_norm": 0.7018817067146301, "learning_rate": 1.880931331382486e-05, "loss": 2.1571, "step": 5138 }, { "epoch": 0.17, "grad_norm": 0.6681764721870422, "learning_rate": 1.8808810256240676e-05, "loss": 2.1501, "step": 5139 }, { "epoch": 0.17, "grad_norm": 0.7004812359809875, "learning_rate": 1.8808307099139714e-05, "loss": 2.1977, "step": 5140 }, { "epoch": 0.17, "grad_norm": 0.72236567735672, "learning_rate": 1.8807803842527665e-05, "loss": 2.2801, "step": 5141 }, { "epoch": 0.17, "grad_norm": 0.6962788105010986, "learning_rate": 1.880730048641021e-05, "loss": 2.2461, "step": 5142 }, { "epoch": 0.17, "grad_norm": 0.7311447858810425, "learning_rate": 1.8806797030793035e-05, "loss": 2.2518, "step": 5143 }, { "epoch": 0.17, "grad_norm": 0.6980361342430115, "learning_rate": 1.880629347568183e-05, "loss": 2.2762, "step": 5144 }, { "epoch": 0.17, "grad_norm": 0.7014514803886414, "learning_rate": 1.8805789821082276e-05, "loss": 2.2332, "step": 5145 }, { "epoch": 0.17, "grad_norm": 0.7239410281181335, "learning_rate": 1.8805286067000075e-05, "loss": 2.1908, "step": 5146 }, { "epoch": 0.17, "grad_norm": 0.694811224937439, "learning_rate": 1.880478221344091e-05, "loss": 2.2238, "step": 5147 }, { "epoch": 0.17, "grad_norm": 0.6995794773101807, "learning_rate": 1.8804278260410476e-05, "loss": 2.1678, "step": 5148 }, { "epoch": 0.17, "grad_norm": 0.6875421404838562, "learning_rate": 1.880377420791447e-05, "loss": 2.2063, "step": 5149 }, { "epoch": 0.17, "grad_norm": 0.6913409233093262, "learning_rate": 1.880327005595858e-05, "loss": 2.2812, "step": 5150 }, { "epoch": 0.17, "grad_norm": 0.6921247243881226, "learning_rate": 1.8802765804548502e-05, "loss": 2.196, "step": 5151 }, { "epoch": 0.17, "grad_norm": 0.7294057607650757, "learning_rate": 1.8802261453689933e-05, "loss": 2.1911, "step": 5152 }, { "epoch": 0.17, "grad_norm": 0.714398205280304, "learning_rate": 1.8801757003388578e-05, "loss": 2.185, "step": 5153 }, { "epoch": 0.17, "grad_norm": 0.6870704889297485, "learning_rate": 1.880125245365013e-05, "loss": 2.2089, "step": 5154 }, { "epoch": 0.17, "grad_norm": 0.7094072699546814, "learning_rate": 1.8800747804480285e-05, "loss": 2.1477, "step": 5155 }, { "epoch": 0.17, "grad_norm": 0.7408642768859863, "learning_rate": 1.8800243055884755e-05, "loss": 2.1855, "step": 5156 }, { "epoch": 0.17, "grad_norm": 0.6755397319793701, "learning_rate": 1.8799738207869237e-05, "loss": 2.138, "step": 5157 }, { "epoch": 0.17, "grad_norm": 0.7241804599761963, "learning_rate": 1.8799233260439427e-05, "loss": 2.2652, "step": 5158 }, { "epoch": 0.17, "grad_norm": 0.7762075662612915, "learning_rate": 1.8798728213601042e-05, "loss": 2.1924, "step": 5159 }, { "epoch": 0.17, "grad_norm": 0.7232881188392639, "learning_rate": 1.879822306735978e-05, "loss": 2.1984, "step": 5160 }, { "epoch": 0.17, "grad_norm": 0.7912119030952454, "learning_rate": 1.879771782172135e-05, "loss": 2.1972, "step": 5161 }, { "epoch": 0.17, "grad_norm": 0.6885355114936829, "learning_rate": 1.8797212476691464e-05, "loss": 2.1617, "step": 5162 }, { "epoch": 0.17, "grad_norm": 0.7278901934623718, "learning_rate": 1.879670703227582e-05, "loss": 2.2085, "step": 5163 }, { "epoch": 0.17, "grad_norm": 0.6827148199081421, "learning_rate": 1.879620148848014e-05, "loss": 2.2332, "step": 5164 }, { "epoch": 0.17, "grad_norm": 0.7028045654296875, "learning_rate": 1.879569584531013e-05, "loss": 2.1916, "step": 5165 }, { "epoch": 0.17, "grad_norm": 0.7079675793647766, "learning_rate": 1.8795190102771502e-05, "loss": 2.1834, "step": 5166 }, { "epoch": 0.17, "grad_norm": 0.7118650674819946, "learning_rate": 1.879468426086997e-05, "loss": 2.2151, "step": 5167 }, { "epoch": 0.17, "grad_norm": 0.7239086031913757, "learning_rate": 1.8794178319611254e-05, "loss": 2.1602, "step": 5168 }, { "epoch": 0.17, "grad_norm": 0.6722283363342285, "learning_rate": 1.879367227900106e-05, "loss": 2.155, "step": 5169 }, { "epoch": 0.17, "grad_norm": 0.6998399496078491, "learning_rate": 1.8793166139045112e-05, "loss": 2.1961, "step": 5170 }, { "epoch": 0.17, "grad_norm": 0.6859027147293091, "learning_rate": 1.879265989974913e-05, "loss": 2.2233, "step": 5171 }, { "epoch": 0.17, "grad_norm": 0.6962826251983643, "learning_rate": 1.8792153561118823e-05, "loss": 2.1912, "step": 5172 }, { "epoch": 0.17, "grad_norm": 0.7125821709632874, "learning_rate": 1.8791647123159922e-05, "loss": 2.2599, "step": 5173 }, { "epoch": 0.17, "grad_norm": 0.6984997391700745, "learning_rate": 1.8791140585878144e-05, "loss": 2.2752, "step": 5174 }, { "epoch": 0.17, "grad_norm": 0.7091352343559265, "learning_rate": 1.8790633949279212e-05, "loss": 2.1558, "step": 5175 }, { "epoch": 0.17, "grad_norm": 0.6993475556373596, "learning_rate": 1.879012721336885e-05, "loss": 2.2107, "step": 5176 }, { "epoch": 0.17, "grad_norm": 0.7126711010932922, "learning_rate": 1.878962037815278e-05, "loss": 2.2304, "step": 5177 }, { "epoch": 0.17, "grad_norm": 0.7008918523788452, "learning_rate": 1.878911344363673e-05, "loss": 2.1511, "step": 5178 }, { "epoch": 0.17, "grad_norm": 0.687052309513092, "learning_rate": 1.8788606409826427e-05, "loss": 2.2292, "step": 5179 }, { "epoch": 0.17, "grad_norm": 0.7070609331130981, "learning_rate": 1.87880992767276e-05, "loss": 2.2167, "step": 5180 }, { "epoch": 0.17, "grad_norm": 0.6896861791610718, "learning_rate": 1.878759204434598e-05, "loss": 2.217, "step": 5181 }, { "epoch": 0.17, "grad_norm": 0.68677818775177, "learning_rate": 1.8787084712687292e-05, "loss": 2.2229, "step": 5182 }, { "epoch": 0.17, "grad_norm": 0.6798427700996399, "learning_rate": 1.878657728175727e-05, "loss": 2.2, "step": 5183 }, { "epoch": 0.17, "grad_norm": 0.7029697299003601, "learning_rate": 1.878606975156165e-05, "loss": 2.2202, "step": 5184 }, { "epoch": 0.17, "grad_norm": 0.6784753203392029, "learning_rate": 1.8785562122106164e-05, "loss": 2.1838, "step": 5185 }, { "epoch": 0.17, "grad_norm": 0.7120959758758545, "learning_rate": 1.8785054393396543e-05, "loss": 2.2425, "step": 5186 }, { "epoch": 0.17, "grad_norm": 0.7057787179946899, "learning_rate": 1.878454656543853e-05, "loss": 2.2116, "step": 5187 }, { "epoch": 0.17, "grad_norm": 0.6878623366355896, "learning_rate": 1.878403863823785e-05, "loss": 2.1995, "step": 5188 }, { "epoch": 0.17, "grad_norm": 0.6719970107078552, "learning_rate": 1.878353061180026e-05, "loss": 2.1639, "step": 5189 }, { "epoch": 0.17, "grad_norm": 0.7024589776992798, "learning_rate": 1.878302248613148e-05, "loss": 2.2103, "step": 5190 }, { "epoch": 0.17, "grad_norm": 0.7108834385871887, "learning_rate": 1.8782514261237263e-05, "loss": 2.1811, "step": 5191 }, { "epoch": 0.17, "grad_norm": 0.7629290819168091, "learning_rate": 1.878200593712335e-05, "loss": 2.2176, "step": 5192 }, { "epoch": 0.17, "grad_norm": 0.6590706706047058, "learning_rate": 1.8781497513795476e-05, "loss": 2.1312, "step": 5193 }, { "epoch": 0.17, "grad_norm": 0.6679494976997375, "learning_rate": 1.878098899125939e-05, "loss": 2.1923, "step": 5194 }, { "epoch": 0.17, "grad_norm": 0.6982284188270569, "learning_rate": 1.878048036952084e-05, "loss": 2.2231, "step": 5195 }, { "epoch": 0.17, "grad_norm": 0.6949024796485901, "learning_rate": 1.8779971648585566e-05, "loss": 2.2107, "step": 5196 }, { "epoch": 0.17, "grad_norm": 0.7009048461914062, "learning_rate": 1.877946282845932e-05, "loss": 2.2243, "step": 5197 }, { "epoch": 0.17, "grad_norm": 0.7282629609107971, "learning_rate": 1.8778953909147844e-05, "loss": 2.2313, "step": 5198 }, { "epoch": 0.17, "grad_norm": 0.7124980092048645, "learning_rate": 1.8778444890656896e-05, "loss": 2.2477, "step": 5199 }, { "epoch": 0.17, "grad_norm": 0.7047486305236816, "learning_rate": 1.877793577299222e-05, "loss": 2.1911, "step": 5200 }, { "epoch": 0.17, "grad_norm": 0.7133715748786926, "learning_rate": 1.877742655615957e-05, "loss": 2.167, "step": 5201 }, { "epoch": 0.17, "grad_norm": 0.6848175525665283, "learning_rate": 1.87769172401647e-05, "loss": 2.2421, "step": 5202 }, { "epoch": 0.17, "grad_norm": 0.6807448863983154, "learning_rate": 1.877640782501336e-05, "loss": 2.2311, "step": 5203 }, { "epoch": 0.17, "grad_norm": 0.7021402716636658, "learning_rate": 1.877589831071131e-05, "loss": 2.2491, "step": 5204 }, { "epoch": 0.17, "grad_norm": 0.6985737681388855, "learning_rate": 1.8775388697264305e-05, "loss": 2.2056, "step": 5205 }, { "epoch": 0.17, "grad_norm": 0.6904672980308533, "learning_rate": 1.87748789846781e-05, "loss": 2.2153, "step": 5206 }, { "epoch": 0.17, "grad_norm": 0.7181807160377502, "learning_rate": 1.8774369172958456e-05, "loss": 2.2447, "step": 5207 }, { "epoch": 0.17, "grad_norm": 0.6959670782089233, "learning_rate": 1.877385926211113e-05, "loss": 2.1482, "step": 5208 }, { "epoch": 0.17, "grad_norm": 0.7081802487373352, "learning_rate": 1.8773349252141884e-05, "loss": 2.2023, "step": 5209 }, { "epoch": 0.17, "grad_norm": 0.6991066932678223, "learning_rate": 1.877283914305648e-05, "loss": 2.2326, "step": 5210 }, { "epoch": 0.17, "grad_norm": 0.6997536420822144, "learning_rate": 1.8772328934860682e-05, "loss": 2.1842, "step": 5211 }, { "epoch": 0.17, "grad_norm": 0.7279016375541687, "learning_rate": 1.877181862756025e-05, "loss": 2.25, "step": 5212 }, { "epoch": 0.17, "grad_norm": 0.6934177875518799, "learning_rate": 1.8771308221160956e-05, "loss": 2.2361, "step": 5213 }, { "epoch": 0.17, "grad_norm": 0.718974232673645, "learning_rate": 1.877079771566856e-05, "loss": 2.1879, "step": 5214 }, { "epoch": 0.17, "grad_norm": 0.7184673547744751, "learning_rate": 1.8770287111088832e-05, "loss": 2.1693, "step": 5215 }, { "epoch": 0.17, "grad_norm": 0.6779240369796753, "learning_rate": 1.876977640742754e-05, "loss": 2.2711, "step": 5216 }, { "epoch": 0.17, "grad_norm": 0.6954242587089539, "learning_rate": 1.8769265604690456e-05, "loss": 2.2146, "step": 5217 }, { "epoch": 0.17, "grad_norm": 0.7047503590583801, "learning_rate": 1.8768754702883342e-05, "loss": 2.1805, "step": 5218 }, { "epoch": 0.17, "grad_norm": 0.7255203127861023, "learning_rate": 1.8768243702011984e-05, "loss": 2.242, "step": 5219 }, { "epoch": 0.17, "grad_norm": 0.6902960538864136, "learning_rate": 1.8767732602082143e-05, "loss": 2.1662, "step": 5220 }, { "epoch": 0.17, "grad_norm": 0.6786679625511169, "learning_rate": 1.8767221403099598e-05, "loss": 2.1941, "step": 5221 }, { "epoch": 0.17, "grad_norm": 0.6963168382644653, "learning_rate": 1.8766710105070122e-05, "loss": 2.1896, "step": 5222 }, { "epoch": 0.17, "grad_norm": 0.6761866807937622, "learning_rate": 1.8766198707999497e-05, "loss": 2.1576, "step": 5223 }, { "epoch": 0.17, "grad_norm": 0.688229501247406, "learning_rate": 1.8765687211893494e-05, "loss": 2.2233, "step": 5224 }, { "epoch": 0.17, "grad_norm": 0.7287259697914124, "learning_rate": 1.8765175616757892e-05, "loss": 2.288, "step": 5225 }, { "epoch": 0.17, "grad_norm": 0.7473742961883545, "learning_rate": 1.876466392259848e-05, "loss": 2.2698, "step": 5226 }, { "epoch": 0.17, "grad_norm": 0.7048467397689819, "learning_rate": 1.8764152129421025e-05, "loss": 2.1749, "step": 5227 }, { "epoch": 0.17, "grad_norm": 0.7084662318229675, "learning_rate": 1.8763640237231317e-05, "loss": 2.1858, "step": 5228 }, { "epoch": 0.17, "grad_norm": 0.6919201612472534, "learning_rate": 1.876312824603514e-05, "loss": 2.2708, "step": 5229 }, { "epoch": 0.17, "grad_norm": 0.689358651638031, "learning_rate": 1.8762616155838273e-05, "loss": 2.2076, "step": 5230 }, { "epoch": 0.17, "grad_norm": 1.0632957220077515, "learning_rate": 1.8762103966646504e-05, "loss": 2.2484, "step": 5231 }, { "epoch": 0.17, "grad_norm": 0.7174069881439209, "learning_rate": 1.876159167846562e-05, "loss": 2.2538, "step": 5232 }, { "epoch": 0.17, "grad_norm": 0.7072395086288452, "learning_rate": 1.8761079291301412e-05, "loss": 2.2607, "step": 5233 }, { "epoch": 0.17, "grad_norm": 0.726084291934967, "learning_rate": 1.8760566805159658e-05, "loss": 2.2206, "step": 5234 }, { "epoch": 0.17, "grad_norm": 0.7034367322921753, "learning_rate": 1.876005422004616e-05, "loss": 2.2611, "step": 5235 }, { "epoch": 0.17, "grad_norm": 0.6781550049781799, "learning_rate": 1.87595415359667e-05, "loss": 2.1555, "step": 5236 }, { "epoch": 0.17, "grad_norm": 0.7205064296722412, "learning_rate": 1.8759028752927073e-05, "loss": 2.3049, "step": 5237 }, { "epoch": 0.17, "grad_norm": 0.6687968373298645, "learning_rate": 1.8758515870933074e-05, "loss": 2.1595, "step": 5238 }, { "epoch": 0.17, "grad_norm": 0.7130811810493469, "learning_rate": 1.8758002889990495e-05, "loss": 2.2346, "step": 5239 }, { "epoch": 0.17, "grad_norm": 0.7240042686462402, "learning_rate": 1.875748981010513e-05, "loss": 2.1687, "step": 5240 }, { "epoch": 0.17, "grad_norm": 0.6798577308654785, "learning_rate": 1.8756976631282784e-05, "loss": 2.1766, "step": 5241 }, { "epoch": 0.17, "grad_norm": 0.6812129020690918, "learning_rate": 1.8756463353529243e-05, "loss": 2.223, "step": 5242 }, { "epoch": 0.17, "grad_norm": 0.7317559719085693, "learning_rate": 1.8755949976850313e-05, "loss": 2.2232, "step": 5243 }, { "epoch": 0.17, "grad_norm": 0.7212831377983093, "learning_rate": 1.875543650125179e-05, "loss": 2.2592, "step": 5244 }, { "epoch": 0.17, "grad_norm": 0.7260909080505371, "learning_rate": 1.875492292673948e-05, "loss": 2.2108, "step": 5245 }, { "epoch": 0.17, "grad_norm": 0.6917257905006409, "learning_rate": 1.8754409253319175e-05, "loss": 2.2471, "step": 5246 }, { "epoch": 0.17, "grad_norm": 0.6876272559165955, "learning_rate": 1.8753895480996688e-05, "loss": 2.2357, "step": 5247 }, { "epoch": 0.17, "grad_norm": 0.6912677884101868, "learning_rate": 1.875338160977782e-05, "loss": 2.2529, "step": 5248 }, { "epoch": 0.17, "grad_norm": 0.7301540970802307, "learning_rate": 1.875286763966838e-05, "loss": 2.2543, "step": 5249 }, { "epoch": 0.17, "grad_norm": 0.7249478697776794, "learning_rate": 1.8752353570674166e-05, "loss": 2.2306, "step": 5250 }, { "epoch": 0.17, "grad_norm": 0.7140246629714966, "learning_rate": 1.8751839402800994e-05, "loss": 2.1985, "step": 5251 }, { "epoch": 0.17, "grad_norm": 0.7103005647659302, "learning_rate": 1.875132513605467e-05, "loss": 2.267, "step": 5252 }, { "epoch": 0.17, "grad_norm": 0.7145276069641113, "learning_rate": 1.8750810770441e-05, "loss": 2.2387, "step": 5253 }, { "epoch": 0.17, "grad_norm": 0.7124696373939514, "learning_rate": 1.8750296305965802e-05, "loss": 2.2413, "step": 5254 }, { "epoch": 0.17, "grad_norm": 0.6838007569313049, "learning_rate": 1.8749781742634882e-05, "loss": 2.168, "step": 5255 }, { "epoch": 0.17, "grad_norm": 0.7054868340492249, "learning_rate": 1.8749267080454056e-05, "loss": 2.2863, "step": 5256 }, { "epoch": 0.17, "grad_norm": 0.7296440601348877, "learning_rate": 1.874875231942914e-05, "loss": 2.1533, "step": 5257 }, { "epoch": 0.17, "grad_norm": 0.6929095387458801, "learning_rate": 1.8748237459565944e-05, "loss": 2.2304, "step": 5258 }, { "epoch": 0.17, "grad_norm": 0.7003834247589111, "learning_rate": 1.874772250087029e-05, "loss": 2.1616, "step": 5259 }, { "epoch": 0.18, "grad_norm": 0.664771318435669, "learning_rate": 1.8747207443347997e-05, "loss": 2.1972, "step": 5260 }, { "epoch": 0.18, "grad_norm": 0.6660566329956055, "learning_rate": 1.8746692287004876e-05, "loss": 2.232, "step": 5261 }, { "epoch": 0.18, "grad_norm": 0.6920390129089355, "learning_rate": 1.8746177031846756e-05, "loss": 2.2525, "step": 5262 }, { "epoch": 0.18, "grad_norm": 0.6631890535354614, "learning_rate": 1.874566167787945e-05, "loss": 2.1842, "step": 5263 }, { "epoch": 0.18, "grad_norm": 0.6929755210876465, "learning_rate": 1.8745146225108784e-05, "loss": 2.203, "step": 5264 }, { "epoch": 0.18, "grad_norm": 0.6716404557228088, "learning_rate": 1.8744630673540585e-05, "loss": 2.303, "step": 5265 }, { "epoch": 0.18, "grad_norm": 0.687391459941864, "learning_rate": 1.8744115023180673e-05, "loss": 2.177, "step": 5266 }, { "epoch": 0.18, "grad_norm": 0.6871922016143799, "learning_rate": 1.8743599274034866e-05, "loss": 2.2062, "step": 5267 }, { "epoch": 0.18, "grad_norm": 0.6913610100746155, "learning_rate": 1.8743083426109008e-05, "loss": 2.1569, "step": 5268 }, { "epoch": 0.18, "grad_norm": 0.7089357972145081, "learning_rate": 1.8742567479408914e-05, "loss": 2.1745, "step": 5269 }, { "epoch": 0.18, "grad_norm": 0.7007119655609131, "learning_rate": 1.8742051433940417e-05, "loss": 2.2804, "step": 5270 }, { "epoch": 0.18, "grad_norm": 0.673805296421051, "learning_rate": 1.8741535289709343e-05, "loss": 2.1742, "step": 5271 }, { "epoch": 0.18, "grad_norm": 0.6845331192016602, "learning_rate": 1.874101904672153e-05, "loss": 2.237, "step": 5272 }, { "epoch": 0.18, "grad_norm": 0.6765787601470947, "learning_rate": 1.8740502704982805e-05, "loss": 2.1846, "step": 5273 }, { "epoch": 0.18, "grad_norm": 0.6866967678070068, "learning_rate": 1.8739986264499003e-05, "loss": 2.2091, "step": 5274 }, { "epoch": 0.18, "grad_norm": 0.6875186562538147, "learning_rate": 1.8739469725275957e-05, "loss": 2.196, "step": 5275 }, { "epoch": 0.18, "grad_norm": 0.6828067898750305, "learning_rate": 1.8738953087319504e-05, "loss": 2.2236, "step": 5276 }, { "epoch": 0.18, "grad_norm": 0.7565956711769104, "learning_rate": 1.8738436350635484e-05, "loss": 2.2474, "step": 5277 }, { "epoch": 0.18, "grad_norm": 0.6970562934875488, "learning_rate": 1.873791951522973e-05, "loss": 2.2526, "step": 5278 }, { "epoch": 0.18, "grad_norm": 0.7061561942100525, "learning_rate": 1.873740258110808e-05, "loss": 2.2714, "step": 5279 }, { "epoch": 0.18, "grad_norm": 0.6894608736038208, "learning_rate": 1.873688554827638e-05, "loss": 2.2531, "step": 5280 }, { "epoch": 0.18, "grad_norm": 0.6829431056976318, "learning_rate": 1.8736368416740462e-05, "loss": 2.2067, "step": 5281 }, { "epoch": 0.18, "grad_norm": 0.7060560584068298, "learning_rate": 1.8735851186506176e-05, "loss": 2.2441, "step": 5282 }, { "epoch": 0.18, "grad_norm": 0.7230420112609863, "learning_rate": 1.8735333857579365e-05, "loss": 2.2011, "step": 5283 }, { "epoch": 0.18, "grad_norm": 0.7455811500549316, "learning_rate": 1.8734816429965873e-05, "loss": 2.1947, "step": 5284 }, { "epoch": 0.18, "grad_norm": 0.7295905351638794, "learning_rate": 1.8734298903671536e-05, "loss": 2.2331, "step": 5285 }, { "epoch": 0.18, "grad_norm": 0.7490503191947937, "learning_rate": 1.8733781278702217e-05, "loss": 2.2404, "step": 5286 }, { "epoch": 0.18, "grad_norm": 0.6925051212310791, "learning_rate": 1.873326355506375e-05, "loss": 2.1867, "step": 5287 }, { "epoch": 0.18, "grad_norm": 0.7150958180427551, "learning_rate": 1.8732745732761993e-05, "loss": 2.2145, "step": 5288 }, { "epoch": 0.18, "grad_norm": 0.6927400231361389, "learning_rate": 1.8732227811802794e-05, "loss": 2.2983, "step": 5289 }, { "epoch": 0.18, "grad_norm": 0.7035780549049377, "learning_rate": 1.8731709792192003e-05, "loss": 2.2311, "step": 5290 }, { "epoch": 0.18, "grad_norm": 0.6878951191902161, "learning_rate": 1.8731191673935466e-05, "loss": 2.1893, "step": 5291 }, { "epoch": 0.18, "grad_norm": 0.6932110786437988, "learning_rate": 1.8730673457039046e-05, "loss": 2.162, "step": 5292 }, { "epoch": 0.18, "grad_norm": 0.695071816444397, "learning_rate": 1.8730155141508596e-05, "loss": 2.2531, "step": 5293 }, { "epoch": 0.18, "grad_norm": 0.6811164021492004, "learning_rate": 1.8729636727349966e-05, "loss": 2.2514, "step": 5294 }, { "epoch": 0.18, "grad_norm": 0.6868739724159241, "learning_rate": 1.872911821456902e-05, "loss": 2.2621, "step": 5295 }, { "epoch": 0.18, "grad_norm": 0.697605311870575, "learning_rate": 1.872859960317161e-05, "loss": 2.2291, "step": 5296 }, { "epoch": 0.18, "grad_norm": 0.6981117129325867, "learning_rate": 1.8728080893163595e-05, "loss": 2.1618, "step": 5297 }, { "epoch": 0.18, "grad_norm": 0.6748373508453369, "learning_rate": 1.872756208455084e-05, "loss": 2.2274, "step": 5298 }, { "epoch": 0.18, "grad_norm": 0.6913279294967651, "learning_rate": 1.8727043177339205e-05, "loss": 2.2371, "step": 5299 }, { "epoch": 0.18, "grad_norm": 0.7016263008117676, "learning_rate": 1.8726524171534546e-05, "loss": 2.1737, "step": 5300 }, { "epoch": 0.18, "grad_norm": 0.6757892966270447, "learning_rate": 1.8726005067142737e-05, "loss": 2.2235, "step": 5301 }, { "epoch": 0.18, "grad_norm": 0.7008597254753113, "learning_rate": 1.872548586416963e-05, "loss": 2.3142, "step": 5302 }, { "epoch": 0.18, "grad_norm": 0.706744909286499, "learning_rate": 1.87249665626211e-05, "loss": 2.2115, "step": 5303 }, { "epoch": 0.18, "grad_norm": 0.7096339464187622, "learning_rate": 1.8724447162503015e-05, "loss": 2.2428, "step": 5304 }, { "epoch": 0.18, "grad_norm": 0.703036367893219, "learning_rate": 1.8723927663821235e-05, "loss": 2.2323, "step": 5305 }, { "epoch": 0.18, "grad_norm": 0.7101420164108276, "learning_rate": 1.8723408066581634e-05, "loss": 2.21, "step": 5306 }, { "epoch": 0.18, "grad_norm": 0.7034243941307068, "learning_rate": 1.8722888370790083e-05, "loss": 2.256, "step": 5307 }, { "epoch": 0.18, "grad_norm": 0.7019157409667969, "learning_rate": 1.8722368576452448e-05, "loss": 2.2387, "step": 5308 }, { "epoch": 0.18, "grad_norm": 0.6812459230422974, "learning_rate": 1.8721848683574605e-05, "loss": 2.2216, "step": 5309 }, { "epoch": 0.18, "grad_norm": 0.7246698141098022, "learning_rate": 1.872132869216243e-05, "loss": 2.1682, "step": 5310 }, { "epoch": 0.18, "grad_norm": 0.695830225944519, "learning_rate": 1.8720808602221788e-05, "loss": 2.2236, "step": 5311 }, { "epoch": 0.18, "grad_norm": 0.6872268319129944, "learning_rate": 1.872028841375857e-05, "loss": 2.2286, "step": 5312 }, { "epoch": 0.18, "grad_norm": 0.6908929347991943, "learning_rate": 1.871976812677864e-05, "loss": 2.2656, "step": 5313 }, { "epoch": 0.18, "grad_norm": 0.6971749663352966, "learning_rate": 1.8719247741287877e-05, "loss": 2.2842, "step": 5314 }, { "epoch": 0.18, "grad_norm": 0.7302929162979126, "learning_rate": 1.8718727257292168e-05, "loss": 2.2074, "step": 5315 }, { "epoch": 0.18, "grad_norm": 0.7205823063850403, "learning_rate": 1.871820667479739e-05, "loss": 2.2375, "step": 5316 }, { "epoch": 0.18, "grad_norm": 0.6951491236686707, "learning_rate": 1.8717685993809413e-05, "loss": 2.2508, "step": 5317 }, { "epoch": 0.18, "grad_norm": 0.6884263753890991, "learning_rate": 1.8717165214334137e-05, "loss": 2.1395, "step": 5318 }, { "epoch": 0.18, "grad_norm": 0.6922199130058289, "learning_rate": 1.871664433637743e-05, "loss": 2.2473, "step": 5319 }, { "epoch": 0.18, "grad_norm": 0.6757543087005615, "learning_rate": 1.8716123359945192e-05, "loss": 2.2073, "step": 5320 }, { "epoch": 0.18, "grad_norm": 0.6590266823768616, "learning_rate": 1.8715602285043297e-05, "loss": 2.1763, "step": 5321 }, { "epoch": 0.18, "grad_norm": 0.7229035496711731, "learning_rate": 1.8715081111677636e-05, "loss": 2.2826, "step": 5322 }, { "epoch": 0.18, "grad_norm": 0.6956657767295837, "learning_rate": 1.8714559839854096e-05, "loss": 2.2112, "step": 5323 }, { "epoch": 0.18, "grad_norm": 0.6742832064628601, "learning_rate": 1.871403846957856e-05, "loss": 2.1635, "step": 5324 }, { "epoch": 0.18, "grad_norm": 0.6906077265739441, "learning_rate": 1.8713517000856933e-05, "loss": 2.2277, "step": 5325 }, { "epoch": 0.18, "grad_norm": 0.6868237257003784, "learning_rate": 1.871299543369509e-05, "loss": 2.2257, "step": 5326 }, { "epoch": 0.18, "grad_norm": 0.6614282727241516, "learning_rate": 1.8712473768098937e-05, "loss": 2.2426, "step": 5327 }, { "epoch": 0.18, "grad_norm": 0.7293252944946289, "learning_rate": 1.871195200407436e-05, "loss": 2.2029, "step": 5328 }, { "epoch": 0.18, "grad_norm": 0.6892263293266296, "learning_rate": 1.8711430141627253e-05, "loss": 2.1873, "step": 5329 }, { "epoch": 0.18, "grad_norm": 0.6843459010124207, "learning_rate": 1.8710908180763515e-05, "loss": 2.2151, "step": 5330 }, { "epoch": 0.18, "grad_norm": 0.6956990361213684, "learning_rate": 1.8710386121489043e-05, "loss": 2.2371, "step": 5331 }, { "epoch": 0.18, "grad_norm": 0.7233136296272278, "learning_rate": 1.8709863963809728e-05, "loss": 2.2339, "step": 5332 }, { "epoch": 0.18, "grad_norm": 0.693359911441803, "learning_rate": 1.870934170773148e-05, "loss": 2.2342, "step": 5333 }, { "epoch": 0.18, "grad_norm": 0.6913610696792603, "learning_rate": 1.870881935326019e-05, "loss": 2.204, "step": 5334 }, { "epoch": 0.18, "grad_norm": 0.6841592788696289, "learning_rate": 1.8708296900401767e-05, "loss": 2.2108, "step": 5335 }, { "epoch": 0.18, "grad_norm": 0.7112038731575012, "learning_rate": 1.8707774349162105e-05, "loss": 2.2177, "step": 5336 }, { "epoch": 0.18, "grad_norm": 0.6971014142036438, "learning_rate": 1.8707251699547115e-05, "loss": 2.1938, "step": 5337 }, { "epoch": 0.18, "grad_norm": 0.68815678358078, "learning_rate": 1.8706728951562696e-05, "loss": 2.2188, "step": 5338 }, { "epoch": 0.18, "grad_norm": 0.6814174652099609, "learning_rate": 1.8706206105214757e-05, "loss": 2.1766, "step": 5339 }, { "epoch": 0.18, "grad_norm": 0.6762268543243408, "learning_rate": 1.8705683160509203e-05, "loss": 2.1857, "step": 5340 }, { "epoch": 0.18, "grad_norm": 0.6923859119415283, "learning_rate": 1.8705160117451942e-05, "loss": 2.276, "step": 5341 }, { "epoch": 0.18, "grad_norm": 0.6915370225906372, "learning_rate": 1.8704636976048888e-05, "loss": 2.1864, "step": 5342 }, { "epoch": 0.18, "grad_norm": 0.665428102016449, "learning_rate": 1.870411373630594e-05, "loss": 2.218, "step": 5343 }, { "epoch": 0.18, "grad_norm": 0.7339053750038147, "learning_rate": 1.870359039822902e-05, "loss": 2.2011, "step": 5344 }, { "epoch": 0.18, "grad_norm": 0.7154248356819153, "learning_rate": 1.8703066961824036e-05, "loss": 2.2076, "step": 5345 }, { "epoch": 0.18, "grad_norm": 0.7322760224342346, "learning_rate": 1.8702543427096906e-05, "loss": 2.1635, "step": 5346 }, { "epoch": 0.18, "grad_norm": 0.7206581830978394, "learning_rate": 1.8702019794053534e-05, "loss": 2.2212, "step": 5347 }, { "epoch": 0.18, "grad_norm": 0.7312862277030945, "learning_rate": 1.8701496062699848e-05, "loss": 2.1776, "step": 5348 }, { "epoch": 0.18, "grad_norm": 0.6989860534667969, "learning_rate": 1.8700972233041755e-05, "loss": 2.216, "step": 5349 }, { "epoch": 0.18, "grad_norm": 0.6877766251564026, "learning_rate": 1.8700448305085177e-05, "loss": 2.2133, "step": 5350 }, { "epoch": 0.18, "grad_norm": 0.689217746257782, "learning_rate": 1.8699924278836032e-05, "loss": 2.159, "step": 5351 }, { "epoch": 0.18, "grad_norm": 0.6855818629264832, "learning_rate": 1.8699400154300244e-05, "loss": 2.1867, "step": 5352 }, { "epoch": 0.18, "grad_norm": 0.7007381916046143, "learning_rate": 1.869887593148373e-05, "loss": 2.2638, "step": 5353 }, { "epoch": 0.18, "grad_norm": 0.7113897800445557, "learning_rate": 1.8698351610392416e-05, "loss": 2.2138, "step": 5354 }, { "epoch": 0.18, "grad_norm": 0.7301239371299744, "learning_rate": 1.869782719103222e-05, "loss": 2.1666, "step": 5355 }, { "epoch": 0.18, "grad_norm": 0.7006897330284119, "learning_rate": 1.8697302673409072e-05, "loss": 2.2525, "step": 5356 }, { "epoch": 0.18, "grad_norm": 0.6918513774871826, "learning_rate": 1.8696778057528896e-05, "loss": 2.1298, "step": 5357 }, { "epoch": 0.18, "grad_norm": 0.6716538071632385, "learning_rate": 1.8696253343397617e-05, "loss": 2.1695, "step": 5358 }, { "epoch": 0.18, "grad_norm": 0.7065458297729492, "learning_rate": 1.8695728531021165e-05, "loss": 2.2303, "step": 5359 }, { "epoch": 0.18, "grad_norm": 0.6769695281982422, "learning_rate": 1.8695203620405466e-05, "loss": 2.2553, "step": 5360 }, { "epoch": 0.18, "grad_norm": 0.7818331122398376, "learning_rate": 1.8694678611556455e-05, "loss": 2.2299, "step": 5361 }, { "epoch": 0.18, "grad_norm": 0.7667955160140991, "learning_rate": 1.869415350448006e-05, "loss": 2.1514, "step": 5362 }, { "epoch": 0.18, "grad_norm": 0.6861446499824524, "learning_rate": 1.8693628299182215e-05, "loss": 2.1735, "step": 5363 }, { "epoch": 0.18, "grad_norm": 0.698962390422821, "learning_rate": 1.8693102995668847e-05, "loss": 2.1578, "step": 5364 }, { "epoch": 0.18, "grad_norm": 0.7044604420661926, "learning_rate": 1.86925775939459e-05, "loss": 2.25, "step": 5365 }, { "epoch": 0.18, "grad_norm": 0.7175866365432739, "learning_rate": 1.8692052094019307e-05, "loss": 2.2016, "step": 5366 }, { "epoch": 0.18, "grad_norm": 0.6886649131774902, "learning_rate": 1.8691526495895002e-05, "loss": 2.1987, "step": 5367 }, { "epoch": 0.18, "grad_norm": 0.7320054769515991, "learning_rate": 1.8691000799578927e-05, "loss": 2.2625, "step": 5368 }, { "epoch": 0.18, "grad_norm": 0.6931559443473816, "learning_rate": 1.8690475005077016e-05, "loss": 2.2609, "step": 5369 }, { "epoch": 0.18, "grad_norm": 0.6913182735443115, "learning_rate": 1.868994911239521e-05, "loss": 2.2325, "step": 5370 }, { "epoch": 0.18, "grad_norm": 0.7052625417709351, "learning_rate": 1.8689423121539457e-05, "loss": 2.2349, "step": 5371 }, { "epoch": 0.18, "grad_norm": 0.6975575685501099, "learning_rate": 1.868889703251569e-05, "loss": 2.1084, "step": 5372 }, { "epoch": 0.18, "grad_norm": 0.700147271156311, "learning_rate": 1.8688370845329855e-05, "loss": 2.2219, "step": 5373 }, { "epoch": 0.18, "grad_norm": 0.695196807384491, "learning_rate": 1.8687844559987903e-05, "loss": 2.1564, "step": 5374 }, { "epoch": 0.18, "grad_norm": 0.6810943484306335, "learning_rate": 1.868731817649577e-05, "loss": 2.2038, "step": 5375 }, { "epoch": 0.18, "grad_norm": 0.6825370192527771, "learning_rate": 1.8686791694859407e-05, "loss": 2.2726, "step": 5376 }, { "epoch": 0.18, "grad_norm": 0.6985204219818115, "learning_rate": 1.8686265115084766e-05, "loss": 2.2453, "step": 5377 }, { "epoch": 0.18, "grad_norm": 0.7111895680427551, "learning_rate": 1.868573843717779e-05, "loss": 2.3021, "step": 5378 }, { "epoch": 0.18, "grad_norm": 0.705970048904419, "learning_rate": 1.8685211661144437e-05, "loss": 2.1198, "step": 5379 }, { "epoch": 0.18, "grad_norm": 0.6955997943878174, "learning_rate": 1.8684684786990645e-05, "loss": 2.2134, "step": 5380 }, { "epoch": 0.18, "grad_norm": 0.6756146550178528, "learning_rate": 1.8684157814722376e-05, "loss": 2.2083, "step": 5381 }, { "epoch": 0.18, "grad_norm": 0.6966724991798401, "learning_rate": 1.868363074434558e-05, "loss": 2.2801, "step": 5382 }, { "epoch": 0.18, "grad_norm": 0.6867474317550659, "learning_rate": 1.868310357586622e-05, "loss": 2.1273, "step": 5383 }, { "epoch": 0.18, "grad_norm": 0.7444121241569519, "learning_rate": 1.868257630929024e-05, "loss": 2.2383, "step": 5384 }, { "epoch": 0.18, "grad_norm": 0.7392184138298035, "learning_rate": 1.86820489446236e-05, "loss": 2.195, "step": 5385 }, { "epoch": 0.18, "grad_norm": 0.7271459698677063, "learning_rate": 1.868152148187226e-05, "loss": 2.2587, "step": 5386 }, { "epoch": 0.18, "grad_norm": 0.7205798625946045, "learning_rate": 1.868099392104218e-05, "loss": 2.2178, "step": 5387 }, { "epoch": 0.18, "grad_norm": 0.6847581267356873, "learning_rate": 1.8680466262139318e-05, "loss": 2.0889, "step": 5388 }, { "epoch": 0.18, "grad_norm": 0.6811925768852234, "learning_rate": 1.8679938505169634e-05, "loss": 2.2135, "step": 5389 }, { "epoch": 0.18, "grad_norm": 0.6883907914161682, "learning_rate": 1.8679410650139095e-05, "loss": 2.1567, "step": 5390 }, { "epoch": 0.18, "grad_norm": 0.7297616004943848, "learning_rate": 1.8678882697053654e-05, "loss": 2.2658, "step": 5391 }, { "epoch": 0.18, "grad_norm": 0.6985902786254883, "learning_rate": 1.867835464591929e-05, "loss": 2.1996, "step": 5392 }, { "epoch": 0.18, "grad_norm": 0.713433563709259, "learning_rate": 1.8677826496741957e-05, "loss": 2.2445, "step": 5393 }, { "epoch": 0.18, "grad_norm": 0.7662736773490906, "learning_rate": 1.867729824952763e-05, "loss": 2.2951, "step": 5394 }, { "epoch": 0.18, "grad_norm": 0.682248055934906, "learning_rate": 1.8676769904282267e-05, "loss": 2.162, "step": 5395 }, { "epoch": 0.18, "grad_norm": 0.7012356519699097, "learning_rate": 1.8676241461011845e-05, "loss": 2.2166, "step": 5396 }, { "epoch": 0.18, "grad_norm": 0.6921445727348328, "learning_rate": 1.8675712919722334e-05, "loss": 2.2047, "step": 5397 }, { "epoch": 0.18, "grad_norm": 0.7156121730804443, "learning_rate": 1.86751842804197e-05, "loss": 2.2358, "step": 5398 }, { "epoch": 0.18, "grad_norm": 0.7065675854682922, "learning_rate": 1.8674655543109922e-05, "loss": 2.1874, "step": 5399 }, { "epoch": 0.18, "grad_norm": 0.6988011002540588, "learning_rate": 1.8674126707798965e-05, "loss": 2.1563, "step": 5400 }, { "epoch": 0.18, "grad_norm": 0.7090868353843689, "learning_rate": 1.867359777449281e-05, "loss": 2.2142, "step": 5401 }, { "epoch": 0.18, "grad_norm": 0.6951003074645996, "learning_rate": 1.867306874319743e-05, "loss": 2.2123, "step": 5402 }, { "epoch": 0.18, "grad_norm": 0.6858704090118408, "learning_rate": 1.8672539613918802e-05, "loss": 2.3237, "step": 5403 }, { "epoch": 0.18, "grad_norm": 0.7154164910316467, "learning_rate": 1.8672010386662908e-05, "loss": 2.2159, "step": 5404 }, { "epoch": 0.18, "grad_norm": 0.6896719336509705, "learning_rate": 1.867148106143572e-05, "loss": 2.1738, "step": 5405 }, { "epoch": 0.18, "grad_norm": 0.6984386444091797, "learning_rate": 1.867095163824322e-05, "loss": 2.1847, "step": 5406 }, { "epoch": 0.18, "grad_norm": 0.723698616027832, "learning_rate": 1.867042211709139e-05, "loss": 2.2102, "step": 5407 }, { "epoch": 0.18, "grad_norm": 0.6982423067092896, "learning_rate": 1.866989249798621e-05, "loss": 2.2104, "step": 5408 }, { "epoch": 0.18, "grad_norm": 0.7349892854690552, "learning_rate": 1.8669362780933675e-05, "loss": 2.157, "step": 5409 }, { "epoch": 0.18, "grad_norm": 0.6859233975410461, "learning_rate": 1.866883296593975e-05, "loss": 2.166, "step": 5410 }, { "epoch": 0.18, "grad_norm": 0.6601275205612183, "learning_rate": 1.8668303053010436e-05, "loss": 2.19, "step": 5411 }, { "epoch": 0.18, "grad_norm": 0.7230720520019531, "learning_rate": 1.8667773042151714e-05, "loss": 2.1467, "step": 5412 }, { "epoch": 0.18, "grad_norm": 0.748222291469574, "learning_rate": 1.866724293336957e-05, "loss": 2.2229, "step": 5413 }, { "epoch": 0.18, "grad_norm": 0.6734126210212708, "learning_rate": 1.8666712726669994e-05, "loss": 2.2003, "step": 5414 }, { "epoch": 0.18, "grad_norm": 0.7056984901428223, "learning_rate": 1.866618242205898e-05, "loss": 2.1362, "step": 5415 }, { "epoch": 0.18, "grad_norm": 0.7258943319320679, "learning_rate": 1.8665652019542512e-05, "loss": 2.2775, "step": 5416 }, { "epoch": 0.18, "grad_norm": 0.6757399439811707, "learning_rate": 1.8665121519126587e-05, "loss": 2.2203, "step": 5417 }, { "epoch": 0.18, "grad_norm": 0.6979444026947021, "learning_rate": 1.86645909208172e-05, "loss": 2.2087, "step": 5418 }, { "epoch": 0.18, "grad_norm": 0.704862117767334, "learning_rate": 1.866406022462034e-05, "loss": 2.2101, "step": 5419 }, { "epoch": 0.18, "grad_norm": 0.6693013906478882, "learning_rate": 1.866352943054201e-05, "loss": 2.1976, "step": 5420 }, { "epoch": 0.18, "grad_norm": 0.6967042088508606, "learning_rate": 1.86629985385882e-05, "loss": 2.206, "step": 5421 }, { "epoch": 0.18, "grad_norm": 0.7242235541343689, "learning_rate": 1.8662467548764904e-05, "loss": 2.2046, "step": 5422 }, { "epoch": 0.18, "grad_norm": 0.7256143093109131, "learning_rate": 1.8661936461078133e-05, "loss": 2.2108, "step": 5423 }, { "epoch": 0.18, "grad_norm": 0.6766475439071655, "learning_rate": 1.8661405275533876e-05, "loss": 2.1661, "step": 5424 }, { "epoch": 0.18, "grad_norm": 0.7162649631500244, "learning_rate": 1.866087399213814e-05, "loss": 2.2202, "step": 5425 }, { "epoch": 0.18, "grad_norm": 0.6785038113594055, "learning_rate": 1.8660342610896922e-05, "loss": 2.2361, "step": 5426 }, { "epoch": 0.18, "grad_norm": 0.7025398015975952, "learning_rate": 1.8659811131816233e-05, "loss": 2.2294, "step": 5427 }, { "epoch": 0.18, "grad_norm": 0.7494419813156128, "learning_rate": 1.8659279554902074e-05, "loss": 2.2756, "step": 5428 }, { "epoch": 0.18, "grad_norm": 0.67743980884552, "learning_rate": 1.8658747880160443e-05, "loss": 2.1855, "step": 5429 }, { "epoch": 0.18, "grad_norm": 0.6811135411262512, "learning_rate": 1.865821610759736e-05, "loss": 2.2088, "step": 5430 }, { "epoch": 0.18, "grad_norm": 0.7470099925994873, "learning_rate": 1.8657684237218823e-05, "loss": 2.2392, "step": 5431 }, { "epoch": 0.18, "grad_norm": 0.6951124668121338, "learning_rate": 1.8657152269030844e-05, "loss": 2.2116, "step": 5432 }, { "epoch": 0.18, "grad_norm": 0.685046911239624, "learning_rate": 1.865662020303943e-05, "loss": 2.1808, "step": 5433 }, { "epoch": 0.18, "grad_norm": 0.69264817237854, "learning_rate": 1.8656088039250595e-05, "loss": 2.2121, "step": 5434 }, { "epoch": 0.18, "grad_norm": 0.7065656781196594, "learning_rate": 1.8655555777670353e-05, "loss": 2.1962, "step": 5435 }, { "epoch": 0.18, "grad_norm": 0.6592856049537659, "learning_rate": 1.865502341830471e-05, "loss": 2.1927, "step": 5436 }, { "epoch": 0.18, "grad_norm": 0.7418520450592041, "learning_rate": 1.8654490961159688e-05, "loss": 2.2608, "step": 5437 }, { "epoch": 0.18, "grad_norm": 0.7003412842750549, "learning_rate": 1.86539584062413e-05, "loss": 2.218, "step": 5438 }, { "epoch": 0.18, "grad_norm": 0.715146541595459, "learning_rate": 1.865342575355556e-05, "loss": 2.2088, "step": 5439 }, { "epoch": 0.18, "grad_norm": 0.6890220642089844, "learning_rate": 1.8652893003108485e-05, "loss": 2.283, "step": 5440 }, { "epoch": 0.18, "grad_norm": 0.716315507888794, "learning_rate": 1.86523601549061e-05, "loss": 2.2476, "step": 5441 }, { "epoch": 0.18, "grad_norm": 0.7049520611763, "learning_rate": 1.865182720895442e-05, "loss": 2.2387, "step": 5442 }, { "epoch": 0.18, "grad_norm": 0.6862707734107971, "learning_rate": 1.8651294165259464e-05, "loss": 2.1911, "step": 5443 }, { "epoch": 0.18, "grad_norm": 0.6874216198921204, "learning_rate": 1.8650761023827258e-05, "loss": 2.1584, "step": 5444 }, { "epoch": 0.18, "grad_norm": 0.696226179599762, "learning_rate": 1.8650227784663825e-05, "loss": 2.1757, "step": 5445 }, { "epoch": 0.18, "grad_norm": 0.7441384792327881, "learning_rate": 1.8649694447775184e-05, "loss": 2.1678, "step": 5446 }, { "epoch": 0.18, "grad_norm": 0.6918995976448059, "learning_rate": 1.864916101316737e-05, "loss": 2.2048, "step": 5447 }, { "epoch": 0.18, "grad_norm": 0.6750444173812866, "learning_rate": 1.86486274808464e-05, "loss": 2.1889, "step": 5448 }, { "epoch": 0.18, "grad_norm": 0.7276250720024109, "learning_rate": 1.8648093850818306e-05, "loss": 2.1614, "step": 5449 }, { "epoch": 0.18, "grad_norm": 0.692226231098175, "learning_rate": 1.864756012308912e-05, "loss": 2.1936, "step": 5450 }, { "epoch": 0.18, "grad_norm": 0.6868963837623596, "learning_rate": 1.864702629766486e-05, "loss": 2.2254, "step": 5451 }, { "epoch": 0.18, "grad_norm": 0.7053418159484863, "learning_rate": 1.864649237455157e-05, "loss": 2.2325, "step": 5452 }, { "epoch": 0.18, "grad_norm": 0.6951059103012085, "learning_rate": 1.8645958353755276e-05, "loss": 2.1848, "step": 5453 }, { "epoch": 0.18, "grad_norm": 0.6877989768981934, "learning_rate": 1.8645424235282007e-05, "loss": 2.2899, "step": 5454 }, { "epoch": 0.18, "grad_norm": 0.6931294798851013, "learning_rate": 1.8644890019137806e-05, "loss": 2.267, "step": 5455 }, { "epoch": 0.18, "grad_norm": 0.7140448689460754, "learning_rate": 1.8644355705328707e-05, "loss": 2.2686, "step": 5456 }, { "epoch": 0.18, "grad_norm": 0.6684128642082214, "learning_rate": 1.8643821293860737e-05, "loss": 2.1757, "step": 5457 }, { "epoch": 0.18, "grad_norm": 0.6734837889671326, "learning_rate": 1.8643286784739945e-05, "loss": 2.1974, "step": 5458 }, { "epoch": 0.18, "grad_norm": 0.674057126045227, "learning_rate": 1.8642752177972362e-05, "loss": 2.2014, "step": 5459 }, { "epoch": 0.18, "grad_norm": 0.6899111866950989, "learning_rate": 1.8642217473564028e-05, "loss": 2.1724, "step": 5460 }, { "epoch": 0.18, "grad_norm": 0.6900359392166138, "learning_rate": 1.8641682671520987e-05, "loss": 2.2603, "step": 5461 }, { "epoch": 0.18, "grad_norm": 0.6928248405456543, "learning_rate": 1.8641147771849282e-05, "loss": 2.2622, "step": 5462 }, { "epoch": 0.18, "grad_norm": 0.6716713309288025, "learning_rate": 1.8640612774554952e-05, "loss": 2.1786, "step": 5463 }, { "epoch": 0.18, "grad_norm": 0.6904696226119995, "learning_rate": 1.8640077679644043e-05, "loss": 2.2464, "step": 5464 }, { "epoch": 0.18, "grad_norm": 0.6997979879379272, "learning_rate": 1.86395424871226e-05, "loss": 2.2752, "step": 5465 }, { "epoch": 0.18, "grad_norm": 0.7003740072250366, "learning_rate": 1.8639007196996666e-05, "loss": 2.2966, "step": 5466 }, { "epoch": 0.18, "grad_norm": 0.708040177822113, "learning_rate": 1.8638471809272298e-05, "loss": 2.1917, "step": 5467 }, { "epoch": 0.18, "grad_norm": 0.7367563247680664, "learning_rate": 1.8637936323955535e-05, "loss": 2.3012, "step": 5468 }, { "epoch": 0.18, "grad_norm": 0.7042673230171204, "learning_rate": 1.863740074105243e-05, "loss": 2.2485, "step": 5469 }, { "epoch": 0.18, "grad_norm": 0.6951615810394287, "learning_rate": 1.863686506056903e-05, "loss": 2.1976, "step": 5470 }, { "epoch": 0.18, "grad_norm": 0.7056179642677307, "learning_rate": 1.8636329282511396e-05, "loss": 2.2954, "step": 5471 }, { "epoch": 0.18, "grad_norm": 0.6767618060112, "learning_rate": 1.863579340688557e-05, "loss": 2.2458, "step": 5472 }, { "epoch": 0.18, "grad_norm": 0.6723634004592896, "learning_rate": 1.8635257433697617e-05, "loss": 2.1689, "step": 5473 }, { "epoch": 0.18, "grad_norm": 0.702843189239502, "learning_rate": 1.863472136295358e-05, "loss": 2.1316, "step": 5474 }, { "epoch": 0.18, "grad_norm": 0.6565383076667786, "learning_rate": 1.8634185194659526e-05, "loss": 2.1886, "step": 5475 }, { "epoch": 0.18, "grad_norm": 0.6981648206710815, "learning_rate": 1.8633648928821505e-05, "loss": 2.2147, "step": 5476 }, { "epoch": 0.18, "grad_norm": 0.7100622653961182, "learning_rate": 1.863311256544558e-05, "loss": 2.2265, "step": 5477 }, { "epoch": 0.18, "grad_norm": 0.694697916507721, "learning_rate": 1.863257610453781e-05, "loss": 2.2423, "step": 5478 }, { "epoch": 0.18, "grad_norm": 0.6915003657341003, "learning_rate": 1.863203954610425e-05, "loss": 2.2446, "step": 5479 }, { "epoch": 0.18, "grad_norm": 0.7234693169593811, "learning_rate": 1.863150289015097e-05, "loss": 2.2404, "step": 5480 }, { "epoch": 0.18, "grad_norm": 0.6751726865768433, "learning_rate": 1.8630966136684028e-05, "loss": 2.1591, "step": 5481 }, { "epoch": 0.18, "grad_norm": 0.7268427014350891, "learning_rate": 1.8630429285709488e-05, "loss": 2.2034, "step": 5482 }, { "epoch": 0.18, "grad_norm": 0.6962007284164429, "learning_rate": 1.8629892337233416e-05, "loss": 2.1773, "step": 5483 }, { "epoch": 0.18, "grad_norm": 0.6734541058540344, "learning_rate": 1.8629355291261876e-05, "loss": 2.2747, "step": 5484 }, { "epoch": 0.18, "grad_norm": 0.6871586441993713, "learning_rate": 1.862881814780094e-05, "loss": 2.1693, "step": 5485 }, { "epoch": 0.18, "grad_norm": 0.6941714286804199, "learning_rate": 1.8628280906856676e-05, "loss": 2.1657, "step": 5486 }, { "epoch": 0.18, "grad_norm": 0.6855652928352356, "learning_rate": 1.8627743568435146e-05, "loss": 2.172, "step": 5487 }, { "epoch": 0.18, "grad_norm": 0.7262262105941772, "learning_rate": 1.8627206132542428e-05, "loss": 2.246, "step": 5488 }, { "epoch": 0.18, "grad_norm": 0.6726296544075012, "learning_rate": 1.862666859918459e-05, "loss": 2.2458, "step": 5489 }, { "epoch": 0.18, "grad_norm": 0.6849536299705505, "learning_rate": 1.862613096836771e-05, "loss": 2.2036, "step": 5490 }, { "epoch": 0.18, "grad_norm": 0.7037539482116699, "learning_rate": 1.862559324009785e-05, "loss": 2.1393, "step": 5491 }, { "epoch": 0.18, "grad_norm": 0.7455698251724243, "learning_rate": 1.8625055414381097e-05, "loss": 2.2794, "step": 5492 }, { "epoch": 0.18, "grad_norm": 0.671033501625061, "learning_rate": 1.8624517491223525e-05, "loss": 2.1886, "step": 5493 }, { "epoch": 0.18, "grad_norm": 0.7034931182861328, "learning_rate": 1.8623979470631207e-05, "loss": 2.21, "step": 5494 }, { "epoch": 0.18, "grad_norm": 0.7348529100418091, "learning_rate": 1.862344135261022e-05, "loss": 2.1648, "step": 5495 }, { "epoch": 0.18, "grad_norm": 0.6792471408843994, "learning_rate": 1.862290313716665e-05, "loss": 2.1797, "step": 5496 }, { "epoch": 0.18, "grad_norm": 0.6816999316215515, "learning_rate": 1.862236482430657e-05, "loss": 2.167, "step": 5497 }, { "epoch": 0.18, "grad_norm": 0.71730637550354, "learning_rate": 1.862182641403607e-05, "loss": 2.2021, "step": 5498 }, { "epoch": 0.18, "grad_norm": 0.7005695104598999, "learning_rate": 1.8621287906361227e-05, "loss": 2.2008, "step": 5499 }, { "epoch": 0.18, "grad_norm": 0.7150699496269226, "learning_rate": 1.8620749301288125e-05, "loss": 2.2006, "step": 5500 }, { "epoch": 0.18, "grad_norm": 0.7294042110443115, "learning_rate": 1.862021059882285e-05, "loss": 2.1673, "step": 5501 }, { "epoch": 0.18, "grad_norm": 0.6940259337425232, "learning_rate": 1.861967179897149e-05, "loss": 2.2113, "step": 5502 }, { "epoch": 0.18, "grad_norm": 0.6767745614051819, "learning_rate": 1.8619132901740126e-05, "loss": 2.2421, "step": 5503 }, { "epoch": 0.18, "grad_norm": 0.7345863580703735, "learning_rate": 1.8618593907134847e-05, "loss": 2.2711, "step": 5504 }, { "epoch": 0.18, "grad_norm": 0.7103909254074097, "learning_rate": 1.8618054815161752e-05, "loss": 2.2055, "step": 5505 }, { "epoch": 0.18, "grad_norm": 0.7215567231178284, "learning_rate": 1.8617515625826922e-05, "loss": 2.2516, "step": 5506 }, { "epoch": 0.18, "grad_norm": 0.7726604342460632, "learning_rate": 1.8616976339136446e-05, "loss": 2.1847, "step": 5507 }, { "epoch": 0.18, "grad_norm": 0.6908069849014282, "learning_rate": 1.861643695509643e-05, "loss": 2.2314, "step": 5508 }, { "epoch": 0.18, "grad_norm": 0.7274362444877625, "learning_rate": 1.861589747371295e-05, "loss": 2.1753, "step": 5509 }, { "epoch": 0.18, "grad_norm": 0.6759147644042969, "learning_rate": 1.8615357894992116e-05, "loss": 2.248, "step": 5510 }, { "epoch": 0.18, "grad_norm": 0.7180452346801758, "learning_rate": 1.8614818218940015e-05, "loss": 2.2368, "step": 5511 }, { "epoch": 0.18, "grad_norm": 0.7551531791687012, "learning_rate": 1.8614278445562748e-05, "loss": 2.1844, "step": 5512 }, { "epoch": 0.18, "grad_norm": 0.6935170888900757, "learning_rate": 1.8613738574866413e-05, "loss": 2.1608, "step": 5513 }, { "epoch": 0.18, "grad_norm": 0.6883781552314758, "learning_rate": 1.8613198606857105e-05, "loss": 2.2857, "step": 5514 }, { "epoch": 0.18, "grad_norm": 0.6902933716773987, "learning_rate": 1.8612658541540924e-05, "loss": 2.1987, "step": 5515 }, { "epoch": 0.18, "grad_norm": 0.6875550746917725, "learning_rate": 1.861211837892398e-05, "loss": 2.2228, "step": 5516 }, { "epoch": 0.18, "grad_norm": 0.6723427772521973, "learning_rate": 1.8611578119012367e-05, "loss": 2.2504, "step": 5517 }, { "epoch": 0.18, "grad_norm": 0.6614521741867065, "learning_rate": 1.8611037761812193e-05, "loss": 2.1557, "step": 5518 }, { "epoch": 0.18, "grad_norm": 0.7107515931129456, "learning_rate": 1.861049730732956e-05, "loss": 2.3023, "step": 5519 }, { "epoch": 0.18, "grad_norm": 0.6961677670478821, "learning_rate": 1.8609956755570576e-05, "loss": 2.3136, "step": 5520 }, { "epoch": 0.18, "grad_norm": 0.7206357717514038, "learning_rate": 1.860941610654134e-05, "loss": 2.1585, "step": 5521 }, { "epoch": 0.18, "grad_norm": 0.6987873315811157, "learning_rate": 1.8608875360247977e-05, "loss": 2.2404, "step": 5522 }, { "epoch": 0.18, "grad_norm": 0.7084365487098694, "learning_rate": 1.860833451669658e-05, "loss": 2.2177, "step": 5523 }, { "epoch": 0.18, "grad_norm": 0.7131112813949585, "learning_rate": 1.860779357589326e-05, "loss": 2.1164, "step": 5524 }, { "epoch": 0.18, "grad_norm": 0.6868016719818115, "learning_rate": 1.8607252537844142e-05, "loss": 2.1786, "step": 5525 }, { "epoch": 0.18, "grad_norm": 0.6579523086547852, "learning_rate": 1.860671140255532e-05, "loss": 2.2116, "step": 5526 }, { "epoch": 0.18, "grad_norm": 0.6694238185882568, "learning_rate": 1.8606170170032922e-05, "loss": 2.1808, "step": 5527 }, { "epoch": 0.18, "grad_norm": 0.671684741973877, "learning_rate": 1.8605628840283057e-05, "loss": 2.2538, "step": 5528 }, { "epoch": 0.18, "grad_norm": 0.6888987421989441, "learning_rate": 1.860508741331184e-05, "loss": 2.2145, "step": 5529 }, { "epoch": 0.18, "grad_norm": 0.6738946437835693, "learning_rate": 1.8604545889125387e-05, "loss": 2.2444, "step": 5530 }, { "epoch": 0.18, "grad_norm": 0.7116053700447083, "learning_rate": 1.860400426772982e-05, "loss": 2.1907, "step": 5531 }, { "epoch": 0.18, "grad_norm": 0.6757642030715942, "learning_rate": 1.8603462549131255e-05, "loss": 2.2448, "step": 5532 }, { "epoch": 0.18, "grad_norm": 0.7053086161613464, "learning_rate": 1.860292073333581e-05, "loss": 2.2386, "step": 5533 }, { "epoch": 0.18, "grad_norm": 0.6668089628219604, "learning_rate": 1.860237882034961e-05, "loss": 2.1735, "step": 5534 }, { "epoch": 0.18, "grad_norm": 0.6910253167152405, "learning_rate": 1.8601836810178775e-05, "loss": 2.2152, "step": 5535 }, { "epoch": 0.18, "grad_norm": 0.6880805492401123, "learning_rate": 1.860129470282943e-05, "loss": 2.1959, "step": 5536 }, { "epoch": 0.18, "grad_norm": 0.6861456632614136, "learning_rate": 1.8600752498307696e-05, "loss": 2.2708, "step": 5537 }, { "epoch": 0.18, "grad_norm": 0.7295663952827454, "learning_rate": 1.8600210196619704e-05, "loss": 2.2496, "step": 5538 }, { "epoch": 0.18, "grad_norm": 0.6734892129898071, "learning_rate": 1.859966779777158e-05, "loss": 2.2155, "step": 5539 }, { "epoch": 0.18, "grad_norm": 0.6983523368835449, "learning_rate": 1.8599125301769438e-05, "loss": 2.1859, "step": 5540 }, { "epoch": 0.18, "grad_norm": 0.7135183215141296, "learning_rate": 1.8598582708619428e-05, "loss": 2.1719, "step": 5541 }, { "epoch": 0.18, "grad_norm": 0.7493467926979065, "learning_rate": 1.8598040018327665e-05, "loss": 2.2675, "step": 5542 }, { "epoch": 0.18, "grad_norm": 0.7057217359542847, "learning_rate": 1.8597497230900288e-05, "loss": 2.1965, "step": 5543 }, { "epoch": 0.18, "grad_norm": 0.670571506023407, "learning_rate": 1.8596954346343423e-05, "loss": 2.1929, "step": 5544 }, { "epoch": 0.18, "grad_norm": 0.6801275610923767, "learning_rate": 1.859641136466321e-05, "loss": 2.2156, "step": 5545 }, { "epoch": 0.18, "grad_norm": 0.7076669335365295, "learning_rate": 1.8595868285865775e-05, "loss": 2.1874, "step": 5546 }, { "epoch": 0.18, "grad_norm": 0.7053273320198059, "learning_rate": 1.859532510995726e-05, "loss": 2.2453, "step": 5547 }, { "epoch": 0.18, "grad_norm": 0.7127453088760376, "learning_rate": 1.8594781836943797e-05, "loss": 2.1342, "step": 5548 }, { "epoch": 0.18, "grad_norm": 0.7153468728065491, "learning_rate": 1.8594238466831526e-05, "loss": 2.1582, "step": 5549 }, { "epoch": 0.18, "grad_norm": 0.6730310916900635, "learning_rate": 1.859369499962659e-05, "loss": 2.2399, "step": 5550 }, { "epoch": 0.18, "grad_norm": 0.7787979245185852, "learning_rate": 1.859315143533512e-05, "loss": 2.1869, "step": 5551 }, { "epoch": 0.18, "grad_norm": 0.7161830067634583, "learning_rate": 1.8592607773963262e-05, "loss": 2.2198, "step": 5552 }, { "epoch": 0.18, "grad_norm": 0.7131355404853821, "learning_rate": 1.859206401551716e-05, "loss": 2.1467, "step": 5553 }, { "epoch": 0.18, "grad_norm": 0.6888655424118042, "learning_rate": 1.8591520160002953e-05, "loss": 2.1874, "step": 5554 }, { "epoch": 0.18, "grad_norm": 0.6643336415290833, "learning_rate": 1.8590976207426784e-05, "loss": 2.236, "step": 5555 }, { "epoch": 0.18, "grad_norm": 0.7004115581512451, "learning_rate": 1.85904321577948e-05, "loss": 2.2059, "step": 5556 }, { "epoch": 0.18, "grad_norm": 0.7037082314491272, "learning_rate": 1.8589888011113154e-05, "loss": 2.2197, "step": 5557 }, { "epoch": 0.18, "grad_norm": 0.740204393863678, "learning_rate": 1.858934376738798e-05, "loss": 2.2869, "step": 5558 }, { "epoch": 0.18, "grad_norm": 0.7056680917739868, "learning_rate": 1.8588799426625438e-05, "loss": 2.2085, "step": 5559 }, { "epoch": 0.18, "grad_norm": 0.7074881196022034, "learning_rate": 1.8588254988831674e-05, "loss": 2.2235, "step": 5560 }, { "epoch": 0.19, "grad_norm": 0.6911839246749878, "learning_rate": 1.8587710454012837e-05, "loss": 2.2555, "step": 5561 }, { "epoch": 0.19, "grad_norm": 0.6697965264320374, "learning_rate": 1.858716582217508e-05, "loss": 2.2001, "step": 5562 }, { "epoch": 0.19, "grad_norm": 0.678257405757904, "learning_rate": 1.8586621093324555e-05, "loss": 2.2246, "step": 5563 }, { "epoch": 0.19, "grad_norm": 0.6960854530334473, "learning_rate": 1.858607626746742e-05, "loss": 2.2236, "step": 5564 }, { "epoch": 0.19, "grad_norm": 0.7796299457550049, "learning_rate": 1.8585531344609827e-05, "loss": 2.3053, "step": 5565 }, { "epoch": 0.19, "grad_norm": 0.6959280967712402, "learning_rate": 1.858498632475793e-05, "loss": 2.1606, "step": 5566 }, { "epoch": 0.19, "grad_norm": 0.7102519273757935, "learning_rate": 1.858444120791789e-05, "loss": 2.2322, "step": 5567 }, { "epoch": 0.19, "grad_norm": 0.7155406475067139, "learning_rate": 1.8583895994095864e-05, "loss": 2.2198, "step": 5568 }, { "epoch": 0.19, "grad_norm": 0.6744106411933899, "learning_rate": 1.8583350683298015e-05, "loss": 2.1861, "step": 5569 }, { "epoch": 0.19, "grad_norm": 0.7172953486442566, "learning_rate": 1.8582805275530495e-05, "loss": 2.2601, "step": 5570 }, { "epoch": 0.19, "grad_norm": 0.7674827575683594, "learning_rate": 1.8582259770799475e-05, "loss": 2.2199, "step": 5571 }, { "epoch": 0.19, "grad_norm": 0.6964028477668762, "learning_rate": 1.858171416911111e-05, "loss": 2.2127, "step": 5572 }, { "epoch": 0.19, "grad_norm": 0.6877376437187195, "learning_rate": 1.8581168470471572e-05, "loss": 2.2473, "step": 5573 }, { "epoch": 0.19, "grad_norm": 0.6967625617980957, "learning_rate": 1.858062267488702e-05, "loss": 2.1633, "step": 5574 }, { "epoch": 0.19, "grad_norm": 0.6951371431350708, "learning_rate": 1.858007678236362e-05, "loss": 2.2336, "step": 5575 }, { "epoch": 0.19, "grad_norm": 0.7015105485916138, "learning_rate": 1.8579530792907545e-05, "loss": 2.2123, "step": 5576 }, { "epoch": 0.19, "grad_norm": 0.6948661208152771, "learning_rate": 1.8578984706524953e-05, "loss": 2.2136, "step": 5577 }, { "epoch": 0.19, "grad_norm": 0.726847767829895, "learning_rate": 1.8578438523222025e-05, "loss": 2.2187, "step": 5578 }, { "epoch": 0.19, "grad_norm": 0.726252019405365, "learning_rate": 1.8577892243004925e-05, "loss": 2.2335, "step": 5579 }, { "epoch": 0.19, "grad_norm": 0.7257281541824341, "learning_rate": 1.8577345865879822e-05, "loss": 2.1701, "step": 5580 }, { "epoch": 0.19, "grad_norm": 0.7173301577568054, "learning_rate": 1.8576799391852897e-05, "loss": 2.207, "step": 5581 }, { "epoch": 0.19, "grad_norm": 0.7347779273986816, "learning_rate": 1.8576252820930315e-05, "loss": 2.2597, "step": 5582 }, { "epoch": 0.19, "grad_norm": 0.7081858515739441, "learning_rate": 1.8575706153118256e-05, "loss": 2.2277, "step": 5583 }, { "epoch": 0.19, "grad_norm": 0.6845013499259949, "learning_rate": 1.8575159388422895e-05, "loss": 2.1899, "step": 5584 }, { "epoch": 0.19, "grad_norm": 0.7392457127571106, "learning_rate": 1.857461252685041e-05, "loss": 2.1537, "step": 5585 }, { "epoch": 0.19, "grad_norm": 0.7651543021202087, "learning_rate": 1.857406556840698e-05, "loss": 2.1385, "step": 5586 }, { "epoch": 0.19, "grad_norm": 0.6975187659263611, "learning_rate": 1.857351851309878e-05, "loss": 2.2183, "step": 5587 }, { "epoch": 0.19, "grad_norm": 0.6869101524353027, "learning_rate": 1.857297136093199e-05, "loss": 2.2096, "step": 5588 }, { "epoch": 0.19, "grad_norm": 0.7008610963821411, "learning_rate": 1.8572424111912796e-05, "loss": 2.2878, "step": 5589 }, { "epoch": 0.19, "grad_norm": 0.7367783188819885, "learning_rate": 1.8571876766047377e-05, "loss": 2.3027, "step": 5590 }, { "epoch": 0.19, "grad_norm": 0.6932119131088257, "learning_rate": 1.8571329323341918e-05, "loss": 2.2268, "step": 5591 }, { "epoch": 0.19, "grad_norm": 0.7032895088195801, "learning_rate": 1.8570781783802605e-05, "loss": 2.1581, "step": 5592 }, { "epoch": 0.19, "grad_norm": 0.7264770865440369, "learning_rate": 1.8570234147435622e-05, "loss": 2.1819, "step": 5593 }, { "epoch": 0.19, "grad_norm": 0.6944084763526917, "learning_rate": 1.8569686414247156e-05, "loss": 2.1661, "step": 5594 }, { "epoch": 0.19, "grad_norm": 0.6666220426559448, "learning_rate": 1.8569138584243393e-05, "loss": 2.1788, "step": 5595 }, { "epoch": 0.19, "grad_norm": 0.6838375926017761, "learning_rate": 1.8568590657430527e-05, "loss": 2.2102, "step": 5596 }, { "epoch": 0.19, "grad_norm": 0.7171152830123901, "learning_rate": 1.8568042633814745e-05, "loss": 2.2683, "step": 5597 }, { "epoch": 0.19, "grad_norm": 0.7188552618026733, "learning_rate": 1.856749451340224e-05, "loss": 2.1678, "step": 5598 }, { "epoch": 0.19, "grad_norm": 0.7029561400413513, "learning_rate": 1.85669462961992e-05, "loss": 2.1847, "step": 5599 }, { "epoch": 0.19, "grad_norm": 0.6970089077949524, "learning_rate": 1.8566397982211824e-05, "loss": 2.1656, "step": 5600 }, { "epoch": 0.19, "grad_norm": 0.699612557888031, "learning_rate": 1.85658495714463e-05, "loss": 2.241, "step": 5601 }, { "epoch": 0.19, "grad_norm": 0.7196010947227478, "learning_rate": 1.8565301063908835e-05, "loss": 2.2421, "step": 5602 }, { "epoch": 0.19, "grad_norm": 0.6825580596923828, "learning_rate": 1.856475245960561e-05, "loss": 2.2258, "step": 5603 }, { "epoch": 0.19, "grad_norm": 0.6924556493759155, "learning_rate": 1.856420375854284e-05, "loss": 2.1826, "step": 5604 }, { "epoch": 0.19, "grad_norm": 0.6784041523933411, "learning_rate": 1.8563654960726708e-05, "loss": 2.1784, "step": 5605 }, { "epoch": 0.19, "grad_norm": 0.7094051837921143, "learning_rate": 1.8563106066163422e-05, "loss": 2.2358, "step": 5606 }, { "epoch": 0.19, "grad_norm": 0.673248291015625, "learning_rate": 1.8562557074859183e-05, "loss": 2.1545, "step": 5607 }, { "epoch": 0.19, "grad_norm": 0.6802188754081726, "learning_rate": 1.8562007986820192e-05, "loss": 2.2248, "step": 5608 }, { "epoch": 0.19, "grad_norm": 0.6944574117660522, "learning_rate": 1.8561458802052655e-05, "loss": 2.1954, "step": 5609 }, { "epoch": 0.19, "grad_norm": 0.6873819231987, "learning_rate": 1.856090952056277e-05, "loss": 2.229, "step": 5610 }, { "epoch": 0.19, "grad_norm": 0.6994902491569519, "learning_rate": 1.856036014235675e-05, "loss": 2.1363, "step": 5611 }, { "epoch": 0.19, "grad_norm": 0.7005643248558044, "learning_rate": 1.85598106674408e-05, "loss": 2.199, "step": 5612 }, { "epoch": 0.19, "grad_norm": 0.6903079748153687, "learning_rate": 1.855926109582112e-05, "loss": 2.2653, "step": 5613 }, { "epoch": 0.19, "grad_norm": 0.7384333610534668, "learning_rate": 1.8558711427503924e-05, "loss": 2.1607, "step": 5614 }, { "epoch": 0.19, "grad_norm": 0.702040433883667, "learning_rate": 1.8558161662495427e-05, "loss": 2.1642, "step": 5615 }, { "epoch": 0.19, "grad_norm": 0.7769565582275391, "learning_rate": 1.8557611800801835e-05, "loss": 2.2698, "step": 5616 }, { "epoch": 0.19, "grad_norm": 0.6853185892105103, "learning_rate": 1.855706184242936e-05, "loss": 2.1464, "step": 5617 }, { "epoch": 0.19, "grad_norm": 0.7219986319541931, "learning_rate": 1.855651178738421e-05, "loss": 2.2501, "step": 5618 }, { "epoch": 0.19, "grad_norm": 0.7006060481071472, "learning_rate": 1.855596163567261e-05, "loss": 2.1989, "step": 5619 }, { "epoch": 0.19, "grad_norm": 0.694694459438324, "learning_rate": 1.8555411387300765e-05, "loss": 2.2333, "step": 5620 }, { "epoch": 0.19, "grad_norm": 0.688143253326416, "learning_rate": 1.85548610422749e-05, "loss": 2.2051, "step": 5621 }, { "epoch": 0.19, "grad_norm": 0.6829541921615601, "learning_rate": 1.8554310600601227e-05, "loss": 2.1406, "step": 5622 }, { "epoch": 0.19, "grad_norm": 0.6947133541107178, "learning_rate": 1.8553760062285967e-05, "loss": 2.1565, "step": 5623 }, { "epoch": 0.19, "grad_norm": 0.6916519999504089, "learning_rate": 1.8553209427335337e-05, "loss": 2.1921, "step": 5624 }, { "epoch": 0.19, "grad_norm": 0.7105448842048645, "learning_rate": 1.8552658695755558e-05, "loss": 2.1853, "step": 5625 }, { "epoch": 0.19, "grad_norm": 0.7299433946609497, "learning_rate": 1.8552107867552856e-05, "loss": 2.1408, "step": 5626 }, { "epoch": 0.19, "grad_norm": 0.677615225315094, "learning_rate": 1.8551556942733454e-05, "loss": 2.1561, "step": 5627 }, { "epoch": 0.19, "grad_norm": 0.7708123922348022, "learning_rate": 1.855100592130357e-05, "loss": 2.1148, "step": 5628 }, { "epoch": 0.19, "grad_norm": 0.7076684832572937, "learning_rate": 1.8550454803269434e-05, "loss": 2.2088, "step": 5629 }, { "epoch": 0.19, "grad_norm": 0.76951003074646, "learning_rate": 1.854990358863727e-05, "loss": 2.2664, "step": 5630 }, { "epoch": 0.19, "grad_norm": 0.6916471719741821, "learning_rate": 1.8549352277413307e-05, "loss": 2.1557, "step": 5631 }, { "epoch": 0.19, "grad_norm": 0.7412891983985901, "learning_rate": 1.8548800869603767e-05, "loss": 2.2193, "step": 5632 }, { "epoch": 0.19, "grad_norm": 0.691865086555481, "learning_rate": 1.854824936521489e-05, "loss": 2.1714, "step": 5633 }, { "epoch": 0.19, "grad_norm": 0.7263773083686829, "learning_rate": 1.8547697764252902e-05, "loss": 2.1972, "step": 5634 }, { "epoch": 0.19, "grad_norm": 0.6993486881256104, "learning_rate": 1.8547146066724034e-05, "loss": 2.2254, "step": 5635 }, { "epoch": 0.19, "grad_norm": 0.7502067685127258, "learning_rate": 1.8546594272634518e-05, "loss": 2.2153, "step": 5636 }, { "epoch": 0.19, "grad_norm": 0.7296717762947083, "learning_rate": 1.854604238199059e-05, "loss": 2.1962, "step": 5637 }, { "epoch": 0.19, "grad_norm": 0.6980537176132202, "learning_rate": 1.854549039479848e-05, "loss": 2.1625, "step": 5638 }, { "epoch": 0.19, "grad_norm": 0.7148882150650024, "learning_rate": 1.854493831106443e-05, "loss": 2.1139, "step": 5639 }, { "epoch": 0.19, "grad_norm": 0.6928771734237671, "learning_rate": 1.8544386130794674e-05, "loss": 2.2283, "step": 5640 }, { "epoch": 0.19, "grad_norm": 0.7212710380554199, "learning_rate": 1.8543833853995454e-05, "loss": 2.1868, "step": 5641 }, { "epoch": 0.19, "grad_norm": 0.7183309197425842, "learning_rate": 1.8543281480673e-05, "loss": 2.1854, "step": 5642 }, { "epoch": 0.19, "grad_norm": 0.740610659122467, "learning_rate": 1.8542729010833565e-05, "loss": 2.1671, "step": 5643 }, { "epoch": 0.19, "grad_norm": 0.6828684210777283, "learning_rate": 1.854217644448338e-05, "loss": 2.1862, "step": 5644 }, { "epoch": 0.19, "grad_norm": 0.6876583099365234, "learning_rate": 1.8541623781628694e-05, "loss": 2.1578, "step": 5645 }, { "epoch": 0.19, "grad_norm": 0.6859642863273621, "learning_rate": 1.854107102227575e-05, "loss": 2.189, "step": 5646 }, { "epoch": 0.19, "grad_norm": 0.7046186923980713, "learning_rate": 1.8540518166430786e-05, "loss": 2.1867, "step": 5647 }, { "epoch": 0.19, "grad_norm": 0.7016686201095581, "learning_rate": 1.8539965214100056e-05, "loss": 2.1755, "step": 5648 }, { "epoch": 0.19, "grad_norm": 0.7054423093795776, "learning_rate": 1.8539412165289803e-05, "loss": 2.1632, "step": 5649 }, { "epoch": 0.19, "grad_norm": 0.6863690614700317, "learning_rate": 1.8538859020006275e-05, "loss": 2.2322, "step": 5650 }, { "epoch": 0.19, "grad_norm": 0.7221072912216187, "learning_rate": 1.8538305778255727e-05, "loss": 2.2056, "step": 5651 }, { "epoch": 0.19, "grad_norm": 0.6877840757369995, "learning_rate": 1.85377524400444e-05, "loss": 2.2096, "step": 5652 }, { "epoch": 0.19, "grad_norm": 0.690946102142334, "learning_rate": 1.853719900537855e-05, "loss": 2.1877, "step": 5653 }, { "epoch": 0.19, "grad_norm": 0.6967852711677551, "learning_rate": 1.853664547426443e-05, "loss": 2.1957, "step": 5654 }, { "epoch": 0.19, "grad_norm": 0.7667407393455505, "learning_rate": 1.8536091846708292e-05, "loss": 2.2885, "step": 5655 }, { "epoch": 0.19, "grad_norm": 0.6827165484428406, "learning_rate": 1.853553812271639e-05, "loss": 2.1943, "step": 5656 }, { "epoch": 0.19, "grad_norm": 0.6725485324859619, "learning_rate": 1.8534984302294984e-05, "loss": 2.1761, "step": 5657 }, { "epoch": 0.19, "grad_norm": 0.7040414810180664, "learning_rate": 1.853443038545032e-05, "loss": 2.1618, "step": 5658 }, { "epoch": 0.19, "grad_norm": 0.711972713470459, "learning_rate": 1.853387637218867e-05, "loss": 2.2586, "step": 5659 }, { "epoch": 0.19, "grad_norm": 0.7058595418930054, "learning_rate": 1.8533322262516284e-05, "loss": 2.1537, "step": 5660 }, { "epoch": 0.19, "grad_norm": 0.7124547362327576, "learning_rate": 1.8532768056439424e-05, "loss": 2.257, "step": 5661 }, { "epoch": 0.19, "grad_norm": 0.6787526607513428, "learning_rate": 1.8532213753964356e-05, "loss": 2.1562, "step": 5662 }, { "epoch": 0.19, "grad_norm": 0.7267456650733948, "learning_rate": 1.853165935509733e-05, "loss": 2.2167, "step": 5663 }, { "epoch": 0.19, "grad_norm": 0.7189849019050598, "learning_rate": 1.853110485984462e-05, "loss": 2.2196, "step": 5664 }, { "epoch": 0.19, "grad_norm": 0.6702702641487122, "learning_rate": 1.8530550268212483e-05, "loss": 2.2164, "step": 5665 }, { "epoch": 0.19, "grad_norm": 0.6748697757720947, "learning_rate": 1.8529995580207195e-05, "loss": 2.1395, "step": 5666 }, { "epoch": 0.19, "grad_norm": 0.7360884547233582, "learning_rate": 1.8529440795835012e-05, "loss": 2.2542, "step": 5667 }, { "epoch": 0.19, "grad_norm": 0.6972715854644775, "learning_rate": 1.8528885915102206e-05, "loss": 2.1685, "step": 5668 }, { "epoch": 0.19, "grad_norm": 0.6819813251495361, "learning_rate": 1.8528330938015045e-05, "loss": 2.2144, "step": 5669 }, { "epoch": 0.19, "grad_norm": 0.7535191178321838, "learning_rate": 1.85277758645798e-05, "loss": 2.1532, "step": 5670 }, { "epoch": 0.19, "grad_norm": 0.6854206323623657, "learning_rate": 1.8527220694802742e-05, "loss": 2.1489, "step": 5671 }, { "epoch": 0.19, "grad_norm": 0.6832886338233948, "learning_rate": 1.852666542869014e-05, "loss": 2.1693, "step": 5672 }, { "epoch": 0.19, "grad_norm": 0.6725049018859863, "learning_rate": 1.852611006624827e-05, "loss": 2.194, "step": 5673 }, { "epoch": 0.19, "grad_norm": 0.6795705556869507, "learning_rate": 1.8525554607483405e-05, "loss": 2.1702, "step": 5674 }, { "epoch": 0.19, "grad_norm": 0.7061900496482849, "learning_rate": 1.8524999052401822e-05, "loss": 2.1954, "step": 5675 }, { "epoch": 0.19, "grad_norm": 0.6852815747261047, "learning_rate": 1.8524443401009794e-05, "loss": 2.2372, "step": 5676 }, { "epoch": 0.19, "grad_norm": 0.7232020497322083, "learning_rate": 1.85238876533136e-05, "loss": 2.256, "step": 5677 }, { "epoch": 0.19, "grad_norm": 0.6963891983032227, "learning_rate": 1.852333180931952e-05, "loss": 2.2246, "step": 5678 }, { "epoch": 0.19, "grad_norm": 0.6922557353973389, "learning_rate": 1.8522775869033832e-05, "loss": 2.2516, "step": 5679 }, { "epoch": 0.19, "grad_norm": 0.7227574586868286, "learning_rate": 1.852221983246282e-05, "loss": 2.2781, "step": 5680 }, { "epoch": 0.19, "grad_norm": 0.6583694219589233, "learning_rate": 1.852166369961276e-05, "loss": 2.1877, "step": 5681 }, { "epoch": 0.19, "grad_norm": 0.691525399684906, "learning_rate": 1.8521107470489936e-05, "loss": 2.1654, "step": 5682 }, { "epoch": 0.19, "grad_norm": 0.7407993078231812, "learning_rate": 1.852055114510064e-05, "loss": 2.1989, "step": 5683 }, { "epoch": 0.19, "grad_norm": 0.6777991056442261, "learning_rate": 1.8519994723451144e-05, "loss": 2.2016, "step": 5684 }, { "epoch": 0.19, "grad_norm": 0.6982577443122864, "learning_rate": 1.8519438205547742e-05, "loss": 2.2563, "step": 5685 }, { "epoch": 0.19, "grad_norm": 0.6696224212646484, "learning_rate": 1.851888159139672e-05, "loss": 2.1372, "step": 5686 }, { "epoch": 0.19, "grad_norm": 0.6761857271194458, "learning_rate": 1.851832488100437e-05, "loss": 2.2024, "step": 5687 }, { "epoch": 0.19, "grad_norm": 0.7226700782775879, "learning_rate": 1.8517768074376974e-05, "loss": 2.1896, "step": 5688 }, { "epoch": 0.19, "grad_norm": 0.6938554048538208, "learning_rate": 1.8517211171520827e-05, "loss": 2.1917, "step": 5689 }, { "epoch": 0.19, "grad_norm": 0.6912155151367188, "learning_rate": 1.8516654172442223e-05, "loss": 2.1957, "step": 5690 }, { "epoch": 0.19, "grad_norm": 0.7307465076446533, "learning_rate": 1.8516097077147445e-05, "loss": 2.2042, "step": 5691 }, { "epoch": 0.19, "grad_norm": 0.6893206238746643, "learning_rate": 1.85155398856428e-05, "loss": 2.2514, "step": 5692 }, { "epoch": 0.19, "grad_norm": 0.7607327103614807, "learning_rate": 1.851498259793457e-05, "loss": 2.1813, "step": 5693 }, { "epoch": 0.19, "grad_norm": 0.7359330654144287, "learning_rate": 1.851442521402906e-05, "loss": 2.179, "step": 5694 }, { "epoch": 0.19, "grad_norm": 0.7026422619819641, "learning_rate": 1.8513867733932563e-05, "loss": 2.2105, "step": 5695 }, { "epoch": 0.19, "grad_norm": 0.7308424711227417, "learning_rate": 1.851331015765138e-05, "loss": 2.2052, "step": 5696 }, { "epoch": 0.19, "grad_norm": 0.6761261820793152, "learning_rate": 1.8512752485191804e-05, "loss": 2.2391, "step": 5697 }, { "epoch": 0.19, "grad_norm": 0.6896911263465881, "learning_rate": 1.8512194716560145e-05, "loss": 2.2366, "step": 5698 }, { "epoch": 0.19, "grad_norm": 0.7406615614891052, "learning_rate": 1.8511636851762694e-05, "loss": 2.2587, "step": 5699 }, { "epoch": 0.19, "grad_norm": 0.667523980140686, "learning_rate": 1.851107889080576e-05, "loss": 2.2125, "step": 5700 }, { "epoch": 0.19, "grad_norm": 0.6855826377868652, "learning_rate": 1.8510520833695644e-05, "loss": 2.2316, "step": 5701 }, { "epoch": 0.19, "grad_norm": 0.6964127421379089, "learning_rate": 1.850996268043865e-05, "loss": 2.1675, "step": 5702 }, { "epoch": 0.19, "grad_norm": 0.6840857267379761, "learning_rate": 1.8509404431041088e-05, "loss": 2.1918, "step": 5703 }, { "epoch": 0.19, "grad_norm": 0.7179672122001648, "learning_rate": 1.850884608550926e-05, "loss": 2.1245, "step": 5704 }, { "epoch": 0.19, "grad_norm": 0.7067128419876099, "learning_rate": 1.8508287643849474e-05, "loss": 2.2511, "step": 5705 }, { "epoch": 0.19, "grad_norm": 0.7082436084747314, "learning_rate": 1.8507729106068046e-05, "loss": 2.1442, "step": 5706 }, { "epoch": 0.19, "grad_norm": 0.6925551295280457, "learning_rate": 1.8507170472171275e-05, "loss": 2.1899, "step": 5707 }, { "epoch": 0.19, "grad_norm": 0.6743143200874329, "learning_rate": 1.850661174216548e-05, "loss": 2.2037, "step": 5708 }, { "epoch": 0.19, "grad_norm": 0.728156566619873, "learning_rate": 1.8506052916056972e-05, "loss": 2.1831, "step": 5709 }, { "epoch": 0.19, "grad_norm": 0.7280499935150146, "learning_rate": 1.850549399385206e-05, "loss": 2.1842, "step": 5710 }, { "epoch": 0.19, "grad_norm": 0.6557510495185852, "learning_rate": 1.8504934975557064e-05, "loss": 2.1276, "step": 5711 }, { "epoch": 0.19, "grad_norm": 0.704355001449585, "learning_rate": 1.8504375861178296e-05, "loss": 2.1626, "step": 5712 }, { "epoch": 0.19, "grad_norm": 0.6704341173171997, "learning_rate": 1.8503816650722075e-05, "loss": 2.2024, "step": 5713 }, { "epoch": 0.19, "grad_norm": 0.689931333065033, "learning_rate": 1.8503257344194717e-05, "loss": 2.2125, "step": 5714 }, { "epoch": 0.19, "grad_norm": 0.7019279599189758, "learning_rate": 1.850269794160254e-05, "loss": 2.1518, "step": 5715 }, { "epoch": 0.19, "grad_norm": 0.6858396530151367, "learning_rate": 1.8502138442951866e-05, "loss": 2.2054, "step": 5716 }, { "epoch": 0.19, "grad_norm": 0.707389771938324, "learning_rate": 1.850157884824901e-05, "loss": 2.2043, "step": 5717 }, { "epoch": 0.19, "grad_norm": 0.6972928643226624, "learning_rate": 1.85010191575003e-05, "loss": 2.2093, "step": 5718 }, { "epoch": 0.19, "grad_norm": 0.7109317779541016, "learning_rate": 1.850045937071206e-05, "loss": 2.1362, "step": 5719 }, { "epoch": 0.19, "grad_norm": 0.6993730068206787, "learning_rate": 1.8499899487890615e-05, "loss": 2.1745, "step": 5720 }, { "epoch": 0.19, "grad_norm": 0.7064675092697144, "learning_rate": 1.8499339509042286e-05, "loss": 2.2045, "step": 5721 }, { "epoch": 0.19, "grad_norm": 0.7081506848335266, "learning_rate": 1.8498779434173394e-05, "loss": 2.21, "step": 5722 }, { "epoch": 0.19, "grad_norm": 0.6949377655982971, "learning_rate": 1.849821926329028e-05, "loss": 2.1899, "step": 5723 }, { "epoch": 0.19, "grad_norm": 0.739858090877533, "learning_rate": 1.8497658996399258e-05, "loss": 2.2108, "step": 5724 }, { "epoch": 0.19, "grad_norm": 0.7240483164787292, "learning_rate": 1.849709863350667e-05, "loss": 2.1727, "step": 5725 }, { "epoch": 0.19, "grad_norm": 0.6831229329109192, "learning_rate": 1.849653817461884e-05, "loss": 2.1385, "step": 5726 }, { "epoch": 0.19, "grad_norm": 0.6802065372467041, "learning_rate": 1.84959776197421e-05, "loss": 2.1887, "step": 5727 }, { "epoch": 0.19, "grad_norm": 0.6928762793540955, "learning_rate": 1.8495416968882787e-05, "loss": 2.1857, "step": 5728 }, { "epoch": 0.19, "grad_norm": 0.7056906223297119, "learning_rate": 1.8494856222047232e-05, "loss": 2.1992, "step": 5729 }, { "epoch": 0.19, "grad_norm": 0.6872397065162659, "learning_rate": 1.8494295379241766e-05, "loss": 2.2372, "step": 5730 }, { "epoch": 0.19, "grad_norm": 0.6804319024085999, "learning_rate": 1.849373444047273e-05, "loss": 2.1564, "step": 5731 }, { "epoch": 0.19, "grad_norm": 0.6832162737846375, "learning_rate": 1.849317340574646e-05, "loss": 2.189, "step": 5732 }, { "epoch": 0.19, "grad_norm": 0.6804760098457336, "learning_rate": 1.8492612275069297e-05, "loss": 2.1802, "step": 5733 }, { "epoch": 0.19, "grad_norm": 0.7294803261756897, "learning_rate": 1.8492051048447575e-05, "loss": 2.242, "step": 5734 }, { "epoch": 0.19, "grad_norm": 0.6804189085960388, "learning_rate": 1.849148972588764e-05, "loss": 2.2371, "step": 5735 }, { "epoch": 0.19, "grad_norm": 0.7128114104270935, "learning_rate": 1.849092830739583e-05, "loss": 2.1209, "step": 5736 }, { "epoch": 0.19, "grad_norm": 0.707399845123291, "learning_rate": 1.849036679297848e-05, "loss": 2.207, "step": 5737 }, { "epoch": 0.19, "grad_norm": 0.6638315320014954, "learning_rate": 1.8489805182641954e-05, "loss": 2.1705, "step": 5738 }, { "epoch": 0.19, "grad_norm": 0.6954512596130371, "learning_rate": 1.8489243476392577e-05, "loss": 2.1287, "step": 5739 }, { "epoch": 0.19, "grad_norm": 0.6850218772888184, "learning_rate": 1.8488681674236708e-05, "loss": 2.2591, "step": 5740 }, { "epoch": 0.19, "grad_norm": 0.7465183138847351, "learning_rate": 1.8488119776180683e-05, "loss": 2.1737, "step": 5741 }, { "epoch": 0.19, "grad_norm": 0.698441207408905, "learning_rate": 1.848755778223086e-05, "loss": 2.2203, "step": 5742 }, { "epoch": 0.19, "grad_norm": 0.7407118082046509, "learning_rate": 1.848699569239358e-05, "loss": 2.1947, "step": 5743 }, { "epoch": 0.19, "grad_norm": 0.6816844940185547, "learning_rate": 1.8486433506675194e-05, "loss": 2.1664, "step": 5744 }, { "epoch": 0.19, "grad_norm": 0.6911886930465698, "learning_rate": 1.848587122508206e-05, "loss": 2.2101, "step": 5745 }, { "epoch": 0.19, "grad_norm": 0.7974408864974976, "learning_rate": 1.8485308847620523e-05, "loss": 2.215, "step": 5746 }, { "epoch": 0.19, "grad_norm": 0.7241461873054504, "learning_rate": 1.8484746374296943e-05, "loss": 2.2264, "step": 5747 }, { "epoch": 0.19, "grad_norm": 0.7246502637863159, "learning_rate": 1.848418380511767e-05, "loss": 2.2024, "step": 5748 }, { "epoch": 0.19, "grad_norm": 0.7227166891098022, "learning_rate": 1.848362114008906e-05, "loss": 2.2325, "step": 5749 }, { "epoch": 0.19, "grad_norm": 0.7089793682098389, "learning_rate": 1.848305837921747e-05, "loss": 2.2491, "step": 5750 }, { "epoch": 0.19, "grad_norm": 0.7279247641563416, "learning_rate": 1.848249552250926e-05, "loss": 2.183, "step": 5751 }, { "epoch": 0.19, "grad_norm": 0.6795079708099365, "learning_rate": 1.8481932569970782e-05, "loss": 2.259, "step": 5752 }, { "epoch": 0.19, "grad_norm": 0.6965195536613464, "learning_rate": 1.8481369521608406e-05, "loss": 2.2388, "step": 5753 }, { "epoch": 0.19, "grad_norm": 0.7750223875045776, "learning_rate": 1.8480806377428483e-05, "loss": 2.2226, "step": 5754 }, { "epoch": 0.19, "grad_norm": 0.7496516704559326, "learning_rate": 1.8480243137437385e-05, "loss": 2.1744, "step": 5755 }, { "epoch": 0.19, "grad_norm": 0.6600427031517029, "learning_rate": 1.8479679801641466e-05, "loss": 2.1989, "step": 5756 }, { "epoch": 0.19, "grad_norm": 0.7313635945320129, "learning_rate": 1.8479116370047096e-05, "loss": 2.1683, "step": 5757 }, { "epoch": 0.19, "grad_norm": 0.7298907041549683, "learning_rate": 1.8478552842660636e-05, "loss": 2.1809, "step": 5758 }, { "epoch": 0.19, "grad_norm": 0.728547215461731, "learning_rate": 1.8477989219488458e-05, "loss": 2.2321, "step": 5759 }, { "epoch": 0.19, "grad_norm": 0.7118574380874634, "learning_rate": 1.8477425500536927e-05, "loss": 2.2615, "step": 5760 }, { "epoch": 0.19, "grad_norm": 0.7344117164611816, "learning_rate": 1.8476861685812412e-05, "loss": 2.2594, "step": 5761 }, { "epoch": 0.19, "grad_norm": 0.7285259962081909, "learning_rate": 1.8476297775321278e-05, "loss": 2.1876, "step": 5762 }, { "epoch": 0.19, "grad_norm": 0.7066472172737122, "learning_rate": 1.84757337690699e-05, "loss": 2.1914, "step": 5763 }, { "epoch": 0.19, "grad_norm": 0.7148813009262085, "learning_rate": 1.8475169667064647e-05, "loss": 2.1986, "step": 5764 }, { "epoch": 0.19, "grad_norm": 0.7032379508018494, "learning_rate": 1.8474605469311897e-05, "loss": 2.2771, "step": 5765 }, { "epoch": 0.19, "grad_norm": 0.7261673212051392, "learning_rate": 1.847404117581802e-05, "loss": 2.1403, "step": 5766 }, { "epoch": 0.19, "grad_norm": 0.7148529291152954, "learning_rate": 1.847347678658939e-05, "loss": 2.1727, "step": 5767 }, { "epoch": 0.19, "grad_norm": 0.7385905385017395, "learning_rate": 1.847291230163239e-05, "loss": 2.2446, "step": 5768 }, { "epoch": 0.19, "grad_norm": 0.6731931567192078, "learning_rate": 1.8472347720953392e-05, "loss": 2.209, "step": 5769 }, { "epoch": 0.19, "grad_norm": 0.7199866771697998, "learning_rate": 1.847178304455877e-05, "loss": 2.221, "step": 5770 }, { "epoch": 0.19, "grad_norm": 0.7093116044998169, "learning_rate": 1.8471218272454913e-05, "loss": 2.2051, "step": 5771 }, { "epoch": 0.19, "grad_norm": 0.6715319156646729, "learning_rate": 1.8470653404648192e-05, "loss": 2.2453, "step": 5772 }, { "epoch": 0.19, "grad_norm": 0.704441487789154, "learning_rate": 1.8470088441144996e-05, "loss": 2.194, "step": 5773 }, { "epoch": 0.19, "grad_norm": 0.7156498432159424, "learning_rate": 1.8469523381951706e-05, "loss": 2.2629, "step": 5774 }, { "epoch": 0.19, "grad_norm": 0.719251811504364, "learning_rate": 1.8468958227074704e-05, "loss": 2.2132, "step": 5775 }, { "epoch": 0.19, "grad_norm": 0.7486870288848877, "learning_rate": 1.8468392976520374e-05, "loss": 2.2248, "step": 5776 }, { "epoch": 0.19, "grad_norm": 0.678663432598114, "learning_rate": 1.8467827630295102e-05, "loss": 2.1407, "step": 5777 }, { "epoch": 0.19, "grad_norm": 0.7613793611526489, "learning_rate": 1.846726218840528e-05, "loss": 2.2142, "step": 5778 }, { "epoch": 0.19, "grad_norm": 0.7172176241874695, "learning_rate": 1.8466696650857287e-05, "loss": 2.2728, "step": 5779 }, { "epoch": 0.19, "grad_norm": 0.727596640586853, "learning_rate": 1.846613101765752e-05, "loss": 2.2357, "step": 5780 }, { "epoch": 0.19, "grad_norm": 0.8211220502853394, "learning_rate": 1.8465565288812362e-05, "loss": 2.213, "step": 5781 }, { "epoch": 0.19, "grad_norm": 0.6838415265083313, "learning_rate": 1.8464999464328216e-05, "loss": 2.2234, "step": 5782 }, { "epoch": 0.19, "grad_norm": 0.7337430715560913, "learning_rate": 1.8464433544211465e-05, "loss": 2.1714, "step": 5783 }, { "epoch": 0.19, "grad_norm": 0.7136640548706055, "learning_rate": 1.8463867528468502e-05, "loss": 2.2249, "step": 5784 }, { "epoch": 0.19, "grad_norm": 0.6972154378890991, "learning_rate": 1.8463301417105724e-05, "loss": 2.2083, "step": 5785 }, { "epoch": 0.19, "grad_norm": 0.6871650815010071, "learning_rate": 1.846273521012953e-05, "loss": 2.2087, "step": 5786 }, { "epoch": 0.19, "grad_norm": 0.7603265643119812, "learning_rate": 1.846216890754631e-05, "loss": 2.2314, "step": 5787 }, { "epoch": 0.19, "grad_norm": 0.7470519542694092, "learning_rate": 1.8461602509362465e-05, "loss": 2.1892, "step": 5788 }, { "epoch": 0.19, "grad_norm": 0.7518399953842163, "learning_rate": 1.8461036015584393e-05, "loss": 2.2239, "step": 5789 }, { "epoch": 0.19, "grad_norm": 0.7135910987854004, "learning_rate": 1.8460469426218498e-05, "loss": 2.1873, "step": 5790 }, { "epoch": 0.19, "grad_norm": 0.729735255241394, "learning_rate": 1.8459902741271175e-05, "loss": 2.1145, "step": 5791 }, { "epoch": 0.19, "grad_norm": 0.6778610944747925, "learning_rate": 1.8459335960748835e-05, "loss": 2.1942, "step": 5792 }, { "epoch": 0.19, "grad_norm": 0.6946741342544556, "learning_rate": 1.845876908465787e-05, "loss": 2.1534, "step": 5793 }, { "epoch": 0.19, "grad_norm": 0.733286440372467, "learning_rate": 1.8458202113004686e-05, "loss": 2.14, "step": 5794 }, { "epoch": 0.19, "grad_norm": 0.7079468965530396, "learning_rate": 1.8457635045795697e-05, "loss": 2.2105, "step": 5795 }, { "epoch": 0.19, "grad_norm": 0.730912446975708, "learning_rate": 1.8457067883037302e-05, "loss": 2.2391, "step": 5796 }, { "epoch": 0.19, "grad_norm": 0.6963058114051819, "learning_rate": 1.8456500624735908e-05, "loss": 2.1772, "step": 5797 }, { "epoch": 0.19, "grad_norm": 0.7229794263839722, "learning_rate": 1.845593327089793e-05, "loss": 2.2, "step": 5798 }, { "epoch": 0.19, "grad_norm": 0.6929073929786682, "learning_rate": 1.845536582152977e-05, "loss": 2.2058, "step": 5799 }, { "epoch": 0.19, "grad_norm": 0.7347926497459412, "learning_rate": 1.8454798276637843e-05, "loss": 2.304, "step": 5800 }, { "epoch": 0.19, "grad_norm": 0.6852323412895203, "learning_rate": 1.8454230636228563e-05, "loss": 2.1878, "step": 5801 }, { "epoch": 0.19, "grad_norm": 0.6772787570953369, "learning_rate": 1.8453662900308337e-05, "loss": 2.1611, "step": 5802 }, { "epoch": 0.19, "grad_norm": 0.6806902289390564, "learning_rate": 1.8453095068883583e-05, "loss": 2.2181, "step": 5803 }, { "epoch": 0.19, "grad_norm": 0.6981421709060669, "learning_rate": 1.8452527141960717e-05, "loss": 2.1295, "step": 5804 }, { "epoch": 0.19, "grad_norm": 0.6946926712989807, "learning_rate": 1.845195911954615e-05, "loss": 2.2123, "step": 5805 }, { "epoch": 0.19, "grad_norm": 0.6878112554550171, "learning_rate": 1.8451391001646303e-05, "loss": 2.1646, "step": 5806 }, { "epoch": 0.19, "grad_norm": 0.6959139108657837, "learning_rate": 1.8450822788267593e-05, "loss": 2.1849, "step": 5807 }, { "epoch": 0.19, "grad_norm": 0.7147149443626404, "learning_rate": 1.845025447941644e-05, "loss": 2.2275, "step": 5808 }, { "epoch": 0.19, "grad_norm": 0.6920427680015564, "learning_rate": 1.844968607509926e-05, "loss": 2.15, "step": 5809 }, { "epoch": 0.19, "grad_norm": 0.6701664924621582, "learning_rate": 1.8449117575322487e-05, "loss": 2.2455, "step": 5810 }, { "epoch": 0.19, "grad_norm": 0.6952377557754517, "learning_rate": 1.8448548980092532e-05, "loss": 2.1685, "step": 5811 }, { "epoch": 0.19, "grad_norm": 0.7051309943199158, "learning_rate": 1.8447980289415822e-05, "loss": 2.2331, "step": 5812 }, { "epoch": 0.19, "grad_norm": 0.7096760869026184, "learning_rate": 1.844741150329878e-05, "loss": 2.1934, "step": 5813 }, { "epoch": 0.19, "grad_norm": 0.7116329669952393, "learning_rate": 1.8446842621747834e-05, "loss": 2.1326, "step": 5814 }, { "epoch": 0.19, "grad_norm": 0.6922067403793335, "learning_rate": 1.8446273644769414e-05, "loss": 2.1921, "step": 5815 }, { "epoch": 0.19, "grad_norm": 0.7360302805900574, "learning_rate": 1.8445704572369937e-05, "loss": 2.2255, "step": 5816 }, { "epoch": 0.19, "grad_norm": 0.7079552412033081, "learning_rate": 1.8445135404555844e-05, "loss": 2.2096, "step": 5817 }, { "epoch": 0.19, "grad_norm": 0.6884557008743286, "learning_rate": 1.8444566141333563e-05, "loss": 2.1887, "step": 5818 }, { "epoch": 0.19, "grad_norm": 0.7541073560714722, "learning_rate": 1.844399678270952e-05, "loss": 2.1831, "step": 5819 }, { "epoch": 0.19, "grad_norm": 0.7467616200447083, "learning_rate": 1.844342732869015e-05, "loss": 2.283, "step": 5820 }, { "epoch": 0.19, "grad_norm": 0.6788119077682495, "learning_rate": 1.8442857779281887e-05, "loss": 2.187, "step": 5821 }, { "epoch": 0.19, "grad_norm": 0.7284881472587585, "learning_rate": 1.8442288134491165e-05, "loss": 2.233, "step": 5822 }, { "epoch": 0.19, "grad_norm": 0.6983395218849182, "learning_rate": 1.8441718394324417e-05, "loss": 2.1928, "step": 5823 }, { "epoch": 0.19, "grad_norm": 0.7055501341819763, "learning_rate": 1.8441148558788083e-05, "loss": 2.2157, "step": 5824 }, { "epoch": 0.19, "grad_norm": 0.7153393626213074, "learning_rate": 1.8440578627888597e-05, "loss": 2.2753, "step": 5825 }, { "epoch": 0.19, "grad_norm": 0.6805808544158936, "learning_rate": 1.8440008601632406e-05, "loss": 2.1886, "step": 5826 }, { "epoch": 0.19, "grad_norm": 0.7000783681869507, "learning_rate": 1.843943848002594e-05, "loss": 2.1792, "step": 5827 }, { "epoch": 0.19, "grad_norm": 0.7028132081031799, "learning_rate": 1.8438868263075646e-05, "loss": 2.1927, "step": 5828 }, { "epoch": 0.19, "grad_norm": 0.6968508362770081, "learning_rate": 1.8438297950787966e-05, "loss": 2.1809, "step": 5829 }, { "epoch": 0.19, "grad_norm": 0.6766806244850159, "learning_rate": 1.843772754316934e-05, "loss": 2.1805, "step": 5830 }, { "epoch": 0.19, "grad_norm": 0.6885875463485718, "learning_rate": 1.843715704022621e-05, "loss": 2.2798, "step": 5831 }, { "epoch": 0.19, "grad_norm": 0.6872454285621643, "learning_rate": 1.8436586441965025e-05, "loss": 2.1742, "step": 5832 }, { "epoch": 0.19, "grad_norm": 0.6760377883911133, "learning_rate": 1.8436015748392232e-05, "loss": 2.1702, "step": 5833 }, { "epoch": 0.19, "grad_norm": 0.6895399689674377, "learning_rate": 1.8435444959514278e-05, "loss": 2.2661, "step": 5834 }, { "epoch": 0.19, "grad_norm": 0.7271701097488403, "learning_rate": 1.8434874075337608e-05, "loss": 2.1619, "step": 5835 }, { "epoch": 0.19, "grad_norm": 0.7000858187675476, "learning_rate": 1.843430309586868e-05, "loss": 2.2094, "step": 5836 }, { "epoch": 0.19, "grad_norm": 0.7013121843338013, "learning_rate": 1.8433732021113933e-05, "loss": 2.1684, "step": 5837 }, { "epoch": 0.19, "grad_norm": 0.7426285743713379, "learning_rate": 1.8433160851079822e-05, "loss": 2.1785, "step": 5838 }, { "epoch": 0.19, "grad_norm": 0.7096011638641357, "learning_rate": 1.8432589585772808e-05, "loss": 2.2291, "step": 5839 }, { "epoch": 0.19, "grad_norm": 0.7322856187820435, "learning_rate": 1.8432018225199337e-05, "loss": 2.1919, "step": 5840 }, { "epoch": 0.19, "grad_norm": 0.7045252323150635, "learning_rate": 1.8431446769365867e-05, "loss": 2.1675, "step": 5841 }, { "epoch": 0.19, "grad_norm": 0.6601688861846924, "learning_rate": 1.8430875218278847e-05, "loss": 2.2005, "step": 5842 }, { "epoch": 0.19, "grad_norm": 0.7153899073600769, "learning_rate": 1.8430303571944744e-05, "loss": 2.206, "step": 5843 }, { "epoch": 0.19, "grad_norm": 0.6839788556098938, "learning_rate": 1.8429731830370016e-05, "loss": 2.2015, "step": 5844 }, { "epoch": 0.19, "grad_norm": 0.7272733449935913, "learning_rate": 1.842915999356111e-05, "loss": 2.2061, "step": 5845 }, { "epoch": 0.19, "grad_norm": 0.7017080783843994, "learning_rate": 1.8428588061524498e-05, "loss": 2.2193, "step": 5846 }, { "epoch": 0.19, "grad_norm": 0.6546066999435425, "learning_rate": 1.8428016034266637e-05, "loss": 2.1681, "step": 5847 }, { "epoch": 0.19, "grad_norm": 0.6774407625198364, "learning_rate": 1.8427443911793993e-05, "loss": 2.1968, "step": 5848 }, { "epoch": 0.19, "grad_norm": 0.6878687739372253, "learning_rate": 1.8426871694113023e-05, "loss": 2.257, "step": 5849 }, { "epoch": 0.19, "grad_norm": 0.6821404695510864, "learning_rate": 1.8426299381230198e-05, "loss": 2.2435, "step": 5850 }, { "epoch": 0.19, "grad_norm": 0.6938878297805786, "learning_rate": 1.842572697315198e-05, "loss": 2.228, "step": 5851 }, { "epoch": 0.19, "grad_norm": 0.6978577971458435, "learning_rate": 1.8425154469884834e-05, "loss": 2.1483, "step": 5852 }, { "epoch": 0.19, "grad_norm": 0.6769468784332275, "learning_rate": 1.8424581871435232e-05, "loss": 2.1544, "step": 5853 }, { "epoch": 0.19, "grad_norm": 0.6915081739425659, "learning_rate": 1.8424009177809644e-05, "loss": 2.1374, "step": 5854 }, { "epoch": 0.19, "grad_norm": 0.7178905010223389, "learning_rate": 1.8423436389014534e-05, "loss": 2.1612, "step": 5855 }, { "epoch": 0.19, "grad_norm": 0.7068578600883484, "learning_rate": 1.842286350505638e-05, "loss": 2.1994, "step": 5856 }, { "epoch": 0.19, "grad_norm": 0.7085909247398376, "learning_rate": 1.8422290525941647e-05, "loss": 2.1842, "step": 5857 }, { "epoch": 0.19, "grad_norm": 0.7198071479797363, "learning_rate": 1.842171745167681e-05, "loss": 2.1736, "step": 5858 }, { "epoch": 0.19, "grad_norm": 0.6914926767349243, "learning_rate": 1.8421144282268352e-05, "loss": 2.1614, "step": 5859 }, { "epoch": 0.19, "grad_norm": 0.6952751278877258, "learning_rate": 1.8420571017722736e-05, "loss": 2.1973, "step": 5860 }, { "epoch": 0.19, "grad_norm": 0.7335081696510315, "learning_rate": 1.841999765804644e-05, "loss": 2.1579, "step": 5861 }, { "epoch": 0.2, "grad_norm": 0.7027428150177002, "learning_rate": 1.841942420324595e-05, "loss": 2.2104, "step": 5862 }, { "epoch": 0.2, "grad_norm": 0.7556118369102478, "learning_rate": 1.841885065332774e-05, "loss": 2.2314, "step": 5863 }, { "epoch": 0.2, "grad_norm": 0.6858674883842468, "learning_rate": 1.841827700829829e-05, "loss": 2.1902, "step": 5864 }, { "epoch": 0.2, "grad_norm": 0.6955288052558899, "learning_rate": 1.8417703268164075e-05, "loss": 2.2019, "step": 5865 }, { "epoch": 0.2, "grad_norm": 0.6943233013153076, "learning_rate": 1.8417129432931587e-05, "loss": 2.2263, "step": 5866 }, { "epoch": 0.2, "grad_norm": 0.7108997702598572, "learning_rate": 1.8416555502607297e-05, "loss": 2.2253, "step": 5867 }, { "epoch": 0.2, "grad_norm": 0.6960964798927307, "learning_rate": 1.84159814771977e-05, "loss": 2.1652, "step": 5868 }, { "epoch": 0.2, "grad_norm": 0.7019452452659607, "learning_rate": 1.841540735670928e-05, "loss": 2.1728, "step": 5869 }, { "epoch": 0.2, "grad_norm": 0.6867461800575256, "learning_rate": 1.8414833141148512e-05, "loss": 2.1422, "step": 5870 }, { "epoch": 0.2, "grad_norm": 0.7390266060829163, "learning_rate": 1.84142588305219e-05, "loss": 2.1594, "step": 5871 }, { "epoch": 0.2, "grad_norm": 0.6901983022689819, "learning_rate": 1.8413684424835913e-05, "loss": 2.1657, "step": 5872 }, { "epoch": 0.2, "grad_norm": 0.6811671853065491, "learning_rate": 1.8413109924097058e-05, "loss": 2.1677, "step": 5873 }, { "epoch": 0.2, "grad_norm": 0.6941006779670715, "learning_rate": 1.8412535328311813e-05, "loss": 2.2289, "step": 5874 }, { "epoch": 0.2, "grad_norm": 0.7869853973388672, "learning_rate": 1.8411960637486676e-05, "loss": 2.2051, "step": 5875 }, { "epoch": 0.2, "grad_norm": 0.7169014811515808, "learning_rate": 1.841138585162814e-05, "loss": 2.2509, "step": 5876 }, { "epoch": 0.2, "grad_norm": 0.7121827006340027, "learning_rate": 1.8410810970742693e-05, "loss": 2.1844, "step": 5877 }, { "epoch": 0.2, "grad_norm": 0.6684106588363647, "learning_rate": 1.841023599483683e-05, "loss": 2.1817, "step": 5878 }, { "epoch": 0.2, "grad_norm": 0.7329347729682922, "learning_rate": 1.8409660923917055e-05, "loss": 2.143, "step": 5879 }, { "epoch": 0.2, "grad_norm": 0.6933580636978149, "learning_rate": 1.8409085757989857e-05, "loss": 2.1866, "step": 5880 }, { "epoch": 0.2, "grad_norm": 0.7033088803291321, "learning_rate": 1.8408510497061738e-05, "loss": 2.257, "step": 5881 }, { "epoch": 0.2, "grad_norm": 0.726311981678009, "learning_rate": 1.8407935141139192e-05, "loss": 2.2082, "step": 5882 }, { "epoch": 0.2, "grad_norm": 0.762786328792572, "learning_rate": 1.8407359690228725e-05, "loss": 2.1684, "step": 5883 }, { "epoch": 0.2, "grad_norm": 0.6952560544013977, "learning_rate": 1.8406784144336834e-05, "loss": 2.2183, "step": 5884 }, { "epoch": 0.2, "grad_norm": 0.695530354976654, "learning_rate": 1.8406208503470023e-05, "loss": 2.2046, "step": 5885 }, { "epoch": 0.2, "grad_norm": 0.6944941282272339, "learning_rate": 1.8405632767634796e-05, "loss": 2.2882, "step": 5886 }, { "epoch": 0.2, "grad_norm": 0.6655538082122803, "learning_rate": 1.8405056936837654e-05, "loss": 2.1245, "step": 5887 }, { "epoch": 0.2, "grad_norm": 0.6736990213394165, "learning_rate": 1.8404481011085108e-05, "loss": 2.2207, "step": 5888 }, { "epoch": 0.2, "grad_norm": 0.6976009011268616, "learning_rate": 1.8403904990383657e-05, "loss": 2.1988, "step": 5889 }, { "epoch": 0.2, "grad_norm": 0.694331705570221, "learning_rate": 1.8403328874739815e-05, "loss": 2.2709, "step": 5890 }, { "epoch": 0.2, "grad_norm": 0.7157639265060425, "learning_rate": 1.8402752664160087e-05, "loss": 2.191, "step": 5891 }, { "epoch": 0.2, "grad_norm": 0.6894098520278931, "learning_rate": 1.8402176358650984e-05, "loss": 2.1705, "step": 5892 }, { "epoch": 0.2, "grad_norm": 0.7596185803413391, "learning_rate": 1.840159995821902e-05, "loss": 2.1419, "step": 5893 }, { "epoch": 0.2, "grad_norm": 0.710532546043396, "learning_rate": 1.84010234628707e-05, "loss": 2.1763, "step": 5894 }, { "epoch": 0.2, "grad_norm": 0.6919721961021423, "learning_rate": 1.840044687261254e-05, "loss": 2.2056, "step": 5895 }, { "epoch": 0.2, "grad_norm": 0.6842281222343445, "learning_rate": 1.8399870187451055e-05, "loss": 2.2562, "step": 5896 }, { "epoch": 0.2, "grad_norm": 0.7018915414810181, "learning_rate": 1.839929340739276e-05, "loss": 2.263, "step": 5897 }, { "epoch": 0.2, "grad_norm": 0.7027345895767212, "learning_rate": 1.8398716532444173e-05, "loss": 2.1569, "step": 5898 }, { "epoch": 0.2, "grad_norm": 0.7313182353973389, "learning_rate": 1.8398139562611805e-05, "loss": 2.2466, "step": 5899 }, { "epoch": 0.2, "grad_norm": 0.6955164670944214, "learning_rate": 1.839756249790218e-05, "loss": 2.1344, "step": 5900 }, { "epoch": 0.2, "grad_norm": 0.7044562101364136, "learning_rate": 1.8396985338321814e-05, "loss": 2.2207, "step": 5901 }, { "epoch": 0.2, "grad_norm": 0.7260205149650574, "learning_rate": 1.839640808387723e-05, "loss": 2.1719, "step": 5902 }, { "epoch": 0.2, "grad_norm": 0.676543116569519, "learning_rate": 1.8395830734574947e-05, "loss": 2.1415, "step": 5903 }, { "epoch": 0.2, "grad_norm": 0.7353396415710449, "learning_rate": 1.839525329042149e-05, "loss": 2.2341, "step": 5904 }, { "epoch": 0.2, "grad_norm": 0.6957465410232544, "learning_rate": 1.8394675751423382e-05, "loss": 2.2027, "step": 5905 }, { "epoch": 0.2, "grad_norm": 0.7285893559455872, "learning_rate": 1.8394098117587145e-05, "loss": 2.2132, "step": 5906 }, { "epoch": 0.2, "grad_norm": 0.700844407081604, "learning_rate": 1.839352038891931e-05, "loss": 2.1647, "step": 5907 }, { "epoch": 0.2, "grad_norm": 0.670282244682312, "learning_rate": 1.8392942565426395e-05, "loss": 2.1663, "step": 5908 }, { "epoch": 0.2, "grad_norm": 0.681713342666626, "learning_rate": 1.839236464711494e-05, "loss": 2.1263, "step": 5909 }, { "epoch": 0.2, "grad_norm": 0.707585334777832, "learning_rate": 1.8391786633991463e-05, "loss": 2.1834, "step": 5910 }, { "epoch": 0.2, "grad_norm": 0.7204396724700928, "learning_rate": 1.83912085260625e-05, "loss": 2.3074, "step": 5911 }, { "epoch": 0.2, "grad_norm": 0.7059338092803955, "learning_rate": 1.8390630323334582e-05, "loss": 2.261, "step": 5912 }, { "epoch": 0.2, "grad_norm": 0.6968511343002319, "learning_rate": 1.839005202581424e-05, "loss": 2.196, "step": 5913 }, { "epoch": 0.2, "grad_norm": 0.6919164061546326, "learning_rate": 1.8389473633508007e-05, "loss": 2.1883, "step": 5914 }, { "epoch": 0.2, "grad_norm": 0.713945746421814, "learning_rate": 1.8388895146422417e-05, "loss": 2.3184, "step": 5915 }, { "epoch": 0.2, "grad_norm": 0.6772942543029785, "learning_rate": 1.8388316564564005e-05, "loss": 2.2151, "step": 5916 }, { "epoch": 0.2, "grad_norm": 0.6983880400657654, "learning_rate": 1.838773788793931e-05, "loss": 2.1479, "step": 5917 }, { "epoch": 0.2, "grad_norm": 0.69270259141922, "learning_rate": 1.8387159116554868e-05, "loss": 2.1717, "step": 5918 }, { "epoch": 0.2, "grad_norm": 0.6878052353858948, "learning_rate": 1.838658025041722e-05, "loss": 2.1714, "step": 5919 }, { "epoch": 0.2, "grad_norm": 0.6984413862228394, "learning_rate": 1.83860012895329e-05, "loss": 2.2791, "step": 5920 }, { "epoch": 0.2, "grad_norm": 0.6905718445777893, "learning_rate": 1.8385422233908452e-05, "loss": 2.2023, "step": 5921 }, { "epoch": 0.2, "grad_norm": 0.7092013955116272, "learning_rate": 1.838484308355042e-05, "loss": 2.1796, "step": 5922 }, { "epoch": 0.2, "grad_norm": 0.6702136397361755, "learning_rate": 1.8384263838465343e-05, "loss": 2.1786, "step": 5923 }, { "epoch": 0.2, "grad_norm": 0.7048898935317993, "learning_rate": 1.8383684498659766e-05, "loss": 2.231, "step": 5924 }, { "epoch": 0.2, "grad_norm": 0.6839735507965088, "learning_rate": 1.8383105064140236e-05, "loss": 2.1057, "step": 5925 }, { "epoch": 0.2, "grad_norm": 0.6794403791427612, "learning_rate": 1.83825255349133e-05, "loss": 2.1605, "step": 5926 }, { "epoch": 0.2, "grad_norm": 0.7457671761512756, "learning_rate": 1.83819459109855e-05, "loss": 2.1542, "step": 5927 }, { "epoch": 0.2, "grad_norm": 0.6855442523956299, "learning_rate": 1.838136619236339e-05, "loss": 2.1861, "step": 5928 }, { "epoch": 0.2, "grad_norm": 0.7052209973335266, "learning_rate": 1.8380786379053516e-05, "loss": 2.1828, "step": 5929 }, { "epoch": 0.2, "grad_norm": 0.6917964816093445, "learning_rate": 1.838020647106243e-05, "loss": 2.2451, "step": 5930 }, { "epoch": 0.2, "grad_norm": 0.7004150152206421, "learning_rate": 1.8379626468396677e-05, "loss": 2.1842, "step": 5931 }, { "epoch": 0.2, "grad_norm": 0.7099143266677856, "learning_rate": 1.837904637106282e-05, "loss": 2.214, "step": 5932 }, { "epoch": 0.2, "grad_norm": 0.7352455258369446, "learning_rate": 1.8378466179067407e-05, "loss": 2.1771, "step": 5933 }, { "epoch": 0.2, "grad_norm": 0.6866982579231262, "learning_rate": 1.8377885892416994e-05, "loss": 2.2085, "step": 5934 }, { "epoch": 0.2, "grad_norm": 0.6692179441452026, "learning_rate": 1.8377305511118137e-05, "loss": 2.1688, "step": 5935 }, { "epoch": 0.2, "grad_norm": 0.7044215202331543, "learning_rate": 1.837672503517739e-05, "loss": 2.1047, "step": 5936 }, { "epoch": 0.2, "grad_norm": 0.6862626671791077, "learning_rate": 1.8376144464601314e-05, "loss": 2.1846, "step": 5937 }, { "epoch": 0.2, "grad_norm": 0.7199077010154724, "learning_rate": 1.8375563799396468e-05, "loss": 2.2083, "step": 5938 }, { "epoch": 0.2, "grad_norm": 0.694146990776062, "learning_rate": 1.8374983039569408e-05, "loss": 2.2173, "step": 5939 }, { "epoch": 0.2, "grad_norm": 0.6816791892051697, "learning_rate": 1.8374402185126698e-05, "loss": 2.1635, "step": 5940 }, { "epoch": 0.2, "grad_norm": 0.6968711614608765, "learning_rate": 1.83738212360749e-05, "loss": 2.1973, "step": 5941 }, { "epoch": 0.2, "grad_norm": 0.7187376022338867, "learning_rate": 1.8373240192420578e-05, "loss": 2.1864, "step": 5942 }, { "epoch": 0.2, "grad_norm": 0.6808293461799622, "learning_rate": 1.8372659054170294e-05, "loss": 2.2394, "step": 5943 }, { "epoch": 0.2, "grad_norm": 0.7138363718986511, "learning_rate": 1.8372077821330617e-05, "loss": 2.2057, "step": 5944 }, { "epoch": 0.2, "grad_norm": 0.6983692049980164, "learning_rate": 1.837149649390811e-05, "loss": 2.2146, "step": 5945 }, { "epoch": 0.2, "grad_norm": 0.7370388507843018, "learning_rate": 1.8370915071909345e-05, "loss": 2.2344, "step": 5946 }, { "epoch": 0.2, "grad_norm": 0.7056685090065002, "learning_rate": 1.8370333555340885e-05, "loss": 2.2289, "step": 5947 }, { "epoch": 0.2, "grad_norm": 0.689792811870575, "learning_rate": 1.83697519442093e-05, "loss": 2.2218, "step": 5948 }, { "epoch": 0.2, "grad_norm": 0.6775859594345093, "learning_rate": 1.8369170238521166e-05, "loss": 2.1443, "step": 5949 }, { "epoch": 0.2, "grad_norm": 0.6931392550468445, "learning_rate": 1.836858843828305e-05, "loss": 2.2422, "step": 5950 }, { "epoch": 0.2, "grad_norm": 0.7106674313545227, "learning_rate": 1.8368006543501527e-05, "loss": 2.279, "step": 5951 }, { "epoch": 0.2, "grad_norm": 0.6893783211708069, "learning_rate": 1.8367424554183166e-05, "loss": 2.2492, "step": 5952 }, { "epoch": 0.2, "grad_norm": 0.7215111255645752, "learning_rate": 1.8366842470334553e-05, "loss": 2.2211, "step": 5953 }, { "epoch": 0.2, "grad_norm": 0.7105419039726257, "learning_rate": 1.836626029196225e-05, "loss": 2.2575, "step": 5954 }, { "epoch": 0.2, "grad_norm": 0.7024807333946228, "learning_rate": 1.8365678019072847e-05, "loss": 2.2265, "step": 5955 }, { "epoch": 0.2, "grad_norm": 0.6756042242050171, "learning_rate": 1.8365095651672914e-05, "loss": 2.1462, "step": 5956 }, { "epoch": 0.2, "grad_norm": 0.7355726361274719, "learning_rate": 1.8364513189769033e-05, "loss": 2.2458, "step": 5957 }, { "epoch": 0.2, "grad_norm": 0.6921836137771606, "learning_rate": 1.8363930633367783e-05, "loss": 2.2163, "step": 5958 }, { "epoch": 0.2, "grad_norm": 0.6773610711097717, "learning_rate": 1.836334798247575e-05, "loss": 2.2428, "step": 5959 }, { "epoch": 0.2, "grad_norm": 0.7215322852134705, "learning_rate": 1.836276523709951e-05, "loss": 2.2451, "step": 5960 }, { "epoch": 0.2, "grad_norm": 0.7023947834968567, "learning_rate": 1.8362182397245648e-05, "loss": 2.2407, "step": 5961 }, { "epoch": 0.2, "grad_norm": 0.7059973478317261, "learning_rate": 1.8361599462920752e-05, "loss": 2.2726, "step": 5962 }, { "epoch": 0.2, "grad_norm": 0.6837860941886902, "learning_rate": 1.836101643413141e-05, "loss": 2.2246, "step": 5963 }, { "epoch": 0.2, "grad_norm": 0.7055572867393494, "learning_rate": 1.8360433310884197e-05, "loss": 2.1929, "step": 5964 }, { "epoch": 0.2, "grad_norm": 0.6980744004249573, "learning_rate": 1.8359850093185713e-05, "loss": 2.2268, "step": 5965 }, { "epoch": 0.2, "grad_norm": 0.6900520920753479, "learning_rate": 1.835926678104254e-05, "loss": 2.1952, "step": 5966 }, { "epoch": 0.2, "grad_norm": 0.7105039954185486, "learning_rate": 1.835868337446127e-05, "loss": 2.2974, "step": 5967 }, { "epoch": 0.2, "grad_norm": 0.7339195013046265, "learning_rate": 1.8358099873448493e-05, "loss": 2.2795, "step": 5968 }, { "epoch": 0.2, "grad_norm": 0.7335793972015381, "learning_rate": 1.83575162780108e-05, "loss": 2.2296, "step": 5969 }, { "epoch": 0.2, "grad_norm": 0.6875026822090149, "learning_rate": 1.8356932588154794e-05, "loss": 2.092, "step": 5970 }, { "epoch": 0.2, "grad_norm": 0.6867982745170593, "learning_rate": 1.8356348803887058e-05, "loss": 2.1101, "step": 5971 }, { "epoch": 0.2, "grad_norm": 0.6770048141479492, "learning_rate": 1.8355764925214186e-05, "loss": 2.1351, "step": 5972 }, { "epoch": 0.2, "grad_norm": 0.7466998100280762, "learning_rate": 1.8355180952142782e-05, "loss": 2.2375, "step": 5973 }, { "epoch": 0.2, "grad_norm": 0.7097471952438354, "learning_rate": 1.835459688467944e-05, "loss": 2.2135, "step": 5974 }, { "epoch": 0.2, "grad_norm": 0.6944347620010376, "learning_rate": 1.8354012722830758e-05, "loss": 2.1978, "step": 5975 }, { "epoch": 0.2, "grad_norm": 0.6981500387191772, "learning_rate": 1.8353428466603338e-05, "loss": 2.2386, "step": 5976 }, { "epoch": 0.2, "grad_norm": 0.7420014142990112, "learning_rate": 1.8352844116003776e-05, "loss": 2.1666, "step": 5977 }, { "epoch": 0.2, "grad_norm": 0.7116607427597046, "learning_rate": 1.8352259671038683e-05, "loss": 2.1537, "step": 5978 }, { "epoch": 0.2, "grad_norm": 0.7906265258789062, "learning_rate": 1.8351675131714647e-05, "loss": 2.2019, "step": 5979 }, { "epoch": 0.2, "grad_norm": 0.7103914618492126, "learning_rate": 1.8351090498038284e-05, "loss": 2.1767, "step": 5980 }, { "epoch": 0.2, "grad_norm": 0.6715589165687561, "learning_rate": 1.8350505770016192e-05, "loss": 2.1168, "step": 5981 }, { "epoch": 0.2, "grad_norm": 0.7194783091545105, "learning_rate": 1.8349920947654983e-05, "loss": 2.1955, "step": 5982 }, { "epoch": 0.2, "grad_norm": 0.7149052023887634, "learning_rate": 1.834933603096126e-05, "loss": 2.1642, "step": 5983 }, { "epoch": 0.2, "grad_norm": 0.6783707141876221, "learning_rate": 1.8348751019941628e-05, "loss": 2.1749, "step": 5984 }, { "epoch": 0.2, "grad_norm": 0.7093323469161987, "learning_rate": 1.8348165914602706e-05, "loss": 2.14, "step": 5985 }, { "epoch": 0.2, "grad_norm": 0.7068187594413757, "learning_rate": 1.8347580714951094e-05, "loss": 2.2053, "step": 5986 }, { "epoch": 0.2, "grad_norm": 0.691927969455719, "learning_rate": 1.834699542099341e-05, "loss": 2.196, "step": 5987 }, { "epoch": 0.2, "grad_norm": 0.7062950134277344, "learning_rate": 1.834641003273626e-05, "loss": 2.1975, "step": 5988 }, { "epoch": 0.2, "grad_norm": 0.6762887239456177, "learning_rate": 1.8345824550186262e-05, "loss": 2.2288, "step": 5989 }, { "epoch": 0.2, "grad_norm": 0.716850221157074, "learning_rate": 1.8345238973350028e-05, "loss": 2.2527, "step": 5990 }, { "epoch": 0.2, "grad_norm": 0.6900080442428589, "learning_rate": 1.834465330223418e-05, "loss": 2.2255, "step": 5991 }, { "epoch": 0.2, "grad_norm": 0.7132346630096436, "learning_rate": 1.8344067536845324e-05, "loss": 2.2122, "step": 5992 }, { "epoch": 0.2, "grad_norm": 0.7375802397727966, "learning_rate": 1.8343481677190084e-05, "loss": 2.1508, "step": 5993 }, { "epoch": 0.2, "grad_norm": 0.685304582118988, "learning_rate": 1.834289572327508e-05, "loss": 2.1748, "step": 5994 }, { "epoch": 0.2, "grad_norm": 0.7087296843528748, "learning_rate": 1.834230967510693e-05, "loss": 2.1564, "step": 5995 }, { "epoch": 0.2, "grad_norm": 0.7595576047897339, "learning_rate": 1.834172353269225e-05, "loss": 2.2485, "step": 5996 }, { "epoch": 0.2, "grad_norm": 0.6874471306800842, "learning_rate": 1.8341137296037674e-05, "loss": 2.2287, "step": 5997 }, { "epoch": 0.2, "grad_norm": 0.6641967296600342, "learning_rate": 1.834055096514981e-05, "loss": 2.1199, "step": 5998 }, { "epoch": 0.2, "grad_norm": 0.6969876885414124, "learning_rate": 1.833996454003529e-05, "loss": 2.196, "step": 5999 }, { "epoch": 0.2, "grad_norm": 0.7392247319221497, "learning_rate": 1.8339378020700742e-05, "loss": 2.2641, "step": 6000 }, { "epoch": 0.2, "grad_norm": 0.7043234705924988, "learning_rate": 1.8338791407152786e-05, "loss": 2.1198, "step": 6001 }, { "epoch": 0.2, "grad_norm": 0.727663516998291, "learning_rate": 1.8338204699398053e-05, "loss": 2.2133, "step": 6002 }, { "epoch": 0.2, "grad_norm": 0.7107959389686584, "learning_rate": 1.8337617897443166e-05, "loss": 2.1887, "step": 6003 }, { "epoch": 0.2, "grad_norm": 0.6774876117706299, "learning_rate": 1.8337031001294763e-05, "loss": 2.1195, "step": 6004 }, { "epoch": 0.2, "grad_norm": 0.7117305397987366, "learning_rate": 1.8336444010959468e-05, "loss": 2.1784, "step": 6005 }, { "epoch": 0.2, "grad_norm": 0.7036505341529846, "learning_rate": 1.8335856926443917e-05, "loss": 2.1756, "step": 6006 }, { "epoch": 0.2, "grad_norm": 0.7028264403343201, "learning_rate": 1.8335269747754735e-05, "loss": 2.1763, "step": 6007 }, { "epoch": 0.2, "grad_norm": 0.7299203872680664, "learning_rate": 1.8334682474898563e-05, "loss": 2.2028, "step": 6008 }, { "epoch": 0.2, "grad_norm": 0.6820215582847595, "learning_rate": 1.833409510788203e-05, "loss": 2.1637, "step": 6009 }, { "epoch": 0.2, "grad_norm": 0.6902409195899963, "learning_rate": 1.833350764671178e-05, "loss": 2.173, "step": 6010 }, { "epoch": 0.2, "grad_norm": 0.7083802819252014, "learning_rate": 1.833292009139444e-05, "loss": 2.255, "step": 6011 }, { "epoch": 0.2, "grad_norm": 0.6939976811408997, "learning_rate": 1.8332332441936653e-05, "loss": 2.1539, "step": 6012 }, { "epoch": 0.2, "grad_norm": 0.7012671828269958, "learning_rate": 1.833174469834506e-05, "loss": 2.2298, "step": 6013 }, { "epoch": 0.2, "grad_norm": 0.7109884023666382, "learning_rate": 1.83311568606263e-05, "loss": 2.1514, "step": 6014 }, { "epoch": 0.2, "grad_norm": 0.6631449460983276, "learning_rate": 1.8330568928787005e-05, "loss": 2.2016, "step": 6015 }, { "epoch": 0.2, "grad_norm": 0.6718214154243469, "learning_rate": 1.832998090283383e-05, "loss": 2.1483, "step": 6016 }, { "epoch": 0.2, "grad_norm": 0.7046425938606262, "learning_rate": 1.832939278277341e-05, "loss": 2.1419, "step": 6017 }, { "epoch": 0.2, "grad_norm": 0.686935305595398, "learning_rate": 1.832880456861239e-05, "loss": 2.1696, "step": 6018 }, { "epoch": 0.2, "grad_norm": 0.6837860941886902, "learning_rate": 1.8328216260357422e-05, "loss": 2.1746, "step": 6019 }, { "epoch": 0.2, "grad_norm": 0.6991344094276428, "learning_rate": 1.8327627858015147e-05, "loss": 2.1396, "step": 6020 }, { "epoch": 0.2, "grad_norm": 0.730015754699707, "learning_rate": 1.8327039361592208e-05, "loss": 2.1834, "step": 6021 }, { "epoch": 0.2, "grad_norm": 0.6774981617927551, "learning_rate": 1.8326450771095266e-05, "loss": 2.2054, "step": 6022 }, { "epoch": 0.2, "grad_norm": 0.7134107351303101, "learning_rate": 1.8325862086530958e-05, "loss": 2.2073, "step": 6023 }, { "epoch": 0.2, "grad_norm": 0.7216014862060547, "learning_rate": 1.832527330790594e-05, "loss": 2.1177, "step": 6024 }, { "epoch": 0.2, "grad_norm": 0.7144160866737366, "learning_rate": 1.8324684435226864e-05, "loss": 2.1689, "step": 6025 }, { "epoch": 0.2, "grad_norm": 0.7272300720214844, "learning_rate": 1.832409546850038e-05, "loss": 2.3307, "step": 6026 }, { "epoch": 0.2, "grad_norm": 0.7238339185714722, "learning_rate": 1.8323506407733148e-05, "loss": 2.2375, "step": 6027 }, { "epoch": 0.2, "grad_norm": 0.7258115410804749, "learning_rate": 1.8322917252931814e-05, "loss": 2.2396, "step": 6028 }, { "epoch": 0.2, "grad_norm": 0.6923658847808838, "learning_rate": 1.8322328004103044e-05, "loss": 2.1061, "step": 6029 }, { "epoch": 0.2, "grad_norm": 0.7527838349342346, "learning_rate": 1.8321738661253484e-05, "loss": 2.1484, "step": 6030 }, { "epoch": 0.2, "grad_norm": 0.6811270713806152, "learning_rate": 1.83211492243898e-05, "loss": 2.1407, "step": 6031 }, { "epoch": 0.2, "grad_norm": 0.7285508513450623, "learning_rate": 1.8320559693518647e-05, "loss": 2.2233, "step": 6032 }, { "epoch": 0.2, "grad_norm": 0.710042417049408, "learning_rate": 1.831997006864669e-05, "loss": 2.2382, "step": 6033 }, { "epoch": 0.2, "grad_norm": 0.7149767279624939, "learning_rate": 1.8319380349780583e-05, "loss": 2.2301, "step": 6034 }, { "epoch": 0.2, "grad_norm": 0.6899021863937378, "learning_rate": 1.8318790536926996e-05, "loss": 2.1824, "step": 6035 }, { "epoch": 0.2, "grad_norm": 0.6782851815223694, "learning_rate": 1.831820063009259e-05, "loss": 2.2167, "step": 6036 }, { "epoch": 0.2, "grad_norm": 0.6982631683349609, "learning_rate": 1.8317610629284025e-05, "loss": 2.2101, "step": 6037 }, { "epoch": 0.2, "grad_norm": 0.6747298240661621, "learning_rate": 1.8317020534507974e-05, "loss": 2.1583, "step": 6038 }, { "epoch": 0.2, "grad_norm": 0.7257499694824219, "learning_rate": 1.8316430345771096e-05, "loss": 2.2401, "step": 6039 }, { "epoch": 0.2, "grad_norm": 0.7261691689491272, "learning_rate": 1.8315840063080063e-05, "loss": 2.159, "step": 6040 }, { "epoch": 0.2, "grad_norm": 0.7353494763374329, "learning_rate": 1.831524968644154e-05, "loss": 2.2041, "step": 6041 }, { "epoch": 0.2, "grad_norm": 0.8072969913482666, "learning_rate": 1.83146592158622e-05, "loss": 2.1891, "step": 6042 }, { "epoch": 0.2, "grad_norm": 0.6938116550445557, "learning_rate": 1.8314068651348713e-05, "loss": 2.1847, "step": 6043 }, { "epoch": 0.2, "grad_norm": 0.7015967965126038, "learning_rate": 1.8313477992907752e-05, "loss": 2.1191, "step": 6044 }, { "epoch": 0.2, "grad_norm": 0.685150682926178, "learning_rate": 1.831288724054599e-05, "loss": 2.2588, "step": 6045 }, { "epoch": 0.2, "grad_norm": 0.6879811882972717, "learning_rate": 1.8312296394270096e-05, "loss": 2.2719, "step": 6046 }, { "epoch": 0.2, "grad_norm": 0.765891969203949, "learning_rate": 1.831170545408675e-05, "loss": 2.1679, "step": 6047 }, { "epoch": 0.2, "grad_norm": 0.7400086522102356, "learning_rate": 1.831111442000263e-05, "loss": 2.2094, "step": 6048 }, { "epoch": 0.2, "grad_norm": 0.7193347215652466, "learning_rate": 1.8310523292024407e-05, "loss": 2.1197, "step": 6049 }, { "epoch": 0.2, "grad_norm": 0.7543421983718872, "learning_rate": 1.8309932070158763e-05, "loss": 2.2343, "step": 6050 }, { "epoch": 0.2, "grad_norm": 0.7831417322158813, "learning_rate": 1.830934075441238e-05, "loss": 2.2343, "step": 6051 }, { "epoch": 0.2, "grad_norm": 0.7583911418914795, "learning_rate": 1.830874934479193e-05, "loss": 2.1702, "step": 6052 }, { "epoch": 0.2, "grad_norm": 0.7281417846679688, "learning_rate": 1.8308157841304102e-05, "loss": 2.2591, "step": 6053 }, { "epoch": 0.2, "grad_norm": 0.6615597605705261, "learning_rate": 1.8307566243955573e-05, "loss": 2.1288, "step": 6054 }, { "epoch": 0.2, "grad_norm": 0.724187433719635, "learning_rate": 1.8306974552753032e-05, "loss": 2.2371, "step": 6055 }, { "epoch": 0.2, "grad_norm": 0.6731974482536316, "learning_rate": 1.830638276770316e-05, "loss": 2.1724, "step": 6056 }, { "epoch": 0.2, "grad_norm": 0.7219492793083191, "learning_rate": 1.8305790888812644e-05, "loss": 2.1728, "step": 6057 }, { "epoch": 0.2, "grad_norm": 0.7225202918052673, "learning_rate": 1.830519891608817e-05, "loss": 2.1779, "step": 6058 }, { "epoch": 0.2, "grad_norm": 0.7043360471725464, "learning_rate": 1.8304606849536425e-05, "loss": 2.1872, "step": 6059 }, { "epoch": 0.2, "grad_norm": 0.6997250318527222, "learning_rate": 1.83040146891641e-05, "loss": 2.1889, "step": 6060 }, { "epoch": 0.2, "grad_norm": 0.7361088991165161, "learning_rate": 1.830342243497788e-05, "loss": 2.2281, "step": 6061 }, { "epoch": 0.2, "grad_norm": 0.738519549369812, "learning_rate": 1.8302830086984465e-05, "loss": 2.2028, "step": 6062 }, { "epoch": 0.2, "grad_norm": 0.7191739678382874, "learning_rate": 1.8302237645190543e-05, "loss": 2.15, "step": 6063 }, { "epoch": 0.2, "grad_norm": 0.7262798547744751, "learning_rate": 1.8301645109602798e-05, "loss": 2.184, "step": 6064 }, { "epoch": 0.2, "grad_norm": 0.6967772245407104, "learning_rate": 1.830105248022794e-05, "loss": 2.1784, "step": 6065 }, { "epoch": 0.2, "grad_norm": 0.7169735431671143, "learning_rate": 1.830045975707265e-05, "loss": 2.1693, "step": 6066 }, { "epoch": 0.2, "grad_norm": 0.7417337894439697, "learning_rate": 1.8299866940143635e-05, "loss": 2.1241, "step": 6067 }, { "epoch": 0.2, "grad_norm": 0.7590447664260864, "learning_rate": 1.8299274029447583e-05, "loss": 2.2769, "step": 6068 }, { "epoch": 0.2, "grad_norm": 0.7754830121994019, "learning_rate": 1.82986810249912e-05, "loss": 2.2303, "step": 6069 }, { "epoch": 0.2, "grad_norm": 0.727974534034729, "learning_rate": 1.829808792678118e-05, "loss": 2.2446, "step": 6070 }, { "epoch": 0.2, "grad_norm": 0.7034462690353394, "learning_rate": 1.8297494734824225e-05, "loss": 2.2155, "step": 6071 }, { "epoch": 0.2, "grad_norm": 0.7213855981826782, "learning_rate": 1.829690144912704e-05, "loss": 2.1292, "step": 6072 }, { "epoch": 0.2, "grad_norm": 0.7275819778442383, "learning_rate": 1.8296308069696322e-05, "loss": 2.2415, "step": 6073 }, { "epoch": 0.2, "grad_norm": 0.706141471862793, "learning_rate": 1.829571459653878e-05, "loss": 2.1955, "step": 6074 }, { "epoch": 0.2, "grad_norm": 0.7414873242378235, "learning_rate": 1.8295121029661116e-05, "loss": 2.1622, "step": 6075 }, { "epoch": 0.2, "grad_norm": 0.7358091473579407, "learning_rate": 1.8294527369070036e-05, "loss": 2.1321, "step": 6076 }, { "epoch": 0.2, "grad_norm": 0.6925868391990662, "learning_rate": 1.8293933614772245e-05, "loss": 2.2351, "step": 6077 }, { "epoch": 0.2, "grad_norm": 0.7007365226745605, "learning_rate": 1.8293339766774454e-05, "loss": 2.1834, "step": 6078 }, { "epoch": 0.2, "grad_norm": 0.7314189672470093, "learning_rate": 1.829274582508337e-05, "loss": 2.1988, "step": 6079 }, { "epoch": 0.2, "grad_norm": 0.7194808721542358, "learning_rate": 1.8292151789705707e-05, "loss": 2.1799, "step": 6080 }, { "epoch": 0.2, "grad_norm": 0.6773965954780579, "learning_rate": 1.829155766064817e-05, "loss": 2.2208, "step": 6081 }, { "epoch": 0.2, "grad_norm": 0.6956982612609863, "learning_rate": 1.8290963437917474e-05, "loss": 2.1525, "step": 6082 }, { "epoch": 0.2, "grad_norm": 0.7528073787689209, "learning_rate": 1.829036912152033e-05, "loss": 2.1559, "step": 6083 }, { "epoch": 0.2, "grad_norm": 0.6957526803016663, "learning_rate": 1.828977471146346e-05, "loss": 2.183, "step": 6084 }, { "epoch": 0.2, "grad_norm": 0.6852614879608154, "learning_rate": 1.828918020775357e-05, "loss": 2.2011, "step": 6085 }, { "epoch": 0.2, "grad_norm": 0.704866349697113, "learning_rate": 1.828858561039738e-05, "loss": 2.2265, "step": 6086 }, { "epoch": 0.2, "grad_norm": 0.7121172547340393, "learning_rate": 1.8287990919401607e-05, "loss": 2.1845, "step": 6087 }, { "epoch": 0.2, "grad_norm": 0.7090650796890259, "learning_rate": 1.8287396134772967e-05, "loss": 2.2106, "step": 6088 }, { "epoch": 0.2, "grad_norm": 0.7217909693717957, "learning_rate": 1.8286801256518187e-05, "loss": 2.2171, "step": 6089 }, { "epoch": 0.2, "grad_norm": 0.6990750432014465, "learning_rate": 1.8286206284643983e-05, "loss": 2.1908, "step": 6090 }, { "epoch": 0.2, "grad_norm": 0.6932628154754639, "learning_rate": 1.8285611219157076e-05, "loss": 2.1829, "step": 6091 }, { "epoch": 0.2, "grad_norm": 0.6944264769554138, "learning_rate": 1.8285016060064186e-05, "loss": 2.2011, "step": 6092 }, { "epoch": 0.2, "grad_norm": 0.6755760908126831, "learning_rate": 1.8284420807372044e-05, "loss": 2.1698, "step": 6093 }, { "epoch": 0.2, "grad_norm": 0.7104582190513611, "learning_rate": 1.828382546108737e-05, "loss": 2.204, "step": 6094 }, { "epoch": 0.2, "grad_norm": 0.7113857865333557, "learning_rate": 1.8283230021216888e-05, "loss": 2.2104, "step": 6095 }, { "epoch": 0.2, "grad_norm": 0.6951066255569458, "learning_rate": 1.828263448776733e-05, "loss": 2.1829, "step": 6096 }, { "epoch": 0.2, "grad_norm": 0.691688060760498, "learning_rate": 1.8282038860745424e-05, "loss": 2.2764, "step": 6097 }, { "epoch": 0.2, "grad_norm": 0.7030866742134094, "learning_rate": 1.8281443140157893e-05, "loss": 2.1391, "step": 6098 }, { "epoch": 0.2, "grad_norm": 0.7055807709693909, "learning_rate": 1.828084732601147e-05, "loss": 2.2015, "step": 6099 }, { "epoch": 0.2, "grad_norm": 0.7225607633590698, "learning_rate": 1.828025141831289e-05, "loss": 2.2028, "step": 6100 }, { "epoch": 0.2, "grad_norm": 0.7286145091056824, "learning_rate": 1.8279655417068883e-05, "loss": 2.1027, "step": 6101 }, { "epoch": 0.2, "grad_norm": 0.6851641535758972, "learning_rate": 1.8279059322286177e-05, "loss": 2.2547, "step": 6102 }, { "epoch": 0.2, "grad_norm": 0.6705181002616882, "learning_rate": 1.8278463133971515e-05, "loss": 2.1414, "step": 6103 }, { "epoch": 0.2, "grad_norm": 0.7223976850509644, "learning_rate": 1.827786685213163e-05, "loss": 2.1997, "step": 6104 }, { "epoch": 0.2, "grad_norm": 0.6816443800926208, "learning_rate": 1.827727047677325e-05, "loss": 2.2058, "step": 6105 }, { "epoch": 0.2, "grad_norm": 0.7550622224807739, "learning_rate": 1.8276674007903122e-05, "loss": 2.1709, "step": 6106 }, { "epoch": 0.2, "grad_norm": 0.7704421877861023, "learning_rate": 1.8276077445527983e-05, "loss": 2.1888, "step": 6107 }, { "epoch": 0.2, "grad_norm": 0.745924711227417, "learning_rate": 1.827548078965457e-05, "loss": 2.224, "step": 6108 }, { "epoch": 0.2, "grad_norm": 0.6951000690460205, "learning_rate": 1.8274884040289623e-05, "loss": 2.1383, "step": 6109 }, { "epoch": 0.2, "grad_norm": 0.6771449446678162, "learning_rate": 1.8274287197439887e-05, "loss": 2.1856, "step": 6110 }, { "epoch": 0.2, "grad_norm": 0.7057459354400635, "learning_rate": 1.8273690261112106e-05, "loss": 2.182, "step": 6111 }, { "epoch": 0.2, "grad_norm": 0.7333860993385315, "learning_rate": 1.827309323131302e-05, "loss": 2.14, "step": 6112 }, { "epoch": 0.2, "grad_norm": 0.7036881446838379, "learning_rate": 1.8272496108049377e-05, "loss": 2.1503, "step": 6113 }, { "epoch": 0.2, "grad_norm": 0.6983610391616821, "learning_rate": 1.8271898891327917e-05, "loss": 2.1565, "step": 6114 }, { "epoch": 0.2, "grad_norm": 0.7313774824142456, "learning_rate": 1.8271301581155395e-05, "loss": 2.1619, "step": 6115 }, { "epoch": 0.2, "grad_norm": 0.6971172094345093, "learning_rate": 1.827070417753855e-05, "loss": 2.1996, "step": 6116 }, { "epoch": 0.2, "grad_norm": 0.7039988040924072, "learning_rate": 1.827010668048414e-05, "loss": 2.2181, "step": 6117 }, { "epoch": 0.2, "grad_norm": 0.7069287300109863, "learning_rate": 1.826950908999891e-05, "loss": 2.2428, "step": 6118 }, { "epoch": 0.2, "grad_norm": 0.695624589920044, "learning_rate": 1.8268911406089615e-05, "loss": 2.1869, "step": 6119 }, { "epoch": 0.2, "grad_norm": 0.7200223803520203, "learning_rate": 1.8268313628763004e-05, "loss": 2.1601, "step": 6120 }, { "epoch": 0.2, "grad_norm": 0.704347550868988, "learning_rate": 1.826771575802583e-05, "loss": 2.0877, "step": 6121 }, { "epoch": 0.2, "grad_norm": 0.6993927359580994, "learning_rate": 1.826711779388485e-05, "loss": 2.146, "step": 6122 }, { "epoch": 0.2, "grad_norm": 0.7171605229377747, "learning_rate": 1.826651973634682e-05, "loss": 2.1704, "step": 6123 }, { "epoch": 0.2, "grad_norm": 0.7352426052093506, "learning_rate": 1.8265921585418488e-05, "loss": 2.2503, "step": 6124 }, { "epoch": 0.2, "grad_norm": 0.6969130039215088, "learning_rate": 1.8265323341106628e-05, "loss": 2.1811, "step": 6125 }, { "epoch": 0.2, "grad_norm": 0.7109573483467102, "learning_rate": 1.826472500341798e-05, "loss": 2.2227, "step": 6126 }, { "epoch": 0.2, "grad_norm": 0.745374321937561, "learning_rate": 1.8264126572359317e-05, "loss": 2.256, "step": 6127 }, { "epoch": 0.2, "grad_norm": 0.6687126755714417, "learning_rate": 1.8263528047937395e-05, "loss": 2.1171, "step": 6128 }, { "epoch": 0.2, "grad_norm": 0.7024168372154236, "learning_rate": 1.8262929430158974e-05, "loss": 2.1857, "step": 6129 }, { "epoch": 0.2, "grad_norm": 0.705483078956604, "learning_rate": 1.826233071903082e-05, "loss": 2.3051, "step": 6130 }, { "epoch": 0.2, "grad_norm": 0.7141180634498596, "learning_rate": 1.8261731914559698e-05, "loss": 2.1743, "step": 6131 }, { "epoch": 0.2, "grad_norm": 0.6809024214744568, "learning_rate": 1.8261133016752368e-05, "loss": 2.1463, "step": 6132 }, { "epoch": 0.2, "grad_norm": 0.7426943778991699, "learning_rate": 1.82605340256156e-05, "loss": 2.1639, "step": 6133 }, { "epoch": 0.2, "grad_norm": 0.7214270234107971, "learning_rate": 1.8259934941156157e-05, "loss": 2.1537, "step": 6134 }, { "epoch": 0.2, "grad_norm": 0.7001158595085144, "learning_rate": 1.8259335763380816e-05, "loss": 2.2022, "step": 6135 }, { "epoch": 0.2, "grad_norm": 0.7222607135772705, "learning_rate": 1.8258736492296335e-05, "loss": 2.1775, "step": 6136 }, { "epoch": 0.2, "grad_norm": 0.7160484194755554, "learning_rate": 1.825813712790949e-05, "loss": 2.2132, "step": 6137 }, { "epoch": 0.2, "grad_norm": 0.701562225818634, "learning_rate": 1.825753767022705e-05, "loss": 2.1854, "step": 6138 }, { "epoch": 0.2, "grad_norm": 0.690872848033905, "learning_rate": 1.825693811925579e-05, "loss": 2.2013, "step": 6139 }, { "epoch": 0.2, "grad_norm": 0.7470741271972656, "learning_rate": 1.825633847500248e-05, "loss": 2.2429, "step": 6140 }, { "epoch": 0.2, "grad_norm": 0.7656940221786499, "learning_rate": 1.8255738737473904e-05, "loss": 2.1491, "step": 6141 }, { "epoch": 0.2, "grad_norm": 0.6991948485374451, "learning_rate": 1.8255138906676824e-05, "loss": 2.1446, "step": 6142 }, { "epoch": 0.2, "grad_norm": 0.6899681091308594, "learning_rate": 1.8254538982618023e-05, "loss": 2.1528, "step": 6143 }, { "epoch": 0.2, "grad_norm": 0.7008669376373291, "learning_rate": 1.825393896530428e-05, "loss": 2.1887, "step": 6144 }, { "epoch": 0.2, "grad_norm": 0.6513592004776001, "learning_rate": 1.825333885474237e-05, "loss": 2.1329, "step": 6145 }, { "epoch": 0.2, "grad_norm": 0.712500810623169, "learning_rate": 1.8252738650939073e-05, "loss": 2.1613, "step": 6146 }, { "epoch": 0.2, "grad_norm": 0.6816225051879883, "learning_rate": 1.8252138353901175e-05, "loss": 2.174, "step": 6147 }, { "epoch": 0.2, "grad_norm": 0.7048233151435852, "learning_rate": 1.8251537963635456e-05, "loss": 2.25, "step": 6148 }, { "epoch": 0.2, "grad_norm": 0.7450661659240723, "learning_rate": 1.8250937480148693e-05, "loss": 2.2452, "step": 6149 }, { "epoch": 0.2, "grad_norm": 0.7033609747886658, "learning_rate": 1.8250336903447675e-05, "loss": 2.1024, "step": 6150 }, { "epoch": 0.2, "grad_norm": 0.6958092451095581, "learning_rate": 1.8249736233539185e-05, "loss": 2.1157, "step": 6151 }, { "epoch": 0.2, "grad_norm": 0.7224093675613403, "learning_rate": 1.824913547043001e-05, "loss": 2.1944, "step": 6152 }, { "epoch": 0.2, "grad_norm": 0.7134841680526733, "learning_rate": 1.8248534614126937e-05, "loss": 2.2418, "step": 6153 }, { "epoch": 0.2, "grad_norm": 0.7342516183853149, "learning_rate": 1.8247933664636754e-05, "loss": 2.2417, "step": 6154 }, { "epoch": 0.2, "grad_norm": 0.7000675797462463, "learning_rate": 1.8247332621966252e-05, "loss": 2.2424, "step": 6155 }, { "epoch": 0.2, "grad_norm": 0.6992802619934082, "learning_rate": 1.8246731486122218e-05, "loss": 2.2312, "step": 6156 }, { "epoch": 0.2, "grad_norm": 0.7231718301773071, "learning_rate": 1.8246130257111444e-05, "loss": 2.2105, "step": 6157 }, { "epoch": 0.2, "grad_norm": 0.6765751242637634, "learning_rate": 1.8245528934940723e-05, "loss": 2.1141, "step": 6158 }, { "epoch": 0.2, "grad_norm": 0.7158822417259216, "learning_rate": 1.824492751961685e-05, "loss": 2.237, "step": 6159 }, { "epoch": 0.2, "grad_norm": 0.717758059501648, "learning_rate": 1.8244326011146617e-05, "loss": 2.2047, "step": 6160 }, { "epoch": 0.2, "grad_norm": 0.7053166627883911, "learning_rate": 1.824372440953682e-05, "loss": 2.1299, "step": 6161 }, { "epoch": 0.21, "grad_norm": 0.6988082528114319, "learning_rate": 1.8243122714794257e-05, "loss": 2.2999, "step": 6162 }, { "epoch": 0.21, "grad_norm": 0.6660996079444885, "learning_rate": 1.8242520926925723e-05, "loss": 2.142, "step": 6163 }, { "epoch": 0.21, "grad_norm": 0.6848520636558533, "learning_rate": 1.824191904593802e-05, "loss": 2.1853, "step": 6164 }, { "epoch": 0.21, "grad_norm": 0.6864492893218994, "learning_rate": 1.8241317071837946e-05, "loss": 2.196, "step": 6165 }, { "epoch": 0.21, "grad_norm": 0.7054276466369629, "learning_rate": 1.8240715004632302e-05, "loss": 2.2011, "step": 6166 }, { "epoch": 0.21, "grad_norm": 0.6922920346260071, "learning_rate": 1.8240112844327888e-05, "loss": 2.1856, "step": 6167 }, { "epoch": 0.21, "grad_norm": 0.6933912634849548, "learning_rate": 1.8239510590931507e-05, "loss": 2.183, "step": 6168 }, { "epoch": 0.21, "grad_norm": 0.7077281475067139, "learning_rate": 1.8238908244449966e-05, "loss": 2.1729, "step": 6169 }, { "epoch": 0.21, "grad_norm": 0.7017167806625366, "learning_rate": 1.8238305804890068e-05, "loss": 2.2121, "step": 6170 }, { "epoch": 0.21, "grad_norm": 0.7212790250778198, "learning_rate": 1.8237703272258617e-05, "loss": 2.1961, "step": 6171 }, { "epoch": 0.21, "grad_norm": 0.692525327205658, "learning_rate": 1.8237100646562426e-05, "loss": 2.1353, "step": 6172 }, { "epoch": 0.21, "grad_norm": 0.7116010189056396, "learning_rate": 1.8236497927808295e-05, "loss": 2.0804, "step": 6173 }, { "epoch": 0.21, "grad_norm": 0.710145890712738, "learning_rate": 1.823589511600304e-05, "loss": 2.2376, "step": 6174 }, { "epoch": 0.21, "grad_norm": 0.6760478019714355, "learning_rate": 1.823529221115347e-05, "loss": 2.113, "step": 6175 }, { "epoch": 0.21, "grad_norm": 0.6640588045120239, "learning_rate": 1.8234689213266393e-05, "loss": 2.1815, "step": 6176 }, { "epoch": 0.21, "grad_norm": 0.7036170363426208, "learning_rate": 1.823408612234862e-05, "loss": 2.1824, "step": 6177 }, { "epoch": 0.21, "grad_norm": 0.6894940733909607, "learning_rate": 1.8233482938406974e-05, "loss": 2.1956, "step": 6178 }, { "epoch": 0.21, "grad_norm": 0.6891513466835022, "learning_rate": 1.823287966144826e-05, "loss": 2.1908, "step": 6179 }, { "epoch": 0.21, "grad_norm": 0.6755658984184265, "learning_rate": 1.8232276291479297e-05, "loss": 2.1949, "step": 6180 }, { "epoch": 0.21, "grad_norm": 0.702664852142334, "learning_rate": 1.82316728285069e-05, "loss": 2.2577, "step": 6181 }, { "epoch": 0.21, "grad_norm": 0.6811558604240417, "learning_rate": 1.823106927253789e-05, "loss": 2.1818, "step": 6182 }, { "epoch": 0.21, "grad_norm": 0.727492094039917, "learning_rate": 1.823046562357908e-05, "loss": 2.1927, "step": 6183 }, { "epoch": 0.21, "grad_norm": 0.6844305992126465, "learning_rate": 1.8229861881637296e-05, "loss": 2.151, "step": 6184 }, { "epoch": 0.21, "grad_norm": 0.6705989241600037, "learning_rate": 1.8229258046719356e-05, "loss": 2.0927, "step": 6185 }, { "epoch": 0.21, "grad_norm": 0.7396030426025391, "learning_rate": 1.822865411883208e-05, "loss": 2.2514, "step": 6186 }, { "epoch": 0.21, "grad_norm": 0.7141703963279724, "learning_rate": 1.822805009798229e-05, "loss": 2.154, "step": 6187 }, { "epoch": 0.21, "grad_norm": 0.6837753057479858, "learning_rate": 1.8227445984176815e-05, "loss": 2.133, "step": 6188 }, { "epoch": 0.21, "grad_norm": 0.6862742304801941, "learning_rate": 1.8226841777422474e-05, "loss": 2.1477, "step": 6189 }, { "epoch": 0.21, "grad_norm": 0.7126458883285522, "learning_rate": 1.8226237477726097e-05, "loss": 2.1091, "step": 6190 }, { "epoch": 0.21, "grad_norm": 0.6781268119812012, "learning_rate": 1.8225633085094513e-05, "loss": 2.1036, "step": 6191 }, { "epoch": 0.21, "grad_norm": 0.6711091995239258, "learning_rate": 1.8225028599534544e-05, "loss": 2.2101, "step": 6192 }, { "epoch": 0.21, "grad_norm": 0.6815574765205383, "learning_rate": 1.8224424021053028e-05, "loss": 2.1765, "step": 6193 }, { "epoch": 0.21, "grad_norm": 0.7124742269515991, "learning_rate": 1.822381934965678e-05, "loss": 2.1639, "step": 6194 }, { "epoch": 0.21, "grad_norm": 0.6860081553459167, "learning_rate": 1.8223214585352645e-05, "loss": 2.2187, "step": 6195 }, { "epoch": 0.21, "grad_norm": 0.683290958404541, "learning_rate": 1.8222609728147454e-05, "loss": 2.1457, "step": 6196 }, { "epoch": 0.21, "grad_norm": 0.7669884562492371, "learning_rate": 1.8222004778048033e-05, "loss": 2.2802, "step": 6197 }, { "epoch": 0.21, "grad_norm": 0.7044051885604858, "learning_rate": 1.8221399735061225e-05, "loss": 2.2255, "step": 6198 }, { "epoch": 0.21, "grad_norm": 0.7378009557723999, "learning_rate": 1.8220794599193855e-05, "loss": 2.2224, "step": 6199 }, { "epoch": 0.21, "grad_norm": 0.684019923210144, "learning_rate": 1.822018937045277e-05, "loss": 2.1293, "step": 6200 }, { "epoch": 0.21, "grad_norm": 0.7094712257385254, "learning_rate": 1.8219584048844802e-05, "loss": 2.2337, "step": 6201 }, { "epoch": 0.21, "grad_norm": 0.7301838397979736, "learning_rate": 1.821897863437679e-05, "loss": 2.226, "step": 6202 }, { "epoch": 0.21, "grad_norm": 0.7089719176292419, "learning_rate": 1.8218373127055577e-05, "loss": 2.2129, "step": 6203 }, { "epoch": 0.21, "grad_norm": 0.7048456072807312, "learning_rate": 1.8217767526888e-05, "loss": 2.1371, "step": 6204 }, { "epoch": 0.21, "grad_norm": 0.693789541721344, "learning_rate": 1.8217161833880896e-05, "loss": 2.2254, "step": 6205 }, { "epoch": 0.21, "grad_norm": 0.67508465051651, "learning_rate": 1.8216556048041118e-05, "loss": 2.2422, "step": 6206 }, { "epoch": 0.21, "grad_norm": 0.6962656378746033, "learning_rate": 1.8215950169375503e-05, "loss": 2.1848, "step": 6207 }, { "epoch": 0.21, "grad_norm": 0.754131555557251, "learning_rate": 1.82153441978909e-05, "loss": 2.2022, "step": 6208 }, { "epoch": 0.21, "grad_norm": 0.6938959360122681, "learning_rate": 1.8214738133594153e-05, "loss": 2.2416, "step": 6209 }, { "epoch": 0.21, "grad_norm": 0.6860623359680176, "learning_rate": 1.8214131976492108e-05, "loss": 2.2005, "step": 6210 }, { "epoch": 0.21, "grad_norm": 0.6835795640945435, "learning_rate": 1.821352572659161e-05, "loss": 2.2198, "step": 6211 }, { "epoch": 0.21, "grad_norm": 0.7212157249450684, "learning_rate": 1.8212919383899518e-05, "loss": 2.2119, "step": 6212 }, { "epoch": 0.21, "grad_norm": 0.6794966459274292, "learning_rate": 1.8212312948422674e-05, "loss": 2.1172, "step": 6213 }, { "epoch": 0.21, "grad_norm": 0.7304803133010864, "learning_rate": 1.8211706420167932e-05, "loss": 2.1489, "step": 6214 }, { "epoch": 0.21, "grad_norm": 0.6906092762947083, "learning_rate": 1.821109979914214e-05, "loss": 2.2018, "step": 6215 }, { "epoch": 0.21, "grad_norm": 0.6878266930580139, "learning_rate": 1.8210493085352158e-05, "loss": 2.1808, "step": 6216 }, { "epoch": 0.21, "grad_norm": 0.7326026558876038, "learning_rate": 1.8209886278804837e-05, "loss": 2.1339, "step": 6217 }, { "epoch": 0.21, "grad_norm": 0.7428187727928162, "learning_rate": 1.820927937950703e-05, "loss": 2.151, "step": 6218 }, { "epoch": 0.21, "grad_norm": 0.6972575783729553, "learning_rate": 1.8208672387465597e-05, "loss": 2.2047, "step": 6219 }, { "epoch": 0.21, "grad_norm": 0.6899797320365906, "learning_rate": 1.8208065302687393e-05, "loss": 2.1237, "step": 6220 }, { "epoch": 0.21, "grad_norm": 0.7210092544555664, "learning_rate": 1.820745812517928e-05, "loss": 2.1802, "step": 6221 }, { "epoch": 0.21, "grad_norm": 0.69561767578125, "learning_rate": 1.8206850854948114e-05, "loss": 2.1653, "step": 6222 }, { "epoch": 0.21, "grad_norm": 0.7154582738876343, "learning_rate": 1.8206243492000757e-05, "loss": 2.1818, "step": 6223 }, { "epoch": 0.21, "grad_norm": 0.6922856569290161, "learning_rate": 1.820563603634407e-05, "loss": 2.1714, "step": 6224 }, { "epoch": 0.21, "grad_norm": 0.7273600101470947, "learning_rate": 1.8205028487984916e-05, "loss": 2.1604, "step": 6225 }, { "epoch": 0.21, "grad_norm": 0.7266347408294678, "learning_rate": 1.820442084693016e-05, "loss": 2.2157, "step": 6226 }, { "epoch": 0.21, "grad_norm": 0.6917631030082703, "learning_rate": 1.8203813113186664e-05, "loss": 2.1647, "step": 6227 }, { "epoch": 0.21, "grad_norm": 0.7093598246574402, "learning_rate": 1.8203205286761292e-05, "loss": 2.1412, "step": 6228 }, { "epoch": 0.21, "grad_norm": 0.6844556331634521, "learning_rate": 1.820259736766092e-05, "loss": 2.1465, "step": 6229 }, { "epoch": 0.21, "grad_norm": 0.7113626003265381, "learning_rate": 1.820198935589241e-05, "loss": 2.1403, "step": 6230 }, { "epoch": 0.21, "grad_norm": 0.6789575815200806, "learning_rate": 1.8201381251462628e-05, "loss": 2.189, "step": 6231 }, { "epoch": 0.21, "grad_norm": 0.7100657820701599, "learning_rate": 1.8200773054378448e-05, "loss": 2.1739, "step": 6232 }, { "epoch": 0.21, "grad_norm": 0.7089124917984009, "learning_rate": 1.8200164764646742e-05, "loss": 2.1731, "step": 6233 }, { "epoch": 0.21, "grad_norm": 0.7312548756599426, "learning_rate": 1.819955638227438e-05, "loss": 2.2229, "step": 6234 }, { "epoch": 0.21, "grad_norm": 0.7068428993225098, "learning_rate": 1.8198947907268233e-05, "loss": 2.228, "step": 6235 }, { "epoch": 0.21, "grad_norm": 0.6849491596221924, "learning_rate": 1.819833933963518e-05, "loss": 2.2067, "step": 6236 }, { "epoch": 0.21, "grad_norm": 0.6931092143058777, "learning_rate": 1.819773067938209e-05, "loss": 2.2137, "step": 6237 }, { "epoch": 0.21, "grad_norm": 0.6814872622489929, "learning_rate": 1.8197121926515846e-05, "loss": 2.1804, "step": 6238 }, { "epoch": 0.21, "grad_norm": 0.7009797692298889, "learning_rate": 1.8196513081043323e-05, "loss": 2.1904, "step": 6239 }, { "epoch": 0.21, "grad_norm": 0.7696446180343628, "learning_rate": 1.8195904142971397e-05, "loss": 2.1403, "step": 6240 }, { "epoch": 0.21, "grad_norm": 0.7061079740524292, "learning_rate": 1.819529511230695e-05, "loss": 2.1931, "step": 6241 }, { "epoch": 0.21, "grad_norm": 0.7309475541114807, "learning_rate": 1.819468598905686e-05, "loss": 2.2096, "step": 6242 }, { "epoch": 0.21, "grad_norm": 0.6979312896728516, "learning_rate": 1.8194076773228016e-05, "loss": 2.1797, "step": 6243 }, { "epoch": 0.21, "grad_norm": 0.7142277359962463, "learning_rate": 1.819346746482729e-05, "loss": 2.2248, "step": 6244 }, { "epoch": 0.21, "grad_norm": 0.7087216973304749, "learning_rate": 1.8192858063861573e-05, "loss": 2.1498, "step": 6245 }, { "epoch": 0.21, "grad_norm": 0.6993076205253601, "learning_rate": 1.8192248570337742e-05, "loss": 2.1994, "step": 6246 }, { "epoch": 0.21, "grad_norm": 0.6921340227127075, "learning_rate": 1.819163898426269e-05, "loss": 2.15, "step": 6247 }, { "epoch": 0.21, "grad_norm": 0.705520749092102, "learning_rate": 1.8191029305643306e-05, "loss": 2.2565, "step": 6248 }, { "epoch": 0.21, "grad_norm": 0.6995993256568909, "learning_rate": 1.819041953448647e-05, "loss": 2.1462, "step": 6249 }, { "epoch": 0.21, "grad_norm": 0.7580268979072571, "learning_rate": 1.8189809670799074e-05, "loss": 2.1653, "step": 6250 }, { "epoch": 0.21, "grad_norm": 0.6980740427970886, "learning_rate": 1.818919971458801e-05, "loss": 2.1517, "step": 6251 }, { "epoch": 0.21, "grad_norm": 0.6747538447380066, "learning_rate": 1.8188589665860163e-05, "loss": 2.1554, "step": 6252 }, { "epoch": 0.21, "grad_norm": 0.7014902234077454, "learning_rate": 1.818797952462243e-05, "loss": 2.1507, "step": 6253 }, { "epoch": 0.21, "grad_norm": 0.7250577211380005, "learning_rate": 1.8187369290881705e-05, "loss": 2.1215, "step": 6254 }, { "epoch": 0.21, "grad_norm": 0.6888594627380371, "learning_rate": 1.818675896464488e-05, "loss": 2.1389, "step": 6255 }, { "epoch": 0.21, "grad_norm": 0.7407350540161133, "learning_rate": 1.818614854591885e-05, "loss": 2.2043, "step": 6256 }, { "epoch": 0.21, "grad_norm": 0.7185331583023071, "learning_rate": 1.818553803471051e-05, "loss": 2.2166, "step": 6257 }, { "epoch": 0.21, "grad_norm": 0.7327126860618591, "learning_rate": 1.818492743102676e-05, "loss": 2.1842, "step": 6258 }, { "epoch": 0.21, "grad_norm": 0.6717777252197266, "learning_rate": 1.8184316734874494e-05, "loss": 2.2057, "step": 6259 }, { "epoch": 0.21, "grad_norm": 0.7251034379005432, "learning_rate": 1.8183705946260618e-05, "loss": 2.1421, "step": 6260 }, { "epoch": 0.21, "grad_norm": 0.7194979786872864, "learning_rate": 1.8183095065192025e-05, "loss": 2.1818, "step": 6261 }, { "epoch": 0.21, "grad_norm": 0.7353861927986145, "learning_rate": 1.818248409167562e-05, "loss": 2.0895, "step": 6262 }, { "epoch": 0.21, "grad_norm": 0.7097426056861877, "learning_rate": 1.8181873025718308e-05, "loss": 2.217, "step": 6263 }, { "epoch": 0.21, "grad_norm": 0.721851646900177, "learning_rate": 1.8181261867326987e-05, "loss": 2.1687, "step": 6264 }, { "epoch": 0.21, "grad_norm": 0.6836680769920349, "learning_rate": 1.8180650616508564e-05, "loss": 2.2267, "step": 6265 }, { "epoch": 0.21, "grad_norm": 0.7297622561454773, "learning_rate": 1.8180039273269944e-05, "loss": 2.2466, "step": 6266 }, { "epoch": 0.21, "grad_norm": 0.7098370790481567, "learning_rate": 1.817942783761804e-05, "loss": 2.2356, "step": 6267 }, { "epoch": 0.21, "grad_norm": 0.6814084649085999, "learning_rate": 1.8178816309559747e-05, "loss": 2.1568, "step": 6268 }, { "epoch": 0.21, "grad_norm": 0.707601010799408, "learning_rate": 1.8178204689101983e-05, "loss": 2.2056, "step": 6269 }, { "epoch": 0.21, "grad_norm": 0.7034727334976196, "learning_rate": 1.8177592976251657e-05, "loss": 2.1334, "step": 6270 }, { "epoch": 0.21, "grad_norm": 0.6976986527442932, "learning_rate": 1.8176981171015675e-05, "loss": 2.2238, "step": 6271 }, { "epoch": 0.21, "grad_norm": 0.7366610169410706, "learning_rate": 1.8176369273400954e-05, "loss": 2.1651, "step": 6272 }, { "epoch": 0.21, "grad_norm": 0.6615982055664062, "learning_rate": 1.8175757283414404e-05, "loss": 2.1617, "step": 6273 }, { "epoch": 0.21, "grad_norm": 0.7057796120643616, "learning_rate": 1.817514520106294e-05, "loss": 2.1623, "step": 6274 }, { "epoch": 0.21, "grad_norm": 0.6771888732910156, "learning_rate": 1.8174533026353476e-05, "loss": 2.1548, "step": 6275 }, { "epoch": 0.21, "grad_norm": 0.6665482521057129, "learning_rate": 1.817392075929293e-05, "loss": 2.1494, "step": 6276 }, { "epoch": 0.21, "grad_norm": 0.6746802926063538, "learning_rate": 1.8173308399888218e-05, "loss": 2.1856, "step": 6277 }, { "epoch": 0.21, "grad_norm": 0.6971818208694458, "learning_rate": 1.8172695948146258e-05, "loss": 2.2528, "step": 6278 }, { "epoch": 0.21, "grad_norm": 0.6868285536766052, "learning_rate": 1.8172083404073966e-05, "loss": 2.2043, "step": 6279 }, { "epoch": 0.21, "grad_norm": 0.7004644870758057, "learning_rate": 1.8171470767678264e-05, "loss": 2.1386, "step": 6280 }, { "epoch": 0.21, "grad_norm": 0.6835257411003113, "learning_rate": 1.8170858038966078e-05, "loss": 2.2032, "step": 6281 }, { "epoch": 0.21, "grad_norm": 0.6910305023193359, "learning_rate": 1.8170245217944326e-05, "loss": 2.17, "step": 6282 }, { "epoch": 0.21, "grad_norm": 0.7208327054977417, "learning_rate": 1.816963230461993e-05, "loss": 2.216, "step": 6283 }, { "epoch": 0.21, "grad_norm": 0.6916594505310059, "learning_rate": 1.816901929899982e-05, "loss": 2.2521, "step": 6284 }, { "epoch": 0.21, "grad_norm": 0.6763165593147278, "learning_rate": 1.816840620109091e-05, "loss": 2.2101, "step": 6285 }, { "epoch": 0.21, "grad_norm": 0.6844795346260071, "learning_rate": 1.8167793010900138e-05, "loss": 2.237, "step": 6286 }, { "epoch": 0.21, "grad_norm": 0.6833997368812561, "learning_rate": 1.816717972843443e-05, "loss": 2.2902, "step": 6287 }, { "epoch": 0.21, "grad_norm": 0.7094277739524841, "learning_rate": 1.8166566353700708e-05, "loss": 2.1871, "step": 6288 }, { "epoch": 0.21, "grad_norm": 0.7402395009994507, "learning_rate": 1.8165952886705908e-05, "loss": 2.1758, "step": 6289 }, { "epoch": 0.21, "grad_norm": 0.7101511359214783, "learning_rate": 1.8165339327456958e-05, "loss": 2.1494, "step": 6290 }, { "epoch": 0.21, "grad_norm": 0.6769729256629944, "learning_rate": 1.8164725675960787e-05, "loss": 2.1452, "step": 6291 }, { "epoch": 0.21, "grad_norm": 0.7318620681762695, "learning_rate": 1.8164111932224334e-05, "loss": 2.1832, "step": 6292 }, { "epoch": 0.21, "grad_norm": 0.6814513802528381, "learning_rate": 1.8163498096254525e-05, "loss": 2.2132, "step": 6293 }, { "epoch": 0.21, "grad_norm": 0.6977999210357666, "learning_rate": 1.81628841680583e-05, "loss": 2.1859, "step": 6294 }, { "epoch": 0.21, "grad_norm": 0.6960188746452332, "learning_rate": 1.8162270147642598e-05, "loss": 2.19, "step": 6295 }, { "epoch": 0.21, "grad_norm": 0.6972387433052063, "learning_rate": 1.8161656035014345e-05, "loss": 2.2156, "step": 6296 }, { "epoch": 0.21, "grad_norm": 0.7259312272071838, "learning_rate": 1.816104183018049e-05, "loss": 2.2128, "step": 6297 }, { "epoch": 0.21, "grad_norm": 0.7384807467460632, "learning_rate": 1.8160427533147965e-05, "loss": 2.1929, "step": 6298 }, { "epoch": 0.21, "grad_norm": 0.7192266583442688, "learning_rate": 1.8159813143923712e-05, "loss": 2.221, "step": 6299 }, { "epoch": 0.21, "grad_norm": 0.7081276178359985, "learning_rate": 1.8159198662514672e-05, "loss": 2.2761, "step": 6300 }, { "epoch": 0.21, "grad_norm": 0.6984350085258484, "learning_rate": 1.815858408892779e-05, "loss": 2.1591, "step": 6301 }, { "epoch": 0.21, "grad_norm": 0.7108070254325867, "learning_rate": 1.8157969423170003e-05, "loss": 2.2081, "step": 6302 }, { "epoch": 0.21, "grad_norm": 0.7418871521949768, "learning_rate": 1.815735466524826e-05, "loss": 2.1984, "step": 6303 }, { "epoch": 0.21, "grad_norm": 0.7400758862495422, "learning_rate": 1.8156739815169504e-05, "loss": 2.1936, "step": 6304 }, { "epoch": 0.21, "grad_norm": 0.7166884541511536, "learning_rate": 1.8156124872940683e-05, "loss": 2.212, "step": 6305 }, { "epoch": 0.21, "grad_norm": 0.6811509132385254, "learning_rate": 1.8155509838568744e-05, "loss": 2.1244, "step": 6306 }, { "epoch": 0.21, "grad_norm": 0.6913968324661255, "learning_rate": 1.815489471206063e-05, "loss": 2.1332, "step": 6307 }, { "epoch": 0.21, "grad_norm": 0.6897211074829102, "learning_rate": 1.8154279493423298e-05, "loss": 2.1517, "step": 6308 }, { "epoch": 0.21, "grad_norm": 0.7282798290252686, "learning_rate": 1.81536641826637e-05, "loss": 2.1669, "step": 6309 }, { "epoch": 0.21, "grad_norm": 0.6984309554100037, "learning_rate": 1.8153048779788775e-05, "loss": 2.1608, "step": 6310 }, { "epoch": 0.21, "grad_norm": 0.7070353627204895, "learning_rate": 1.8152433284805484e-05, "loss": 2.1299, "step": 6311 }, { "epoch": 0.21, "grad_norm": 0.7314441800117493, "learning_rate": 1.8151817697720782e-05, "loss": 2.1851, "step": 6312 }, { "epoch": 0.21, "grad_norm": 0.7233806848526001, "learning_rate": 1.815120201854162e-05, "loss": 2.2038, "step": 6313 }, { "epoch": 0.21, "grad_norm": 0.7096444368362427, "learning_rate": 1.8150586247274955e-05, "loss": 2.2084, "step": 6314 }, { "epoch": 0.21, "grad_norm": 0.6937890648841858, "learning_rate": 1.8149970383927745e-05, "loss": 2.1841, "step": 6315 }, { "epoch": 0.21, "grad_norm": 0.6818922758102417, "learning_rate": 1.814935442850694e-05, "loss": 2.175, "step": 6316 }, { "epoch": 0.21, "grad_norm": 0.6858789920806885, "learning_rate": 1.814873838101951e-05, "loss": 2.1925, "step": 6317 }, { "epoch": 0.21, "grad_norm": 0.6864098906517029, "learning_rate": 1.814812224147241e-05, "loss": 2.1812, "step": 6318 }, { "epoch": 0.21, "grad_norm": 0.7687103152275085, "learning_rate": 1.8147506009872598e-05, "loss": 2.1788, "step": 6319 }, { "epoch": 0.21, "grad_norm": 0.687004804611206, "learning_rate": 1.814688968622704e-05, "loss": 2.1971, "step": 6320 }, { "epoch": 0.21, "grad_norm": 0.7049893736839294, "learning_rate": 1.8146273270542695e-05, "loss": 2.1856, "step": 6321 }, { "epoch": 0.21, "grad_norm": 0.6553322672843933, "learning_rate": 1.8145656762826527e-05, "loss": 2.2346, "step": 6322 }, { "epoch": 0.21, "grad_norm": 0.7063862085342407, "learning_rate": 1.8145040163085507e-05, "loss": 2.2448, "step": 6323 }, { "epoch": 0.21, "grad_norm": 0.6989167332649231, "learning_rate": 1.8144423471326593e-05, "loss": 2.2555, "step": 6324 }, { "epoch": 0.21, "grad_norm": 0.6936603784561157, "learning_rate": 1.814380668755676e-05, "loss": 2.2197, "step": 6325 }, { "epoch": 0.21, "grad_norm": 0.7103709578514099, "learning_rate": 1.8143189811782966e-05, "loss": 2.2227, "step": 6326 }, { "epoch": 0.21, "grad_norm": 0.6912099719047546, "learning_rate": 1.814257284401219e-05, "loss": 2.1816, "step": 6327 }, { "epoch": 0.21, "grad_norm": 0.6960267424583435, "learning_rate": 1.8141955784251397e-05, "loss": 2.1873, "step": 6328 }, { "epoch": 0.21, "grad_norm": 0.7041221857070923, "learning_rate": 1.8141338632507558e-05, "loss": 2.2062, "step": 6329 }, { "epoch": 0.21, "grad_norm": 0.6873149871826172, "learning_rate": 1.8140721388787647e-05, "loss": 2.126, "step": 6330 }, { "epoch": 0.21, "grad_norm": 0.7146872878074646, "learning_rate": 1.8140104053098636e-05, "loss": 2.2552, "step": 6331 }, { "epoch": 0.21, "grad_norm": 0.7022344470024109, "learning_rate": 1.8139486625447502e-05, "loss": 2.1751, "step": 6332 }, { "epoch": 0.21, "grad_norm": 0.7143667936325073, "learning_rate": 1.8138869105841217e-05, "loss": 2.1773, "step": 6333 }, { "epoch": 0.21, "grad_norm": 0.7031447291374207, "learning_rate": 1.8138251494286762e-05, "loss": 2.1149, "step": 6334 }, { "epoch": 0.21, "grad_norm": 0.7136235237121582, "learning_rate": 1.8137633790791106e-05, "loss": 2.2171, "step": 6335 }, { "epoch": 0.21, "grad_norm": 0.7328564524650574, "learning_rate": 1.8137015995361237e-05, "loss": 2.1318, "step": 6336 }, { "epoch": 0.21, "grad_norm": 0.7245420217514038, "learning_rate": 1.8136398108004127e-05, "loss": 2.2318, "step": 6337 }, { "epoch": 0.21, "grad_norm": 0.6986823678016663, "learning_rate": 1.813578012872676e-05, "loss": 2.1925, "step": 6338 }, { "epoch": 0.21, "grad_norm": 0.685001790523529, "learning_rate": 1.813516205753612e-05, "loss": 2.2366, "step": 6339 }, { "epoch": 0.21, "grad_norm": 0.7356112599372864, "learning_rate": 1.8134543894439184e-05, "loss": 2.1948, "step": 6340 }, { "epoch": 0.21, "grad_norm": 0.6923741698265076, "learning_rate": 1.8133925639442937e-05, "loss": 2.1875, "step": 6341 }, { "epoch": 0.21, "grad_norm": 0.7427212595939636, "learning_rate": 1.813330729255437e-05, "loss": 2.1757, "step": 6342 }, { "epoch": 0.21, "grad_norm": 0.6963168382644653, "learning_rate": 1.8132688853780456e-05, "loss": 2.1771, "step": 6343 }, { "epoch": 0.21, "grad_norm": 0.7154357433319092, "learning_rate": 1.8132070323128196e-05, "loss": 2.1302, "step": 6344 }, { "epoch": 0.21, "grad_norm": 0.6744716763496399, "learning_rate": 1.813145170060457e-05, "loss": 2.178, "step": 6345 }, { "epoch": 0.21, "grad_norm": 0.7308353781700134, "learning_rate": 1.8130832986216568e-05, "loss": 2.2084, "step": 6346 }, { "epoch": 0.21, "grad_norm": 0.7460055947303772, "learning_rate": 1.813021417997118e-05, "loss": 2.1327, "step": 6347 }, { "epoch": 0.21, "grad_norm": 0.7088429927825928, "learning_rate": 1.8129595281875393e-05, "loss": 2.1834, "step": 6348 }, { "epoch": 0.21, "grad_norm": 0.7316433191299438, "learning_rate": 1.812897629193621e-05, "loss": 2.1508, "step": 6349 }, { "epoch": 0.21, "grad_norm": 0.7352184653282166, "learning_rate": 1.812835721016061e-05, "loss": 2.1479, "step": 6350 }, { "epoch": 0.21, "grad_norm": 0.7303087115287781, "learning_rate": 1.81277380365556e-05, "loss": 2.1116, "step": 6351 }, { "epoch": 0.21, "grad_norm": 0.682537853717804, "learning_rate": 1.8127118771128164e-05, "loss": 2.1957, "step": 6352 }, { "epoch": 0.21, "grad_norm": 0.6753235459327698, "learning_rate": 1.8126499413885306e-05, "loss": 2.2009, "step": 6353 }, { "epoch": 0.21, "grad_norm": 0.6913830637931824, "learning_rate": 1.8125879964834022e-05, "loss": 2.1362, "step": 6354 }, { "epoch": 0.21, "grad_norm": 0.6910260915756226, "learning_rate": 1.8125260423981306e-05, "loss": 2.1488, "step": 6355 }, { "epoch": 0.21, "grad_norm": 0.6832036375999451, "learning_rate": 1.8124640791334157e-05, "loss": 2.1118, "step": 6356 }, { "epoch": 0.21, "grad_norm": 0.7157078385353088, "learning_rate": 1.8124021066899586e-05, "loss": 2.1964, "step": 6357 }, { "epoch": 0.21, "grad_norm": 0.7438831329345703, "learning_rate": 1.8123401250684577e-05, "loss": 2.1687, "step": 6358 }, { "epoch": 0.21, "grad_norm": 0.7154524326324463, "learning_rate": 1.8122781342696147e-05, "loss": 2.172, "step": 6359 }, { "epoch": 0.21, "grad_norm": 0.715208888053894, "learning_rate": 1.812216134294129e-05, "loss": 2.0952, "step": 6360 }, { "epoch": 0.21, "grad_norm": 0.6812685132026672, "learning_rate": 1.8121541251427017e-05, "loss": 2.1503, "step": 6361 }, { "epoch": 0.21, "grad_norm": 0.7198569774627686, "learning_rate": 1.812092106816033e-05, "loss": 2.2044, "step": 6362 }, { "epoch": 0.21, "grad_norm": 0.6905926465988159, "learning_rate": 1.8120300793148235e-05, "loss": 2.2152, "step": 6363 }, { "epoch": 0.21, "grad_norm": 0.7575319409370422, "learning_rate": 1.8119680426397745e-05, "loss": 2.2161, "step": 6364 }, { "epoch": 0.21, "grad_norm": 0.6950216293334961, "learning_rate": 1.8119059967915857e-05, "loss": 2.2185, "step": 6365 }, { "epoch": 0.21, "grad_norm": 0.6988063454627991, "learning_rate": 1.811843941770959e-05, "loss": 2.2093, "step": 6366 }, { "epoch": 0.21, "grad_norm": 0.6994710564613342, "learning_rate": 1.8117818775785957e-05, "loss": 2.2079, "step": 6367 }, { "epoch": 0.21, "grad_norm": 0.7182785868644714, "learning_rate": 1.811719804215196e-05, "loss": 2.2167, "step": 6368 }, { "epoch": 0.21, "grad_norm": 0.7230461835861206, "learning_rate": 1.811657721681462e-05, "loss": 2.2336, "step": 6369 }, { "epoch": 0.21, "grad_norm": 0.7024505138397217, "learning_rate": 1.8115956299780942e-05, "loss": 2.1784, "step": 6370 }, { "epoch": 0.21, "grad_norm": 0.6858686208724976, "learning_rate": 1.811533529105795e-05, "loss": 2.1382, "step": 6371 }, { "epoch": 0.21, "grad_norm": 0.6883136034011841, "learning_rate": 1.811471419065266e-05, "loss": 2.1476, "step": 6372 }, { "epoch": 0.21, "grad_norm": 0.7034709453582764, "learning_rate": 1.811409299857208e-05, "loss": 2.1814, "step": 6373 }, { "epoch": 0.21, "grad_norm": 0.7325684428215027, "learning_rate": 1.8113471714823233e-05, "loss": 2.2138, "step": 6374 }, { "epoch": 0.21, "grad_norm": 0.6874024868011475, "learning_rate": 1.8112850339413136e-05, "loss": 2.1226, "step": 6375 }, { "epoch": 0.21, "grad_norm": 0.6834449172019958, "learning_rate": 1.8112228872348813e-05, "loss": 2.168, "step": 6376 }, { "epoch": 0.21, "grad_norm": 0.6913960576057434, "learning_rate": 1.811160731363728e-05, "loss": 2.172, "step": 6377 }, { "epoch": 0.21, "grad_norm": 0.683789074420929, "learning_rate": 1.8110985663285564e-05, "loss": 2.2221, "step": 6378 }, { "epoch": 0.21, "grad_norm": 0.6796307563781738, "learning_rate": 1.8110363921300685e-05, "loss": 2.1493, "step": 6379 }, { "epoch": 0.21, "grad_norm": 0.6846427321434021, "learning_rate": 1.8109742087689667e-05, "loss": 2.1264, "step": 6380 }, { "epoch": 0.21, "grad_norm": 0.6993242502212524, "learning_rate": 1.8109120162459537e-05, "loss": 2.1857, "step": 6381 }, { "epoch": 0.21, "grad_norm": 0.6903895735740662, "learning_rate": 1.8108498145617316e-05, "loss": 2.1911, "step": 6382 }, { "epoch": 0.21, "grad_norm": 0.6771966814994812, "learning_rate": 1.810787603717004e-05, "loss": 2.1264, "step": 6383 }, { "epoch": 0.21, "grad_norm": 0.6768045425415039, "learning_rate": 1.810725383712473e-05, "loss": 2.1325, "step": 6384 }, { "epoch": 0.21, "grad_norm": 0.7299454212188721, "learning_rate": 1.8106631545488417e-05, "loss": 2.1273, "step": 6385 }, { "epoch": 0.21, "grad_norm": 0.7045280933380127, "learning_rate": 1.810600916226813e-05, "loss": 2.2444, "step": 6386 }, { "epoch": 0.21, "grad_norm": 0.7406953573226929, "learning_rate": 1.8105386687470906e-05, "loss": 2.1438, "step": 6387 }, { "epoch": 0.21, "grad_norm": 0.6980265378952026, "learning_rate": 1.810476412110377e-05, "loss": 2.1574, "step": 6388 }, { "epoch": 0.21, "grad_norm": 0.684349000453949, "learning_rate": 1.810414146317376e-05, "loss": 2.2093, "step": 6389 }, { "epoch": 0.21, "grad_norm": 0.7353962063789368, "learning_rate": 1.8103518713687914e-05, "loss": 2.257, "step": 6390 }, { "epoch": 0.21, "grad_norm": 0.700209379196167, "learning_rate": 1.8102895872653253e-05, "loss": 2.1635, "step": 6391 }, { "epoch": 0.21, "grad_norm": 0.6887604594230652, "learning_rate": 1.810227294007683e-05, "loss": 2.2406, "step": 6392 }, { "epoch": 0.21, "grad_norm": 0.7095186710357666, "learning_rate": 1.8101649915965675e-05, "loss": 2.2186, "step": 6393 }, { "epoch": 0.21, "grad_norm": 0.6913116574287415, "learning_rate": 1.8101026800326825e-05, "loss": 2.2092, "step": 6394 }, { "epoch": 0.21, "grad_norm": 0.716457188129425, "learning_rate": 1.8100403593167322e-05, "loss": 2.1255, "step": 6395 }, { "epoch": 0.21, "grad_norm": 0.6879635453224182, "learning_rate": 1.809978029449421e-05, "loss": 2.2685, "step": 6396 }, { "epoch": 0.21, "grad_norm": 0.691448450088501, "learning_rate": 1.8099156904314527e-05, "loss": 2.1519, "step": 6397 }, { "epoch": 0.21, "grad_norm": 0.7008914947509766, "learning_rate": 1.8098533422635315e-05, "loss": 2.1923, "step": 6398 }, { "epoch": 0.21, "grad_norm": 0.687630295753479, "learning_rate": 1.8097909849463616e-05, "loss": 2.1772, "step": 6399 }, { "epoch": 0.21, "grad_norm": 0.6858562231063843, "learning_rate": 1.809728618480648e-05, "loss": 2.2076, "step": 6400 }, { "epoch": 0.21, "grad_norm": 0.7262950539588928, "learning_rate": 1.809666242867095e-05, "loss": 2.217, "step": 6401 }, { "epoch": 0.21, "grad_norm": 0.6790397763252258, "learning_rate": 1.809603858106407e-05, "loss": 2.1823, "step": 6402 }, { "epoch": 0.21, "grad_norm": 0.7365374565124512, "learning_rate": 1.8095414641992893e-05, "loss": 2.1839, "step": 6403 }, { "epoch": 0.21, "grad_norm": 0.7116594910621643, "learning_rate": 1.8094790611464463e-05, "loss": 2.1758, "step": 6404 }, { "epoch": 0.21, "grad_norm": 0.7414642572402954, "learning_rate": 1.8094166489485836e-05, "loss": 2.3294, "step": 6405 }, { "epoch": 0.21, "grad_norm": 0.6941266655921936, "learning_rate": 1.809354227606406e-05, "loss": 2.1388, "step": 6406 }, { "epoch": 0.21, "grad_norm": 0.6877501010894775, "learning_rate": 1.8092917971206186e-05, "loss": 2.1603, "step": 6407 }, { "epoch": 0.21, "grad_norm": 0.6909286379814148, "learning_rate": 1.8092293574919266e-05, "loss": 2.2053, "step": 6408 }, { "epoch": 0.21, "grad_norm": 0.7115195393562317, "learning_rate": 1.8091669087210357e-05, "loss": 2.1893, "step": 6409 }, { "epoch": 0.21, "grad_norm": 0.7032685279846191, "learning_rate": 1.8091044508086514e-05, "loss": 2.2226, "step": 6410 }, { "epoch": 0.21, "grad_norm": 0.6896002888679504, "learning_rate": 1.809041983755479e-05, "loss": 2.1301, "step": 6411 }, { "epoch": 0.21, "grad_norm": 0.6763260364532471, "learning_rate": 1.8089795075622245e-05, "loss": 2.1891, "step": 6412 }, { "epoch": 0.21, "grad_norm": 0.687936544418335, "learning_rate": 1.8089170222295934e-05, "loss": 2.1396, "step": 6413 }, { "epoch": 0.21, "grad_norm": 0.6922518610954285, "learning_rate": 1.8088545277582923e-05, "loss": 2.2185, "step": 6414 }, { "epoch": 0.21, "grad_norm": 0.6957792639732361, "learning_rate": 1.8087920241490264e-05, "loss": 2.23, "step": 6415 }, { "epoch": 0.21, "grad_norm": 0.6831899285316467, "learning_rate": 1.8087295114025026e-05, "loss": 2.2014, "step": 6416 }, { "epoch": 0.21, "grad_norm": 0.728905439376831, "learning_rate": 1.8086669895194268e-05, "loss": 2.239, "step": 6417 }, { "epoch": 0.21, "grad_norm": 0.6694440245628357, "learning_rate": 1.8086044585005046e-05, "loss": 2.1219, "step": 6418 }, { "epoch": 0.21, "grad_norm": 0.6922813057899475, "learning_rate": 1.8085419183464433e-05, "loss": 2.2065, "step": 6419 }, { "epoch": 0.21, "grad_norm": 0.7299982309341431, "learning_rate": 1.8084793690579498e-05, "loss": 2.2145, "step": 6420 }, { "epoch": 0.21, "grad_norm": 0.6954406499862671, "learning_rate": 1.8084168106357297e-05, "loss": 2.2071, "step": 6421 }, { "epoch": 0.21, "grad_norm": 0.6924938559532166, "learning_rate": 1.8083542430804906e-05, "loss": 2.1214, "step": 6422 }, { "epoch": 0.21, "grad_norm": 0.7058471441268921, "learning_rate": 1.8082916663929388e-05, "loss": 2.2462, "step": 6423 }, { "epoch": 0.21, "grad_norm": 0.7106438279151917, "learning_rate": 1.8082290805737815e-05, "loss": 2.1662, "step": 6424 }, { "epoch": 0.21, "grad_norm": 0.6858699917793274, "learning_rate": 1.8081664856237256e-05, "loss": 2.2066, "step": 6425 }, { "epoch": 0.21, "grad_norm": 0.7168295383453369, "learning_rate": 1.8081038815434785e-05, "loss": 2.1969, "step": 6426 }, { "epoch": 0.21, "grad_norm": 0.7257906198501587, "learning_rate": 1.8080412683337474e-05, "loss": 2.1639, "step": 6427 }, { "epoch": 0.21, "grad_norm": 0.6879714727401733, "learning_rate": 1.8079786459952396e-05, "loss": 2.1983, "step": 6428 }, { "epoch": 0.21, "grad_norm": 0.744773805141449, "learning_rate": 1.8079160145286623e-05, "loss": 2.2146, "step": 6429 }, { "epoch": 0.21, "grad_norm": 0.6614795327186584, "learning_rate": 1.8078533739347236e-05, "loss": 2.1254, "step": 6430 }, { "epoch": 0.21, "grad_norm": 0.6983741521835327, "learning_rate": 1.807790724214131e-05, "loss": 2.1471, "step": 6431 }, { "epoch": 0.21, "grad_norm": 0.7102526426315308, "learning_rate": 1.807728065367592e-05, "loss": 2.1267, "step": 6432 }, { "epoch": 0.21, "grad_norm": 0.7187673449516296, "learning_rate": 1.8076653973958148e-05, "loss": 2.1065, "step": 6433 }, { "epoch": 0.21, "grad_norm": 0.6844434142112732, "learning_rate": 1.8076027202995076e-05, "loss": 2.1211, "step": 6434 }, { "epoch": 0.21, "grad_norm": 0.717785656452179, "learning_rate": 1.8075400340793775e-05, "loss": 2.163, "step": 6435 }, { "epoch": 0.21, "grad_norm": 0.7003490328788757, "learning_rate": 1.8074773387361338e-05, "loss": 2.2091, "step": 6436 }, { "epoch": 0.21, "grad_norm": 0.7042582631111145, "learning_rate": 1.8074146342704844e-05, "loss": 2.2433, "step": 6437 }, { "epoch": 0.21, "grad_norm": 0.6968415379524231, "learning_rate": 1.8073519206831372e-05, "loss": 2.1712, "step": 6438 }, { "epoch": 0.21, "grad_norm": 0.6971914172172546, "learning_rate": 1.8072891979748014e-05, "loss": 2.2193, "step": 6439 }, { "epoch": 0.21, "grad_norm": 0.7089229822158813, "learning_rate": 1.8072264661461857e-05, "loss": 2.2155, "step": 6440 }, { "epoch": 0.21, "grad_norm": 0.7010672092437744, "learning_rate": 1.8071637251979982e-05, "loss": 2.1624, "step": 6441 }, { "epoch": 0.21, "grad_norm": 0.7093105912208557, "learning_rate": 1.8071009751309478e-05, "loss": 2.1832, "step": 6442 }, { "epoch": 0.21, "grad_norm": 0.7181783318519592, "learning_rate": 1.8070382159457442e-05, "loss": 2.1717, "step": 6443 }, { "epoch": 0.21, "grad_norm": 0.7245060205459595, "learning_rate": 1.806975447643095e-05, "loss": 2.1976, "step": 6444 }, { "epoch": 0.21, "grad_norm": 0.7049036026000977, "learning_rate": 1.8069126702237104e-05, "loss": 2.1509, "step": 6445 }, { "epoch": 0.21, "grad_norm": 0.7025460600852966, "learning_rate": 1.8068498836882998e-05, "loss": 2.1817, "step": 6446 }, { "epoch": 0.21, "grad_norm": 0.7077845335006714, "learning_rate": 1.8067870880375716e-05, "loss": 2.2038, "step": 6447 }, { "epoch": 0.21, "grad_norm": 0.6847579479217529, "learning_rate": 1.8067242832722356e-05, "loss": 2.1977, "step": 6448 }, { "epoch": 0.21, "grad_norm": 0.6642428636550903, "learning_rate": 1.8066614693930017e-05, "loss": 2.1934, "step": 6449 }, { "epoch": 0.21, "grad_norm": 0.715844988822937, "learning_rate": 1.806598646400579e-05, "loss": 2.1943, "step": 6450 }, { "epoch": 0.21, "grad_norm": 0.7015922665596008, "learning_rate": 1.806535814295678e-05, "loss": 2.1524, "step": 6451 }, { "epoch": 0.21, "grad_norm": 0.6961047053337097, "learning_rate": 1.806472973079008e-05, "loss": 2.1643, "step": 6452 }, { "epoch": 0.21, "grad_norm": 0.7450258135795593, "learning_rate": 1.8064101227512787e-05, "loss": 2.2079, "step": 6453 }, { "epoch": 0.21, "grad_norm": 0.7437617182731628, "learning_rate": 1.8063472633132006e-05, "loss": 2.1505, "step": 6454 }, { "epoch": 0.21, "grad_norm": 0.7318354845046997, "learning_rate": 1.8062843947654834e-05, "loss": 2.1072, "step": 6455 }, { "epoch": 0.21, "grad_norm": 0.6964071989059448, "learning_rate": 1.806221517108838e-05, "loss": 2.1676, "step": 6456 }, { "epoch": 0.21, "grad_norm": 0.6921846270561218, "learning_rate": 1.806158630343974e-05, "loss": 2.2175, "step": 6457 }, { "epoch": 0.21, "grad_norm": 0.7437452673912048, "learning_rate": 1.8060957344716024e-05, "loss": 2.1265, "step": 6458 }, { "epoch": 0.21, "grad_norm": 0.7209609746932983, "learning_rate": 1.8060328294924337e-05, "loss": 2.1778, "step": 6459 }, { "epoch": 0.21, "grad_norm": 0.7071230411529541, "learning_rate": 1.8059699154071783e-05, "loss": 2.1099, "step": 6460 }, { "epoch": 0.21, "grad_norm": 0.7006473541259766, "learning_rate": 1.8059069922165474e-05, "loss": 2.2137, "step": 6461 }, { "epoch": 0.21, "grad_norm": 0.6914381384849548, "learning_rate": 1.8058440599212516e-05, "loss": 2.1719, "step": 6462 }, { "epoch": 0.22, "grad_norm": 0.6959124803543091, "learning_rate": 1.805781118522002e-05, "loss": 2.112, "step": 6463 }, { "epoch": 0.22, "grad_norm": 0.7534375786781311, "learning_rate": 1.8057181680195092e-05, "loss": 2.1671, "step": 6464 }, { "epoch": 0.22, "grad_norm": 0.6837925910949707, "learning_rate": 1.8056552084144847e-05, "loss": 2.1453, "step": 6465 }, { "epoch": 0.22, "grad_norm": 0.7110303044319153, "learning_rate": 1.80559223970764e-05, "loss": 2.2173, "step": 6466 }, { "epoch": 0.22, "grad_norm": 0.6862213611602783, "learning_rate": 1.8055292618996863e-05, "loss": 2.1573, "step": 6467 }, { "epoch": 0.22, "grad_norm": 0.705294132232666, "learning_rate": 1.8054662749913353e-05, "loss": 2.1875, "step": 6468 }, { "epoch": 0.22, "grad_norm": 0.6973253488540649, "learning_rate": 1.805403278983298e-05, "loss": 2.1887, "step": 6469 }, { "epoch": 0.22, "grad_norm": 0.6857908964157104, "learning_rate": 1.8053402738762863e-05, "loss": 2.2324, "step": 6470 }, { "epoch": 0.22, "grad_norm": 0.7354410886764526, "learning_rate": 1.8052772596710125e-05, "loss": 2.1174, "step": 6471 }, { "epoch": 0.22, "grad_norm": 0.687079668045044, "learning_rate": 1.8052142363681884e-05, "loss": 2.1314, "step": 6472 }, { "epoch": 0.22, "grad_norm": 0.7114191055297852, "learning_rate": 1.8051512039685256e-05, "loss": 2.2349, "step": 6473 }, { "epoch": 0.22, "grad_norm": 0.7184898257255554, "learning_rate": 1.805088162472736e-05, "loss": 2.189, "step": 6474 }, { "epoch": 0.22, "grad_norm": 0.6776912808418274, "learning_rate": 1.8050251118815326e-05, "loss": 2.1767, "step": 6475 }, { "epoch": 0.22, "grad_norm": 0.715488851070404, "learning_rate": 1.804962052195627e-05, "loss": 2.2062, "step": 6476 }, { "epoch": 0.22, "grad_norm": 0.7093402147293091, "learning_rate": 1.804898983415732e-05, "loss": 2.1505, "step": 6477 }, { "epoch": 0.22, "grad_norm": 0.7027968168258667, "learning_rate": 1.80483590554256e-05, "loss": 2.1883, "step": 6478 }, { "epoch": 0.22, "grad_norm": 0.6755682826042175, "learning_rate": 1.8047728185768237e-05, "loss": 2.1611, "step": 6479 }, { "epoch": 0.22, "grad_norm": 0.7189090251922607, "learning_rate": 1.8047097225192356e-05, "loss": 2.2126, "step": 6480 }, { "epoch": 0.22, "grad_norm": 0.7012996673583984, "learning_rate": 1.8046466173705086e-05, "loss": 2.3044, "step": 6481 }, { "epoch": 0.22, "grad_norm": 0.7429297566413879, "learning_rate": 1.804583503131356e-05, "loss": 2.1841, "step": 6482 }, { "epoch": 0.22, "grad_norm": 0.6993507146835327, "learning_rate": 1.80452037980249e-05, "loss": 2.2551, "step": 6483 }, { "epoch": 0.22, "grad_norm": 0.6889524459838867, "learning_rate": 1.8044572473846246e-05, "loss": 2.1253, "step": 6484 }, { "epoch": 0.22, "grad_norm": 0.6920259594917297, "learning_rate": 1.8043941058784728e-05, "loss": 2.1633, "step": 6485 }, { "epoch": 0.22, "grad_norm": 0.7178728580474854, "learning_rate": 1.8043309552847476e-05, "loss": 2.1903, "step": 6486 }, { "epoch": 0.22, "grad_norm": 0.7149829864501953, "learning_rate": 1.804267795604163e-05, "loss": 2.1432, "step": 6487 }, { "epoch": 0.22, "grad_norm": 0.7360720634460449, "learning_rate": 1.804204626837432e-05, "loss": 2.1205, "step": 6488 }, { "epoch": 0.22, "grad_norm": 0.7601842284202576, "learning_rate": 1.804141448985268e-05, "loss": 2.2712, "step": 6489 }, { "epoch": 0.22, "grad_norm": 0.7222402095794678, "learning_rate": 1.8040782620483854e-05, "loss": 2.2379, "step": 6490 }, { "epoch": 0.22, "grad_norm": 0.7046586275100708, "learning_rate": 1.804015066027498e-05, "loss": 2.1602, "step": 6491 }, { "epoch": 0.22, "grad_norm": 0.723450779914856, "learning_rate": 1.8039518609233196e-05, "loss": 2.2932, "step": 6492 }, { "epoch": 0.22, "grad_norm": 0.6879225969314575, "learning_rate": 1.803888646736564e-05, "loss": 2.1475, "step": 6493 }, { "epoch": 0.22, "grad_norm": 0.6795632243156433, "learning_rate": 1.803825423467946e-05, "loss": 2.2199, "step": 6494 }, { "epoch": 0.22, "grad_norm": 0.6736174821853638, "learning_rate": 1.803762191118179e-05, "loss": 2.174, "step": 6495 }, { "epoch": 0.22, "grad_norm": 0.6765211820602417, "learning_rate": 1.803698949687978e-05, "loss": 2.1198, "step": 6496 }, { "epoch": 0.22, "grad_norm": 0.6945918202400208, "learning_rate": 1.8036356991780573e-05, "loss": 2.1683, "step": 6497 }, { "epoch": 0.22, "grad_norm": 0.7008389830589294, "learning_rate": 1.8035724395891314e-05, "loss": 2.1578, "step": 6498 }, { "epoch": 0.22, "grad_norm": 0.7341498732566833, "learning_rate": 1.8035091709219153e-05, "loss": 2.1383, "step": 6499 }, { "epoch": 0.22, "grad_norm": 0.6831977367401123, "learning_rate": 1.803445893177123e-05, "loss": 2.17, "step": 6500 }, { "epoch": 0.22, "grad_norm": 0.6934776306152344, "learning_rate": 1.8033826063554706e-05, "loss": 2.1596, "step": 6501 }, { "epoch": 0.22, "grad_norm": 0.6972570419311523, "learning_rate": 1.8033193104576717e-05, "loss": 2.1951, "step": 6502 }, { "epoch": 0.22, "grad_norm": 0.7249875068664551, "learning_rate": 1.8032560054844422e-05, "loss": 2.2139, "step": 6503 }, { "epoch": 0.22, "grad_norm": 0.7270299792289734, "learning_rate": 1.803192691436497e-05, "loss": 2.2562, "step": 6504 }, { "epoch": 0.22, "grad_norm": 0.6792468428611755, "learning_rate": 1.8031293683145515e-05, "loss": 2.1996, "step": 6505 }, { "epoch": 0.22, "grad_norm": 0.7299947142601013, "learning_rate": 1.8030660361193213e-05, "loss": 2.1512, "step": 6506 }, { "epoch": 0.22, "grad_norm": 0.7278671264648438, "learning_rate": 1.8030026948515216e-05, "loss": 2.2307, "step": 6507 }, { "epoch": 0.22, "grad_norm": 0.7127509117126465, "learning_rate": 1.8029393445118678e-05, "loss": 2.2387, "step": 6508 }, { "epoch": 0.22, "grad_norm": 0.6947072744369507, "learning_rate": 1.8028759851010763e-05, "loss": 2.1922, "step": 6509 }, { "epoch": 0.22, "grad_norm": 0.7127625942230225, "learning_rate": 1.8028126166198617e-05, "loss": 2.1843, "step": 6510 }, { "epoch": 0.22, "grad_norm": 0.731911838054657, "learning_rate": 1.8027492390689415e-05, "loss": 2.1824, "step": 6511 }, { "epoch": 0.22, "grad_norm": 0.6634818911552429, "learning_rate": 1.8026858524490306e-05, "loss": 2.2563, "step": 6512 }, { "epoch": 0.22, "grad_norm": 0.7038640975952148, "learning_rate": 1.8026224567608453e-05, "loss": 2.1537, "step": 6513 }, { "epoch": 0.22, "grad_norm": 0.6808935403823853, "learning_rate": 1.8025590520051018e-05, "loss": 2.0971, "step": 6514 }, { "epoch": 0.22, "grad_norm": 0.699148416519165, "learning_rate": 1.8024956381825164e-05, "loss": 2.2262, "step": 6515 }, { "epoch": 0.22, "grad_norm": 0.7081369161605835, "learning_rate": 1.8024322152938056e-05, "loss": 2.1795, "step": 6516 }, { "epoch": 0.22, "grad_norm": 0.7216430306434631, "learning_rate": 1.8023687833396858e-05, "loss": 2.2134, "step": 6517 }, { "epoch": 0.22, "grad_norm": 0.7136854529380798, "learning_rate": 1.802305342320874e-05, "loss": 2.2434, "step": 6518 }, { "epoch": 0.22, "grad_norm": 0.6983802914619446, "learning_rate": 1.8022418922380866e-05, "loss": 2.1794, "step": 6519 }, { "epoch": 0.22, "grad_norm": 0.6896607279777527, "learning_rate": 1.8021784330920404e-05, "loss": 2.2337, "step": 6520 }, { "epoch": 0.22, "grad_norm": 0.6989573240280151, "learning_rate": 1.8021149648834525e-05, "loss": 2.1637, "step": 6521 }, { "epoch": 0.22, "grad_norm": 0.7296557426452637, "learning_rate": 1.8020514876130395e-05, "loss": 2.2675, "step": 6522 }, { "epoch": 0.22, "grad_norm": 0.7054111957550049, "learning_rate": 1.801988001281519e-05, "loss": 2.2258, "step": 6523 }, { "epoch": 0.22, "grad_norm": 0.725476086139679, "learning_rate": 1.8019245058896083e-05, "loss": 2.1431, "step": 6524 }, { "epoch": 0.22, "grad_norm": 0.7145067453384399, "learning_rate": 1.8018610014380242e-05, "loss": 2.0628, "step": 6525 }, { "epoch": 0.22, "grad_norm": 0.6953862905502319, "learning_rate": 1.8017974879274848e-05, "loss": 2.2062, "step": 6526 }, { "epoch": 0.22, "grad_norm": 0.7258676886558533, "learning_rate": 1.801733965358707e-05, "loss": 2.2444, "step": 6527 }, { "epoch": 0.22, "grad_norm": 0.7089829444885254, "learning_rate": 1.8016704337324086e-05, "loss": 2.2835, "step": 6528 }, { "epoch": 0.22, "grad_norm": 0.7030249834060669, "learning_rate": 1.8016068930493076e-05, "loss": 2.1972, "step": 6529 }, { "epoch": 0.22, "grad_norm": 0.706455409526825, "learning_rate": 1.8015433433101218e-05, "loss": 2.238, "step": 6530 }, { "epoch": 0.22, "grad_norm": 0.7113663554191589, "learning_rate": 1.8014797845155693e-05, "loss": 2.2018, "step": 6531 }, { "epoch": 0.22, "grad_norm": 0.7248159646987915, "learning_rate": 1.8014162166663674e-05, "loss": 2.1602, "step": 6532 }, { "epoch": 0.22, "grad_norm": 0.7148717641830444, "learning_rate": 1.801352639763235e-05, "loss": 2.1948, "step": 6533 }, { "epoch": 0.22, "grad_norm": 0.721083402633667, "learning_rate": 1.8012890538068902e-05, "loss": 2.2059, "step": 6534 }, { "epoch": 0.22, "grad_norm": 0.7552103996276855, "learning_rate": 1.8012254587980512e-05, "loss": 2.2157, "step": 6535 }, { "epoch": 0.22, "grad_norm": 0.6762883067131042, "learning_rate": 1.8011618547374366e-05, "loss": 2.2042, "step": 6536 }, { "epoch": 0.22, "grad_norm": 0.6903905868530273, "learning_rate": 1.8010982416257647e-05, "loss": 2.1375, "step": 6537 }, { "epoch": 0.22, "grad_norm": 0.6858564615249634, "learning_rate": 1.801034619463754e-05, "loss": 2.1554, "step": 6538 }, { "epoch": 0.22, "grad_norm": 0.7493283748626709, "learning_rate": 1.8009709882521242e-05, "loss": 2.148, "step": 6539 }, { "epoch": 0.22, "grad_norm": 0.6917670965194702, "learning_rate": 1.8009073479915935e-05, "loss": 2.206, "step": 6540 }, { "epoch": 0.22, "grad_norm": 0.7596235275268555, "learning_rate": 1.800843698682881e-05, "loss": 2.2567, "step": 6541 }, { "epoch": 0.22, "grad_norm": 0.757006049156189, "learning_rate": 1.8007800403267057e-05, "loss": 2.1531, "step": 6542 }, { "epoch": 0.22, "grad_norm": 0.738733172416687, "learning_rate": 1.8007163729237866e-05, "loss": 2.1861, "step": 6543 }, { "epoch": 0.22, "grad_norm": 0.7054998874664307, "learning_rate": 1.8006526964748435e-05, "loss": 2.2065, "step": 6544 }, { "epoch": 0.22, "grad_norm": 0.6866227388381958, "learning_rate": 1.8005890109805952e-05, "loss": 2.267, "step": 6545 }, { "epoch": 0.22, "grad_norm": 0.7243348956108093, "learning_rate": 1.8005253164417616e-05, "loss": 2.1476, "step": 6546 }, { "epoch": 0.22, "grad_norm": 0.7126927375793457, "learning_rate": 1.800461612859062e-05, "loss": 2.1831, "step": 6547 }, { "epoch": 0.22, "grad_norm": 0.7031869292259216, "learning_rate": 1.8003979002332165e-05, "loss": 2.1321, "step": 6548 }, { "epoch": 0.22, "grad_norm": 0.696779727935791, "learning_rate": 1.800334178564944e-05, "loss": 2.219, "step": 6549 }, { "epoch": 0.22, "grad_norm": 0.713350236415863, "learning_rate": 1.8002704478549655e-05, "loss": 2.1468, "step": 6550 }, { "epoch": 0.22, "grad_norm": 0.6896235942840576, "learning_rate": 1.8002067081040007e-05, "loss": 2.2027, "step": 6551 }, { "epoch": 0.22, "grad_norm": 0.688564121723175, "learning_rate": 1.8001429593127692e-05, "loss": 2.2271, "step": 6552 }, { "epoch": 0.22, "grad_norm": 0.6818128228187561, "learning_rate": 1.800079201481991e-05, "loss": 2.2218, "step": 6553 }, { "epoch": 0.22, "grad_norm": 0.652244508266449, "learning_rate": 1.8000154346123875e-05, "loss": 2.141, "step": 6554 }, { "epoch": 0.22, "grad_norm": 0.6858006119728088, "learning_rate": 1.7999516587046782e-05, "loss": 2.1967, "step": 6555 }, { "epoch": 0.22, "grad_norm": 0.709963858127594, "learning_rate": 1.799887873759584e-05, "loss": 2.1607, "step": 6556 }, { "epoch": 0.22, "grad_norm": 0.7003176212310791, "learning_rate": 1.7998240797778255e-05, "loss": 2.1554, "step": 6557 }, { "epoch": 0.22, "grad_norm": 0.6957613229751587, "learning_rate": 1.799760276760123e-05, "loss": 2.2217, "step": 6558 }, { "epoch": 0.22, "grad_norm": 0.6880059838294983, "learning_rate": 1.7996964647071977e-05, "loss": 2.1608, "step": 6559 }, { "epoch": 0.22, "grad_norm": 0.7173803448677063, "learning_rate": 1.7996326436197706e-05, "loss": 2.2572, "step": 6560 }, { "epoch": 0.22, "grad_norm": 0.6865238547325134, "learning_rate": 1.7995688134985622e-05, "loss": 2.201, "step": 6561 }, { "epoch": 0.22, "grad_norm": 0.6913920640945435, "learning_rate": 1.799504974344294e-05, "loss": 2.209, "step": 6562 }, { "epoch": 0.22, "grad_norm": 0.7052408456802368, "learning_rate": 1.7994411261576874e-05, "loss": 2.2589, "step": 6563 }, { "epoch": 0.22, "grad_norm": 0.7043444514274597, "learning_rate": 1.799377268939463e-05, "loss": 2.1344, "step": 6564 }, { "epoch": 0.22, "grad_norm": 0.6907814741134644, "learning_rate": 1.7993134026903432e-05, "loss": 2.1204, "step": 6565 }, { "epoch": 0.22, "grad_norm": 0.7150293588638306, "learning_rate": 1.7992495274110488e-05, "loss": 2.1116, "step": 6566 }, { "epoch": 0.22, "grad_norm": 0.6804690361022949, "learning_rate": 1.7991856431023018e-05, "loss": 2.2566, "step": 6567 }, { "epoch": 0.22, "grad_norm": 0.7090702056884766, "learning_rate": 1.7991217497648236e-05, "loss": 2.1704, "step": 6568 }, { "epoch": 0.22, "grad_norm": 0.7331114411354065, "learning_rate": 1.7990578473993362e-05, "loss": 2.1685, "step": 6569 }, { "epoch": 0.22, "grad_norm": 0.7139462828636169, "learning_rate": 1.7989939360065617e-05, "loss": 2.1706, "step": 6570 }, { "epoch": 0.22, "grad_norm": 0.7457171678543091, "learning_rate": 1.7989300155872217e-05, "loss": 2.1477, "step": 6571 }, { "epoch": 0.22, "grad_norm": 0.6962663531303406, "learning_rate": 1.7988660861420388e-05, "loss": 2.1674, "step": 6572 }, { "epoch": 0.22, "grad_norm": 0.6956488490104675, "learning_rate": 1.798802147671735e-05, "loss": 2.1655, "step": 6573 }, { "epoch": 0.22, "grad_norm": 0.7154152989387512, "learning_rate": 1.7987382001770327e-05, "loss": 2.2148, "step": 6574 }, { "epoch": 0.22, "grad_norm": 0.7448229193687439, "learning_rate": 1.7986742436586544e-05, "loss": 2.1096, "step": 6575 }, { "epoch": 0.22, "grad_norm": 0.7161765694618225, "learning_rate": 1.7986102781173228e-05, "loss": 2.2459, "step": 6576 }, { "epoch": 0.22, "grad_norm": 0.7332423329353333, "learning_rate": 1.7985463035537597e-05, "loss": 2.2572, "step": 6577 }, { "epoch": 0.22, "grad_norm": 0.7228856682777405, "learning_rate": 1.7984823199686886e-05, "loss": 2.2125, "step": 6578 }, { "epoch": 0.22, "grad_norm": 0.6857089400291443, "learning_rate": 1.7984183273628325e-05, "loss": 2.1786, "step": 6579 }, { "epoch": 0.22, "grad_norm": 0.694179892539978, "learning_rate": 1.7983543257369137e-05, "loss": 2.1752, "step": 6580 }, { "epoch": 0.22, "grad_norm": 0.6854165196418762, "learning_rate": 1.798290315091656e-05, "loss": 2.1365, "step": 6581 }, { "epoch": 0.22, "grad_norm": 0.7022131681442261, "learning_rate": 1.798226295427782e-05, "loss": 2.1647, "step": 6582 }, { "epoch": 0.22, "grad_norm": 0.7304705381393433, "learning_rate": 1.798162266746015e-05, "loss": 2.1365, "step": 6583 }, { "epoch": 0.22, "grad_norm": 0.7153554558753967, "learning_rate": 1.7980982290470786e-05, "loss": 2.1569, "step": 6584 }, { "epoch": 0.22, "grad_norm": 0.7307454943656921, "learning_rate": 1.798034182331696e-05, "loss": 2.146, "step": 6585 }, { "epoch": 0.22, "grad_norm": 0.7032563090324402, "learning_rate": 1.7979701266005912e-05, "loss": 2.1904, "step": 6586 }, { "epoch": 0.22, "grad_norm": 0.7243797779083252, "learning_rate": 1.7979060618544874e-05, "loss": 2.158, "step": 6587 }, { "epoch": 0.22, "grad_norm": 0.7189526557922363, "learning_rate": 1.7978419880941085e-05, "loss": 2.1461, "step": 6588 }, { "epoch": 0.22, "grad_norm": 0.7318780422210693, "learning_rate": 1.7977779053201785e-05, "loss": 2.2235, "step": 6589 }, { "epoch": 0.22, "grad_norm": 0.7007522583007812, "learning_rate": 1.797713813533421e-05, "loss": 2.1663, "step": 6590 }, { "epoch": 0.22, "grad_norm": 0.6892000436782837, "learning_rate": 1.7976497127345608e-05, "loss": 2.2301, "step": 6591 }, { "epoch": 0.22, "grad_norm": 0.7175612449645996, "learning_rate": 1.797585602924321e-05, "loss": 2.1522, "step": 6592 }, { "epoch": 0.22, "grad_norm": 0.6855272650718689, "learning_rate": 1.797521484103427e-05, "loss": 2.24, "step": 6593 }, { "epoch": 0.22, "grad_norm": 0.6977092623710632, "learning_rate": 1.7974573562726022e-05, "loss": 2.2286, "step": 6594 }, { "epoch": 0.22, "grad_norm": 0.6980246305465698, "learning_rate": 1.797393219432572e-05, "loss": 2.2004, "step": 6595 }, { "epoch": 0.22, "grad_norm": 0.7200573086738586, "learning_rate": 1.7973290735840603e-05, "loss": 2.178, "step": 6596 }, { "epoch": 0.22, "grad_norm": 0.6859830617904663, "learning_rate": 1.7972649187277923e-05, "loss": 2.1832, "step": 6597 }, { "epoch": 0.22, "grad_norm": 0.6967597007751465, "learning_rate": 1.797200754864492e-05, "loss": 2.1251, "step": 6598 }, { "epoch": 0.22, "grad_norm": 0.7362996935844421, "learning_rate": 1.797136581994885e-05, "loss": 2.2404, "step": 6599 }, { "epoch": 0.22, "grad_norm": 0.7006789445877075, "learning_rate": 1.7970724001196962e-05, "loss": 2.1868, "step": 6600 }, { "epoch": 0.22, "grad_norm": 0.696056067943573, "learning_rate": 1.7970082092396505e-05, "loss": 2.2004, "step": 6601 }, { "epoch": 0.22, "grad_norm": 0.7123087048530579, "learning_rate": 1.7969440093554732e-05, "loss": 2.1999, "step": 6602 }, { "epoch": 0.22, "grad_norm": 0.6767599582672119, "learning_rate": 1.7968798004678892e-05, "loss": 2.2067, "step": 6603 }, { "epoch": 0.22, "grad_norm": 0.7009679675102234, "learning_rate": 1.7968155825776244e-05, "loss": 2.287, "step": 6604 }, { "epoch": 0.22, "grad_norm": 0.7190284729003906, "learning_rate": 1.7967513556854045e-05, "loss": 2.2251, "step": 6605 }, { "epoch": 0.22, "grad_norm": 0.7339173555374146, "learning_rate": 1.7966871197919544e-05, "loss": 2.1661, "step": 6606 }, { "epoch": 0.22, "grad_norm": 0.7319211363792419, "learning_rate": 1.796622874898e-05, "loss": 2.1429, "step": 6607 }, { "epoch": 0.22, "grad_norm": 0.7431654930114746, "learning_rate": 1.7965586210042675e-05, "loss": 2.1866, "step": 6608 }, { "epoch": 0.22, "grad_norm": 0.6840620040893555, "learning_rate": 1.7964943581114823e-05, "loss": 2.1711, "step": 6609 }, { "epoch": 0.22, "grad_norm": 0.7071534991264343, "learning_rate": 1.796430086220371e-05, "loss": 2.1362, "step": 6610 }, { "epoch": 0.22, "grad_norm": 0.7259408831596375, "learning_rate": 1.7963658053316588e-05, "loss": 2.1168, "step": 6611 }, { "epoch": 0.22, "grad_norm": 0.7257355451583862, "learning_rate": 1.796301515446073e-05, "loss": 2.1886, "step": 6612 }, { "epoch": 0.22, "grad_norm": 0.7154070138931274, "learning_rate": 1.7962372165643387e-05, "loss": 2.1935, "step": 6613 }, { "epoch": 0.22, "grad_norm": 0.7240186333656311, "learning_rate": 1.7961729086871835e-05, "loss": 2.2037, "step": 6614 }, { "epoch": 0.22, "grad_norm": 0.7244178652763367, "learning_rate": 1.7961085918153332e-05, "loss": 2.2365, "step": 6615 }, { "epoch": 0.22, "grad_norm": 0.7270267009735107, "learning_rate": 1.7960442659495147e-05, "loss": 2.1786, "step": 6616 }, { "epoch": 0.22, "grad_norm": 0.710294246673584, "learning_rate": 1.7959799310904545e-05, "loss": 2.1643, "step": 6617 }, { "epoch": 0.22, "grad_norm": 0.7330827713012695, "learning_rate": 1.7959155872388797e-05, "loss": 2.2561, "step": 6618 }, { "epoch": 0.22, "grad_norm": 0.7038842439651489, "learning_rate": 1.7958512343955167e-05, "loss": 2.1791, "step": 6619 }, { "epoch": 0.22, "grad_norm": 0.7041257619857788, "learning_rate": 1.7957868725610932e-05, "loss": 2.192, "step": 6620 }, { "epoch": 0.22, "grad_norm": 0.7143344283103943, "learning_rate": 1.795722501736336e-05, "loss": 2.1639, "step": 6621 }, { "epoch": 0.22, "grad_norm": 0.7441373467445374, "learning_rate": 1.795658121921972e-05, "loss": 2.1501, "step": 6622 }, { "epoch": 0.22, "grad_norm": 0.7239783406257629, "learning_rate": 1.795593733118729e-05, "loss": 2.2269, "step": 6623 }, { "epoch": 0.22, "grad_norm": 0.6970530152320862, "learning_rate": 1.7955293353273344e-05, "loss": 2.1674, "step": 6624 }, { "epoch": 0.22, "grad_norm": 0.7018797397613525, "learning_rate": 1.7954649285485157e-05, "loss": 2.1611, "step": 6625 }, { "epoch": 0.22, "grad_norm": 0.6809978485107422, "learning_rate": 1.7954005127830003e-05, "loss": 2.1371, "step": 6626 }, { "epoch": 0.22, "grad_norm": 0.6916415691375732, "learning_rate": 1.7953360880315156e-05, "loss": 2.1818, "step": 6627 }, { "epoch": 0.22, "grad_norm": 0.6924553513526917, "learning_rate": 1.7952716542947905e-05, "loss": 2.121, "step": 6628 }, { "epoch": 0.22, "grad_norm": 0.6921359300613403, "learning_rate": 1.795207211573552e-05, "loss": 2.1718, "step": 6629 }, { "epoch": 0.22, "grad_norm": 0.708489179611206, "learning_rate": 1.7951427598685285e-05, "loss": 2.1784, "step": 6630 }, { "epoch": 0.22, "grad_norm": 0.7168723940849304, "learning_rate": 1.795078299180448e-05, "loss": 2.1614, "step": 6631 }, { "epoch": 0.22, "grad_norm": 0.6781845092773438, "learning_rate": 1.795013829510039e-05, "loss": 2.1534, "step": 6632 }, { "epoch": 0.22, "grad_norm": 0.7574804425239563, "learning_rate": 1.79494935085803e-05, "loss": 2.1353, "step": 6633 }, { "epoch": 0.22, "grad_norm": 0.693473756313324, "learning_rate": 1.7948848632251485e-05, "loss": 2.1414, "step": 6634 }, { "epoch": 0.22, "grad_norm": 0.7214508652687073, "learning_rate": 1.794820366612124e-05, "loss": 2.1962, "step": 6635 }, { "epoch": 0.22, "grad_norm": 0.6834951043128967, "learning_rate": 1.7947558610196846e-05, "loss": 2.2291, "step": 6636 }, { "epoch": 0.22, "grad_norm": 0.7094677090644836, "learning_rate": 1.7946913464485595e-05, "loss": 2.2176, "step": 6637 }, { "epoch": 0.22, "grad_norm": 0.7109693288803101, "learning_rate": 1.794626822899477e-05, "loss": 2.1726, "step": 6638 }, { "epoch": 0.22, "grad_norm": 0.7006967067718506, "learning_rate": 1.794562290373167e-05, "loss": 2.1951, "step": 6639 }, { "epoch": 0.22, "grad_norm": 0.7130532264709473, "learning_rate": 1.7944977488703572e-05, "loss": 2.2397, "step": 6640 }, { "epoch": 0.22, "grad_norm": 0.6664572954177856, "learning_rate": 1.7944331983917776e-05, "loss": 2.1613, "step": 6641 }, { "epoch": 0.22, "grad_norm": 0.7230521440505981, "learning_rate": 1.7943686389381575e-05, "loss": 2.1413, "step": 6642 }, { "epoch": 0.22, "grad_norm": 0.6880577206611633, "learning_rate": 1.794304070510226e-05, "loss": 2.1441, "step": 6643 }, { "epoch": 0.22, "grad_norm": 0.7133157849311829, "learning_rate": 1.7942394931087127e-05, "loss": 2.2102, "step": 6644 }, { "epoch": 0.22, "grad_norm": 0.7430444955825806, "learning_rate": 1.7941749067343472e-05, "loss": 2.184, "step": 6645 }, { "epoch": 0.22, "grad_norm": 0.6830583214759827, "learning_rate": 1.7941103113878587e-05, "loss": 2.0998, "step": 6646 }, { "epoch": 0.22, "grad_norm": 0.6989683508872986, "learning_rate": 1.7940457070699774e-05, "loss": 2.1725, "step": 6647 }, { "epoch": 0.22, "grad_norm": 0.697478175163269, "learning_rate": 1.793981093781433e-05, "loss": 2.0627, "step": 6648 }, { "epoch": 0.22, "grad_norm": 0.6964222192764282, "learning_rate": 1.7939164715229556e-05, "loss": 2.2161, "step": 6649 }, { "epoch": 0.22, "grad_norm": 0.704581618309021, "learning_rate": 1.793851840295275e-05, "loss": 2.1239, "step": 6650 }, { "epoch": 0.22, "grad_norm": 0.7236476540565491, "learning_rate": 1.7937872000991218e-05, "loss": 2.1688, "step": 6651 }, { "epoch": 0.22, "grad_norm": 0.7146232724189758, "learning_rate": 1.793722550935226e-05, "loss": 2.1019, "step": 6652 }, { "epoch": 0.22, "grad_norm": 0.6998026371002197, "learning_rate": 1.793657892804318e-05, "loss": 2.1766, "step": 6653 }, { "epoch": 0.22, "grad_norm": 0.7196683287620544, "learning_rate": 1.7935932257071284e-05, "loss": 2.215, "step": 6654 }, { "epoch": 0.22, "grad_norm": 0.6888589262962341, "learning_rate": 1.7935285496443872e-05, "loss": 2.1192, "step": 6655 }, { "epoch": 0.22, "grad_norm": 0.7328450679779053, "learning_rate": 1.793463864616826e-05, "loss": 2.1467, "step": 6656 }, { "epoch": 0.22, "grad_norm": 0.7402969002723694, "learning_rate": 1.7933991706251745e-05, "loss": 2.212, "step": 6657 }, { "epoch": 0.22, "grad_norm": 0.6689773201942444, "learning_rate": 1.793334467670165e-05, "loss": 2.184, "step": 6658 }, { "epoch": 0.22, "grad_norm": 0.6956059336662292, "learning_rate": 1.7932697557525265e-05, "loss": 2.1339, "step": 6659 }, { "epoch": 0.22, "grad_norm": 0.7008812427520752, "learning_rate": 1.793205034872992e-05, "loss": 2.2287, "step": 6660 }, { "epoch": 0.22, "grad_norm": 0.7328618764877319, "learning_rate": 1.7931403050322916e-05, "loss": 2.1426, "step": 6661 }, { "epoch": 0.22, "grad_norm": 0.6775570511817932, "learning_rate": 1.7930755662311566e-05, "loss": 2.1817, "step": 6662 }, { "epoch": 0.22, "grad_norm": 0.7062534689903259, "learning_rate": 1.793010818470319e-05, "loss": 2.1274, "step": 6663 }, { "epoch": 0.22, "grad_norm": 0.7023792266845703, "learning_rate": 1.79294606175051e-05, "loss": 2.1523, "step": 6664 }, { "epoch": 0.22, "grad_norm": 0.7355208396911621, "learning_rate": 1.792881296072461e-05, "loss": 2.1614, "step": 6665 }, { "epoch": 0.22, "grad_norm": 0.7151618003845215, "learning_rate": 1.7928165214369036e-05, "loss": 2.1148, "step": 6666 }, { "epoch": 0.22, "grad_norm": 0.6916586756706238, "learning_rate": 1.79275173784457e-05, "loss": 2.0807, "step": 6667 }, { "epoch": 0.22, "grad_norm": 0.7306488156318665, "learning_rate": 1.792686945296192e-05, "loss": 2.2012, "step": 6668 }, { "epoch": 0.22, "grad_norm": 0.7072722911834717, "learning_rate": 1.7926221437925014e-05, "loss": 2.2066, "step": 6669 }, { "epoch": 0.22, "grad_norm": 0.6857954263687134, "learning_rate": 1.79255733333423e-05, "loss": 2.1622, "step": 6670 }, { "epoch": 0.22, "grad_norm": 0.7160612344741821, "learning_rate": 1.7924925139221107e-05, "loss": 2.2173, "step": 6671 }, { "epoch": 0.22, "grad_norm": 0.7112611532211304, "learning_rate": 1.792427685556875e-05, "loss": 2.1385, "step": 6672 }, { "epoch": 0.22, "grad_norm": 0.7116711139678955, "learning_rate": 1.792362848239256e-05, "loss": 2.1129, "step": 6673 }, { "epoch": 0.22, "grad_norm": 0.6863235235214233, "learning_rate": 1.7922980019699858e-05, "loss": 2.1991, "step": 6674 }, { "epoch": 0.22, "grad_norm": 0.7066608667373657, "learning_rate": 1.7922331467497973e-05, "loss": 2.206, "step": 6675 }, { "epoch": 0.22, "grad_norm": 0.655047595500946, "learning_rate": 1.792168282579423e-05, "loss": 2.1688, "step": 6676 }, { "epoch": 0.22, "grad_norm": 0.701244592666626, "learning_rate": 1.7921034094595958e-05, "loss": 2.213, "step": 6677 }, { "epoch": 0.22, "grad_norm": 0.6872324347496033, "learning_rate": 1.792038527391048e-05, "loss": 2.0548, "step": 6678 }, { "epoch": 0.22, "grad_norm": 0.71617192029953, "learning_rate": 1.7919736363745135e-05, "loss": 2.2081, "step": 6679 }, { "epoch": 0.22, "grad_norm": 0.6914728283882141, "learning_rate": 1.7919087364107247e-05, "loss": 2.1946, "step": 6680 }, { "epoch": 0.22, "grad_norm": 0.7542549967765808, "learning_rate": 1.7918438275004156e-05, "loss": 2.1453, "step": 6681 }, { "epoch": 0.22, "grad_norm": 0.7000204920768738, "learning_rate": 1.7917789096443185e-05, "loss": 2.1744, "step": 6682 }, { "epoch": 0.22, "grad_norm": 0.6969152688980103, "learning_rate": 1.7917139828431675e-05, "loss": 2.2687, "step": 6683 }, { "epoch": 0.22, "grad_norm": 0.7273998856544495, "learning_rate": 1.791649047097696e-05, "loss": 2.2601, "step": 6684 }, { "epoch": 0.22, "grad_norm": 0.6938063502311707, "learning_rate": 1.7915841024086372e-05, "loss": 2.1484, "step": 6685 }, { "epoch": 0.22, "grad_norm": 0.7053707242012024, "learning_rate": 1.7915191487767253e-05, "loss": 2.1978, "step": 6686 }, { "epoch": 0.22, "grad_norm": 0.7187672853469849, "learning_rate": 1.7914541862026943e-05, "loss": 2.2416, "step": 6687 }, { "epoch": 0.22, "grad_norm": 0.704089879989624, "learning_rate": 1.7913892146872775e-05, "loss": 2.2611, "step": 6688 }, { "epoch": 0.22, "grad_norm": 0.6950346231460571, "learning_rate": 1.791324234231209e-05, "loss": 2.1764, "step": 6689 }, { "epoch": 0.22, "grad_norm": 0.7089124917984009, "learning_rate": 1.7912592448352232e-05, "loss": 2.2397, "step": 6690 }, { "epoch": 0.22, "grad_norm": 0.7141889929771423, "learning_rate": 1.791194246500054e-05, "loss": 2.0711, "step": 6691 }, { "epoch": 0.22, "grad_norm": 0.7076598405838013, "learning_rate": 1.791129239226436e-05, "loss": 2.2425, "step": 6692 }, { "epoch": 0.22, "grad_norm": 0.7320265769958496, "learning_rate": 1.7910642230151035e-05, "loss": 2.2193, "step": 6693 }, { "epoch": 0.22, "grad_norm": 0.6987038850784302, "learning_rate": 1.790999197866791e-05, "loss": 2.1945, "step": 6694 }, { "epoch": 0.22, "grad_norm": 0.7167535424232483, "learning_rate": 1.7909341637822332e-05, "loss": 2.1507, "step": 6695 }, { "epoch": 0.22, "grad_norm": 0.6794973611831665, "learning_rate": 1.7908691207621645e-05, "loss": 2.2056, "step": 6696 }, { "epoch": 0.22, "grad_norm": 0.720698356628418, "learning_rate": 1.7908040688073206e-05, "loss": 2.1165, "step": 6697 }, { "epoch": 0.22, "grad_norm": 0.6985532641410828, "learning_rate": 1.7907390079184353e-05, "loss": 2.2245, "step": 6698 }, { "epoch": 0.22, "grad_norm": 0.702853798866272, "learning_rate": 1.7906739380962442e-05, "loss": 2.1829, "step": 6699 }, { "epoch": 0.22, "grad_norm": 0.6892301440238953, "learning_rate": 1.790608859341482e-05, "loss": 2.2112, "step": 6700 }, { "epoch": 0.22, "grad_norm": 0.7039897441864014, "learning_rate": 1.7905437716548847e-05, "loss": 2.1829, "step": 6701 }, { "epoch": 0.22, "grad_norm": 0.7373062372207642, "learning_rate": 1.7904786750371868e-05, "loss": 2.2158, "step": 6702 }, { "epoch": 0.22, "grad_norm": 0.6866317391395569, "learning_rate": 1.7904135694891243e-05, "loss": 2.1776, "step": 6703 }, { "epoch": 0.22, "grad_norm": 0.7084851861000061, "learning_rate": 1.7903484550114328e-05, "loss": 2.1547, "step": 6704 }, { "epoch": 0.22, "grad_norm": 0.7180258631706238, "learning_rate": 1.7902833316048475e-05, "loss": 2.1391, "step": 6705 }, { "epoch": 0.22, "grad_norm": 0.7088985443115234, "learning_rate": 1.790218199270104e-05, "loss": 2.1339, "step": 6706 }, { "epoch": 0.22, "grad_norm": 0.7078843712806702, "learning_rate": 1.7901530580079387e-05, "loss": 2.1637, "step": 6707 }, { "epoch": 0.22, "grad_norm": 0.7053789496421814, "learning_rate": 1.7900879078190868e-05, "loss": 2.1417, "step": 6708 }, { "epoch": 0.22, "grad_norm": 0.7207679748535156, "learning_rate": 1.7900227487042853e-05, "loss": 2.2276, "step": 6709 }, { "epoch": 0.22, "grad_norm": 0.6909953951835632, "learning_rate": 1.7899575806642695e-05, "loss": 2.2244, "step": 6710 }, { "epoch": 0.22, "grad_norm": 0.7080489993095398, "learning_rate": 1.789892403699776e-05, "loss": 2.2077, "step": 6711 }, { "epoch": 0.22, "grad_norm": 0.6981737017631531, "learning_rate": 1.7898272178115407e-05, "loss": 2.1733, "step": 6712 }, { "epoch": 0.22, "grad_norm": 0.7211215496063232, "learning_rate": 1.789762023000301e-05, "loss": 2.1602, "step": 6713 }, { "epoch": 0.22, "grad_norm": 0.6821576356887817, "learning_rate": 1.7896968192667924e-05, "loss": 2.1013, "step": 6714 }, { "epoch": 0.22, "grad_norm": 0.7175663113594055, "learning_rate": 1.789631606611752e-05, "loss": 2.2145, "step": 6715 }, { "epoch": 0.22, "grad_norm": 0.7491844296455383, "learning_rate": 1.7895663850359165e-05, "loss": 2.0937, "step": 6716 }, { "epoch": 0.22, "grad_norm": 0.7000634074211121, "learning_rate": 1.7895011545400226e-05, "loss": 2.1272, "step": 6717 }, { "epoch": 0.22, "grad_norm": 0.7442992329597473, "learning_rate": 1.7894359151248074e-05, "loss": 2.2024, "step": 6718 }, { "epoch": 0.22, "grad_norm": 0.6817628145217896, "learning_rate": 1.7893706667910078e-05, "loss": 2.1675, "step": 6719 }, { "epoch": 0.22, "grad_norm": 0.7229297757148743, "learning_rate": 1.7893054095393614e-05, "loss": 2.2143, "step": 6720 }, { "epoch": 0.22, "grad_norm": 0.6896880269050598, "learning_rate": 1.7892401433706047e-05, "loss": 2.1671, "step": 6721 }, { "epoch": 0.22, "grad_norm": 0.7212055921554565, "learning_rate": 1.7891748682854758e-05, "loss": 2.1986, "step": 6722 }, { "epoch": 0.22, "grad_norm": 0.7027114629745483, "learning_rate": 1.7891095842847112e-05, "loss": 2.1849, "step": 6723 }, { "epoch": 0.22, "grad_norm": 0.6905052065849304, "learning_rate": 1.7890442913690492e-05, "loss": 2.1926, "step": 6724 }, { "epoch": 0.22, "grad_norm": 0.7455458045005798, "learning_rate": 1.7889789895392273e-05, "loss": 2.2669, "step": 6725 }, { "epoch": 0.22, "grad_norm": 0.7445131540298462, "learning_rate": 1.7889136787959832e-05, "loss": 2.1653, "step": 6726 }, { "epoch": 0.22, "grad_norm": 0.7124503254890442, "learning_rate": 1.7888483591400545e-05, "loss": 2.1634, "step": 6727 }, { "epoch": 0.22, "grad_norm": 0.7132136225700378, "learning_rate": 1.7887830305721798e-05, "loss": 2.1531, "step": 6728 }, { "epoch": 0.22, "grad_norm": 0.6970967054367065, "learning_rate": 1.788717693093096e-05, "loss": 2.2441, "step": 6729 }, { "epoch": 0.22, "grad_norm": 0.6777008175849915, "learning_rate": 1.7886523467035422e-05, "loss": 2.1577, "step": 6730 }, { "epoch": 0.22, "grad_norm": 0.7264772057533264, "learning_rate": 1.7885869914042567e-05, "loss": 2.159, "step": 6731 }, { "epoch": 0.22, "grad_norm": 0.693709135055542, "learning_rate": 1.7885216271959772e-05, "loss": 2.1447, "step": 6732 }, { "epoch": 0.22, "grad_norm": 0.6850021481513977, "learning_rate": 1.7884562540794427e-05, "loss": 2.1597, "step": 6733 }, { "epoch": 0.22, "grad_norm": 0.7083114981651306, "learning_rate": 1.7883908720553915e-05, "loss": 2.1781, "step": 6734 }, { "epoch": 0.22, "grad_norm": 0.7071018218994141, "learning_rate": 1.7883254811245622e-05, "loss": 2.1997, "step": 6735 }, { "epoch": 0.22, "grad_norm": 0.7036201357841492, "learning_rate": 1.7882600812876937e-05, "loss": 2.1797, "step": 6736 }, { "epoch": 0.22, "grad_norm": 0.7072071433067322, "learning_rate": 1.7881946725455247e-05, "loss": 2.1695, "step": 6737 }, { "epoch": 0.22, "grad_norm": 0.6878752112388611, "learning_rate": 1.7881292548987938e-05, "loss": 2.1631, "step": 6738 }, { "epoch": 0.22, "grad_norm": 0.7123965620994568, "learning_rate": 1.788063828348241e-05, "loss": 2.1545, "step": 6739 }, { "epoch": 0.22, "grad_norm": 0.698392391204834, "learning_rate": 1.787998392894605e-05, "loss": 2.1705, "step": 6740 }, { "epoch": 0.22, "grad_norm": 0.7054446935653687, "learning_rate": 1.7879329485386246e-05, "loss": 2.084, "step": 6741 }, { "epoch": 0.22, "grad_norm": 0.6947166323661804, "learning_rate": 1.78786749528104e-05, "loss": 2.1355, "step": 6742 }, { "epoch": 0.22, "grad_norm": 0.684367299079895, "learning_rate": 1.7878020331225897e-05, "loss": 2.2047, "step": 6743 }, { "epoch": 0.22, "grad_norm": 0.7165995240211487, "learning_rate": 1.7877365620640136e-05, "loss": 2.1029, "step": 6744 }, { "epoch": 0.22, "grad_norm": 0.7213699817657471, "learning_rate": 1.787671082106052e-05, "loss": 2.1612, "step": 6745 }, { "epoch": 0.22, "grad_norm": 0.6980475187301636, "learning_rate": 1.7876055932494435e-05, "loss": 2.1828, "step": 6746 }, { "epoch": 0.22, "grad_norm": 0.6845316290855408, "learning_rate": 1.7875400954949293e-05, "loss": 2.17, "step": 6747 }, { "epoch": 0.22, "grad_norm": 0.7053123712539673, "learning_rate": 1.7874745888432482e-05, "loss": 2.1305, "step": 6748 }, { "epoch": 0.22, "grad_norm": 0.6992340683937073, "learning_rate": 1.7874090732951407e-05, "loss": 2.161, "step": 6749 }, { "epoch": 0.22, "grad_norm": 0.7306883931159973, "learning_rate": 1.7873435488513472e-05, "loss": 2.281, "step": 6750 }, { "epoch": 0.22, "grad_norm": 0.7324796319007874, "learning_rate": 1.7872780155126076e-05, "loss": 2.2185, "step": 6751 }, { "epoch": 0.22, "grad_norm": 0.7234364748001099, "learning_rate": 1.7872124732796626e-05, "loss": 2.1428, "step": 6752 }, { "epoch": 0.22, "grad_norm": 0.6811234951019287, "learning_rate": 1.787146922153252e-05, "loss": 2.1215, "step": 6753 }, { "epoch": 0.22, "grad_norm": 0.6736853122711182, "learning_rate": 1.7870813621341168e-05, "loss": 2.1741, "step": 6754 }, { "epoch": 0.22, "grad_norm": 0.7128562331199646, "learning_rate": 1.787015793222998e-05, "loss": 2.2112, "step": 6755 }, { "epoch": 0.22, "grad_norm": 0.7001230120658875, "learning_rate": 1.7869502154206357e-05, "loss": 2.2458, "step": 6756 }, { "epoch": 0.22, "grad_norm": 0.6988021731376648, "learning_rate": 1.7868846287277712e-05, "loss": 2.185, "step": 6757 }, { "epoch": 0.22, "grad_norm": 0.6877143383026123, "learning_rate": 1.7868190331451455e-05, "loss": 2.2192, "step": 6758 }, { "epoch": 0.22, "grad_norm": 0.6942476034164429, "learning_rate": 1.7867534286734992e-05, "loss": 2.1803, "step": 6759 }, { "epoch": 0.22, "grad_norm": 0.6822233200073242, "learning_rate": 1.7866878153135737e-05, "loss": 2.1575, "step": 6760 }, { "epoch": 0.22, "grad_norm": 0.7128563523292542, "learning_rate": 1.7866221930661106e-05, "loss": 2.1747, "step": 6761 }, { "epoch": 0.22, "grad_norm": 0.6892821192741394, "learning_rate": 1.786556561931851e-05, "loss": 2.1538, "step": 6762 }, { "epoch": 0.23, "grad_norm": 0.7127853631973267, "learning_rate": 1.786490921911536e-05, "loss": 2.1916, "step": 6763 }, { "epoch": 0.23, "grad_norm": 0.6819483637809753, "learning_rate": 1.7864252730059076e-05, "loss": 2.1341, "step": 6764 }, { "epoch": 0.23, "grad_norm": 0.7072163820266724, "learning_rate": 1.7863596152157074e-05, "loss": 2.1627, "step": 6765 }, { "epoch": 0.23, "grad_norm": 0.7099435329437256, "learning_rate": 1.7862939485416772e-05, "loss": 2.1628, "step": 6766 }, { "epoch": 0.23, "grad_norm": 0.6882128119468689, "learning_rate": 1.7862282729845587e-05, "loss": 2.1305, "step": 6767 }, { "epoch": 0.23, "grad_norm": 0.7161807417869568, "learning_rate": 1.7861625885450937e-05, "loss": 2.1655, "step": 6768 }, { "epoch": 0.23, "grad_norm": 0.7024717330932617, "learning_rate": 1.786096895224025e-05, "loss": 2.1571, "step": 6769 }, { "epoch": 0.23, "grad_norm": 0.7148637771606445, "learning_rate": 1.786031193022094e-05, "loss": 2.1452, "step": 6770 }, { "epoch": 0.23, "grad_norm": 0.7146597504615784, "learning_rate": 1.785965481940043e-05, "loss": 2.1596, "step": 6771 }, { "epoch": 0.23, "grad_norm": 0.6785227656364441, "learning_rate": 1.7858997619786152e-05, "loss": 2.217, "step": 6772 }, { "epoch": 0.23, "grad_norm": 0.7046297788619995, "learning_rate": 1.785834033138552e-05, "loss": 2.2015, "step": 6773 }, { "epoch": 0.23, "grad_norm": 0.6983430981636047, "learning_rate": 1.7857682954205967e-05, "loss": 2.0605, "step": 6774 }, { "epoch": 0.23, "grad_norm": 0.6989234089851379, "learning_rate": 1.7857025488254914e-05, "loss": 2.2127, "step": 6775 }, { "epoch": 0.23, "grad_norm": 0.7047427296638489, "learning_rate": 1.7856367933539796e-05, "loss": 2.2234, "step": 6776 }, { "epoch": 0.23, "grad_norm": 0.7147670984268188, "learning_rate": 1.7855710290068035e-05, "loss": 2.2364, "step": 6777 }, { "epoch": 0.23, "grad_norm": 0.6983500123023987, "learning_rate": 1.7855052557847063e-05, "loss": 2.2129, "step": 6778 }, { "epoch": 0.23, "grad_norm": 0.6956278681755066, "learning_rate": 1.7854394736884312e-05, "loss": 2.1629, "step": 6779 }, { "epoch": 0.23, "grad_norm": 0.6807165741920471, "learning_rate": 1.7853736827187213e-05, "loss": 2.1385, "step": 6780 }, { "epoch": 0.23, "grad_norm": 0.6907092332839966, "learning_rate": 1.7853078828763192e-05, "loss": 2.1519, "step": 6781 }, { "epoch": 0.23, "grad_norm": 0.6958861947059631, "learning_rate": 1.7852420741619694e-05, "loss": 2.2483, "step": 6782 }, { "epoch": 0.23, "grad_norm": 0.7231794595718384, "learning_rate": 1.7851762565764148e-05, "loss": 2.1885, "step": 6783 }, { "epoch": 0.23, "grad_norm": 0.6978388428688049, "learning_rate": 1.785110430120399e-05, "loss": 2.208, "step": 6784 }, { "epoch": 0.23, "grad_norm": 0.7005677819252014, "learning_rate": 1.7850445947946658e-05, "loss": 2.2178, "step": 6785 }, { "epoch": 0.23, "grad_norm": 0.7164233922958374, "learning_rate": 1.7849787505999584e-05, "loss": 2.2552, "step": 6786 }, { "epoch": 0.23, "grad_norm": 0.6966948509216309, "learning_rate": 1.7849128975370214e-05, "loss": 2.1195, "step": 6787 }, { "epoch": 0.23, "grad_norm": 0.7261508703231812, "learning_rate": 1.7848470356065985e-05, "loss": 2.198, "step": 6788 }, { "epoch": 0.23, "grad_norm": 0.6945905685424805, "learning_rate": 1.7847811648094336e-05, "loss": 2.1478, "step": 6789 }, { "epoch": 0.23, "grad_norm": 0.6871730089187622, "learning_rate": 1.7847152851462716e-05, "loss": 2.2043, "step": 6790 }, { "epoch": 0.23, "grad_norm": 0.687189519405365, "learning_rate": 1.7846493966178557e-05, "loss": 2.0927, "step": 6791 }, { "epoch": 0.23, "grad_norm": 0.7218593955039978, "learning_rate": 1.7845834992249307e-05, "loss": 2.1607, "step": 6792 }, { "epoch": 0.23, "grad_norm": 0.6897413730621338, "learning_rate": 1.7845175929682412e-05, "loss": 2.1774, "step": 6793 }, { "epoch": 0.23, "grad_norm": 0.6789997816085815, "learning_rate": 1.784451677848532e-05, "loss": 2.241, "step": 6794 }, { "epoch": 0.23, "grad_norm": 0.6930385231971741, "learning_rate": 1.784385753866547e-05, "loss": 2.1561, "step": 6795 }, { "epoch": 0.23, "grad_norm": 0.710186243057251, "learning_rate": 1.7843198210230318e-05, "loss": 2.152, "step": 6796 }, { "epoch": 0.23, "grad_norm": 0.6987459659576416, "learning_rate": 1.7842538793187308e-05, "loss": 2.205, "step": 6797 }, { "epoch": 0.23, "grad_norm": 0.754103422164917, "learning_rate": 1.7841879287543893e-05, "loss": 2.1411, "step": 6798 }, { "epoch": 0.23, "grad_norm": 0.7063634395599365, "learning_rate": 1.784121969330752e-05, "loss": 2.127, "step": 6799 }, { "epoch": 0.23, "grad_norm": 0.7065637707710266, "learning_rate": 1.784056001048564e-05, "loss": 2.1813, "step": 6800 }, { "epoch": 0.23, "grad_norm": 0.6785488128662109, "learning_rate": 1.783990023908571e-05, "loss": 2.1728, "step": 6801 }, { "epoch": 0.23, "grad_norm": 0.7472487688064575, "learning_rate": 1.7839240379115182e-05, "loss": 2.2167, "step": 6802 }, { "epoch": 0.23, "grad_norm": 0.7082286477088928, "learning_rate": 1.7838580430581512e-05, "loss": 2.1691, "step": 6803 }, { "epoch": 0.23, "grad_norm": 0.6916322112083435, "learning_rate": 1.7837920393492153e-05, "loss": 2.1708, "step": 6804 }, { "epoch": 0.23, "grad_norm": 0.7010189294815063, "learning_rate": 1.783726026785456e-05, "loss": 2.098, "step": 6805 }, { "epoch": 0.23, "grad_norm": 0.6913995146751404, "learning_rate": 1.7836600053676198e-05, "loss": 2.2058, "step": 6806 }, { "epoch": 0.23, "grad_norm": 0.6896275281906128, "learning_rate": 1.783593975096452e-05, "loss": 2.1871, "step": 6807 }, { "epoch": 0.23, "grad_norm": 0.7041301727294922, "learning_rate": 1.7835279359726986e-05, "loss": 2.1883, "step": 6808 }, { "epoch": 0.23, "grad_norm": 0.7403188943862915, "learning_rate": 1.7834618879971056e-05, "loss": 2.1484, "step": 6809 }, { "epoch": 0.23, "grad_norm": 0.7081049680709839, "learning_rate": 1.7833958311704195e-05, "loss": 2.1113, "step": 6810 }, { "epoch": 0.23, "grad_norm": 0.68865567445755, "learning_rate": 1.7833297654933863e-05, "loss": 2.2313, "step": 6811 }, { "epoch": 0.23, "grad_norm": 0.6944952011108398, "learning_rate": 1.7832636909667528e-05, "loss": 2.15, "step": 6812 }, { "epoch": 0.23, "grad_norm": 0.7047025561332703, "learning_rate": 1.783197607591265e-05, "loss": 2.0817, "step": 6813 }, { "epoch": 0.23, "grad_norm": 0.6887606382369995, "learning_rate": 1.7831315153676697e-05, "loss": 2.1184, "step": 6814 }, { "epoch": 0.23, "grad_norm": 0.7089728116989136, "learning_rate": 1.7830654142967133e-05, "loss": 2.244, "step": 6815 }, { "epoch": 0.23, "grad_norm": 0.6902227997779846, "learning_rate": 1.782999304379143e-05, "loss": 2.1589, "step": 6816 }, { "epoch": 0.23, "grad_norm": 0.6971452832221985, "learning_rate": 1.7829331856157054e-05, "loss": 2.1804, "step": 6817 }, { "epoch": 0.23, "grad_norm": 0.7186415195465088, "learning_rate": 1.782867058007147e-05, "loss": 2.1595, "step": 6818 }, { "epoch": 0.23, "grad_norm": 0.6955856084823608, "learning_rate": 1.782800921554216e-05, "loss": 2.1716, "step": 6819 }, { "epoch": 0.23, "grad_norm": 0.7264643311500549, "learning_rate": 1.782734776257659e-05, "loss": 2.1205, "step": 6820 }, { "epoch": 0.23, "grad_norm": 0.7218306660652161, "learning_rate": 1.782668622118223e-05, "loss": 2.1635, "step": 6821 }, { "epoch": 0.23, "grad_norm": 0.6905709505081177, "learning_rate": 1.7826024591366556e-05, "loss": 2.1501, "step": 6822 }, { "epoch": 0.23, "grad_norm": 0.7445659041404724, "learning_rate": 1.7825362873137042e-05, "loss": 2.1679, "step": 6823 }, { "epoch": 0.23, "grad_norm": 0.6806520223617554, "learning_rate": 1.7824701066501165e-05, "loss": 2.1701, "step": 6824 }, { "epoch": 0.23, "grad_norm": 0.7084265351295471, "learning_rate": 1.78240391714664e-05, "loss": 2.1767, "step": 6825 }, { "epoch": 0.23, "grad_norm": 0.6946107745170593, "learning_rate": 1.7823377188040227e-05, "loss": 2.1565, "step": 6826 }, { "epoch": 0.23, "grad_norm": 0.7721121907234192, "learning_rate": 1.7822715116230124e-05, "loss": 2.2209, "step": 6827 }, { "epoch": 0.23, "grad_norm": 0.6846488118171692, "learning_rate": 1.782205295604357e-05, "loss": 2.147, "step": 6828 }, { "epoch": 0.23, "grad_norm": 0.7053474187850952, "learning_rate": 1.7821390707488048e-05, "loss": 2.1395, "step": 6829 }, { "epoch": 0.23, "grad_norm": 0.7143411040306091, "learning_rate": 1.7820728370571033e-05, "loss": 2.26, "step": 6830 }, { "epoch": 0.23, "grad_norm": 0.6922802329063416, "learning_rate": 1.7820065945300014e-05, "loss": 2.0962, "step": 6831 }, { "epoch": 0.23, "grad_norm": 0.6994383931159973, "learning_rate": 1.7819403431682473e-05, "loss": 2.1017, "step": 6832 }, { "epoch": 0.23, "grad_norm": 0.6988868117332458, "learning_rate": 1.7818740829725894e-05, "loss": 2.2113, "step": 6833 }, { "epoch": 0.23, "grad_norm": 0.7174085378646851, "learning_rate": 1.7818078139437765e-05, "loss": 2.1615, "step": 6834 }, { "epoch": 0.23, "grad_norm": 0.7122299075126648, "learning_rate": 1.781741536082557e-05, "loss": 2.152, "step": 6835 }, { "epoch": 0.23, "grad_norm": 0.7476032972335815, "learning_rate": 1.7816752493896798e-05, "loss": 2.0624, "step": 6836 }, { "epoch": 0.23, "grad_norm": 0.7121260166168213, "learning_rate": 1.7816089538658938e-05, "loss": 2.1682, "step": 6837 }, { "epoch": 0.23, "grad_norm": 0.7298016548156738, "learning_rate": 1.781542649511948e-05, "loss": 2.2233, "step": 6838 }, { "epoch": 0.23, "grad_norm": 0.7053233981132507, "learning_rate": 1.781476336328591e-05, "loss": 2.1786, "step": 6839 }, { "epoch": 0.23, "grad_norm": 0.6758351922035217, "learning_rate": 1.781410014316573e-05, "loss": 2.2082, "step": 6840 }, { "epoch": 0.23, "grad_norm": 0.6976833939552307, "learning_rate": 1.781343683476642e-05, "loss": 2.1605, "step": 6841 }, { "epoch": 0.23, "grad_norm": 0.743137776851654, "learning_rate": 1.781277343809548e-05, "loss": 2.1794, "step": 6842 }, { "epoch": 0.23, "grad_norm": 0.745040237903595, "learning_rate": 1.7812109953160405e-05, "loss": 2.2113, "step": 6843 }, { "epoch": 0.23, "grad_norm": 0.7027665376663208, "learning_rate": 1.781144637996869e-05, "loss": 2.1141, "step": 6844 }, { "epoch": 0.23, "grad_norm": 0.690324068069458, "learning_rate": 1.7810782718527834e-05, "loss": 2.0999, "step": 6845 }, { "epoch": 0.23, "grad_norm": 0.6947686076164246, "learning_rate": 1.781011896884533e-05, "loss": 2.2087, "step": 6846 }, { "epoch": 0.23, "grad_norm": 0.6965914964675903, "learning_rate": 1.780945513092868e-05, "loss": 2.2171, "step": 6847 }, { "epoch": 0.23, "grad_norm": 0.6865129470825195, "learning_rate": 1.7808791204785384e-05, "loss": 2.1142, "step": 6848 }, { "epoch": 0.23, "grad_norm": 0.7207651734352112, "learning_rate": 1.780812719042294e-05, "loss": 2.2166, "step": 6849 }, { "epoch": 0.23, "grad_norm": 0.7159828543663025, "learning_rate": 1.780746308784885e-05, "loss": 2.2117, "step": 6850 }, { "epoch": 0.23, "grad_norm": 0.6997160911560059, "learning_rate": 1.780679889707062e-05, "loss": 2.1425, "step": 6851 }, { "epoch": 0.23, "grad_norm": 0.6947236657142639, "learning_rate": 1.7806134618095752e-05, "loss": 2.2179, "step": 6852 }, { "epoch": 0.23, "grad_norm": 0.7177760601043701, "learning_rate": 1.7805470250931748e-05, "loss": 2.2012, "step": 6853 }, { "epoch": 0.23, "grad_norm": 0.7546408176422119, "learning_rate": 1.7804805795586114e-05, "loss": 2.1229, "step": 6854 }, { "epoch": 0.23, "grad_norm": 0.7468322515487671, "learning_rate": 1.7804141252066363e-05, "loss": 2.2103, "step": 6855 }, { "epoch": 0.23, "grad_norm": 0.7040621042251587, "learning_rate": 1.7803476620379993e-05, "loss": 2.1764, "step": 6856 }, { "epoch": 0.23, "grad_norm": 0.7218724489212036, "learning_rate": 1.7802811900534518e-05, "loss": 2.173, "step": 6857 }, { "epoch": 0.23, "grad_norm": 0.6986580491065979, "learning_rate": 1.780214709253745e-05, "loss": 2.1946, "step": 6858 }, { "epoch": 0.23, "grad_norm": 0.7062078714370728, "learning_rate": 1.7801482196396294e-05, "loss": 2.1584, "step": 6859 }, { "epoch": 0.23, "grad_norm": 0.6856774687767029, "learning_rate": 1.7800817212118565e-05, "loss": 2.2048, "step": 6860 }, { "epoch": 0.23, "grad_norm": 0.7328609824180603, "learning_rate": 1.7800152139711774e-05, "loss": 2.2265, "step": 6861 }, { "epoch": 0.23, "grad_norm": 0.7360959649085999, "learning_rate": 1.779948697918344e-05, "loss": 2.1738, "step": 6862 }, { "epoch": 0.23, "grad_norm": 0.7078776359558105, "learning_rate": 1.779882173054107e-05, "loss": 2.1151, "step": 6863 }, { "epoch": 0.23, "grad_norm": 0.701103150844574, "learning_rate": 1.7798156393792178e-05, "loss": 2.1202, "step": 6864 }, { "epoch": 0.23, "grad_norm": 0.7015447020530701, "learning_rate": 1.779749096894429e-05, "loss": 2.1846, "step": 6865 }, { "epoch": 0.23, "grad_norm": 0.7004568576812744, "learning_rate": 1.779682545600492e-05, "loss": 2.1816, "step": 6866 }, { "epoch": 0.23, "grad_norm": 0.7055882811546326, "learning_rate": 1.7796159854981584e-05, "loss": 2.1633, "step": 6867 }, { "epoch": 0.23, "grad_norm": 0.7181345820426941, "learning_rate": 1.77954941658818e-05, "loss": 2.1481, "step": 6868 }, { "epoch": 0.23, "grad_norm": 0.7153964638710022, "learning_rate": 1.7794828388713097e-05, "loss": 2.1544, "step": 6869 }, { "epoch": 0.23, "grad_norm": 0.713890790939331, "learning_rate": 1.7794162523482986e-05, "loss": 2.1867, "step": 6870 }, { "epoch": 0.23, "grad_norm": 0.6742919087409973, "learning_rate": 1.7793496570199e-05, "loss": 2.1635, "step": 6871 }, { "epoch": 0.23, "grad_norm": 0.7094627618789673, "learning_rate": 1.779283052886865e-05, "loss": 2.1961, "step": 6872 }, { "epoch": 0.23, "grad_norm": 0.7124009132385254, "learning_rate": 1.7792164399499475e-05, "loss": 2.1982, "step": 6873 }, { "epoch": 0.23, "grad_norm": 0.7158564329147339, "learning_rate": 1.779149818209899e-05, "loss": 2.2429, "step": 6874 }, { "epoch": 0.23, "grad_norm": 0.7279731035232544, "learning_rate": 1.7790831876674724e-05, "loss": 2.2099, "step": 6875 }, { "epoch": 0.23, "grad_norm": 0.6774821877479553, "learning_rate": 1.7790165483234206e-05, "loss": 2.0863, "step": 6876 }, { "epoch": 0.23, "grad_norm": 0.6874121427536011, "learning_rate": 1.7789499001784963e-05, "loss": 2.1245, "step": 6877 }, { "epoch": 0.23, "grad_norm": 0.7278497815132141, "learning_rate": 1.7788832432334526e-05, "loss": 2.1851, "step": 6878 }, { "epoch": 0.23, "grad_norm": 0.6820404529571533, "learning_rate": 1.7788165774890427e-05, "loss": 2.1506, "step": 6879 }, { "epoch": 0.23, "grad_norm": 0.6948745846748352, "learning_rate": 1.7787499029460195e-05, "loss": 2.1817, "step": 6880 }, { "epoch": 0.23, "grad_norm": 0.7137362360954285, "learning_rate": 1.778683219605136e-05, "loss": 2.158, "step": 6881 }, { "epoch": 0.23, "grad_norm": 0.7012460827827454, "learning_rate": 1.778616527467146e-05, "loss": 2.2094, "step": 6882 }, { "epoch": 0.23, "grad_norm": 0.7243384718894958, "learning_rate": 1.7785498265328028e-05, "loss": 2.1578, "step": 6883 }, { "epoch": 0.23, "grad_norm": 0.7307226657867432, "learning_rate": 1.77848311680286e-05, "loss": 2.2498, "step": 6884 }, { "epoch": 0.23, "grad_norm": 0.695855438709259, "learning_rate": 1.778416398278071e-05, "loss": 2.2159, "step": 6885 }, { "epoch": 0.23, "grad_norm": 0.7213189005851746, "learning_rate": 1.7783496709591896e-05, "loss": 2.1378, "step": 6886 }, { "epoch": 0.23, "grad_norm": 0.7287018299102783, "learning_rate": 1.77828293484697e-05, "loss": 2.162, "step": 6887 }, { "epoch": 0.23, "grad_norm": 0.6844534277915955, "learning_rate": 1.778216189942166e-05, "loss": 2.187, "step": 6888 }, { "epoch": 0.23, "grad_norm": 0.7013359069824219, "learning_rate": 1.7781494362455315e-05, "loss": 2.1443, "step": 6889 }, { "epoch": 0.23, "grad_norm": 0.6730186939239502, "learning_rate": 1.7780826737578207e-05, "loss": 2.1743, "step": 6890 }, { "epoch": 0.23, "grad_norm": 0.7261166572570801, "learning_rate": 1.778015902479788e-05, "loss": 2.1706, "step": 6891 }, { "epoch": 0.23, "grad_norm": 0.7098137736320496, "learning_rate": 1.7779491224121875e-05, "loss": 2.1456, "step": 6892 }, { "epoch": 0.23, "grad_norm": 0.7070254683494568, "learning_rate": 1.7778823335557736e-05, "loss": 2.2083, "step": 6893 }, { "epoch": 0.23, "grad_norm": 0.7378586530685425, "learning_rate": 1.7778155359113014e-05, "loss": 2.2822, "step": 6894 }, { "epoch": 0.23, "grad_norm": 0.7364852428436279, "learning_rate": 1.7777487294795243e-05, "loss": 2.1838, "step": 6895 }, { "epoch": 0.23, "grad_norm": 0.7221633195877075, "learning_rate": 1.7776819142611984e-05, "loss": 2.1188, "step": 6896 }, { "epoch": 0.23, "grad_norm": 0.6738303899765015, "learning_rate": 1.7776150902570778e-05, "loss": 2.1318, "step": 6897 }, { "epoch": 0.23, "grad_norm": 0.7121299505233765, "learning_rate": 1.777548257467918e-05, "loss": 2.1177, "step": 6898 }, { "epoch": 0.23, "grad_norm": 0.6904041171073914, "learning_rate": 1.7774814158944736e-05, "loss": 2.1602, "step": 6899 }, { "epoch": 0.23, "grad_norm": 0.667462170124054, "learning_rate": 1.7774145655374995e-05, "loss": 2.1384, "step": 6900 }, { "epoch": 0.23, "grad_norm": 0.6945261359214783, "learning_rate": 1.7773477063977512e-05, "loss": 2.1315, "step": 6901 }, { "epoch": 0.23, "grad_norm": 0.7075498104095459, "learning_rate": 1.7772808384759846e-05, "loss": 2.1646, "step": 6902 }, { "epoch": 0.23, "grad_norm": 0.7124106287956238, "learning_rate": 1.7772139617729543e-05, "loss": 2.1501, "step": 6903 }, { "epoch": 0.23, "grad_norm": 0.723992109298706, "learning_rate": 1.777147076289416e-05, "loss": 2.1225, "step": 6904 }, { "epoch": 0.23, "grad_norm": 0.7132938504219055, "learning_rate": 1.7770801820261257e-05, "loss": 2.2064, "step": 6905 }, { "epoch": 0.23, "grad_norm": 0.7230051159858704, "learning_rate": 1.777013278983839e-05, "loss": 2.152, "step": 6906 }, { "epoch": 0.23, "grad_norm": 0.692264974117279, "learning_rate": 1.7769463671633117e-05, "loss": 2.2115, "step": 6907 }, { "epoch": 0.23, "grad_norm": 0.7420234084129333, "learning_rate": 1.7768794465652994e-05, "loss": 2.1924, "step": 6908 }, { "epoch": 0.23, "grad_norm": 0.7254521250724792, "learning_rate": 1.7768125171905583e-05, "loss": 2.1549, "step": 6909 }, { "epoch": 0.23, "grad_norm": 0.6803408265113831, "learning_rate": 1.776745579039845e-05, "loss": 2.0715, "step": 6910 }, { "epoch": 0.23, "grad_norm": 0.718235969543457, "learning_rate": 1.776678632113915e-05, "loss": 2.23, "step": 6911 }, { "epoch": 0.23, "grad_norm": 0.7095158100128174, "learning_rate": 1.7766116764135252e-05, "loss": 2.1275, "step": 6912 }, { "epoch": 0.23, "grad_norm": 0.717291533946991, "learning_rate": 1.7765447119394318e-05, "loss": 2.1735, "step": 6913 }, { "epoch": 0.23, "grad_norm": 0.6823374032974243, "learning_rate": 1.7764777386923915e-05, "loss": 2.1684, "step": 6914 }, { "epoch": 0.23, "grad_norm": 0.680199146270752, "learning_rate": 1.7764107566731606e-05, "loss": 2.1922, "step": 6915 }, { "epoch": 0.23, "grad_norm": 0.6936507821083069, "learning_rate": 1.7763437658824962e-05, "loss": 2.1487, "step": 6916 }, { "epoch": 0.23, "grad_norm": 0.7469543814659119, "learning_rate": 1.7762767663211546e-05, "loss": 2.1431, "step": 6917 }, { "epoch": 0.23, "grad_norm": 0.6895522475242615, "learning_rate": 1.7762097579898934e-05, "loss": 2.1719, "step": 6918 }, { "epoch": 0.23, "grad_norm": 0.7110211849212646, "learning_rate": 1.776142740889469e-05, "loss": 2.1726, "step": 6919 }, { "epoch": 0.23, "grad_norm": 0.689598560333252, "learning_rate": 1.7760757150206386e-05, "loss": 2.1256, "step": 6920 }, { "epoch": 0.23, "grad_norm": 0.6985383629798889, "learning_rate": 1.77600868038416e-05, "loss": 2.1273, "step": 6921 }, { "epoch": 0.23, "grad_norm": 0.7095934748649597, "learning_rate": 1.7759416369807902e-05, "loss": 2.1946, "step": 6922 }, { "epoch": 0.23, "grad_norm": 0.6924101710319519, "learning_rate": 1.7758745848112856e-05, "loss": 2.1422, "step": 6923 }, { "epoch": 0.23, "grad_norm": 0.7058820724487305, "learning_rate": 1.7758075238764053e-05, "loss": 2.1465, "step": 6924 }, { "epoch": 0.23, "grad_norm": 0.674541175365448, "learning_rate": 1.7757404541769064e-05, "loss": 2.0787, "step": 6925 }, { "epoch": 0.23, "grad_norm": 0.7304415106773376, "learning_rate": 1.7756733757135466e-05, "loss": 2.2224, "step": 6926 }, { "epoch": 0.23, "grad_norm": 0.6868225336074829, "learning_rate": 1.7756062884870834e-05, "loss": 2.1023, "step": 6927 }, { "epoch": 0.23, "grad_norm": 0.7010774612426758, "learning_rate": 1.7755391924982745e-05, "loss": 2.1425, "step": 6928 }, { "epoch": 0.23, "grad_norm": 0.6922581195831299, "learning_rate": 1.7754720877478787e-05, "loss": 2.1399, "step": 6929 }, { "epoch": 0.23, "grad_norm": 0.6922581195831299, "learning_rate": 1.7754049742366537e-05, "loss": 2.1483, "step": 6930 }, { "epoch": 0.23, "grad_norm": 0.6921782493591309, "learning_rate": 1.775337851965358e-05, "loss": 2.1345, "step": 6931 }, { "epoch": 0.23, "grad_norm": 0.6929617524147034, "learning_rate": 1.7752707209347492e-05, "loss": 2.1222, "step": 6932 }, { "epoch": 0.23, "grad_norm": 0.687907874584198, "learning_rate": 1.7752035811455864e-05, "loss": 2.1598, "step": 6933 }, { "epoch": 0.23, "grad_norm": 0.7117748260498047, "learning_rate": 1.7751364325986277e-05, "loss": 2.1598, "step": 6934 }, { "epoch": 0.23, "grad_norm": 0.670447051525116, "learning_rate": 1.775069275294632e-05, "loss": 2.1551, "step": 6935 }, { "epoch": 0.23, "grad_norm": 0.6872610449790955, "learning_rate": 1.7750021092343578e-05, "loss": 2.2419, "step": 6936 }, { "epoch": 0.23, "grad_norm": 0.7005404829978943, "learning_rate": 1.7749349344185638e-05, "loss": 2.198, "step": 6937 }, { "epoch": 0.23, "grad_norm": 0.7544254064559937, "learning_rate": 1.774867750848009e-05, "loss": 2.2047, "step": 6938 }, { "epoch": 0.23, "grad_norm": 0.7147417664527893, "learning_rate": 1.774800558523453e-05, "loss": 2.1163, "step": 6939 }, { "epoch": 0.23, "grad_norm": 0.6980299353599548, "learning_rate": 1.774733357445654e-05, "loss": 2.1198, "step": 6940 }, { "epoch": 0.23, "grad_norm": 0.6910920143127441, "learning_rate": 1.7746661476153714e-05, "loss": 2.1569, "step": 6941 }, { "epoch": 0.23, "grad_norm": 0.6915248036384583, "learning_rate": 1.774598929033365e-05, "loss": 2.1883, "step": 6942 }, { "epoch": 0.23, "grad_norm": 0.6882959604263306, "learning_rate": 1.7745317017003937e-05, "loss": 2.0899, "step": 6943 }, { "epoch": 0.23, "grad_norm": 0.6960617303848267, "learning_rate": 1.7744644656172172e-05, "loss": 2.1908, "step": 6944 }, { "epoch": 0.23, "grad_norm": 0.7447720766067505, "learning_rate": 1.7743972207845952e-05, "loss": 2.1812, "step": 6945 }, { "epoch": 0.23, "grad_norm": 0.6778199672698975, "learning_rate": 1.7743299672032868e-05, "loss": 2.1864, "step": 6946 }, { "epoch": 0.23, "grad_norm": 0.6879332661628723, "learning_rate": 1.7742627048740525e-05, "loss": 2.1759, "step": 6947 }, { "epoch": 0.23, "grad_norm": 0.7041139602661133, "learning_rate": 1.7741954337976522e-05, "loss": 2.1894, "step": 6948 }, { "epoch": 0.23, "grad_norm": 0.6966969966888428, "learning_rate": 1.7741281539748453e-05, "loss": 2.0973, "step": 6949 }, { "epoch": 0.23, "grad_norm": 0.6998757123947144, "learning_rate": 1.774060865406392e-05, "loss": 2.1341, "step": 6950 }, { "epoch": 0.23, "grad_norm": 0.6866270899772644, "learning_rate": 1.773993568093053e-05, "loss": 2.1833, "step": 6951 }, { "epoch": 0.23, "grad_norm": 0.7264165878295898, "learning_rate": 1.7739262620355883e-05, "loss": 2.1901, "step": 6952 }, { "epoch": 0.23, "grad_norm": 0.7377843856811523, "learning_rate": 1.773858947234758e-05, "loss": 2.2135, "step": 6953 }, { "epoch": 0.23, "grad_norm": 0.7093053460121155, "learning_rate": 1.7737916236913234e-05, "loss": 2.1458, "step": 6954 }, { "epoch": 0.23, "grad_norm": 0.7045026421546936, "learning_rate": 1.7737242914060438e-05, "loss": 2.169, "step": 6955 }, { "epoch": 0.23, "grad_norm": 0.7239832282066345, "learning_rate": 1.773656950379681e-05, "loss": 2.1406, "step": 6956 }, { "epoch": 0.23, "grad_norm": 0.7197915315628052, "learning_rate": 1.7735896006129953e-05, "loss": 2.209, "step": 6957 }, { "epoch": 0.23, "grad_norm": 0.7131180763244629, "learning_rate": 1.7735222421067474e-05, "loss": 2.1907, "step": 6958 }, { "epoch": 0.23, "grad_norm": 0.6831712126731873, "learning_rate": 1.773454874861699e-05, "loss": 2.2196, "step": 6959 }, { "epoch": 0.23, "grad_norm": 0.7192733287811279, "learning_rate": 1.7733874988786105e-05, "loss": 2.2355, "step": 6960 }, { "epoch": 0.23, "grad_norm": 0.7124232053756714, "learning_rate": 1.7733201141582436e-05, "loss": 2.1509, "step": 6961 }, { "epoch": 0.23, "grad_norm": 0.6945650577545166, "learning_rate": 1.773252720701359e-05, "loss": 2.1304, "step": 6962 }, { "epoch": 0.23, "grad_norm": 0.7014550566673279, "learning_rate": 1.773185318508718e-05, "loss": 2.1409, "step": 6963 }, { "epoch": 0.23, "grad_norm": 0.7208422422409058, "learning_rate": 1.773117907581083e-05, "loss": 2.1306, "step": 6964 }, { "epoch": 0.23, "grad_norm": 0.7048414349555969, "learning_rate": 1.7730504879192146e-05, "loss": 2.1794, "step": 6965 }, { "epoch": 0.23, "grad_norm": 0.6916300654411316, "learning_rate": 1.772983059523875e-05, "loss": 2.1734, "step": 6966 }, { "epoch": 0.23, "grad_norm": 0.7062911987304688, "learning_rate": 1.7729156223958255e-05, "loss": 2.0856, "step": 6967 }, { "epoch": 0.23, "grad_norm": 0.7180600166320801, "learning_rate": 1.7728481765358286e-05, "loss": 2.2398, "step": 6968 }, { "epoch": 0.23, "grad_norm": 0.6934996843338013, "learning_rate": 1.7727807219446456e-05, "loss": 2.2006, "step": 6969 }, { "epoch": 0.23, "grad_norm": 0.7503741383552551, "learning_rate": 1.772713258623039e-05, "loss": 2.1749, "step": 6970 }, { "epoch": 0.23, "grad_norm": 0.717060387134552, "learning_rate": 1.772645786571771e-05, "loss": 2.1679, "step": 6971 }, { "epoch": 0.23, "grad_norm": 0.7326761484146118, "learning_rate": 1.772578305791604e-05, "loss": 2.1056, "step": 6972 }, { "epoch": 0.23, "grad_norm": 0.6984583139419556, "learning_rate": 1.7725108162832996e-05, "loss": 2.1224, "step": 6973 }, { "epoch": 0.23, "grad_norm": 0.6965352296829224, "learning_rate": 1.7724433180476212e-05, "loss": 2.1575, "step": 6974 }, { "epoch": 0.23, "grad_norm": 0.6931893825531006, "learning_rate": 1.7723758110853306e-05, "loss": 2.1079, "step": 6975 }, { "epoch": 0.23, "grad_norm": 0.732840359210968, "learning_rate": 1.7723082953971908e-05, "loss": 2.1613, "step": 6976 }, { "epoch": 0.23, "grad_norm": 0.6816244721412659, "learning_rate": 1.772240770983964e-05, "loss": 2.1744, "step": 6977 }, { "epoch": 0.23, "grad_norm": 0.6839824914932251, "learning_rate": 1.7721732378464144e-05, "loss": 2.1294, "step": 6978 }, { "epoch": 0.23, "grad_norm": 0.7114626169204712, "learning_rate": 1.7721056959853035e-05, "loss": 2.2076, "step": 6979 }, { "epoch": 0.23, "grad_norm": 0.7165341973304749, "learning_rate": 1.772038145401395e-05, "loss": 2.184, "step": 6980 }, { "epoch": 0.23, "grad_norm": 0.7392146587371826, "learning_rate": 1.7719705860954523e-05, "loss": 2.2029, "step": 6981 }, { "epoch": 0.23, "grad_norm": 0.7326892614364624, "learning_rate": 1.7719030180682383e-05, "loss": 2.1645, "step": 6982 }, { "epoch": 0.23, "grad_norm": 0.6810978055000305, "learning_rate": 1.7718354413205163e-05, "loss": 2.1694, "step": 6983 }, { "epoch": 0.23, "grad_norm": 0.7064249515533447, "learning_rate": 1.7717678558530494e-05, "loss": 2.2307, "step": 6984 }, { "epoch": 0.23, "grad_norm": 0.6877684593200684, "learning_rate": 1.771700261666602e-05, "loss": 2.095, "step": 6985 }, { "epoch": 0.23, "grad_norm": 0.7236197590827942, "learning_rate": 1.771632658761937e-05, "loss": 2.1905, "step": 6986 }, { "epoch": 0.23, "grad_norm": 0.6959729790687561, "learning_rate": 1.7715650471398186e-05, "loss": 2.1637, "step": 6987 }, { "epoch": 0.23, "grad_norm": 0.6859177350997925, "learning_rate": 1.771497426801011e-05, "loss": 2.1751, "step": 6988 }, { "epoch": 0.23, "grad_norm": 0.705310583114624, "learning_rate": 1.771429797746277e-05, "loss": 2.2228, "step": 6989 }, { "epoch": 0.23, "grad_norm": 0.6920613050460815, "learning_rate": 1.7713621599763816e-05, "loss": 2.1959, "step": 6990 }, { "epoch": 0.23, "grad_norm": 0.6908178329467773, "learning_rate": 1.7712945134920884e-05, "loss": 2.1529, "step": 6991 }, { "epoch": 0.23, "grad_norm": 0.7605025172233582, "learning_rate": 1.7712268582941616e-05, "loss": 2.1404, "step": 6992 }, { "epoch": 0.23, "grad_norm": 0.7012733221054077, "learning_rate": 1.771159194383366e-05, "loss": 2.1866, "step": 6993 }, { "epoch": 0.23, "grad_norm": 0.6853523254394531, "learning_rate": 1.7710915217604656e-05, "loss": 2.2352, "step": 6994 }, { "epoch": 0.23, "grad_norm": 0.7281190156936646, "learning_rate": 1.7710238404262252e-05, "loss": 2.1342, "step": 6995 }, { "epoch": 0.23, "grad_norm": 0.6976677775382996, "learning_rate": 1.7709561503814093e-05, "loss": 2.2016, "step": 6996 }, { "epoch": 0.23, "grad_norm": 0.7000302076339722, "learning_rate": 1.7708884516267827e-05, "loss": 2.1113, "step": 6997 }, { "epoch": 0.23, "grad_norm": 0.7196448445320129, "learning_rate": 1.77082074416311e-05, "loss": 2.1871, "step": 6998 }, { "epoch": 0.23, "grad_norm": 0.7371408939361572, "learning_rate": 1.7707530279911563e-05, "loss": 2.2159, "step": 6999 }, { "epoch": 0.23, "grad_norm": 0.6803138852119446, "learning_rate": 1.770685303111687e-05, "loss": 2.21, "step": 7000 }, { "epoch": 0.23, "grad_norm": 0.6637128591537476, "learning_rate": 1.7706175695254663e-05, "loss": 2.1477, "step": 7001 }, { "epoch": 0.23, "grad_norm": 0.6885989904403687, "learning_rate": 1.77054982723326e-05, "loss": 2.1902, "step": 7002 }, { "epoch": 0.23, "grad_norm": 0.6914072632789612, "learning_rate": 1.7704820762358338e-05, "loss": 2.1155, "step": 7003 }, { "epoch": 0.23, "grad_norm": 0.6976311802864075, "learning_rate": 1.7704143165339523e-05, "loss": 2.1543, "step": 7004 }, { "epoch": 0.23, "grad_norm": 0.7220262289047241, "learning_rate": 1.7703465481283814e-05, "loss": 2.2428, "step": 7005 }, { "epoch": 0.23, "grad_norm": 0.6914054155349731, "learning_rate": 1.7702787710198865e-05, "loss": 2.1638, "step": 7006 }, { "epoch": 0.23, "grad_norm": 0.7070133686065674, "learning_rate": 1.7702109852092335e-05, "loss": 2.1536, "step": 7007 }, { "epoch": 0.23, "grad_norm": 0.7063805460929871, "learning_rate": 1.7701431906971883e-05, "loss": 2.1977, "step": 7008 }, { "epoch": 0.23, "grad_norm": 0.7011902332305908, "learning_rate": 1.7700753874845165e-05, "loss": 2.1261, "step": 7009 }, { "epoch": 0.23, "grad_norm": 0.6850791573524475, "learning_rate": 1.7700075755719846e-05, "loss": 2.1723, "step": 7010 }, { "epoch": 0.23, "grad_norm": 0.7156707048416138, "learning_rate": 1.7699397549603583e-05, "loss": 2.1862, "step": 7011 }, { "epoch": 0.23, "grad_norm": 0.6938273310661316, "learning_rate": 1.769871925650404e-05, "loss": 2.1351, "step": 7012 }, { "epoch": 0.23, "grad_norm": 0.6962174773216248, "learning_rate": 1.7698040876428875e-05, "loss": 2.1195, "step": 7013 }, { "epoch": 0.23, "grad_norm": 0.6999425888061523, "learning_rate": 1.7697362409385755e-05, "loss": 2.1429, "step": 7014 }, { "epoch": 0.23, "grad_norm": 0.729241132736206, "learning_rate": 1.769668385538235e-05, "loss": 2.175, "step": 7015 }, { "epoch": 0.23, "grad_norm": 0.7249606251716614, "learning_rate": 1.769600521442632e-05, "loss": 2.1501, "step": 7016 }, { "epoch": 0.23, "grad_norm": 0.7185417413711548, "learning_rate": 1.769532648652533e-05, "loss": 2.1781, "step": 7017 }, { "epoch": 0.23, "grad_norm": 0.6729242205619812, "learning_rate": 1.7694647671687058e-05, "loss": 2.1217, "step": 7018 }, { "epoch": 0.23, "grad_norm": 0.7271994948387146, "learning_rate": 1.7693968769919162e-05, "loss": 2.1365, "step": 7019 }, { "epoch": 0.23, "grad_norm": 0.7043911218643188, "learning_rate": 1.7693289781229314e-05, "loss": 2.1888, "step": 7020 }, { "epoch": 0.23, "grad_norm": 0.7178796529769897, "learning_rate": 1.769261070562519e-05, "loss": 2.1697, "step": 7021 }, { "epoch": 0.23, "grad_norm": 0.7304026484489441, "learning_rate": 1.7691931543114457e-05, "loss": 2.238, "step": 7022 }, { "epoch": 0.23, "grad_norm": 0.7023227214813232, "learning_rate": 1.769125229370479e-05, "loss": 2.1849, "step": 7023 }, { "epoch": 0.23, "grad_norm": 0.7478675842285156, "learning_rate": 1.769057295740386e-05, "loss": 2.217, "step": 7024 }, { "epoch": 0.23, "grad_norm": 0.6802921891212463, "learning_rate": 1.768989353421935e-05, "loss": 2.0981, "step": 7025 }, { "epoch": 0.23, "grad_norm": 0.7544086575508118, "learning_rate": 1.7689214024158926e-05, "loss": 2.1034, "step": 7026 }, { "epoch": 0.23, "grad_norm": 0.6963983178138733, "learning_rate": 1.7688534427230264e-05, "loss": 2.2001, "step": 7027 }, { "epoch": 0.23, "grad_norm": 0.6966838240623474, "learning_rate": 1.768785474344105e-05, "loss": 2.1898, "step": 7028 }, { "epoch": 0.23, "grad_norm": 0.7023069858551025, "learning_rate": 1.7687174972798957e-05, "loss": 2.18, "step": 7029 }, { "epoch": 0.23, "grad_norm": 0.6893876194953918, "learning_rate": 1.7686495115311668e-05, "loss": 2.1816, "step": 7030 }, { "epoch": 0.23, "grad_norm": 0.73393714427948, "learning_rate": 1.7685815170986862e-05, "loss": 2.1925, "step": 7031 }, { "epoch": 0.23, "grad_norm": 0.696412205696106, "learning_rate": 1.7685135139832217e-05, "loss": 2.1382, "step": 7032 }, { "epoch": 0.23, "grad_norm": 0.7571011185646057, "learning_rate": 1.7684455021855425e-05, "loss": 2.1897, "step": 7033 }, { "epoch": 0.23, "grad_norm": 0.7051617503166199, "learning_rate": 1.768377481706416e-05, "loss": 2.1441, "step": 7034 }, { "epoch": 0.23, "grad_norm": 0.6864407062530518, "learning_rate": 1.7683094525466115e-05, "loss": 2.1558, "step": 7035 }, { "epoch": 0.23, "grad_norm": 0.7346776127815247, "learning_rate": 1.7682414147068962e-05, "loss": 2.1839, "step": 7036 }, { "epoch": 0.23, "grad_norm": 0.695116400718689, "learning_rate": 1.7681733681880405e-05, "loss": 2.2075, "step": 7037 }, { "epoch": 0.23, "grad_norm": 0.6876145601272583, "learning_rate": 1.7681053129908116e-05, "loss": 2.211, "step": 7038 }, { "epoch": 0.23, "grad_norm": 0.7274297475814819, "learning_rate": 1.7680372491159794e-05, "loss": 2.1938, "step": 7039 }, { "epoch": 0.23, "grad_norm": 0.7030305862426758, "learning_rate": 1.7679691765643125e-05, "loss": 2.1505, "step": 7040 }, { "epoch": 0.23, "grad_norm": 0.7753658294677734, "learning_rate": 1.76790109533658e-05, "loss": 2.1743, "step": 7041 }, { "epoch": 0.23, "grad_norm": 0.7701655030250549, "learning_rate": 1.7678330054335505e-05, "loss": 2.2, "step": 7042 }, { "epoch": 0.23, "grad_norm": 0.7166990041732788, "learning_rate": 1.767764906855994e-05, "loss": 2.1451, "step": 7043 }, { "epoch": 0.23, "grad_norm": 0.7324900031089783, "learning_rate": 1.7676967996046795e-05, "loss": 2.2178, "step": 7044 }, { "epoch": 0.23, "grad_norm": 0.7566375732421875, "learning_rate": 1.7676286836803768e-05, "loss": 2.1504, "step": 7045 }, { "epoch": 0.23, "grad_norm": 0.7311758399009705, "learning_rate": 1.7675605590838546e-05, "loss": 2.1594, "step": 7046 }, { "epoch": 0.23, "grad_norm": 0.704200804233551, "learning_rate": 1.767492425815883e-05, "loss": 2.1562, "step": 7047 }, { "epoch": 0.23, "grad_norm": 0.7691122889518738, "learning_rate": 1.7674242838772322e-05, "loss": 2.2035, "step": 7048 }, { "epoch": 0.23, "grad_norm": 0.7102630138397217, "learning_rate": 1.767356133268671e-05, "loss": 2.2291, "step": 7049 }, { "epoch": 0.23, "grad_norm": 0.7495120167732239, "learning_rate": 1.7672879739909704e-05, "loss": 2.1761, "step": 7050 }, { "epoch": 0.23, "grad_norm": 0.72347491979599, "learning_rate": 1.7672198060449e-05, "loss": 2.2156, "step": 7051 }, { "epoch": 0.23, "grad_norm": 0.6991457343101501, "learning_rate": 1.7671516294312296e-05, "loss": 2.1692, "step": 7052 }, { "epoch": 0.23, "grad_norm": 0.7387259006500244, "learning_rate": 1.76708344415073e-05, "loss": 2.2244, "step": 7053 }, { "epoch": 0.23, "grad_norm": 0.6986570358276367, "learning_rate": 1.767015250204171e-05, "loss": 2.1844, "step": 7054 }, { "epoch": 0.23, "grad_norm": 0.7126681804656982, "learning_rate": 1.766947047592323e-05, "loss": 2.185, "step": 7055 }, { "epoch": 0.23, "grad_norm": 0.7179346680641174, "learning_rate": 1.7668788363159572e-05, "loss": 2.1564, "step": 7056 }, { "epoch": 0.23, "grad_norm": 0.7141900658607483, "learning_rate": 1.7668106163758432e-05, "loss": 2.2348, "step": 7057 }, { "epoch": 0.23, "grad_norm": 0.7525929808616638, "learning_rate": 1.766742387772753e-05, "loss": 2.1186, "step": 7058 }, { "epoch": 0.23, "grad_norm": 0.7265985012054443, "learning_rate": 1.766674150507456e-05, "loss": 2.1401, "step": 7059 }, { "epoch": 0.23, "grad_norm": 0.7377471327781677, "learning_rate": 1.766605904580724e-05, "loss": 2.1818, "step": 7060 }, { "epoch": 0.23, "grad_norm": 0.7004332542419434, "learning_rate": 1.766537649993328e-05, "loss": 2.233, "step": 7061 }, { "epoch": 0.23, "grad_norm": 0.7027762532234192, "learning_rate": 1.766469386746038e-05, "loss": 2.1524, "step": 7062 }, { "epoch": 0.23, "grad_norm": 0.7092044353485107, "learning_rate": 1.7664011148396268e-05, "loss": 2.2288, "step": 7063 }, { "epoch": 0.24, "grad_norm": 0.7277551293373108, "learning_rate": 1.766332834274865e-05, "loss": 2.102, "step": 7064 }, { "epoch": 0.24, "grad_norm": 0.7051529288291931, "learning_rate": 1.7662645450525236e-05, "loss": 2.1962, "step": 7065 }, { "epoch": 0.24, "grad_norm": 0.7289415001869202, "learning_rate": 1.7661962471733747e-05, "loss": 2.257, "step": 7066 }, { "epoch": 0.24, "grad_norm": 0.7112755179405212, "learning_rate": 1.7661279406381897e-05, "loss": 2.1057, "step": 7067 }, { "epoch": 0.24, "grad_norm": 0.7327879071235657, "learning_rate": 1.7660596254477402e-05, "loss": 2.1188, "step": 7068 }, { "epoch": 0.24, "grad_norm": 0.7188847661018372, "learning_rate": 1.7659913016027977e-05, "loss": 2.1882, "step": 7069 }, { "epoch": 0.24, "grad_norm": 0.6882083415985107, "learning_rate": 1.7659229691041345e-05, "loss": 2.1659, "step": 7070 }, { "epoch": 0.24, "grad_norm": 0.6994731426239014, "learning_rate": 1.7658546279525226e-05, "loss": 2.1826, "step": 7071 }, { "epoch": 0.24, "grad_norm": 0.6930410265922546, "learning_rate": 1.765786278148734e-05, "loss": 2.104, "step": 7072 }, { "epoch": 0.24, "grad_norm": 0.7055414915084839, "learning_rate": 1.7657179196935404e-05, "loss": 2.1656, "step": 7073 }, { "epoch": 0.24, "grad_norm": 0.7362331748008728, "learning_rate": 1.7656495525877152e-05, "loss": 2.1441, "step": 7074 }, { "epoch": 0.24, "grad_norm": 0.767522394657135, "learning_rate": 1.7655811768320298e-05, "loss": 2.2187, "step": 7075 }, { "epoch": 0.24, "grad_norm": 0.7122377753257751, "learning_rate": 1.7655127924272567e-05, "loss": 2.1469, "step": 7076 }, { "epoch": 0.24, "grad_norm": 0.6900952458381653, "learning_rate": 1.765444399374169e-05, "loss": 2.1424, "step": 7077 }, { "epoch": 0.24, "grad_norm": 0.7402431964874268, "learning_rate": 1.7653759976735387e-05, "loss": 2.1794, "step": 7078 }, { "epoch": 0.24, "grad_norm": 0.7040723562240601, "learning_rate": 1.765307587326139e-05, "loss": 2.1603, "step": 7079 }, { "epoch": 0.24, "grad_norm": 0.6977261900901794, "learning_rate": 1.7652391683327428e-05, "loss": 2.2098, "step": 7080 }, { "epoch": 0.24, "grad_norm": 0.7347288131713867, "learning_rate": 1.7651707406941233e-05, "loss": 2.1827, "step": 7081 }, { "epoch": 0.24, "grad_norm": 0.7046722173690796, "learning_rate": 1.7651023044110525e-05, "loss": 2.2361, "step": 7082 }, { "epoch": 0.24, "grad_norm": 0.6939675807952881, "learning_rate": 1.765033859484305e-05, "loss": 2.169, "step": 7083 }, { "epoch": 0.24, "grad_norm": 0.7467039227485657, "learning_rate": 1.7649654059146527e-05, "loss": 2.1523, "step": 7084 }, { "epoch": 0.24, "grad_norm": 0.6875421404838562, "learning_rate": 1.76489694370287e-05, "loss": 2.1816, "step": 7085 }, { "epoch": 0.24, "grad_norm": 0.6921672821044922, "learning_rate": 1.7648284728497298e-05, "loss": 2.1743, "step": 7086 }, { "epoch": 0.24, "grad_norm": 0.7377331256866455, "learning_rate": 1.7647599933560057e-05, "loss": 2.1664, "step": 7087 }, { "epoch": 0.24, "grad_norm": 0.7586964964866638, "learning_rate": 1.764691505222471e-05, "loss": 2.1404, "step": 7088 }, { "epoch": 0.24, "grad_norm": 0.7179208397865295, "learning_rate": 1.7646230084499003e-05, "loss": 2.2189, "step": 7089 }, { "epoch": 0.24, "grad_norm": 0.7474113702774048, "learning_rate": 1.764554503039067e-05, "loss": 2.2019, "step": 7090 }, { "epoch": 0.24, "grad_norm": 0.702309250831604, "learning_rate": 1.7644859889907447e-05, "loss": 2.1623, "step": 7091 }, { "epoch": 0.24, "grad_norm": 0.6929931640625, "learning_rate": 1.7644174663057078e-05, "loss": 2.1661, "step": 7092 }, { "epoch": 0.24, "grad_norm": 0.7058249115943909, "learning_rate": 1.7643489349847306e-05, "loss": 2.17, "step": 7093 }, { "epoch": 0.24, "grad_norm": 0.7427285313606262, "learning_rate": 1.764280395028587e-05, "loss": 2.1432, "step": 7094 }, { "epoch": 0.24, "grad_norm": 0.6774200201034546, "learning_rate": 1.7642118464380512e-05, "loss": 2.138, "step": 7095 }, { "epoch": 0.24, "grad_norm": 0.693178117275238, "learning_rate": 1.7641432892138977e-05, "loss": 2.1933, "step": 7096 }, { "epoch": 0.24, "grad_norm": 0.696986973285675, "learning_rate": 1.7640747233569014e-05, "loss": 2.1764, "step": 7097 }, { "epoch": 0.24, "grad_norm": 0.6967857480049133, "learning_rate": 1.764006148867837e-05, "loss": 2.2353, "step": 7098 }, { "epoch": 0.24, "grad_norm": 0.6892018914222717, "learning_rate": 1.7639375657474784e-05, "loss": 2.1879, "step": 7099 }, { "epoch": 0.24, "grad_norm": 0.7344846725463867, "learning_rate": 1.7638689739966012e-05, "loss": 2.1801, "step": 7100 }, { "epoch": 0.24, "grad_norm": 0.6911873817443848, "learning_rate": 1.7638003736159798e-05, "loss": 2.2166, "step": 7101 }, { "epoch": 0.24, "grad_norm": 0.7187475562095642, "learning_rate": 1.7637317646063895e-05, "loss": 2.1824, "step": 7102 }, { "epoch": 0.24, "grad_norm": 0.7112851738929749, "learning_rate": 1.7636631469686052e-05, "loss": 2.1736, "step": 7103 }, { "epoch": 0.24, "grad_norm": 0.687077522277832, "learning_rate": 1.763594520703402e-05, "loss": 2.1646, "step": 7104 }, { "epoch": 0.24, "grad_norm": 0.6993844509124756, "learning_rate": 1.763525885811556e-05, "loss": 2.1577, "step": 7105 }, { "epoch": 0.24, "grad_norm": 0.6984513998031616, "learning_rate": 1.7634572422938417e-05, "loss": 2.1692, "step": 7106 }, { "epoch": 0.24, "grad_norm": 0.6940476298332214, "learning_rate": 1.763388590151035e-05, "loss": 2.2246, "step": 7107 }, { "epoch": 0.24, "grad_norm": 0.712977945804596, "learning_rate": 1.7633199293839114e-05, "loss": 2.0875, "step": 7108 }, { "epoch": 0.24, "grad_norm": 0.6979058384895325, "learning_rate": 1.7632512599932464e-05, "loss": 2.1307, "step": 7109 }, { "epoch": 0.24, "grad_norm": 0.7082684636116028, "learning_rate": 1.763182581979816e-05, "loss": 2.1423, "step": 7110 }, { "epoch": 0.24, "grad_norm": 0.7205988168716431, "learning_rate": 1.7631138953443964e-05, "loss": 2.1945, "step": 7111 }, { "epoch": 0.24, "grad_norm": 0.7150731086730957, "learning_rate": 1.7630452000877626e-05, "loss": 2.2081, "step": 7112 }, { "epoch": 0.24, "grad_norm": 0.6804044842720032, "learning_rate": 1.762976496210692e-05, "loss": 2.1488, "step": 7113 }, { "epoch": 0.24, "grad_norm": 0.750855565071106, "learning_rate": 1.7629077837139593e-05, "loss": 2.2993, "step": 7114 }, { "epoch": 0.24, "grad_norm": 0.7272837162017822, "learning_rate": 1.762839062598342e-05, "loss": 2.1838, "step": 7115 }, { "epoch": 0.24, "grad_norm": 0.6976091861724854, "learning_rate": 1.7627703328646167e-05, "loss": 2.1181, "step": 7116 }, { "epoch": 0.24, "grad_norm": 0.688254177570343, "learning_rate": 1.7627015945135585e-05, "loss": 2.1459, "step": 7117 }, { "epoch": 0.24, "grad_norm": 0.7192466855049133, "learning_rate": 1.7626328475459445e-05, "loss": 2.2415, "step": 7118 }, { "epoch": 0.24, "grad_norm": 0.6899108290672302, "learning_rate": 1.762564091962552e-05, "loss": 2.1828, "step": 7119 }, { "epoch": 0.24, "grad_norm": 0.7197570204734802, "learning_rate": 1.7624953277641574e-05, "loss": 2.1719, "step": 7120 }, { "epoch": 0.24, "grad_norm": 0.6901236176490784, "learning_rate": 1.762426554951537e-05, "loss": 2.1099, "step": 7121 }, { "epoch": 0.24, "grad_norm": 0.7143628001213074, "learning_rate": 1.7623577735254684e-05, "loss": 2.2154, "step": 7122 }, { "epoch": 0.24, "grad_norm": 0.7191237211227417, "learning_rate": 1.762288983486728e-05, "loss": 2.1818, "step": 7123 }, { "epoch": 0.24, "grad_norm": 0.6905342936515808, "learning_rate": 1.762220184836094e-05, "loss": 2.1773, "step": 7124 }, { "epoch": 0.24, "grad_norm": 0.6980713605880737, "learning_rate": 1.762151377574343e-05, "loss": 2.1639, "step": 7125 }, { "epoch": 0.24, "grad_norm": 0.6923877000808716, "learning_rate": 1.7620825617022523e-05, "loss": 2.1763, "step": 7126 }, { "epoch": 0.24, "grad_norm": 0.7071407437324524, "learning_rate": 1.7620137372205995e-05, "loss": 2.2079, "step": 7127 }, { "epoch": 0.24, "grad_norm": 0.7185677289962769, "learning_rate": 1.7619449041301618e-05, "loss": 2.1967, "step": 7128 }, { "epoch": 0.24, "grad_norm": 0.7291254997253418, "learning_rate": 1.7618760624317172e-05, "loss": 2.2212, "step": 7129 }, { "epoch": 0.24, "grad_norm": 0.7027580738067627, "learning_rate": 1.7618072121260437e-05, "loss": 2.1419, "step": 7130 }, { "epoch": 0.24, "grad_norm": 0.6847134232521057, "learning_rate": 1.7617383532139185e-05, "loss": 2.1927, "step": 7131 }, { "epoch": 0.24, "grad_norm": 0.707868754863739, "learning_rate": 1.7616694856961197e-05, "loss": 2.1189, "step": 7132 }, { "epoch": 0.24, "grad_norm": 0.6700188517570496, "learning_rate": 1.7616006095734257e-05, "loss": 2.1076, "step": 7133 }, { "epoch": 0.24, "grad_norm": 0.6947464346885681, "learning_rate": 1.7615317248466145e-05, "loss": 2.1699, "step": 7134 }, { "epoch": 0.24, "grad_norm": 0.7023497819900513, "learning_rate": 1.7614628315164638e-05, "loss": 2.2165, "step": 7135 }, { "epoch": 0.24, "grad_norm": 0.724716067314148, "learning_rate": 1.7613939295837523e-05, "loss": 2.2748, "step": 7136 }, { "epoch": 0.24, "grad_norm": 0.6894080638885498, "learning_rate": 1.7613250190492586e-05, "loss": 2.1114, "step": 7137 }, { "epoch": 0.24, "grad_norm": 0.7019832134246826, "learning_rate": 1.761256099913761e-05, "loss": 2.1725, "step": 7138 }, { "epoch": 0.24, "grad_norm": 0.7702217698097229, "learning_rate": 1.7611871721780383e-05, "loss": 2.1482, "step": 7139 }, { "epoch": 0.24, "grad_norm": 0.6937247514724731, "learning_rate": 1.7611182358428686e-05, "loss": 2.099, "step": 7140 }, { "epoch": 0.24, "grad_norm": 0.7083534598350525, "learning_rate": 1.7610492909090313e-05, "loss": 2.1761, "step": 7141 }, { "epoch": 0.24, "grad_norm": 0.733631432056427, "learning_rate": 1.760980337377305e-05, "loss": 2.2098, "step": 7142 }, { "epoch": 0.24, "grad_norm": 0.6954620480537415, "learning_rate": 1.760911375248469e-05, "loss": 2.1378, "step": 7143 }, { "epoch": 0.24, "grad_norm": 0.7171097993850708, "learning_rate": 1.7608424045233024e-05, "loss": 2.2137, "step": 7144 }, { "epoch": 0.24, "grad_norm": 0.7045809626579285, "learning_rate": 1.7607734252025838e-05, "loss": 2.1465, "step": 7145 }, { "epoch": 0.24, "grad_norm": 0.6942517757415771, "learning_rate": 1.7607044372870933e-05, "loss": 2.1518, "step": 7146 }, { "epoch": 0.24, "grad_norm": 0.7139645218849182, "learning_rate": 1.7606354407776096e-05, "loss": 2.1122, "step": 7147 }, { "epoch": 0.24, "grad_norm": 0.7286125421524048, "learning_rate": 1.7605664356749127e-05, "loss": 2.1573, "step": 7148 }, { "epoch": 0.24, "grad_norm": 0.7075738906860352, "learning_rate": 1.760497421979782e-05, "loss": 2.1792, "step": 7149 }, { "epoch": 0.24, "grad_norm": 0.6882444620132446, "learning_rate": 1.7604283996929966e-05, "loss": 2.1807, "step": 7150 }, { "epoch": 0.24, "grad_norm": 0.7576085925102234, "learning_rate": 1.7603593688153374e-05, "loss": 2.1999, "step": 7151 }, { "epoch": 0.24, "grad_norm": 0.6881242394447327, "learning_rate": 1.760290329347583e-05, "loss": 2.2123, "step": 7152 }, { "epoch": 0.24, "grad_norm": 0.7222576141357422, "learning_rate": 1.7602212812905145e-05, "loss": 2.2091, "step": 7153 }, { "epoch": 0.24, "grad_norm": 0.7324318289756775, "learning_rate": 1.7601522246449116e-05, "loss": 2.1703, "step": 7154 }, { "epoch": 0.24, "grad_norm": 0.7193495035171509, "learning_rate": 1.760083159411554e-05, "loss": 2.2076, "step": 7155 }, { "epoch": 0.24, "grad_norm": 0.7113542556762695, "learning_rate": 1.7600140855912228e-05, "loss": 2.139, "step": 7156 }, { "epoch": 0.24, "grad_norm": 0.7306573987007141, "learning_rate": 1.7599450031846975e-05, "loss": 2.1168, "step": 7157 }, { "epoch": 0.24, "grad_norm": 0.7024623155593872, "learning_rate": 1.759875912192759e-05, "loss": 2.2138, "step": 7158 }, { "epoch": 0.24, "grad_norm": 0.7339100241661072, "learning_rate": 1.7598068126161877e-05, "loss": 2.1328, "step": 7159 }, { "epoch": 0.24, "grad_norm": 0.7010151147842407, "learning_rate": 1.7597377044557645e-05, "loss": 2.0788, "step": 7160 }, { "epoch": 0.24, "grad_norm": 0.6988605856895447, "learning_rate": 1.7596685877122698e-05, "loss": 2.123, "step": 7161 }, { "epoch": 0.24, "grad_norm": 0.723537266254425, "learning_rate": 1.7595994623864844e-05, "loss": 2.1786, "step": 7162 }, { "epoch": 0.24, "grad_norm": 0.7330615520477295, "learning_rate": 1.7595303284791897e-05, "loss": 2.1655, "step": 7163 }, { "epoch": 0.24, "grad_norm": 0.7243181467056274, "learning_rate": 1.7594611859911666e-05, "loss": 2.1281, "step": 7164 }, { "epoch": 0.24, "grad_norm": 0.7109289765357971, "learning_rate": 1.759392034923196e-05, "loss": 2.1622, "step": 7165 }, { "epoch": 0.24, "grad_norm": 0.7568494081497192, "learning_rate": 1.7593228752760595e-05, "loss": 2.1907, "step": 7166 }, { "epoch": 0.24, "grad_norm": 0.745379626750946, "learning_rate": 1.7592537070505375e-05, "loss": 2.1535, "step": 7167 }, { "epoch": 0.24, "grad_norm": 0.7044739127159119, "learning_rate": 1.7591845302474127e-05, "loss": 2.0973, "step": 7168 }, { "epoch": 0.24, "grad_norm": 0.7193352580070496, "learning_rate": 1.7591153448674657e-05, "loss": 2.183, "step": 7169 }, { "epoch": 0.24, "grad_norm": 0.7042730450630188, "learning_rate": 1.7590461509114784e-05, "loss": 2.1836, "step": 7170 }, { "epoch": 0.24, "grad_norm": 0.7369201183319092, "learning_rate": 1.7589769483802327e-05, "loss": 2.2254, "step": 7171 }, { "epoch": 0.24, "grad_norm": 0.6803762316703796, "learning_rate": 1.7589077372745103e-05, "loss": 2.1385, "step": 7172 }, { "epoch": 0.24, "grad_norm": 0.6886398792266846, "learning_rate": 1.7588385175950927e-05, "loss": 2.1688, "step": 7173 }, { "epoch": 0.24, "grad_norm": 0.7159465551376343, "learning_rate": 1.7587692893427624e-05, "loss": 2.0772, "step": 7174 }, { "epoch": 0.24, "grad_norm": 0.7281029224395752, "learning_rate": 1.7587000525183015e-05, "loss": 2.1459, "step": 7175 }, { "epoch": 0.24, "grad_norm": 0.6926736831665039, "learning_rate": 1.758630807122492e-05, "loss": 2.1492, "step": 7176 }, { "epoch": 0.24, "grad_norm": 0.7265029549598694, "learning_rate": 1.758561553156116e-05, "loss": 2.2149, "step": 7177 }, { "epoch": 0.24, "grad_norm": 0.6997843980789185, "learning_rate": 1.7584922906199566e-05, "loss": 2.1439, "step": 7178 }, { "epoch": 0.24, "grad_norm": 0.7008737921714783, "learning_rate": 1.7584230195147957e-05, "loss": 2.1605, "step": 7179 }, { "epoch": 0.24, "grad_norm": 0.7115992903709412, "learning_rate": 1.7583537398414157e-05, "loss": 2.2262, "step": 7180 }, { "epoch": 0.24, "grad_norm": 0.6865088939666748, "learning_rate": 1.7582844516005998e-05, "loss": 2.1604, "step": 7181 }, { "epoch": 0.24, "grad_norm": 0.7284027338027954, "learning_rate": 1.7582151547931307e-05, "loss": 2.2171, "step": 7182 }, { "epoch": 0.24, "grad_norm": 0.7269845604896545, "learning_rate": 1.758145849419791e-05, "loss": 2.1779, "step": 7183 }, { "epoch": 0.24, "grad_norm": 0.70087069272995, "learning_rate": 1.7580765354813635e-05, "loss": 2.1639, "step": 7184 }, { "epoch": 0.24, "grad_norm": 0.7409170269966125, "learning_rate": 1.758007212978632e-05, "loss": 2.2228, "step": 7185 }, { "epoch": 0.24, "grad_norm": 0.7220888137817383, "learning_rate": 1.757937881912379e-05, "loss": 2.0875, "step": 7186 }, { "epoch": 0.24, "grad_norm": 0.7421817183494568, "learning_rate": 1.757868542283388e-05, "loss": 2.186, "step": 7187 }, { "epoch": 0.24, "grad_norm": 0.7095873951911926, "learning_rate": 1.7577991940924427e-05, "loss": 2.1767, "step": 7188 }, { "epoch": 0.24, "grad_norm": 0.6997811198234558, "learning_rate": 1.757729837340326e-05, "loss": 2.1697, "step": 7189 }, { "epoch": 0.24, "grad_norm": 0.6928690075874329, "learning_rate": 1.7576604720278215e-05, "loss": 2.1204, "step": 7190 }, { "epoch": 0.24, "grad_norm": 0.7197240591049194, "learning_rate": 1.757591098155713e-05, "loss": 2.1668, "step": 7191 }, { "epoch": 0.24, "grad_norm": 0.7520041465759277, "learning_rate": 1.7575217157247845e-05, "loss": 2.1643, "step": 7192 }, { "epoch": 0.24, "grad_norm": 0.7292707562446594, "learning_rate": 1.7574523247358193e-05, "loss": 2.1609, "step": 7193 }, { "epoch": 0.24, "grad_norm": 0.7159697413444519, "learning_rate": 1.757382925189602e-05, "loss": 2.1064, "step": 7194 }, { "epoch": 0.24, "grad_norm": 0.7121797800064087, "learning_rate": 1.7573135170869163e-05, "loss": 2.1888, "step": 7195 }, { "epoch": 0.24, "grad_norm": 0.6986796259880066, "learning_rate": 1.7572441004285462e-05, "loss": 2.1751, "step": 7196 }, { "epoch": 0.24, "grad_norm": 0.7070351243019104, "learning_rate": 1.7571746752152764e-05, "loss": 2.1809, "step": 7197 }, { "epoch": 0.24, "grad_norm": 0.7284947633743286, "learning_rate": 1.75710524144789e-05, "loss": 2.174, "step": 7198 }, { "epoch": 0.24, "grad_norm": 0.7155860662460327, "learning_rate": 1.757035799127173e-05, "loss": 2.2541, "step": 7199 }, { "epoch": 0.24, "grad_norm": 0.7115167379379272, "learning_rate": 1.756966348253909e-05, "loss": 2.155, "step": 7200 }, { "epoch": 0.24, "grad_norm": 0.7302448749542236, "learning_rate": 1.756896888828883e-05, "loss": 2.2762, "step": 7201 }, { "epoch": 0.24, "grad_norm": 0.7236961722373962, "learning_rate": 1.7568274208528793e-05, "loss": 2.1774, "step": 7202 }, { "epoch": 0.24, "grad_norm": 0.7028623223304749, "learning_rate": 1.756757944326683e-05, "loss": 2.175, "step": 7203 }, { "epoch": 0.24, "grad_norm": 0.685914933681488, "learning_rate": 1.7566884592510787e-05, "loss": 2.1462, "step": 7204 }, { "epoch": 0.24, "grad_norm": 0.712369441986084, "learning_rate": 1.7566189656268518e-05, "loss": 2.2266, "step": 7205 }, { "epoch": 0.24, "grad_norm": 0.6990700960159302, "learning_rate": 1.7565494634547875e-05, "loss": 2.1827, "step": 7206 }, { "epoch": 0.24, "grad_norm": 0.7054822444915771, "learning_rate": 1.7564799527356705e-05, "loss": 2.1786, "step": 7207 }, { "epoch": 0.24, "grad_norm": 0.7042509913444519, "learning_rate": 1.7564104334702864e-05, "loss": 2.2569, "step": 7208 }, { "epoch": 0.24, "grad_norm": 0.7008150815963745, "learning_rate": 1.7563409056594208e-05, "loss": 2.2296, "step": 7209 }, { "epoch": 0.24, "grad_norm": 0.7111454010009766, "learning_rate": 1.7562713693038586e-05, "loss": 2.1781, "step": 7210 }, { "epoch": 0.24, "grad_norm": 0.6818954944610596, "learning_rate": 1.7562018244043858e-05, "loss": 2.166, "step": 7211 }, { "epoch": 0.24, "grad_norm": 0.7062378525733948, "learning_rate": 1.756132270961788e-05, "loss": 2.2324, "step": 7212 }, { "epoch": 0.24, "grad_norm": 0.7248069047927856, "learning_rate": 1.7560627089768507e-05, "loss": 2.2316, "step": 7213 }, { "epoch": 0.24, "grad_norm": 0.7233408689498901, "learning_rate": 1.75599313845036e-05, "loss": 2.0961, "step": 7214 }, { "epoch": 0.24, "grad_norm": 0.7021569609642029, "learning_rate": 1.755923559383102e-05, "loss": 2.1695, "step": 7215 }, { "epoch": 0.24, "grad_norm": 0.7020743489265442, "learning_rate": 1.7558539717758628e-05, "loss": 2.1291, "step": 7216 }, { "epoch": 0.24, "grad_norm": 0.7038241028785706, "learning_rate": 1.7557843756294283e-05, "loss": 2.1557, "step": 7217 }, { "epoch": 0.24, "grad_norm": 0.7126986384391785, "learning_rate": 1.755714770944585e-05, "loss": 2.1401, "step": 7218 }, { "epoch": 0.24, "grad_norm": 0.7292580604553223, "learning_rate": 1.7556451577221186e-05, "loss": 2.1915, "step": 7219 }, { "epoch": 0.24, "grad_norm": 0.7099244594573975, "learning_rate": 1.7555755359628162e-05, "loss": 2.1581, "step": 7220 }, { "epoch": 0.24, "grad_norm": 0.7247647643089294, "learning_rate": 1.7555059056674644e-05, "loss": 2.1343, "step": 7221 }, { "epoch": 0.24, "grad_norm": 0.7021731734275818, "learning_rate": 1.75543626683685e-05, "loss": 2.1052, "step": 7222 }, { "epoch": 0.24, "grad_norm": 0.7235526442527771, "learning_rate": 1.755366619471759e-05, "loss": 2.1418, "step": 7223 }, { "epoch": 0.24, "grad_norm": 0.6962249279022217, "learning_rate": 1.7552969635729785e-05, "loss": 2.2227, "step": 7224 }, { "epoch": 0.24, "grad_norm": 0.7144291996955872, "learning_rate": 1.755227299141296e-05, "loss": 2.0845, "step": 7225 }, { "epoch": 0.24, "grad_norm": 0.7120006680488586, "learning_rate": 1.7551576261774978e-05, "loss": 2.1192, "step": 7226 }, { "epoch": 0.24, "grad_norm": 0.7338788509368896, "learning_rate": 1.755087944682371e-05, "loss": 2.1476, "step": 7227 }, { "epoch": 0.24, "grad_norm": 0.7074646353721619, "learning_rate": 1.7550182546567034e-05, "loss": 2.1194, "step": 7228 }, { "epoch": 0.24, "grad_norm": 0.7230750322341919, "learning_rate": 1.7549485561012822e-05, "loss": 2.1866, "step": 7229 }, { "epoch": 0.24, "grad_norm": 0.7354404926300049, "learning_rate": 1.7548788490168945e-05, "loss": 2.0884, "step": 7230 }, { "epoch": 0.24, "grad_norm": 0.719078004360199, "learning_rate": 1.754809133404328e-05, "loss": 2.1524, "step": 7231 }, { "epoch": 0.24, "grad_norm": 0.6931889057159424, "learning_rate": 1.7547394092643704e-05, "loss": 2.1343, "step": 7232 }, { "epoch": 0.24, "grad_norm": 0.7164390683174133, "learning_rate": 1.754669676597809e-05, "loss": 2.1451, "step": 7233 }, { "epoch": 0.24, "grad_norm": 0.714684784412384, "learning_rate": 1.7545999354054322e-05, "loss": 2.184, "step": 7234 }, { "epoch": 0.24, "grad_norm": 0.7204262018203735, "learning_rate": 1.7545301856880273e-05, "loss": 2.1609, "step": 7235 }, { "epoch": 0.24, "grad_norm": 0.6923215389251709, "learning_rate": 1.7544604274463824e-05, "loss": 2.207, "step": 7236 }, { "epoch": 0.24, "grad_norm": 0.6961036920547485, "learning_rate": 1.754390660681286e-05, "loss": 2.1285, "step": 7237 }, { "epoch": 0.24, "grad_norm": 0.725296676158905, "learning_rate": 1.754320885393526e-05, "loss": 2.1073, "step": 7238 }, { "epoch": 0.24, "grad_norm": 0.7500540018081665, "learning_rate": 1.7542511015838907e-05, "loss": 2.1075, "step": 7239 }, { "epoch": 0.24, "grad_norm": 0.7047463059425354, "learning_rate": 1.7541813092531686e-05, "loss": 2.1389, "step": 7240 }, { "epoch": 0.24, "grad_norm": 0.7001760601997375, "learning_rate": 1.7541115084021482e-05, "loss": 2.1657, "step": 7241 }, { "epoch": 0.24, "grad_norm": 0.7066453099250793, "learning_rate": 1.7540416990316176e-05, "loss": 2.2254, "step": 7242 }, { "epoch": 0.24, "grad_norm": 0.7352464199066162, "learning_rate": 1.753971881142366e-05, "loss": 2.1193, "step": 7243 }, { "epoch": 0.24, "grad_norm": 0.6961249709129333, "learning_rate": 1.753902054735182e-05, "loss": 2.1574, "step": 7244 }, { "epoch": 0.24, "grad_norm": 0.7194728255271912, "learning_rate": 1.7538322198108543e-05, "loss": 2.1432, "step": 7245 }, { "epoch": 0.24, "grad_norm": 0.7117722630500793, "learning_rate": 1.7537623763701716e-05, "loss": 2.1208, "step": 7246 }, { "epoch": 0.24, "grad_norm": 0.7025547027587891, "learning_rate": 1.753692524413924e-05, "loss": 2.1954, "step": 7247 }, { "epoch": 0.24, "grad_norm": 0.7396434545516968, "learning_rate": 1.7536226639428995e-05, "loss": 2.2003, "step": 7248 }, { "epoch": 0.24, "grad_norm": 0.6975345611572266, "learning_rate": 1.7535527949578878e-05, "loss": 2.1389, "step": 7249 }, { "epoch": 0.24, "grad_norm": 0.7192094326019287, "learning_rate": 1.7534829174596787e-05, "loss": 2.1633, "step": 7250 }, { "epoch": 0.24, "grad_norm": 0.7041571736335754, "learning_rate": 1.7534130314490605e-05, "loss": 2.1869, "step": 7251 }, { "epoch": 0.24, "grad_norm": 0.6926823258399963, "learning_rate": 1.753343136926824e-05, "loss": 2.2033, "step": 7252 }, { "epoch": 0.24, "grad_norm": 0.744451105594635, "learning_rate": 1.753273233893758e-05, "loss": 2.2377, "step": 7253 }, { "epoch": 0.24, "grad_norm": 0.6958133578300476, "learning_rate": 1.7532033223506527e-05, "loss": 2.1863, "step": 7254 }, { "epoch": 0.24, "grad_norm": 0.7272453904151917, "learning_rate": 1.7531334022982974e-05, "loss": 2.1706, "step": 7255 }, { "epoch": 0.24, "grad_norm": 0.7132975459098816, "learning_rate": 1.7530634737374823e-05, "loss": 2.1284, "step": 7256 }, { "epoch": 0.24, "grad_norm": 0.7043978571891785, "learning_rate": 1.7529935366689974e-05, "loss": 2.1633, "step": 7257 }, { "epoch": 0.24, "grad_norm": 0.6916427612304688, "learning_rate": 1.752923591093633e-05, "loss": 2.1578, "step": 7258 }, { "epoch": 0.24, "grad_norm": 0.7118101716041565, "learning_rate": 1.752853637012179e-05, "loss": 2.1702, "step": 7259 }, { "epoch": 0.24, "grad_norm": 0.7304262518882751, "learning_rate": 1.7527836744254258e-05, "loss": 2.1953, "step": 7260 }, { "epoch": 0.24, "grad_norm": 0.7017860412597656, "learning_rate": 1.7527137033341637e-05, "loss": 2.2032, "step": 7261 }, { "epoch": 0.24, "grad_norm": 0.6941752433776855, "learning_rate": 1.752643723739184e-05, "loss": 2.1451, "step": 7262 }, { "epoch": 0.24, "grad_norm": 0.7321357131004333, "learning_rate": 1.752573735641276e-05, "loss": 2.1279, "step": 7263 }, { "epoch": 0.24, "grad_norm": 0.69637531042099, "learning_rate": 1.752503739041231e-05, "loss": 2.0956, "step": 7264 }, { "epoch": 0.24, "grad_norm": 0.7315152883529663, "learning_rate": 1.75243373393984e-05, "loss": 2.1598, "step": 7265 }, { "epoch": 0.24, "grad_norm": 0.6988497376441956, "learning_rate": 1.7523637203378934e-05, "loss": 2.1443, "step": 7266 }, { "epoch": 0.24, "grad_norm": 0.7008230686187744, "learning_rate": 1.7522936982361825e-05, "loss": 2.186, "step": 7267 }, { "epoch": 0.24, "grad_norm": 0.7115781903266907, "learning_rate": 1.7522236676354983e-05, "loss": 2.1296, "step": 7268 }, { "epoch": 0.24, "grad_norm": 0.7292385697364807, "learning_rate": 1.7521536285366318e-05, "loss": 2.2256, "step": 7269 }, { "epoch": 0.24, "grad_norm": 0.7356202006340027, "learning_rate": 1.7520835809403745e-05, "loss": 2.1326, "step": 7270 }, { "epoch": 0.24, "grad_norm": 0.7628737092018127, "learning_rate": 1.7520135248475178e-05, "loss": 2.1813, "step": 7271 }, { "epoch": 0.24, "grad_norm": 0.7206861972808838, "learning_rate": 1.751943460258853e-05, "loss": 2.1567, "step": 7272 }, { "epoch": 0.24, "grad_norm": 0.7141769528388977, "learning_rate": 1.7518733871751716e-05, "loss": 2.1568, "step": 7273 }, { "epoch": 0.24, "grad_norm": 0.7164717316627502, "learning_rate": 1.7518033055972653e-05, "loss": 2.1305, "step": 7274 }, { "epoch": 0.24, "grad_norm": 0.7194958925247192, "learning_rate": 1.751733215525926e-05, "loss": 2.1489, "step": 7275 }, { "epoch": 0.24, "grad_norm": 0.66965651512146, "learning_rate": 1.7516631169619455e-05, "loss": 2.2196, "step": 7276 }, { "epoch": 0.24, "grad_norm": 0.7030080556869507, "learning_rate": 1.7515930099061152e-05, "loss": 2.141, "step": 7277 }, { "epoch": 0.24, "grad_norm": 0.7465642094612122, "learning_rate": 1.7515228943592275e-05, "loss": 2.1471, "step": 7278 }, { "epoch": 0.24, "grad_norm": 0.7094667553901672, "learning_rate": 1.751452770322075e-05, "loss": 2.2516, "step": 7279 }, { "epoch": 0.24, "grad_norm": 0.7043116688728333, "learning_rate": 1.7513826377954493e-05, "loss": 2.1317, "step": 7280 }, { "epoch": 0.24, "grad_norm": 0.7117936015129089, "learning_rate": 1.751312496780143e-05, "loss": 2.1652, "step": 7281 }, { "epoch": 0.24, "grad_norm": 0.7124249339103699, "learning_rate": 1.7512423472769483e-05, "loss": 2.1329, "step": 7282 }, { "epoch": 0.24, "grad_norm": 0.7335474491119385, "learning_rate": 1.751172189286658e-05, "loss": 2.2121, "step": 7283 }, { "epoch": 0.24, "grad_norm": 0.7250624895095825, "learning_rate": 1.751102022810064e-05, "loss": 2.2243, "step": 7284 }, { "epoch": 0.24, "grad_norm": 0.6745933890342712, "learning_rate": 1.75103184784796e-05, "loss": 2.1639, "step": 7285 }, { "epoch": 0.24, "grad_norm": 0.711452305316925, "learning_rate": 1.7509616644011384e-05, "loss": 2.1893, "step": 7286 }, { "epoch": 0.24, "grad_norm": 0.6868210434913635, "learning_rate": 1.750891472470392e-05, "loss": 2.1655, "step": 7287 }, { "epoch": 0.24, "grad_norm": 0.7069107890129089, "learning_rate": 1.7508212720565137e-05, "loss": 2.1877, "step": 7288 }, { "epoch": 0.24, "grad_norm": 0.6904305815696716, "learning_rate": 1.7507510631602967e-05, "loss": 2.2068, "step": 7289 }, { "epoch": 0.24, "grad_norm": 0.732614278793335, "learning_rate": 1.750680845782534e-05, "loss": 2.1553, "step": 7290 }, { "epoch": 0.24, "grad_norm": 0.6902382969856262, "learning_rate": 1.7506106199240192e-05, "loss": 2.1724, "step": 7291 }, { "epoch": 0.24, "grad_norm": 0.7217767238616943, "learning_rate": 1.7505403855855458e-05, "loss": 2.2679, "step": 7292 }, { "epoch": 0.24, "grad_norm": 0.7065476775169373, "learning_rate": 1.7504701427679066e-05, "loss": 2.1749, "step": 7293 }, { "epoch": 0.24, "grad_norm": 0.7379423379898071, "learning_rate": 1.750399891471896e-05, "loss": 2.2309, "step": 7294 }, { "epoch": 0.24, "grad_norm": 0.6837379336357117, "learning_rate": 1.7503296316983064e-05, "loss": 2.1935, "step": 7295 }, { "epoch": 0.24, "grad_norm": 0.6765915751457214, "learning_rate": 1.750259363447933e-05, "loss": 2.1636, "step": 7296 }, { "epoch": 0.24, "grad_norm": 0.7495899796485901, "learning_rate": 1.750189086721569e-05, "loss": 2.1608, "step": 7297 }, { "epoch": 0.24, "grad_norm": 0.6841633915901184, "learning_rate": 1.7501188015200082e-05, "loss": 2.1792, "step": 7298 }, { "epoch": 0.24, "grad_norm": 0.6756229996681213, "learning_rate": 1.7500485078440447e-05, "loss": 2.114, "step": 7299 }, { "epoch": 0.24, "grad_norm": 0.6933177709579468, "learning_rate": 1.7499782056944726e-05, "loss": 2.1558, "step": 7300 }, { "epoch": 0.24, "grad_norm": 0.7170290350914001, "learning_rate": 1.7499078950720866e-05, "loss": 2.1121, "step": 7301 }, { "epoch": 0.24, "grad_norm": 0.7026599645614624, "learning_rate": 1.7498375759776807e-05, "loss": 2.1191, "step": 7302 }, { "epoch": 0.24, "grad_norm": 0.6902745962142944, "learning_rate": 1.7497672484120492e-05, "loss": 2.1091, "step": 7303 }, { "epoch": 0.24, "grad_norm": 0.6959500312805176, "learning_rate": 1.7496969123759866e-05, "loss": 2.1364, "step": 7304 }, { "epoch": 0.24, "grad_norm": 0.7102234363555908, "learning_rate": 1.749626567870288e-05, "loss": 2.1135, "step": 7305 }, { "epoch": 0.24, "grad_norm": 0.757964551448822, "learning_rate": 1.749556214895747e-05, "loss": 2.1173, "step": 7306 }, { "epoch": 0.24, "grad_norm": 0.7235650420188904, "learning_rate": 1.74948585345316e-05, "loss": 2.1222, "step": 7307 }, { "epoch": 0.24, "grad_norm": 0.6940234899520874, "learning_rate": 1.7494154835433207e-05, "loss": 2.1321, "step": 7308 }, { "epoch": 0.24, "grad_norm": 0.6974302530288696, "learning_rate": 1.7493451051670244e-05, "loss": 2.1214, "step": 7309 }, { "epoch": 0.24, "grad_norm": 0.7239911556243896, "learning_rate": 1.7492747183250663e-05, "loss": 2.0629, "step": 7310 }, { "epoch": 0.24, "grad_norm": 0.7024223804473877, "learning_rate": 1.749204323018242e-05, "loss": 2.185, "step": 7311 }, { "epoch": 0.24, "grad_norm": 0.7538290619850159, "learning_rate": 1.749133919247346e-05, "loss": 2.1747, "step": 7312 }, { "epoch": 0.24, "grad_norm": 0.7445806264877319, "learning_rate": 1.749063507013174e-05, "loss": 2.1127, "step": 7313 }, { "epoch": 0.24, "grad_norm": 0.688978374004364, "learning_rate": 1.7489930863165214e-05, "loss": 1.9789, "step": 7314 }, { "epoch": 0.24, "grad_norm": 0.7351188659667969, "learning_rate": 1.7489226571581837e-05, "loss": 2.1352, "step": 7315 }, { "epoch": 0.24, "grad_norm": 0.6995475888252258, "learning_rate": 1.7488522195389573e-05, "loss": 2.145, "step": 7316 }, { "epoch": 0.24, "grad_norm": 0.7234391570091248, "learning_rate": 1.748781773459637e-05, "loss": 2.1589, "step": 7317 }, { "epoch": 0.24, "grad_norm": 0.7088303565979004, "learning_rate": 1.7487113189210192e-05, "loss": 2.1685, "step": 7318 }, { "epoch": 0.24, "grad_norm": 0.7406907081604004, "learning_rate": 1.7486408559238993e-05, "loss": 2.1189, "step": 7319 }, { "epoch": 0.24, "grad_norm": 0.7396381497383118, "learning_rate": 1.7485703844690744e-05, "loss": 2.1539, "step": 7320 }, { "epoch": 0.24, "grad_norm": 0.732856810092926, "learning_rate": 1.7484999045573395e-05, "loss": 2.2015, "step": 7321 }, { "epoch": 0.24, "grad_norm": 0.7239986062049866, "learning_rate": 1.7484294161894916e-05, "loss": 2.1977, "step": 7322 }, { "epoch": 0.24, "grad_norm": 0.6993224620819092, "learning_rate": 1.7483589193663268e-05, "loss": 2.1003, "step": 7323 }, { "epoch": 0.24, "grad_norm": 0.6834139227867126, "learning_rate": 1.7482884140886412e-05, "loss": 2.1539, "step": 7324 }, { "epoch": 0.24, "grad_norm": 0.7545121908187866, "learning_rate": 1.748217900357232e-05, "loss": 2.1639, "step": 7325 }, { "epoch": 0.24, "grad_norm": 0.6862412691116333, "learning_rate": 1.748147378172895e-05, "loss": 2.1949, "step": 7326 }, { "epoch": 0.24, "grad_norm": 0.6971762180328369, "learning_rate": 1.748076847536428e-05, "loss": 2.162, "step": 7327 }, { "epoch": 0.24, "grad_norm": 0.7125928401947021, "learning_rate": 1.748006308448627e-05, "loss": 2.1504, "step": 7328 }, { "epoch": 0.24, "grad_norm": 0.7318286299705505, "learning_rate": 1.7479357609102893e-05, "loss": 2.2281, "step": 7329 }, { "epoch": 0.24, "grad_norm": 0.7059075832366943, "learning_rate": 1.747865204922211e-05, "loss": 2.088, "step": 7330 }, { "epoch": 0.24, "grad_norm": 0.6982176303863525, "learning_rate": 1.7477946404851903e-05, "loss": 2.1215, "step": 7331 }, { "epoch": 0.24, "grad_norm": 0.7462006211280823, "learning_rate": 1.7477240676000243e-05, "loss": 2.1416, "step": 7332 }, { "epoch": 0.24, "grad_norm": 0.7491143345832825, "learning_rate": 1.7476534862675095e-05, "loss": 2.1555, "step": 7333 }, { "epoch": 0.24, "grad_norm": 0.6999737620353699, "learning_rate": 1.747582896488444e-05, "loss": 2.1447, "step": 7334 }, { "epoch": 0.24, "grad_norm": 0.7268891334533691, "learning_rate": 1.747512298263625e-05, "loss": 2.165, "step": 7335 }, { "epoch": 0.24, "grad_norm": 0.7181307077407837, "learning_rate": 1.7474416915938502e-05, "loss": 2.1499, "step": 7336 }, { "epoch": 0.24, "grad_norm": 0.6906822919845581, "learning_rate": 1.7473710764799173e-05, "loss": 2.1951, "step": 7337 }, { "epoch": 0.24, "grad_norm": 0.7302138209342957, "learning_rate": 1.7473004529226237e-05, "loss": 2.1269, "step": 7338 }, { "epoch": 0.24, "grad_norm": 0.7196494340896606, "learning_rate": 1.747229820922768e-05, "loss": 2.1088, "step": 7339 }, { "epoch": 0.24, "grad_norm": 0.7300520539283752, "learning_rate": 1.747159180481147e-05, "loss": 2.1777, "step": 7340 }, { "epoch": 0.24, "grad_norm": 0.7005968689918518, "learning_rate": 1.74708853159856e-05, "loss": 2.1143, "step": 7341 }, { "epoch": 0.24, "grad_norm": 0.8301087617874146, "learning_rate": 1.7470178742758046e-05, "loss": 2.1046, "step": 7342 }, { "epoch": 0.24, "grad_norm": 0.7460008263587952, "learning_rate": 1.746947208513679e-05, "loss": 2.1479, "step": 7343 }, { "epoch": 0.24, "grad_norm": 0.7298780679702759, "learning_rate": 1.7468765343129813e-05, "loss": 2.2079, "step": 7344 }, { "epoch": 0.24, "grad_norm": 0.7237032055854797, "learning_rate": 1.746805851674511e-05, "loss": 2.2102, "step": 7345 }, { "epoch": 0.24, "grad_norm": 0.6874216198921204, "learning_rate": 1.746735160599065e-05, "loss": 2.1385, "step": 7346 }, { "epoch": 0.24, "grad_norm": 0.7372941970825195, "learning_rate": 1.746664461087443e-05, "loss": 2.1788, "step": 7347 }, { "epoch": 0.24, "grad_norm": 0.6971107721328735, "learning_rate": 1.746593753140444e-05, "loss": 2.1717, "step": 7348 }, { "epoch": 0.24, "grad_norm": 0.6987071633338928, "learning_rate": 1.7465230367588656e-05, "loss": 2.2039, "step": 7349 }, { "epoch": 0.24, "grad_norm": 0.7064196467399597, "learning_rate": 1.7464523119435076e-05, "loss": 2.1982, "step": 7350 }, { "epoch": 0.24, "grad_norm": 0.7170935273170471, "learning_rate": 1.7463815786951692e-05, "loss": 2.1776, "step": 7351 }, { "epoch": 0.24, "grad_norm": 0.6694630980491638, "learning_rate": 1.746310837014649e-05, "loss": 2.1344, "step": 7352 }, { "epoch": 0.24, "grad_norm": 0.6851156949996948, "learning_rate": 1.7462400869027463e-05, "loss": 2.1789, "step": 7353 }, { "epoch": 0.24, "grad_norm": 0.7190427184104919, "learning_rate": 1.7461693283602606e-05, "loss": 2.1707, "step": 7354 }, { "epoch": 0.24, "grad_norm": 0.7038695216178894, "learning_rate": 1.746098561387991e-05, "loss": 2.2036, "step": 7355 }, { "epoch": 0.24, "grad_norm": 0.7284106612205505, "learning_rate": 1.746027785986737e-05, "loss": 2.2001, "step": 7356 }, { "epoch": 0.24, "grad_norm": 0.722726583480835, "learning_rate": 1.7459570021572983e-05, "loss": 2.1712, "step": 7357 }, { "epoch": 0.24, "grad_norm": 0.7008733153343201, "learning_rate": 1.7458862099004744e-05, "loss": 2.155, "step": 7358 }, { "epoch": 0.24, "grad_norm": 0.7053101062774658, "learning_rate": 1.7458154092170655e-05, "loss": 2.1325, "step": 7359 }, { "epoch": 0.24, "grad_norm": 0.7075342535972595, "learning_rate": 1.7457446001078712e-05, "loss": 2.1702, "step": 7360 }, { "epoch": 0.24, "grad_norm": 0.730897068977356, "learning_rate": 1.7456737825736916e-05, "loss": 2.137, "step": 7361 }, { "epoch": 0.24, "grad_norm": 0.7369991540908813, "learning_rate": 1.7456029566153264e-05, "loss": 2.092, "step": 7362 }, { "epoch": 0.24, "grad_norm": 0.6947185397148132, "learning_rate": 1.745532122233576e-05, "loss": 2.1808, "step": 7363 }, { "epoch": 0.25, "grad_norm": 0.725644588470459, "learning_rate": 1.7454612794292404e-05, "loss": 2.1444, "step": 7364 }, { "epoch": 0.25, "grad_norm": 0.7057211399078369, "learning_rate": 1.7453904282031207e-05, "loss": 2.2077, "step": 7365 }, { "epoch": 0.25, "grad_norm": 0.7329549789428711, "learning_rate": 1.7453195685560164e-05, "loss": 2.2167, "step": 7366 }, { "epoch": 0.25, "grad_norm": 0.6952570676803589, "learning_rate": 1.7452487004887282e-05, "loss": 2.1227, "step": 7367 }, { "epoch": 0.25, "grad_norm": 0.6819628477096558, "learning_rate": 1.7451778240020573e-05, "loss": 2.1415, "step": 7368 }, { "epoch": 0.25, "grad_norm": 0.7332285046577454, "learning_rate": 1.745106939096804e-05, "loss": 2.1867, "step": 7369 }, { "epoch": 0.25, "grad_norm": 0.7193008661270142, "learning_rate": 1.745036045773769e-05, "loss": 2.2043, "step": 7370 }, { "epoch": 0.25, "grad_norm": 0.6841698884963989, "learning_rate": 1.7449651440337538e-05, "loss": 2.1465, "step": 7371 }, { "epoch": 0.25, "grad_norm": 0.7153070569038391, "learning_rate": 1.7448942338775584e-05, "loss": 2.127, "step": 7372 }, { "epoch": 0.25, "grad_norm": 0.6940542459487915, "learning_rate": 1.7448233153059847e-05, "loss": 2.1652, "step": 7373 }, { "epoch": 0.25, "grad_norm": 0.702078640460968, "learning_rate": 1.7447523883198342e-05, "loss": 2.1799, "step": 7374 }, { "epoch": 0.25, "grad_norm": 0.7142959833145142, "learning_rate": 1.7446814529199072e-05, "loss": 2.0938, "step": 7375 }, { "epoch": 0.25, "grad_norm": 0.6977732181549072, "learning_rate": 1.7446105091070058e-05, "loss": 2.1918, "step": 7376 }, { "epoch": 0.25, "grad_norm": 0.716506838798523, "learning_rate": 1.744539556881931e-05, "loss": 2.2126, "step": 7377 }, { "epoch": 0.25, "grad_norm": 0.722317099571228, "learning_rate": 1.7444685962454845e-05, "loss": 2.1754, "step": 7378 }, { "epoch": 0.25, "grad_norm": 0.7083778977394104, "learning_rate": 1.7443976271984687e-05, "loss": 2.1985, "step": 7379 }, { "epoch": 0.25, "grad_norm": 0.7139623761177063, "learning_rate": 1.7443266497416842e-05, "loss": 2.2137, "step": 7380 }, { "epoch": 0.25, "grad_norm": 0.7358871698379517, "learning_rate": 1.7442556638759337e-05, "loss": 2.2826, "step": 7381 }, { "epoch": 0.25, "grad_norm": 0.6936337351799011, "learning_rate": 1.7441846696020185e-05, "loss": 2.142, "step": 7382 }, { "epoch": 0.25, "grad_norm": 0.7159985899925232, "learning_rate": 1.7441136669207416e-05, "loss": 2.1861, "step": 7383 }, { "epoch": 0.25, "grad_norm": 0.6704110503196716, "learning_rate": 1.7440426558329046e-05, "loss": 2.0958, "step": 7384 }, { "epoch": 0.25, "grad_norm": 0.7053582072257996, "learning_rate": 1.743971636339309e-05, "loss": 2.1454, "step": 7385 }, { "epoch": 0.25, "grad_norm": 0.6758845448493958, "learning_rate": 1.7439006084407585e-05, "loss": 2.1722, "step": 7386 }, { "epoch": 0.25, "grad_norm": 0.7335237264633179, "learning_rate": 1.7438295721380548e-05, "loss": 2.2505, "step": 7387 }, { "epoch": 0.25, "grad_norm": 0.692959725856781, "learning_rate": 1.7437585274320005e-05, "loss": 2.1461, "step": 7388 }, { "epoch": 0.25, "grad_norm": 0.6938751935958862, "learning_rate": 1.7436874743233984e-05, "loss": 2.1179, "step": 7389 }, { "epoch": 0.25, "grad_norm": 0.7352149486541748, "learning_rate": 1.7436164128130507e-05, "loss": 2.1928, "step": 7390 }, { "epoch": 0.25, "grad_norm": 0.6796690225601196, "learning_rate": 1.7435453429017604e-05, "loss": 2.1386, "step": 7391 }, { "epoch": 0.25, "grad_norm": 0.690133810043335, "learning_rate": 1.743474264590331e-05, "loss": 2.1405, "step": 7392 }, { "epoch": 0.25, "grad_norm": 0.7542551159858704, "learning_rate": 1.7434031778795652e-05, "loss": 2.2049, "step": 7393 }, { "epoch": 0.25, "grad_norm": 0.6859422326087952, "learning_rate": 1.743332082770266e-05, "loss": 2.1652, "step": 7394 }, { "epoch": 0.25, "grad_norm": 0.69484943151474, "learning_rate": 1.743260979263236e-05, "loss": 2.2245, "step": 7395 }, { "epoch": 0.25, "grad_norm": 0.7636570334434509, "learning_rate": 1.7431898673592793e-05, "loss": 2.1831, "step": 7396 }, { "epoch": 0.25, "grad_norm": 0.675305962562561, "learning_rate": 1.743118747059199e-05, "loss": 2.0869, "step": 7397 }, { "epoch": 0.25, "grad_norm": 0.6886658072471619, "learning_rate": 1.743047618363799e-05, "loss": 2.1854, "step": 7398 }, { "epoch": 0.25, "grad_norm": 0.708949625492096, "learning_rate": 1.742976481273882e-05, "loss": 2.1772, "step": 7399 }, { "epoch": 0.25, "grad_norm": 0.7116689085960388, "learning_rate": 1.7429053357902527e-05, "loss": 2.2006, "step": 7400 }, { "epoch": 0.25, "grad_norm": 0.6921509504318237, "learning_rate": 1.7428341819137136e-05, "loss": 2.1045, "step": 7401 }, { "epoch": 0.25, "grad_norm": 0.6995593309402466, "learning_rate": 1.74276301964507e-05, "loss": 2.203, "step": 7402 }, { "epoch": 0.25, "grad_norm": 0.7579448819160461, "learning_rate": 1.7426918489851242e-05, "loss": 2.1404, "step": 7403 }, { "epoch": 0.25, "grad_norm": 0.7423741817474365, "learning_rate": 1.7426206699346816e-05, "loss": 2.1423, "step": 7404 }, { "epoch": 0.25, "grad_norm": 0.6746935844421387, "learning_rate": 1.742549482494546e-05, "loss": 2.1538, "step": 7405 }, { "epoch": 0.25, "grad_norm": 0.7288904786109924, "learning_rate": 1.7424782866655215e-05, "loss": 2.1552, "step": 7406 }, { "epoch": 0.25, "grad_norm": 0.7345695495605469, "learning_rate": 1.7424070824484123e-05, "loss": 2.2146, "step": 7407 }, { "epoch": 0.25, "grad_norm": 0.7102009057998657, "learning_rate": 1.7423358698440232e-05, "loss": 2.1274, "step": 7408 }, { "epoch": 0.25, "grad_norm": 0.7081682682037354, "learning_rate": 1.7422646488531586e-05, "loss": 2.2366, "step": 7409 }, { "epoch": 0.25, "grad_norm": 0.7027879953384399, "learning_rate": 1.7421934194766227e-05, "loss": 2.2137, "step": 7410 }, { "epoch": 0.25, "grad_norm": 0.7048414945602417, "learning_rate": 1.7421221817152206e-05, "loss": 2.2222, "step": 7411 }, { "epoch": 0.25, "grad_norm": 0.7034715414047241, "learning_rate": 1.742050935569757e-05, "loss": 2.2014, "step": 7412 }, { "epoch": 0.25, "grad_norm": 0.7006920576095581, "learning_rate": 1.7419796810410368e-05, "loss": 2.1246, "step": 7413 }, { "epoch": 0.25, "grad_norm": 0.7030184864997864, "learning_rate": 1.7419084181298648e-05, "loss": 2.1018, "step": 7414 }, { "epoch": 0.25, "grad_norm": 0.7494039535522461, "learning_rate": 1.7418371468370466e-05, "loss": 2.1859, "step": 7415 }, { "epoch": 0.25, "grad_norm": 0.6777616143226624, "learning_rate": 1.741765867163387e-05, "loss": 2.1583, "step": 7416 }, { "epoch": 0.25, "grad_norm": 0.7057734131813049, "learning_rate": 1.7416945791096913e-05, "loss": 2.1813, "step": 7417 }, { "epoch": 0.25, "grad_norm": 0.6973326802253723, "learning_rate": 1.741623282676765e-05, "loss": 2.2326, "step": 7418 }, { "epoch": 0.25, "grad_norm": 0.6813613772392273, "learning_rate": 1.7415519778654134e-05, "loss": 2.1236, "step": 7419 }, { "epoch": 0.25, "grad_norm": 0.7161060571670532, "learning_rate": 1.7414806646764422e-05, "loss": 2.1569, "step": 7420 }, { "epoch": 0.25, "grad_norm": 0.6896141767501831, "learning_rate": 1.741409343110657e-05, "loss": 2.155, "step": 7421 }, { "epoch": 0.25, "grad_norm": 0.7087156176567078, "learning_rate": 1.7413380131688636e-05, "loss": 2.2002, "step": 7422 }, { "epoch": 0.25, "grad_norm": 0.6988806128501892, "learning_rate": 1.741266674851868e-05, "loss": 2.1226, "step": 7423 }, { "epoch": 0.25, "grad_norm": 0.7560961842536926, "learning_rate": 1.7411953281604753e-05, "loss": 2.2387, "step": 7424 }, { "epoch": 0.25, "grad_norm": 0.7142537236213684, "learning_rate": 1.741123973095493e-05, "loss": 2.1575, "step": 7425 }, { "epoch": 0.25, "grad_norm": 0.6950585246086121, "learning_rate": 1.7410526096577257e-05, "loss": 2.1462, "step": 7426 }, { "epoch": 0.25, "grad_norm": 0.7015814185142517, "learning_rate": 1.7409812378479803e-05, "loss": 2.1112, "step": 7427 }, { "epoch": 0.25, "grad_norm": 0.7047384977340698, "learning_rate": 1.7409098576670636e-05, "loss": 2.234, "step": 7428 }, { "epoch": 0.25, "grad_norm": 0.7405452132225037, "learning_rate": 1.7408384691157815e-05, "loss": 2.1384, "step": 7429 }, { "epoch": 0.25, "grad_norm": 0.7188785076141357, "learning_rate": 1.7407670721949404e-05, "loss": 2.156, "step": 7430 }, { "epoch": 0.25, "grad_norm": 0.7146674394607544, "learning_rate": 1.7406956669053467e-05, "loss": 2.1454, "step": 7431 }, { "epoch": 0.25, "grad_norm": 0.7263641357421875, "learning_rate": 1.7406242532478078e-05, "loss": 2.2112, "step": 7432 }, { "epoch": 0.25, "grad_norm": 0.7088471055030823, "learning_rate": 1.74055283122313e-05, "loss": 2.1552, "step": 7433 }, { "epoch": 0.25, "grad_norm": 0.7016251087188721, "learning_rate": 1.7404814008321206e-05, "loss": 2.1228, "step": 7434 }, { "epoch": 0.25, "grad_norm": 0.715968906879425, "learning_rate": 1.740409962075586e-05, "loss": 2.169, "step": 7435 }, { "epoch": 0.25, "grad_norm": 0.7019792199134827, "learning_rate": 1.740338514954334e-05, "loss": 2.0986, "step": 7436 }, { "epoch": 0.25, "grad_norm": 0.6957949995994568, "learning_rate": 1.740267059469171e-05, "loss": 2.2018, "step": 7437 }, { "epoch": 0.25, "grad_norm": 0.704764187335968, "learning_rate": 1.7401955956209047e-05, "loss": 2.1519, "step": 7438 }, { "epoch": 0.25, "grad_norm": 0.7123653888702393, "learning_rate": 1.7401241234103424e-05, "loss": 2.1578, "step": 7439 }, { "epoch": 0.25, "grad_norm": 0.6978668570518494, "learning_rate": 1.740052642838291e-05, "loss": 2.1586, "step": 7440 }, { "epoch": 0.25, "grad_norm": 0.7097553610801697, "learning_rate": 1.7399811539055592e-05, "loss": 2.1147, "step": 7441 }, { "epoch": 0.25, "grad_norm": 0.7407875061035156, "learning_rate": 1.7399096566129537e-05, "loss": 2.185, "step": 7442 }, { "epoch": 0.25, "grad_norm": 0.7441389560699463, "learning_rate": 1.7398381509612827e-05, "loss": 2.0968, "step": 7443 }, { "epoch": 0.25, "grad_norm": 0.7218737602233887, "learning_rate": 1.7397666369513534e-05, "loss": 2.1156, "step": 7444 }, { "epoch": 0.25, "grad_norm": 0.6910512447357178, "learning_rate": 1.7396951145839747e-05, "loss": 2.1398, "step": 7445 }, { "epoch": 0.25, "grad_norm": 0.6886022686958313, "learning_rate": 1.739623583859954e-05, "loss": 2.1143, "step": 7446 }, { "epoch": 0.25, "grad_norm": 0.6996616721153259, "learning_rate": 1.7395520447800994e-05, "loss": 2.173, "step": 7447 }, { "epoch": 0.25, "grad_norm": 0.7167861461639404, "learning_rate": 1.7394804973452194e-05, "loss": 2.192, "step": 7448 }, { "epoch": 0.25, "grad_norm": 0.7033651471138, "learning_rate": 1.7394089415561218e-05, "loss": 2.164, "step": 7449 }, { "epoch": 0.25, "grad_norm": 0.7103638648986816, "learning_rate": 1.7393373774136155e-05, "loss": 2.1629, "step": 7450 }, { "epoch": 0.25, "grad_norm": 0.7029015421867371, "learning_rate": 1.7392658049185088e-05, "loss": 2.1443, "step": 7451 }, { "epoch": 0.25, "grad_norm": 0.7431227564811707, "learning_rate": 1.73919422407161e-05, "loss": 2.2072, "step": 7452 }, { "epoch": 0.25, "grad_norm": 0.748414933681488, "learning_rate": 1.7391226348737286e-05, "loss": 2.1445, "step": 7453 }, { "epoch": 0.25, "grad_norm": 0.6983426809310913, "learning_rate": 1.739051037325673e-05, "loss": 2.1745, "step": 7454 }, { "epoch": 0.25, "grad_norm": 0.6963228583335876, "learning_rate": 1.738979431428251e-05, "loss": 2.1153, "step": 7455 }, { "epoch": 0.25, "grad_norm": 0.6816458702087402, "learning_rate": 1.738907817182273e-05, "loss": 2.1184, "step": 7456 }, { "epoch": 0.25, "grad_norm": 0.7504379749298096, "learning_rate": 1.7388361945885478e-05, "loss": 2.1968, "step": 7457 }, { "epoch": 0.25, "grad_norm": 0.7345001697540283, "learning_rate": 1.7387645636478838e-05, "loss": 2.1634, "step": 7458 }, { "epoch": 0.25, "grad_norm": 0.7751197814941406, "learning_rate": 1.7386929243610908e-05, "loss": 2.2579, "step": 7459 }, { "epoch": 0.25, "grad_norm": 0.7273992300033569, "learning_rate": 1.7386212767289782e-05, "loss": 2.169, "step": 7460 }, { "epoch": 0.25, "grad_norm": 0.6980360746383667, "learning_rate": 1.7385496207523554e-05, "loss": 2.1606, "step": 7461 }, { "epoch": 0.25, "grad_norm": 0.7287806868553162, "learning_rate": 1.7384779564320315e-05, "loss": 2.1308, "step": 7462 }, { "epoch": 0.25, "grad_norm": 0.7536101341247559, "learning_rate": 1.7384062837688166e-05, "loss": 2.1401, "step": 7463 }, { "epoch": 0.25, "grad_norm": 0.7021917104721069, "learning_rate": 1.73833460276352e-05, "loss": 2.1248, "step": 7464 }, { "epoch": 0.25, "grad_norm": 0.7641261219978333, "learning_rate": 1.7382629134169522e-05, "loss": 2.141, "step": 7465 }, { "epoch": 0.25, "grad_norm": 0.769978940486908, "learning_rate": 1.7381912157299222e-05, "loss": 2.0738, "step": 7466 }, { "epoch": 0.25, "grad_norm": 0.6940213441848755, "learning_rate": 1.738119509703241e-05, "loss": 2.1623, "step": 7467 }, { "epoch": 0.25, "grad_norm": 0.7132591009140015, "learning_rate": 1.738047795337718e-05, "loss": 2.1606, "step": 7468 }, { "epoch": 0.25, "grad_norm": 0.6867596507072449, "learning_rate": 1.737976072634163e-05, "loss": 2.1361, "step": 7469 }, { "epoch": 0.25, "grad_norm": 0.7038741111755371, "learning_rate": 1.7379043415933874e-05, "loss": 2.2105, "step": 7470 }, { "epoch": 0.25, "grad_norm": 0.7070055603981018, "learning_rate": 1.737832602216201e-05, "loss": 2.1602, "step": 7471 }, { "epoch": 0.25, "grad_norm": 0.7588679790496826, "learning_rate": 1.737760854503414e-05, "loss": 2.2208, "step": 7472 }, { "epoch": 0.25, "grad_norm": 0.733066201210022, "learning_rate": 1.7376890984558374e-05, "loss": 2.1702, "step": 7473 }, { "epoch": 0.25, "grad_norm": 0.7015243768692017, "learning_rate": 1.737617334074282e-05, "loss": 2.1432, "step": 7474 }, { "epoch": 0.25, "grad_norm": 0.7411099076271057, "learning_rate": 1.7375455613595577e-05, "loss": 2.1639, "step": 7475 }, { "epoch": 0.25, "grad_norm": 0.6995283961296082, "learning_rate": 1.737473780312476e-05, "loss": 2.1924, "step": 7476 }, { "epoch": 0.25, "grad_norm": 0.7202366590499878, "learning_rate": 1.737401990933848e-05, "loss": 2.2227, "step": 7477 }, { "epoch": 0.25, "grad_norm": 0.7031075358390808, "learning_rate": 1.7373301932244842e-05, "loss": 2.1387, "step": 7478 }, { "epoch": 0.25, "grad_norm": 0.6923456192016602, "learning_rate": 1.7372583871851962e-05, "loss": 2.1065, "step": 7479 }, { "epoch": 0.25, "grad_norm": 0.7043970823287964, "learning_rate": 1.737186572816795e-05, "loss": 2.1079, "step": 7480 }, { "epoch": 0.25, "grad_norm": 0.7098026871681213, "learning_rate": 1.7371147501200917e-05, "loss": 2.1695, "step": 7481 }, { "epoch": 0.25, "grad_norm": 0.7155427932739258, "learning_rate": 1.7370429190958982e-05, "loss": 2.1759, "step": 7482 }, { "epoch": 0.25, "grad_norm": 0.7232871651649475, "learning_rate": 1.7369710797450256e-05, "loss": 2.1602, "step": 7483 }, { "epoch": 0.25, "grad_norm": 0.7343376278877258, "learning_rate": 1.736899232068286e-05, "loss": 2.211, "step": 7484 }, { "epoch": 0.25, "grad_norm": 0.7422235012054443, "learning_rate": 1.73682737606649e-05, "loss": 2.1357, "step": 7485 }, { "epoch": 0.25, "grad_norm": 0.7326328754425049, "learning_rate": 1.7367555117404506e-05, "loss": 2.2202, "step": 7486 }, { "epoch": 0.25, "grad_norm": 0.7203668355941772, "learning_rate": 1.7366836390909794e-05, "loss": 2.1505, "step": 7487 }, { "epoch": 0.25, "grad_norm": 0.6873130798339844, "learning_rate": 1.7366117581188878e-05, "loss": 2.1756, "step": 7488 }, { "epoch": 0.25, "grad_norm": 0.7645033597946167, "learning_rate": 1.7365398688249885e-05, "loss": 2.2623, "step": 7489 }, { "epoch": 0.25, "grad_norm": 0.6934333443641663, "learning_rate": 1.7364679712100933e-05, "loss": 2.1253, "step": 7490 }, { "epoch": 0.25, "grad_norm": 0.7220542430877686, "learning_rate": 1.7363960652750148e-05, "loss": 2.121, "step": 7491 }, { "epoch": 0.25, "grad_norm": 0.676648736000061, "learning_rate": 1.736324151020565e-05, "loss": 2.0911, "step": 7492 }, { "epoch": 0.25, "grad_norm": 0.7199625372886658, "learning_rate": 1.7362522284475563e-05, "loss": 2.1996, "step": 7493 }, { "epoch": 0.25, "grad_norm": 0.7112314701080322, "learning_rate": 1.7361802975568014e-05, "loss": 2.1218, "step": 7494 }, { "epoch": 0.25, "grad_norm": 0.7417469620704651, "learning_rate": 1.7361083583491133e-05, "loss": 2.1805, "step": 7495 }, { "epoch": 0.25, "grad_norm": 0.7254404425621033, "learning_rate": 1.736036410825304e-05, "loss": 2.1814, "step": 7496 }, { "epoch": 0.25, "grad_norm": 0.6896488666534424, "learning_rate": 1.7359644549861866e-05, "loss": 2.1396, "step": 7497 }, { "epoch": 0.25, "grad_norm": 0.7474830150604248, "learning_rate": 1.7358924908325745e-05, "loss": 2.1735, "step": 7498 }, { "epoch": 0.25, "grad_norm": 0.7241219878196716, "learning_rate": 1.7358205183652802e-05, "loss": 2.126, "step": 7499 }, { "epoch": 0.25, "grad_norm": 0.7144243717193604, "learning_rate": 1.7357485375851165e-05, "loss": 2.2264, "step": 7500 }, { "epoch": 0.25, "grad_norm": 0.7307119369506836, "learning_rate": 1.735676548492898e-05, "loss": 2.1178, "step": 7501 }, { "epoch": 0.25, "grad_norm": 0.7247582674026489, "learning_rate": 1.735604551089436e-05, "loss": 2.2271, "step": 7502 }, { "epoch": 0.25, "grad_norm": 0.6875985860824585, "learning_rate": 1.7355325453755453e-05, "loss": 2.1737, "step": 7503 }, { "epoch": 0.25, "grad_norm": 0.7499565482139587, "learning_rate": 1.7354605313520387e-05, "loss": 2.1483, "step": 7504 }, { "epoch": 0.25, "grad_norm": 0.7241115570068359, "learning_rate": 1.7353885090197305e-05, "loss": 2.0983, "step": 7505 }, { "epoch": 0.25, "grad_norm": 0.7252108454704285, "learning_rate": 1.7353164783794335e-05, "loss": 2.2157, "step": 7506 }, { "epoch": 0.25, "grad_norm": 0.7384920716285706, "learning_rate": 1.735244439431962e-05, "loss": 2.1632, "step": 7507 }, { "epoch": 0.25, "grad_norm": 0.7114937901496887, "learning_rate": 1.73517239217813e-05, "loss": 2.1594, "step": 7508 }, { "epoch": 0.25, "grad_norm": 0.7011703252792358, "learning_rate": 1.735100336618751e-05, "loss": 2.1403, "step": 7509 }, { "epoch": 0.25, "grad_norm": 0.6802363991737366, "learning_rate": 1.7350282727546386e-05, "loss": 2.0931, "step": 7510 }, { "epoch": 0.25, "grad_norm": 0.6992273330688477, "learning_rate": 1.7349562005866083e-05, "loss": 2.1548, "step": 7511 }, { "epoch": 0.25, "grad_norm": 0.7036104202270508, "learning_rate": 1.7348841201154734e-05, "loss": 2.1983, "step": 7512 }, { "epoch": 0.25, "grad_norm": 0.718110978603363, "learning_rate": 1.7348120313420485e-05, "loss": 2.1642, "step": 7513 }, { "epoch": 0.25, "grad_norm": 0.7201098799705505, "learning_rate": 1.7347399342671475e-05, "loss": 2.0977, "step": 7514 }, { "epoch": 0.25, "grad_norm": 0.7423645853996277, "learning_rate": 1.7346678288915858e-05, "loss": 2.1916, "step": 7515 }, { "epoch": 0.25, "grad_norm": 0.7260878682136536, "learning_rate": 1.7345957152161773e-05, "loss": 2.1935, "step": 7516 }, { "epoch": 0.25, "grad_norm": 0.7157997488975525, "learning_rate": 1.734523593241737e-05, "loss": 2.2339, "step": 7517 }, { "epoch": 0.25, "grad_norm": 0.7187975645065308, "learning_rate": 1.7344514629690793e-05, "loss": 2.2074, "step": 7518 }, { "epoch": 0.25, "grad_norm": 0.7448701858520508, "learning_rate": 1.7343793243990198e-05, "loss": 2.2391, "step": 7519 }, { "epoch": 0.25, "grad_norm": 0.6892527937889099, "learning_rate": 1.7343071775323728e-05, "loss": 2.1601, "step": 7520 }, { "epoch": 0.25, "grad_norm": 0.693621039390564, "learning_rate": 1.734235022369954e-05, "loss": 2.1642, "step": 7521 }, { "epoch": 0.25, "grad_norm": 0.7147509455680847, "learning_rate": 1.7341628589125777e-05, "loss": 2.1624, "step": 7522 }, { "epoch": 0.25, "grad_norm": 0.6916564106941223, "learning_rate": 1.7340906871610603e-05, "loss": 2.2838, "step": 7523 }, { "epoch": 0.25, "grad_norm": 0.6941556930541992, "learning_rate": 1.734018507116216e-05, "loss": 2.1288, "step": 7524 }, { "epoch": 0.25, "grad_norm": 0.6766563057899475, "learning_rate": 1.733946318778861e-05, "loss": 2.1473, "step": 7525 }, { "epoch": 0.25, "grad_norm": 0.7380691766738892, "learning_rate": 1.7338741221498105e-05, "loss": 2.1491, "step": 7526 }, { "epoch": 0.25, "grad_norm": 0.726524293422699, "learning_rate": 1.7338019172298805e-05, "loss": 2.1338, "step": 7527 }, { "epoch": 0.25, "grad_norm": 0.7141581177711487, "learning_rate": 1.7337297040198865e-05, "loss": 2.2821, "step": 7528 }, { "epoch": 0.25, "grad_norm": 0.7193422913551331, "learning_rate": 1.733657482520644e-05, "loss": 2.124, "step": 7529 }, { "epoch": 0.25, "grad_norm": 0.7184845805168152, "learning_rate": 1.7335852527329695e-05, "loss": 2.166, "step": 7530 }, { "epoch": 0.25, "grad_norm": 0.6978302597999573, "learning_rate": 1.7335130146576786e-05, "loss": 2.1371, "step": 7531 }, { "epoch": 0.25, "grad_norm": 0.690495491027832, "learning_rate": 1.7334407682955876e-05, "loss": 2.1527, "step": 7532 }, { "epoch": 0.25, "grad_norm": 0.7555071115493774, "learning_rate": 1.7333685136475126e-05, "loss": 2.1825, "step": 7533 }, { "epoch": 0.25, "grad_norm": 0.6808302998542786, "learning_rate": 1.7332962507142703e-05, "loss": 2.1226, "step": 7534 }, { "epoch": 0.25, "grad_norm": 0.7210285067558289, "learning_rate": 1.733223979496676e-05, "loss": 2.115, "step": 7535 }, { "epoch": 0.25, "grad_norm": 0.7195649743080139, "learning_rate": 1.7331516999955475e-05, "loss": 2.146, "step": 7536 }, { "epoch": 0.25, "grad_norm": 0.725496768951416, "learning_rate": 1.7330794122117005e-05, "loss": 2.2002, "step": 7537 }, { "epoch": 0.25, "grad_norm": 0.7282800674438477, "learning_rate": 1.733007116145952e-05, "loss": 2.0893, "step": 7538 }, { "epoch": 0.25, "grad_norm": 0.7512715458869934, "learning_rate": 1.7329348117991186e-05, "loss": 2.2226, "step": 7539 }, { "epoch": 0.25, "grad_norm": 0.7053192257881165, "learning_rate": 1.7328624991720172e-05, "loss": 2.1011, "step": 7540 }, { "epoch": 0.25, "grad_norm": 0.761727511882782, "learning_rate": 1.732790178265465e-05, "loss": 2.0872, "step": 7541 }, { "epoch": 0.25, "grad_norm": 0.747033953666687, "learning_rate": 1.7327178490802784e-05, "loss": 2.2036, "step": 7542 }, { "epoch": 0.25, "grad_norm": 0.7316663861274719, "learning_rate": 1.7326455116172755e-05, "loss": 2.1965, "step": 7543 }, { "epoch": 0.25, "grad_norm": 0.7357268333435059, "learning_rate": 1.7325731658772726e-05, "loss": 2.1607, "step": 7544 }, { "epoch": 0.25, "grad_norm": 0.723380982875824, "learning_rate": 1.7325008118610877e-05, "loss": 2.1437, "step": 7545 }, { "epoch": 0.25, "grad_norm": 0.7776650190353394, "learning_rate": 1.7324284495695376e-05, "loss": 2.1591, "step": 7546 }, { "epoch": 0.25, "grad_norm": 0.6923309564590454, "learning_rate": 1.7323560790034405e-05, "loss": 2.1412, "step": 7547 }, { "epoch": 0.25, "grad_norm": 0.6813734769821167, "learning_rate": 1.7322837001636133e-05, "loss": 2.1431, "step": 7548 }, { "epoch": 0.25, "grad_norm": 0.7127648591995239, "learning_rate": 1.732211313050874e-05, "loss": 2.159, "step": 7549 }, { "epoch": 0.25, "grad_norm": 0.7070778608322144, "learning_rate": 1.7321389176660407e-05, "loss": 2.1349, "step": 7550 }, { "epoch": 0.25, "grad_norm": 0.7294631004333496, "learning_rate": 1.7320665140099306e-05, "loss": 2.1697, "step": 7551 }, { "epoch": 0.25, "grad_norm": 0.7419963479042053, "learning_rate": 1.731994102083362e-05, "loss": 2.1584, "step": 7552 }, { "epoch": 0.25, "grad_norm": 0.6788718104362488, "learning_rate": 1.7319216818871533e-05, "loss": 2.0694, "step": 7553 }, { "epoch": 0.25, "grad_norm": 0.6953298449516296, "learning_rate": 1.7318492534221225e-05, "loss": 2.1377, "step": 7554 }, { "epoch": 0.25, "grad_norm": 0.74330073595047, "learning_rate": 1.7317768166890876e-05, "loss": 2.1322, "step": 7555 }, { "epoch": 0.25, "grad_norm": 0.7187591791152954, "learning_rate": 1.731704371688867e-05, "loss": 2.0923, "step": 7556 }, { "epoch": 0.25, "grad_norm": 0.6934306025505066, "learning_rate": 1.7316319184222792e-05, "loss": 2.1229, "step": 7557 }, { "epoch": 0.25, "grad_norm": 0.7290444374084473, "learning_rate": 1.7315594568901433e-05, "loss": 2.2649, "step": 7558 }, { "epoch": 0.25, "grad_norm": 0.7017270922660828, "learning_rate": 1.731486987093277e-05, "loss": 2.0735, "step": 7559 }, { "epoch": 0.25, "grad_norm": 0.6965583562850952, "learning_rate": 1.7314145090324992e-05, "loss": 2.0838, "step": 7560 }, { "epoch": 0.25, "grad_norm": 0.7092208862304688, "learning_rate": 1.731342022708629e-05, "loss": 2.0909, "step": 7561 }, { "epoch": 0.25, "grad_norm": 0.6921049356460571, "learning_rate": 1.7312695281224856e-05, "loss": 2.1835, "step": 7562 }, { "epoch": 0.25, "grad_norm": 0.7226642370223999, "learning_rate": 1.7311970252748873e-05, "loss": 2.1902, "step": 7563 }, { "epoch": 0.25, "grad_norm": 0.7010865807533264, "learning_rate": 1.7311245141666536e-05, "loss": 2.1794, "step": 7564 }, { "epoch": 0.25, "grad_norm": 0.7060854434967041, "learning_rate": 1.731051994798604e-05, "loss": 2.1102, "step": 7565 }, { "epoch": 0.25, "grad_norm": 0.6836500763893127, "learning_rate": 1.7309794671715567e-05, "loss": 2.1947, "step": 7566 }, { "epoch": 0.25, "grad_norm": 0.6924862861633301, "learning_rate": 1.7309069312863324e-05, "loss": 2.1533, "step": 7567 }, { "epoch": 0.25, "grad_norm": 0.7123151421546936, "learning_rate": 1.7308343871437494e-05, "loss": 2.1525, "step": 7568 }, { "epoch": 0.25, "grad_norm": 0.6885114312171936, "learning_rate": 1.730761834744628e-05, "loss": 2.1661, "step": 7569 }, { "epoch": 0.25, "grad_norm": 0.7045148611068726, "learning_rate": 1.730689274089788e-05, "loss": 2.1576, "step": 7570 }, { "epoch": 0.25, "grad_norm": 0.7144096493721008, "learning_rate": 1.7306167051800483e-05, "loss": 2.1577, "step": 7571 }, { "epoch": 0.25, "grad_norm": 0.723014235496521, "learning_rate": 1.7305441280162294e-05, "loss": 2.1602, "step": 7572 }, { "epoch": 0.25, "grad_norm": 0.7233469486236572, "learning_rate": 1.7304715425991512e-05, "loss": 2.1592, "step": 7573 }, { "epoch": 0.25, "grad_norm": 0.7292847633361816, "learning_rate": 1.7303989489296336e-05, "loss": 2.1199, "step": 7574 }, { "epoch": 0.25, "grad_norm": 0.7259262204170227, "learning_rate": 1.730326347008497e-05, "loss": 2.1342, "step": 7575 }, { "epoch": 0.25, "grad_norm": 0.6966424584388733, "learning_rate": 1.730253736836561e-05, "loss": 2.214, "step": 7576 }, { "epoch": 0.25, "grad_norm": 0.7465115785598755, "learning_rate": 1.730181118414646e-05, "loss": 2.2096, "step": 7577 }, { "epoch": 0.25, "grad_norm": 0.7213481664657593, "learning_rate": 1.730108491743573e-05, "loss": 2.1914, "step": 7578 }, { "epoch": 0.25, "grad_norm": 0.7127256989479065, "learning_rate": 1.730035856824162e-05, "loss": 2.148, "step": 7579 }, { "epoch": 0.25, "grad_norm": 0.7505007386207581, "learning_rate": 1.729963213657234e-05, "loss": 2.1591, "step": 7580 }, { "epoch": 0.25, "grad_norm": 0.749862015247345, "learning_rate": 1.729890562243609e-05, "loss": 2.1181, "step": 7581 }, { "epoch": 0.25, "grad_norm": 0.7312537431716919, "learning_rate": 1.7298179025841087e-05, "loss": 2.0667, "step": 7582 }, { "epoch": 0.25, "grad_norm": 0.7174736857414246, "learning_rate": 1.729745234679553e-05, "loss": 2.136, "step": 7583 }, { "epoch": 0.25, "grad_norm": 0.7012611031532288, "learning_rate": 1.7296725585307635e-05, "loss": 2.1535, "step": 7584 }, { "epoch": 0.25, "grad_norm": 0.718070387840271, "learning_rate": 1.729599874138561e-05, "loss": 2.1692, "step": 7585 }, { "epoch": 0.25, "grad_norm": 0.714130163192749, "learning_rate": 1.7295271815037668e-05, "loss": 2.1803, "step": 7586 }, { "epoch": 0.25, "grad_norm": 0.734847366809845, "learning_rate": 1.7294544806272023e-05, "loss": 2.2203, "step": 7587 }, { "epoch": 0.25, "grad_norm": 0.6970461010932922, "learning_rate": 1.7293817715096883e-05, "loss": 2.1493, "step": 7588 }, { "epoch": 0.25, "grad_norm": 0.7138895392417908, "learning_rate": 1.7293090541520464e-05, "loss": 2.2119, "step": 7589 }, { "epoch": 0.25, "grad_norm": 0.7600546479225159, "learning_rate": 1.729236328555098e-05, "loss": 2.1131, "step": 7590 }, { "epoch": 0.25, "grad_norm": 0.6877783536911011, "learning_rate": 1.7291635947196658e-05, "loss": 2.1998, "step": 7591 }, { "epoch": 0.25, "grad_norm": 0.7334975600242615, "learning_rate": 1.72909085264657e-05, "loss": 2.1525, "step": 7592 }, { "epoch": 0.25, "grad_norm": 0.6944721341133118, "learning_rate": 1.729018102336633e-05, "loss": 2.1195, "step": 7593 }, { "epoch": 0.25, "grad_norm": 0.739058256149292, "learning_rate": 1.728945343790677e-05, "loss": 2.1471, "step": 7594 }, { "epoch": 0.25, "grad_norm": 0.6996712684631348, "learning_rate": 1.7288725770095235e-05, "loss": 2.1322, "step": 7595 }, { "epoch": 0.25, "grad_norm": 0.72550368309021, "learning_rate": 1.7287998019939947e-05, "loss": 2.1817, "step": 7596 }, { "epoch": 0.25, "grad_norm": 0.7203039526939392, "learning_rate": 1.728727018744913e-05, "loss": 2.0869, "step": 7597 }, { "epoch": 0.25, "grad_norm": 0.7318381667137146, "learning_rate": 1.7286542272631008e-05, "loss": 2.1581, "step": 7598 }, { "epoch": 0.25, "grad_norm": 0.7341532111167908, "learning_rate": 1.7285814275493798e-05, "loss": 2.1491, "step": 7599 }, { "epoch": 0.25, "grad_norm": 0.7220089435577393, "learning_rate": 1.7285086196045728e-05, "loss": 2.2054, "step": 7600 }, { "epoch": 0.25, "grad_norm": 0.7038966417312622, "learning_rate": 1.7284358034295024e-05, "loss": 2.1362, "step": 7601 }, { "epoch": 0.25, "grad_norm": 0.7343723773956299, "learning_rate": 1.7283629790249916e-05, "loss": 2.1662, "step": 7602 }, { "epoch": 0.25, "grad_norm": 0.7587023973464966, "learning_rate": 1.7282901463918623e-05, "loss": 2.1688, "step": 7603 }, { "epoch": 0.25, "grad_norm": 0.6875295639038086, "learning_rate": 1.728217305530938e-05, "loss": 2.1179, "step": 7604 }, { "epoch": 0.25, "grad_norm": 0.7254225015640259, "learning_rate": 1.728144456443041e-05, "loss": 2.1473, "step": 7605 }, { "epoch": 0.25, "grad_norm": 0.749250054359436, "learning_rate": 1.728071599128995e-05, "loss": 2.1945, "step": 7606 }, { "epoch": 0.25, "grad_norm": 0.7107381224632263, "learning_rate": 1.7279987335896226e-05, "loss": 2.1221, "step": 7607 }, { "epoch": 0.25, "grad_norm": 0.7231041193008423, "learning_rate": 1.727925859825747e-05, "loss": 2.2122, "step": 7608 }, { "epoch": 0.25, "grad_norm": 0.709024965763092, "learning_rate": 1.727852977838192e-05, "loss": 2.16, "step": 7609 }, { "epoch": 0.25, "grad_norm": 0.7269505262374878, "learning_rate": 1.7277800876277806e-05, "loss": 2.2096, "step": 7610 }, { "epoch": 0.25, "grad_norm": 0.6951438188552856, "learning_rate": 1.727707189195336e-05, "loss": 2.107, "step": 7611 }, { "epoch": 0.25, "grad_norm": 0.716847836971283, "learning_rate": 1.7276342825416823e-05, "loss": 2.125, "step": 7612 }, { "epoch": 0.25, "grad_norm": 0.7037303447723389, "learning_rate": 1.7275613676676427e-05, "loss": 2.1978, "step": 7613 }, { "epoch": 0.25, "grad_norm": 0.6980779767036438, "learning_rate": 1.7274884445740415e-05, "loss": 2.1849, "step": 7614 }, { "epoch": 0.25, "grad_norm": 0.7484356760978699, "learning_rate": 1.727415513261702e-05, "loss": 2.1095, "step": 7615 }, { "epoch": 0.25, "grad_norm": 0.6940260529518127, "learning_rate": 1.727342573731448e-05, "loss": 2.1474, "step": 7616 }, { "epoch": 0.25, "grad_norm": 0.7696806192398071, "learning_rate": 1.7272696259841045e-05, "loss": 2.0745, "step": 7617 }, { "epoch": 0.25, "grad_norm": 0.7069591879844666, "learning_rate": 1.7271966700204946e-05, "loss": 2.1479, "step": 7618 }, { "epoch": 0.25, "grad_norm": 0.7415960431098938, "learning_rate": 1.727123705841443e-05, "loss": 2.1696, "step": 7619 }, { "epoch": 0.25, "grad_norm": 0.7355552315711975, "learning_rate": 1.7270507334477738e-05, "loss": 2.1759, "step": 7620 }, { "epoch": 0.25, "grad_norm": 0.7433149218559265, "learning_rate": 1.726977752840312e-05, "loss": 2.205, "step": 7621 }, { "epoch": 0.25, "grad_norm": 0.721484363079071, "learning_rate": 1.726904764019881e-05, "loss": 2.1781, "step": 7622 }, { "epoch": 0.25, "grad_norm": 0.7339152097702026, "learning_rate": 1.726831766987306e-05, "loss": 2.1704, "step": 7623 }, { "epoch": 0.25, "grad_norm": 0.6840312480926514, "learning_rate": 1.7267587617434118e-05, "loss": 2.1546, "step": 7624 }, { "epoch": 0.25, "grad_norm": 0.6954158544540405, "learning_rate": 1.7266857482890232e-05, "loss": 2.1584, "step": 7625 }, { "epoch": 0.25, "grad_norm": 0.7378429174423218, "learning_rate": 1.726612726624965e-05, "loss": 2.1331, "step": 7626 }, { "epoch": 0.25, "grad_norm": 0.6984655261039734, "learning_rate": 1.7265396967520614e-05, "loss": 2.1923, "step": 7627 }, { "epoch": 0.25, "grad_norm": 0.705985963344574, "learning_rate": 1.726466658671139e-05, "loss": 2.2063, "step": 7628 }, { "epoch": 0.25, "grad_norm": 0.6962282061576843, "learning_rate": 1.7263936123830214e-05, "loss": 2.191, "step": 7629 }, { "epoch": 0.25, "grad_norm": 0.7112159729003906, "learning_rate": 1.7263205578885343e-05, "loss": 2.1498, "step": 7630 }, { "epoch": 0.25, "grad_norm": 0.6993825435638428, "learning_rate": 1.7262474951885037e-05, "loss": 2.1423, "step": 7631 }, { "epoch": 0.25, "grad_norm": 0.6986170411109924, "learning_rate": 1.7261744242837544e-05, "loss": 2.2199, "step": 7632 }, { "epoch": 0.25, "grad_norm": 0.6939405798912048, "learning_rate": 1.726101345175112e-05, "loss": 2.1223, "step": 7633 }, { "epoch": 0.25, "grad_norm": 0.6675134301185608, "learning_rate": 1.7260282578634023e-05, "loss": 2.1813, "step": 7634 }, { "epoch": 0.25, "grad_norm": 0.7185758352279663, "learning_rate": 1.7259551623494507e-05, "loss": 2.1824, "step": 7635 }, { "epoch": 0.25, "grad_norm": 0.7143075466156006, "learning_rate": 1.725882058634083e-05, "loss": 2.1824, "step": 7636 }, { "epoch": 0.25, "grad_norm": 0.6790210604667664, "learning_rate": 1.7258089467181252e-05, "loss": 2.0746, "step": 7637 }, { "epoch": 0.25, "grad_norm": 0.6795542240142822, "learning_rate": 1.7257358266024033e-05, "loss": 2.1269, "step": 7638 }, { "epoch": 0.25, "grad_norm": 0.7037162780761719, "learning_rate": 1.7256626982877436e-05, "loss": 2.1749, "step": 7639 }, { "epoch": 0.25, "grad_norm": 0.7341267466545105, "learning_rate": 1.725589561774972e-05, "loss": 2.1793, "step": 7640 }, { "epoch": 0.25, "grad_norm": 0.7170124053955078, "learning_rate": 1.7255164170649145e-05, "loss": 2.1276, "step": 7641 }, { "epoch": 0.25, "grad_norm": 0.71085524559021, "learning_rate": 1.7254432641583977e-05, "loss": 2.1204, "step": 7642 }, { "epoch": 0.25, "grad_norm": 0.726539134979248, "learning_rate": 1.7253701030562483e-05, "loss": 2.1488, "step": 7643 }, { "epoch": 0.25, "grad_norm": 0.710188090801239, "learning_rate": 1.7252969337592927e-05, "loss": 2.1363, "step": 7644 }, { "epoch": 0.25, "grad_norm": 0.6881375908851624, "learning_rate": 1.7252237562683572e-05, "loss": 2.1894, "step": 7645 }, { "epoch": 0.25, "grad_norm": 0.71407151222229, "learning_rate": 1.7251505705842685e-05, "loss": 2.1331, "step": 7646 }, { "epoch": 0.25, "grad_norm": 0.7067898511886597, "learning_rate": 1.725077376707854e-05, "loss": 2.1995, "step": 7647 }, { "epoch": 0.25, "grad_norm": 0.7009431719779968, "learning_rate": 1.72500417463994e-05, "loss": 2.1412, "step": 7648 }, { "epoch": 0.25, "grad_norm": 0.6963695883750916, "learning_rate": 1.724930964381354e-05, "loss": 2.1243, "step": 7649 }, { "epoch": 0.25, "grad_norm": 0.7290429472923279, "learning_rate": 1.7248577459329226e-05, "loss": 2.1453, "step": 7650 }, { "epoch": 0.25, "grad_norm": 0.708939790725708, "learning_rate": 1.724784519295473e-05, "loss": 2.1956, "step": 7651 }, { "epoch": 0.25, "grad_norm": 0.6971594095230103, "learning_rate": 1.7247112844698334e-05, "loss": 2.158, "step": 7652 }, { "epoch": 0.25, "grad_norm": 0.6983869075775146, "learning_rate": 1.7246380414568296e-05, "loss": 2.1477, "step": 7653 }, { "epoch": 0.25, "grad_norm": 0.7416946887969971, "learning_rate": 1.7245647902572903e-05, "loss": 2.1712, "step": 7654 }, { "epoch": 0.25, "grad_norm": 0.6916167736053467, "learning_rate": 1.7244915308720425e-05, "loss": 2.1513, "step": 7655 }, { "epoch": 0.25, "grad_norm": 0.7251725196838379, "learning_rate": 1.724418263301914e-05, "loss": 2.182, "step": 7656 }, { "epoch": 0.25, "grad_norm": 0.7102228403091431, "learning_rate": 1.7243449875477326e-05, "loss": 2.1498, "step": 7657 }, { "epoch": 0.25, "grad_norm": 0.7427058219909668, "learning_rate": 1.724271703610326e-05, "loss": 2.1184, "step": 7658 }, { "epoch": 0.25, "grad_norm": 0.7271700501441956, "learning_rate": 1.724198411490522e-05, "loss": 2.0709, "step": 7659 }, { "epoch": 0.25, "grad_norm": 0.7278515696525574, "learning_rate": 1.724125111189149e-05, "loss": 2.1955, "step": 7660 }, { "epoch": 0.25, "grad_norm": 0.7055908441543579, "learning_rate": 1.7240518027070348e-05, "loss": 2.1924, "step": 7661 }, { "epoch": 0.25, "grad_norm": 0.6929784417152405, "learning_rate": 1.7239784860450078e-05, "loss": 2.1776, "step": 7662 }, { "epoch": 0.25, "grad_norm": 0.7270942330360413, "learning_rate": 1.723905161203896e-05, "loss": 2.1704, "step": 7663 }, { "epoch": 0.25, "grad_norm": 0.7108985185623169, "learning_rate": 1.7238318281845277e-05, "loss": 2.0996, "step": 7664 }, { "epoch": 0.26, "grad_norm": 0.7238744497299194, "learning_rate": 1.723758486987732e-05, "loss": 2.184, "step": 7665 }, { "epoch": 0.26, "grad_norm": 0.7307531237602234, "learning_rate": 1.7236851376143368e-05, "loss": 2.1922, "step": 7666 }, { "epoch": 0.26, "grad_norm": 0.7151849269866943, "learning_rate": 1.7236117800651712e-05, "loss": 2.2465, "step": 7667 }, { "epoch": 0.26, "grad_norm": 0.7142261266708374, "learning_rate": 1.7235384143410637e-05, "loss": 2.1448, "step": 7668 }, { "epoch": 0.26, "grad_norm": 0.689373254776001, "learning_rate": 1.7234650404428433e-05, "loss": 2.1667, "step": 7669 }, { "epoch": 0.26, "grad_norm": 0.7123984098434448, "learning_rate": 1.7233916583713387e-05, "loss": 2.1923, "step": 7670 }, { "epoch": 0.26, "grad_norm": 0.7342566251754761, "learning_rate": 1.7233182681273794e-05, "loss": 2.1017, "step": 7671 }, { "epoch": 0.26, "grad_norm": 0.733862578868866, "learning_rate": 1.723244869711794e-05, "loss": 2.1822, "step": 7672 }, { "epoch": 0.26, "grad_norm": 0.7084027528762817, "learning_rate": 1.723171463125412e-05, "loss": 2.1368, "step": 7673 }, { "epoch": 0.26, "grad_norm": 0.6860386729240417, "learning_rate": 1.7230980483690626e-05, "loss": 2.1514, "step": 7674 }, { "epoch": 0.26, "grad_norm": 0.6763572692871094, "learning_rate": 1.7230246254435754e-05, "loss": 2.1408, "step": 7675 }, { "epoch": 0.26, "grad_norm": 0.7044961452484131, "learning_rate": 1.7229511943497794e-05, "loss": 2.1054, "step": 7676 }, { "epoch": 0.26, "grad_norm": 0.7081761360168457, "learning_rate": 1.722877755088505e-05, "loss": 2.1304, "step": 7677 }, { "epoch": 0.26, "grad_norm": 0.6801961064338684, "learning_rate": 1.7228043076605808e-05, "loss": 2.1542, "step": 7678 }, { "epoch": 0.26, "grad_norm": 0.7183455228805542, "learning_rate": 1.7227308520668373e-05, "loss": 2.1424, "step": 7679 }, { "epoch": 0.26, "grad_norm": 0.7013319730758667, "learning_rate": 1.7226573883081045e-05, "loss": 2.2342, "step": 7680 }, { "epoch": 0.26, "grad_norm": 0.7163876891136169, "learning_rate": 1.7225839163852117e-05, "loss": 2.178, "step": 7681 }, { "epoch": 0.26, "grad_norm": 0.7153096199035645, "learning_rate": 1.7225104362989894e-05, "loss": 2.1031, "step": 7682 }, { "epoch": 0.26, "grad_norm": 0.6976253986358643, "learning_rate": 1.7224369480502674e-05, "loss": 2.1376, "step": 7683 }, { "epoch": 0.26, "grad_norm": 0.68953537940979, "learning_rate": 1.7223634516398764e-05, "loss": 2.1572, "step": 7684 }, { "epoch": 0.26, "grad_norm": 0.7066412568092346, "learning_rate": 1.7222899470686466e-05, "loss": 2.1801, "step": 7685 }, { "epoch": 0.26, "grad_norm": 0.7250317931175232, "learning_rate": 1.722216434337408e-05, "loss": 2.144, "step": 7686 }, { "epoch": 0.26, "grad_norm": 0.7483562231063843, "learning_rate": 1.7221429134469913e-05, "loss": 2.176, "step": 7687 }, { "epoch": 0.26, "grad_norm": 0.7701245546340942, "learning_rate": 1.7220693843982277e-05, "loss": 2.1539, "step": 7688 }, { "epoch": 0.26, "grad_norm": 0.6969650983810425, "learning_rate": 1.721995847191947e-05, "loss": 2.1079, "step": 7689 }, { "epoch": 0.26, "grad_norm": 0.7545410990715027, "learning_rate": 1.7219223018289802e-05, "loss": 2.2084, "step": 7690 }, { "epoch": 0.26, "grad_norm": 0.7547464370727539, "learning_rate": 1.7218487483101588e-05, "loss": 2.1646, "step": 7691 }, { "epoch": 0.26, "grad_norm": 0.7234165072441101, "learning_rate": 1.721775186636313e-05, "loss": 2.1615, "step": 7692 }, { "epoch": 0.26, "grad_norm": 0.7450416684150696, "learning_rate": 1.721701616808274e-05, "loss": 2.1504, "step": 7693 }, { "epoch": 0.26, "grad_norm": 0.7488062381744385, "learning_rate": 1.7216280388268734e-05, "loss": 2.2552, "step": 7694 }, { "epoch": 0.26, "grad_norm": 0.6949632167816162, "learning_rate": 1.7215544526929417e-05, "loss": 2.1116, "step": 7695 }, { "epoch": 0.26, "grad_norm": 0.7128508687019348, "learning_rate": 1.7214808584073112e-05, "loss": 2.1198, "step": 7696 }, { "epoch": 0.26, "grad_norm": 0.7132129669189453, "learning_rate": 1.7214072559708125e-05, "loss": 2.2174, "step": 7697 }, { "epoch": 0.26, "grad_norm": 0.6808035373687744, "learning_rate": 1.7213336453842772e-05, "loss": 2.1415, "step": 7698 }, { "epoch": 0.26, "grad_norm": 0.7169152498245239, "learning_rate": 1.7212600266485376e-05, "loss": 2.1041, "step": 7699 }, { "epoch": 0.26, "grad_norm": 0.7282432913780212, "learning_rate": 1.7211863997644247e-05, "loss": 2.1199, "step": 7700 }, { "epoch": 0.26, "grad_norm": 0.7323275208473206, "learning_rate": 1.7211127647327704e-05, "loss": 2.1146, "step": 7701 }, { "epoch": 0.26, "grad_norm": 0.7298784852027893, "learning_rate": 1.721039121554407e-05, "loss": 2.2151, "step": 7702 }, { "epoch": 0.26, "grad_norm": 0.7169885635375977, "learning_rate": 1.7209654702301657e-05, "loss": 2.1455, "step": 7703 }, { "epoch": 0.26, "grad_norm": 0.705639123916626, "learning_rate": 1.7208918107608793e-05, "loss": 2.1673, "step": 7704 }, { "epoch": 0.26, "grad_norm": 0.7212986946105957, "learning_rate": 1.72081814314738e-05, "loss": 2.1698, "step": 7705 }, { "epoch": 0.26, "grad_norm": 0.7285521626472473, "learning_rate": 1.720744467390499e-05, "loss": 2.1519, "step": 7706 }, { "epoch": 0.26, "grad_norm": 0.7345906496047974, "learning_rate": 1.72067078349107e-05, "loss": 2.1381, "step": 7707 }, { "epoch": 0.26, "grad_norm": 0.7159486413002014, "learning_rate": 1.720597091449925e-05, "loss": 2.102, "step": 7708 }, { "epoch": 0.26, "grad_norm": 0.760722815990448, "learning_rate": 1.720523391267896e-05, "loss": 2.1874, "step": 7709 }, { "epoch": 0.26, "grad_norm": 0.6814525723457336, "learning_rate": 1.7204496829458162e-05, "loss": 2.1426, "step": 7710 }, { "epoch": 0.26, "grad_norm": 0.701771080493927, "learning_rate": 1.720375966484518e-05, "loss": 2.0886, "step": 7711 }, { "epoch": 0.26, "grad_norm": 0.7392334342002869, "learning_rate": 1.7203022418848344e-05, "loss": 2.1523, "step": 7712 }, { "epoch": 0.26, "grad_norm": 0.7213826179504395, "learning_rate": 1.720228509147598e-05, "loss": 2.1514, "step": 7713 }, { "epoch": 0.26, "grad_norm": 0.7078513503074646, "learning_rate": 1.7201547682736423e-05, "loss": 2.1591, "step": 7714 }, { "epoch": 0.26, "grad_norm": 0.7237377762794495, "learning_rate": 1.7200810192637996e-05, "loss": 2.0758, "step": 7715 }, { "epoch": 0.26, "grad_norm": 0.7191662788391113, "learning_rate": 1.720007262118904e-05, "loss": 2.184, "step": 7716 }, { "epoch": 0.26, "grad_norm": 0.6949969530105591, "learning_rate": 1.7199334968397877e-05, "loss": 2.1662, "step": 7717 }, { "epoch": 0.26, "grad_norm": 0.7313836216926575, "learning_rate": 1.7198597234272854e-05, "loss": 2.0782, "step": 7718 }, { "epoch": 0.26, "grad_norm": 0.7024040818214417, "learning_rate": 1.7197859418822296e-05, "loss": 2.1085, "step": 7719 }, { "epoch": 0.26, "grad_norm": 0.6957772374153137, "learning_rate": 1.719712152205454e-05, "loss": 2.1578, "step": 7720 }, { "epoch": 0.26, "grad_norm": 0.6967777013778687, "learning_rate": 1.7196383543977925e-05, "loss": 2.1456, "step": 7721 }, { "epoch": 0.26, "grad_norm": 0.7206192016601562, "learning_rate": 1.7195645484600785e-05, "loss": 2.1539, "step": 7722 }, { "epoch": 0.26, "grad_norm": 0.688813328742981, "learning_rate": 1.7194907343931456e-05, "loss": 2.2106, "step": 7723 }, { "epoch": 0.26, "grad_norm": 0.7512519955635071, "learning_rate": 1.7194169121978285e-05, "loss": 2.1557, "step": 7724 }, { "epoch": 0.26, "grad_norm": 0.765169620513916, "learning_rate": 1.7193430818749605e-05, "loss": 2.1333, "step": 7725 }, { "epoch": 0.26, "grad_norm": 0.7524273991584778, "learning_rate": 1.7192692434253762e-05, "loss": 2.0843, "step": 7726 }, { "epoch": 0.26, "grad_norm": 0.6931421756744385, "learning_rate": 1.719195396849909e-05, "loss": 2.1318, "step": 7727 }, { "epoch": 0.26, "grad_norm": 0.6800587773323059, "learning_rate": 1.7191215421493942e-05, "loss": 2.1586, "step": 7728 }, { "epoch": 0.26, "grad_norm": 0.7171177268028259, "learning_rate": 1.7190476793246655e-05, "loss": 2.1729, "step": 7729 }, { "epoch": 0.26, "grad_norm": 0.7246927618980408, "learning_rate": 1.7189738083765575e-05, "loss": 2.1476, "step": 7730 }, { "epoch": 0.26, "grad_norm": 0.7056196928024292, "learning_rate": 1.718899929305905e-05, "loss": 2.1996, "step": 7731 }, { "epoch": 0.26, "grad_norm": 0.7401746511459351, "learning_rate": 1.718826042113542e-05, "loss": 2.227, "step": 7732 }, { "epoch": 0.26, "grad_norm": 0.716092050075531, "learning_rate": 1.718752146800304e-05, "loss": 2.127, "step": 7733 }, { "epoch": 0.26, "grad_norm": 0.7027502655982971, "learning_rate": 1.7186782433670254e-05, "loss": 2.2089, "step": 7734 }, { "epoch": 0.26, "grad_norm": 0.7207968831062317, "learning_rate": 1.7186043318145408e-05, "loss": 2.1098, "step": 7735 }, { "epoch": 0.26, "grad_norm": 0.7254105806350708, "learning_rate": 1.718530412143686e-05, "loss": 2.1304, "step": 7736 }, { "epoch": 0.26, "grad_norm": 0.7859221696853638, "learning_rate": 1.7184564843552956e-05, "loss": 2.1023, "step": 7737 }, { "epoch": 0.26, "grad_norm": 0.7056150436401367, "learning_rate": 1.718382548450205e-05, "loss": 2.1338, "step": 7738 }, { "epoch": 0.26, "grad_norm": 0.6863545775413513, "learning_rate": 1.7183086044292495e-05, "loss": 2.101, "step": 7739 }, { "epoch": 0.26, "grad_norm": 0.713303804397583, "learning_rate": 1.718234652293264e-05, "loss": 2.0906, "step": 7740 }, { "epoch": 0.26, "grad_norm": 0.7170447111129761, "learning_rate": 1.7181606920430844e-05, "loss": 2.1679, "step": 7741 }, { "epoch": 0.26, "grad_norm": 0.6951106190681458, "learning_rate": 1.7180867236795463e-05, "loss": 2.166, "step": 7742 }, { "epoch": 0.26, "grad_norm": 0.7681471109390259, "learning_rate": 1.7180127472034852e-05, "loss": 2.1185, "step": 7743 }, { "epoch": 0.26, "grad_norm": 0.6858208775520325, "learning_rate": 1.717938762615737e-05, "loss": 2.1199, "step": 7744 }, { "epoch": 0.26, "grad_norm": 0.7252985239028931, "learning_rate": 1.7178647699171373e-05, "loss": 2.2203, "step": 7745 }, { "epoch": 0.26, "grad_norm": 0.7053428888320923, "learning_rate": 1.7177907691085223e-05, "loss": 2.1813, "step": 7746 }, { "epoch": 0.26, "grad_norm": 0.7169435620307922, "learning_rate": 1.7177167601907276e-05, "loss": 2.1588, "step": 7747 }, { "epoch": 0.26, "grad_norm": 0.676417350769043, "learning_rate": 1.7176427431645897e-05, "loss": 2.1057, "step": 7748 }, { "epoch": 0.26, "grad_norm": 0.7188929319381714, "learning_rate": 1.7175687180309445e-05, "loss": 2.1825, "step": 7749 }, { "epoch": 0.26, "grad_norm": 0.7432684898376465, "learning_rate": 1.7174946847906285e-05, "loss": 2.1339, "step": 7750 }, { "epoch": 0.26, "grad_norm": 0.6986342668533325, "learning_rate": 1.7174206434444783e-05, "loss": 2.1792, "step": 7751 }, { "epoch": 0.26, "grad_norm": 0.693280816078186, "learning_rate": 1.71734659399333e-05, "loss": 2.121, "step": 7752 }, { "epoch": 0.26, "grad_norm": 0.7147374749183655, "learning_rate": 1.7172725364380202e-05, "loss": 2.1187, "step": 7753 }, { "epoch": 0.26, "grad_norm": 0.6885616779327393, "learning_rate": 1.7171984707793857e-05, "loss": 2.2069, "step": 7754 }, { "epoch": 0.26, "grad_norm": 0.7237285375595093, "learning_rate": 1.7171243970182634e-05, "loss": 2.1614, "step": 7755 }, { "epoch": 0.26, "grad_norm": 0.7343361973762512, "learning_rate": 1.71705031515549e-05, "loss": 2.1448, "step": 7756 }, { "epoch": 0.26, "grad_norm": 0.7518948316574097, "learning_rate": 1.7169762251919015e-05, "loss": 2.1604, "step": 7757 }, { "epoch": 0.26, "grad_norm": 0.7037867903709412, "learning_rate": 1.7169021271283367e-05, "loss": 2.1536, "step": 7758 }, { "epoch": 0.26, "grad_norm": 0.7368143796920776, "learning_rate": 1.7168280209656313e-05, "loss": 2.2027, "step": 7759 }, { "epoch": 0.26, "grad_norm": 0.7020929455757141, "learning_rate": 1.7167539067046233e-05, "loss": 2.1053, "step": 7760 }, { "epoch": 0.26, "grad_norm": 0.7318944931030273, "learning_rate": 1.7166797843461495e-05, "loss": 2.2475, "step": 7761 }, { "epoch": 0.26, "grad_norm": 0.7208459377288818, "learning_rate": 1.716605653891048e-05, "loss": 2.1559, "step": 7762 }, { "epoch": 0.26, "grad_norm": 0.686706006526947, "learning_rate": 1.7165315153401554e-05, "loss": 2.0947, "step": 7763 }, { "epoch": 0.26, "grad_norm": 0.702031672000885, "learning_rate": 1.7164573686943095e-05, "loss": 2.1466, "step": 7764 }, { "epoch": 0.26, "grad_norm": 0.7381224036216736, "learning_rate": 1.7163832139543485e-05, "loss": 2.1821, "step": 7765 }, { "epoch": 0.26, "grad_norm": 0.7788733243942261, "learning_rate": 1.7163090511211097e-05, "loss": 2.1166, "step": 7766 }, { "epoch": 0.26, "grad_norm": 0.6938040852546692, "learning_rate": 1.716234880195431e-05, "loss": 2.1858, "step": 7767 }, { "epoch": 0.26, "grad_norm": 0.6925522685050964, "learning_rate": 1.7161607011781504e-05, "loss": 2.0945, "step": 7768 }, { "epoch": 0.26, "grad_norm": 0.7058216333389282, "learning_rate": 1.716086514070106e-05, "loss": 2.2084, "step": 7769 }, { "epoch": 0.26, "grad_norm": 0.6886893510818481, "learning_rate": 1.7160123188721355e-05, "loss": 2.1679, "step": 7770 }, { "epoch": 0.26, "grad_norm": 0.7198439836502075, "learning_rate": 1.7159381155850778e-05, "loss": 2.2192, "step": 7771 }, { "epoch": 0.26, "grad_norm": 0.6899675130844116, "learning_rate": 1.7158639042097706e-05, "loss": 2.1193, "step": 7772 }, { "epoch": 0.26, "grad_norm": 0.6987216472625732, "learning_rate": 1.7157896847470527e-05, "loss": 2.097, "step": 7773 }, { "epoch": 0.26, "grad_norm": 0.7111876606941223, "learning_rate": 1.7157154571977622e-05, "loss": 2.1797, "step": 7774 }, { "epoch": 0.26, "grad_norm": 0.718830943107605, "learning_rate": 1.7156412215627382e-05, "loss": 2.1111, "step": 7775 }, { "epoch": 0.26, "grad_norm": 0.6891503930091858, "learning_rate": 1.7155669778428192e-05, "loss": 2.1771, "step": 7776 }, { "epoch": 0.26, "grad_norm": 0.6862533092498779, "learning_rate": 1.7154927260388436e-05, "loss": 2.1374, "step": 7777 }, { "epoch": 0.26, "grad_norm": 0.7214975357055664, "learning_rate": 1.7154184661516505e-05, "loss": 2.1597, "step": 7778 }, { "epoch": 0.26, "grad_norm": 0.7311958074569702, "learning_rate": 1.7153441981820788e-05, "loss": 2.1588, "step": 7779 }, { "epoch": 0.26, "grad_norm": 0.7298711538314819, "learning_rate": 1.715269922130968e-05, "loss": 2.2677, "step": 7780 }, { "epoch": 0.26, "grad_norm": 0.7290353178977966, "learning_rate": 1.7151956379991564e-05, "loss": 2.1714, "step": 7781 }, { "epoch": 0.26, "grad_norm": 0.7301397323608398, "learning_rate": 1.7151213457874835e-05, "loss": 2.1664, "step": 7782 }, { "epoch": 0.26, "grad_norm": 0.6944224238395691, "learning_rate": 1.715047045496789e-05, "loss": 2.1812, "step": 7783 }, { "epoch": 0.26, "grad_norm": 0.72264164686203, "learning_rate": 1.714972737127912e-05, "loss": 2.1993, "step": 7784 }, { "epoch": 0.26, "grad_norm": 0.7018815875053406, "learning_rate": 1.7148984206816922e-05, "loss": 2.1408, "step": 7785 }, { "epoch": 0.26, "grad_norm": 0.7239283919334412, "learning_rate": 1.7148240961589687e-05, "loss": 2.1771, "step": 7786 }, { "epoch": 0.26, "grad_norm": 0.7128854990005493, "learning_rate": 1.7147497635605815e-05, "loss": 2.0971, "step": 7787 }, { "epoch": 0.26, "grad_norm": 0.7174303531646729, "learning_rate": 1.7146754228873704e-05, "loss": 2.1276, "step": 7788 }, { "epoch": 0.26, "grad_norm": 0.7037590146064758, "learning_rate": 1.7146010741401754e-05, "loss": 2.1133, "step": 7789 }, { "epoch": 0.26, "grad_norm": 0.704841673374176, "learning_rate": 1.7145267173198363e-05, "loss": 2.2071, "step": 7790 }, { "epoch": 0.26, "grad_norm": 0.7176227569580078, "learning_rate": 1.714452352427193e-05, "loss": 2.1578, "step": 7791 }, { "epoch": 0.26, "grad_norm": 0.7243119478225708, "learning_rate": 1.7143779794630857e-05, "loss": 2.1628, "step": 7792 }, { "epoch": 0.26, "grad_norm": 0.6822180151939392, "learning_rate": 1.7143035984283544e-05, "loss": 2.1132, "step": 7793 }, { "epoch": 0.26, "grad_norm": 0.692240297794342, "learning_rate": 1.71422920932384e-05, "loss": 2.1718, "step": 7794 }, { "epoch": 0.26, "grad_norm": 0.6943684816360474, "learning_rate": 1.7141548121503823e-05, "loss": 2.1393, "step": 7795 }, { "epoch": 0.26, "grad_norm": 0.716433048248291, "learning_rate": 1.7140804069088223e-05, "loss": 2.1996, "step": 7796 }, { "epoch": 0.26, "grad_norm": 0.6924384236335754, "learning_rate": 1.7140059936000002e-05, "loss": 2.1481, "step": 7797 }, { "epoch": 0.26, "grad_norm": 0.7412042021751404, "learning_rate": 1.713931572224757e-05, "loss": 2.1127, "step": 7798 }, { "epoch": 0.26, "grad_norm": 0.725852906703949, "learning_rate": 1.7138571427839333e-05, "loss": 2.1532, "step": 7799 }, { "epoch": 0.26, "grad_norm": 0.7315605282783508, "learning_rate": 1.71378270527837e-05, "loss": 2.1784, "step": 7800 }, { "epoch": 0.26, "grad_norm": 0.7074808478355408, "learning_rate": 1.713708259708908e-05, "loss": 2.1471, "step": 7801 }, { "epoch": 0.26, "grad_norm": 0.6726783514022827, "learning_rate": 1.713633806076388e-05, "loss": 2.1611, "step": 7802 }, { "epoch": 0.26, "grad_norm": 0.6805378198623657, "learning_rate": 1.713559344381652e-05, "loss": 2.1342, "step": 7803 }, { "epoch": 0.26, "grad_norm": 0.7407160401344299, "learning_rate": 1.7134848746255405e-05, "loss": 2.1911, "step": 7804 }, { "epoch": 0.26, "grad_norm": 0.7225199937820435, "learning_rate": 1.713410396808895e-05, "loss": 2.1709, "step": 7805 }, { "epoch": 0.26, "grad_norm": 0.7017956376075745, "learning_rate": 1.713335910932557e-05, "loss": 2.1566, "step": 7806 }, { "epoch": 0.26, "grad_norm": 0.729587197303772, "learning_rate": 1.713261416997368e-05, "loss": 2.1785, "step": 7807 }, { "epoch": 0.26, "grad_norm": 0.6951969861984253, "learning_rate": 1.7131869150041695e-05, "loss": 2.1225, "step": 7808 }, { "epoch": 0.26, "grad_norm": 0.7271116971969604, "learning_rate": 1.713112404953803e-05, "loss": 2.1011, "step": 7809 }, { "epoch": 0.26, "grad_norm": 0.7424437999725342, "learning_rate": 1.7130378868471105e-05, "loss": 2.1272, "step": 7810 }, { "epoch": 0.26, "grad_norm": 0.7134390473365784, "learning_rate": 1.7129633606849338e-05, "loss": 2.1633, "step": 7811 }, { "epoch": 0.26, "grad_norm": 0.7044578790664673, "learning_rate": 1.712888826468115e-05, "loss": 2.156, "step": 7812 }, { "epoch": 0.26, "grad_norm": 0.711760938167572, "learning_rate": 1.712814284197496e-05, "loss": 2.1559, "step": 7813 }, { "epoch": 0.26, "grad_norm": 0.7074726819992065, "learning_rate": 1.7127397338739192e-05, "loss": 2.1713, "step": 7814 }, { "epoch": 0.26, "grad_norm": 0.7287285327911377, "learning_rate": 1.712665175498226e-05, "loss": 2.1091, "step": 7815 }, { "epoch": 0.26, "grad_norm": 0.6875952482223511, "learning_rate": 1.71259060907126e-05, "loss": 2.1086, "step": 7816 }, { "epoch": 0.26, "grad_norm": 0.7718198895454407, "learning_rate": 1.7125160345938624e-05, "loss": 2.1822, "step": 7817 }, { "epoch": 0.26, "grad_norm": 0.7299543619155884, "learning_rate": 1.7124414520668767e-05, "loss": 2.2307, "step": 7818 }, { "epoch": 0.26, "grad_norm": 0.6945135593414307, "learning_rate": 1.7123668614911445e-05, "loss": 2.1438, "step": 7819 }, { "epoch": 0.26, "grad_norm": 0.7080686688423157, "learning_rate": 1.7122922628675092e-05, "loss": 2.1498, "step": 7820 }, { "epoch": 0.26, "grad_norm": 0.74290931224823, "learning_rate": 1.7122176561968133e-05, "loss": 2.1104, "step": 7821 }, { "epoch": 0.26, "grad_norm": 0.7897998094558716, "learning_rate": 1.7121430414799e-05, "loss": 2.2249, "step": 7822 }, { "epoch": 0.26, "grad_norm": 0.7258075475692749, "learning_rate": 1.7120684187176117e-05, "loss": 2.1421, "step": 7823 }, { "epoch": 0.26, "grad_norm": 0.698527991771698, "learning_rate": 1.711993787910792e-05, "loss": 2.1741, "step": 7824 }, { "epoch": 0.26, "grad_norm": 0.6947150826454163, "learning_rate": 1.7119191490602834e-05, "loss": 2.073, "step": 7825 }, { "epoch": 0.26, "grad_norm": 0.7102426290512085, "learning_rate": 1.7118445021669297e-05, "loss": 2.1823, "step": 7826 }, { "epoch": 0.26, "grad_norm": 0.740727424621582, "learning_rate": 1.7117698472315737e-05, "loss": 2.1667, "step": 7827 }, { "epoch": 0.26, "grad_norm": 0.699183464050293, "learning_rate": 1.7116951842550596e-05, "loss": 2.0765, "step": 7828 }, { "epoch": 0.26, "grad_norm": 0.6834732294082642, "learning_rate": 1.7116205132382302e-05, "loss": 2.1058, "step": 7829 }, { "epoch": 0.26, "grad_norm": 0.7036998271942139, "learning_rate": 1.711545834181929e-05, "loss": 2.1511, "step": 7830 }, { "epoch": 0.26, "grad_norm": 0.7054764628410339, "learning_rate": 1.7114711470870004e-05, "loss": 2.1169, "step": 7831 }, { "epoch": 0.26, "grad_norm": 0.7175148129463196, "learning_rate": 1.7113964519542875e-05, "loss": 2.1476, "step": 7832 }, { "epoch": 0.26, "grad_norm": 0.7632777094841003, "learning_rate": 1.7113217487846343e-05, "loss": 2.1509, "step": 7833 }, { "epoch": 0.26, "grad_norm": 0.7094358205795288, "learning_rate": 1.711247037578885e-05, "loss": 2.1711, "step": 7834 }, { "epoch": 0.26, "grad_norm": 0.7338247299194336, "learning_rate": 1.7111723183378835e-05, "loss": 2.1128, "step": 7835 }, { "epoch": 0.26, "grad_norm": 0.7362343072891235, "learning_rate": 1.711097591062474e-05, "loss": 2.1349, "step": 7836 }, { "epoch": 0.26, "grad_norm": 0.7271143794059753, "learning_rate": 1.7110228557535007e-05, "loss": 2.2233, "step": 7837 }, { "epoch": 0.26, "grad_norm": 0.6997426748275757, "learning_rate": 1.7109481124118076e-05, "loss": 2.1398, "step": 7838 }, { "epoch": 0.26, "grad_norm": 0.7205355167388916, "learning_rate": 1.71087336103824e-05, "loss": 2.1654, "step": 7839 }, { "epoch": 0.26, "grad_norm": 0.7339781522750854, "learning_rate": 1.710798601633641e-05, "loss": 2.1909, "step": 7840 }, { "epoch": 0.26, "grad_norm": 0.6762281060218811, "learning_rate": 1.7107238341988565e-05, "loss": 2.1575, "step": 7841 }, { "epoch": 0.26, "grad_norm": 0.7162430882453918, "learning_rate": 1.71064905873473e-05, "loss": 2.2021, "step": 7842 }, { "epoch": 0.26, "grad_norm": 0.704414963722229, "learning_rate": 1.7105742752421077e-05, "loss": 2.1754, "step": 7843 }, { "epoch": 0.26, "grad_norm": 0.7161237001419067, "learning_rate": 1.7104994837218332e-05, "loss": 2.1312, "step": 7844 }, { "epoch": 0.26, "grad_norm": 0.7203507423400879, "learning_rate": 1.7104246841747523e-05, "loss": 2.143, "step": 7845 }, { "epoch": 0.26, "grad_norm": 0.743654191493988, "learning_rate": 1.7103498766017096e-05, "loss": 2.2259, "step": 7846 }, { "epoch": 0.26, "grad_norm": 0.7108768224716187, "learning_rate": 1.71027506100355e-05, "loss": 2.1748, "step": 7847 }, { "epoch": 0.26, "grad_norm": 0.6944230794906616, "learning_rate": 1.7102002373811193e-05, "loss": 2.1621, "step": 7848 }, { "epoch": 0.26, "grad_norm": 0.7488688230514526, "learning_rate": 1.710125405735262e-05, "loss": 2.158, "step": 7849 }, { "epoch": 0.26, "grad_norm": 0.7385022640228271, "learning_rate": 1.7100505660668244e-05, "loss": 2.288, "step": 7850 }, { "epoch": 0.26, "grad_norm": 0.7253884673118591, "learning_rate": 1.709975718376652e-05, "loss": 2.2102, "step": 7851 }, { "epoch": 0.26, "grad_norm": 0.7080777883529663, "learning_rate": 1.7099008626655895e-05, "loss": 2.143, "step": 7852 }, { "epoch": 0.26, "grad_norm": 0.7025786638259888, "learning_rate": 1.7098259989344833e-05, "loss": 2.1763, "step": 7853 }, { "epoch": 0.26, "grad_norm": 0.687380850315094, "learning_rate": 1.709751127184179e-05, "loss": 2.1124, "step": 7854 }, { "epoch": 0.26, "grad_norm": 0.7233151197433472, "learning_rate": 1.7096762474155225e-05, "loss": 2.1529, "step": 7855 }, { "epoch": 0.26, "grad_norm": 0.7254893779754639, "learning_rate": 1.7096013596293596e-05, "loss": 2.0678, "step": 7856 }, { "epoch": 0.26, "grad_norm": 0.8375031352043152, "learning_rate": 1.7095264638265364e-05, "loss": 2.2048, "step": 7857 }, { "epoch": 0.26, "grad_norm": 0.7308005094528198, "learning_rate": 1.709451560007899e-05, "loss": 2.0957, "step": 7858 }, { "epoch": 0.26, "grad_norm": 0.6927459836006165, "learning_rate": 1.7093766481742934e-05, "loss": 2.1568, "step": 7859 }, { "epoch": 0.26, "grad_norm": 0.7318151593208313, "learning_rate": 1.7093017283265667e-05, "loss": 2.1217, "step": 7860 }, { "epoch": 0.26, "grad_norm": 0.7318304777145386, "learning_rate": 1.7092268004655644e-05, "loss": 2.1766, "step": 7861 }, { "epoch": 0.26, "grad_norm": 0.7013775706291199, "learning_rate": 1.7091518645921335e-05, "loss": 2.2367, "step": 7862 }, { "epoch": 0.26, "grad_norm": 0.7081071138381958, "learning_rate": 1.70907692070712e-05, "loss": 2.1679, "step": 7863 }, { "epoch": 0.26, "grad_norm": 0.7219730615615845, "learning_rate": 1.7090019688113716e-05, "loss": 2.1468, "step": 7864 }, { "epoch": 0.26, "grad_norm": 0.7015517354011536, "learning_rate": 1.7089270089057343e-05, "loss": 2.176, "step": 7865 }, { "epoch": 0.26, "grad_norm": 0.6918462514877319, "learning_rate": 1.708852040991055e-05, "loss": 2.1799, "step": 7866 }, { "epoch": 0.26, "grad_norm": 0.7144599556922913, "learning_rate": 1.7087770650681807e-05, "loss": 2.1685, "step": 7867 }, { "epoch": 0.26, "grad_norm": 0.7278997302055359, "learning_rate": 1.7087020811379588e-05, "loss": 2.0475, "step": 7868 }, { "epoch": 0.26, "grad_norm": 0.6881092190742493, "learning_rate": 1.708627089201236e-05, "loss": 2.1629, "step": 7869 }, { "epoch": 0.26, "grad_norm": 0.7210370898246765, "learning_rate": 1.7085520892588597e-05, "loss": 2.1167, "step": 7870 }, { "epoch": 0.26, "grad_norm": 0.7074130177497864, "learning_rate": 1.708477081311677e-05, "loss": 2.1643, "step": 7871 }, { "epoch": 0.26, "grad_norm": 0.7149900794029236, "learning_rate": 1.7084020653605353e-05, "loss": 2.1546, "step": 7872 }, { "epoch": 0.26, "grad_norm": 0.7233145833015442, "learning_rate": 1.7083270414062824e-05, "loss": 2.1295, "step": 7873 }, { "epoch": 0.26, "grad_norm": 0.7299347519874573, "learning_rate": 1.7082520094497658e-05, "loss": 2.1517, "step": 7874 }, { "epoch": 0.26, "grad_norm": 0.6943715214729309, "learning_rate": 1.708176969491833e-05, "loss": 2.1967, "step": 7875 }, { "epoch": 0.26, "grad_norm": 0.7194640040397644, "learning_rate": 1.708101921533332e-05, "loss": 2.1547, "step": 7876 }, { "epoch": 0.26, "grad_norm": 0.732223629951477, "learning_rate": 1.70802686557511e-05, "loss": 2.1397, "step": 7877 }, { "epoch": 0.26, "grad_norm": 0.7153412699699402, "learning_rate": 1.707951801618016e-05, "loss": 2.1529, "step": 7878 }, { "epoch": 0.26, "grad_norm": 0.7337096333503723, "learning_rate": 1.707876729662897e-05, "loss": 2.1527, "step": 7879 }, { "epoch": 0.26, "grad_norm": 0.7013744115829468, "learning_rate": 1.7078016497106017e-05, "loss": 2.1296, "step": 7880 }, { "epoch": 0.26, "grad_norm": 0.6861643195152283, "learning_rate": 1.7077265617619783e-05, "loss": 2.1686, "step": 7881 }, { "epoch": 0.26, "grad_norm": 0.7032082676887512, "learning_rate": 1.707651465817875e-05, "loss": 2.1257, "step": 7882 }, { "epoch": 0.26, "grad_norm": 0.7724486589431763, "learning_rate": 1.70757636187914e-05, "loss": 2.1404, "step": 7883 }, { "epoch": 0.26, "grad_norm": 0.7003377676010132, "learning_rate": 1.707501249946622e-05, "loss": 2.1534, "step": 7884 }, { "epoch": 0.26, "grad_norm": 0.7136922478675842, "learning_rate": 1.7074261300211696e-05, "loss": 2.0819, "step": 7885 }, { "epoch": 0.26, "grad_norm": 0.7247936725616455, "learning_rate": 1.7073510021036313e-05, "loss": 2.156, "step": 7886 }, { "epoch": 0.26, "grad_norm": 0.689854621887207, "learning_rate": 1.707275866194856e-05, "loss": 2.1033, "step": 7887 }, { "epoch": 0.26, "grad_norm": 0.7166960835456848, "learning_rate": 1.7072007222956925e-05, "loss": 2.1228, "step": 7888 }, { "epoch": 0.26, "grad_norm": 0.6709117889404297, "learning_rate": 1.7071255704069894e-05, "loss": 2.1364, "step": 7889 }, { "epoch": 0.26, "grad_norm": 0.7289060354232788, "learning_rate": 1.7070504105295963e-05, "loss": 2.1456, "step": 7890 }, { "epoch": 0.26, "grad_norm": 0.7029357552528381, "learning_rate": 1.706975242664362e-05, "loss": 2.211, "step": 7891 }, { "epoch": 0.26, "grad_norm": 0.7252646088600159, "learning_rate": 1.7069000668121356e-05, "loss": 2.0948, "step": 7892 }, { "epoch": 0.26, "grad_norm": 0.7137283086776733, "learning_rate": 1.7068248829737668e-05, "loss": 2.1006, "step": 7893 }, { "epoch": 0.26, "grad_norm": 0.683407723903656, "learning_rate": 1.7067496911501043e-05, "loss": 2.1348, "step": 7894 }, { "epoch": 0.26, "grad_norm": 0.7210580706596375, "learning_rate": 1.7066744913419982e-05, "loss": 2.1142, "step": 7895 }, { "epoch": 0.26, "grad_norm": 0.7027649283409119, "learning_rate": 1.706599283550298e-05, "loss": 2.1779, "step": 7896 }, { "epoch": 0.26, "grad_norm": 0.7076180577278137, "learning_rate": 1.706524067775853e-05, "loss": 2.0913, "step": 7897 }, { "epoch": 0.26, "grad_norm": 0.755730390548706, "learning_rate": 1.706448844019513e-05, "loss": 2.1764, "step": 7898 }, { "epoch": 0.26, "grad_norm": 0.6955671906471252, "learning_rate": 1.7063736122821284e-05, "loss": 2.1123, "step": 7899 }, { "epoch": 0.26, "grad_norm": 0.7206125855445862, "learning_rate": 1.7062983725645485e-05, "loss": 2.1599, "step": 7900 }, { "epoch": 0.26, "grad_norm": 0.7265791296958923, "learning_rate": 1.7062231248676234e-05, "loss": 2.1614, "step": 7901 }, { "epoch": 0.26, "grad_norm": 0.7384074926376343, "learning_rate": 1.7061478691922037e-05, "loss": 2.1348, "step": 7902 }, { "epoch": 0.26, "grad_norm": 0.7313788533210754, "learning_rate": 1.7060726055391386e-05, "loss": 2.195, "step": 7903 }, { "epoch": 0.26, "grad_norm": 0.725885272026062, "learning_rate": 1.7059973339092793e-05, "loss": 2.0983, "step": 7904 }, { "epoch": 0.26, "grad_norm": 0.7421061396598816, "learning_rate": 1.7059220543034763e-05, "loss": 2.1565, "step": 7905 }, { "epoch": 0.26, "grad_norm": 0.7270490527153015, "learning_rate": 1.7058467667225792e-05, "loss": 2.1294, "step": 7906 }, { "epoch": 0.26, "grad_norm": 0.7013449668884277, "learning_rate": 1.7057714711674388e-05, "loss": 2.1907, "step": 7907 }, { "epoch": 0.26, "grad_norm": 0.7751885652542114, "learning_rate": 1.7056961676389062e-05, "loss": 2.1606, "step": 7908 }, { "epoch": 0.26, "grad_norm": 0.6948938369750977, "learning_rate": 1.705620856137832e-05, "loss": 2.1428, "step": 7909 }, { "epoch": 0.26, "grad_norm": 0.7014685869216919, "learning_rate": 1.7055455366650666e-05, "loss": 2.2132, "step": 7910 }, { "epoch": 0.26, "grad_norm": 0.7074909806251526, "learning_rate": 1.7054702092214617e-05, "loss": 2.1413, "step": 7911 }, { "epoch": 0.26, "grad_norm": 0.6974443197250366, "learning_rate": 1.7053948738078677e-05, "loss": 2.1535, "step": 7912 }, { "epoch": 0.26, "grad_norm": 0.7107587456703186, "learning_rate": 1.7053195304251352e-05, "loss": 2.117, "step": 7913 }, { "epoch": 0.26, "grad_norm": 0.6895557641983032, "learning_rate": 1.7052441790741165e-05, "loss": 2.1151, "step": 7914 }, { "epoch": 0.26, "grad_norm": 0.7073776721954346, "learning_rate": 1.7051688197556627e-05, "loss": 2.0986, "step": 7915 }, { "epoch": 0.26, "grad_norm": 0.7559026479721069, "learning_rate": 1.7050934524706244e-05, "loss": 2.142, "step": 7916 }, { "epoch": 0.26, "grad_norm": 0.7243779301643372, "learning_rate": 1.7050180772198535e-05, "loss": 2.1526, "step": 7917 }, { "epoch": 0.26, "grad_norm": 0.7031018733978271, "learning_rate": 1.7049426940042014e-05, "loss": 2.1218, "step": 7918 }, { "epoch": 0.26, "grad_norm": 0.737893283367157, "learning_rate": 1.7048673028245202e-05, "loss": 2.1539, "step": 7919 }, { "epoch": 0.26, "grad_norm": 0.7016792297363281, "learning_rate": 1.7047919036816614e-05, "loss": 2.1607, "step": 7920 }, { "epoch": 0.26, "grad_norm": 0.7344710826873779, "learning_rate": 1.7047164965764764e-05, "loss": 2.1762, "step": 7921 }, { "epoch": 0.26, "grad_norm": 0.7150170207023621, "learning_rate": 1.7046410815098176e-05, "loss": 2.1988, "step": 7922 }, { "epoch": 0.26, "grad_norm": 0.7300527095794678, "learning_rate": 1.704565658482537e-05, "loss": 2.1416, "step": 7923 }, { "epoch": 0.26, "grad_norm": 0.7067297101020813, "learning_rate": 1.704490227495486e-05, "loss": 2.1325, "step": 7924 }, { "epoch": 0.26, "grad_norm": 0.7377815246582031, "learning_rate": 1.7044147885495175e-05, "loss": 2.1759, "step": 7925 }, { "epoch": 0.26, "grad_norm": 0.7164495587348938, "learning_rate": 1.7043393416454836e-05, "loss": 2.1722, "step": 7926 }, { "epoch": 0.26, "grad_norm": 0.7310929894447327, "learning_rate": 1.7042638867842364e-05, "loss": 2.1562, "step": 7927 }, { "epoch": 0.26, "grad_norm": 0.716425895690918, "learning_rate": 1.7041884239666292e-05, "loss": 2.2071, "step": 7928 }, { "epoch": 0.26, "grad_norm": 0.689057469367981, "learning_rate": 1.7041129531935134e-05, "loss": 2.1581, "step": 7929 }, { "epoch": 0.26, "grad_norm": 0.6819918751716614, "learning_rate": 1.704037474465742e-05, "loss": 2.1129, "step": 7930 }, { "epoch": 0.26, "grad_norm": 0.7058389186859131, "learning_rate": 1.703961987784168e-05, "loss": 2.1605, "step": 7931 }, { "epoch": 0.26, "grad_norm": 0.6986914873123169, "learning_rate": 1.703886493149644e-05, "loss": 2.1125, "step": 7932 }, { "epoch": 0.26, "grad_norm": 0.697096586227417, "learning_rate": 1.7038109905630226e-05, "loss": 2.0867, "step": 7933 }, { "epoch": 0.26, "grad_norm": 0.7282609343528748, "learning_rate": 1.7037354800251576e-05, "loss": 2.1989, "step": 7934 }, { "epoch": 0.26, "grad_norm": 0.7441202402114868, "learning_rate": 1.7036599615369015e-05, "loss": 2.0777, "step": 7935 }, { "epoch": 0.26, "grad_norm": 0.7383463382720947, "learning_rate": 1.7035844350991074e-05, "loss": 2.1203, "step": 7936 }, { "epoch": 0.26, "grad_norm": 0.7265232801437378, "learning_rate": 1.7035089007126287e-05, "loss": 2.2107, "step": 7937 }, { "epoch": 0.26, "grad_norm": 0.7042539715766907, "learning_rate": 1.7034333583783185e-05, "loss": 2.051, "step": 7938 }, { "epoch": 0.26, "grad_norm": 0.7071065902709961, "learning_rate": 1.7033578080970308e-05, "loss": 2.2016, "step": 7939 }, { "epoch": 0.26, "grad_norm": 0.6963499188423157, "learning_rate": 1.7032822498696188e-05, "loss": 2.2138, "step": 7940 }, { "epoch": 0.26, "grad_norm": 0.7050575613975525, "learning_rate": 1.703206683696936e-05, "loss": 2.137, "step": 7941 }, { "epoch": 0.26, "grad_norm": 0.7180153727531433, "learning_rate": 1.7031311095798363e-05, "loss": 2.1788, "step": 7942 }, { "epoch": 0.26, "grad_norm": 0.7360210418701172, "learning_rate": 1.703055527519173e-05, "loss": 2.1245, "step": 7943 }, { "epoch": 0.26, "grad_norm": 0.7026529312133789, "learning_rate": 1.702979937515801e-05, "loss": 2.141, "step": 7944 }, { "epoch": 0.26, "grad_norm": 0.7011356949806213, "learning_rate": 1.7029043395705733e-05, "loss": 2.1764, "step": 7945 }, { "epoch": 0.26, "grad_norm": 0.6785018444061279, "learning_rate": 1.7028287336843443e-05, "loss": 2.1822, "step": 7946 }, { "epoch": 0.26, "grad_norm": 0.743807852268219, "learning_rate": 1.7027531198579682e-05, "loss": 2.1363, "step": 7947 }, { "epoch": 0.26, "grad_norm": 0.7605055570602417, "learning_rate": 1.7026774980922994e-05, "loss": 2.091, "step": 7948 }, { "epoch": 0.26, "grad_norm": 0.7215128540992737, "learning_rate": 1.7026018683881918e-05, "loss": 2.1461, "step": 7949 }, { "epoch": 0.26, "grad_norm": 0.713710367679596, "learning_rate": 1.7025262307465e-05, "loss": 2.1123, "step": 7950 }, { "epoch": 0.26, "grad_norm": 0.7201887965202332, "learning_rate": 1.702450585168079e-05, "loss": 2.195, "step": 7951 }, { "epoch": 0.26, "grad_norm": 0.7062337398529053, "learning_rate": 1.7023749316537827e-05, "loss": 2.1098, "step": 7952 }, { "epoch": 0.26, "grad_norm": 0.7705143690109253, "learning_rate": 1.702299270204466e-05, "loss": 2.2221, "step": 7953 }, { "epoch": 0.26, "grad_norm": 0.7025548219680786, "learning_rate": 1.7022236008209833e-05, "loss": 2.0606, "step": 7954 }, { "epoch": 0.26, "grad_norm": 0.7815284729003906, "learning_rate": 1.7021479235041908e-05, "loss": 2.1329, "step": 7955 }, { "epoch": 0.26, "grad_norm": 0.7062655091285706, "learning_rate": 1.7020722382549418e-05, "loss": 2.1295, "step": 7956 }, { "epoch": 0.26, "grad_norm": 0.8637570738792419, "learning_rate": 1.7019965450740926e-05, "loss": 2.2093, "step": 7957 }, { "epoch": 0.26, "grad_norm": 0.7366224527359009, "learning_rate": 1.7019208439624977e-05, "loss": 2.2339, "step": 7958 }, { "epoch": 0.26, "grad_norm": 0.7094641923904419, "learning_rate": 1.7018451349210125e-05, "loss": 2.1993, "step": 7959 }, { "epoch": 0.26, "grad_norm": 0.7672893404960632, "learning_rate": 1.701769417950492e-05, "loss": 2.3043, "step": 7960 }, { "epoch": 0.26, "grad_norm": 0.6900611519813538, "learning_rate": 1.7016936930517922e-05, "loss": 2.191, "step": 7961 }, { "epoch": 0.26, "grad_norm": 0.7013009190559387, "learning_rate": 1.7016179602257682e-05, "loss": 2.1899, "step": 7962 }, { "epoch": 0.26, "grad_norm": 0.7456221580505371, "learning_rate": 1.7015422194732756e-05, "loss": 2.1829, "step": 7963 }, { "epoch": 0.26, "grad_norm": 0.7236294150352478, "learning_rate": 1.7014664707951706e-05, "loss": 2.1468, "step": 7964 }, { "epoch": 0.26, "grad_norm": 0.7210402488708496, "learning_rate": 1.7013907141923076e-05, "loss": 2.1507, "step": 7965 }, { "epoch": 0.27, "grad_norm": 0.7101784944534302, "learning_rate": 1.701314949665544e-05, "loss": 2.0943, "step": 7966 }, { "epoch": 0.27, "grad_norm": 0.7123855948448181, "learning_rate": 1.7012391772157354e-05, "loss": 2.1735, "step": 7967 }, { "epoch": 0.27, "grad_norm": 0.7276803255081177, "learning_rate": 1.7011633968437368e-05, "loss": 2.1236, "step": 7968 }, { "epoch": 0.27, "grad_norm": 0.6953967809677124, "learning_rate": 1.7010876085504057e-05, "loss": 2.1396, "step": 7969 }, { "epoch": 0.27, "grad_norm": 0.7428085803985596, "learning_rate": 1.7010118123365972e-05, "loss": 2.1059, "step": 7970 }, { "epoch": 0.27, "grad_norm": 0.7024562358856201, "learning_rate": 1.7009360082031682e-05, "loss": 2.1464, "step": 7971 }, { "epoch": 0.27, "grad_norm": 0.7196164131164551, "learning_rate": 1.7008601961509753e-05, "loss": 2.2172, "step": 7972 }, { "epoch": 0.27, "grad_norm": 0.7017200589179993, "learning_rate": 1.7007843761808742e-05, "loss": 2.1951, "step": 7973 }, { "epoch": 0.27, "grad_norm": 0.7219711542129517, "learning_rate": 1.700708548293722e-05, "loss": 2.0993, "step": 7974 }, { "epoch": 0.27, "grad_norm": 0.7260026931762695, "learning_rate": 1.7006327124903754e-05, "loss": 2.1858, "step": 7975 }, { "epoch": 0.27, "grad_norm": 0.7200074791908264, "learning_rate": 1.700556868771691e-05, "loss": 2.105, "step": 7976 }, { "epoch": 0.27, "grad_norm": 0.7227960228919983, "learning_rate": 1.7004810171385256e-05, "loss": 2.1721, "step": 7977 }, { "epoch": 0.27, "grad_norm": 0.6948323249816895, "learning_rate": 1.7004051575917364e-05, "loss": 2.1503, "step": 7978 }, { "epoch": 0.27, "grad_norm": 0.7318873405456543, "learning_rate": 1.70032929013218e-05, "loss": 2.2419, "step": 7979 }, { "epoch": 0.27, "grad_norm": 0.7254428267478943, "learning_rate": 1.7002534147607138e-05, "loss": 2.1377, "step": 7980 }, { "epoch": 0.27, "grad_norm": 0.8091462850570679, "learning_rate": 1.7001775314781948e-05, "loss": 2.0492, "step": 7981 }, { "epoch": 0.27, "grad_norm": 0.6963717341423035, "learning_rate": 1.7001016402854808e-05, "loss": 2.1075, "step": 7982 }, { "epoch": 0.27, "grad_norm": 0.7116683721542358, "learning_rate": 1.7000257411834283e-05, "loss": 2.1132, "step": 7983 }, { "epoch": 0.27, "grad_norm": 0.7118235230445862, "learning_rate": 1.6999498341728954e-05, "loss": 2.1676, "step": 7984 }, { "epoch": 0.27, "grad_norm": 0.7162246704101562, "learning_rate": 1.6998739192547394e-05, "loss": 2.106, "step": 7985 }, { "epoch": 0.27, "grad_norm": 0.7315239310264587, "learning_rate": 1.6997979964298182e-05, "loss": 2.1918, "step": 7986 }, { "epoch": 0.27, "grad_norm": 0.7427484393119812, "learning_rate": 1.6997220656989893e-05, "loss": 2.174, "step": 7987 }, { "epoch": 0.27, "grad_norm": 0.7121890187263489, "learning_rate": 1.6996461270631105e-05, "loss": 2.1346, "step": 7988 }, { "epoch": 0.27, "grad_norm": 0.7858433723449707, "learning_rate": 1.6995701805230397e-05, "loss": 2.161, "step": 7989 }, { "epoch": 0.27, "grad_norm": 0.7381325960159302, "learning_rate": 1.6994942260796353e-05, "loss": 2.0858, "step": 7990 }, { "epoch": 0.27, "grad_norm": 0.7145074605941772, "learning_rate": 1.6994182637337545e-05, "loss": 2.2733, "step": 7991 }, { "epoch": 0.27, "grad_norm": 0.7198360562324524, "learning_rate": 1.6993422934862565e-05, "loss": 2.1684, "step": 7992 }, { "epoch": 0.27, "grad_norm": 0.7395340800285339, "learning_rate": 1.6992663153379994e-05, "loss": 2.1291, "step": 7993 }, { "epoch": 0.27, "grad_norm": 0.7065789699554443, "learning_rate": 1.699190329289841e-05, "loss": 2.1054, "step": 7994 }, { "epoch": 0.27, "grad_norm": 0.7089216709136963, "learning_rate": 1.69911433534264e-05, "loss": 2.0744, "step": 7995 }, { "epoch": 0.27, "grad_norm": 0.7498329877853394, "learning_rate": 1.6990383334972548e-05, "loss": 2.1265, "step": 7996 }, { "epoch": 0.27, "grad_norm": 0.6979795098304749, "learning_rate": 1.6989623237545444e-05, "loss": 2.1121, "step": 7997 }, { "epoch": 0.27, "grad_norm": 0.7098645567893982, "learning_rate": 1.698886306115367e-05, "loss": 2.2209, "step": 7998 }, { "epoch": 0.27, "grad_norm": 0.7421597838401794, "learning_rate": 1.698810280580582e-05, "loss": 2.121, "step": 7999 }, { "epoch": 0.27, "grad_norm": 0.7312493324279785, "learning_rate": 1.698734247151048e-05, "loss": 2.123, "step": 8000 }, { "epoch": 0.27, "grad_norm": 0.7391269207000732, "learning_rate": 1.698658205827624e-05, "loss": 2.1927, "step": 8001 }, { "epoch": 0.27, "grad_norm": 0.717480480670929, "learning_rate": 1.6985821566111685e-05, "loss": 2.2087, "step": 8002 }, { "epoch": 0.27, "grad_norm": 0.7311884164810181, "learning_rate": 1.698506099502542e-05, "loss": 2.1699, "step": 8003 }, { "epoch": 0.27, "grad_norm": 0.6925169229507446, "learning_rate": 1.6984300345026026e-05, "loss": 2.1653, "step": 8004 }, { "epoch": 0.27, "grad_norm": 0.7030767202377319, "learning_rate": 1.6983539616122097e-05, "loss": 2.0961, "step": 8005 }, { "epoch": 0.27, "grad_norm": 0.7056183218955994, "learning_rate": 1.6982778808322233e-05, "loss": 2.1506, "step": 8006 }, { "epoch": 0.27, "grad_norm": 0.7171412110328674, "learning_rate": 1.698201792163503e-05, "loss": 2.1124, "step": 8007 }, { "epoch": 0.27, "grad_norm": 0.7261080145835876, "learning_rate": 1.6981256956069075e-05, "loss": 2.1752, "step": 8008 }, { "epoch": 0.27, "grad_norm": 0.7251702547073364, "learning_rate": 1.6980495911632973e-05, "loss": 2.1737, "step": 8009 }, { "epoch": 0.27, "grad_norm": 0.6978588700294495, "learning_rate": 1.6979734788335318e-05, "loss": 2.1561, "step": 8010 }, { "epoch": 0.27, "grad_norm": 0.7254132628440857, "learning_rate": 1.6978973586184707e-05, "loss": 2.1109, "step": 8011 }, { "epoch": 0.27, "grad_norm": 0.7048943638801575, "learning_rate": 1.6978212305189744e-05, "loss": 2.1732, "step": 8012 }, { "epoch": 0.27, "grad_norm": 0.7541685700416565, "learning_rate": 1.6977450945359033e-05, "loss": 2.1694, "step": 8013 }, { "epoch": 0.27, "grad_norm": 0.7133663892745972, "learning_rate": 1.6976689506701167e-05, "loss": 2.1601, "step": 8014 }, { "epoch": 0.27, "grad_norm": 0.7131994366645813, "learning_rate": 1.6975927989224753e-05, "loss": 2.1426, "step": 8015 }, { "epoch": 0.27, "grad_norm": 0.7086762189865112, "learning_rate": 1.6975166392938388e-05, "loss": 2.1216, "step": 8016 }, { "epoch": 0.27, "grad_norm": 0.718539834022522, "learning_rate": 1.6974404717850688e-05, "loss": 2.2322, "step": 8017 }, { "epoch": 0.27, "grad_norm": 0.7072193026542664, "learning_rate": 1.6973642963970245e-05, "loss": 2.1863, "step": 8018 }, { "epoch": 0.27, "grad_norm": 0.7035252451896667, "learning_rate": 1.6972881131305675e-05, "loss": 2.1708, "step": 8019 }, { "epoch": 0.27, "grad_norm": 0.7313700914382935, "learning_rate": 1.6972119219865577e-05, "loss": 2.2044, "step": 8020 }, { "epoch": 0.27, "grad_norm": 0.7427760362625122, "learning_rate": 1.6971357229658564e-05, "loss": 2.1373, "step": 8021 }, { "epoch": 0.27, "grad_norm": 0.7605355381965637, "learning_rate": 1.6970595160693242e-05, "loss": 2.2379, "step": 8022 }, { "epoch": 0.27, "grad_norm": 0.7150384783744812, "learning_rate": 1.6969833012978224e-05, "loss": 2.1413, "step": 8023 }, { "epoch": 0.27, "grad_norm": 0.7506532669067383, "learning_rate": 1.6969070786522114e-05, "loss": 2.0715, "step": 8024 }, { "epoch": 0.27, "grad_norm": 0.6972370147705078, "learning_rate": 1.6968308481333523e-05, "loss": 2.0523, "step": 8025 }, { "epoch": 0.27, "grad_norm": 0.6927030086517334, "learning_rate": 1.6967546097421073e-05, "loss": 2.1483, "step": 8026 }, { "epoch": 0.27, "grad_norm": 0.7181596159934998, "learning_rate": 1.696678363479337e-05, "loss": 2.1924, "step": 8027 }, { "epoch": 0.27, "grad_norm": 0.7039393782615662, "learning_rate": 1.6966021093459028e-05, "loss": 2.1442, "step": 8028 }, { "epoch": 0.27, "grad_norm": 0.694395899772644, "learning_rate": 1.6965258473426657e-05, "loss": 2.1439, "step": 8029 }, { "epoch": 0.27, "grad_norm": 0.689033567905426, "learning_rate": 1.6964495774704885e-05, "loss": 2.1915, "step": 8030 }, { "epoch": 0.27, "grad_norm": 0.6865559220314026, "learning_rate": 1.6963732997302317e-05, "loss": 2.1535, "step": 8031 }, { "epoch": 0.27, "grad_norm": 0.7057960033416748, "learning_rate": 1.6962970141227577e-05, "loss": 2.1348, "step": 8032 }, { "epoch": 0.27, "grad_norm": 0.7132707238197327, "learning_rate": 1.696220720648928e-05, "loss": 2.173, "step": 8033 }, { "epoch": 0.27, "grad_norm": 0.696930468082428, "learning_rate": 1.6961444193096045e-05, "loss": 2.1668, "step": 8034 }, { "epoch": 0.27, "grad_norm": 0.7106955051422119, "learning_rate": 1.6960681101056495e-05, "loss": 2.1367, "step": 8035 }, { "epoch": 0.27, "grad_norm": 0.6993038058280945, "learning_rate": 1.6959917930379248e-05, "loss": 2.1838, "step": 8036 }, { "epoch": 0.27, "grad_norm": 0.7253422737121582, "learning_rate": 1.6959154681072927e-05, "loss": 2.1525, "step": 8037 }, { "epoch": 0.27, "grad_norm": 0.7360081672668457, "learning_rate": 1.6958391353146158e-05, "loss": 2.1615, "step": 8038 }, { "epoch": 0.27, "grad_norm": 0.7317522764205933, "learning_rate": 1.695762794660756e-05, "loss": 2.1363, "step": 8039 }, { "epoch": 0.27, "grad_norm": 0.7145367860794067, "learning_rate": 1.6956864461465757e-05, "loss": 2.1493, "step": 8040 }, { "epoch": 0.27, "grad_norm": 0.7083792090415955, "learning_rate": 1.695610089772938e-05, "loss": 2.1437, "step": 8041 }, { "epoch": 0.27, "grad_norm": 0.7179403901100159, "learning_rate": 1.695533725540705e-05, "loss": 2.1711, "step": 8042 }, { "epoch": 0.27, "grad_norm": 0.6831409931182861, "learning_rate": 1.6954573534507393e-05, "loss": 2.1158, "step": 8043 }, { "epoch": 0.27, "grad_norm": 0.756127119064331, "learning_rate": 1.6953809735039045e-05, "loss": 2.1415, "step": 8044 }, { "epoch": 0.27, "grad_norm": 0.7312495708465576, "learning_rate": 1.6953045857010628e-05, "loss": 2.1293, "step": 8045 }, { "epoch": 0.27, "grad_norm": 0.7439257502555847, "learning_rate": 1.695228190043077e-05, "loss": 2.1521, "step": 8046 }, { "epoch": 0.27, "grad_norm": 0.715004563331604, "learning_rate": 1.695151786530811e-05, "loss": 2.2003, "step": 8047 }, { "epoch": 0.27, "grad_norm": 0.7093165516853333, "learning_rate": 1.6950753751651273e-05, "loss": 2.0874, "step": 8048 }, { "epoch": 0.27, "grad_norm": 0.7282658219337463, "learning_rate": 1.6949989559468892e-05, "loss": 2.1598, "step": 8049 }, { "epoch": 0.27, "grad_norm": 0.6960320472717285, "learning_rate": 1.6949225288769607e-05, "loss": 2.1254, "step": 8050 }, { "epoch": 0.27, "grad_norm": 0.6917444467544556, "learning_rate": 1.6948460939562043e-05, "loss": 2.1998, "step": 8051 }, { "epoch": 0.27, "grad_norm": 0.6972142457962036, "learning_rate": 1.6947696511854844e-05, "loss": 2.1455, "step": 8052 }, { "epoch": 0.27, "grad_norm": 0.7067061066627502, "learning_rate": 1.6946932005656638e-05, "loss": 2.1209, "step": 8053 }, { "epoch": 0.27, "grad_norm": 0.7279441952705383, "learning_rate": 1.6946167420976066e-05, "loss": 2.1664, "step": 8054 }, { "epoch": 0.27, "grad_norm": 0.7077794075012207, "learning_rate": 1.6945402757821768e-05, "loss": 2.134, "step": 8055 }, { "epoch": 0.27, "grad_norm": 0.7306479811668396, "learning_rate": 1.6944638016202376e-05, "loss": 2.2105, "step": 8056 }, { "epoch": 0.27, "grad_norm": 0.718594491481781, "learning_rate": 1.6943873196126537e-05, "loss": 2.1049, "step": 8057 }, { "epoch": 0.27, "grad_norm": 0.7279380559921265, "learning_rate": 1.6943108297602887e-05, "loss": 2.2009, "step": 8058 }, { "epoch": 0.27, "grad_norm": 0.6916021108627319, "learning_rate": 1.694234332064007e-05, "loss": 2.1807, "step": 8059 }, { "epoch": 0.27, "grad_norm": 0.7069020867347717, "learning_rate": 1.694157826524672e-05, "loss": 2.1464, "step": 8060 }, { "epoch": 0.27, "grad_norm": 0.722312331199646, "learning_rate": 1.6940813131431495e-05, "loss": 2.138, "step": 8061 }, { "epoch": 0.27, "grad_norm": 0.7316096425056458, "learning_rate": 1.6940047919203026e-05, "loss": 2.2364, "step": 8062 }, { "epoch": 0.27, "grad_norm": 0.7448049187660217, "learning_rate": 1.6939282628569967e-05, "loss": 2.1896, "step": 8063 }, { "epoch": 0.27, "grad_norm": 0.715130090713501, "learning_rate": 1.6938517259540955e-05, "loss": 2.1982, "step": 8064 }, { "epoch": 0.27, "grad_norm": 0.7153337001800537, "learning_rate": 1.6937751812124644e-05, "loss": 2.1577, "step": 8065 }, { "epoch": 0.27, "grad_norm": 0.7206545472145081, "learning_rate": 1.6936986286329678e-05, "loss": 2.154, "step": 8066 }, { "epoch": 0.27, "grad_norm": 0.6954268217086792, "learning_rate": 1.6936220682164706e-05, "loss": 2.1567, "step": 8067 }, { "epoch": 0.27, "grad_norm": 0.7146590948104858, "learning_rate": 1.6935454999638382e-05, "loss": 2.1984, "step": 8068 }, { "epoch": 0.27, "grad_norm": 0.7114245891571045, "learning_rate": 1.693468923875935e-05, "loss": 2.1612, "step": 8069 }, { "epoch": 0.27, "grad_norm": 0.7052518129348755, "learning_rate": 1.693392339953626e-05, "loss": 2.0681, "step": 8070 }, { "epoch": 0.27, "grad_norm": 0.7618337869644165, "learning_rate": 1.6933157481977768e-05, "loss": 2.1538, "step": 8071 }, { "epoch": 0.27, "grad_norm": 0.7551383972167969, "learning_rate": 1.6932391486092526e-05, "loss": 2.215, "step": 8072 }, { "epoch": 0.27, "grad_norm": 0.7414827942848206, "learning_rate": 1.6931625411889192e-05, "loss": 2.1049, "step": 8073 }, { "epoch": 0.27, "grad_norm": 0.7153434753417969, "learning_rate": 1.6930859259376412e-05, "loss": 2.2162, "step": 8074 }, { "epoch": 0.27, "grad_norm": 0.7411903738975525, "learning_rate": 1.693009302856285e-05, "loss": 2.1172, "step": 8075 }, { "epoch": 0.27, "grad_norm": 0.7050119638442993, "learning_rate": 1.6929326719457153e-05, "loss": 2.1202, "step": 8076 }, { "epoch": 0.27, "grad_norm": 0.7146358489990234, "learning_rate": 1.6928560332067988e-05, "loss": 2.0591, "step": 8077 }, { "epoch": 0.27, "grad_norm": 0.7181561589241028, "learning_rate": 1.6927793866404006e-05, "loss": 2.1103, "step": 8078 }, { "epoch": 0.27, "grad_norm": 0.7203660607337952, "learning_rate": 1.692702732247387e-05, "loss": 2.1229, "step": 8079 }, { "epoch": 0.27, "grad_norm": 0.7607026696205139, "learning_rate": 1.692626070028624e-05, "loss": 2.1658, "step": 8080 }, { "epoch": 0.27, "grad_norm": 0.7067725658416748, "learning_rate": 1.6925493999849773e-05, "loss": 2.1535, "step": 8081 }, { "epoch": 0.27, "grad_norm": 0.7124351263046265, "learning_rate": 1.6924727221173135e-05, "loss": 2.0787, "step": 8082 }, { "epoch": 0.27, "grad_norm": 0.6913341879844666, "learning_rate": 1.692396036426499e-05, "loss": 2.107, "step": 8083 }, { "epoch": 0.27, "grad_norm": 0.7637014985084534, "learning_rate": 1.6923193429133994e-05, "loss": 2.1074, "step": 8084 }, { "epoch": 0.27, "grad_norm": 0.6997377276420593, "learning_rate": 1.692242641578882e-05, "loss": 2.1823, "step": 8085 }, { "epoch": 0.27, "grad_norm": 0.7334240078926086, "learning_rate": 1.6921659324238126e-05, "loss": 2.1146, "step": 8086 }, { "epoch": 0.27, "grad_norm": 0.7359877824783325, "learning_rate": 1.6920892154490584e-05, "loss": 2.216, "step": 8087 }, { "epoch": 0.27, "grad_norm": 0.7286894917488098, "learning_rate": 1.6920124906554857e-05, "loss": 2.1528, "step": 8088 }, { "epoch": 0.27, "grad_norm": 0.7410925626754761, "learning_rate": 1.6919357580439615e-05, "loss": 2.121, "step": 8089 }, { "epoch": 0.27, "grad_norm": 0.7232389450073242, "learning_rate": 1.691859017615353e-05, "loss": 2.1989, "step": 8090 }, { "epoch": 0.27, "grad_norm": 0.7084623575210571, "learning_rate": 1.6917822693705262e-05, "loss": 2.2004, "step": 8091 }, { "epoch": 0.27, "grad_norm": 0.73396897315979, "learning_rate": 1.6917055133103487e-05, "loss": 2.1291, "step": 8092 }, { "epoch": 0.27, "grad_norm": 0.710496723651886, "learning_rate": 1.691628749435688e-05, "loss": 2.1599, "step": 8093 }, { "epoch": 0.27, "grad_norm": 0.7316484451293945, "learning_rate": 1.6915519777474113e-05, "loss": 2.1912, "step": 8094 }, { "epoch": 0.27, "grad_norm": 0.7637284994125366, "learning_rate": 1.691475198246385e-05, "loss": 2.1422, "step": 8095 }, { "epoch": 0.27, "grad_norm": 0.7148154973983765, "learning_rate": 1.6913984109334776e-05, "loss": 2.082, "step": 8096 }, { "epoch": 0.27, "grad_norm": 0.7341612577438354, "learning_rate": 1.691321615809556e-05, "loss": 2.1288, "step": 8097 }, { "epoch": 0.27, "grad_norm": 0.7228325605392456, "learning_rate": 1.691244812875488e-05, "loss": 2.1845, "step": 8098 }, { "epoch": 0.27, "grad_norm": 0.7305983901023865, "learning_rate": 1.691168002132141e-05, "loss": 2.2612, "step": 8099 }, { "epoch": 0.27, "grad_norm": 0.7235361933708191, "learning_rate": 1.6910911835803833e-05, "loss": 2.1056, "step": 8100 }, { "epoch": 0.27, "grad_norm": 0.7653730511665344, "learning_rate": 1.691014357221082e-05, "loss": 2.1176, "step": 8101 }, { "epoch": 0.27, "grad_norm": 0.7131959199905396, "learning_rate": 1.6909375230551058e-05, "loss": 2.1578, "step": 8102 }, { "epoch": 0.27, "grad_norm": 0.7110508680343628, "learning_rate": 1.6908606810833225e-05, "loss": 2.0944, "step": 8103 }, { "epoch": 0.27, "grad_norm": 0.7016122341156006, "learning_rate": 1.6907838313065998e-05, "loss": 2.1732, "step": 8104 }, { "epoch": 0.27, "grad_norm": 0.6847202181816101, "learning_rate": 1.6907069737258065e-05, "loss": 2.1049, "step": 8105 }, { "epoch": 0.27, "grad_norm": 0.7419757843017578, "learning_rate": 1.6906301083418106e-05, "loss": 2.2233, "step": 8106 }, { "epoch": 0.27, "grad_norm": 0.7221055626869202, "learning_rate": 1.69055323515548e-05, "loss": 2.157, "step": 8107 }, { "epoch": 0.27, "grad_norm": 0.7006413340568542, "learning_rate": 1.690476354167684e-05, "loss": 2.1716, "step": 8108 }, { "epoch": 0.27, "grad_norm": 0.7043401598930359, "learning_rate": 1.690399465379291e-05, "loss": 2.1418, "step": 8109 }, { "epoch": 0.27, "grad_norm": 0.7013598084449768, "learning_rate": 1.6903225687911692e-05, "loss": 2.1057, "step": 8110 }, { "epoch": 0.27, "grad_norm": 0.6882937550544739, "learning_rate": 1.6902456644041877e-05, "loss": 2.135, "step": 8111 }, { "epoch": 0.27, "grad_norm": 0.7238315939903259, "learning_rate": 1.6901687522192152e-05, "loss": 2.1692, "step": 8112 }, { "epoch": 0.27, "grad_norm": 0.7501814961433411, "learning_rate": 1.6900918322371204e-05, "loss": 2.1871, "step": 8113 }, { "epoch": 0.27, "grad_norm": 0.6972922682762146, "learning_rate": 1.6900149044587728e-05, "loss": 2.0657, "step": 8114 }, { "epoch": 0.27, "grad_norm": 0.7206215858459473, "learning_rate": 1.6899379688850407e-05, "loss": 2.1329, "step": 8115 }, { "epoch": 0.27, "grad_norm": 0.7121152877807617, "learning_rate": 1.6898610255167945e-05, "loss": 2.0767, "step": 8116 }, { "epoch": 0.27, "grad_norm": 0.712029218673706, "learning_rate": 1.6897840743549023e-05, "loss": 2.2257, "step": 8117 }, { "epoch": 0.27, "grad_norm": 0.7147645950317383, "learning_rate": 1.689707115400234e-05, "loss": 2.1688, "step": 8118 }, { "epoch": 0.27, "grad_norm": 0.7088053822517395, "learning_rate": 1.6896301486536588e-05, "loss": 2.066, "step": 8119 }, { "epoch": 0.27, "grad_norm": 0.7544865012168884, "learning_rate": 1.6895531741160465e-05, "loss": 2.1453, "step": 8120 }, { "epoch": 0.27, "grad_norm": 0.6798065900802612, "learning_rate": 1.6894761917882665e-05, "loss": 2.1154, "step": 8121 }, { "epoch": 0.27, "grad_norm": 0.7167335152626038, "learning_rate": 1.6893992016711885e-05, "loss": 2.1372, "step": 8122 }, { "epoch": 0.27, "grad_norm": 0.7570890784263611, "learning_rate": 1.6893222037656825e-05, "loss": 2.1945, "step": 8123 }, { "epoch": 0.27, "grad_norm": 0.7210062742233276, "learning_rate": 1.6892451980726182e-05, "loss": 2.1511, "step": 8124 }, { "epoch": 0.27, "grad_norm": 0.7151951789855957, "learning_rate": 1.6891681845928654e-05, "loss": 2.1267, "step": 8125 }, { "epoch": 0.27, "grad_norm": 0.7219679355621338, "learning_rate": 1.6890911633272942e-05, "loss": 2.1799, "step": 8126 }, { "epoch": 0.27, "grad_norm": 0.6984708309173584, "learning_rate": 1.689014134276775e-05, "loss": 2.125, "step": 8127 }, { "epoch": 0.27, "grad_norm": 0.6969929933547974, "learning_rate": 1.6889370974421782e-05, "loss": 2.1218, "step": 8128 }, { "epoch": 0.27, "grad_norm": 0.7126932144165039, "learning_rate": 1.688860052824374e-05, "loss": 2.207, "step": 8129 }, { "epoch": 0.27, "grad_norm": 0.7355309724807739, "learning_rate": 1.688783000424232e-05, "loss": 2.1554, "step": 8130 }, { "epoch": 0.27, "grad_norm": 0.7027552127838135, "learning_rate": 1.6887059402426235e-05, "loss": 2.1381, "step": 8131 }, { "epoch": 0.27, "grad_norm": 0.7012249827384949, "learning_rate": 1.688628872280419e-05, "loss": 2.1112, "step": 8132 }, { "epoch": 0.27, "grad_norm": 0.7139543294906616, "learning_rate": 1.6885517965384893e-05, "loss": 2.1294, "step": 8133 }, { "epoch": 0.27, "grad_norm": 0.7407210469245911, "learning_rate": 1.6884747130177048e-05, "loss": 2.1616, "step": 8134 }, { "epoch": 0.27, "grad_norm": 0.7053178548812866, "learning_rate": 1.6883976217189365e-05, "loss": 2.1059, "step": 8135 }, { "epoch": 0.27, "grad_norm": 0.7465366125106812, "learning_rate": 1.6883205226430553e-05, "loss": 2.1996, "step": 8136 }, { "epoch": 0.27, "grad_norm": 0.7339368462562561, "learning_rate": 1.688243415790932e-05, "loss": 2.0866, "step": 8137 }, { "epoch": 0.27, "grad_norm": 0.7101246118545532, "learning_rate": 1.688166301163438e-05, "loss": 2.1291, "step": 8138 }, { "epoch": 0.27, "grad_norm": 0.7082968354225159, "learning_rate": 1.6880891787614445e-05, "loss": 2.1409, "step": 8139 }, { "epoch": 0.27, "grad_norm": 0.7251446843147278, "learning_rate": 1.688012048585823e-05, "loss": 2.1302, "step": 8140 }, { "epoch": 0.27, "grad_norm": 0.7157198190689087, "learning_rate": 1.6879349106374443e-05, "loss": 2.2018, "step": 8141 }, { "epoch": 0.27, "grad_norm": 0.7369323372840881, "learning_rate": 1.6878577649171802e-05, "loss": 2.1875, "step": 8142 }, { "epoch": 0.27, "grad_norm": 0.7154991030693054, "learning_rate": 1.687780611425902e-05, "loss": 2.1027, "step": 8143 }, { "epoch": 0.27, "grad_norm": 0.7154111266136169, "learning_rate": 1.6877034501644818e-05, "loss": 2.1324, "step": 8144 }, { "epoch": 0.27, "grad_norm": 0.7388401627540588, "learning_rate": 1.687626281133791e-05, "loss": 2.1719, "step": 8145 }, { "epoch": 0.27, "grad_norm": 0.709808886051178, "learning_rate": 1.6875491043347016e-05, "loss": 2.0829, "step": 8146 }, { "epoch": 0.27, "grad_norm": 0.7194606065750122, "learning_rate": 1.6874719197680852e-05, "loss": 2.1545, "step": 8147 }, { "epoch": 0.27, "grad_norm": 0.7188669443130493, "learning_rate": 1.6873947274348137e-05, "loss": 2.1875, "step": 8148 }, { "epoch": 0.27, "grad_norm": 0.7057079076766968, "learning_rate": 1.6873175273357598e-05, "loss": 2.1571, "step": 8149 }, { "epoch": 0.27, "grad_norm": 0.7026749849319458, "learning_rate": 1.6872403194717953e-05, "loss": 2.1094, "step": 8150 }, { "epoch": 0.27, "grad_norm": 0.7317450642585754, "learning_rate": 1.6871631038437922e-05, "loss": 2.1423, "step": 8151 }, { "epoch": 0.27, "grad_norm": 0.717634916305542, "learning_rate": 1.687085880452623e-05, "loss": 2.1577, "step": 8152 }, { "epoch": 0.27, "grad_norm": 0.6946392059326172, "learning_rate": 1.6870086492991605e-05, "loss": 2.0983, "step": 8153 }, { "epoch": 0.27, "grad_norm": 0.7069228887557983, "learning_rate": 1.686931410384277e-05, "loss": 2.1568, "step": 8154 }, { "epoch": 0.27, "grad_norm": 0.7103220820426941, "learning_rate": 1.6868541637088447e-05, "loss": 2.1689, "step": 8155 }, { "epoch": 0.27, "grad_norm": 0.700924277305603, "learning_rate": 1.6867769092737365e-05, "loss": 2.0884, "step": 8156 }, { "epoch": 0.27, "grad_norm": 0.7578829526901245, "learning_rate": 1.6866996470798255e-05, "loss": 2.1409, "step": 8157 }, { "epoch": 0.27, "grad_norm": 0.6955701112747192, "learning_rate": 1.686622377127984e-05, "loss": 2.1014, "step": 8158 }, { "epoch": 0.27, "grad_norm": 0.719598114490509, "learning_rate": 1.6865450994190857e-05, "loss": 2.1266, "step": 8159 }, { "epoch": 0.27, "grad_norm": 0.7287486791610718, "learning_rate": 1.6864678139540033e-05, "loss": 2.1767, "step": 8160 }, { "epoch": 0.27, "grad_norm": 0.7337954640388489, "learning_rate": 1.6863905207336095e-05, "loss": 2.1465, "step": 8161 }, { "epoch": 0.27, "grad_norm": 0.7136723399162292, "learning_rate": 1.686313219758778e-05, "loss": 2.1723, "step": 8162 }, { "epoch": 0.27, "grad_norm": 0.7117597460746765, "learning_rate": 1.686235911030382e-05, "loss": 2.1984, "step": 8163 }, { "epoch": 0.27, "grad_norm": 0.7197886109352112, "learning_rate": 1.6861585945492945e-05, "loss": 2.1736, "step": 8164 }, { "epoch": 0.27, "grad_norm": 0.7394952774047852, "learning_rate": 1.68608127031639e-05, "loss": 2.1479, "step": 8165 }, { "epoch": 0.27, "grad_norm": 0.7111626863479614, "learning_rate": 1.686003938332541e-05, "loss": 2.1732, "step": 8166 }, { "epoch": 0.27, "grad_norm": 0.7040988206863403, "learning_rate": 1.6859265985986213e-05, "loss": 2.1797, "step": 8167 }, { "epoch": 0.27, "grad_norm": 0.7538870573043823, "learning_rate": 1.6858492511155052e-05, "loss": 2.1488, "step": 8168 }, { "epoch": 0.27, "grad_norm": 0.6979429125785828, "learning_rate": 1.685771895884066e-05, "loss": 2.1244, "step": 8169 }, { "epoch": 0.27, "grad_norm": 0.699279248714447, "learning_rate": 1.6856945329051782e-05, "loss": 2.1317, "step": 8170 }, { "epoch": 0.27, "grad_norm": 0.7493459582328796, "learning_rate": 1.6856171621797153e-05, "loss": 2.1604, "step": 8171 }, { "epoch": 0.27, "grad_norm": 0.7153160572052002, "learning_rate": 1.6855397837085514e-05, "loss": 2.1766, "step": 8172 }, { "epoch": 0.27, "grad_norm": 0.7087304592132568, "learning_rate": 1.6854623974925607e-05, "loss": 2.1578, "step": 8173 }, { "epoch": 0.27, "grad_norm": 0.7083562016487122, "learning_rate": 1.6853850035326174e-05, "loss": 2.1075, "step": 8174 }, { "epoch": 0.27, "grad_norm": 0.7284536957740784, "learning_rate": 1.6853076018295963e-05, "loss": 2.1145, "step": 8175 }, { "epoch": 0.27, "grad_norm": 0.7345038056373596, "learning_rate": 1.685230192384372e-05, "loss": 2.2218, "step": 8176 }, { "epoch": 0.27, "grad_norm": 0.73847895860672, "learning_rate": 1.6851527751978175e-05, "loss": 2.2136, "step": 8177 }, { "epoch": 0.27, "grad_norm": 0.7068412899971008, "learning_rate": 1.6850753502708094e-05, "loss": 2.1286, "step": 8178 }, { "epoch": 0.27, "grad_norm": 0.6908994913101196, "learning_rate": 1.684997917604221e-05, "loss": 2.1152, "step": 8179 }, { "epoch": 0.27, "grad_norm": 0.7088663578033447, "learning_rate": 1.6849204771989276e-05, "loss": 2.1014, "step": 8180 }, { "epoch": 0.27, "grad_norm": 0.716882586479187, "learning_rate": 1.6848430290558046e-05, "loss": 2.177, "step": 8181 }, { "epoch": 0.27, "grad_norm": 0.7196425795555115, "learning_rate": 1.6847655731757257e-05, "loss": 2.2238, "step": 8182 }, { "epoch": 0.27, "grad_norm": 0.7443209290504456, "learning_rate": 1.684688109559567e-05, "loss": 2.1341, "step": 8183 }, { "epoch": 0.27, "grad_norm": 0.7341402173042297, "learning_rate": 1.6846106382082032e-05, "loss": 2.1472, "step": 8184 }, { "epoch": 0.27, "grad_norm": 0.7248502969741821, "learning_rate": 1.6845331591225095e-05, "loss": 2.1305, "step": 8185 }, { "epoch": 0.27, "grad_norm": 0.7372217774391174, "learning_rate": 1.684455672303362e-05, "loss": 2.1903, "step": 8186 }, { "epoch": 0.27, "grad_norm": 0.7091234922409058, "learning_rate": 1.6843781777516344e-05, "loss": 2.1907, "step": 8187 }, { "epoch": 0.27, "grad_norm": 0.7076736688613892, "learning_rate": 1.684300675468204e-05, "loss": 2.1589, "step": 8188 }, { "epoch": 0.27, "grad_norm": 0.7167781591415405, "learning_rate": 1.6842231654539456e-05, "loss": 2.1733, "step": 8189 }, { "epoch": 0.27, "grad_norm": 0.7146630883216858, "learning_rate": 1.6841456477097343e-05, "loss": 2.1145, "step": 8190 }, { "epoch": 0.27, "grad_norm": 0.7279488444328308, "learning_rate": 1.684068122236447e-05, "loss": 2.1362, "step": 8191 }, { "epoch": 0.27, "grad_norm": 0.6903893947601318, "learning_rate": 1.6839905890349587e-05, "loss": 2.1346, "step": 8192 }, { "epoch": 0.27, "grad_norm": 0.7163598537445068, "learning_rate": 1.6839130481061458e-05, "loss": 2.1218, "step": 8193 }, { "epoch": 0.27, "grad_norm": 0.7111249566078186, "learning_rate": 1.683835499450884e-05, "loss": 2.1122, "step": 8194 }, { "epoch": 0.27, "grad_norm": 0.7025899887084961, "learning_rate": 1.683757943070049e-05, "loss": 2.1729, "step": 8195 }, { "epoch": 0.27, "grad_norm": 0.7012423276901245, "learning_rate": 1.6836803789645184e-05, "loss": 2.1226, "step": 8196 }, { "epoch": 0.27, "grad_norm": 0.7256345748901367, "learning_rate": 1.683602807135167e-05, "loss": 2.2321, "step": 8197 }, { "epoch": 0.27, "grad_norm": 0.7525830864906311, "learning_rate": 1.6835252275828717e-05, "loss": 2.132, "step": 8198 }, { "epoch": 0.27, "grad_norm": 0.6893534660339355, "learning_rate": 1.683447640308509e-05, "loss": 2.123, "step": 8199 }, { "epoch": 0.27, "grad_norm": 0.7206797003746033, "learning_rate": 1.6833700453129553e-05, "loss": 2.1569, "step": 8200 }, { "epoch": 0.27, "grad_norm": 0.7020494937896729, "learning_rate": 1.683292442597088e-05, "loss": 2.1894, "step": 8201 }, { "epoch": 0.27, "grad_norm": 0.7730892300605774, "learning_rate": 1.6832148321617823e-05, "loss": 2.1399, "step": 8202 }, { "epoch": 0.27, "grad_norm": 0.7217729687690735, "learning_rate": 1.6831372140079164e-05, "loss": 2.1365, "step": 8203 }, { "epoch": 0.27, "grad_norm": 0.728108286857605, "learning_rate": 1.683059588136366e-05, "loss": 2.1405, "step": 8204 }, { "epoch": 0.27, "grad_norm": 0.6715365648269653, "learning_rate": 1.6829819545480092e-05, "loss": 2.2119, "step": 8205 }, { "epoch": 0.27, "grad_norm": 0.710647463798523, "learning_rate": 1.6829043132437225e-05, "loss": 2.1001, "step": 8206 }, { "epoch": 0.27, "grad_norm": 0.7033864259719849, "learning_rate": 1.6828266642243826e-05, "loss": 2.1681, "step": 8207 }, { "epoch": 0.27, "grad_norm": 0.7252451181411743, "learning_rate": 1.6827490074908677e-05, "loss": 2.1136, "step": 8208 }, { "epoch": 0.27, "grad_norm": 0.7354610562324524, "learning_rate": 1.6826713430440546e-05, "loss": 2.1045, "step": 8209 }, { "epoch": 0.27, "grad_norm": 0.7123645544052124, "learning_rate": 1.6825936708848205e-05, "loss": 2.1464, "step": 8210 }, { "epoch": 0.27, "grad_norm": 0.7007272243499756, "learning_rate": 1.6825159910140433e-05, "loss": 2.1523, "step": 8211 }, { "epoch": 0.27, "grad_norm": 0.703607976436615, "learning_rate": 1.6824383034326005e-05, "loss": 2.136, "step": 8212 }, { "epoch": 0.27, "grad_norm": 0.6920743584632874, "learning_rate": 1.6823606081413696e-05, "loss": 2.1082, "step": 8213 }, { "epoch": 0.27, "grad_norm": 0.7157577276229858, "learning_rate": 1.6822829051412285e-05, "loss": 2.1073, "step": 8214 }, { "epoch": 0.27, "grad_norm": 0.731047511100769, "learning_rate": 1.682205194433055e-05, "loss": 2.1453, "step": 8215 }, { "epoch": 0.27, "grad_norm": 0.7084121108055115, "learning_rate": 1.682127476017727e-05, "loss": 2.1385, "step": 8216 }, { "epoch": 0.27, "grad_norm": 0.6725792288780212, "learning_rate": 1.682049749896123e-05, "loss": 2.1152, "step": 8217 }, { "epoch": 0.27, "grad_norm": 0.7226374745368958, "learning_rate": 1.68197201606912e-05, "loss": 2.1444, "step": 8218 }, { "epoch": 0.27, "grad_norm": 0.7008115649223328, "learning_rate": 1.681894274537597e-05, "loss": 2.1081, "step": 8219 }, { "epoch": 0.27, "grad_norm": 0.7104893326759338, "learning_rate": 1.6818165253024322e-05, "loss": 2.1692, "step": 8220 }, { "epoch": 0.27, "grad_norm": 0.6952874660491943, "learning_rate": 1.681738768364504e-05, "loss": 2.0989, "step": 8221 }, { "epoch": 0.27, "grad_norm": 0.7030425667762756, "learning_rate": 1.6816610037246903e-05, "loss": 2.0449, "step": 8222 }, { "epoch": 0.27, "grad_norm": 0.7712633609771729, "learning_rate": 1.6815832313838704e-05, "loss": 2.0826, "step": 8223 }, { "epoch": 0.27, "grad_norm": 0.7134078741073608, "learning_rate": 1.6815054513429226e-05, "loss": 2.1914, "step": 8224 }, { "epoch": 0.27, "grad_norm": 0.7146320343017578, "learning_rate": 1.6814276636027255e-05, "loss": 2.1565, "step": 8225 }, { "epoch": 0.27, "grad_norm": 0.7184485793113708, "learning_rate": 1.681349868164158e-05, "loss": 2.1965, "step": 8226 }, { "epoch": 0.27, "grad_norm": 0.7314838767051697, "learning_rate": 1.6812720650280994e-05, "loss": 2.1331, "step": 8227 }, { "epoch": 0.27, "grad_norm": 0.7233169674873352, "learning_rate": 1.6811942541954277e-05, "loss": 2.1323, "step": 8228 }, { "epoch": 0.27, "grad_norm": 0.70633864402771, "learning_rate": 1.681116435667023e-05, "loss": 2.0925, "step": 8229 }, { "epoch": 0.27, "grad_norm": 0.7111966013908386, "learning_rate": 1.6810386094437637e-05, "loss": 2.1374, "step": 8230 }, { "epoch": 0.27, "grad_norm": 0.7626006007194519, "learning_rate": 1.680960775526529e-05, "loss": 2.1396, "step": 8231 }, { "epoch": 0.27, "grad_norm": 0.6895802617073059, "learning_rate": 1.680882933916199e-05, "loss": 2.0748, "step": 8232 }, { "epoch": 0.27, "grad_norm": 0.7243805527687073, "learning_rate": 1.6808050846136528e-05, "loss": 2.1607, "step": 8233 }, { "epoch": 0.27, "grad_norm": 0.7030206322669983, "learning_rate": 1.6807272276197696e-05, "loss": 2.2233, "step": 8234 }, { "epoch": 0.27, "grad_norm": 0.7152889966964722, "learning_rate": 1.680649362935429e-05, "loss": 2.1177, "step": 8235 }, { "epoch": 0.27, "grad_norm": 0.6976685523986816, "learning_rate": 1.680571490561511e-05, "loss": 2.1297, "step": 8236 }, { "epoch": 0.27, "grad_norm": 0.6971706748008728, "learning_rate": 1.680493610498895e-05, "loss": 2.1723, "step": 8237 }, { "epoch": 0.27, "grad_norm": 0.7076480388641357, "learning_rate": 1.6804157227484606e-05, "loss": 2.2193, "step": 8238 }, { "epoch": 0.27, "grad_norm": 0.704615592956543, "learning_rate": 1.680337827311089e-05, "loss": 2.108, "step": 8239 }, { "epoch": 0.27, "grad_norm": 0.7076357007026672, "learning_rate": 1.680259924187659e-05, "loss": 2.1563, "step": 8240 }, { "epoch": 0.27, "grad_norm": 0.7415187358856201, "learning_rate": 1.6801820133790504e-05, "loss": 2.2339, "step": 8241 }, { "epoch": 0.27, "grad_norm": 0.6887537240982056, "learning_rate": 1.6801040948861446e-05, "loss": 2.1614, "step": 8242 }, { "epoch": 0.27, "grad_norm": 0.728999674320221, "learning_rate": 1.6800261687098215e-05, "loss": 2.172, "step": 8243 }, { "epoch": 0.27, "grad_norm": 0.7224252820014954, "learning_rate": 1.6799482348509607e-05, "loss": 2.1779, "step": 8244 }, { "epoch": 0.27, "grad_norm": 0.7017470598220825, "learning_rate": 1.679870293310444e-05, "loss": 2.075, "step": 8245 }, { "epoch": 0.27, "grad_norm": 0.7249842882156372, "learning_rate": 1.6797923440891505e-05, "loss": 2.1235, "step": 8246 }, { "epoch": 0.27, "grad_norm": 0.6951231956481934, "learning_rate": 1.679714387187962e-05, "loss": 2.1208, "step": 8247 }, { "epoch": 0.27, "grad_norm": 0.7260183095932007, "learning_rate": 1.6796364226077582e-05, "loss": 2.1822, "step": 8248 }, { "epoch": 0.27, "grad_norm": 0.7375438809394836, "learning_rate": 1.6795584503494207e-05, "loss": 2.165, "step": 8249 }, { "epoch": 0.27, "grad_norm": 0.7089533805847168, "learning_rate": 1.67948047041383e-05, "loss": 2.1819, "step": 8250 }, { "epoch": 0.27, "grad_norm": 0.7065329551696777, "learning_rate": 1.6794024828018675e-05, "loss": 2.1518, "step": 8251 }, { "epoch": 0.27, "grad_norm": 0.7169740796089172, "learning_rate": 1.6793244875144132e-05, "loss": 2.1594, "step": 8252 }, { "epoch": 0.27, "grad_norm": 0.6889315843582153, "learning_rate": 1.6792464845523494e-05, "loss": 2.0848, "step": 8253 }, { "epoch": 0.27, "grad_norm": 0.7495715618133545, "learning_rate": 1.679168473916557e-05, "loss": 2.1362, "step": 8254 }, { "epoch": 0.27, "grad_norm": 0.7213800549507141, "learning_rate": 1.679090455607917e-05, "loss": 2.1531, "step": 8255 }, { "epoch": 0.27, "grad_norm": 0.6942418217658997, "learning_rate": 1.6790124296273114e-05, "loss": 2.1153, "step": 8256 }, { "epoch": 0.27, "grad_norm": 0.7012165188789368, "learning_rate": 1.6789343959756207e-05, "loss": 2.1485, "step": 8257 }, { "epoch": 0.27, "grad_norm": 0.751469075679779, "learning_rate": 1.6788563546537275e-05, "loss": 2.071, "step": 8258 }, { "epoch": 0.27, "grad_norm": 0.7323615550994873, "learning_rate": 1.678778305662513e-05, "loss": 2.1121, "step": 8259 }, { "epoch": 0.27, "grad_norm": 0.7156904339790344, "learning_rate": 1.678700249002859e-05, "loss": 2.1633, "step": 8260 }, { "epoch": 0.27, "grad_norm": 0.7246262431144714, "learning_rate": 1.6786221846756472e-05, "loss": 2.1547, "step": 8261 }, { "epoch": 0.27, "grad_norm": 0.7387124300003052, "learning_rate": 1.6785441126817596e-05, "loss": 2.1047, "step": 8262 }, { "epoch": 0.27, "grad_norm": 0.7257428169250488, "learning_rate": 1.6784660330220788e-05, "loss": 2.1553, "step": 8263 }, { "epoch": 0.27, "grad_norm": 0.7381401062011719, "learning_rate": 1.678387945697486e-05, "loss": 2.0597, "step": 8264 }, { "epoch": 0.27, "grad_norm": 0.7107391357421875, "learning_rate": 1.678309850708864e-05, "loss": 2.1646, "step": 8265 }, { "epoch": 0.28, "grad_norm": 0.7003684639930725, "learning_rate": 1.6782317480570943e-05, "loss": 2.1272, "step": 8266 }, { "epoch": 0.28, "grad_norm": 0.6960376501083374, "learning_rate": 1.6781536377430604e-05, "loss": 2.1271, "step": 8267 }, { "epoch": 0.28, "grad_norm": 0.7366856336593628, "learning_rate": 1.6780755197676437e-05, "loss": 2.1617, "step": 8268 }, { "epoch": 0.28, "grad_norm": 0.6857305765151978, "learning_rate": 1.6779973941317274e-05, "loss": 2.1715, "step": 8269 }, { "epoch": 0.28, "grad_norm": 0.6927384734153748, "learning_rate": 1.6779192608361938e-05, "loss": 2.1503, "step": 8270 }, { "epoch": 0.28, "grad_norm": 0.7158834934234619, "learning_rate": 1.6778411198819258e-05, "loss": 2.1308, "step": 8271 }, { "epoch": 0.28, "grad_norm": 0.6907369494438171, "learning_rate": 1.677762971269806e-05, "loss": 2.1912, "step": 8272 }, { "epoch": 0.28, "grad_norm": 0.7156636714935303, "learning_rate": 1.6776848150007173e-05, "loss": 2.143, "step": 8273 }, { "epoch": 0.28, "grad_norm": 0.7007836699485779, "learning_rate": 1.677606651075543e-05, "loss": 2.0942, "step": 8274 }, { "epoch": 0.28, "grad_norm": 0.7127761244773865, "learning_rate": 1.6775284794951654e-05, "loss": 2.0989, "step": 8275 }, { "epoch": 0.28, "grad_norm": 0.7096700668334961, "learning_rate": 1.6774503002604686e-05, "loss": 2.2125, "step": 8276 }, { "epoch": 0.28, "grad_norm": 0.6989270448684692, "learning_rate": 1.6773721133723347e-05, "loss": 2.0727, "step": 8277 }, { "epoch": 0.28, "grad_norm": 0.7447015643119812, "learning_rate": 1.6772939188316485e-05, "loss": 2.1774, "step": 8278 }, { "epoch": 0.28, "grad_norm": 0.7023367285728455, "learning_rate": 1.6772157166392918e-05, "loss": 2.1555, "step": 8279 }, { "epoch": 0.28, "grad_norm": 0.6987752318382263, "learning_rate": 1.6771375067961494e-05, "loss": 2.2008, "step": 8280 }, { "epoch": 0.28, "grad_norm": 0.6770157217979431, "learning_rate": 1.6770592893031037e-05, "loss": 2.1727, "step": 8281 }, { "epoch": 0.28, "grad_norm": 0.7365054488182068, "learning_rate": 1.6769810641610392e-05, "loss": 2.2102, "step": 8282 }, { "epoch": 0.28, "grad_norm": 0.6906271576881409, "learning_rate": 1.6769028313708394e-05, "loss": 2.161, "step": 8283 }, { "epoch": 0.28, "grad_norm": 0.7183332443237305, "learning_rate": 1.676824590933388e-05, "loss": 2.1348, "step": 8284 }, { "epoch": 0.28, "grad_norm": 0.7114660143852234, "learning_rate": 1.6767463428495693e-05, "loss": 2.1544, "step": 8285 }, { "epoch": 0.28, "grad_norm": 0.7062199115753174, "learning_rate": 1.676668087120267e-05, "loss": 2.1454, "step": 8286 }, { "epoch": 0.28, "grad_norm": 0.6940359473228455, "learning_rate": 1.676589823746365e-05, "loss": 2.1202, "step": 8287 }, { "epoch": 0.28, "grad_norm": 0.7147840857505798, "learning_rate": 1.676511552728748e-05, "loss": 2.1315, "step": 8288 }, { "epoch": 0.28, "grad_norm": 0.7117205262184143, "learning_rate": 1.6764332740682996e-05, "loss": 2.13, "step": 8289 }, { "epoch": 0.28, "grad_norm": 0.7808775901794434, "learning_rate": 1.6763549877659047e-05, "loss": 2.1542, "step": 8290 }, { "epoch": 0.28, "grad_norm": 0.7333183884620667, "learning_rate": 1.6762766938224476e-05, "loss": 2.1379, "step": 8291 }, { "epoch": 0.28, "grad_norm": 0.7025657892227173, "learning_rate": 1.6761983922388125e-05, "loss": 2.0825, "step": 8292 }, { "epoch": 0.28, "grad_norm": 0.7131985425949097, "learning_rate": 1.6761200830158843e-05, "loss": 2.1719, "step": 8293 }, { "epoch": 0.28, "grad_norm": 0.7128108739852905, "learning_rate": 1.6760417661545477e-05, "loss": 2.1223, "step": 8294 }, { "epoch": 0.28, "grad_norm": 0.7385960221290588, "learning_rate": 1.6759634416556876e-05, "loss": 2.1175, "step": 8295 }, { "epoch": 0.28, "grad_norm": 0.7357921600341797, "learning_rate": 1.6758851095201888e-05, "loss": 2.1118, "step": 8296 }, { "epoch": 0.28, "grad_norm": 0.718222439289093, "learning_rate": 1.6758067697489356e-05, "loss": 2.1226, "step": 8297 }, { "epoch": 0.28, "grad_norm": 0.6995694041252136, "learning_rate": 1.675728422342814e-05, "loss": 2.1624, "step": 8298 }, { "epoch": 0.28, "grad_norm": 0.7512602806091309, "learning_rate": 1.6756500673027085e-05, "loss": 2.2035, "step": 8299 }, { "epoch": 0.28, "grad_norm": 0.7235705256462097, "learning_rate": 1.6755717046295046e-05, "loss": 2.1831, "step": 8300 }, { "epoch": 0.28, "grad_norm": 0.7105406522750854, "learning_rate": 1.6754933343240876e-05, "loss": 2.1052, "step": 8301 }, { "epoch": 0.28, "grad_norm": 0.7159382104873657, "learning_rate": 1.6754149563873428e-05, "loss": 2.1358, "step": 8302 }, { "epoch": 0.28, "grad_norm": 0.7522639036178589, "learning_rate": 1.6753365708201552e-05, "loss": 2.2017, "step": 8303 }, { "epoch": 0.28, "grad_norm": 0.7046844363212585, "learning_rate": 1.6752581776234113e-05, "loss": 2.1157, "step": 8304 }, { "epoch": 0.28, "grad_norm": 0.7020446062088013, "learning_rate": 1.6751797767979958e-05, "loss": 2.1251, "step": 8305 }, { "epoch": 0.28, "grad_norm": 0.7152548432350159, "learning_rate": 1.6751013683447953e-05, "loss": 2.1626, "step": 8306 }, { "epoch": 0.28, "grad_norm": 0.7360692620277405, "learning_rate": 1.675022952264695e-05, "loss": 2.1307, "step": 8307 }, { "epoch": 0.28, "grad_norm": 0.7157758474349976, "learning_rate": 1.674944528558581e-05, "loss": 2.1199, "step": 8308 }, { "epoch": 0.28, "grad_norm": 0.7382952570915222, "learning_rate": 1.6748660972273394e-05, "loss": 2.1937, "step": 8309 }, { "epoch": 0.28, "grad_norm": 0.7215338349342346, "learning_rate": 1.674787658271856e-05, "loss": 2.1216, "step": 8310 }, { "epoch": 0.28, "grad_norm": 0.7211667895317078, "learning_rate": 1.6747092116930173e-05, "loss": 2.1462, "step": 8311 }, { "epoch": 0.28, "grad_norm": 0.7023290395736694, "learning_rate": 1.6746307574917093e-05, "loss": 2.1455, "step": 8312 }, { "epoch": 0.28, "grad_norm": 0.73150634765625, "learning_rate": 1.6745522956688184e-05, "loss": 2.1186, "step": 8313 }, { "epoch": 0.28, "grad_norm": 0.697403609752655, "learning_rate": 1.6744738262252303e-05, "loss": 2.1688, "step": 8314 }, { "epoch": 0.28, "grad_norm": 0.8051870465278625, "learning_rate": 1.674395349161833e-05, "loss": 2.0316, "step": 8315 }, { "epoch": 0.28, "grad_norm": 0.7292209267616272, "learning_rate": 1.6743168644795122e-05, "loss": 2.1886, "step": 8316 }, { "epoch": 0.28, "grad_norm": 0.7143980860710144, "learning_rate": 1.6742383721791544e-05, "loss": 2.2072, "step": 8317 }, { "epoch": 0.28, "grad_norm": 0.7262305021286011, "learning_rate": 1.674159872261647e-05, "loss": 2.1488, "step": 8318 }, { "epoch": 0.28, "grad_norm": 0.7264353036880493, "learning_rate": 1.6740813647278756e-05, "loss": 2.1718, "step": 8319 }, { "epoch": 0.28, "grad_norm": 0.6919220685958862, "learning_rate": 1.674002849578729e-05, "loss": 2.1457, "step": 8320 }, { "epoch": 0.28, "grad_norm": 0.7476584315299988, "learning_rate": 1.6739243268150924e-05, "loss": 2.1521, "step": 8321 }, { "epoch": 0.28, "grad_norm": 0.7002483010292053, "learning_rate": 1.6738457964378545e-05, "loss": 2.1546, "step": 8322 }, { "epoch": 0.28, "grad_norm": 0.7533687353134155, "learning_rate": 1.673767258447901e-05, "loss": 2.1098, "step": 8323 }, { "epoch": 0.28, "grad_norm": 0.7503271698951721, "learning_rate": 1.6736887128461203e-05, "loss": 2.0784, "step": 8324 }, { "epoch": 0.28, "grad_norm": 0.7119193077087402, "learning_rate": 1.673610159633399e-05, "loss": 2.1547, "step": 8325 }, { "epoch": 0.28, "grad_norm": 0.7029410004615784, "learning_rate": 1.673531598810625e-05, "loss": 2.1347, "step": 8326 }, { "epoch": 0.28, "grad_norm": 0.726902186870575, "learning_rate": 1.673453030378686e-05, "loss": 2.1404, "step": 8327 }, { "epoch": 0.28, "grad_norm": 0.6724271178245544, "learning_rate": 1.6733744543384693e-05, "loss": 2.1356, "step": 8328 }, { "epoch": 0.28, "grad_norm": 0.7225965261459351, "learning_rate": 1.6732958706908628e-05, "loss": 2.0981, "step": 8329 }, { "epoch": 0.28, "grad_norm": 0.7144973874092102, "learning_rate": 1.6732172794367538e-05, "loss": 2.175, "step": 8330 }, { "epoch": 0.28, "grad_norm": 0.7380536794662476, "learning_rate": 1.6731386805770302e-05, "loss": 2.1244, "step": 8331 }, { "epoch": 0.28, "grad_norm": 0.7548120021820068, "learning_rate": 1.6730600741125808e-05, "loss": 2.1325, "step": 8332 }, { "epoch": 0.28, "grad_norm": 0.7191427946090698, "learning_rate": 1.6729814600442932e-05, "loss": 2.1493, "step": 8333 }, { "epoch": 0.28, "grad_norm": 0.7184178233146667, "learning_rate": 1.672902838373055e-05, "loss": 2.1327, "step": 8334 }, { "epoch": 0.28, "grad_norm": 0.7234036326408386, "learning_rate": 1.6728242090997554e-05, "loss": 2.1388, "step": 8335 }, { "epoch": 0.28, "grad_norm": 0.7199417352676392, "learning_rate": 1.672745572225282e-05, "loss": 2.1508, "step": 8336 }, { "epoch": 0.28, "grad_norm": 0.7441931962966919, "learning_rate": 1.672666927750523e-05, "loss": 2.1783, "step": 8337 }, { "epoch": 0.28, "grad_norm": 0.7059201598167419, "learning_rate": 1.672588275676368e-05, "loss": 2.0918, "step": 8338 }, { "epoch": 0.28, "grad_norm": 0.700907289981842, "learning_rate": 1.6725096160037042e-05, "loss": 2.073, "step": 8339 }, { "epoch": 0.28, "grad_norm": 0.7274459600448608, "learning_rate": 1.6724309487334212e-05, "loss": 2.1595, "step": 8340 }, { "epoch": 0.28, "grad_norm": 0.6845276355743408, "learning_rate": 1.6723522738664074e-05, "loss": 2.1264, "step": 8341 }, { "epoch": 0.28, "grad_norm": 0.7291163802146912, "learning_rate": 1.6722735914035517e-05, "loss": 2.2317, "step": 8342 }, { "epoch": 0.28, "grad_norm": 0.7830685377120972, "learning_rate": 1.672194901345743e-05, "loss": 2.0342, "step": 8343 }, { "epoch": 0.28, "grad_norm": 0.6782450675964355, "learning_rate": 1.6721162036938698e-05, "loss": 2.1267, "step": 8344 }, { "epoch": 0.28, "grad_norm": 0.7008998394012451, "learning_rate": 1.672037498448822e-05, "loss": 2.1159, "step": 8345 }, { "epoch": 0.28, "grad_norm": 0.7255397439002991, "learning_rate": 1.6719587856114885e-05, "loss": 2.1883, "step": 8346 }, { "epoch": 0.28, "grad_norm": 0.7182254791259766, "learning_rate": 1.6718800651827584e-05, "loss": 2.1091, "step": 8347 }, { "epoch": 0.28, "grad_norm": 0.7435699701309204, "learning_rate": 1.6718013371635208e-05, "loss": 2.1272, "step": 8348 }, { "epoch": 0.28, "grad_norm": 0.7311944365501404, "learning_rate": 1.6717226015546657e-05, "loss": 2.2433, "step": 8349 }, { "epoch": 0.28, "grad_norm": 0.7117100954055786, "learning_rate": 1.671643858357082e-05, "loss": 2.1646, "step": 8350 }, { "epoch": 0.28, "grad_norm": 0.7185521125793457, "learning_rate": 1.67156510757166e-05, "loss": 2.1113, "step": 8351 }, { "epoch": 0.28, "grad_norm": 0.6903740167617798, "learning_rate": 1.6714863491992884e-05, "loss": 2.0606, "step": 8352 }, { "epoch": 0.28, "grad_norm": 0.6981849074363708, "learning_rate": 1.6714075832408583e-05, "loss": 2.1869, "step": 8353 }, { "epoch": 0.28, "grad_norm": 0.7545415759086609, "learning_rate": 1.6713288096972586e-05, "loss": 2.1934, "step": 8354 }, { "epoch": 0.28, "grad_norm": 0.699349582195282, "learning_rate": 1.671250028569379e-05, "loss": 2.1133, "step": 8355 }, { "epoch": 0.28, "grad_norm": 0.7073742151260376, "learning_rate": 1.6711712398581105e-05, "loss": 2.1475, "step": 8356 }, { "epoch": 0.28, "grad_norm": 0.7133447527885437, "learning_rate": 1.6710924435643426e-05, "loss": 2.2382, "step": 8357 }, { "epoch": 0.28, "grad_norm": 0.7232763171195984, "learning_rate": 1.6710136396889654e-05, "loss": 2.1209, "step": 8358 }, { "epoch": 0.28, "grad_norm": 0.6911131143569946, "learning_rate": 1.6709348282328693e-05, "loss": 2.1182, "step": 8359 }, { "epoch": 0.28, "grad_norm": 0.7193372845649719, "learning_rate": 1.6708560091969447e-05, "loss": 2.089, "step": 8360 }, { "epoch": 0.28, "grad_norm": 0.7212553024291992, "learning_rate": 1.6707771825820823e-05, "loss": 2.1283, "step": 8361 }, { "epoch": 0.28, "grad_norm": 0.6816129684448242, "learning_rate": 1.670698348389172e-05, "loss": 2.1855, "step": 8362 }, { "epoch": 0.28, "grad_norm": 0.7554389238357544, "learning_rate": 1.6706195066191053e-05, "loss": 2.1181, "step": 8363 }, { "epoch": 0.28, "grad_norm": 0.7077310681343079, "learning_rate": 1.670540657272772e-05, "loss": 2.1316, "step": 8364 }, { "epoch": 0.28, "grad_norm": 0.7212445735931396, "learning_rate": 1.670461800351064e-05, "loss": 2.1734, "step": 8365 }, { "epoch": 0.28, "grad_norm": 0.6918357610702515, "learning_rate": 1.6703829358548708e-05, "loss": 2.101, "step": 8366 }, { "epoch": 0.28, "grad_norm": 0.7416213154792786, "learning_rate": 1.6703040637850845e-05, "loss": 2.1361, "step": 8367 }, { "epoch": 0.28, "grad_norm": 0.7355925440788269, "learning_rate": 1.6702251841425955e-05, "loss": 2.111, "step": 8368 }, { "epoch": 0.28, "grad_norm": 0.6752378344535828, "learning_rate": 1.6701462969282955e-05, "loss": 2.1532, "step": 8369 }, { "epoch": 0.28, "grad_norm": 0.7172408103942871, "learning_rate": 1.670067402143075e-05, "loss": 2.1626, "step": 8370 }, { "epoch": 0.28, "grad_norm": 0.7430909276008606, "learning_rate": 1.669988499787826e-05, "loss": 2.163, "step": 8371 }, { "epoch": 0.28, "grad_norm": 0.7323562502861023, "learning_rate": 1.669909589863439e-05, "loss": 2.2295, "step": 8372 }, { "epoch": 0.28, "grad_norm": 0.6948447823524475, "learning_rate": 1.6698306723708066e-05, "loss": 2.1245, "step": 8373 }, { "epoch": 0.28, "grad_norm": 0.7318553924560547, "learning_rate": 1.6697517473108197e-05, "loss": 2.1301, "step": 8374 }, { "epoch": 0.28, "grad_norm": 0.7293442487716675, "learning_rate": 1.66967281468437e-05, "loss": 2.1382, "step": 8375 }, { "epoch": 0.28, "grad_norm": 0.6928600072860718, "learning_rate": 1.6695938744923493e-05, "loss": 2.1421, "step": 8376 }, { "epoch": 0.28, "grad_norm": 0.6966847777366638, "learning_rate": 1.6695149267356493e-05, "loss": 2.18, "step": 8377 }, { "epoch": 0.28, "grad_norm": 0.7042227983474731, "learning_rate": 1.6694359714151623e-05, "loss": 2.0846, "step": 8378 }, { "epoch": 0.28, "grad_norm": 0.6989268064498901, "learning_rate": 1.66935700853178e-05, "loss": 2.1497, "step": 8379 }, { "epoch": 0.28, "grad_norm": 0.7188319563865662, "learning_rate": 1.6692780380863946e-05, "loss": 2.1777, "step": 8380 }, { "epoch": 0.28, "grad_norm": 0.6994345784187317, "learning_rate": 1.6691990600798977e-05, "loss": 2.1279, "step": 8381 }, { "epoch": 0.28, "grad_norm": 0.6980247497558594, "learning_rate": 1.6691200745131825e-05, "loss": 2.118, "step": 8382 }, { "epoch": 0.28, "grad_norm": 0.7195389866828918, "learning_rate": 1.6690410813871407e-05, "loss": 2.147, "step": 8383 }, { "epoch": 0.28, "grad_norm": 0.7061222791671753, "learning_rate": 1.6689620807026648e-05, "loss": 2.1469, "step": 8384 }, { "epoch": 0.28, "grad_norm": 0.7184776663780212, "learning_rate": 1.668883072460647e-05, "loss": 2.2292, "step": 8385 }, { "epoch": 0.28, "grad_norm": 0.6848839521408081, "learning_rate": 1.6688040566619806e-05, "loss": 2.072, "step": 8386 }, { "epoch": 0.28, "grad_norm": 0.6812568306922913, "learning_rate": 1.6687250333075583e-05, "loss": 2.1698, "step": 8387 }, { "epoch": 0.28, "grad_norm": 0.7278180122375488, "learning_rate": 1.668646002398272e-05, "loss": 2.1683, "step": 8388 }, { "epoch": 0.28, "grad_norm": 0.6893550157546997, "learning_rate": 1.6685669639350152e-05, "loss": 2.0946, "step": 8389 }, { "epoch": 0.28, "grad_norm": 0.6801903247833252, "learning_rate": 1.6684879179186807e-05, "loss": 2.1645, "step": 8390 }, { "epoch": 0.28, "grad_norm": 0.7107182145118713, "learning_rate": 1.668408864350161e-05, "loss": 2.1383, "step": 8391 }, { "epoch": 0.28, "grad_norm": 0.7200020551681519, "learning_rate": 1.6683298032303503e-05, "loss": 2.1573, "step": 8392 }, { "epoch": 0.28, "grad_norm": 0.698668360710144, "learning_rate": 1.668250734560141e-05, "loss": 2.157, "step": 8393 }, { "epoch": 0.28, "grad_norm": 0.723284125328064, "learning_rate": 1.6681716583404263e-05, "loss": 2.1707, "step": 8394 }, { "epoch": 0.28, "grad_norm": 0.7130258679389954, "learning_rate": 1.6680925745721003e-05, "loss": 2.1787, "step": 8395 }, { "epoch": 0.28, "grad_norm": 0.7131319046020508, "learning_rate": 1.6680134832560555e-05, "loss": 2.1454, "step": 8396 }, { "epoch": 0.28, "grad_norm": 0.763205885887146, "learning_rate": 1.667934384393186e-05, "loss": 2.1734, "step": 8397 }, { "epoch": 0.28, "grad_norm": 0.7104514837265015, "learning_rate": 1.667855277984385e-05, "loss": 2.1275, "step": 8398 }, { "epoch": 0.28, "grad_norm": 0.7180097699165344, "learning_rate": 1.6677761640305464e-05, "loss": 2.2083, "step": 8399 }, { "epoch": 0.28, "grad_norm": 0.7494561076164246, "learning_rate": 1.6676970425325646e-05, "loss": 2.1904, "step": 8400 }, { "epoch": 0.28, "grad_norm": 0.7247684597969055, "learning_rate": 1.6676179134913325e-05, "loss": 2.1325, "step": 8401 }, { "epoch": 0.28, "grad_norm": 0.718853235244751, "learning_rate": 1.667538776907745e-05, "loss": 2.164, "step": 8402 }, { "epoch": 0.28, "grad_norm": 0.7261177897453308, "learning_rate": 1.6674596327826952e-05, "loss": 2.1968, "step": 8403 }, { "epoch": 0.28, "grad_norm": 0.7159755229949951, "learning_rate": 1.667380481117078e-05, "loss": 2.0809, "step": 8404 }, { "epoch": 0.28, "grad_norm": 0.7119253873825073, "learning_rate": 1.6673013219117866e-05, "loss": 2.1298, "step": 8405 }, { "epoch": 0.28, "grad_norm": 0.7206315994262695, "learning_rate": 1.6672221551677163e-05, "loss": 2.2023, "step": 8406 }, { "epoch": 0.28, "grad_norm": 0.740351140499115, "learning_rate": 1.667142980885761e-05, "loss": 2.1002, "step": 8407 }, { "epoch": 0.28, "grad_norm": 0.730655312538147, "learning_rate": 1.6670637990668153e-05, "loss": 2.1188, "step": 8408 }, { "epoch": 0.28, "grad_norm": 0.706206738948822, "learning_rate": 1.6669846097117738e-05, "loss": 2.1719, "step": 8409 }, { "epoch": 0.28, "grad_norm": 0.7733293175697327, "learning_rate": 1.6669054128215313e-05, "loss": 2.137, "step": 8410 }, { "epoch": 0.28, "grad_norm": 0.7247806191444397, "learning_rate": 1.6668262083969822e-05, "loss": 2.1317, "step": 8411 }, { "epoch": 0.28, "grad_norm": 0.7170233726501465, "learning_rate": 1.666746996439021e-05, "loss": 2.1185, "step": 8412 }, { "epoch": 0.28, "grad_norm": 0.7162359952926636, "learning_rate": 1.6666677769485432e-05, "loss": 2.1113, "step": 8413 }, { "epoch": 0.28, "grad_norm": 0.7174320816993713, "learning_rate": 1.666588549926444e-05, "loss": 2.109, "step": 8414 }, { "epoch": 0.28, "grad_norm": 0.7953102588653564, "learning_rate": 1.6665093153736177e-05, "loss": 2.1272, "step": 8415 }, { "epoch": 0.28, "grad_norm": 0.7284758687019348, "learning_rate": 1.6664300732909595e-05, "loss": 2.1221, "step": 8416 }, { "epoch": 0.28, "grad_norm": 0.7045118808746338, "learning_rate": 1.6663508236793653e-05, "loss": 2.1435, "step": 8417 }, { "epoch": 0.28, "grad_norm": 0.7355021238327026, "learning_rate": 1.66627156653973e-05, "loss": 2.1627, "step": 8418 }, { "epoch": 0.28, "grad_norm": 0.7339742183685303, "learning_rate": 1.6661923018729488e-05, "loss": 2.1397, "step": 8419 }, { "epoch": 0.28, "grad_norm": 0.7345160245895386, "learning_rate": 1.6661130296799173e-05, "loss": 2.1937, "step": 8420 }, { "epoch": 0.28, "grad_norm": 0.7035835385322571, "learning_rate": 1.6660337499615315e-05, "loss": 2.0899, "step": 8421 }, { "epoch": 0.28, "grad_norm": 0.7480828166007996, "learning_rate": 1.6659544627186863e-05, "loss": 2.1465, "step": 8422 }, { "epoch": 0.28, "grad_norm": 0.707321286201477, "learning_rate": 1.6658751679522783e-05, "loss": 2.1243, "step": 8423 }, { "epoch": 0.28, "grad_norm": 0.703544020652771, "learning_rate": 1.6657958656632027e-05, "loss": 2.1214, "step": 8424 }, { "epoch": 0.28, "grad_norm": 0.7414666414260864, "learning_rate": 1.6657165558523555e-05, "loss": 2.1719, "step": 8425 }, { "epoch": 0.28, "grad_norm": 0.716022789478302, "learning_rate": 1.665637238520633e-05, "loss": 2.0527, "step": 8426 }, { "epoch": 0.28, "grad_norm": 0.7326505780220032, "learning_rate": 1.665557913668931e-05, "loss": 2.1086, "step": 8427 }, { "epoch": 0.28, "grad_norm": 0.6998535990715027, "learning_rate": 1.6654785812981455e-05, "loss": 2.0982, "step": 8428 }, { "epoch": 0.28, "grad_norm": 0.6943238973617554, "learning_rate": 1.6653992414091736e-05, "loss": 2.0497, "step": 8429 }, { "epoch": 0.28, "grad_norm": 0.7014089822769165, "learning_rate": 1.6653198940029104e-05, "loss": 2.1706, "step": 8430 }, { "epoch": 0.28, "grad_norm": 0.7265568971633911, "learning_rate": 1.6652405390802534e-05, "loss": 2.2045, "step": 8431 }, { "epoch": 0.28, "grad_norm": 0.7365382313728333, "learning_rate": 1.6651611766420983e-05, "loss": 2.1409, "step": 8432 }, { "epoch": 0.28, "grad_norm": 0.7257308959960938, "learning_rate": 1.6650818066893423e-05, "loss": 2.1283, "step": 8433 }, { "epoch": 0.28, "grad_norm": 0.7167442440986633, "learning_rate": 1.6650024292228817e-05, "loss": 2.1801, "step": 8434 }, { "epoch": 0.28, "grad_norm": 0.7947908043861389, "learning_rate": 1.664923044243614e-05, "loss": 2.1535, "step": 8435 }, { "epoch": 0.28, "grad_norm": 0.6911414265632629, "learning_rate": 1.6648436517524344e-05, "loss": 2.1545, "step": 8436 }, { "epoch": 0.28, "grad_norm": 0.6991528272628784, "learning_rate": 1.6647642517502417e-05, "loss": 2.1625, "step": 8437 }, { "epoch": 0.28, "grad_norm": 0.7230033278465271, "learning_rate": 1.6646848442379314e-05, "loss": 2.1526, "step": 8438 }, { "epoch": 0.28, "grad_norm": 0.71552574634552, "learning_rate": 1.6646054292164016e-05, "loss": 2.2209, "step": 8439 }, { "epoch": 0.28, "grad_norm": 0.73277747631073, "learning_rate": 1.664526006686549e-05, "loss": 2.1951, "step": 8440 }, { "epoch": 0.28, "grad_norm": 0.7186518907546997, "learning_rate": 1.664446576649271e-05, "loss": 2.1679, "step": 8441 }, { "epoch": 0.28, "grad_norm": 0.7248024344444275, "learning_rate": 1.6643671391054653e-05, "loss": 2.1947, "step": 8442 }, { "epoch": 0.28, "grad_norm": 0.751242458820343, "learning_rate": 1.664287694056029e-05, "loss": 2.1242, "step": 8443 }, { "epoch": 0.28, "grad_norm": 0.6933706402778625, "learning_rate": 1.6642082415018594e-05, "loss": 2.1546, "step": 8444 }, { "epoch": 0.28, "grad_norm": 0.7158252000808716, "learning_rate": 1.6641287814438544e-05, "loss": 2.1716, "step": 8445 }, { "epoch": 0.28, "grad_norm": 0.7104446887969971, "learning_rate": 1.664049313882912e-05, "loss": 2.0644, "step": 8446 }, { "epoch": 0.28, "grad_norm": 0.7372139096260071, "learning_rate": 1.6639698388199293e-05, "loss": 2.1729, "step": 8447 }, { "epoch": 0.28, "grad_norm": 0.7316924929618835, "learning_rate": 1.6638903562558046e-05, "loss": 2.1271, "step": 8448 }, { "epoch": 0.28, "grad_norm": 0.770596981048584, "learning_rate": 1.6638108661914355e-05, "loss": 2.1226, "step": 8449 }, { "epoch": 0.28, "grad_norm": 0.7030594944953918, "learning_rate": 1.6637313686277203e-05, "loss": 2.1657, "step": 8450 }, { "epoch": 0.28, "grad_norm": 0.6895646452903748, "learning_rate": 1.6636518635655572e-05, "loss": 2.1375, "step": 8451 }, { "epoch": 0.28, "grad_norm": 0.7410181760787964, "learning_rate": 1.6635723510058443e-05, "loss": 2.137, "step": 8452 }, { "epoch": 0.28, "grad_norm": 0.6950429081916809, "learning_rate": 1.66349283094948e-05, "loss": 2.1358, "step": 8453 }, { "epoch": 0.28, "grad_norm": 0.7217109203338623, "learning_rate": 1.663413303397362e-05, "loss": 2.2561, "step": 8454 }, { "epoch": 0.28, "grad_norm": 0.7148011326789856, "learning_rate": 1.66333376835039e-05, "loss": 2.147, "step": 8455 }, { "epoch": 0.28, "grad_norm": 0.714650571346283, "learning_rate": 1.6632542258094614e-05, "loss": 2.0939, "step": 8456 }, { "epoch": 0.28, "grad_norm": 0.7211595177650452, "learning_rate": 1.6631746757754754e-05, "loss": 2.1421, "step": 8457 }, { "epoch": 0.28, "grad_norm": 0.7004099488258362, "learning_rate": 1.6630951182493306e-05, "loss": 2.144, "step": 8458 }, { "epoch": 0.28, "grad_norm": 0.6834902167320251, "learning_rate": 1.6630155532319257e-05, "loss": 2.1436, "step": 8459 }, { "epoch": 0.28, "grad_norm": 0.7063214182853699, "learning_rate": 1.6629359807241597e-05, "loss": 2.1505, "step": 8460 }, { "epoch": 0.28, "grad_norm": 0.7732962369918823, "learning_rate": 1.6628564007269315e-05, "loss": 2.1846, "step": 8461 }, { "epoch": 0.28, "grad_norm": 0.7055607438087463, "learning_rate": 1.66277681324114e-05, "loss": 2.2373, "step": 8462 }, { "epoch": 0.28, "grad_norm": 0.689182460308075, "learning_rate": 1.6626972182676847e-05, "loss": 2.1085, "step": 8463 }, { "epoch": 0.28, "grad_norm": 0.6998549699783325, "learning_rate": 1.662617615807465e-05, "loss": 2.116, "step": 8464 }, { "epoch": 0.28, "grad_norm": 0.7300270199775696, "learning_rate": 1.662538005861379e-05, "loss": 2.1492, "step": 8465 }, { "epoch": 0.28, "grad_norm": 0.7549532651901245, "learning_rate": 1.662458388430327e-05, "loss": 2.1431, "step": 8466 }, { "epoch": 0.28, "grad_norm": 0.7259175777435303, "learning_rate": 1.662378763515209e-05, "loss": 2.2179, "step": 8467 }, { "epoch": 0.28, "grad_norm": 0.7303707599639893, "learning_rate": 1.662299131116923e-05, "loss": 2.082, "step": 8468 }, { "epoch": 0.28, "grad_norm": 0.7546704411506653, "learning_rate": 1.6622194912363702e-05, "loss": 2.2049, "step": 8469 }, { "epoch": 0.28, "grad_norm": 0.6981831789016724, "learning_rate": 1.6621398438744497e-05, "loss": 2.169, "step": 8470 }, { "epoch": 0.28, "grad_norm": 0.6856091618537903, "learning_rate": 1.662060189032061e-05, "loss": 2.1368, "step": 8471 }, { "epoch": 0.28, "grad_norm": 0.7288587689399719, "learning_rate": 1.661980526710105e-05, "loss": 2.1485, "step": 8472 }, { "epoch": 0.28, "grad_norm": 0.731757402420044, "learning_rate": 1.66190085690948e-05, "loss": 2.1266, "step": 8473 }, { "epoch": 0.28, "grad_norm": 0.7040241956710815, "learning_rate": 1.6618211796310876e-05, "loss": 2.1543, "step": 8474 }, { "epoch": 0.28, "grad_norm": 0.7040770053863525, "learning_rate": 1.6617414948758273e-05, "loss": 2.1694, "step": 8475 }, { "epoch": 0.28, "grad_norm": 0.7272975444793701, "learning_rate": 1.6616618026445994e-05, "loss": 2.1573, "step": 8476 }, { "epoch": 0.28, "grad_norm": 0.7112754583358765, "learning_rate": 1.6615821029383043e-05, "loss": 2.05, "step": 8477 }, { "epoch": 0.28, "grad_norm": 0.7027485370635986, "learning_rate": 1.6615023957578416e-05, "loss": 2.143, "step": 8478 }, { "epoch": 0.28, "grad_norm": 0.7477208971977234, "learning_rate": 1.6614226811041134e-05, "loss": 2.1382, "step": 8479 }, { "epoch": 0.28, "grad_norm": 0.7543733716011047, "learning_rate": 1.6613429589780193e-05, "loss": 2.1455, "step": 8480 }, { "epoch": 0.28, "grad_norm": 0.7092347145080566, "learning_rate": 1.6612632293804594e-05, "loss": 2.1214, "step": 8481 }, { "epoch": 0.28, "grad_norm": 0.7138279676437378, "learning_rate": 1.661183492312336e-05, "loss": 2.085, "step": 8482 }, { "epoch": 0.28, "grad_norm": 0.7034551501274109, "learning_rate": 1.6611037477745483e-05, "loss": 2.1731, "step": 8483 }, { "epoch": 0.28, "grad_norm": 0.7038451433181763, "learning_rate": 1.6610239957679983e-05, "loss": 2.1823, "step": 8484 }, { "epoch": 0.28, "grad_norm": 0.7277883887290955, "learning_rate": 1.660944236293586e-05, "loss": 2.098, "step": 8485 }, { "epoch": 0.28, "grad_norm": 0.7263098359107971, "learning_rate": 1.6608644693522136e-05, "loss": 2.2134, "step": 8486 }, { "epoch": 0.28, "grad_norm": 0.7074192762374878, "learning_rate": 1.6607846949447816e-05, "loss": 2.1099, "step": 8487 }, { "epoch": 0.28, "grad_norm": 0.7036694288253784, "learning_rate": 1.6607049130721913e-05, "loss": 2.1641, "step": 8488 }, { "epoch": 0.28, "grad_norm": 0.7400307059288025, "learning_rate": 1.660625123735344e-05, "loss": 2.1215, "step": 8489 }, { "epoch": 0.28, "grad_norm": 0.715126097202301, "learning_rate": 1.6605453269351416e-05, "loss": 2.1314, "step": 8490 }, { "epoch": 0.28, "grad_norm": 0.703659176826477, "learning_rate": 1.660465522672485e-05, "loss": 2.1438, "step": 8491 }, { "epoch": 0.28, "grad_norm": 0.7191702127456665, "learning_rate": 1.6603857109482757e-05, "loss": 2.0984, "step": 8492 }, { "epoch": 0.28, "grad_norm": 0.7585740089416504, "learning_rate": 1.6603058917634162e-05, "loss": 2.1753, "step": 8493 }, { "epoch": 0.28, "grad_norm": 0.6940328478813171, "learning_rate": 1.6602260651188073e-05, "loss": 2.1386, "step": 8494 }, { "epoch": 0.28, "grad_norm": 0.705902099609375, "learning_rate": 1.6601462310153517e-05, "loss": 2.1367, "step": 8495 }, { "epoch": 0.28, "grad_norm": 0.6982494592666626, "learning_rate": 1.6600663894539502e-05, "loss": 2.1372, "step": 8496 }, { "epoch": 0.28, "grad_norm": 0.715004563331604, "learning_rate": 1.659986540435506e-05, "loss": 2.0947, "step": 8497 }, { "epoch": 0.28, "grad_norm": 0.6934937834739685, "learning_rate": 1.6599066839609204e-05, "loss": 2.1878, "step": 8498 }, { "epoch": 0.28, "grad_norm": 0.8017980456352234, "learning_rate": 1.6598268200310962e-05, "loss": 2.2035, "step": 8499 }, { "epoch": 0.28, "grad_norm": 0.7302924990653992, "learning_rate": 1.6597469486469348e-05, "loss": 2.0979, "step": 8500 }, { "epoch": 0.28, "grad_norm": 0.7269686460494995, "learning_rate": 1.6596670698093392e-05, "loss": 2.0866, "step": 8501 }, { "epoch": 0.28, "grad_norm": 0.7090336084365845, "learning_rate": 1.6595871835192117e-05, "loss": 2.1728, "step": 8502 }, { "epoch": 0.28, "grad_norm": 0.7512612342834473, "learning_rate": 1.6595072897774547e-05, "loss": 2.1771, "step": 8503 }, { "epoch": 0.28, "grad_norm": 0.7363735437393188, "learning_rate": 1.659427388584971e-05, "loss": 2.1241, "step": 8504 }, { "epoch": 0.28, "grad_norm": 0.6985809206962585, "learning_rate": 1.659347479942663e-05, "loss": 2.14, "step": 8505 }, { "epoch": 0.28, "grad_norm": 0.731840968132019, "learning_rate": 1.6592675638514337e-05, "loss": 2.1731, "step": 8506 }, { "epoch": 0.28, "grad_norm": 0.7617955803871155, "learning_rate": 1.6591876403121855e-05, "loss": 2.1518, "step": 8507 }, { "epoch": 0.28, "grad_norm": 0.7017560601234436, "learning_rate": 1.659107709325822e-05, "loss": 2.1333, "step": 8508 }, { "epoch": 0.28, "grad_norm": 0.7608446478843689, "learning_rate": 1.6590277708932458e-05, "loss": 2.1565, "step": 8509 }, { "epoch": 0.28, "grad_norm": 0.7530316114425659, "learning_rate": 1.65894782501536e-05, "loss": 2.1841, "step": 8510 }, { "epoch": 0.28, "grad_norm": 0.7154077887535095, "learning_rate": 1.658867871693068e-05, "loss": 2.1075, "step": 8511 }, { "epoch": 0.28, "grad_norm": 0.7149897217750549, "learning_rate": 1.6587879109272726e-05, "loss": 2.1193, "step": 8512 }, { "epoch": 0.28, "grad_norm": 0.7030389904975891, "learning_rate": 1.658707942718878e-05, "loss": 2.1882, "step": 8513 }, { "epoch": 0.28, "grad_norm": 0.7259228825569153, "learning_rate": 1.6586279670687867e-05, "loss": 2.1006, "step": 8514 }, { "epoch": 0.28, "grad_norm": 0.7169371843338013, "learning_rate": 1.658547983977903e-05, "loss": 2.0825, "step": 8515 }, { "epoch": 0.28, "grad_norm": 0.7480980157852173, "learning_rate": 1.6584679934471294e-05, "loss": 2.1582, "step": 8516 }, { "epoch": 0.28, "grad_norm": 0.7195892333984375, "learning_rate": 1.6583879954773707e-05, "loss": 2.0497, "step": 8517 }, { "epoch": 0.28, "grad_norm": 0.7250720262527466, "learning_rate": 1.6583079900695303e-05, "loss": 2.1564, "step": 8518 }, { "epoch": 0.28, "grad_norm": 0.7400161623954773, "learning_rate": 1.6582279772245123e-05, "loss": 2.1467, "step": 8519 }, { "epoch": 0.28, "grad_norm": 0.7226407527923584, "learning_rate": 1.65814795694322e-05, "loss": 2.1859, "step": 8520 }, { "epoch": 0.28, "grad_norm": 0.7086884379386902, "learning_rate": 1.658067929226558e-05, "loss": 2.1824, "step": 8521 }, { "epoch": 0.28, "grad_norm": 0.7324230670928955, "learning_rate": 1.65798789407543e-05, "loss": 2.206, "step": 8522 }, { "epoch": 0.28, "grad_norm": 0.6903305053710938, "learning_rate": 1.6579078514907404e-05, "loss": 2.1037, "step": 8523 }, { "epoch": 0.28, "grad_norm": 0.7690555453300476, "learning_rate": 1.6578278014733938e-05, "loss": 2.1065, "step": 8524 }, { "epoch": 0.28, "grad_norm": 0.6953065395355225, "learning_rate": 1.657747744024294e-05, "loss": 2.1035, "step": 8525 }, { "epoch": 0.28, "grad_norm": 0.7228513956069946, "learning_rate": 1.6576676791443457e-05, "loss": 2.1905, "step": 8526 }, { "epoch": 0.28, "grad_norm": 0.7356615662574768, "learning_rate": 1.6575876068344533e-05, "loss": 2.1777, "step": 8527 }, { "epoch": 0.28, "grad_norm": 0.747215747833252, "learning_rate": 1.657507527095522e-05, "loss": 2.241, "step": 8528 }, { "epoch": 0.28, "grad_norm": 0.7284457087516785, "learning_rate": 1.6574274399284552e-05, "loss": 2.1066, "step": 8529 }, { "epoch": 0.28, "grad_norm": 0.7463632225990295, "learning_rate": 1.6573473453341587e-05, "loss": 2.159, "step": 8530 }, { "epoch": 0.28, "grad_norm": 0.7322075963020325, "learning_rate": 1.6572672433135375e-05, "loss": 2.1637, "step": 8531 }, { "epoch": 0.28, "grad_norm": 0.759601354598999, "learning_rate": 1.6571871338674957e-05, "loss": 2.1192, "step": 8532 }, { "epoch": 0.28, "grad_norm": 0.70357346534729, "learning_rate": 1.6571070169969395e-05, "loss": 2.1886, "step": 8533 }, { "epoch": 0.28, "grad_norm": 0.7102460861206055, "learning_rate": 1.6570268927027727e-05, "loss": 2.1235, "step": 8534 }, { "epoch": 0.28, "grad_norm": 0.7317030429840088, "learning_rate": 1.6569467609859013e-05, "loss": 2.138, "step": 8535 }, { "epoch": 0.28, "grad_norm": 0.6941621899604797, "learning_rate": 1.6568666218472304e-05, "loss": 2.1995, "step": 8536 }, { "epoch": 0.28, "grad_norm": 0.7085536122322083, "learning_rate": 1.6567864752876656e-05, "loss": 2.1135, "step": 8537 }, { "epoch": 0.28, "grad_norm": 0.6942277550697327, "learning_rate": 1.6567063213081117e-05, "loss": 2.1104, "step": 8538 }, { "epoch": 0.28, "grad_norm": 0.7417043447494507, "learning_rate": 1.656626159909475e-05, "loss": 2.1428, "step": 8539 }, { "epoch": 0.28, "grad_norm": 0.7053242325782776, "learning_rate": 1.6565459910926605e-05, "loss": 2.1594, "step": 8540 }, { "epoch": 0.28, "grad_norm": 0.6774251461029053, "learning_rate": 1.6564658148585743e-05, "loss": 2.1338, "step": 8541 }, { "epoch": 0.28, "grad_norm": 0.7282190322875977, "learning_rate": 1.656385631208122e-05, "loss": 2.1722, "step": 8542 }, { "epoch": 0.28, "grad_norm": 0.7296175360679626, "learning_rate": 1.6563054401422095e-05, "loss": 2.2193, "step": 8543 }, { "epoch": 0.28, "grad_norm": 0.7117055654525757, "learning_rate": 1.6562252416617432e-05, "loss": 2.1307, "step": 8544 }, { "epoch": 0.28, "grad_norm": 0.7168688178062439, "learning_rate": 1.656145035767628e-05, "loss": 2.1571, "step": 8545 }, { "epoch": 0.28, "grad_norm": 0.6976636052131653, "learning_rate": 1.6560648224607713e-05, "loss": 2.0826, "step": 8546 }, { "epoch": 0.28, "grad_norm": 0.7028382420539856, "learning_rate": 1.655984601742078e-05, "loss": 2.1246, "step": 8547 }, { "epoch": 0.28, "grad_norm": 0.6970878839492798, "learning_rate": 1.655904373612456e-05, "loss": 2.1403, "step": 8548 }, { "epoch": 0.28, "grad_norm": 0.7084276080131531, "learning_rate": 1.6558241380728102e-05, "loss": 2.152, "step": 8549 }, { "epoch": 0.28, "grad_norm": 0.7015260457992554, "learning_rate": 1.6557438951240482e-05, "loss": 2.1362, "step": 8550 }, { "epoch": 0.28, "grad_norm": 0.710869550704956, "learning_rate": 1.6556636447670755e-05, "loss": 2.1249, "step": 8551 }, { "epoch": 0.28, "grad_norm": 0.7144033908843994, "learning_rate": 1.6555833870027993e-05, "loss": 2.1458, "step": 8552 }, { "epoch": 0.28, "grad_norm": 0.7335296273231506, "learning_rate": 1.655503121832126e-05, "loss": 2.1684, "step": 8553 }, { "epoch": 0.28, "grad_norm": 0.6994426250457764, "learning_rate": 1.6554228492559628e-05, "loss": 2.1711, "step": 8554 }, { "epoch": 0.28, "grad_norm": 0.6763849258422852, "learning_rate": 1.6553425692752165e-05, "loss": 2.1012, "step": 8555 }, { "epoch": 0.28, "grad_norm": 0.7549949288368225, "learning_rate": 1.6552622818907935e-05, "loss": 2.123, "step": 8556 }, { "epoch": 0.28, "grad_norm": 0.7343417406082153, "learning_rate": 1.655181987103602e-05, "loss": 2.0746, "step": 8557 }, { "epoch": 0.28, "grad_norm": 0.7319876551628113, "learning_rate": 1.6551016849145476e-05, "loss": 2.1482, "step": 8558 }, { "epoch": 0.28, "grad_norm": 0.7186651825904846, "learning_rate": 1.655021375324539e-05, "loss": 2.0865, "step": 8559 }, { "epoch": 0.28, "grad_norm": 0.7188573479652405, "learning_rate": 1.654941058334482e-05, "loss": 2.1263, "step": 8560 }, { "epoch": 0.28, "grad_norm": 0.6992587447166443, "learning_rate": 1.6548607339452853e-05, "loss": 2.1866, "step": 8561 }, { "epoch": 0.28, "grad_norm": 0.7086381912231445, "learning_rate": 1.6547804021578556e-05, "loss": 2.1448, "step": 8562 }, { "epoch": 0.28, "grad_norm": 0.7193552255630493, "learning_rate": 1.6547000629731008e-05, "loss": 2.1399, "step": 8563 }, { "epoch": 0.28, "grad_norm": 0.7114681005477905, "learning_rate": 1.6546197163919282e-05, "loss": 2.1589, "step": 8564 }, { "epoch": 0.28, "grad_norm": 0.7475398778915405, "learning_rate": 1.6545393624152456e-05, "loss": 2.1892, "step": 8565 }, { "epoch": 0.28, "grad_norm": 0.741888701915741, "learning_rate": 1.6544590010439613e-05, "loss": 2.2071, "step": 8566 }, { "epoch": 0.29, "grad_norm": 0.6935592293739319, "learning_rate": 1.6543786322789827e-05, "loss": 2.1692, "step": 8567 }, { "epoch": 0.29, "grad_norm": 0.7552782893180847, "learning_rate": 1.6542982561212174e-05, "loss": 2.2293, "step": 8568 }, { "epoch": 0.29, "grad_norm": 0.7516879439353943, "learning_rate": 1.6542178725715744e-05, "loss": 2.1764, "step": 8569 }, { "epoch": 0.29, "grad_norm": 0.7252159714698792, "learning_rate": 1.6541374816309608e-05, "loss": 2.1718, "step": 8570 }, { "epoch": 0.29, "grad_norm": 0.7396523952484131, "learning_rate": 1.6540570833002853e-05, "loss": 2.2216, "step": 8571 }, { "epoch": 0.29, "grad_norm": 0.7281020283699036, "learning_rate": 1.6539766775804565e-05, "loss": 2.1355, "step": 8572 }, { "epoch": 0.29, "grad_norm": 0.7397704124450684, "learning_rate": 1.6538962644723825e-05, "loss": 2.1136, "step": 8573 }, { "epoch": 0.29, "grad_norm": 0.7383582592010498, "learning_rate": 1.6538158439769713e-05, "loss": 2.1956, "step": 8574 }, { "epoch": 0.29, "grad_norm": 0.6808381080627441, "learning_rate": 1.6537354160951323e-05, "loss": 2.1152, "step": 8575 }, { "epoch": 0.29, "grad_norm": 0.7280012965202332, "learning_rate": 1.6536549808277735e-05, "loss": 2.0951, "step": 8576 }, { "epoch": 0.29, "grad_norm": 0.6991639733314514, "learning_rate": 1.653574538175804e-05, "loss": 2.0888, "step": 8577 }, { "epoch": 0.29, "grad_norm": 0.7489104866981506, "learning_rate": 1.653494088140132e-05, "loss": 2.0993, "step": 8578 }, { "epoch": 0.29, "grad_norm": 0.6890525817871094, "learning_rate": 1.653413630721667e-05, "loss": 2.1885, "step": 8579 }, { "epoch": 0.29, "grad_norm": 0.7387303113937378, "learning_rate": 1.6533331659213177e-05, "loss": 2.1598, "step": 8580 }, { "epoch": 0.29, "grad_norm": 0.7390830516815186, "learning_rate": 1.6532526937399934e-05, "loss": 2.1788, "step": 8581 }, { "epoch": 0.29, "grad_norm": 0.6796732544898987, "learning_rate": 1.6531722141786027e-05, "loss": 2.0676, "step": 8582 }, { "epoch": 0.29, "grad_norm": 0.7047191262245178, "learning_rate": 1.6530917272380552e-05, "loss": 2.1098, "step": 8583 }, { "epoch": 0.29, "grad_norm": 0.7175714373588562, "learning_rate": 1.65301123291926e-05, "loss": 2.1388, "step": 8584 }, { "epoch": 0.29, "grad_norm": 0.7000894546508789, "learning_rate": 1.652930731223127e-05, "loss": 2.1714, "step": 8585 }, { "epoch": 0.29, "grad_norm": 0.7137597799301147, "learning_rate": 1.652850222150565e-05, "loss": 2.0534, "step": 8586 }, { "epoch": 0.29, "grad_norm": 0.7001762390136719, "learning_rate": 1.652769705702484e-05, "loss": 2.0566, "step": 8587 }, { "epoch": 0.29, "grad_norm": 0.738588809967041, "learning_rate": 1.6526891818797932e-05, "loss": 2.1602, "step": 8588 }, { "epoch": 0.29, "grad_norm": 0.7601026296615601, "learning_rate": 1.6526086506834025e-05, "loss": 2.1668, "step": 8589 }, { "epoch": 0.29, "grad_norm": 0.6844609975814819, "learning_rate": 1.652528112114222e-05, "loss": 2.2094, "step": 8590 }, { "epoch": 0.29, "grad_norm": 0.723003089427948, "learning_rate": 1.652447566173161e-05, "loss": 2.123, "step": 8591 }, { "epoch": 0.29, "grad_norm": 0.7601599097251892, "learning_rate": 1.65236701286113e-05, "loss": 2.1215, "step": 8592 }, { "epoch": 0.29, "grad_norm": 0.7292881608009338, "learning_rate": 1.6522864521790384e-05, "loss": 2.2064, "step": 8593 }, { "epoch": 0.29, "grad_norm": 0.6910530924797058, "learning_rate": 1.6522058841277972e-05, "loss": 2.1627, "step": 8594 }, { "epoch": 0.29, "grad_norm": 0.7271946668624878, "learning_rate": 1.652125308708316e-05, "loss": 2.1611, "step": 8595 }, { "epoch": 0.29, "grad_norm": 0.7418900728225708, "learning_rate": 1.652044725921505e-05, "loss": 2.1461, "step": 8596 }, { "epoch": 0.29, "grad_norm": 0.6954464912414551, "learning_rate": 1.6519641357682754e-05, "loss": 2.1595, "step": 8597 }, { "epoch": 0.29, "grad_norm": 0.7137049436569214, "learning_rate": 1.6518835382495363e-05, "loss": 2.2013, "step": 8598 }, { "epoch": 0.29, "grad_norm": 0.7366760969161987, "learning_rate": 1.6518029333662e-05, "loss": 2.1932, "step": 8599 }, { "epoch": 0.29, "grad_norm": 0.7147172689437866, "learning_rate": 1.6517223211191753e-05, "loss": 2.1012, "step": 8600 }, { "epoch": 0.29, "grad_norm": 0.6887701749801636, "learning_rate": 1.651641701509374e-05, "loss": 2.1469, "step": 8601 }, { "epoch": 0.29, "grad_norm": 0.7105332612991333, "learning_rate": 1.651561074537707e-05, "loss": 2.1269, "step": 8602 }, { "epoch": 0.29, "grad_norm": 0.7148953676223755, "learning_rate": 1.6514804402050843e-05, "loss": 2.1258, "step": 8603 }, { "epoch": 0.29, "grad_norm": 0.7073917984962463, "learning_rate": 1.6513997985124176e-05, "loss": 2.1035, "step": 8604 }, { "epoch": 0.29, "grad_norm": 0.7075343132019043, "learning_rate": 1.6513191494606182e-05, "loss": 2.0877, "step": 8605 }, { "epoch": 0.29, "grad_norm": 0.7029696702957153, "learning_rate": 1.651238493050596e-05, "loss": 2.2, "step": 8606 }, { "epoch": 0.29, "grad_norm": 0.6935347318649292, "learning_rate": 1.6511578292832635e-05, "loss": 2.1688, "step": 8607 }, { "epoch": 0.29, "grad_norm": 0.7413133978843689, "learning_rate": 1.6510771581595314e-05, "loss": 2.1712, "step": 8608 }, { "epoch": 0.29, "grad_norm": 0.7197940349578857, "learning_rate": 1.650996479680311e-05, "loss": 2.1333, "step": 8609 }, { "epoch": 0.29, "grad_norm": 0.7398563027381897, "learning_rate": 1.650915793846514e-05, "loss": 2.1485, "step": 8610 }, { "epoch": 0.29, "grad_norm": 0.7056536674499512, "learning_rate": 1.6508351006590518e-05, "loss": 2.1274, "step": 8611 }, { "epoch": 0.29, "grad_norm": 0.7039489150047302, "learning_rate": 1.650754400118836e-05, "loss": 2.1344, "step": 8612 }, { "epoch": 0.29, "grad_norm": 0.7109227180480957, "learning_rate": 1.650673692226778e-05, "loss": 2.0707, "step": 8613 }, { "epoch": 0.29, "grad_norm": 0.7520394325256348, "learning_rate": 1.6505929769837905e-05, "loss": 2.0625, "step": 8614 }, { "epoch": 0.29, "grad_norm": 0.7551279664039612, "learning_rate": 1.6505122543907847e-05, "loss": 2.2104, "step": 8615 }, { "epoch": 0.29, "grad_norm": 0.7096158266067505, "learning_rate": 1.6504315244486728e-05, "loss": 2.1345, "step": 8616 }, { "epoch": 0.29, "grad_norm": 0.716400146484375, "learning_rate": 1.6503507871583667e-05, "loss": 2.1754, "step": 8617 }, { "epoch": 0.29, "grad_norm": 0.700905442237854, "learning_rate": 1.6502700425207782e-05, "loss": 2.2002, "step": 8618 }, { "epoch": 0.29, "grad_norm": 0.7089530825614929, "learning_rate": 1.65018929053682e-05, "loss": 2.1146, "step": 8619 }, { "epoch": 0.29, "grad_norm": 0.7055644989013672, "learning_rate": 1.6501085312074042e-05, "loss": 2.1102, "step": 8620 }, { "epoch": 0.29, "grad_norm": 0.7378947138786316, "learning_rate": 1.6500277645334435e-05, "loss": 2.1813, "step": 8621 }, { "epoch": 0.29, "grad_norm": 0.7238419055938721, "learning_rate": 1.64994699051585e-05, "loss": 2.1264, "step": 8622 }, { "epoch": 0.29, "grad_norm": 0.6934816837310791, "learning_rate": 1.6498662091555366e-05, "loss": 2.1511, "step": 8623 }, { "epoch": 0.29, "grad_norm": 0.6926857829093933, "learning_rate": 1.6497854204534148e-05, "loss": 2.1347, "step": 8624 }, { "epoch": 0.29, "grad_norm": 0.7049158215522766, "learning_rate": 1.6497046244103986e-05, "loss": 2.1505, "step": 8625 }, { "epoch": 0.29, "grad_norm": 0.7279732823371887, "learning_rate": 1.6496238210274005e-05, "loss": 2.1111, "step": 8626 }, { "epoch": 0.29, "grad_norm": 0.7275941371917725, "learning_rate": 1.649543010305333e-05, "loss": 2.2188, "step": 8627 }, { "epoch": 0.29, "grad_norm": 0.716028094291687, "learning_rate": 1.649462192245109e-05, "loss": 2.1244, "step": 8628 }, { "epoch": 0.29, "grad_norm": 0.7254250645637512, "learning_rate": 1.649381366847642e-05, "loss": 2.1393, "step": 8629 }, { "epoch": 0.29, "grad_norm": 0.7290170788764954, "learning_rate": 1.649300534113845e-05, "loss": 2.1159, "step": 8630 }, { "epoch": 0.29, "grad_norm": 0.6990953087806702, "learning_rate": 1.649219694044631e-05, "loss": 2.1616, "step": 8631 }, { "epoch": 0.29, "grad_norm": 0.7147461175918579, "learning_rate": 1.6491388466409134e-05, "loss": 2.127, "step": 8632 }, { "epoch": 0.29, "grad_norm": 0.7202890515327454, "learning_rate": 1.6490579919036057e-05, "loss": 2.0824, "step": 8633 }, { "epoch": 0.29, "grad_norm": 0.67803555727005, "learning_rate": 1.648977129833621e-05, "loss": 2.1154, "step": 8634 }, { "epoch": 0.29, "grad_norm": 0.7088366150856018, "learning_rate": 1.648896260431873e-05, "loss": 2.1029, "step": 8635 }, { "epoch": 0.29, "grad_norm": 0.7328715920448303, "learning_rate": 1.648815383699275e-05, "loss": 2.1116, "step": 8636 }, { "epoch": 0.29, "grad_norm": 0.7269423007965088, "learning_rate": 1.6487344996367415e-05, "loss": 2.1388, "step": 8637 }, { "epoch": 0.29, "grad_norm": 0.7346189022064209, "learning_rate": 1.6486536082451858e-05, "loss": 2.148, "step": 8638 }, { "epoch": 0.29, "grad_norm": 0.7402300238609314, "learning_rate": 1.648572709525522e-05, "loss": 2.1861, "step": 8639 }, { "epoch": 0.29, "grad_norm": 0.6965931057929993, "learning_rate": 1.648491803478663e-05, "loss": 2.1325, "step": 8640 }, { "epoch": 0.29, "grad_norm": 0.743628978729248, "learning_rate": 1.6484108901055244e-05, "loss": 2.1256, "step": 8641 }, { "epoch": 0.29, "grad_norm": 0.7043337225914001, "learning_rate": 1.6483299694070194e-05, "loss": 2.1679, "step": 8642 }, { "epoch": 0.29, "grad_norm": 0.6928893327713013, "learning_rate": 1.6482490413840623e-05, "loss": 2.1849, "step": 8643 }, { "epoch": 0.29, "grad_norm": 0.7343873977661133, "learning_rate": 1.6481681060375675e-05, "loss": 2.1332, "step": 8644 }, { "epoch": 0.29, "grad_norm": 0.7067716121673584, "learning_rate": 1.6480871633684495e-05, "loss": 2.1708, "step": 8645 }, { "epoch": 0.29, "grad_norm": 0.7068729996681213, "learning_rate": 1.648006213377622e-05, "loss": 2.1317, "step": 8646 }, { "epoch": 0.29, "grad_norm": 0.7325693964958191, "learning_rate": 1.6479252560660004e-05, "loss": 2.143, "step": 8647 }, { "epoch": 0.29, "grad_norm": 0.7149820327758789, "learning_rate": 1.6478442914344988e-05, "loss": 2.2159, "step": 8648 }, { "epoch": 0.29, "grad_norm": 0.716033399105072, "learning_rate": 1.6477633194840322e-05, "loss": 2.1152, "step": 8649 }, { "epoch": 0.29, "grad_norm": 0.7164447903633118, "learning_rate": 1.6476823402155154e-05, "loss": 2.0901, "step": 8650 }, { "epoch": 0.29, "grad_norm": 0.6766393780708313, "learning_rate": 1.647601353629863e-05, "loss": 2.1432, "step": 8651 }, { "epoch": 0.29, "grad_norm": 0.7457061409950256, "learning_rate": 1.64752035972799e-05, "loss": 2.1008, "step": 8652 }, { "epoch": 0.29, "grad_norm": 0.7502416968345642, "learning_rate": 1.6474393585108117e-05, "loss": 2.081, "step": 8653 }, { "epoch": 0.29, "grad_norm": 0.7059175968170166, "learning_rate": 1.647358349979243e-05, "loss": 2.1548, "step": 8654 }, { "epoch": 0.29, "grad_norm": 0.7197018265724182, "learning_rate": 1.6472773341341984e-05, "loss": 2.2745, "step": 8655 }, { "epoch": 0.29, "grad_norm": 0.7112841010093689, "learning_rate": 1.6471963109765942e-05, "loss": 2.1559, "step": 8656 }, { "epoch": 0.29, "grad_norm": 0.7062753438949585, "learning_rate": 1.6471152805073454e-05, "loss": 2.1862, "step": 8657 }, { "epoch": 0.29, "grad_norm": 0.7131178975105286, "learning_rate": 1.6470342427273673e-05, "loss": 2.0975, "step": 8658 }, { "epoch": 0.29, "grad_norm": 0.7311652302742004, "learning_rate": 1.6469531976375756e-05, "loss": 2.1208, "step": 8659 }, { "epoch": 0.29, "grad_norm": 0.7294530272483826, "learning_rate": 1.646872145238886e-05, "loss": 2.107, "step": 8660 }, { "epoch": 0.29, "grad_norm": 0.755399227142334, "learning_rate": 1.6467910855322136e-05, "loss": 2.0934, "step": 8661 }, { "epoch": 0.29, "grad_norm": 0.7106584906578064, "learning_rate": 1.646710018518475e-05, "loss": 2.1423, "step": 8662 }, { "epoch": 0.29, "grad_norm": 0.7575503587722778, "learning_rate": 1.6466289441985853e-05, "loss": 2.1657, "step": 8663 }, { "epoch": 0.29, "grad_norm": 0.6909418106079102, "learning_rate": 1.646547862573461e-05, "loss": 2.1373, "step": 8664 }, { "epoch": 0.29, "grad_norm": 0.7176034450531006, "learning_rate": 1.646466773644018e-05, "loss": 2.1539, "step": 8665 }, { "epoch": 0.29, "grad_norm": 0.7235804796218872, "learning_rate": 1.6463856774111718e-05, "loss": 2.155, "step": 8666 }, { "epoch": 0.29, "grad_norm": 0.7293908596038818, "learning_rate": 1.6463045738758394e-05, "loss": 2.0896, "step": 8667 }, { "epoch": 0.29, "grad_norm": 0.6998721361160278, "learning_rate": 1.6462234630389366e-05, "loss": 2.1473, "step": 8668 }, { "epoch": 0.29, "grad_norm": 0.7099831104278564, "learning_rate": 1.6461423449013796e-05, "loss": 2.1542, "step": 8669 }, { "epoch": 0.29, "grad_norm": 0.7209317088127136, "learning_rate": 1.6460612194640852e-05, "loss": 2.1557, "step": 8670 }, { "epoch": 0.29, "grad_norm": 0.6948076486587524, "learning_rate": 1.64598008672797e-05, "loss": 2.1168, "step": 8671 }, { "epoch": 0.29, "grad_norm": 0.6941455602645874, "learning_rate": 1.64589894669395e-05, "loss": 2.156, "step": 8672 }, { "epoch": 0.29, "grad_norm": 0.7441786527633667, "learning_rate": 1.6458177993629425e-05, "loss": 2.1611, "step": 8673 }, { "epoch": 0.29, "grad_norm": 0.7092084884643555, "learning_rate": 1.6457366447358638e-05, "loss": 2.1527, "step": 8674 }, { "epoch": 0.29, "grad_norm": 0.6901856064796448, "learning_rate": 1.645655482813631e-05, "loss": 2.151, "step": 8675 }, { "epoch": 0.29, "grad_norm": 0.703450083732605, "learning_rate": 1.645574313597161e-05, "loss": 2.1625, "step": 8676 }, { "epoch": 0.29, "grad_norm": 0.7163329720497131, "learning_rate": 1.6454931370873707e-05, "loss": 2.0686, "step": 8677 }, { "epoch": 0.29, "grad_norm": 0.7170610427856445, "learning_rate": 1.6454119532851772e-05, "loss": 2.1833, "step": 8678 }, { "epoch": 0.29, "grad_norm": 0.7266414165496826, "learning_rate": 1.645330762191498e-05, "loss": 2.116, "step": 8679 }, { "epoch": 0.29, "grad_norm": 0.7273832559585571, "learning_rate": 1.6452495638072496e-05, "loss": 2.1479, "step": 8680 }, { "epoch": 0.29, "grad_norm": 0.6916490793228149, "learning_rate": 1.64516835813335e-05, "loss": 2.1933, "step": 8681 }, { "epoch": 0.29, "grad_norm": 0.7194525003433228, "learning_rate": 1.6450871451707166e-05, "loss": 2.129, "step": 8682 }, { "epoch": 0.29, "grad_norm": 0.7347792983055115, "learning_rate": 1.6450059249202665e-05, "loss": 2.1805, "step": 8683 }, { "epoch": 0.29, "grad_norm": 0.7341068387031555, "learning_rate": 1.6449246973829172e-05, "loss": 2.1565, "step": 8684 }, { "epoch": 0.29, "grad_norm": 0.7437977194786072, "learning_rate": 1.644843462559587e-05, "loss": 2.1665, "step": 8685 }, { "epoch": 0.29, "grad_norm": 0.7272555232048035, "learning_rate": 1.644762220451193e-05, "loss": 2.1583, "step": 8686 }, { "epoch": 0.29, "grad_norm": 0.7330377697944641, "learning_rate": 1.644680971058654e-05, "loss": 2.1352, "step": 8687 }, { "epoch": 0.29, "grad_norm": 0.7000678181648254, "learning_rate": 1.644599714382886e-05, "loss": 2.1119, "step": 8688 }, { "epoch": 0.29, "grad_norm": 0.6793113350868225, "learning_rate": 1.644518450424809e-05, "loss": 2.117, "step": 8689 }, { "epoch": 0.29, "grad_norm": 0.7002363801002502, "learning_rate": 1.6444371791853405e-05, "loss": 2.0819, "step": 8690 }, { "epoch": 0.29, "grad_norm": 0.7260528802871704, "learning_rate": 1.6443559006653977e-05, "loss": 2.1111, "step": 8691 }, { "epoch": 0.29, "grad_norm": 0.7341198325157166, "learning_rate": 1.6442746148659002e-05, "loss": 2.2254, "step": 8692 }, { "epoch": 0.29, "grad_norm": 0.7159674167633057, "learning_rate": 1.6441933217877653e-05, "loss": 2.1475, "step": 8693 }, { "epoch": 0.29, "grad_norm": 0.719525933265686, "learning_rate": 1.644112021431912e-05, "loss": 2.1687, "step": 8694 }, { "epoch": 0.29, "grad_norm": 0.7415254712104797, "learning_rate": 1.6440307137992585e-05, "loss": 2.1598, "step": 8695 }, { "epoch": 0.29, "grad_norm": 0.7052748203277588, "learning_rate": 1.6439493988907234e-05, "loss": 2.2062, "step": 8696 }, { "epoch": 0.29, "grad_norm": 0.7486609816551208, "learning_rate": 1.6438680767072252e-05, "loss": 2.1648, "step": 8697 }, { "epoch": 0.29, "grad_norm": 0.7042666673660278, "learning_rate": 1.6437867472496832e-05, "loss": 2.1855, "step": 8698 }, { "epoch": 0.29, "grad_norm": 0.6793323755264282, "learning_rate": 1.6437054105190155e-05, "loss": 2.0925, "step": 8699 }, { "epoch": 0.29, "grad_norm": 0.7216567397117615, "learning_rate": 1.643624066516141e-05, "loss": 2.1075, "step": 8700 }, { "epoch": 0.29, "grad_norm": 0.70390385389328, "learning_rate": 1.6435427152419797e-05, "loss": 2.1534, "step": 8701 }, { "epoch": 0.29, "grad_norm": 0.7354033589363098, "learning_rate": 1.64346135669745e-05, "loss": 2.1255, "step": 8702 }, { "epoch": 0.29, "grad_norm": 0.7334848046302795, "learning_rate": 1.6433799908834703e-05, "loss": 2.1991, "step": 8703 }, { "epoch": 0.29, "grad_norm": 0.7464609146118164, "learning_rate": 1.643298617800961e-05, "loss": 2.1287, "step": 8704 }, { "epoch": 0.29, "grad_norm": 0.7090374827384949, "learning_rate": 1.643217237450841e-05, "loss": 2.1402, "step": 8705 }, { "epoch": 0.29, "grad_norm": 0.7271326780319214, "learning_rate": 1.6431358498340293e-05, "loss": 2.1258, "step": 8706 }, { "epoch": 0.29, "grad_norm": 0.7055995464324951, "learning_rate": 1.643054454951446e-05, "loss": 2.1364, "step": 8707 }, { "epoch": 0.29, "grad_norm": 0.7151098847389221, "learning_rate": 1.64297305280401e-05, "loss": 2.1987, "step": 8708 }, { "epoch": 0.29, "grad_norm": 0.7521646022796631, "learning_rate": 1.6428916433926415e-05, "loss": 2.13, "step": 8709 }, { "epoch": 0.29, "grad_norm": 0.6807918548583984, "learning_rate": 1.64281022671826e-05, "loss": 2.0933, "step": 8710 }, { "epoch": 0.29, "grad_norm": 0.7281333804130554, "learning_rate": 1.642728802781785e-05, "loss": 2.2045, "step": 8711 }, { "epoch": 0.29, "grad_norm": 0.7047079205513, "learning_rate": 1.642647371584137e-05, "loss": 2.1284, "step": 8712 }, { "epoch": 0.29, "grad_norm": 0.7147891521453857, "learning_rate": 1.6425659331262353e-05, "loss": 2.1327, "step": 8713 }, { "epoch": 0.29, "grad_norm": 0.6735539436340332, "learning_rate": 1.6424844874090008e-05, "loss": 2.0491, "step": 8714 }, { "epoch": 0.29, "grad_norm": 0.7134787440299988, "learning_rate": 1.6424030344333526e-05, "loss": 2.0897, "step": 8715 }, { "epoch": 0.29, "grad_norm": 0.7234330773353577, "learning_rate": 1.6423215742002115e-05, "loss": 2.1982, "step": 8716 }, { "epoch": 0.29, "grad_norm": 0.7160414457321167, "learning_rate": 1.642240106710498e-05, "loss": 2.1127, "step": 8717 }, { "epoch": 0.29, "grad_norm": 0.6791563630104065, "learning_rate": 1.6421586319651315e-05, "loss": 2.1201, "step": 8718 }, { "epoch": 0.29, "grad_norm": 0.7297214865684509, "learning_rate": 1.6420771499650337e-05, "loss": 2.1483, "step": 8719 }, { "epoch": 0.29, "grad_norm": 0.7450850009918213, "learning_rate": 1.6419956607111246e-05, "loss": 2.2155, "step": 8720 }, { "epoch": 0.29, "grad_norm": 0.7217132449150085, "learning_rate": 1.6419141642043245e-05, "loss": 2.1235, "step": 8721 }, { "epoch": 0.29, "grad_norm": 0.6960335373878479, "learning_rate": 1.6418326604455545e-05, "loss": 2.1715, "step": 8722 }, { "epoch": 0.29, "grad_norm": 0.7075805068016052, "learning_rate": 1.6417511494357353e-05, "loss": 2.1347, "step": 8723 }, { "epoch": 0.29, "grad_norm": 0.7439130544662476, "learning_rate": 1.6416696311757873e-05, "loss": 2.1443, "step": 8724 }, { "epoch": 0.29, "grad_norm": 0.7126134037971497, "learning_rate": 1.641588105666632e-05, "loss": 2.1566, "step": 8725 }, { "epoch": 0.29, "grad_norm": 0.7144042253494263, "learning_rate": 1.6415065729091906e-05, "loss": 2.1124, "step": 8726 }, { "epoch": 0.29, "grad_norm": 0.6928504109382629, "learning_rate": 1.6414250329043836e-05, "loss": 2.1516, "step": 8727 }, { "epoch": 0.29, "grad_norm": 0.7137312293052673, "learning_rate": 1.6413434856531328e-05, "loss": 2.1447, "step": 8728 }, { "epoch": 0.29, "grad_norm": 0.7179322838783264, "learning_rate": 1.6412619311563588e-05, "loss": 2.16, "step": 8729 }, { "epoch": 0.29, "grad_norm": 0.7052469253540039, "learning_rate": 1.641180369414984e-05, "loss": 2.1228, "step": 8730 }, { "epoch": 0.29, "grad_norm": 0.7070691585540771, "learning_rate": 1.641098800429928e-05, "loss": 2.1589, "step": 8731 }, { "epoch": 0.29, "grad_norm": 0.7290984392166138, "learning_rate": 1.6410172242021146e-05, "loss": 2.1536, "step": 8732 }, { "epoch": 0.29, "grad_norm": 0.7093420028686523, "learning_rate": 1.6409356407324638e-05, "loss": 2.1757, "step": 8733 }, { "epoch": 0.29, "grad_norm": 0.7079386115074158, "learning_rate": 1.640854050021898e-05, "loss": 2.1372, "step": 8734 }, { "epoch": 0.29, "grad_norm": 0.7045636177062988, "learning_rate": 1.640772452071338e-05, "loss": 2.0838, "step": 8735 }, { "epoch": 0.29, "grad_norm": 0.7122809290885925, "learning_rate": 1.6406908468817072e-05, "loss": 2.1526, "step": 8736 }, { "epoch": 0.29, "grad_norm": 0.7370101809501648, "learning_rate": 1.640609234453926e-05, "loss": 2.0996, "step": 8737 }, { "epoch": 0.29, "grad_norm": 0.7180492281913757, "learning_rate": 1.640527614788918e-05, "loss": 2.1469, "step": 8738 }, { "epoch": 0.29, "grad_norm": 0.7035475373268127, "learning_rate": 1.6404459878876036e-05, "loss": 2.1176, "step": 8739 }, { "epoch": 0.29, "grad_norm": 0.6948394179344177, "learning_rate": 1.640364353750906e-05, "loss": 2.1903, "step": 8740 }, { "epoch": 0.29, "grad_norm": 0.775216281414032, "learning_rate": 1.6402827123797472e-05, "loss": 2.0726, "step": 8741 }, { "epoch": 0.29, "grad_norm": 0.7295196652412415, "learning_rate": 1.6402010637750497e-05, "loss": 2.1011, "step": 8742 }, { "epoch": 0.29, "grad_norm": 0.7750604152679443, "learning_rate": 1.6401194079377357e-05, "loss": 2.183, "step": 8743 }, { "epoch": 0.29, "grad_norm": 0.699211061000824, "learning_rate": 1.6400377448687278e-05, "loss": 2.1482, "step": 8744 }, { "epoch": 0.29, "grad_norm": 0.7084868550300598, "learning_rate": 1.6399560745689486e-05, "loss": 2.0831, "step": 8745 }, { "epoch": 0.29, "grad_norm": 0.6998394131660461, "learning_rate": 1.6398743970393207e-05, "loss": 2.1181, "step": 8746 }, { "epoch": 0.29, "grad_norm": 0.7365617752075195, "learning_rate": 1.6397927122807666e-05, "loss": 2.1257, "step": 8747 }, { "epoch": 0.29, "grad_norm": 0.7238606810569763, "learning_rate": 1.6397110202942098e-05, "loss": 2.1638, "step": 8748 }, { "epoch": 0.29, "grad_norm": 0.7203308343887329, "learning_rate": 1.6396293210805723e-05, "loss": 2.1237, "step": 8749 }, { "epoch": 0.29, "grad_norm": 0.7496856451034546, "learning_rate": 1.6395476146407778e-05, "loss": 2.1332, "step": 8750 }, { "epoch": 0.29, "grad_norm": 0.735129177570343, "learning_rate": 1.6394659009757493e-05, "loss": 2.1361, "step": 8751 }, { "epoch": 0.29, "grad_norm": 0.710877001285553, "learning_rate": 1.63938418008641e-05, "loss": 2.1201, "step": 8752 }, { "epoch": 0.29, "grad_norm": 0.706219494342804, "learning_rate": 1.6393024519736824e-05, "loss": 2.0817, "step": 8753 }, { "epoch": 0.29, "grad_norm": 0.736022412776947, "learning_rate": 1.639220716638491e-05, "loss": 2.1648, "step": 8754 }, { "epoch": 0.29, "grad_norm": 0.7418921589851379, "learning_rate": 1.639138974081758e-05, "loss": 2.1882, "step": 8755 }, { "epoch": 0.29, "grad_norm": 0.7078555226325989, "learning_rate": 1.639057224304408e-05, "loss": 2.2109, "step": 8756 }, { "epoch": 0.29, "grad_norm": 0.7736138701438904, "learning_rate": 1.6389754673073635e-05, "loss": 2.1076, "step": 8757 }, { "epoch": 0.29, "grad_norm": 0.7333442568778992, "learning_rate": 1.6388937030915486e-05, "loss": 2.135, "step": 8758 }, { "epoch": 0.29, "grad_norm": 0.7175415754318237, "learning_rate": 1.6388119316578874e-05, "loss": 2.1358, "step": 8759 }, { "epoch": 0.29, "grad_norm": 0.7150269150733948, "learning_rate": 1.6387301530073033e-05, "loss": 2.1755, "step": 8760 }, { "epoch": 0.29, "grad_norm": 0.7132679224014282, "learning_rate": 1.63864836714072e-05, "loss": 2.1651, "step": 8761 }, { "epoch": 0.29, "grad_norm": 0.7067186832427979, "learning_rate": 1.6385665740590622e-05, "loss": 2.1035, "step": 8762 }, { "epoch": 0.29, "grad_norm": 0.7212598323822021, "learning_rate": 1.638484773763253e-05, "loss": 2.1719, "step": 8763 }, { "epoch": 0.29, "grad_norm": 0.709871768951416, "learning_rate": 1.6384029662542175e-05, "loss": 2.1227, "step": 8764 }, { "epoch": 0.29, "grad_norm": 0.7096536755561829, "learning_rate": 1.6383211515328793e-05, "loss": 2.1158, "step": 8765 }, { "epoch": 0.29, "grad_norm": 0.7124400734901428, "learning_rate": 1.6382393296001625e-05, "loss": 2.1401, "step": 8766 }, { "epoch": 0.29, "grad_norm": 0.7127114534378052, "learning_rate": 1.6381575004569923e-05, "loss": 2.0982, "step": 8767 }, { "epoch": 0.29, "grad_norm": 0.6893149614334106, "learning_rate": 1.6380756641042924e-05, "loss": 2.0642, "step": 8768 }, { "epoch": 0.29, "grad_norm": 0.7137542963027954, "learning_rate": 1.6379938205429874e-05, "loss": 2.2296, "step": 8769 }, { "epoch": 0.29, "grad_norm": 0.7044686675071716, "learning_rate": 1.6379119697740025e-05, "loss": 2.1124, "step": 8770 }, { "epoch": 0.29, "grad_norm": 0.7261411547660828, "learning_rate": 1.6378301117982618e-05, "loss": 2.2042, "step": 8771 }, { "epoch": 0.29, "grad_norm": 0.7239534258842468, "learning_rate": 1.63774824661669e-05, "loss": 2.1688, "step": 8772 }, { "epoch": 0.29, "grad_norm": 0.7159039974212646, "learning_rate": 1.6376663742302125e-05, "loss": 2.1593, "step": 8773 }, { "epoch": 0.29, "grad_norm": 0.7057291269302368, "learning_rate": 1.6375844946397546e-05, "loss": 2.176, "step": 8774 }, { "epoch": 0.29, "grad_norm": 0.7203836441040039, "learning_rate": 1.63750260784624e-05, "loss": 2.215, "step": 8775 }, { "epoch": 0.29, "grad_norm": 0.7096632719039917, "learning_rate": 1.637420713850595e-05, "loss": 2.1158, "step": 8776 }, { "epoch": 0.29, "grad_norm": 0.745130717754364, "learning_rate": 1.6373388126537437e-05, "loss": 2.1832, "step": 8777 }, { "epoch": 0.29, "grad_norm": 0.7339268922805786, "learning_rate": 1.6372569042566128e-05, "loss": 2.1505, "step": 8778 }, { "epoch": 0.29, "grad_norm": 0.7244040966033936, "learning_rate": 1.6371749886601263e-05, "loss": 2.1477, "step": 8779 }, { "epoch": 0.29, "grad_norm": 0.7341741919517517, "learning_rate": 1.6370930658652103e-05, "loss": 2.154, "step": 8780 }, { "epoch": 0.29, "grad_norm": 0.7291587591171265, "learning_rate": 1.6370111358727904e-05, "loss": 2.2091, "step": 8781 }, { "epoch": 0.29, "grad_norm": 0.6986908316612244, "learning_rate": 1.6369291986837922e-05, "loss": 2.0877, "step": 8782 }, { "epoch": 0.29, "grad_norm": 0.7447097301483154, "learning_rate": 1.6368472542991407e-05, "loss": 2.089, "step": 8783 }, { "epoch": 0.29, "grad_norm": 0.6976644396781921, "learning_rate": 1.6367653027197622e-05, "loss": 2.1786, "step": 8784 }, { "epoch": 0.29, "grad_norm": 0.7244577407836914, "learning_rate": 1.6366833439465827e-05, "loss": 2.1528, "step": 8785 }, { "epoch": 0.29, "grad_norm": 0.7220506072044373, "learning_rate": 1.6366013779805278e-05, "loss": 2.1257, "step": 8786 }, { "epoch": 0.29, "grad_norm": 0.7239930033683777, "learning_rate": 1.636519404822524e-05, "loss": 2.1943, "step": 8787 }, { "epoch": 0.29, "grad_norm": 0.7204551696777344, "learning_rate": 1.6364374244734965e-05, "loss": 2.0981, "step": 8788 }, { "epoch": 0.29, "grad_norm": 0.7207492589950562, "learning_rate": 1.6363554369343724e-05, "loss": 2.074, "step": 8789 }, { "epoch": 0.29, "grad_norm": 0.7105235457420349, "learning_rate": 1.636273442206077e-05, "loss": 2.13, "step": 8790 }, { "epoch": 0.29, "grad_norm": 0.7341360449790955, "learning_rate": 1.6361914402895377e-05, "loss": 2.0734, "step": 8791 }, { "epoch": 0.29, "grad_norm": 0.7436923384666443, "learning_rate": 1.6361094311856798e-05, "loss": 2.1613, "step": 8792 }, { "epoch": 0.29, "grad_norm": 0.7519908547401428, "learning_rate": 1.6360274148954307e-05, "loss": 2.1294, "step": 8793 }, { "epoch": 0.29, "grad_norm": 0.6914599537849426, "learning_rate": 1.6359453914197164e-05, "loss": 2.2098, "step": 8794 }, { "epoch": 0.29, "grad_norm": 0.7312342524528503, "learning_rate": 1.635863360759464e-05, "loss": 2.1477, "step": 8795 }, { "epoch": 0.29, "grad_norm": 0.7123243808746338, "learning_rate": 1.6357813229156e-05, "loss": 2.1621, "step": 8796 }, { "epoch": 0.29, "grad_norm": 0.723903238773346, "learning_rate": 1.635699277889051e-05, "loss": 2.1575, "step": 8797 }, { "epoch": 0.29, "grad_norm": 0.7797050476074219, "learning_rate": 1.6356172256807445e-05, "loss": 2.1658, "step": 8798 }, { "epoch": 0.29, "grad_norm": 0.7093600034713745, "learning_rate": 1.6355351662916064e-05, "loss": 2.17, "step": 8799 }, { "epoch": 0.29, "grad_norm": 0.721022367477417, "learning_rate": 1.635453099722565e-05, "loss": 2.0877, "step": 8800 }, { "epoch": 0.29, "grad_norm": 0.7305416464805603, "learning_rate": 1.635371025974547e-05, "loss": 2.1786, "step": 8801 }, { "epoch": 0.29, "grad_norm": 0.7348678112030029, "learning_rate": 1.6352889450484794e-05, "loss": 2.1508, "step": 8802 }, { "epoch": 0.29, "grad_norm": 0.7743626236915588, "learning_rate": 1.6352068569452893e-05, "loss": 2.1255, "step": 8803 }, { "epoch": 0.29, "grad_norm": 0.7292847037315369, "learning_rate": 1.6351247616659046e-05, "loss": 2.1044, "step": 8804 }, { "epoch": 0.29, "grad_norm": 0.7174158692359924, "learning_rate": 1.6350426592112523e-05, "loss": 2.134, "step": 8805 }, { "epoch": 0.29, "grad_norm": 0.702694296836853, "learning_rate": 1.6349605495822605e-05, "loss": 2.0817, "step": 8806 }, { "epoch": 0.29, "grad_norm": 0.7096382975578308, "learning_rate": 1.6348784327798567e-05, "loss": 2.1485, "step": 8807 }, { "epoch": 0.29, "grad_norm": 0.7276924848556519, "learning_rate": 1.634796308804968e-05, "loss": 2.1361, "step": 8808 }, { "epoch": 0.29, "grad_norm": 0.7243310213088989, "learning_rate": 1.634714177658523e-05, "loss": 2.0894, "step": 8809 }, { "epoch": 0.29, "grad_norm": 0.738064706325531, "learning_rate": 1.634632039341449e-05, "loss": 2.1871, "step": 8810 }, { "epoch": 0.29, "grad_norm": 0.7024344205856323, "learning_rate": 1.6345498938546742e-05, "loss": 2.1296, "step": 8811 }, { "epoch": 0.29, "grad_norm": 0.7270868420600891, "learning_rate": 1.6344677411991266e-05, "loss": 2.1841, "step": 8812 }, { "epoch": 0.29, "grad_norm": 0.7622269988059998, "learning_rate": 1.6343855813757344e-05, "loss": 2.1947, "step": 8813 }, { "epoch": 0.29, "grad_norm": 0.7116913795471191, "learning_rate": 1.6343034143854254e-05, "loss": 2.0811, "step": 8814 }, { "epoch": 0.29, "grad_norm": 0.7222337126731873, "learning_rate": 1.6342212402291285e-05, "loss": 2.0714, "step": 8815 }, { "epoch": 0.29, "grad_norm": 0.6948091983795166, "learning_rate": 1.6341390589077715e-05, "loss": 2.1652, "step": 8816 }, { "epoch": 0.29, "grad_norm": 0.730566680431366, "learning_rate": 1.634056870422283e-05, "loss": 2.1516, "step": 8817 }, { "epoch": 0.29, "grad_norm": 0.7419631481170654, "learning_rate": 1.6339746747735916e-05, "loss": 2.1248, "step": 8818 }, { "epoch": 0.29, "grad_norm": 0.7744646072387695, "learning_rate": 1.6338924719626262e-05, "loss": 2.1506, "step": 8819 }, { "epoch": 0.29, "grad_norm": 0.7301687598228455, "learning_rate": 1.633810261990315e-05, "loss": 2.1438, "step": 8820 }, { "epoch": 0.29, "grad_norm": 0.7711130380630493, "learning_rate": 1.6337280448575868e-05, "loss": 2.1334, "step": 8821 }, { "epoch": 0.29, "grad_norm": 0.7505212426185608, "learning_rate": 1.6336458205653705e-05, "loss": 2.1573, "step": 8822 }, { "epoch": 0.29, "grad_norm": 0.7354183197021484, "learning_rate": 1.633563589114595e-05, "loss": 2.1934, "step": 8823 }, { "epoch": 0.29, "grad_norm": 0.7145424485206604, "learning_rate": 1.6334813505061898e-05, "loss": 2.0968, "step": 8824 }, { "epoch": 0.29, "grad_norm": 0.7443456649780273, "learning_rate": 1.6333991047410828e-05, "loss": 2.1553, "step": 8825 }, { "epoch": 0.29, "grad_norm": 0.7150864601135254, "learning_rate": 1.6333168518202045e-05, "loss": 2.0695, "step": 8826 }, { "epoch": 0.29, "grad_norm": 0.7085060477256775, "learning_rate": 1.6332345917444837e-05, "loss": 2.1055, "step": 8827 }, { "epoch": 0.29, "grad_norm": 0.7280860543251038, "learning_rate": 1.6331523245148493e-05, "loss": 2.1907, "step": 8828 }, { "epoch": 0.29, "grad_norm": 0.7594099044799805, "learning_rate": 1.633070050132231e-05, "loss": 2.1361, "step": 8829 }, { "epoch": 0.29, "grad_norm": 0.7153259515762329, "learning_rate": 1.6329877685975583e-05, "loss": 2.1616, "step": 8830 }, { "epoch": 0.29, "grad_norm": 0.6950773000717163, "learning_rate": 1.6329054799117608e-05, "loss": 2.1086, "step": 8831 }, { "epoch": 0.29, "grad_norm": 0.7078160047531128, "learning_rate": 1.6328231840757682e-05, "loss": 2.2081, "step": 8832 }, { "epoch": 0.29, "grad_norm": 0.6944240927696228, "learning_rate": 1.6327408810905102e-05, "loss": 2.2008, "step": 8833 }, { "epoch": 0.29, "grad_norm": 0.6990172266960144, "learning_rate": 1.6326585709569162e-05, "loss": 2.1064, "step": 8834 }, { "epoch": 0.29, "grad_norm": 0.727824866771698, "learning_rate": 1.6325762536759166e-05, "loss": 2.1484, "step": 8835 }, { "epoch": 0.29, "grad_norm": 0.6902204155921936, "learning_rate": 1.632493929248441e-05, "loss": 2.1592, "step": 8836 }, { "epoch": 0.29, "grad_norm": 0.7441250681877136, "learning_rate": 1.6324115976754203e-05, "loss": 2.205, "step": 8837 }, { "epoch": 0.29, "grad_norm": 0.7540885210037231, "learning_rate": 1.6323292589577837e-05, "loss": 2.1061, "step": 8838 }, { "epoch": 0.29, "grad_norm": 0.7254241108894348, "learning_rate": 1.6322469130964616e-05, "loss": 2.0898, "step": 8839 }, { "epoch": 0.29, "grad_norm": 0.7198905348777771, "learning_rate": 1.6321645600923844e-05, "loss": 2.1784, "step": 8840 }, { "epoch": 0.29, "grad_norm": 0.7073347568511963, "learning_rate": 1.6320821999464826e-05, "loss": 2.0828, "step": 8841 }, { "epoch": 0.29, "grad_norm": 0.6953628063201904, "learning_rate": 1.631999832659686e-05, "loss": 2.0739, "step": 8842 }, { "epoch": 0.29, "grad_norm": 0.7028694748878479, "learning_rate": 1.6319174582329266e-05, "loss": 2.1717, "step": 8843 }, { "epoch": 0.29, "grad_norm": 0.7340285778045654, "learning_rate": 1.6318350766671333e-05, "loss": 2.126, "step": 8844 }, { "epoch": 0.29, "grad_norm": 0.7425290942192078, "learning_rate": 1.631752687963238e-05, "loss": 2.1112, "step": 8845 }, { "epoch": 0.29, "grad_norm": 0.766482412815094, "learning_rate": 1.6316702921221708e-05, "loss": 2.1888, "step": 8846 }, { "epoch": 0.29, "grad_norm": 0.7340766787528992, "learning_rate": 1.6315878891448635e-05, "loss": 2.1639, "step": 8847 }, { "epoch": 0.29, "grad_norm": 0.6977381706237793, "learning_rate": 1.6315054790322458e-05, "loss": 2.1266, "step": 8848 }, { "epoch": 0.29, "grad_norm": 0.7038753628730774, "learning_rate": 1.6314230617852492e-05, "loss": 2.1159, "step": 8849 }, { "epoch": 0.29, "grad_norm": 0.6921088099479675, "learning_rate": 1.6313406374048054e-05, "loss": 2.1113, "step": 8850 }, { "epoch": 0.29, "grad_norm": 0.7174754738807678, "learning_rate": 1.6312582058918447e-05, "loss": 2.1747, "step": 8851 }, { "epoch": 0.29, "grad_norm": 0.7212690711021423, "learning_rate": 1.6311757672472987e-05, "loss": 2.2322, "step": 8852 }, { "epoch": 0.29, "grad_norm": 0.7302584052085876, "learning_rate": 1.6310933214720992e-05, "loss": 2.2476, "step": 8853 }, { "epoch": 0.29, "grad_norm": 0.7185867428779602, "learning_rate": 1.6310108685671768e-05, "loss": 2.182, "step": 8854 }, { "epoch": 0.29, "grad_norm": 0.7199523448944092, "learning_rate": 1.6309284085334635e-05, "loss": 2.115, "step": 8855 }, { "epoch": 0.29, "grad_norm": 0.6832734942436218, "learning_rate": 1.630845941371891e-05, "loss": 2.082, "step": 8856 }, { "epoch": 0.29, "grad_norm": 0.7254805564880371, "learning_rate": 1.63076346708339e-05, "loss": 2.164, "step": 8857 }, { "epoch": 0.29, "grad_norm": 0.7400642037391663, "learning_rate": 1.6306809856688942e-05, "loss": 2.087, "step": 8858 }, { "epoch": 0.29, "grad_norm": 0.697419285774231, "learning_rate": 1.6305984971293337e-05, "loss": 2.1324, "step": 8859 }, { "epoch": 0.29, "grad_norm": 0.7521214485168457, "learning_rate": 1.6305160014656406e-05, "loss": 2.1685, "step": 8860 }, { "epoch": 0.29, "grad_norm": 0.7082265615463257, "learning_rate": 1.6304334986787477e-05, "loss": 2.1524, "step": 8861 }, { "epoch": 0.29, "grad_norm": 0.7007856369018555, "learning_rate": 1.6303509887695864e-05, "loss": 2.1332, "step": 8862 }, { "epoch": 0.29, "grad_norm": 0.748794436454773, "learning_rate": 1.6302684717390894e-05, "loss": 2.1539, "step": 8863 }, { "epoch": 0.29, "grad_norm": 0.7005513310432434, "learning_rate": 1.6301859475881882e-05, "loss": 2.0851, "step": 8864 }, { "epoch": 0.29, "grad_norm": 0.7241923809051514, "learning_rate": 1.6301034163178155e-05, "loss": 2.1796, "step": 8865 }, { "epoch": 0.29, "grad_norm": 0.744773805141449, "learning_rate": 1.6300208779289036e-05, "loss": 2.1582, "step": 8866 }, { "epoch": 0.3, "grad_norm": 0.6742888689041138, "learning_rate": 1.6299383324223855e-05, "loss": 2.1298, "step": 8867 }, { "epoch": 0.3, "grad_norm": 0.6955166459083557, "learning_rate": 1.6298557797991927e-05, "loss": 2.1325, "step": 8868 }, { "epoch": 0.3, "grad_norm": 0.702983021736145, "learning_rate": 1.6297732200602588e-05, "loss": 2.1273, "step": 8869 }, { "epoch": 0.3, "grad_norm": 0.7420945763587952, "learning_rate": 1.629690653206516e-05, "loss": 2.215, "step": 8870 }, { "epoch": 0.3, "grad_norm": 0.7193183302879333, "learning_rate": 1.6296080792388975e-05, "loss": 2.161, "step": 8871 }, { "epoch": 0.3, "grad_norm": 0.7072712779045105, "learning_rate": 1.6295254981583356e-05, "loss": 2.1187, "step": 8872 }, { "epoch": 0.3, "grad_norm": 0.7018760442733765, "learning_rate": 1.6294429099657638e-05, "loss": 2.1413, "step": 8873 }, { "epoch": 0.3, "grad_norm": 0.7001602649688721, "learning_rate": 1.629360314662115e-05, "loss": 2.1876, "step": 8874 }, { "epoch": 0.3, "grad_norm": 0.6876716613769531, "learning_rate": 1.629277712248322e-05, "loss": 2.0828, "step": 8875 }, { "epoch": 0.3, "grad_norm": 0.6851648092269897, "learning_rate": 1.629195102725318e-05, "loss": 2.1274, "step": 8876 }, { "epoch": 0.3, "grad_norm": 0.7131861448287964, "learning_rate": 1.6291124860940364e-05, "loss": 2.1521, "step": 8877 }, { "epoch": 0.3, "grad_norm": 0.703472912311554, "learning_rate": 1.629029862355411e-05, "loss": 2.0658, "step": 8878 }, { "epoch": 0.3, "grad_norm": 0.7242181897163391, "learning_rate": 1.6289472315103748e-05, "loss": 2.1965, "step": 8879 }, { "epoch": 0.3, "grad_norm": 0.7093063592910767, "learning_rate": 1.6288645935598612e-05, "loss": 2.1472, "step": 8880 }, { "epoch": 0.3, "grad_norm": 0.7213153839111328, "learning_rate": 1.6287819485048042e-05, "loss": 2.1557, "step": 8881 }, { "epoch": 0.3, "grad_norm": 0.7248581051826477, "learning_rate": 1.6286992963461373e-05, "loss": 2.1838, "step": 8882 }, { "epoch": 0.3, "grad_norm": 0.7324124574661255, "learning_rate": 1.6286166370847938e-05, "loss": 2.1358, "step": 8883 }, { "epoch": 0.3, "grad_norm": 0.7122989892959595, "learning_rate": 1.6285339707217083e-05, "loss": 2.1307, "step": 8884 }, { "epoch": 0.3, "grad_norm": 0.723582923412323, "learning_rate": 1.628451297257814e-05, "loss": 2.1371, "step": 8885 }, { "epoch": 0.3, "grad_norm": 0.7509222030639648, "learning_rate": 1.628368616694046e-05, "loss": 2.2076, "step": 8886 }, { "epoch": 0.3, "grad_norm": 0.692204475402832, "learning_rate": 1.6282859290313367e-05, "loss": 2.0991, "step": 8887 }, { "epoch": 0.3, "grad_norm": 0.6966241002082825, "learning_rate": 1.6282032342706215e-05, "loss": 2.1488, "step": 8888 }, { "epoch": 0.3, "grad_norm": 0.7217451930046082, "learning_rate": 1.6281205324128348e-05, "loss": 2.1477, "step": 8889 }, { "epoch": 0.3, "grad_norm": 0.7313122749328613, "learning_rate": 1.62803782345891e-05, "loss": 2.156, "step": 8890 }, { "epoch": 0.3, "grad_norm": 0.7585131525993347, "learning_rate": 1.627955107409782e-05, "loss": 2.1118, "step": 8891 }, { "epoch": 0.3, "grad_norm": 0.7477955222129822, "learning_rate": 1.627872384266385e-05, "loss": 2.1316, "step": 8892 }, { "epoch": 0.3, "grad_norm": 0.7714107632637024, "learning_rate": 1.627789654029654e-05, "loss": 2.1702, "step": 8893 }, { "epoch": 0.3, "grad_norm": 0.7142125368118286, "learning_rate": 1.6277069167005236e-05, "loss": 2.1266, "step": 8894 }, { "epoch": 0.3, "grad_norm": 0.7066044807434082, "learning_rate": 1.6276241722799283e-05, "loss": 2.1583, "step": 8895 }, { "epoch": 0.3, "grad_norm": 0.7052202224731445, "learning_rate": 1.6275414207688025e-05, "loss": 2.1031, "step": 8896 }, { "epoch": 0.3, "grad_norm": 0.7488783001899719, "learning_rate": 1.6274586621680818e-05, "loss": 2.2091, "step": 8897 }, { "epoch": 0.3, "grad_norm": 0.6974542737007141, "learning_rate": 1.627375896478701e-05, "loss": 2.1284, "step": 8898 }, { "epoch": 0.3, "grad_norm": 0.6987120509147644, "learning_rate": 1.6272931237015946e-05, "loss": 2.1194, "step": 8899 }, { "epoch": 0.3, "grad_norm": 0.7495936155319214, "learning_rate": 1.6272103438376983e-05, "loss": 2.1288, "step": 8900 }, { "epoch": 0.3, "grad_norm": 0.7632669806480408, "learning_rate": 1.6271275568879473e-05, "loss": 2.1519, "step": 8901 }, { "epoch": 0.3, "grad_norm": 0.7524606585502625, "learning_rate": 1.6270447628532763e-05, "loss": 2.1329, "step": 8902 }, { "epoch": 0.3, "grad_norm": 0.7062490582466125, "learning_rate": 1.6269619617346214e-05, "loss": 2.1789, "step": 8903 }, { "epoch": 0.3, "grad_norm": 0.7303714156150818, "learning_rate": 1.6268791535329178e-05, "loss": 2.1431, "step": 8904 }, { "epoch": 0.3, "grad_norm": 0.7478488087654114, "learning_rate": 1.6267963382491005e-05, "loss": 2.1338, "step": 8905 }, { "epoch": 0.3, "grad_norm": 0.7119024991989136, "learning_rate": 1.6267135158841057e-05, "loss": 2.135, "step": 8906 }, { "epoch": 0.3, "grad_norm": 0.7577170133590698, "learning_rate": 1.626630686438869e-05, "loss": 2.1089, "step": 8907 }, { "epoch": 0.3, "grad_norm": 0.7047663927078247, "learning_rate": 1.626547849914326e-05, "loss": 2.1295, "step": 8908 }, { "epoch": 0.3, "grad_norm": 0.7324678301811218, "learning_rate": 1.6264650063114126e-05, "loss": 2.1463, "step": 8909 }, { "epoch": 0.3, "grad_norm": 0.704318642616272, "learning_rate": 1.6263821556310642e-05, "loss": 2.1334, "step": 8910 }, { "epoch": 0.3, "grad_norm": 0.7360548973083496, "learning_rate": 1.626299297874218e-05, "loss": 2.1137, "step": 8911 }, { "epoch": 0.3, "grad_norm": 0.7151976227760315, "learning_rate": 1.626216433041809e-05, "loss": 2.1272, "step": 8912 }, { "epoch": 0.3, "grad_norm": 0.7338722944259644, "learning_rate": 1.6261335611347743e-05, "loss": 2.1485, "step": 8913 }, { "epoch": 0.3, "grad_norm": 0.7278993129730225, "learning_rate": 1.6260506821540485e-05, "loss": 2.1021, "step": 8914 }, { "epoch": 0.3, "grad_norm": 0.7019294500350952, "learning_rate": 1.62596779610057e-05, "loss": 2.1049, "step": 8915 }, { "epoch": 0.3, "grad_norm": 0.7294197678565979, "learning_rate": 1.625884902975274e-05, "loss": 2.2182, "step": 8916 }, { "epoch": 0.3, "grad_norm": 0.7339378595352173, "learning_rate": 1.625802002779097e-05, "loss": 2.0896, "step": 8917 }, { "epoch": 0.3, "grad_norm": 0.7534977197647095, "learning_rate": 1.6257190955129757e-05, "loss": 2.0876, "step": 8918 }, { "epoch": 0.3, "grad_norm": 0.6987320184707642, "learning_rate": 1.6256361811778466e-05, "loss": 2.1723, "step": 8919 }, { "epoch": 0.3, "grad_norm": 0.6845196485519409, "learning_rate": 1.625553259774647e-05, "loss": 2.1065, "step": 8920 }, { "epoch": 0.3, "grad_norm": 0.7144913077354431, "learning_rate": 1.6254703313043127e-05, "loss": 2.0659, "step": 8921 }, { "epoch": 0.3, "grad_norm": 0.706723690032959, "learning_rate": 1.625387395767782e-05, "loss": 2.1204, "step": 8922 }, { "epoch": 0.3, "grad_norm": 0.7010518312454224, "learning_rate": 1.62530445316599e-05, "loss": 2.0792, "step": 8923 }, { "epoch": 0.3, "grad_norm": 0.7021790146827698, "learning_rate": 1.6252215034998755e-05, "loss": 2.1956, "step": 8924 }, { "epoch": 0.3, "grad_norm": 0.7198841571807861, "learning_rate": 1.6251385467703747e-05, "loss": 2.1197, "step": 8925 }, { "epoch": 0.3, "grad_norm": 0.7137982845306396, "learning_rate": 1.625055582978425e-05, "loss": 2.1395, "step": 8926 }, { "epoch": 0.3, "grad_norm": 0.7104584574699402, "learning_rate": 1.6249726121249635e-05, "loss": 2.163, "step": 8927 }, { "epoch": 0.3, "grad_norm": 0.7310519218444824, "learning_rate": 1.6248896342109277e-05, "loss": 2.1281, "step": 8928 }, { "epoch": 0.3, "grad_norm": 0.7242134213447571, "learning_rate": 1.624806649237255e-05, "loss": 2.159, "step": 8929 }, { "epoch": 0.3, "grad_norm": 0.7312332391738892, "learning_rate": 1.624723657204883e-05, "loss": 2.2189, "step": 8930 }, { "epoch": 0.3, "grad_norm": 0.7232054471969604, "learning_rate": 1.6246406581147493e-05, "loss": 2.0991, "step": 8931 }, { "epoch": 0.3, "grad_norm": 0.692573606967926, "learning_rate": 1.6245576519677915e-05, "loss": 2.1192, "step": 8932 }, { "epoch": 0.3, "grad_norm": 0.7269569039344788, "learning_rate": 1.6244746387649476e-05, "loss": 2.2017, "step": 8933 }, { "epoch": 0.3, "grad_norm": 0.7473732829093933, "learning_rate": 1.6243916185071548e-05, "loss": 2.1832, "step": 8934 }, { "epoch": 0.3, "grad_norm": 0.7651844620704651, "learning_rate": 1.6243085911953516e-05, "loss": 2.1119, "step": 8935 }, { "epoch": 0.3, "grad_norm": 0.7066621780395508, "learning_rate": 1.6242255568304757e-05, "loss": 2.0934, "step": 8936 }, { "epoch": 0.3, "grad_norm": 0.7401037812232971, "learning_rate": 1.6241425154134655e-05, "loss": 2.2184, "step": 8937 }, { "epoch": 0.3, "grad_norm": 0.7021321058273315, "learning_rate": 1.624059466945259e-05, "loss": 2.1139, "step": 8938 }, { "epoch": 0.3, "grad_norm": 0.727994441986084, "learning_rate": 1.623976411426794e-05, "loss": 2.1272, "step": 8939 }, { "epoch": 0.3, "grad_norm": 0.7134078145027161, "learning_rate": 1.6238933488590097e-05, "loss": 2.0149, "step": 8940 }, { "epoch": 0.3, "grad_norm": 0.7448168992996216, "learning_rate": 1.6238102792428435e-05, "loss": 2.1625, "step": 8941 }, { "epoch": 0.3, "grad_norm": 0.7107097506523132, "learning_rate": 1.6237272025792348e-05, "loss": 2.1668, "step": 8942 }, { "epoch": 0.3, "grad_norm": 0.7188959717750549, "learning_rate": 1.6236441188691214e-05, "loss": 2.1161, "step": 8943 }, { "epoch": 0.3, "grad_norm": 0.7116233706474304, "learning_rate": 1.6235610281134425e-05, "loss": 2.1555, "step": 8944 }, { "epoch": 0.3, "grad_norm": 0.7164419889450073, "learning_rate": 1.6234779303131362e-05, "loss": 2.2121, "step": 8945 }, { "epoch": 0.3, "grad_norm": 0.7134877443313599, "learning_rate": 1.6233948254691418e-05, "loss": 2.1065, "step": 8946 }, { "epoch": 0.3, "grad_norm": 0.733862042427063, "learning_rate": 1.6233117135823978e-05, "loss": 2.0999, "step": 8947 }, { "epoch": 0.3, "grad_norm": 0.7173563838005066, "learning_rate": 1.6232285946538437e-05, "loss": 2.1461, "step": 8948 }, { "epoch": 0.3, "grad_norm": 0.6966747045516968, "learning_rate": 1.623145468684418e-05, "loss": 2.1697, "step": 8949 }, { "epoch": 0.3, "grad_norm": 0.7019830942153931, "learning_rate": 1.62306233567506e-05, "loss": 2.0592, "step": 8950 }, { "epoch": 0.3, "grad_norm": 0.6916581988334656, "learning_rate": 1.622979195626709e-05, "loss": 2.0856, "step": 8951 }, { "epoch": 0.3, "grad_norm": 0.6881765127182007, "learning_rate": 1.622896048540304e-05, "loss": 2.074, "step": 8952 }, { "epoch": 0.3, "grad_norm": 0.7288192510604858, "learning_rate": 1.6228128944167848e-05, "loss": 2.1247, "step": 8953 }, { "epoch": 0.3, "grad_norm": 0.7455095052719116, "learning_rate": 1.62272973325709e-05, "loss": 2.1643, "step": 8954 }, { "epoch": 0.3, "grad_norm": 0.7479301691055298, "learning_rate": 1.6226465650621598e-05, "loss": 2.1373, "step": 8955 }, { "epoch": 0.3, "grad_norm": 0.7094659805297852, "learning_rate": 1.622563389832934e-05, "loss": 2.1093, "step": 8956 }, { "epoch": 0.3, "grad_norm": 0.6840853095054626, "learning_rate": 1.6224802075703515e-05, "loss": 2.0851, "step": 8957 }, { "epoch": 0.3, "grad_norm": 0.7015469670295715, "learning_rate": 1.6223970182753525e-05, "loss": 2.0948, "step": 8958 }, { "epoch": 0.3, "grad_norm": 0.7198246121406555, "learning_rate": 1.622313821948877e-05, "loss": 2.2097, "step": 8959 }, { "epoch": 0.3, "grad_norm": 0.6909313797950745, "learning_rate": 1.6222306185918645e-05, "loss": 2.1223, "step": 8960 }, { "epoch": 0.3, "grad_norm": 0.7057914137840271, "learning_rate": 1.622147408205255e-05, "loss": 2.1095, "step": 8961 }, { "epoch": 0.3, "grad_norm": 0.7460607290267944, "learning_rate": 1.622064190789989e-05, "loss": 2.1432, "step": 8962 }, { "epoch": 0.3, "grad_norm": 0.6938759684562683, "learning_rate": 1.621980966347006e-05, "loss": 2.1748, "step": 8963 }, { "epoch": 0.3, "grad_norm": 0.7186651825904846, "learning_rate": 1.6218977348772466e-05, "loss": 2.2098, "step": 8964 }, { "epoch": 0.3, "grad_norm": 0.7120456099510193, "learning_rate": 1.621814496381651e-05, "loss": 2.146, "step": 8965 }, { "epoch": 0.3, "grad_norm": 0.7139744162559509, "learning_rate": 1.6217312508611597e-05, "loss": 2.0768, "step": 8966 }, { "epoch": 0.3, "grad_norm": 0.6866811513900757, "learning_rate": 1.6216479983167133e-05, "loss": 2.0386, "step": 8967 }, { "epoch": 0.3, "grad_norm": 0.7079299092292786, "learning_rate": 1.6215647387492518e-05, "loss": 2.1422, "step": 8968 }, { "epoch": 0.3, "grad_norm": 0.7179138660430908, "learning_rate": 1.6214814721597164e-05, "loss": 2.143, "step": 8969 }, { "epoch": 0.3, "grad_norm": 0.7471867203712463, "learning_rate": 1.6213981985490476e-05, "loss": 2.1949, "step": 8970 }, { "epoch": 0.3, "grad_norm": 0.7435702085494995, "learning_rate": 1.621314917918186e-05, "loss": 2.1242, "step": 8971 }, { "epoch": 0.3, "grad_norm": 0.7322764992713928, "learning_rate": 1.6212316302680723e-05, "loss": 2.1591, "step": 8972 }, { "epoch": 0.3, "grad_norm": 0.7437384724617004, "learning_rate": 1.621148335599648e-05, "loss": 2.0912, "step": 8973 }, { "epoch": 0.3, "grad_norm": 0.7254350781440735, "learning_rate": 1.621065033913854e-05, "loss": 2.1919, "step": 8974 }, { "epoch": 0.3, "grad_norm": 0.7194914817810059, "learning_rate": 1.6209817252116313e-05, "loss": 2.1169, "step": 8975 }, { "epoch": 0.3, "grad_norm": 0.6991117596626282, "learning_rate": 1.6208984094939206e-05, "loss": 2.1374, "step": 8976 }, { "epoch": 0.3, "grad_norm": 0.733970582485199, "learning_rate": 1.620815086761664e-05, "loss": 2.1179, "step": 8977 }, { "epoch": 0.3, "grad_norm": 0.6946825981140137, "learning_rate": 1.620731757015802e-05, "loss": 2.1284, "step": 8978 }, { "epoch": 0.3, "grad_norm": 0.7070122361183167, "learning_rate": 1.6206484202572765e-05, "loss": 2.1042, "step": 8979 }, { "epoch": 0.3, "grad_norm": 0.7761732339859009, "learning_rate": 1.620565076487029e-05, "loss": 2.1783, "step": 8980 }, { "epoch": 0.3, "grad_norm": 0.7341007590293884, "learning_rate": 1.6204817257060006e-05, "loss": 2.1719, "step": 8981 }, { "epoch": 0.3, "grad_norm": 0.7100133299827576, "learning_rate": 1.620398367915134e-05, "loss": 2.1772, "step": 8982 }, { "epoch": 0.3, "grad_norm": 0.6998253464698792, "learning_rate": 1.6203150031153693e-05, "loss": 2.1283, "step": 8983 }, { "epoch": 0.3, "grad_norm": 0.7178162932395935, "learning_rate": 1.62023163130765e-05, "loss": 2.0893, "step": 8984 }, { "epoch": 0.3, "grad_norm": 0.7074682116508484, "learning_rate": 1.620148252492917e-05, "loss": 2.1625, "step": 8985 }, { "epoch": 0.3, "grad_norm": 0.7318282127380371, "learning_rate": 1.620064866672112e-05, "loss": 2.1964, "step": 8986 }, { "epoch": 0.3, "grad_norm": 0.7359329462051392, "learning_rate": 1.619981473846178e-05, "loss": 2.1485, "step": 8987 }, { "epoch": 0.3, "grad_norm": 0.694105327129364, "learning_rate": 1.6198980740160568e-05, "loss": 2.1176, "step": 8988 }, { "epoch": 0.3, "grad_norm": 0.7203445434570312, "learning_rate": 1.6198146671826902e-05, "loss": 2.0862, "step": 8989 }, { "epoch": 0.3, "grad_norm": 0.7165696620941162, "learning_rate": 1.619731253347021e-05, "loss": 2.171, "step": 8990 }, { "epoch": 0.3, "grad_norm": 0.7560213804244995, "learning_rate": 1.6196478325099908e-05, "loss": 2.1725, "step": 8991 }, { "epoch": 0.3, "grad_norm": 0.7538928389549255, "learning_rate": 1.619564404672543e-05, "loss": 2.1157, "step": 8992 }, { "epoch": 0.3, "grad_norm": 0.7136211395263672, "learning_rate": 1.6194809698356192e-05, "loss": 2.1034, "step": 8993 }, { "epoch": 0.3, "grad_norm": 0.7151387333869934, "learning_rate": 1.619397528000163e-05, "loss": 2.0754, "step": 8994 }, { "epoch": 0.3, "grad_norm": 0.6979206800460815, "learning_rate": 1.6193140791671164e-05, "loss": 2.1314, "step": 8995 }, { "epoch": 0.3, "grad_norm": 0.7063075304031372, "learning_rate": 1.619230623337422e-05, "loss": 2.1154, "step": 8996 }, { "epoch": 0.3, "grad_norm": 0.7320974469184875, "learning_rate": 1.6191471605120232e-05, "loss": 2.183, "step": 8997 }, { "epoch": 0.3, "grad_norm": 0.7277300357818604, "learning_rate": 1.6190636906918626e-05, "loss": 2.1677, "step": 8998 }, { "epoch": 0.3, "grad_norm": 0.7248032689094543, "learning_rate": 1.618980213877883e-05, "loss": 2.1266, "step": 8999 }, { "epoch": 0.3, "grad_norm": 0.6959267258644104, "learning_rate": 1.618896730071028e-05, "loss": 2.1184, "step": 9000 }, { "epoch": 0.3, "grad_norm": 0.7078720927238464, "learning_rate": 1.6188132392722404e-05, "loss": 2.1367, "step": 9001 }, { "epoch": 0.3, "grad_norm": 0.7187773585319519, "learning_rate": 1.6187297414824633e-05, "loss": 2.1379, "step": 9002 }, { "epoch": 0.3, "grad_norm": 0.7332285046577454, "learning_rate": 1.61864623670264e-05, "loss": 2.1368, "step": 9003 }, { "epoch": 0.3, "grad_norm": 0.7162072062492371, "learning_rate": 1.6185627249337145e-05, "loss": 2.1621, "step": 9004 }, { "epoch": 0.3, "grad_norm": 0.7183804512023926, "learning_rate": 1.61847920617663e-05, "loss": 2.114, "step": 9005 }, { "epoch": 0.3, "grad_norm": 0.6891884207725525, "learning_rate": 1.6183956804323292e-05, "loss": 2.1134, "step": 9006 }, { "epoch": 0.3, "grad_norm": 0.743189811706543, "learning_rate": 1.618312147701757e-05, "loss": 2.1012, "step": 9007 }, { "epoch": 0.3, "grad_norm": 0.7743074893951416, "learning_rate": 1.6182286079858562e-05, "loss": 2.1569, "step": 9008 }, { "epoch": 0.3, "grad_norm": 0.7137490510940552, "learning_rate": 1.618145061285571e-05, "loss": 2.1666, "step": 9009 }, { "epoch": 0.3, "grad_norm": 0.7253501415252686, "learning_rate": 1.6180615076018456e-05, "loss": 2.2228, "step": 9010 }, { "epoch": 0.3, "grad_norm": 0.7498502731323242, "learning_rate": 1.617977946935623e-05, "loss": 2.1648, "step": 9011 }, { "epoch": 0.3, "grad_norm": 0.6997895836830139, "learning_rate": 1.6178943792878478e-05, "loss": 2.1655, "step": 9012 }, { "epoch": 0.3, "grad_norm": 0.7311301231384277, "learning_rate": 1.617810804659464e-05, "loss": 2.1689, "step": 9013 }, { "epoch": 0.3, "grad_norm": 0.709051251411438, "learning_rate": 1.6177272230514157e-05, "loss": 2.1446, "step": 9014 }, { "epoch": 0.3, "grad_norm": 0.6879650950431824, "learning_rate": 1.6176436344646476e-05, "loss": 2.1112, "step": 9015 }, { "epoch": 0.3, "grad_norm": 0.7095572352409363, "learning_rate": 1.6175600389001034e-05, "loss": 2.0908, "step": 9016 }, { "epoch": 0.3, "grad_norm": 0.7236729264259338, "learning_rate": 1.6174764363587284e-05, "loss": 2.1153, "step": 9017 }, { "epoch": 0.3, "grad_norm": 0.7203137874603271, "learning_rate": 1.617392826841466e-05, "loss": 2.1663, "step": 9018 }, { "epoch": 0.3, "grad_norm": 0.7368481159210205, "learning_rate": 1.617309210349261e-05, "loss": 2.1254, "step": 9019 }, { "epoch": 0.3, "grad_norm": 0.718671977519989, "learning_rate": 1.6172255868830586e-05, "loss": 2.1766, "step": 9020 }, { "epoch": 0.3, "grad_norm": 0.7523877620697021, "learning_rate": 1.6171419564438037e-05, "loss": 2.1286, "step": 9021 }, { "epoch": 0.3, "grad_norm": 0.701265811920166, "learning_rate": 1.6170583190324398e-05, "loss": 2.0959, "step": 9022 }, { "epoch": 0.3, "grad_norm": 0.7192615866661072, "learning_rate": 1.616974674649913e-05, "loss": 2.1058, "step": 9023 }, { "epoch": 0.3, "grad_norm": 0.7001630067825317, "learning_rate": 1.6168910232971683e-05, "loss": 2.1264, "step": 9024 }, { "epoch": 0.3, "grad_norm": 0.7107051610946655, "learning_rate": 1.6168073649751498e-05, "loss": 2.1413, "step": 9025 }, { "epoch": 0.3, "grad_norm": 0.7077645659446716, "learning_rate": 1.6167236996848036e-05, "loss": 2.1035, "step": 9026 }, { "epoch": 0.3, "grad_norm": 0.7043297290802002, "learning_rate": 1.616640027427074e-05, "loss": 2.1767, "step": 9027 }, { "epoch": 0.3, "grad_norm": 0.7185124158859253, "learning_rate": 1.616556348202907e-05, "loss": 2.1148, "step": 9028 }, { "epoch": 0.3, "grad_norm": 0.6983075737953186, "learning_rate": 1.6164726620132478e-05, "loss": 2.1162, "step": 9029 }, { "epoch": 0.3, "grad_norm": 0.7281727194786072, "learning_rate": 1.6163889688590416e-05, "loss": 2.1481, "step": 9030 }, { "epoch": 0.3, "grad_norm": 0.7135176658630371, "learning_rate": 1.6163052687412343e-05, "loss": 2.1164, "step": 9031 }, { "epoch": 0.3, "grad_norm": 0.6879961490631104, "learning_rate": 1.6162215616607714e-05, "loss": 2.1641, "step": 9032 }, { "epoch": 0.3, "grad_norm": 0.6878626942634583, "learning_rate": 1.616137847618598e-05, "loss": 2.1461, "step": 9033 }, { "epoch": 0.3, "grad_norm": 0.7165526151657104, "learning_rate": 1.6160541266156605e-05, "loss": 2.1356, "step": 9034 }, { "epoch": 0.3, "grad_norm": 0.691294252872467, "learning_rate": 1.6159703986529044e-05, "loss": 2.083, "step": 9035 }, { "epoch": 0.3, "grad_norm": 0.7055017948150635, "learning_rate": 1.615886663731276e-05, "loss": 2.1519, "step": 9036 }, { "epoch": 0.3, "grad_norm": 0.7372446060180664, "learning_rate": 1.6158029218517207e-05, "loss": 2.1293, "step": 9037 }, { "epoch": 0.3, "grad_norm": 0.7621408700942993, "learning_rate": 1.615719173015185e-05, "loss": 2.2021, "step": 9038 }, { "epoch": 0.3, "grad_norm": 0.7190671563148499, "learning_rate": 1.615635417222615e-05, "loss": 2.166, "step": 9039 }, { "epoch": 0.3, "grad_norm": 0.716773509979248, "learning_rate": 1.6155516544749567e-05, "loss": 2.0983, "step": 9040 }, { "epoch": 0.3, "grad_norm": 0.718288242816925, "learning_rate": 1.615467884773157e-05, "loss": 2.1386, "step": 9041 }, { "epoch": 0.3, "grad_norm": 0.6960080862045288, "learning_rate": 1.6153841081181613e-05, "loss": 2.0962, "step": 9042 }, { "epoch": 0.3, "grad_norm": 0.7400527596473694, "learning_rate": 1.6153003245109167e-05, "loss": 2.1168, "step": 9043 }, { "epoch": 0.3, "grad_norm": 0.7312054634094238, "learning_rate": 1.61521653395237e-05, "loss": 2.1429, "step": 9044 }, { "epoch": 0.3, "grad_norm": 0.7034080028533936, "learning_rate": 1.6151327364434668e-05, "loss": 2.1731, "step": 9045 }, { "epoch": 0.3, "grad_norm": 0.7475862503051758, "learning_rate": 1.615048931985155e-05, "loss": 2.1292, "step": 9046 }, { "epoch": 0.3, "grad_norm": 0.7107519507408142, "learning_rate": 1.6149651205783807e-05, "loss": 2.1497, "step": 9047 }, { "epoch": 0.3, "grad_norm": 0.728132426738739, "learning_rate": 1.614881302224091e-05, "loss": 2.0438, "step": 9048 }, { "epoch": 0.3, "grad_norm": 0.7282428741455078, "learning_rate": 1.6147974769232325e-05, "loss": 2.201, "step": 9049 }, { "epoch": 0.3, "grad_norm": 0.7181493639945984, "learning_rate": 1.6147136446767523e-05, "loss": 2.1295, "step": 9050 }, { "epoch": 0.3, "grad_norm": 0.7265649437904358, "learning_rate": 1.6146298054855977e-05, "loss": 2.2017, "step": 9051 }, { "epoch": 0.3, "grad_norm": 0.7228426337242126, "learning_rate": 1.6145459593507158e-05, "loss": 2.1885, "step": 9052 }, { "epoch": 0.3, "grad_norm": 0.6979672908782959, "learning_rate": 1.6144621062730537e-05, "loss": 2.1016, "step": 9053 }, { "epoch": 0.3, "grad_norm": 0.7314849495887756, "learning_rate": 1.614378246253559e-05, "loss": 2.1651, "step": 9054 }, { "epoch": 0.3, "grad_norm": 0.7438293099403381, "learning_rate": 1.614294379293179e-05, "loss": 2.1504, "step": 9055 }, { "epoch": 0.3, "grad_norm": 0.723396360874176, "learning_rate": 1.614210505392861e-05, "loss": 2.1252, "step": 9056 }, { "epoch": 0.3, "grad_norm": 0.6903195381164551, "learning_rate": 1.6141266245535527e-05, "loss": 2.1004, "step": 9057 }, { "epoch": 0.3, "grad_norm": 0.7252946496009827, "learning_rate": 1.6140427367762013e-05, "loss": 2.1212, "step": 9058 }, { "epoch": 0.3, "grad_norm": 0.728418231010437, "learning_rate": 1.6139588420617555e-05, "loss": 2.1324, "step": 9059 }, { "epoch": 0.3, "grad_norm": 0.7290331125259399, "learning_rate": 1.6138749404111626e-05, "loss": 2.0888, "step": 9060 }, { "epoch": 0.3, "grad_norm": 0.7248278856277466, "learning_rate": 1.61379103182537e-05, "loss": 2.1773, "step": 9061 }, { "epoch": 0.3, "grad_norm": 0.7369230389595032, "learning_rate": 1.6137071163053262e-05, "loss": 2.1476, "step": 9062 }, { "epoch": 0.3, "grad_norm": 0.7253776788711548, "learning_rate": 1.613623193851979e-05, "loss": 2.0841, "step": 9063 }, { "epoch": 0.3, "grad_norm": 0.7122920155525208, "learning_rate": 1.6135392644662762e-05, "loss": 2.1002, "step": 9064 }, { "epoch": 0.3, "grad_norm": 0.7598580121994019, "learning_rate": 1.613455328149167e-05, "loss": 2.1341, "step": 9065 }, { "epoch": 0.3, "grad_norm": 0.7112950086593628, "learning_rate": 1.6133713849015987e-05, "loss": 2.0898, "step": 9066 }, { "epoch": 0.3, "grad_norm": 0.7106995582580566, "learning_rate": 1.6132874347245204e-05, "loss": 2.1188, "step": 9067 }, { "epoch": 0.3, "grad_norm": 0.7363333702087402, "learning_rate": 1.6132034776188796e-05, "loss": 2.1318, "step": 9068 }, { "epoch": 0.3, "grad_norm": 0.7098732590675354, "learning_rate": 1.6131195135856253e-05, "loss": 2.1089, "step": 9069 }, { "epoch": 0.3, "grad_norm": 0.7206540107727051, "learning_rate": 1.6130355426257063e-05, "loss": 2.1553, "step": 9070 }, { "epoch": 0.3, "grad_norm": 0.697512149810791, "learning_rate": 1.6129515647400705e-05, "loss": 2.1651, "step": 9071 }, { "epoch": 0.3, "grad_norm": 0.700214684009552, "learning_rate": 1.612867579929668e-05, "loss": 2.0557, "step": 9072 }, { "epoch": 0.3, "grad_norm": 0.7461444139480591, "learning_rate": 1.612783588195446e-05, "loss": 2.1188, "step": 9073 }, { "epoch": 0.3, "grad_norm": 0.6864328980445862, "learning_rate": 1.6126995895383542e-05, "loss": 2.031, "step": 9074 }, { "epoch": 0.3, "grad_norm": 0.6722150444984436, "learning_rate": 1.612615583959342e-05, "loss": 2.1271, "step": 9075 }, { "epoch": 0.3, "grad_norm": 0.7236228585243225, "learning_rate": 1.6125315714593573e-05, "loss": 2.141, "step": 9076 }, { "epoch": 0.3, "grad_norm": 0.6886546611785889, "learning_rate": 1.6124475520393508e-05, "loss": 2.0355, "step": 9077 }, { "epoch": 0.3, "grad_norm": 0.7419759035110474, "learning_rate": 1.61236352570027e-05, "loss": 2.1241, "step": 9078 }, { "epoch": 0.3, "grad_norm": 0.7256042957305908, "learning_rate": 1.6122794924430655e-05, "loss": 2.1811, "step": 9079 }, { "epoch": 0.3, "grad_norm": 0.7233948707580566, "learning_rate": 1.6121954522686857e-05, "loss": 2.1728, "step": 9080 }, { "epoch": 0.3, "grad_norm": 0.7298876047134399, "learning_rate": 1.6121114051780807e-05, "loss": 2.0923, "step": 9081 }, { "epoch": 0.3, "grad_norm": 0.6840257048606873, "learning_rate": 1.6120273511721997e-05, "loss": 2.1553, "step": 9082 }, { "epoch": 0.3, "grad_norm": 0.740852415561676, "learning_rate": 1.6119432902519925e-05, "loss": 2.1631, "step": 9083 }, { "epoch": 0.3, "grad_norm": 0.7222468852996826, "learning_rate": 1.6118592224184085e-05, "loss": 2.1989, "step": 9084 }, { "epoch": 0.3, "grad_norm": 0.7189435362815857, "learning_rate": 1.6117751476723972e-05, "loss": 2.162, "step": 9085 }, { "epoch": 0.3, "grad_norm": 0.7160603404045105, "learning_rate": 1.6116910660149096e-05, "loss": 2.1591, "step": 9086 }, { "epoch": 0.3, "grad_norm": 0.729033887386322, "learning_rate": 1.6116069774468944e-05, "loss": 2.1807, "step": 9087 }, { "epoch": 0.3, "grad_norm": 0.7417169213294983, "learning_rate": 1.6115228819693022e-05, "loss": 2.1222, "step": 9088 }, { "epoch": 0.3, "grad_norm": 0.7592983841896057, "learning_rate": 1.6114387795830823e-05, "loss": 2.1079, "step": 9089 }, { "epoch": 0.3, "grad_norm": 0.7269306778907776, "learning_rate": 1.611354670289186e-05, "loss": 2.1275, "step": 9090 }, { "epoch": 0.3, "grad_norm": 0.7094120979309082, "learning_rate": 1.611270554088563e-05, "loss": 2.095, "step": 9091 }, { "epoch": 0.3, "grad_norm": 0.6961520910263062, "learning_rate": 1.611186430982163e-05, "loss": 2.1044, "step": 9092 }, { "epoch": 0.3, "grad_norm": 0.7210471630096436, "learning_rate": 1.611102300970937e-05, "loss": 2.1773, "step": 9093 }, { "epoch": 0.3, "grad_norm": 0.7415155172348022, "learning_rate": 1.6110181640558358e-05, "loss": 2.1331, "step": 9094 }, { "epoch": 0.3, "grad_norm": 0.7176220417022705, "learning_rate": 1.6109340202378093e-05, "loss": 2.1611, "step": 9095 }, { "epoch": 0.3, "grad_norm": 0.7124950885772705, "learning_rate": 1.6108498695178078e-05, "loss": 2.1661, "step": 9096 }, { "epoch": 0.3, "grad_norm": 0.7172918319702148, "learning_rate": 1.610765711896783e-05, "loss": 2.0726, "step": 9097 }, { "epoch": 0.3, "grad_norm": 0.7279451489448547, "learning_rate": 1.6106815473756848e-05, "loss": 2.0828, "step": 9098 }, { "epoch": 0.3, "grad_norm": 0.7020747661590576, "learning_rate": 1.6105973759554648e-05, "loss": 2.0909, "step": 9099 }, { "epoch": 0.3, "grad_norm": 0.674650251865387, "learning_rate": 1.6105131976370732e-05, "loss": 2.1026, "step": 9100 }, { "epoch": 0.3, "grad_norm": 0.7160528302192688, "learning_rate": 1.610429012421461e-05, "loss": 2.1424, "step": 9101 }, { "epoch": 0.3, "grad_norm": 0.7222595810890198, "learning_rate": 1.61034482030958e-05, "loss": 2.1229, "step": 9102 }, { "epoch": 0.3, "grad_norm": 0.7355678677558899, "learning_rate": 1.610260621302381e-05, "loss": 2.1477, "step": 9103 }, { "epoch": 0.3, "grad_norm": 0.7936947345733643, "learning_rate": 1.6101764154008148e-05, "loss": 2.174, "step": 9104 }, { "epoch": 0.3, "grad_norm": 0.7312738299369812, "learning_rate": 1.6100922026058336e-05, "loss": 2.1422, "step": 9105 }, { "epoch": 0.3, "grad_norm": 0.7081700563430786, "learning_rate": 1.6100079829183877e-05, "loss": 2.1572, "step": 9106 }, { "epoch": 0.3, "grad_norm": 0.728241503238678, "learning_rate": 1.609923756339429e-05, "loss": 2.1513, "step": 9107 }, { "epoch": 0.3, "grad_norm": 0.72907954454422, "learning_rate": 1.6098395228699094e-05, "loss": 2.1665, "step": 9108 }, { "epoch": 0.3, "grad_norm": 0.6978530883789062, "learning_rate": 1.6097552825107803e-05, "loss": 2.1052, "step": 9109 }, { "epoch": 0.3, "grad_norm": 0.7283750772476196, "learning_rate": 1.6096710352629935e-05, "loss": 2.1299, "step": 9110 }, { "epoch": 0.3, "grad_norm": 0.7288293242454529, "learning_rate": 1.6095867811275003e-05, "loss": 2.1771, "step": 9111 }, { "epoch": 0.3, "grad_norm": 0.7400304079055786, "learning_rate": 1.6095025201052532e-05, "loss": 2.1902, "step": 9112 }, { "epoch": 0.3, "grad_norm": 0.7296028137207031, "learning_rate": 1.6094182521972036e-05, "loss": 2.1115, "step": 9113 }, { "epoch": 0.3, "grad_norm": 0.7431108951568604, "learning_rate": 1.609333977404304e-05, "loss": 2.0976, "step": 9114 }, { "epoch": 0.3, "grad_norm": 0.7112067341804504, "learning_rate": 1.6092496957275062e-05, "loss": 2.1574, "step": 9115 }, { "epoch": 0.3, "grad_norm": 0.7418813109397888, "learning_rate": 1.6091654071677623e-05, "loss": 2.1188, "step": 9116 }, { "epoch": 0.3, "grad_norm": 0.6986328959465027, "learning_rate": 1.6090811117260245e-05, "loss": 2.1436, "step": 9117 }, { "epoch": 0.3, "grad_norm": 0.6848977208137512, "learning_rate": 1.6089968094032454e-05, "loss": 2.148, "step": 9118 }, { "epoch": 0.3, "grad_norm": 0.7670209407806396, "learning_rate": 1.608912500200377e-05, "loss": 2.1364, "step": 9119 }, { "epoch": 0.3, "grad_norm": 0.7172484397888184, "learning_rate": 1.6088281841183725e-05, "loss": 2.1509, "step": 9120 }, { "epoch": 0.3, "grad_norm": 0.728642463684082, "learning_rate": 1.6087438611581835e-05, "loss": 2.0701, "step": 9121 }, { "epoch": 0.3, "grad_norm": 0.6924048662185669, "learning_rate": 1.6086595313207633e-05, "loss": 2.0979, "step": 9122 }, { "epoch": 0.3, "grad_norm": 0.7285665273666382, "learning_rate": 1.608575194607065e-05, "loss": 2.1435, "step": 9123 }, { "epoch": 0.3, "grad_norm": 0.6986537575721741, "learning_rate": 1.60849085101804e-05, "loss": 2.0778, "step": 9124 }, { "epoch": 0.3, "grad_norm": 0.7152937650680542, "learning_rate": 1.6084065005546424e-05, "loss": 2.1787, "step": 9125 }, { "epoch": 0.3, "grad_norm": 0.7102826237678528, "learning_rate": 1.6083221432178246e-05, "loss": 2.1672, "step": 9126 }, { "epoch": 0.3, "grad_norm": 0.7468326091766357, "learning_rate": 1.6082377790085395e-05, "loss": 2.1497, "step": 9127 }, { "epoch": 0.3, "grad_norm": 0.702418863773346, "learning_rate": 1.6081534079277406e-05, "loss": 2.1326, "step": 9128 }, { "epoch": 0.3, "grad_norm": 0.7368364930152893, "learning_rate": 1.608069029976381e-05, "loss": 2.1836, "step": 9129 }, { "epoch": 0.3, "grad_norm": 0.7184275388717651, "learning_rate": 1.6079846451554142e-05, "loss": 2.1719, "step": 9130 }, { "epoch": 0.3, "grad_norm": 0.7169089317321777, "learning_rate": 1.607900253465793e-05, "loss": 2.1915, "step": 9131 }, { "epoch": 0.3, "grad_norm": 0.6985670924186707, "learning_rate": 1.6078158549084707e-05, "loss": 2.1547, "step": 9132 }, { "epoch": 0.3, "grad_norm": 0.7136695384979248, "learning_rate": 1.607731449484401e-05, "loss": 2.0828, "step": 9133 }, { "epoch": 0.3, "grad_norm": 0.7812572121620178, "learning_rate": 1.607647037194538e-05, "loss": 2.2155, "step": 9134 }, { "epoch": 0.3, "grad_norm": 0.7029160261154175, "learning_rate": 1.6075626180398346e-05, "loss": 2.1264, "step": 9135 }, { "epoch": 0.3, "grad_norm": 0.7137871384620667, "learning_rate": 1.607478192021245e-05, "loss": 2.117, "step": 9136 }, { "epoch": 0.3, "grad_norm": 0.6959608793258667, "learning_rate": 1.6073937591397225e-05, "loss": 2.0638, "step": 9137 }, { "epoch": 0.3, "grad_norm": 0.7332161068916321, "learning_rate": 1.607309319396221e-05, "loss": 2.1515, "step": 9138 }, { "epoch": 0.3, "grad_norm": 0.7073734402656555, "learning_rate": 1.6072248727916953e-05, "loss": 2.0645, "step": 9139 }, { "epoch": 0.3, "grad_norm": 0.7115029692649841, "learning_rate": 1.6071404193270988e-05, "loss": 2.1261, "step": 9140 }, { "epoch": 0.3, "grad_norm": 0.6940522789955139, "learning_rate": 1.6070559590033857e-05, "loss": 2.1456, "step": 9141 }, { "epoch": 0.3, "grad_norm": 0.7211481928825378, "learning_rate": 1.60697149182151e-05, "loss": 2.1263, "step": 9142 }, { "epoch": 0.3, "grad_norm": 0.719411313533783, "learning_rate": 1.6068870177824263e-05, "loss": 2.1389, "step": 9143 }, { "epoch": 0.3, "grad_norm": 0.6907497644424438, "learning_rate": 1.6068025368870883e-05, "loss": 2.1278, "step": 9144 }, { "epoch": 0.3, "grad_norm": 0.720475435256958, "learning_rate": 1.6067180491364514e-05, "loss": 2.0869, "step": 9145 }, { "epoch": 0.3, "grad_norm": 0.7141836285591125, "learning_rate": 1.6066335545314693e-05, "loss": 2.1806, "step": 9146 }, { "epoch": 0.3, "grad_norm": 0.7256964445114136, "learning_rate": 1.606549053073097e-05, "loss": 2.1216, "step": 9147 }, { "epoch": 0.3, "grad_norm": 0.7165928483009338, "learning_rate": 1.6064645447622887e-05, "loss": 2.1144, "step": 9148 }, { "epoch": 0.3, "grad_norm": 0.7332196831703186, "learning_rate": 1.606380029599999e-05, "loss": 2.1664, "step": 9149 }, { "epoch": 0.3, "grad_norm": 0.7206238508224487, "learning_rate": 1.606295507587184e-05, "loss": 2.1206, "step": 9150 }, { "epoch": 0.3, "grad_norm": 0.7073818445205688, "learning_rate": 1.6062109787247974e-05, "loss": 2.1009, "step": 9151 }, { "epoch": 0.3, "grad_norm": 0.7226938009262085, "learning_rate": 1.6061264430137943e-05, "loss": 2.1238, "step": 9152 }, { "epoch": 0.3, "grad_norm": 0.7461568117141724, "learning_rate": 1.6060419004551296e-05, "loss": 2.1419, "step": 9153 }, { "epoch": 0.3, "grad_norm": 0.7064507603645325, "learning_rate": 1.6059573510497593e-05, "loss": 2.1393, "step": 9154 }, { "epoch": 0.3, "grad_norm": 0.7169920206069946, "learning_rate": 1.6058727947986376e-05, "loss": 2.2082, "step": 9155 }, { "epoch": 0.3, "grad_norm": 0.7013477087020874, "learning_rate": 1.6057882317027202e-05, "loss": 2.0941, "step": 9156 }, { "epoch": 0.3, "grad_norm": 0.6975902318954468, "learning_rate": 1.605703661762962e-05, "loss": 2.113, "step": 9157 }, { "epoch": 0.3, "grad_norm": 0.7152775526046753, "learning_rate": 1.6056190849803192e-05, "loss": 2.119, "step": 9158 }, { "epoch": 0.3, "grad_norm": 0.7113847136497498, "learning_rate": 1.605534501355747e-05, "loss": 2.1438, "step": 9159 }, { "epoch": 0.3, "grad_norm": 0.6999005079269409, "learning_rate": 1.6054499108902007e-05, "loss": 2.1607, "step": 9160 }, { "epoch": 0.3, "grad_norm": 0.7313960194587708, "learning_rate": 1.605365313584636e-05, "loss": 2.1366, "step": 9161 }, { "epoch": 0.3, "grad_norm": 0.7339703440666199, "learning_rate": 1.6052807094400093e-05, "loss": 2.1109, "step": 9162 }, { "epoch": 0.3, "grad_norm": 0.7059674263000488, "learning_rate": 1.6051960984572753e-05, "loss": 2.1883, "step": 9163 }, { "epoch": 0.3, "grad_norm": 0.722870945930481, "learning_rate": 1.605111480637391e-05, "loss": 2.1347, "step": 9164 }, { "epoch": 0.3, "grad_norm": 0.7093409895896912, "learning_rate": 1.6050268559813115e-05, "loss": 2.1132, "step": 9165 }, { "epoch": 0.3, "grad_norm": 0.6932336091995239, "learning_rate": 1.6049422244899928e-05, "loss": 2.1067, "step": 9166 }, { "epoch": 0.3, "grad_norm": 0.7209358811378479, "learning_rate": 1.604857586164392e-05, "loss": 2.0301, "step": 9167 }, { "epoch": 0.31, "grad_norm": 0.7585920691490173, "learning_rate": 1.6047729410054645e-05, "loss": 2.1395, "step": 9168 }, { "epoch": 0.31, "grad_norm": 0.7451275587081909, "learning_rate": 1.6046882890141664e-05, "loss": 2.0805, "step": 9169 }, { "epoch": 0.31, "grad_norm": 0.7101922631263733, "learning_rate": 1.6046036301914547e-05, "loss": 2.1187, "step": 9170 }, { "epoch": 0.31, "grad_norm": 0.7058566212654114, "learning_rate": 1.6045189645382856e-05, "loss": 2.0607, "step": 9171 }, { "epoch": 0.31, "grad_norm": 0.7104160785675049, "learning_rate": 1.6044342920556152e-05, "loss": 2.0338, "step": 9172 }, { "epoch": 0.31, "grad_norm": 0.7436968088150024, "learning_rate": 1.6043496127444007e-05, "loss": 2.0851, "step": 9173 }, { "epoch": 0.31, "grad_norm": 0.7205508351325989, "learning_rate": 1.604264926605598e-05, "loss": 2.1315, "step": 9174 }, { "epoch": 0.31, "grad_norm": 0.7143407464027405, "learning_rate": 1.604180233640165e-05, "loss": 2.1885, "step": 9175 }, { "epoch": 0.31, "grad_norm": 0.6849372386932373, "learning_rate": 1.6040955338490576e-05, "loss": 2.1422, "step": 9176 }, { "epoch": 0.31, "grad_norm": 0.7259867787361145, "learning_rate": 1.6040108272332325e-05, "loss": 2.1147, "step": 9177 }, { "epoch": 0.31, "grad_norm": 0.7319706678390503, "learning_rate": 1.6039261137936474e-05, "loss": 2.1421, "step": 9178 }, { "epoch": 0.31, "grad_norm": 0.713388204574585, "learning_rate": 1.603841393531259e-05, "loss": 2.09, "step": 9179 }, { "epoch": 0.31, "grad_norm": 0.6804453730583191, "learning_rate": 1.603756666447024e-05, "loss": 2.1083, "step": 9180 }, { "epoch": 0.31, "grad_norm": 0.7508741021156311, "learning_rate": 1.6036719325419007e-05, "loss": 2.1244, "step": 9181 }, { "epoch": 0.31, "grad_norm": 0.6907243728637695, "learning_rate": 1.6035871918168452e-05, "loss": 2.0895, "step": 9182 }, { "epoch": 0.31, "grad_norm": 0.7081133723258972, "learning_rate": 1.6035024442728157e-05, "loss": 2.0405, "step": 9183 }, { "epoch": 0.31, "grad_norm": 0.6962268352508545, "learning_rate": 1.603417689910769e-05, "loss": 2.1157, "step": 9184 }, { "epoch": 0.31, "grad_norm": 0.6961023211479187, "learning_rate": 1.603332928731663e-05, "loss": 2.1785, "step": 9185 }, { "epoch": 0.31, "grad_norm": 0.7001343369483948, "learning_rate": 1.603248160736455e-05, "loss": 2.1178, "step": 9186 }, { "epoch": 0.31, "grad_norm": 0.7052466869354248, "learning_rate": 1.603163385926103e-05, "loss": 2.1258, "step": 9187 }, { "epoch": 0.31, "grad_norm": 0.6974732875823975, "learning_rate": 1.6030786043015644e-05, "loss": 2.1652, "step": 9188 }, { "epoch": 0.31, "grad_norm": 0.7125571370124817, "learning_rate": 1.6029938158637973e-05, "loss": 2.1205, "step": 9189 }, { "epoch": 0.31, "grad_norm": 0.722271740436554, "learning_rate": 1.6029090206137592e-05, "loss": 2.1224, "step": 9190 }, { "epoch": 0.31, "grad_norm": 0.7402487397193909, "learning_rate": 1.6028242185524087e-05, "loss": 2.1522, "step": 9191 }, { "epoch": 0.31, "grad_norm": 0.6940657496452332, "learning_rate": 1.6027394096807035e-05, "loss": 2.179, "step": 9192 }, { "epoch": 0.31, "grad_norm": 0.6938674449920654, "learning_rate": 1.602654593999601e-05, "loss": 2.1405, "step": 9193 }, { "epoch": 0.31, "grad_norm": 0.7211888432502747, "learning_rate": 1.6025697715100606e-05, "loss": 2.1961, "step": 9194 }, { "epoch": 0.31, "grad_norm": 0.6818050146102905, "learning_rate": 1.6024849422130403e-05, "loss": 2.0702, "step": 9195 }, { "epoch": 0.31, "grad_norm": 0.7188803553581238, "learning_rate": 1.602400106109498e-05, "loss": 2.1377, "step": 9196 }, { "epoch": 0.31, "grad_norm": 0.7143694758415222, "learning_rate": 1.602315263200392e-05, "loss": 2.1284, "step": 9197 }, { "epoch": 0.31, "grad_norm": 0.7232929468154907, "learning_rate": 1.6022304134866814e-05, "loss": 2.1792, "step": 9198 }, { "epoch": 0.31, "grad_norm": 0.6955771446228027, "learning_rate": 1.6021455569693248e-05, "loss": 2.1351, "step": 9199 }, { "epoch": 0.31, "grad_norm": 0.7464007139205933, "learning_rate": 1.6020606936492803e-05, "loss": 2.1728, "step": 9200 }, { "epoch": 0.31, "grad_norm": 0.7254986763000488, "learning_rate": 1.601975823527507e-05, "loss": 2.2202, "step": 9201 }, { "epoch": 0.31, "grad_norm": 0.7056158781051636, "learning_rate": 1.6018909466049635e-05, "loss": 2.1285, "step": 9202 }, { "epoch": 0.31, "grad_norm": 0.7082623839378357, "learning_rate": 1.601806062882609e-05, "loss": 2.1202, "step": 9203 }, { "epoch": 0.31, "grad_norm": 0.7204388976097107, "learning_rate": 1.6017211723614023e-05, "loss": 2.1415, "step": 9204 }, { "epoch": 0.31, "grad_norm": 0.7447026968002319, "learning_rate": 1.6016362750423023e-05, "loss": 2.1626, "step": 9205 }, { "epoch": 0.31, "grad_norm": 0.691584050655365, "learning_rate": 1.601551370926268e-05, "loss": 2.1264, "step": 9206 }, { "epoch": 0.31, "grad_norm": 0.7081149220466614, "learning_rate": 1.6014664600142595e-05, "loss": 2.0592, "step": 9207 }, { "epoch": 0.31, "grad_norm": 0.7025768756866455, "learning_rate": 1.601381542307235e-05, "loss": 2.1791, "step": 9208 }, { "epoch": 0.31, "grad_norm": 0.7228710651397705, "learning_rate": 1.6012966178061543e-05, "loss": 2.1401, "step": 9209 }, { "epoch": 0.31, "grad_norm": 0.7501245141029358, "learning_rate": 1.601211686511977e-05, "loss": 2.2134, "step": 9210 }, { "epoch": 0.31, "grad_norm": 0.7173078060150146, "learning_rate": 1.6011267484256624e-05, "loss": 2.1123, "step": 9211 }, { "epoch": 0.31, "grad_norm": 0.7497999668121338, "learning_rate": 1.60104180354817e-05, "loss": 2.122, "step": 9212 }, { "epoch": 0.31, "grad_norm": 0.6962926983833313, "learning_rate": 1.6009568518804596e-05, "loss": 2.1564, "step": 9213 }, { "epoch": 0.31, "grad_norm": 0.7197548151016235, "learning_rate": 1.6008718934234912e-05, "loss": 2.1983, "step": 9214 }, { "epoch": 0.31, "grad_norm": 0.7410464882850647, "learning_rate": 1.6007869281782235e-05, "loss": 2.1191, "step": 9215 }, { "epoch": 0.31, "grad_norm": 0.7107625603675842, "learning_rate": 1.600701956145618e-05, "loss": 2.1727, "step": 9216 }, { "epoch": 0.31, "grad_norm": 0.7562989592552185, "learning_rate": 1.600616977326633e-05, "loss": 2.1419, "step": 9217 }, { "epoch": 0.31, "grad_norm": 0.7248507142066956, "learning_rate": 1.6005319917222298e-05, "loss": 2.1126, "step": 9218 }, { "epoch": 0.31, "grad_norm": 0.7140735983848572, "learning_rate": 1.6004469993333685e-05, "loss": 2.1175, "step": 9219 }, { "epoch": 0.31, "grad_norm": 0.7225379943847656, "learning_rate": 1.6003620001610083e-05, "loss": 2.124, "step": 9220 }, { "epoch": 0.31, "grad_norm": 0.7579736113548279, "learning_rate": 1.6002769942061107e-05, "loss": 2.1667, "step": 9221 }, { "epoch": 0.31, "grad_norm": 0.7073781490325928, "learning_rate": 1.600191981469635e-05, "loss": 2.1004, "step": 9222 }, { "epoch": 0.31, "grad_norm": 0.7554293870925903, "learning_rate": 1.6001069619525417e-05, "loss": 2.0418, "step": 9223 }, { "epoch": 0.31, "grad_norm": 0.8300036787986755, "learning_rate": 1.600021935655792e-05, "loss": 2.1243, "step": 9224 }, { "epoch": 0.31, "grad_norm": 0.7187107801437378, "learning_rate": 1.5999369025803463e-05, "loss": 2.1137, "step": 9225 }, { "epoch": 0.31, "grad_norm": 0.7158005237579346, "learning_rate": 1.5998518627271647e-05, "loss": 2.1661, "step": 9226 }, { "epoch": 0.31, "grad_norm": 0.7382926940917969, "learning_rate": 1.5997668160972086e-05, "loss": 2.1454, "step": 9227 }, { "epoch": 0.31, "grad_norm": 0.7160718441009521, "learning_rate": 1.5996817626914386e-05, "loss": 2.0932, "step": 9228 }, { "epoch": 0.31, "grad_norm": 0.7767157554626465, "learning_rate": 1.599596702510815e-05, "loss": 2.1123, "step": 9229 }, { "epoch": 0.31, "grad_norm": 0.7325469851493835, "learning_rate": 1.5995116355562997e-05, "loss": 2.1483, "step": 9230 }, { "epoch": 0.31, "grad_norm": 0.7318035960197449, "learning_rate": 1.599426561828853e-05, "loss": 2.1002, "step": 9231 }, { "epoch": 0.31, "grad_norm": 0.6890215873718262, "learning_rate": 1.5993414813294368e-05, "loss": 2.0897, "step": 9232 }, { "epoch": 0.31, "grad_norm": 0.7208920121192932, "learning_rate": 1.5992563940590114e-05, "loss": 2.0999, "step": 9233 }, { "epoch": 0.31, "grad_norm": 0.7177322506904602, "learning_rate": 1.5991713000185382e-05, "loss": 2.1628, "step": 9234 }, { "epoch": 0.31, "grad_norm": 0.7050248980522156, "learning_rate": 1.5990861992089792e-05, "loss": 2.079, "step": 9235 }, { "epoch": 0.31, "grad_norm": 0.7299240827560425, "learning_rate": 1.5990010916312956e-05, "loss": 2.1838, "step": 9236 }, { "epoch": 0.31, "grad_norm": 0.73721843957901, "learning_rate": 1.5989159772864483e-05, "loss": 2.1084, "step": 9237 }, { "epoch": 0.31, "grad_norm": 0.7857838869094849, "learning_rate": 1.5988308561753995e-05, "loss": 2.1617, "step": 9238 }, { "epoch": 0.31, "grad_norm": 0.7269307374954224, "learning_rate": 1.5987457282991107e-05, "loss": 2.1562, "step": 9239 }, { "epoch": 0.31, "grad_norm": 0.7437485456466675, "learning_rate": 1.5986605936585432e-05, "loss": 2.1184, "step": 9240 }, { "epoch": 0.31, "grad_norm": 0.7098572254180908, "learning_rate": 1.598575452254659e-05, "loss": 2.1512, "step": 9241 }, { "epoch": 0.31, "grad_norm": 0.7172836065292358, "learning_rate": 1.598490304088421e-05, "loss": 2.1955, "step": 9242 }, { "epoch": 0.31, "grad_norm": 0.7143498063087463, "learning_rate": 1.5984051491607898e-05, "loss": 2.1556, "step": 9243 }, { "epoch": 0.31, "grad_norm": 0.6794418096542358, "learning_rate": 1.598319987472728e-05, "loss": 2.1414, "step": 9244 }, { "epoch": 0.31, "grad_norm": 0.7081068754196167, "learning_rate": 1.5982348190251974e-05, "loss": 2.1379, "step": 9245 }, { "epoch": 0.31, "grad_norm": 0.7376105785369873, "learning_rate": 1.5981496438191606e-05, "loss": 2.0912, "step": 9246 }, { "epoch": 0.31, "grad_norm": 0.7500844597816467, "learning_rate": 1.5980644618555796e-05, "loss": 2.1541, "step": 9247 }, { "epoch": 0.31, "grad_norm": 0.6969217658042908, "learning_rate": 1.597979273135417e-05, "loss": 2.1342, "step": 9248 }, { "epoch": 0.31, "grad_norm": 0.6875256896018982, "learning_rate": 1.597894077659635e-05, "loss": 2.0596, "step": 9249 }, { "epoch": 0.31, "grad_norm": 0.732375979423523, "learning_rate": 1.597808875429196e-05, "loss": 2.1545, "step": 9250 }, { "epoch": 0.31, "grad_norm": 0.7435402274131775, "learning_rate": 1.5977236664450627e-05, "loss": 2.0905, "step": 9251 }, { "epoch": 0.31, "grad_norm": 0.7759851217269897, "learning_rate": 1.5976384507081974e-05, "loss": 2.1992, "step": 9252 }, { "epoch": 0.31, "grad_norm": 0.7100881338119507, "learning_rate": 1.5975532282195637e-05, "loss": 2.1343, "step": 9253 }, { "epoch": 0.31, "grad_norm": 0.723523736000061, "learning_rate": 1.5974679989801235e-05, "loss": 2.152, "step": 9254 }, { "epoch": 0.31, "grad_norm": 0.7145118713378906, "learning_rate": 1.59738276299084e-05, "loss": 2.1073, "step": 9255 }, { "epoch": 0.31, "grad_norm": 0.746797502040863, "learning_rate": 1.5972975202526763e-05, "loss": 2.1959, "step": 9256 }, { "epoch": 0.31, "grad_norm": 0.7300235629081726, "learning_rate": 1.597212270766595e-05, "loss": 2.1183, "step": 9257 }, { "epoch": 0.31, "grad_norm": 0.7386276721954346, "learning_rate": 1.59712701453356e-05, "loss": 2.1796, "step": 9258 }, { "epoch": 0.31, "grad_norm": 0.7125728726387024, "learning_rate": 1.5970417515545335e-05, "loss": 2.0942, "step": 9259 }, { "epoch": 0.31, "grad_norm": 0.6984363794326782, "learning_rate": 1.5969564818304793e-05, "loss": 2.1497, "step": 9260 }, { "epoch": 0.31, "grad_norm": 0.7038816213607788, "learning_rate": 1.5968712053623604e-05, "loss": 2.1059, "step": 9261 }, { "epoch": 0.31, "grad_norm": 0.7059393525123596, "learning_rate": 1.5967859221511406e-05, "loss": 2.1676, "step": 9262 }, { "epoch": 0.31, "grad_norm": 0.718723475933075, "learning_rate": 1.5967006321977834e-05, "loss": 2.1623, "step": 9263 }, { "epoch": 0.31, "grad_norm": 0.7173232436180115, "learning_rate": 1.596615335503252e-05, "loss": 2.1257, "step": 9264 }, { "epoch": 0.31, "grad_norm": 0.7239296436309814, "learning_rate": 1.59653003206851e-05, "loss": 2.0621, "step": 9265 }, { "epoch": 0.31, "grad_norm": 0.7134047150611877, "learning_rate": 1.5964447218945214e-05, "loss": 2.1374, "step": 9266 }, { "epoch": 0.31, "grad_norm": 0.7569999098777771, "learning_rate": 1.59635940498225e-05, "loss": 2.1341, "step": 9267 }, { "epoch": 0.31, "grad_norm": 0.7099441885948181, "learning_rate": 1.5962740813326597e-05, "loss": 2.1652, "step": 9268 }, { "epoch": 0.31, "grad_norm": 0.7193514108657837, "learning_rate": 1.596188750946714e-05, "loss": 2.1912, "step": 9269 }, { "epoch": 0.31, "grad_norm": 0.7716888189315796, "learning_rate": 1.596103413825377e-05, "loss": 2.1129, "step": 9270 }, { "epoch": 0.31, "grad_norm": 0.7170916795730591, "learning_rate": 1.5960180699696132e-05, "loss": 2.1951, "step": 9271 }, { "epoch": 0.31, "grad_norm": 0.7163340449333191, "learning_rate": 1.5959327193803868e-05, "loss": 2.1645, "step": 9272 }, { "epoch": 0.31, "grad_norm": 0.7287588715553284, "learning_rate": 1.5958473620586613e-05, "loss": 2.1258, "step": 9273 }, { "epoch": 0.31, "grad_norm": 0.7344598770141602, "learning_rate": 1.595761998005402e-05, "loss": 2.1368, "step": 9274 }, { "epoch": 0.31, "grad_norm": 0.7135640978813171, "learning_rate": 1.5956766272215725e-05, "loss": 2.1053, "step": 9275 }, { "epoch": 0.31, "grad_norm": 0.7311667203903198, "learning_rate": 1.5955912497081377e-05, "loss": 2.1348, "step": 9276 }, { "epoch": 0.31, "grad_norm": 0.7427075505256653, "learning_rate": 1.595505865466062e-05, "loss": 2.099, "step": 9277 }, { "epoch": 0.31, "grad_norm": 0.7743737101554871, "learning_rate": 1.59542047449631e-05, "loss": 2.0539, "step": 9278 }, { "epoch": 0.31, "grad_norm": 0.7061465978622437, "learning_rate": 1.5953350767998463e-05, "loss": 2.1601, "step": 9279 }, { "epoch": 0.31, "grad_norm": 0.7167195081710815, "learning_rate": 1.5952496723776364e-05, "loss": 2.1161, "step": 9280 }, { "epoch": 0.31, "grad_norm": 0.6850878596305847, "learning_rate": 1.5951642612306436e-05, "loss": 2.1017, "step": 9281 }, { "epoch": 0.31, "grad_norm": 0.713681697845459, "learning_rate": 1.5950788433598345e-05, "loss": 2.0973, "step": 9282 }, { "epoch": 0.31, "grad_norm": 0.6899462938308716, "learning_rate": 1.594993418766173e-05, "loss": 2.0692, "step": 9283 }, { "epoch": 0.31, "grad_norm": 0.7102536559104919, "learning_rate": 1.5949079874506246e-05, "loss": 2.1644, "step": 9284 }, { "epoch": 0.31, "grad_norm": 0.7040597200393677, "learning_rate": 1.5948225494141545e-05, "loss": 2.1001, "step": 9285 }, { "epoch": 0.31, "grad_norm": 0.7178003787994385, "learning_rate": 1.5947371046577278e-05, "loss": 2.1227, "step": 9286 }, { "epoch": 0.31, "grad_norm": 0.7293294072151184, "learning_rate": 1.5946516531823098e-05, "loss": 2.1198, "step": 9287 }, { "epoch": 0.31, "grad_norm": 0.7625541687011719, "learning_rate": 1.594566194988866e-05, "loss": 2.1493, "step": 9288 }, { "epoch": 0.31, "grad_norm": 0.7138282656669617, "learning_rate": 1.594480730078362e-05, "loss": 2.1141, "step": 9289 }, { "epoch": 0.31, "grad_norm": 0.7239620685577393, "learning_rate": 1.5943952584517627e-05, "loss": 2.1815, "step": 9290 }, { "epoch": 0.31, "grad_norm": 0.7259666323661804, "learning_rate": 1.5943097801100344e-05, "loss": 2.0702, "step": 9291 }, { "epoch": 0.31, "grad_norm": 0.7165936231613159, "learning_rate": 1.5942242950541423e-05, "loss": 2.1533, "step": 9292 }, { "epoch": 0.31, "grad_norm": 0.7234683632850647, "learning_rate": 1.5941388032850526e-05, "loss": 2.1468, "step": 9293 }, { "epoch": 0.31, "grad_norm": 0.7195634245872498, "learning_rate": 1.5940533048037306e-05, "loss": 2.1009, "step": 9294 }, { "epoch": 0.31, "grad_norm": 0.7426090836524963, "learning_rate": 1.593967799611143e-05, "loss": 2.1868, "step": 9295 }, { "epoch": 0.31, "grad_norm": 0.735403299331665, "learning_rate": 1.593882287708255e-05, "loss": 2.0949, "step": 9296 }, { "epoch": 0.31, "grad_norm": 0.699145495891571, "learning_rate": 1.593796769096033e-05, "loss": 2.0949, "step": 9297 }, { "epoch": 0.31, "grad_norm": 0.7040101885795593, "learning_rate": 1.593711243775443e-05, "loss": 2.1892, "step": 9298 }, { "epoch": 0.31, "grad_norm": 0.7026491761207581, "learning_rate": 1.5936257117474512e-05, "loss": 2.0946, "step": 9299 }, { "epoch": 0.31, "grad_norm": 0.7546688914299011, "learning_rate": 1.5935401730130243e-05, "loss": 2.1376, "step": 9300 }, { "epoch": 0.31, "grad_norm": 0.706497073173523, "learning_rate": 1.593454627573128e-05, "loss": 2.1687, "step": 9301 }, { "epoch": 0.31, "grad_norm": 0.711330771446228, "learning_rate": 1.5933690754287297e-05, "loss": 2.1293, "step": 9302 }, { "epoch": 0.31, "grad_norm": 0.6929452419281006, "learning_rate": 1.5932835165807946e-05, "loss": 2.1038, "step": 9303 }, { "epoch": 0.31, "grad_norm": 0.749926745891571, "learning_rate": 1.5931979510302905e-05, "loss": 2.2167, "step": 9304 }, { "epoch": 0.31, "grad_norm": 0.7023991942405701, "learning_rate": 1.5931123787781837e-05, "loss": 2.1228, "step": 9305 }, { "epoch": 0.31, "grad_norm": 0.7499391436576843, "learning_rate": 1.593026799825441e-05, "loss": 2.1255, "step": 9306 }, { "epoch": 0.31, "grad_norm": 0.7123939990997314, "learning_rate": 1.5929412141730286e-05, "loss": 2.1516, "step": 9307 }, { "epoch": 0.31, "grad_norm": 0.699347198009491, "learning_rate": 1.5928556218219138e-05, "loss": 2.0914, "step": 9308 }, { "epoch": 0.31, "grad_norm": 0.6875589489936829, "learning_rate": 1.592770022773064e-05, "loss": 2.1464, "step": 9309 }, { "epoch": 0.31, "grad_norm": 0.7070686221122742, "learning_rate": 1.5926844170274454e-05, "loss": 2.1166, "step": 9310 }, { "epoch": 0.31, "grad_norm": 0.7435624599456787, "learning_rate": 1.592598804586026e-05, "loss": 2.0973, "step": 9311 }, { "epoch": 0.31, "grad_norm": 0.7203469276428223, "learning_rate": 1.5925131854497722e-05, "loss": 2.1745, "step": 9312 }, { "epoch": 0.31, "grad_norm": 0.7259249687194824, "learning_rate": 1.5924275596196524e-05, "loss": 2.0451, "step": 9313 }, { "epoch": 0.31, "grad_norm": 0.7620490789413452, "learning_rate": 1.5923419270966327e-05, "loss": 2.1113, "step": 9314 }, { "epoch": 0.31, "grad_norm": 0.7011623382568359, "learning_rate": 1.5922562878816813e-05, "loss": 2.0573, "step": 9315 }, { "epoch": 0.31, "grad_norm": 0.7292693853378296, "learning_rate": 1.5921706419757653e-05, "loss": 2.0924, "step": 9316 }, { "epoch": 0.31, "grad_norm": 0.7191295623779297, "learning_rate": 1.5920849893798523e-05, "loss": 2.1491, "step": 9317 }, { "epoch": 0.31, "grad_norm": 0.6986382603645325, "learning_rate": 1.5919993300949103e-05, "loss": 2.1228, "step": 9318 }, { "epoch": 0.31, "grad_norm": 0.7097615003585815, "learning_rate": 1.591913664121907e-05, "loss": 2.1283, "step": 9319 }, { "epoch": 0.31, "grad_norm": 0.72479647397995, "learning_rate": 1.5918279914618095e-05, "loss": 2.1574, "step": 9320 }, { "epoch": 0.31, "grad_norm": 0.6991228461265564, "learning_rate": 1.5917423121155863e-05, "loss": 2.0836, "step": 9321 }, { "epoch": 0.31, "grad_norm": 0.6978242993354797, "learning_rate": 1.5916566260842058e-05, "loss": 2.1033, "step": 9322 }, { "epoch": 0.31, "grad_norm": 0.72382652759552, "learning_rate": 1.5915709333686348e-05, "loss": 2.1684, "step": 9323 }, { "epoch": 0.31, "grad_norm": 0.711806058883667, "learning_rate": 1.5914852339698424e-05, "loss": 2.0954, "step": 9324 }, { "epoch": 0.31, "grad_norm": 0.6966581344604492, "learning_rate": 1.591399527888797e-05, "loss": 2.0669, "step": 9325 }, { "epoch": 0.31, "grad_norm": 0.7167718410491943, "learning_rate": 1.5913138151264654e-05, "loss": 2.1015, "step": 9326 }, { "epoch": 0.31, "grad_norm": 0.7108865976333618, "learning_rate": 1.5912280956838174e-05, "loss": 2.0901, "step": 9327 }, { "epoch": 0.31, "grad_norm": 0.7295517325401306, "learning_rate": 1.5911423695618206e-05, "loss": 2.1619, "step": 9328 }, { "epoch": 0.31, "grad_norm": 0.6814781427383423, "learning_rate": 1.591056636761444e-05, "loss": 2.1046, "step": 9329 }, { "epoch": 0.31, "grad_norm": 0.7195289134979248, "learning_rate": 1.5909708972836554e-05, "loss": 2.1368, "step": 9330 }, { "epoch": 0.31, "grad_norm": 0.7272166013717651, "learning_rate": 1.590885151129425e-05, "loss": 2.135, "step": 9331 }, { "epoch": 0.31, "grad_norm": 0.7215157747268677, "learning_rate": 1.5907993982997194e-05, "loss": 2.2216, "step": 9332 }, { "epoch": 0.31, "grad_norm": 0.7225114703178406, "learning_rate": 1.590713638795509e-05, "loss": 2.1911, "step": 9333 }, { "epoch": 0.31, "grad_norm": 0.7003817558288574, "learning_rate": 1.5906278726177616e-05, "loss": 2.1757, "step": 9334 }, { "epoch": 0.31, "grad_norm": 0.7157624959945679, "learning_rate": 1.590542099767447e-05, "loss": 2.1128, "step": 9335 }, { "epoch": 0.31, "grad_norm": 0.7406724691390991, "learning_rate": 1.5904563202455337e-05, "loss": 2.2628, "step": 9336 }, { "epoch": 0.31, "grad_norm": 0.7400164008140564, "learning_rate": 1.5903705340529913e-05, "loss": 2.2008, "step": 9337 }, { "epoch": 0.31, "grad_norm": 0.7527457475662231, "learning_rate": 1.5902847411907885e-05, "loss": 2.142, "step": 9338 }, { "epoch": 0.31, "grad_norm": 0.7302672863006592, "learning_rate": 1.5901989416598943e-05, "loss": 2.0443, "step": 9339 }, { "epoch": 0.31, "grad_norm": 0.713763952255249, "learning_rate": 1.5901131354612787e-05, "loss": 2.1518, "step": 9340 }, { "epoch": 0.31, "grad_norm": 0.7410566806793213, "learning_rate": 1.5900273225959104e-05, "loss": 2.124, "step": 9341 }, { "epoch": 0.31, "grad_norm": 0.7286756634712219, "learning_rate": 1.5899415030647593e-05, "loss": 2.1996, "step": 9342 }, { "epoch": 0.31, "grad_norm": 0.7066284418106079, "learning_rate": 1.589855676868795e-05, "loss": 2.1599, "step": 9343 }, { "epoch": 0.31, "grad_norm": 0.7200791239738464, "learning_rate": 1.589769844008987e-05, "loss": 2.1145, "step": 9344 }, { "epoch": 0.31, "grad_norm": 0.7457669973373413, "learning_rate": 1.5896840044863045e-05, "loss": 2.2582, "step": 9345 }, { "epoch": 0.31, "grad_norm": 0.694642961025238, "learning_rate": 1.5895981583017182e-05, "loss": 2.0973, "step": 9346 }, { "epoch": 0.31, "grad_norm": 0.7158113718032837, "learning_rate": 1.5895123054561976e-05, "loss": 2.1097, "step": 9347 }, { "epoch": 0.31, "grad_norm": 0.7374735474586487, "learning_rate": 1.5894264459507116e-05, "loss": 2.054, "step": 9348 }, { "epoch": 0.31, "grad_norm": 0.7377456426620483, "learning_rate": 1.589340579786232e-05, "loss": 2.1702, "step": 9349 }, { "epoch": 0.31, "grad_norm": 0.7287301421165466, "learning_rate": 1.589254706963727e-05, "loss": 2.0944, "step": 9350 }, { "epoch": 0.31, "grad_norm": 0.7183813452720642, "learning_rate": 1.5891688274841683e-05, "loss": 2.1142, "step": 9351 }, { "epoch": 0.31, "grad_norm": 0.7221638560295105, "learning_rate": 1.5890829413485255e-05, "loss": 2.1353, "step": 9352 }, { "epoch": 0.31, "grad_norm": 0.699821412563324, "learning_rate": 1.5889970485577687e-05, "loss": 2.1422, "step": 9353 }, { "epoch": 0.31, "grad_norm": 0.7150502800941467, "learning_rate": 1.5889111491128683e-05, "loss": 2.1305, "step": 9354 }, { "epoch": 0.31, "grad_norm": 0.6975569725036621, "learning_rate": 1.588825243014795e-05, "loss": 2.0958, "step": 9355 }, { "epoch": 0.31, "grad_norm": 0.7854176759719849, "learning_rate": 1.588739330264519e-05, "loss": 2.1754, "step": 9356 }, { "epoch": 0.31, "grad_norm": 0.7363340854644775, "learning_rate": 1.5886534108630113e-05, "loss": 2.1417, "step": 9357 }, { "epoch": 0.31, "grad_norm": 0.7523507475852966, "learning_rate": 1.5885674848112425e-05, "loss": 2.1669, "step": 9358 }, { "epoch": 0.31, "grad_norm": 0.709601104259491, "learning_rate": 1.588481552110183e-05, "loss": 2.11, "step": 9359 }, { "epoch": 0.31, "grad_norm": 0.7014585733413696, "learning_rate": 1.5883956127608036e-05, "loss": 2.0255, "step": 9360 }, { "epoch": 0.31, "grad_norm": 0.7283008098602295, "learning_rate": 1.5883096667640757e-05, "loss": 2.1471, "step": 9361 }, { "epoch": 0.31, "grad_norm": 0.7762390971183777, "learning_rate": 1.58822371412097e-05, "loss": 2.192, "step": 9362 }, { "epoch": 0.31, "grad_norm": 0.7231695055961609, "learning_rate": 1.5881377548324573e-05, "loss": 2.1392, "step": 9363 }, { "epoch": 0.31, "grad_norm": 0.7393805384635925, "learning_rate": 1.588051788899509e-05, "loss": 2.1102, "step": 9364 }, { "epoch": 0.31, "grad_norm": 0.7337321639060974, "learning_rate": 1.5879658163230962e-05, "loss": 2.1958, "step": 9365 }, { "epoch": 0.31, "grad_norm": 0.7064790725708008, "learning_rate": 1.5878798371041904e-05, "loss": 2.135, "step": 9366 }, { "epoch": 0.31, "grad_norm": 0.6932360529899597, "learning_rate": 1.5877938512437623e-05, "loss": 2.0814, "step": 9367 }, { "epoch": 0.31, "grad_norm": 0.7264076471328735, "learning_rate": 1.5877078587427843e-05, "loss": 2.1409, "step": 9368 }, { "epoch": 0.31, "grad_norm": 0.7129030227661133, "learning_rate": 1.5876218596022267e-05, "loss": 2.1248, "step": 9369 }, { "epoch": 0.31, "grad_norm": 0.7282942533493042, "learning_rate": 1.5875358538230622e-05, "loss": 2.1388, "step": 9370 }, { "epoch": 0.31, "grad_norm": 0.744226336479187, "learning_rate": 1.5874498414062617e-05, "loss": 2.1115, "step": 9371 }, { "epoch": 0.31, "grad_norm": 0.7208715677261353, "learning_rate": 1.5873638223527974e-05, "loss": 2.0923, "step": 9372 }, { "epoch": 0.31, "grad_norm": 0.742484986782074, "learning_rate": 1.5872777966636407e-05, "loss": 2.1057, "step": 9373 }, { "epoch": 0.31, "grad_norm": 0.7470643520355225, "learning_rate": 1.5871917643397637e-05, "loss": 2.1095, "step": 9374 }, { "epoch": 0.31, "grad_norm": 0.7380080819129944, "learning_rate": 1.587105725382138e-05, "loss": 2.1459, "step": 9375 }, { "epoch": 0.31, "grad_norm": 0.7264449596405029, "learning_rate": 1.5870196797917364e-05, "loss": 2.1382, "step": 9376 }, { "epoch": 0.31, "grad_norm": 0.6858323812484741, "learning_rate": 1.58693362756953e-05, "loss": 2.0887, "step": 9377 }, { "epoch": 0.31, "grad_norm": 0.7045590877532959, "learning_rate": 1.586847568716492e-05, "loss": 2.0999, "step": 9378 }, { "epoch": 0.31, "grad_norm": 0.7243896126747131, "learning_rate": 1.5867615032335938e-05, "loss": 2.1392, "step": 9379 }, { "epoch": 0.31, "grad_norm": 0.709012508392334, "learning_rate": 1.5866754311218078e-05, "loss": 2.1841, "step": 9380 }, { "epoch": 0.31, "grad_norm": 0.7338648438453674, "learning_rate": 1.586589352382107e-05, "loss": 2.1355, "step": 9381 }, { "epoch": 0.31, "grad_norm": 0.7011011242866516, "learning_rate": 1.586503267015463e-05, "loss": 2.0781, "step": 9382 }, { "epoch": 0.31, "grad_norm": 0.7398859858512878, "learning_rate": 1.5864171750228493e-05, "loss": 2.1129, "step": 9383 }, { "epoch": 0.31, "grad_norm": 0.7238042950630188, "learning_rate": 1.5863310764052377e-05, "loss": 2.0833, "step": 9384 }, { "epoch": 0.31, "grad_norm": 0.748180627822876, "learning_rate": 1.5862449711636015e-05, "loss": 2.0895, "step": 9385 }, { "epoch": 0.31, "grad_norm": 0.7103529572486877, "learning_rate": 1.586158859298913e-05, "loss": 2.0477, "step": 9386 }, { "epoch": 0.31, "grad_norm": 0.7032710313796997, "learning_rate": 1.5860727408121453e-05, "loss": 2.1162, "step": 9387 }, { "epoch": 0.31, "grad_norm": 0.7389243245124817, "learning_rate": 1.5859866157042708e-05, "loss": 2.1359, "step": 9388 }, { "epoch": 0.31, "grad_norm": 0.7248475551605225, "learning_rate": 1.5859004839762636e-05, "loss": 2.1971, "step": 9389 }, { "epoch": 0.31, "grad_norm": 0.7445863485336304, "learning_rate": 1.5858143456290957e-05, "loss": 2.0935, "step": 9390 }, { "epoch": 0.31, "grad_norm": 0.7575012445449829, "learning_rate": 1.585728200663741e-05, "loss": 2.1435, "step": 9391 }, { "epoch": 0.31, "grad_norm": 0.7120488286018372, "learning_rate": 1.585642049081172e-05, "loss": 2.069, "step": 9392 }, { "epoch": 0.31, "grad_norm": 0.7273603081703186, "learning_rate": 1.5855558908823624e-05, "loss": 2.1504, "step": 9393 }, { "epoch": 0.31, "grad_norm": 0.6936109066009521, "learning_rate": 1.5854697260682857e-05, "loss": 2.1565, "step": 9394 }, { "epoch": 0.31, "grad_norm": 0.7172167301177979, "learning_rate": 1.585383554639915e-05, "loss": 2.2064, "step": 9395 }, { "epoch": 0.31, "grad_norm": 0.7418685555458069, "learning_rate": 1.585297376598224e-05, "loss": 2.022, "step": 9396 }, { "epoch": 0.31, "grad_norm": 0.7625650763511658, "learning_rate": 1.5852111919441867e-05, "loss": 2.1119, "step": 9397 }, { "epoch": 0.31, "grad_norm": 0.7353043556213379, "learning_rate": 1.5851250006787754e-05, "loss": 2.1042, "step": 9398 }, { "epoch": 0.31, "grad_norm": 0.741849422454834, "learning_rate": 1.5850388028029656e-05, "loss": 2.0738, "step": 9399 }, { "epoch": 0.31, "grad_norm": 0.6942260265350342, "learning_rate": 1.5849525983177298e-05, "loss": 2.1395, "step": 9400 }, { "epoch": 0.31, "grad_norm": 0.7396000027656555, "learning_rate": 1.5848663872240426e-05, "loss": 2.1977, "step": 9401 }, { "epoch": 0.31, "grad_norm": 0.7268931269645691, "learning_rate": 1.5847801695228773e-05, "loss": 2.1354, "step": 9402 }, { "epoch": 0.31, "grad_norm": 0.6917301416397095, "learning_rate": 1.5846939452152092e-05, "loss": 2.0879, "step": 9403 }, { "epoch": 0.31, "grad_norm": 0.7451241612434387, "learning_rate": 1.5846077143020108e-05, "loss": 2.0965, "step": 9404 }, { "epoch": 0.31, "grad_norm": 0.7706646919250488, "learning_rate": 1.584521476784257e-05, "loss": 2.0862, "step": 9405 }, { "epoch": 0.31, "grad_norm": 0.6801823377609253, "learning_rate": 1.584435232662923e-05, "loss": 2.0701, "step": 9406 }, { "epoch": 0.31, "grad_norm": 0.7284613847732544, "learning_rate": 1.5843489819389815e-05, "loss": 2.1161, "step": 9407 }, { "epoch": 0.31, "grad_norm": 0.7244563698768616, "learning_rate": 1.5842627246134077e-05, "loss": 2.1431, "step": 9408 }, { "epoch": 0.31, "grad_norm": 0.7015225887298584, "learning_rate": 1.5841764606871764e-05, "loss": 2.1401, "step": 9409 }, { "epoch": 0.31, "grad_norm": 0.7111163139343262, "learning_rate": 1.5840901901612612e-05, "loss": 2.1635, "step": 9410 }, { "epoch": 0.31, "grad_norm": 0.7268586158752441, "learning_rate": 1.584003913036638e-05, "loss": 2.0753, "step": 9411 }, { "epoch": 0.31, "grad_norm": 0.7158044576644897, "learning_rate": 1.5839176293142802e-05, "loss": 2.1375, "step": 9412 }, { "epoch": 0.31, "grad_norm": 0.6889114379882812, "learning_rate": 1.583831338995164e-05, "loss": 2.108, "step": 9413 }, { "epoch": 0.31, "grad_norm": 0.6827769875526428, "learning_rate": 1.5837450420802633e-05, "loss": 2.0551, "step": 9414 }, { "epoch": 0.31, "grad_norm": 0.6834708452224731, "learning_rate": 1.583658738570553e-05, "loss": 2.1315, "step": 9415 }, { "epoch": 0.31, "grad_norm": 0.7518619894981384, "learning_rate": 1.583572428467008e-05, "loss": 2.0399, "step": 9416 }, { "epoch": 0.31, "grad_norm": 0.7287269234657288, "learning_rate": 1.5834861117706043e-05, "loss": 2.2061, "step": 9417 }, { "epoch": 0.31, "grad_norm": 0.6860605478286743, "learning_rate": 1.5833997884823166e-05, "loss": 2.0921, "step": 9418 }, { "epoch": 0.31, "grad_norm": 0.6967748999595642, "learning_rate": 1.5833134586031192e-05, "loss": 2.0759, "step": 9419 }, { "epoch": 0.31, "grad_norm": 0.7458381056785583, "learning_rate": 1.583227122133989e-05, "loss": 2.176, "step": 9420 }, { "epoch": 0.31, "grad_norm": 0.7498286962509155, "learning_rate": 1.5831407790759e-05, "loss": 2.1065, "step": 9421 }, { "epoch": 0.31, "grad_norm": 0.7366600036621094, "learning_rate": 1.5830544294298285e-05, "loss": 2.1394, "step": 9422 }, { "epoch": 0.31, "grad_norm": 0.7680703401565552, "learning_rate": 1.5829680731967496e-05, "loss": 2.1454, "step": 9423 }, { "epoch": 0.31, "grad_norm": 0.7394464612007141, "learning_rate": 1.5828817103776393e-05, "loss": 2.1434, "step": 9424 }, { "epoch": 0.31, "grad_norm": 0.7061243653297424, "learning_rate": 1.582795340973473e-05, "loss": 2.1018, "step": 9425 }, { "epoch": 0.31, "grad_norm": 0.7081742882728577, "learning_rate": 1.5827089649852262e-05, "loss": 2.1638, "step": 9426 }, { "epoch": 0.31, "grad_norm": 0.7141790986061096, "learning_rate": 1.582622582413875e-05, "loss": 2.0311, "step": 9427 }, { "epoch": 0.31, "grad_norm": 0.7654913067817688, "learning_rate": 1.582536193260396e-05, "loss": 2.058, "step": 9428 }, { "epoch": 0.31, "grad_norm": 0.7559537887573242, "learning_rate": 1.5824497975257638e-05, "loss": 2.0857, "step": 9429 }, { "epoch": 0.31, "grad_norm": 0.7193070650100708, "learning_rate": 1.5823633952109555e-05, "loss": 2.1137, "step": 9430 }, { "epoch": 0.31, "grad_norm": 0.6875612735748291, "learning_rate": 1.5822769863169466e-05, "loss": 2.1259, "step": 9431 }, { "epoch": 0.31, "grad_norm": 0.7018385529518127, "learning_rate": 1.5821905708447138e-05, "loss": 2.1033, "step": 9432 }, { "epoch": 0.31, "grad_norm": 0.736066997051239, "learning_rate": 1.582104148795233e-05, "loss": 2.1127, "step": 9433 }, { "epoch": 0.31, "grad_norm": 0.7211173176765442, "learning_rate": 1.5820177201694806e-05, "loss": 2.1441, "step": 9434 }, { "epoch": 0.31, "grad_norm": 0.7701630592346191, "learning_rate": 1.5819312849684332e-05, "loss": 2.2309, "step": 9435 }, { "epoch": 0.31, "grad_norm": 0.7922909259796143, "learning_rate": 1.5818448431930674e-05, "loss": 2.1249, "step": 9436 }, { "epoch": 0.31, "grad_norm": 0.718798816204071, "learning_rate": 1.581758394844359e-05, "loss": 2.149, "step": 9437 }, { "epoch": 0.31, "grad_norm": 0.7408843040466309, "learning_rate": 1.5816719399232855e-05, "loss": 2.1312, "step": 9438 }, { "epoch": 0.31, "grad_norm": 0.7309269309043884, "learning_rate": 1.5815854784308233e-05, "loss": 2.0993, "step": 9439 }, { "epoch": 0.31, "grad_norm": 0.7137578129768372, "learning_rate": 1.5814990103679492e-05, "loss": 2.129, "step": 9440 }, { "epoch": 0.31, "grad_norm": 0.7637180685997009, "learning_rate": 1.5814125357356404e-05, "loss": 2.1656, "step": 9441 }, { "epoch": 0.31, "grad_norm": 0.6977443695068359, "learning_rate": 1.581326054534873e-05, "loss": 2.1297, "step": 9442 }, { "epoch": 0.31, "grad_norm": 0.7001411318778992, "learning_rate": 1.581239566766625e-05, "loss": 2.1991, "step": 9443 }, { "epoch": 0.31, "grad_norm": 0.7499983906745911, "learning_rate": 1.5811530724318725e-05, "loss": 2.0963, "step": 9444 }, { "epoch": 0.31, "grad_norm": 0.7667697072029114, "learning_rate": 1.5810665715315934e-05, "loss": 2.1679, "step": 9445 }, { "epoch": 0.31, "grad_norm": 0.7213220000267029, "learning_rate": 1.5809800640667646e-05, "loss": 2.0904, "step": 9446 }, { "epoch": 0.31, "grad_norm": 0.7774108648300171, "learning_rate": 1.580893550038364e-05, "loss": 2.1252, "step": 9447 }, { "epoch": 0.31, "grad_norm": 0.757612943649292, "learning_rate": 1.5808070294473684e-05, "loss": 2.1299, "step": 9448 }, { "epoch": 0.31, "grad_norm": 0.726270854473114, "learning_rate": 1.5807205022947546e-05, "loss": 2.1325, "step": 9449 }, { "epoch": 0.31, "grad_norm": 0.7092653512954712, "learning_rate": 1.5806339685815016e-05, "loss": 2.096, "step": 9450 }, { "epoch": 0.31, "grad_norm": 0.7162549495697021, "learning_rate": 1.5805474283085863e-05, "loss": 2.1713, "step": 9451 }, { "epoch": 0.31, "grad_norm": 0.7511173486709595, "learning_rate": 1.5804608814769862e-05, "loss": 2.1674, "step": 9452 }, { "epoch": 0.31, "grad_norm": 0.7152876853942871, "learning_rate": 1.5803743280876793e-05, "loss": 2.1214, "step": 9453 }, { "epoch": 0.31, "grad_norm": 0.742279052734375, "learning_rate": 1.5802877681416435e-05, "loss": 2.2081, "step": 9454 }, { "epoch": 0.31, "grad_norm": 0.7373721599578857, "learning_rate": 1.5802012016398563e-05, "loss": 2.1039, "step": 9455 }, { "epoch": 0.31, "grad_norm": 0.7521692514419556, "learning_rate": 1.580114628583296e-05, "loss": 2.1895, "step": 9456 }, { "epoch": 0.31, "grad_norm": 0.7203809022903442, "learning_rate": 1.580028048972941e-05, "loss": 2.1184, "step": 9457 }, { "epoch": 0.31, "grad_norm": 0.6909192800521851, "learning_rate": 1.5799414628097687e-05, "loss": 2.1398, "step": 9458 }, { "epoch": 0.31, "grad_norm": 0.7108280658721924, "learning_rate": 1.579854870094758e-05, "loss": 2.1717, "step": 9459 }, { "epoch": 0.31, "grad_norm": 0.7120805978775024, "learning_rate": 1.5797682708288863e-05, "loss": 2.1169, "step": 9460 }, { "epoch": 0.31, "grad_norm": 0.7278050780296326, "learning_rate": 1.5796816650131324e-05, "loss": 2.2593, "step": 9461 }, { "epoch": 0.31, "grad_norm": 0.7110636830329895, "learning_rate": 1.579595052648475e-05, "loss": 2.1484, "step": 9462 }, { "epoch": 0.31, "grad_norm": 0.7246778011322021, "learning_rate": 1.5795084337358925e-05, "loss": 2.1747, "step": 9463 }, { "epoch": 0.31, "grad_norm": 0.7256181836128235, "learning_rate": 1.579421808276363e-05, "loss": 2.151, "step": 9464 }, { "epoch": 0.31, "grad_norm": 0.7386330366134644, "learning_rate": 1.579335176270866e-05, "loss": 2.1752, "step": 9465 }, { "epoch": 0.31, "grad_norm": 0.7566443681716919, "learning_rate": 1.5792485377203793e-05, "loss": 2.1853, "step": 9466 }, { "epoch": 0.31, "grad_norm": 0.7201268076896667, "learning_rate": 1.579161892625882e-05, "loss": 2.0811, "step": 9467 }, { "epoch": 0.32, "grad_norm": 0.7124336361885071, "learning_rate": 1.5790752409883533e-05, "loss": 2.0542, "step": 9468 }, { "epoch": 0.32, "grad_norm": 0.7368142008781433, "learning_rate": 1.578988582808772e-05, "loss": 2.1207, "step": 9469 }, { "epoch": 0.32, "grad_norm": 0.7141274213790894, "learning_rate": 1.5789019180881168e-05, "loss": 2.1098, "step": 9470 }, { "epoch": 0.32, "grad_norm": 0.7566022872924805, "learning_rate": 1.578815246827367e-05, "loss": 2.1009, "step": 9471 }, { "epoch": 0.32, "grad_norm": 0.7603344321250916, "learning_rate": 1.5787285690275016e-05, "loss": 2.1171, "step": 9472 }, { "epoch": 0.32, "grad_norm": 0.733277440071106, "learning_rate": 1.5786418846895007e-05, "loss": 2.1771, "step": 9473 }, { "epoch": 0.32, "grad_norm": 0.6996964812278748, "learning_rate": 1.5785551938143422e-05, "loss": 2.1505, "step": 9474 }, { "epoch": 0.32, "grad_norm": 0.7126699686050415, "learning_rate": 1.5784684964030065e-05, "loss": 2.1401, "step": 9475 }, { "epoch": 0.32, "grad_norm": 0.7585956454277039, "learning_rate": 1.5783817924564725e-05, "loss": 2.1734, "step": 9476 }, { "epoch": 0.32, "grad_norm": 0.7639651894569397, "learning_rate": 1.5782950819757203e-05, "loss": 2.1451, "step": 9477 }, { "epoch": 0.32, "grad_norm": 0.7288308143615723, "learning_rate": 1.5782083649617292e-05, "loss": 2.1835, "step": 9478 }, { "epoch": 0.32, "grad_norm": 0.7138044238090515, "learning_rate": 1.5781216414154785e-05, "loss": 2.1379, "step": 9479 }, { "epoch": 0.32, "grad_norm": 0.7208316922187805, "learning_rate": 1.5780349113379483e-05, "loss": 2.0688, "step": 9480 }, { "epoch": 0.32, "grad_norm": 0.7289469242095947, "learning_rate": 1.5779481747301186e-05, "loss": 2.1293, "step": 9481 }, { "epoch": 0.32, "grad_norm": 0.7546771168708801, "learning_rate": 1.5778614315929693e-05, "loss": 2.2088, "step": 9482 }, { "epoch": 0.32, "grad_norm": 0.720140278339386, "learning_rate": 1.57777468192748e-05, "loss": 2.1878, "step": 9483 }, { "epoch": 0.32, "grad_norm": 0.7224448323249817, "learning_rate": 1.577687925734631e-05, "loss": 2.177, "step": 9484 }, { "epoch": 0.32, "grad_norm": 0.7290453314781189, "learning_rate": 1.5776011630154023e-05, "loss": 2.1306, "step": 9485 }, { "epoch": 0.32, "grad_norm": 0.7161919474601746, "learning_rate": 1.577514393770774e-05, "loss": 2.2324, "step": 9486 }, { "epoch": 0.32, "grad_norm": 0.7317053079605103, "learning_rate": 1.577427618001727e-05, "loss": 2.0505, "step": 9487 }, { "epoch": 0.32, "grad_norm": 0.7158587574958801, "learning_rate": 1.577340835709241e-05, "loss": 2.1329, "step": 9488 }, { "epoch": 0.32, "grad_norm": 0.7073089480400085, "learning_rate": 1.5772540468942964e-05, "loss": 2.1659, "step": 9489 }, { "epoch": 0.32, "grad_norm": 0.7054910063743591, "learning_rate": 1.5771672515578743e-05, "loss": 2.1204, "step": 9490 }, { "epoch": 0.32, "grad_norm": 0.715887188911438, "learning_rate": 1.5770804497009544e-05, "loss": 2.0894, "step": 9491 }, { "epoch": 0.32, "grad_norm": 0.7075126767158508, "learning_rate": 1.576993641324518e-05, "loss": 2.1834, "step": 9492 }, { "epoch": 0.32, "grad_norm": 0.7366071939468384, "learning_rate": 1.5769068264295456e-05, "loss": 2.1915, "step": 9493 }, { "epoch": 0.32, "grad_norm": 0.7239155173301697, "learning_rate": 1.5768200050170177e-05, "loss": 2.1354, "step": 9494 }, { "epoch": 0.32, "grad_norm": 0.7025246024131775, "learning_rate": 1.5767331770879158e-05, "loss": 2.1724, "step": 9495 }, { "epoch": 0.32, "grad_norm": 0.7343201041221619, "learning_rate": 1.5766463426432203e-05, "loss": 2.131, "step": 9496 }, { "epoch": 0.32, "grad_norm": 0.75413978099823, "learning_rate": 1.5765595016839127e-05, "loss": 2.1168, "step": 9497 }, { "epoch": 0.32, "grad_norm": 0.7159507274627686, "learning_rate": 1.5764726542109732e-05, "loss": 2.2244, "step": 9498 }, { "epoch": 0.32, "grad_norm": 0.717211127281189, "learning_rate": 1.5763858002253838e-05, "loss": 2.1286, "step": 9499 }, { "epoch": 0.32, "grad_norm": 0.7077893614768982, "learning_rate": 1.5762989397281252e-05, "loss": 2.1646, "step": 9500 }, { "epoch": 0.32, "grad_norm": 0.7396695017814636, "learning_rate": 1.5762120727201794e-05, "loss": 2.157, "step": 9501 }, { "epoch": 0.32, "grad_norm": 0.7099971175193787, "learning_rate": 1.576125199202527e-05, "loss": 2.0659, "step": 9502 }, { "epoch": 0.32, "grad_norm": 0.7169786095619202, "learning_rate": 1.5760383191761496e-05, "loss": 2.1055, "step": 9503 }, { "epoch": 0.32, "grad_norm": 0.6997612714767456, "learning_rate": 1.575951432642029e-05, "loss": 2.1445, "step": 9504 }, { "epoch": 0.32, "grad_norm": 0.7236524820327759, "learning_rate": 1.5758645396011466e-05, "loss": 2.1581, "step": 9505 }, { "epoch": 0.32, "grad_norm": 0.710495114326477, "learning_rate": 1.5757776400544843e-05, "loss": 2.1879, "step": 9506 }, { "epoch": 0.32, "grad_norm": 0.702593982219696, "learning_rate": 1.575690734003023e-05, "loss": 2.0816, "step": 9507 }, { "epoch": 0.32, "grad_norm": 0.7106673121452332, "learning_rate": 1.575603821447746e-05, "loss": 2.0962, "step": 9508 }, { "epoch": 0.32, "grad_norm": 0.6896476745605469, "learning_rate": 1.575516902389634e-05, "loss": 2.1703, "step": 9509 }, { "epoch": 0.32, "grad_norm": 0.6999567747116089, "learning_rate": 1.5754299768296695e-05, "loss": 2.1081, "step": 9510 }, { "epoch": 0.32, "grad_norm": 0.7676829695701599, "learning_rate": 1.5753430447688344e-05, "loss": 2.1694, "step": 9511 }, { "epoch": 0.32, "grad_norm": 0.7136418223381042, "learning_rate": 1.5752561062081104e-05, "loss": 2.1243, "step": 9512 }, { "epoch": 0.32, "grad_norm": 0.7588100433349609, "learning_rate": 1.5751691611484803e-05, "loss": 2.1602, "step": 9513 }, { "epoch": 0.32, "grad_norm": 0.7047775387763977, "learning_rate": 1.5750822095909258e-05, "loss": 2.1807, "step": 9514 }, { "epoch": 0.32, "grad_norm": 0.7533066272735596, "learning_rate": 1.57499525153643e-05, "loss": 2.1384, "step": 9515 }, { "epoch": 0.32, "grad_norm": 0.7027696967124939, "learning_rate": 1.5749082869859745e-05, "loss": 2.0984, "step": 9516 }, { "epoch": 0.32, "grad_norm": 0.6989158987998962, "learning_rate": 1.5748213159405422e-05, "loss": 2.1355, "step": 9517 }, { "epoch": 0.32, "grad_norm": 0.7062950134277344, "learning_rate": 1.5747343384011157e-05, "loss": 2.1985, "step": 9518 }, { "epoch": 0.32, "grad_norm": 0.6866962313652039, "learning_rate": 1.5746473543686775e-05, "loss": 2.145, "step": 9519 }, { "epoch": 0.32, "grad_norm": 0.7092107534408569, "learning_rate": 1.57456036384421e-05, "loss": 2.1825, "step": 9520 }, { "epoch": 0.32, "grad_norm": 0.7087661623954773, "learning_rate": 1.574473366828696e-05, "loss": 2.1107, "step": 9521 }, { "epoch": 0.32, "grad_norm": 0.7195688486099243, "learning_rate": 1.574386363323119e-05, "loss": 2.0866, "step": 9522 }, { "epoch": 0.32, "grad_norm": 0.6997634172439575, "learning_rate": 1.5742993533284614e-05, "loss": 2.145, "step": 9523 }, { "epoch": 0.32, "grad_norm": 0.7220474481582642, "learning_rate": 1.5742123368457065e-05, "loss": 2.1756, "step": 9524 }, { "epoch": 0.32, "grad_norm": 0.7579584717750549, "learning_rate": 1.574125313875837e-05, "loss": 2.0839, "step": 9525 }, { "epoch": 0.32, "grad_norm": 0.7131385803222656, "learning_rate": 1.574038284419836e-05, "loss": 2.1274, "step": 9526 }, { "epoch": 0.32, "grad_norm": 0.7348940968513489, "learning_rate": 1.5739512484786866e-05, "loss": 2.0994, "step": 9527 }, { "epoch": 0.32, "grad_norm": 0.749491810798645, "learning_rate": 1.573864206053373e-05, "loss": 2.1788, "step": 9528 }, { "epoch": 0.32, "grad_norm": 0.7153084874153137, "learning_rate": 1.5737771571448776e-05, "loss": 2.1363, "step": 9529 }, { "epoch": 0.32, "grad_norm": 0.7272295355796814, "learning_rate": 1.5736901017541843e-05, "loss": 2.0987, "step": 9530 }, { "epoch": 0.32, "grad_norm": 0.7216790318489075, "learning_rate": 1.573603039882276e-05, "loss": 2.2127, "step": 9531 }, { "epoch": 0.32, "grad_norm": 0.7159500122070312, "learning_rate": 1.5735159715301373e-05, "loss": 2.0835, "step": 9532 }, { "epoch": 0.32, "grad_norm": 0.7494335770606995, "learning_rate": 1.5734288966987514e-05, "loss": 2.0973, "step": 9533 }, { "epoch": 0.32, "grad_norm": 0.7109895348548889, "learning_rate": 1.5733418153891013e-05, "loss": 2.0859, "step": 9534 }, { "epoch": 0.32, "grad_norm": 0.7220141291618347, "learning_rate": 1.5732547276021716e-05, "loss": 2.1052, "step": 9535 }, { "epoch": 0.32, "grad_norm": 0.7249950766563416, "learning_rate": 1.573167633338946e-05, "loss": 2.1341, "step": 9536 }, { "epoch": 0.32, "grad_norm": 0.7163698077201843, "learning_rate": 1.5730805326004085e-05, "loss": 2.1211, "step": 9537 }, { "epoch": 0.32, "grad_norm": 0.7331742644309998, "learning_rate": 1.5729934253875425e-05, "loss": 2.0906, "step": 9538 }, { "epoch": 0.32, "grad_norm": 0.7258093953132629, "learning_rate": 1.572906311701333e-05, "loss": 2.1952, "step": 9539 }, { "epoch": 0.32, "grad_norm": 0.7199559807777405, "learning_rate": 1.5728191915427634e-05, "loss": 2.1366, "step": 9540 }, { "epoch": 0.32, "grad_norm": 0.7624881267547607, "learning_rate": 1.5727320649128187e-05, "loss": 2.1805, "step": 9541 }, { "epoch": 0.32, "grad_norm": 0.7302795052528381, "learning_rate": 1.572644931812483e-05, "loss": 2.1467, "step": 9542 }, { "epoch": 0.32, "grad_norm": 0.7440550923347473, "learning_rate": 1.57255779224274e-05, "loss": 2.2007, "step": 9543 }, { "epoch": 0.32, "grad_norm": 0.7277077436447144, "learning_rate": 1.5724706462045744e-05, "loss": 2.1112, "step": 9544 }, { "epoch": 0.32, "grad_norm": 0.7130835652351379, "learning_rate": 1.5723834936989713e-05, "loss": 2.148, "step": 9545 }, { "epoch": 0.32, "grad_norm": 0.7161419987678528, "learning_rate": 1.572296334726915e-05, "loss": 2.1175, "step": 9546 }, { "epoch": 0.32, "grad_norm": 0.7154002785682678, "learning_rate": 1.5722091692893898e-05, "loss": 2.1295, "step": 9547 }, { "epoch": 0.32, "grad_norm": 0.7618816494941711, "learning_rate": 1.572121997387381e-05, "loss": 2.0115, "step": 9548 }, { "epoch": 0.32, "grad_norm": 0.7276881337165833, "learning_rate": 1.572034819021873e-05, "loss": 2.211, "step": 9549 }, { "epoch": 0.32, "grad_norm": 0.7261018753051758, "learning_rate": 1.5719476341938512e-05, "loss": 2.1398, "step": 9550 }, { "epoch": 0.32, "grad_norm": 0.749725341796875, "learning_rate": 1.5718604429043e-05, "loss": 2.1149, "step": 9551 }, { "epoch": 0.32, "grad_norm": 0.6969484090805054, "learning_rate": 1.5717732451542047e-05, "loss": 2.1031, "step": 9552 }, { "epoch": 0.32, "grad_norm": 0.7233068346977234, "learning_rate": 1.5716860409445504e-05, "loss": 2.1048, "step": 9553 }, { "epoch": 0.32, "grad_norm": 0.6973428726196289, "learning_rate": 1.571598830276322e-05, "loss": 2.1798, "step": 9554 }, { "epoch": 0.32, "grad_norm": 0.7416917085647583, "learning_rate": 1.5715116131505052e-05, "loss": 2.0262, "step": 9555 }, { "epoch": 0.32, "grad_norm": 0.7377226948738098, "learning_rate": 1.571424389568085e-05, "loss": 2.0619, "step": 9556 }, { "epoch": 0.32, "grad_norm": 0.7081400156021118, "learning_rate": 1.5713371595300473e-05, "loss": 2.0919, "step": 9557 }, { "epoch": 0.32, "grad_norm": 0.6876232028007507, "learning_rate": 1.5712499230373768e-05, "loss": 2.0479, "step": 9558 }, { "epoch": 0.32, "grad_norm": 0.7329896092414856, "learning_rate": 1.57116268009106e-05, "loss": 2.1534, "step": 9559 }, { "epoch": 0.32, "grad_norm": 0.7370933294296265, "learning_rate": 1.5710754306920813e-05, "loss": 2.1273, "step": 9560 }, { "epoch": 0.32, "grad_norm": 0.742594301700592, "learning_rate": 1.5709881748414275e-05, "loss": 2.0766, "step": 9561 }, { "epoch": 0.32, "grad_norm": 0.7008952498435974, "learning_rate": 1.5709009125400835e-05, "loss": 2.0758, "step": 9562 }, { "epoch": 0.32, "grad_norm": 0.7532712817192078, "learning_rate": 1.5708136437890363e-05, "loss": 2.1701, "step": 9563 }, { "epoch": 0.32, "grad_norm": 0.7653273344039917, "learning_rate": 1.5707263685892705e-05, "loss": 2.1042, "step": 9564 }, { "epoch": 0.32, "grad_norm": 0.6916401386260986, "learning_rate": 1.5706390869417726e-05, "loss": 2.1362, "step": 9565 }, { "epoch": 0.32, "grad_norm": 0.6997584104537964, "learning_rate": 1.5705517988475293e-05, "loss": 2.0919, "step": 9566 }, { "epoch": 0.32, "grad_norm": 0.7480838298797607, "learning_rate": 1.5704645043075256e-05, "loss": 2.0627, "step": 9567 }, { "epoch": 0.32, "grad_norm": 0.7022796273231506, "learning_rate": 1.5703772033227483e-05, "loss": 2.08, "step": 9568 }, { "epoch": 0.32, "grad_norm": 0.70134437084198, "learning_rate": 1.5702898958941835e-05, "loss": 2.2027, "step": 9569 }, { "epoch": 0.32, "grad_norm": 0.6919594407081604, "learning_rate": 1.5702025820228185e-05, "loss": 2.1394, "step": 9570 }, { "epoch": 0.32, "grad_norm": 0.699573814868927, "learning_rate": 1.570115261709638e-05, "loss": 2.0919, "step": 9571 }, { "epoch": 0.32, "grad_norm": 0.718121349811554, "learning_rate": 1.57002793495563e-05, "loss": 2.0955, "step": 9572 }, { "epoch": 0.32, "grad_norm": 0.7320795655250549, "learning_rate": 1.56994060176178e-05, "loss": 2.1064, "step": 9573 }, { "epoch": 0.32, "grad_norm": 0.7241496443748474, "learning_rate": 1.5698532621290755e-05, "loss": 2.1564, "step": 9574 }, { "epoch": 0.32, "grad_norm": 0.7316837310791016, "learning_rate": 1.5697659160585024e-05, "loss": 2.12, "step": 9575 }, { "epoch": 0.32, "grad_norm": 0.7229344248771667, "learning_rate": 1.569678563551048e-05, "loss": 2.1038, "step": 9576 }, { "epoch": 0.32, "grad_norm": 0.7267295718193054, "learning_rate": 1.5695912046076993e-05, "loss": 2.1607, "step": 9577 }, { "epoch": 0.32, "grad_norm": 0.6990330219268799, "learning_rate": 1.5695038392294428e-05, "loss": 2.1009, "step": 9578 }, { "epoch": 0.32, "grad_norm": 0.7229902744293213, "learning_rate": 1.5694164674172657e-05, "loss": 2.062, "step": 9579 }, { "epoch": 0.32, "grad_norm": 0.7202978730201721, "learning_rate": 1.5693290891721552e-05, "loss": 2.1183, "step": 9580 }, { "epoch": 0.32, "grad_norm": 0.722460150718689, "learning_rate": 1.5692417044950984e-05, "loss": 2.0628, "step": 9581 }, { "epoch": 0.32, "grad_norm": 0.7395266890525818, "learning_rate": 1.5691543133870818e-05, "loss": 2.1198, "step": 9582 }, { "epoch": 0.32, "grad_norm": 0.7214912176132202, "learning_rate": 1.569066915849094e-05, "loss": 2.2318, "step": 9583 }, { "epoch": 0.32, "grad_norm": 0.734996497631073, "learning_rate": 1.5689795118821212e-05, "loss": 2.1246, "step": 9584 }, { "epoch": 0.32, "grad_norm": 0.7530327439308167, "learning_rate": 1.5688921014871516e-05, "loss": 2.1565, "step": 9585 }, { "epoch": 0.32, "grad_norm": 0.7424435019493103, "learning_rate": 1.5688046846651727e-05, "loss": 2.0547, "step": 9586 }, { "epoch": 0.32, "grad_norm": 0.7094511985778809, "learning_rate": 1.5687172614171716e-05, "loss": 2.1595, "step": 9587 }, { "epoch": 0.32, "grad_norm": 0.7107856273651123, "learning_rate": 1.568629831744136e-05, "loss": 2.1035, "step": 9588 }, { "epoch": 0.32, "grad_norm": 0.7119570374488831, "learning_rate": 1.568542395647054e-05, "loss": 2.1014, "step": 9589 }, { "epoch": 0.32, "grad_norm": 0.6754194498062134, "learning_rate": 1.568454953126913e-05, "loss": 2.1741, "step": 9590 }, { "epoch": 0.32, "grad_norm": 0.7398413419723511, "learning_rate": 1.5683675041847014e-05, "loss": 2.1005, "step": 9591 }, { "epoch": 0.32, "grad_norm": 0.721780002117157, "learning_rate": 1.5682800488214068e-05, "loss": 2.0811, "step": 9592 }, { "epoch": 0.32, "grad_norm": 0.7383992075920105, "learning_rate": 1.5681925870380172e-05, "loss": 2.1526, "step": 9593 }, { "epoch": 0.32, "grad_norm": 0.7144381403923035, "learning_rate": 1.5681051188355205e-05, "loss": 2.0615, "step": 9594 }, { "epoch": 0.32, "grad_norm": 0.7038764357566833, "learning_rate": 1.5680176442149054e-05, "loss": 2.0843, "step": 9595 }, { "epoch": 0.32, "grad_norm": 0.7094604969024658, "learning_rate": 1.5679301631771596e-05, "loss": 2.1016, "step": 9596 }, { "epoch": 0.32, "grad_norm": 0.7397826910018921, "learning_rate": 1.567842675723272e-05, "loss": 2.1635, "step": 9597 }, { "epoch": 0.32, "grad_norm": 0.7061549425125122, "learning_rate": 1.56775518185423e-05, "loss": 2.0496, "step": 9598 }, { "epoch": 0.32, "grad_norm": 0.7178221344947815, "learning_rate": 1.5676676815710234e-05, "loss": 2.0624, "step": 9599 }, { "epoch": 0.32, "grad_norm": 0.708868145942688, "learning_rate": 1.5675801748746398e-05, "loss": 2.1848, "step": 9600 }, { "epoch": 0.32, "grad_norm": 0.7420101761817932, "learning_rate": 1.567492661766068e-05, "loss": 2.0407, "step": 9601 }, { "epoch": 0.32, "grad_norm": 0.7334026098251343, "learning_rate": 1.5674051422462963e-05, "loss": 2.1414, "step": 9602 }, { "epoch": 0.32, "grad_norm": 0.7109665870666504, "learning_rate": 1.5673176163163145e-05, "loss": 2.0832, "step": 9603 }, { "epoch": 0.32, "grad_norm": 0.7079555988311768, "learning_rate": 1.5672300839771102e-05, "loss": 2.0636, "step": 9604 }, { "epoch": 0.32, "grad_norm": 0.7286210060119629, "learning_rate": 1.567142545229673e-05, "loss": 2.1746, "step": 9605 }, { "epoch": 0.32, "grad_norm": 0.741376519203186, "learning_rate": 1.567055000074992e-05, "loss": 2.1565, "step": 9606 }, { "epoch": 0.32, "grad_norm": 0.695118248462677, "learning_rate": 1.5669674485140556e-05, "loss": 2.1242, "step": 9607 }, { "epoch": 0.32, "grad_norm": 0.7205113768577576, "learning_rate": 1.5668798905478535e-05, "loss": 2.1159, "step": 9608 }, { "epoch": 0.32, "grad_norm": 0.7460171580314636, "learning_rate": 1.5667923261773743e-05, "loss": 2.1477, "step": 9609 }, { "epoch": 0.32, "grad_norm": 0.7187833189964294, "learning_rate": 1.5667047554036078e-05, "loss": 2.1223, "step": 9610 }, { "epoch": 0.32, "grad_norm": 0.7472878098487854, "learning_rate": 1.566617178227543e-05, "loss": 2.1371, "step": 9611 }, { "epoch": 0.32, "grad_norm": 0.7552516460418701, "learning_rate": 1.5665295946501697e-05, "loss": 2.1796, "step": 9612 }, { "epoch": 0.32, "grad_norm": 0.7284395098686218, "learning_rate": 1.5664420046724767e-05, "loss": 2.1306, "step": 9613 }, { "epoch": 0.32, "grad_norm": 0.7214163541793823, "learning_rate": 1.566354408295454e-05, "loss": 2.1494, "step": 9614 }, { "epoch": 0.32, "grad_norm": 0.7119484543800354, "learning_rate": 1.5662668055200908e-05, "loss": 2.1303, "step": 9615 }, { "epoch": 0.32, "grad_norm": 0.7451114058494568, "learning_rate": 1.5661791963473775e-05, "loss": 2.1109, "step": 9616 }, { "epoch": 0.32, "grad_norm": 0.707645058631897, "learning_rate": 1.5660915807783035e-05, "loss": 2.0225, "step": 9617 }, { "epoch": 0.32, "grad_norm": 0.7205560207366943, "learning_rate": 1.5660039588138583e-05, "loss": 2.0906, "step": 9618 }, { "epoch": 0.32, "grad_norm": 0.7707234621047974, "learning_rate": 1.5659163304550323e-05, "loss": 2.1227, "step": 9619 }, { "epoch": 0.32, "grad_norm": 0.7582520842552185, "learning_rate": 1.565828695702815e-05, "loss": 2.1891, "step": 9620 }, { "epoch": 0.32, "grad_norm": 0.7272468209266663, "learning_rate": 1.565741054558197e-05, "loss": 2.134, "step": 9621 }, { "epoch": 0.32, "grad_norm": 0.7442682981491089, "learning_rate": 1.565653407022168e-05, "loss": 2.1559, "step": 9622 }, { "epoch": 0.32, "grad_norm": 0.744335412979126, "learning_rate": 1.565565753095718e-05, "loss": 2.114, "step": 9623 }, { "epoch": 0.32, "grad_norm": 0.7005484700202942, "learning_rate": 1.5654780927798382e-05, "loss": 2.1597, "step": 9624 }, { "epoch": 0.32, "grad_norm": 0.7300022840499878, "learning_rate": 1.565390426075518e-05, "loss": 2.0513, "step": 9625 }, { "epoch": 0.32, "grad_norm": 0.7206262350082397, "learning_rate": 1.565302752983748e-05, "loss": 2.1894, "step": 9626 }, { "epoch": 0.32, "grad_norm": 0.7290781736373901, "learning_rate": 1.565215073505519e-05, "loss": 2.1122, "step": 9627 }, { "epoch": 0.32, "grad_norm": 0.7283538579940796, "learning_rate": 1.5651273876418214e-05, "loss": 2.1367, "step": 9628 }, { "epoch": 0.32, "grad_norm": 0.698457658290863, "learning_rate": 1.5650396953936455e-05, "loss": 2.1004, "step": 9629 }, { "epoch": 0.32, "grad_norm": 0.7498002648353577, "learning_rate": 1.5649519967619826e-05, "loss": 2.1383, "step": 9630 }, { "epoch": 0.32, "grad_norm": 0.7227504849433899, "learning_rate": 1.5648642917478227e-05, "loss": 2.0743, "step": 9631 }, { "epoch": 0.32, "grad_norm": 0.7126960158348083, "learning_rate": 1.5647765803521573e-05, "loss": 2.1988, "step": 9632 }, { "epoch": 0.32, "grad_norm": 0.715067982673645, "learning_rate": 1.5646888625759774e-05, "loss": 2.1768, "step": 9633 }, { "epoch": 0.32, "grad_norm": 0.7235296368598938, "learning_rate": 1.5646011384202733e-05, "loss": 2.2027, "step": 9634 }, { "epoch": 0.32, "grad_norm": 0.7500407099723816, "learning_rate": 1.5645134078860363e-05, "loss": 2.1222, "step": 9635 }, { "epoch": 0.32, "grad_norm": 0.7103623151779175, "learning_rate": 1.564425670974258e-05, "loss": 2.1598, "step": 9636 }, { "epoch": 0.32, "grad_norm": 0.7132090330123901, "learning_rate": 1.564337927685929e-05, "loss": 2.1487, "step": 9637 }, { "epoch": 0.32, "grad_norm": 0.7143595814704895, "learning_rate": 1.5642501780220405e-05, "loss": 2.0656, "step": 9638 }, { "epoch": 0.32, "grad_norm": 0.721831202507019, "learning_rate": 1.5641624219835853e-05, "loss": 2.1336, "step": 9639 }, { "epoch": 0.32, "grad_norm": 0.7576766014099121, "learning_rate": 1.5640746595715527e-05, "loss": 2.1204, "step": 9640 }, { "epoch": 0.32, "grad_norm": 0.704969584941864, "learning_rate": 1.5639868907869356e-05, "loss": 2.0931, "step": 9641 }, { "epoch": 0.32, "grad_norm": 0.698978066444397, "learning_rate": 1.5638991156307248e-05, "loss": 2.1002, "step": 9642 }, { "epoch": 0.32, "grad_norm": 0.733176589012146, "learning_rate": 1.5638113341039125e-05, "loss": 2.1458, "step": 9643 }, { "epoch": 0.32, "grad_norm": 0.7193590402603149, "learning_rate": 1.5637235462074903e-05, "loss": 2.1283, "step": 9644 }, { "epoch": 0.32, "grad_norm": 0.7169396281242371, "learning_rate": 1.56363575194245e-05, "loss": 2.1778, "step": 9645 }, { "epoch": 0.32, "grad_norm": 0.7564206123352051, "learning_rate": 1.5635479513097827e-05, "loss": 2.2108, "step": 9646 }, { "epoch": 0.32, "grad_norm": 0.7212381958961487, "learning_rate": 1.5634601443104812e-05, "loss": 2.1225, "step": 9647 }, { "epoch": 0.32, "grad_norm": 0.7097349762916565, "learning_rate": 1.563372330945537e-05, "loss": 2.1498, "step": 9648 }, { "epoch": 0.32, "grad_norm": 0.6714023351669312, "learning_rate": 1.5632845112159428e-05, "loss": 2.1314, "step": 9649 }, { "epoch": 0.32, "grad_norm": 0.7411195635795593, "learning_rate": 1.56319668512269e-05, "loss": 2.1672, "step": 9650 }, { "epoch": 0.32, "grad_norm": 0.703231930732727, "learning_rate": 1.5631088526667713e-05, "loss": 2.1897, "step": 9651 }, { "epoch": 0.32, "grad_norm": 0.6935396790504456, "learning_rate": 1.5630210138491786e-05, "loss": 2.147, "step": 9652 }, { "epoch": 0.32, "grad_norm": 0.7113943099975586, "learning_rate": 1.5629331686709046e-05, "loss": 2.1304, "step": 9653 }, { "epoch": 0.32, "grad_norm": 0.7628820538520813, "learning_rate": 1.5628453171329418e-05, "loss": 2.1103, "step": 9654 }, { "epoch": 0.32, "grad_norm": 0.736619234085083, "learning_rate": 1.562757459236282e-05, "loss": 2.1834, "step": 9655 }, { "epoch": 0.32, "grad_norm": 0.7182301878929138, "learning_rate": 1.5626695949819186e-05, "loss": 2.1551, "step": 9656 }, { "epoch": 0.32, "grad_norm": 0.7071012854576111, "learning_rate": 1.5625817243708437e-05, "loss": 2.1194, "step": 9657 }, { "epoch": 0.32, "grad_norm": 0.732288658618927, "learning_rate": 1.5624938474040502e-05, "loss": 2.1598, "step": 9658 }, { "epoch": 0.32, "grad_norm": 0.7055097818374634, "learning_rate": 1.562405964082531e-05, "loss": 2.1195, "step": 9659 }, { "epoch": 0.32, "grad_norm": 0.7581003904342651, "learning_rate": 1.5623180744072784e-05, "loss": 2.1182, "step": 9660 }, { "epoch": 0.32, "grad_norm": 0.7259427905082703, "learning_rate": 1.562230178379286e-05, "loss": 2.2026, "step": 9661 }, { "epoch": 0.32, "grad_norm": 0.7154667377471924, "learning_rate": 1.5621422759995466e-05, "loss": 2.1247, "step": 9662 }, { "epoch": 0.32, "grad_norm": 0.7540830373764038, "learning_rate": 1.5620543672690532e-05, "loss": 2.1474, "step": 9663 }, { "epoch": 0.32, "grad_norm": 0.7043972015380859, "learning_rate": 1.561966452188799e-05, "loss": 2.1137, "step": 9664 }, { "epoch": 0.32, "grad_norm": 0.7038115859031677, "learning_rate": 1.561878530759777e-05, "loss": 2.1321, "step": 9665 }, { "epoch": 0.32, "grad_norm": 0.7378524541854858, "learning_rate": 1.5617906029829808e-05, "loss": 2.1209, "step": 9666 }, { "epoch": 0.32, "grad_norm": 0.7201337218284607, "learning_rate": 1.5617026688594036e-05, "loss": 2.0737, "step": 9667 }, { "epoch": 0.32, "grad_norm": 0.7516124248504639, "learning_rate": 1.561614728390039e-05, "loss": 2.075, "step": 9668 }, { "epoch": 0.32, "grad_norm": 0.6917712688446045, "learning_rate": 1.56152678157588e-05, "loss": 2.1478, "step": 9669 }, { "epoch": 0.32, "grad_norm": 0.725277304649353, "learning_rate": 1.5614388284179206e-05, "loss": 2.0715, "step": 9670 }, { "epoch": 0.32, "grad_norm": 0.7047340869903564, "learning_rate": 1.5613508689171543e-05, "loss": 2.1364, "step": 9671 }, { "epoch": 0.32, "grad_norm": 0.7251754999160767, "learning_rate": 1.561262903074575e-05, "loss": 2.0938, "step": 9672 }, { "epoch": 0.32, "grad_norm": 0.7610461115837097, "learning_rate": 1.5611749308911763e-05, "loss": 2.1606, "step": 9673 }, { "epoch": 0.32, "grad_norm": 0.7318379282951355, "learning_rate": 1.561086952367952e-05, "loss": 2.124, "step": 9674 }, { "epoch": 0.32, "grad_norm": 0.7205722332000732, "learning_rate": 1.5609989675058965e-05, "loss": 2.0929, "step": 9675 }, { "epoch": 0.32, "grad_norm": 0.7249205112457275, "learning_rate": 1.5609109763060033e-05, "loss": 2.1646, "step": 9676 }, { "epoch": 0.32, "grad_norm": 0.727459192276001, "learning_rate": 1.5608229787692665e-05, "loss": 2.2156, "step": 9677 }, { "epoch": 0.32, "grad_norm": 0.7697524428367615, "learning_rate": 1.5607349748966806e-05, "loss": 2.2548, "step": 9678 }, { "epoch": 0.32, "grad_norm": 0.7445123791694641, "learning_rate": 1.560646964689239e-05, "loss": 2.1198, "step": 9679 }, { "epoch": 0.32, "grad_norm": 0.7075611352920532, "learning_rate": 1.560558948147937e-05, "loss": 2.1079, "step": 9680 }, { "epoch": 0.32, "grad_norm": 0.7296414375305176, "learning_rate": 1.5604709252737685e-05, "loss": 2.074, "step": 9681 }, { "epoch": 0.32, "grad_norm": 0.7431594133377075, "learning_rate": 1.560382896067728e-05, "loss": 2.0554, "step": 9682 }, { "epoch": 0.32, "grad_norm": 0.7352239489555359, "learning_rate": 1.5602948605308098e-05, "loss": 2.1339, "step": 9683 }, { "epoch": 0.32, "grad_norm": 0.7060679793357849, "learning_rate": 1.5602068186640088e-05, "loss": 2.0358, "step": 9684 }, { "epoch": 0.32, "grad_norm": 0.7050802111625671, "learning_rate": 1.5601187704683192e-05, "loss": 2.0464, "step": 9685 }, { "epoch": 0.32, "grad_norm": 0.7456352710723877, "learning_rate": 1.5600307159447362e-05, "loss": 2.191, "step": 9686 }, { "epoch": 0.32, "grad_norm": 0.690403163433075, "learning_rate": 1.5599426550942544e-05, "loss": 2.1071, "step": 9687 }, { "epoch": 0.32, "grad_norm": 0.7531574368476868, "learning_rate": 1.5598545879178687e-05, "loss": 2.0715, "step": 9688 }, { "epoch": 0.32, "grad_norm": 0.7362563014030457, "learning_rate": 1.5597665144165735e-05, "loss": 2.144, "step": 9689 }, { "epoch": 0.32, "grad_norm": 0.721060574054718, "learning_rate": 1.559678434591365e-05, "loss": 2.1294, "step": 9690 }, { "epoch": 0.32, "grad_norm": 0.7231608629226685, "learning_rate": 1.5595903484432372e-05, "loss": 2.1079, "step": 9691 }, { "epoch": 0.32, "grad_norm": 0.7737318873405457, "learning_rate": 1.5595022559731856e-05, "loss": 2.0954, "step": 9692 }, { "epoch": 0.32, "grad_norm": 0.7315420508384705, "learning_rate": 1.5594141571822055e-05, "loss": 2.173, "step": 9693 }, { "epoch": 0.32, "grad_norm": 0.7234458327293396, "learning_rate": 1.5593260520712917e-05, "loss": 2.1957, "step": 9694 }, { "epoch": 0.32, "grad_norm": 0.706051230430603, "learning_rate": 1.5592379406414403e-05, "loss": 2.171, "step": 9695 }, { "epoch": 0.32, "grad_norm": 0.7145938873291016, "learning_rate": 1.5591498228936464e-05, "loss": 2.0657, "step": 9696 }, { "epoch": 0.32, "grad_norm": 0.7288222312927246, "learning_rate": 1.5590616988289054e-05, "loss": 2.1432, "step": 9697 }, { "epoch": 0.32, "grad_norm": 0.6874464750289917, "learning_rate": 1.5589735684482134e-05, "loss": 2.1252, "step": 9698 }, { "epoch": 0.32, "grad_norm": 0.7213895320892334, "learning_rate": 1.558885431752565e-05, "loss": 2.0905, "step": 9699 }, { "epoch": 0.32, "grad_norm": 0.7034483551979065, "learning_rate": 1.5587972887429565e-05, "loss": 2.1318, "step": 9700 }, { "epoch": 0.32, "grad_norm": 0.7301508784294128, "learning_rate": 1.5587091394203842e-05, "loss": 2.0836, "step": 9701 }, { "epoch": 0.32, "grad_norm": 0.7120941877365112, "learning_rate": 1.558620983785843e-05, "loss": 2.1018, "step": 9702 }, { "epoch": 0.32, "grad_norm": 0.7034561634063721, "learning_rate": 1.5585328218403296e-05, "loss": 2.11, "step": 9703 }, { "epoch": 0.32, "grad_norm": 0.7046024203300476, "learning_rate": 1.55844465358484e-05, "loss": 2.0964, "step": 9704 }, { "epoch": 0.32, "grad_norm": 0.7175389528274536, "learning_rate": 1.5583564790203697e-05, "loss": 2.1949, "step": 9705 }, { "epoch": 0.32, "grad_norm": 0.7190459966659546, "learning_rate": 1.558268298147915e-05, "loss": 2.1111, "step": 9706 }, { "epoch": 0.32, "grad_norm": 0.7264535427093506, "learning_rate": 1.5581801109684725e-05, "loss": 2.0915, "step": 9707 }, { "epoch": 0.32, "grad_norm": 0.73790043592453, "learning_rate": 1.558091917483038e-05, "loss": 2.1259, "step": 9708 }, { "epoch": 0.32, "grad_norm": 0.7432808876037598, "learning_rate": 1.558003717692608e-05, "loss": 2.2039, "step": 9709 }, { "epoch": 0.32, "grad_norm": 0.7579568028450012, "learning_rate": 1.557915511598179e-05, "loss": 2.0407, "step": 9710 }, { "epoch": 0.32, "grad_norm": 0.7072755098342896, "learning_rate": 1.557827299200748e-05, "loss": 2.1199, "step": 9711 }, { "epoch": 0.32, "grad_norm": 0.6925066113471985, "learning_rate": 1.557739080501311e-05, "loss": 2.0794, "step": 9712 }, { "epoch": 0.32, "grad_norm": 0.7213560342788696, "learning_rate": 1.5576508555008643e-05, "loss": 2.1352, "step": 9713 }, { "epoch": 0.32, "grad_norm": 0.7008641362190247, "learning_rate": 1.557562624200405e-05, "loss": 2.1148, "step": 9714 }, { "epoch": 0.32, "grad_norm": 0.7303087711334229, "learning_rate": 1.5574743866009307e-05, "loss": 2.0961, "step": 9715 }, { "epoch": 0.32, "grad_norm": 0.7058576941490173, "learning_rate": 1.5573861427034368e-05, "loss": 2.1023, "step": 9716 }, { "epoch": 0.32, "grad_norm": 0.745682954788208, "learning_rate": 1.557297892508921e-05, "loss": 2.0735, "step": 9717 }, { "epoch": 0.32, "grad_norm": 0.763796865940094, "learning_rate": 1.5572096360183805e-05, "loss": 2.1054, "step": 9718 }, { "epoch": 0.32, "grad_norm": 0.7220934629440308, "learning_rate": 1.5571213732328118e-05, "loss": 2.1797, "step": 9719 }, { "epoch": 0.32, "grad_norm": 0.7159127593040466, "learning_rate": 1.5570331041532127e-05, "loss": 2.1119, "step": 9720 }, { "epoch": 0.32, "grad_norm": 0.7099900841712952, "learning_rate": 1.55694482878058e-05, "loss": 2.1781, "step": 9721 }, { "epoch": 0.32, "grad_norm": 0.7351802587509155, "learning_rate": 1.5568565471159105e-05, "loss": 2.0964, "step": 9722 }, { "epoch": 0.32, "grad_norm": 0.7681098580360413, "learning_rate": 1.5567682591602025e-05, "loss": 2.1356, "step": 9723 }, { "epoch": 0.32, "grad_norm": 0.7014427781105042, "learning_rate": 1.5566799649144528e-05, "loss": 2.1336, "step": 9724 }, { "epoch": 0.32, "grad_norm": 0.6984763145446777, "learning_rate": 1.5565916643796594e-05, "loss": 2.0458, "step": 9725 }, { "epoch": 0.32, "grad_norm": 0.7065165042877197, "learning_rate": 1.5565033575568193e-05, "loss": 2.1095, "step": 9726 }, { "epoch": 0.32, "grad_norm": 0.7118334174156189, "learning_rate": 1.5564150444469306e-05, "loss": 2.1977, "step": 9727 }, { "epoch": 0.32, "grad_norm": 0.7149691581726074, "learning_rate": 1.5563267250509906e-05, "loss": 2.164, "step": 9728 }, { "epoch": 0.32, "grad_norm": 0.7476349472999573, "learning_rate": 1.5562383993699977e-05, "loss": 2.0644, "step": 9729 }, { "epoch": 0.32, "grad_norm": 0.7497175931930542, "learning_rate": 1.556150067404949e-05, "loss": 2.1512, "step": 9730 }, { "epoch": 0.32, "grad_norm": 0.7219561338424683, "learning_rate": 1.5560617291568427e-05, "loss": 2.1152, "step": 9731 }, { "epoch": 0.32, "grad_norm": 0.7257176637649536, "learning_rate": 1.5559733846266772e-05, "loss": 2.1317, "step": 9732 }, { "epoch": 0.32, "grad_norm": 0.7180116772651672, "learning_rate": 1.55588503381545e-05, "loss": 2.11, "step": 9733 }, { "epoch": 0.32, "grad_norm": 0.7305561900138855, "learning_rate": 1.5557966767241596e-05, "loss": 2.0841, "step": 9734 }, { "epoch": 0.32, "grad_norm": 0.7213141918182373, "learning_rate": 1.5557083133538038e-05, "loss": 2.1015, "step": 9735 }, { "epoch": 0.32, "grad_norm": 0.7244415283203125, "learning_rate": 1.5556199437053814e-05, "loss": 2.1909, "step": 9736 }, { "epoch": 0.32, "grad_norm": 0.7275545597076416, "learning_rate": 1.5555315677798906e-05, "loss": 2.1289, "step": 9737 }, { "epoch": 0.32, "grad_norm": 0.7024979591369629, "learning_rate": 1.5554431855783295e-05, "loss": 2.1182, "step": 9738 }, { "epoch": 0.32, "grad_norm": 0.6924365162849426, "learning_rate": 1.5553547971016966e-05, "loss": 2.1226, "step": 9739 }, { "epoch": 0.32, "grad_norm": 0.7123757600784302, "learning_rate": 1.555266402350991e-05, "loss": 2.1115, "step": 9740 }, { "epoch": 0.32, "grad_norm": 0.7232630848884583, "learning_rate": 1.555178001327211e-05, "loss": 2.1744, "step": 9741 }, { "epoch": 0.32, "grad_norm": 0.7116715908050537, "learning_rate": 1.5550895940313552e-05, "loss": 2.1361, "step": 9742 }, { "epoch": 0.32, "grad_norm": 0.6923726201057434, "learning_rate": 1.5550011804644226e-05, "loss": 2.0649, "step": 9743 }, { "epoch": 0.32, "grad_norm": 0.7449272871017456, "learning_rate": 1.554912760627412e-05, "loss": 2.1537, "step": 9744 }, { "epoch": 0.32, "grad_norm": 0.6884171962738037, "learning_rate": 1.5548243345213223e-05, "loss": 2.1485, "step": 9745 }, { "epoch": 0.32, "grad_norm": 0.7247269153594971, "learning_rate": 1.554735902147152e-05, "loss": 2.1648, "step": 9746 }, { "epoch": 0.32, "grad_norm": 0.7230300903320312, "learning_rate": 1.554647463505901e-05, "loss": 2.1397, "step": 9747 }, { "epoch": 0.32, "grad_norm": 0.740444004535675, "learning_rate": 1.554559018598568e-05, "loss": 2.1322, "step": 9748 }, { "epoch": 0.32, "grad_norm": 0.7256143093109131, "learning_rate": 1.5544705674261517e-05, "loss": 2.1217, "step": 9749 }, { "epoch": 0.32, "grad_norm": 0.7350302934646606, "learning_rate": 1.554382109989652e-05, "loss": 2.164, "step": 9750 }, { "epoch": 0.32, "grad_norm": 0.6851370334625244, "learning_rate": 1.5542936462900685e-05, "loss": 2.0827, "step": 9751 }, { "epoch": 0.32, "grad_norm": 0.7035511136054993, "learning_rate": 1.5542051763284002e-05, "loss": 2.0926, "step": 9752 }, { "epoch": 0.32, "grad_norm": 0.7148852944374084, "learning_rate": 1.5541167001056466e-05, "loss": 2.1065, "step": 9753 }, { "epoch": 0.32, "grad_norm": 0.7390248775482178, "learning_rate": 1.5540282176228073e-05, "loss": 2.0664, "step": 9754 }, { "epoch": 0.32, "grad_norm": 0.7371506094932556, "learning_rate": 1.5539397288808817e-05, "loss": 2.1897, "step": 9755 }, { "epoch": 0.32, "grad_norm": 0.7095745801925659, "learning_rate": 1.5538512338808696e-05, "loss": 2.1245, "step": 9756 }, { "epoch": 0.32, "grad_norm": 0.7180996537208557, "learning_rate": 1.553762732623771e-05, "loss": 2.121, "step": 9757 }, { "epoch": 0.32, "grad_norm": 0.71940016746521, "learning_rate": 1.5536742251105856e-05, "loss": 2.0846, "step": 9758 }, { "epoch": 0.32, "grad_norm": 0.7432024478912354, "learning_rate": 1.5535857113423133e-05, "loss": 2.1328, "step": 9759 }, { "epoch": 0.32, "grad_norm": 0.7416552305221558, "learning_rate": 1.553497191319954e-05, "loss": 2.2062, "step": 9760 }, { "epoch": 0.32, "grad_norm": 0.7160609364509583, "learning_rate": 1.5534086650445077e-05, "loss": 2.1748, "step": 9761 }, { "epoch": 0.32, "grad_norm": 0.7547546625137329, "learning_rate": 1.5533201325169746e-05, "loss": 2.1229, "step": 9762 }, { "epoch": 0.32, "grad_norm": 0.7350295782089233, "learning_rate": 1.5532315937383554e-05, "loss": 2.1763, "step": 9763 }, { "epoch": 0.32, "grad_norm": 0.7067313194274902, "learning_rate": 1.5531430487096495e-05, "loss": 2.1024, "step": 9764 }, { "epoch": 0.32, "grad_norm": 0.7448184490203857, "learning_rate": 1.5530544974318577e-05, "loss": 2.1582, "step": 9765 }, { "epoch": 0.32, "grad_norm": 0.6961895227432251, "learning_rate": 1.5529659399059803e-05, "loss": 2.1542, "step": 9766 }, { "epoch": 0.32, "grad_norm": 0.7170191407203674, "learning_rate": 1.552877376133018e-05, "loss": 2.0092, "step": 9767 }, { "epoch": 0.32, "grad_norm": 0.7149031758308411, "learning_rate": 1.5527888061139707e-05, "loss": 2.1091, "step": 9768 }, { "epoch": 0.33, "grad_norm": 0.7349783182144165, "learning_rate": 1.5527002298498396e-05, "loss": 2.1104, "step": 9769 }, { "epoch": 0.33, "grad_norm": 0.7329489588737488, "learning_rate": 1.552611647341625e-05, "loss": 2.1871, "step": 9770 }, { "epoch": 0.33, "grad_norm": 0.7452016472816467, "learning_rate": 1.5525230585903285e-05, "loss": 2.0818, "step": 9771 }, { "epoch": 0.33, "grad_norm": 0.7211071252822876, "learning_rate": 1.5524344635969498e-05, "loss": 2.1826, "step": 9772 }, { "epoch": 0.33, "grad_norm": 0.7095054984092712, "learning_rate": 1.5523458623624904e-05, "loss": 2.0897, "step": 9773 }, { "epoch": 0.33, "grad_norm": 0.7175541520118713, "learning_rate": 1.552257254887951e-05, "loss": 2.1136, "step": 9774 }, { "epoch": 0.33, "grad_norm": 0.7369424104690552, "learning_rate": 1.552168641174333e-05, "loss": 2.2051, "step": 9775 }, { "epoch": 0.33, "grad_norm": 0.6945841312408447, "learning_rate": 1.5520800212226374e-05, "loss": 2.1022, "step": 9776 }, { "epoch": 0.33, "grad_norm": 0.7432894706726074, "learning_rate": 1.551991395033865e-05, "loss": 2.1468, "step": 9777 }, { "epoch": 0.33, "grad_norm": 0.7032977938652039, "learning_rate": 1.5519027626090175e-05, "loss": 2.1469, "step": 9778 }, { "epoch": 0.33, "grad_norm": 0.7035618424415588, "learning_rate": 1.5518141239490958e-05, "loss": 2.1076, "step": 9779 }, { "epoch": 0.33, "grad_norm": 0.7174363136291504, "learning_rate": 1.5517254790551017e-05, "loss": 2.1474, "step": 9780 }, { "epoch": 0.33, "grad_norm": 0.7240395545959473, "learning_rate": 1.5516368279280365e-05, "loss": 2.1265, "step": 9781 }, { "epoch": 0.33, "grad_norm": 0.730636477470398, "learning_rate": 1.551548170568902e-05, "loss": 2.1643, "step": 9782 }, { "epoch": 0.33, "grad_norm": 0.7373067736625671, "learning_rate": 1.5514595069786992e-05, "loss": 2.085, "step": 9783 }, { "epoch": 0.33, "grad_norm": 0.736629068851471, "learning_rate": 1.5513708371584296e-05, "loss": 2.0876, "step": 9784 }, { "epoch": 0.33, "grad_norm": 0.7060760855674744, "learning_rate": 1.551282161109096e-05, "loss": 2.0933, "step": 9785 }, { "epoch": 0.33, "grad_norm": 0.7124506831169128, "learning_rate": 1.5511934788316995e-05, "loss": 2.1651, "step": 9786 }, { "epoch": 0.33, "grad_norm": 0.7154838442802429, "learning_rate": 1.551104790327242e-05, "loss": 2.2181, "step": 9787 }, { "epoch": 0.33, "grad_norm": 0.7429937720298767, "learning_rate": 1.5510160955967256e-05, "loss": 2.1809, "step": 9788 }, { "epoch": 0.33, "grad_norm": 0.7234243750572205, "learning_rate": 1.5509273946411525e-05, "loss": 2.1139, "step": 9789 }, { "epoch": 0.33, "grad_norm": 0.691798746585846, "learning_rate": 1.5508386874615244e-05, "loss": 2.0765, "step": 9790 }, { "epoch": 0.33, "grad_norm": 0.7239765524864197, "learning_rate": 1.550749974058844e-05, "loss": 2.1571, "step": 9791 }, { "epoch": 0.33, "grad_norm": 0.713736891746521, "learning_rate": 1.5506612544341124e-05, "loss": 2.1501, "step": 9792 }, { "epoch": 0.33, "grad_norm": 0.7478756308555603, "learning_rate": 1.550572528588333e-05, "loss": 2.1259, "step": 9793 }, { "epoch": 0.33, "grad_norm": 0.6878976225852966, "learning_rate": 1.550483796522508e-05, "loss": 2.0555, "step": 9794 }, { "epoch": 0.33, "grad_norm": 0.7342193722724915, "learning_rate": 1.5503950582376398e-05, "loss": 2.1753, "step": 9795 }, { "epoch": 0.33, "grad_norm": 0.7443256378173828, "learning_rate": 1.5503063137347307e-05, "loss": 2.1324, "step": 9796 }, { "epoch": 0.33, "grad_norm": 0.7581925988197327, "learning_rate": 1.550217563014783e-05, "loss": 2.0882, "step": 9797 }, { "epoch": 0.33, "grad_norm": 0.7454391121864319, "learning_rate": 1.5501288060788e-05, "loss": 2.1966, "step": 9798 }, { "epoch": 0.33, "grad_norm": 0.7362740635871887, "learning_rate": 1.550040042927784e-05, "loss": 2.1479, "step": 9799 }, { "epoch": 0.33, "grad_norm": 0.7669014930725098, "learning_rate": 1.5499512735627385e-05, "loss": 2.1624, "step": 9800 }, { "epoch": 0.33, "grad_norm": 0.7412266731262207, "learning_rate": 1.5498624979846653e-05, "loss": 2.1285, "step": 9801 }, { "epoch": 0.33, "grad_norm": 0.724396288394928, "learning_rate": 1.549773716194568e-05, "loss": 2.1935, "step": 9802 }, { "epoch": 0.33, "grad_norm": 0.7174577116966248, "learning_rate": 1.5496849281934494e-05, "loss": 2.089, "step": 9803 }, { "epoch": 0.33, "grad_norm": 0.7296112179756165, "learning_rate": 1.5495961339823125e-05, "loss": 2.1034, "step": 9804 }, { "epoch": 0.33, "grad_norm": 0.7373788356781006, "learning_rate": 1.549507333562161e-05, "loss": 2.1389, "step": 9805 }, { "epoch": 0.33, "grad_norm": 0.7346022129058838, "learning_rate": 1.549418526933997e-05, "loss": 2.0909, "step": 9806 }, { "epoch": 0.33, "grad_norm": 0.7354152798652649, "learning_rate": 1.5493297140988253e-05, "loss": 2.1904, "step": 9807 }, { "epoch": 0.33, "grad_norm": 0.6868494749069214, "learning_rate": 1.549240895057648e-05, "loss": 2.1085, "step": 9808 }, { "epoch": 0.33, "grad_norm": 0.7333486080169678, "learning_rate": 1.549152069811469e-05, "loss": 2.0694, "step": 9809 }, { "epoch": 0.33, "grad_norm": 0.7068595290184021, "learning_rate": 1.5490632383612915e-05, "loss": 2.0819, "step": 9810 }, { "epoch": 0.33, "grad_norm": 0.7222439646720886, "learning_rate": 1.5489744007081198e-05, "loss": 2.0458, "step": 9811 }, { "epoch": 0.33, "grad_norm": 0.7160742878913879, "learning_rate": 1.5488855568529565e-05, "loss": 2.1086, "step": 9812 }, { "epoch": 0.33, "grad_norm": 0.717342734336853, "learning_rate": 1.5487967067968063e-05, "loss": 2.1249, "step": 9813 }, { "epoch": 0.33, "grad_norm": 0.747526228427887, "learning_rate": 1.5487078505406724e-05, "loss": 2.0738, "step": 9814 }, { "epoch": 0.33, "grad_norm": 0.6963484287261963, "learning_rate": 1.5486189880855587e-05, "loss": 2.1135, "step": 9815 }, { "epoch": 0.33, "grad_norm": 0.7524906992912292, "learning_rate": 1.5485301194324695e-05, "loss": 2.1046, "step": 9816 }, { "epoch": 0.33, "grad_norm": 0.7156223058700562, "learning_rate": 1.5484412445824082e-05, "loss": 2.137, "step": 9817 }, { "epoch": 0.33, "grad_norm": 0.7152125239372253, "learning_rate": 1.548352363536379e-05, "loss": 2.0648, "step": 9818 }, { "epoch": 0.33, "grad_norm": 0.758443295955658, "learning_rate": 1.5482634762953864e-05, "loss": 2.1882, "step": 9819 }, { "epoch": 0.33, "grad_norm": 0.7346198558807373, "learning_rate": 1.5481745828604344e-05, "loss": 2.1159, "step": 9820 }, { "epoch": 0.33, "grad_norm": 0.7042797803878784, "learning_rate": 1.548085683232527e-05, "loss": 2.1502, "step": 9821 }, { "epoch": 0.33, "grad_norm": 0.7387520670890808, "learning_rate": 1.547996777412669e-05, "loss": 2.1844, "step": 9822 }, { "epoch": 0.33, "grad_norm": 0.7866591811180115, "learning_rate": 1.5479078654018644e-05, "loss": 2.2013, "step": 9823 }, { "epoch": 0.33, "grad_norm": 0.7142841219902039, "learning_rate": 1.547818947201118e-05, "loss": 2.1571, "step": 9824 }, { "epoch": 0.33, "grad_norm": 0.7088612914085388, "learning_rate": 1.547730022811434e-05, "loss": 2.1592, "step": 9825 }, { "epoch": 0.33, "grad_norm": 0.7097257971763611, "learning_rate": 1.547641092233817e-05, "loss": 2.1393, "step": 9826 }, { "epoch": 0.33, "grad_norm": 0.6985494494438171, "learning_rate": 1.5475521554692724e-05, "loss": 2.0859, "step": 9827 }, { "epoch": 0.33, "grad_norm": 0.7373705506324768, "learning_rate": 1.547463212518804e-05, "loss": 2.0419, "step": 9828 }, { "epoch": 0.33, "grad_norm": 0.7425941824913025, "learning_rate": 1.5473742633834174e-05, "loss": 2.0636, "step": 9829 }, { "epoch": 0.33, "grad_norm": 0.704028844833374, "learning_rate": 1.547285308064117e-05, "loss": 2.1561, "step": 9830 }, { "epoch": 0.33, "grad_norm": 0.6992152333259583, "learning_rate": 1.5471963465619082e-05, "loss": 2.1074, "step": 9831 }, { "epoch": 0.33, "grad_norm": 0.7251248359680176, "learning_rate": 1.5471073788777956e-05, "loss": 2.1368, "step": 9832 }, { "epoch": 0.33, "grad_norm": 0.7286407351493835, "learning_rate": 1.5470184050127843e-05, "loss": 2.1674, "step": 9833 }, { "epoch": 0.33, "grad_norm": 0.7300108671188354, "learning_rate": 1.5469294249678795e-05, "loss": 2.1612, "step": 9834 }, { "epoch": 0.33, "grad_norm": 0.7284534573554993, "learning_rate": 1.546840438744087e-05, "loss": 2.1442, "step": 9835 }, { "epoch": 0.33, "grad_norm": 0.7208967208862305, "learning_rate": 1.5467514463424115e-05, "loss": 2.0659, "step": 9836 }, { "epoch": 0.33, "grad_norm": 0.7511195540428162, "learning_rate": 1.5466624477638587e-05, "loss": 2.0932, "step": 9837 }, { "epoch": 0.33, "grad_norm": 0.7420071959495544, "learning_rate": 1.546573443009434e-05, "loss": 2.0748, "step": 9838 }, { "epoch": 0.33, "grad_norm": 0.7343674302101135, "learning_rate": 1.5464844320801425e-05, "loss": 2.1689, "step": 9839 }, { "epoch": 0.33, "grad_norm": 0.7268196940422058, "learning_rate": 1.5463954149769906e-05, "loss": 2.1533, "step": 9840 }, { "epoch": 0.33, "grad_norm": 0.7534284591674805, "learning_rate": 1.5463063917009832e-05, "loss": 2.2125, "step": 9841 }, { "epoch": 0.33, "grad_norm": 0.7615901231765747, "learning_rate": 1.5462173622531268e-05, "loss": 2.172, "step": 9842 }, { "epoch": 0.33, "grad_norm": 0.756576657295227, "learning_rate": 1.5461283266344263e-05, "loss": 2.1248, "step": 9843 }, { "epoch": 0.33, "grad_norm": 0.7452014088630676, "learning_rate": 1.5460392848458883e-05, "loss": 2.1654, "step": 9844 }, { "epoch": 0.33, "grad_norm": 0.7113962769508362, "learning_rate": 1.5459502368885182e-05, "loss": 2.0892, "step": 9845 }, { "epoch": 0.33, "grad_norm": 0.7467913627624512, "learning_rate": 1.5458611827633224e-05, "loss": 2.2005, "step": 9846 }, { "epoch": 0.33, "grad_norm": 0.714866042137146, "learning_rate": 1.545772122471307e-05, "loss": 2.1368, "step": 9847 }, { "epoch": 0.33, "grad_norm": 0.7434927225112915, "learning_rate": 1.545683056013478e-05, "loss": 2.1496, "step": 9848 }, { "epoch": 0.33, "grad_norm": 0.704870343208313, "learning_rate": 1.5455939833908415e-05, "loss": 2.0809, "step": 9849 }, { "epoch": 0.33, "grad_norm": 0.7373092174530029, "learning_rate": 1.5455049046044038e-05, "loss": 2.1198, "step": 9850 }, { "epoch": 0.33, "grad_norm": 0.7553548216819763, "learning_rate": 1.5454158196551716e-05, "loss": 2.0814, "step": 9851 }, { "epoch": 0.33, "grad_norm": 0.7285803556442261, "learning_rate": 1.545326728544151e-05, "loss": 2.1672, "step": 9852 }, { "epoch": 0.33, "grad_norm": 0.6988288164138794, "learning_rate": 1.5452376312723486e-05, "loss": 2.1192, "step": 9853 }, { "epoch": 0.33, "grad_norm": 0.7022321224212646, "learning_rate": 1.545148527840771e-05, "loss": 2.0755, "step": 9854 }, { "epoch": 0.33, "grad_norm": 0.7328547835350037, "learning_rate": 1.5450594182504247e-05, "loss": 2.1475, "step": 9855 }, { "epoch": 0.33, "grad_norm": 0.7344414591789246, "learning_rate": 1.544970302502317e-05, "loss": 2.1254, "step": 9856 }, { "epoch": 0.33, "grad_norm": 0.7214395403862, "learning_rate": 1.5448811805974537e-05, "loss": 2.1237, "step": 9857 }, { "epoch": 0.33, "grad_norm": 0.7196457386016846, "learning_rate": 1.5447920525368424e-05, "loss": 2.1504, "step": 9858 }, { "epoch": 0.33, "grad_norm": 0.7674043774604797, "learning_rate": 1.5447029183214896e-05, "loss": 2.1422, "step": 9859 }, { "epoch": 0.33, "grad_norm": 0.7161842584609985, "learning_rate": 1.5446137779524027e-05, "loss": 2.1256, "step": 9860 }, { "epoch": 0.33, "grad_norm": 0.7090122103691101, "learning_rate": 1.5445246314305885e-05, "loss": 2.0829, "step": 9861 }, { "epoch": 0.33, "grad_norm": 0.7219876646995544, "learning_rate": 1.544435478757054e-05, "loss": 2.1002, "step": 9862 }, { "epoch": 0.33, "grad_norm": 0.7130935788154602, "learning_rate": 1.5443463199328066e-05, "loss": 2.0865, "step": 9863 }, { "epoch": 0.33, "grad_norm": 0.7210922837257385, "learning_rate": 1.5442571549588533e-05, "loss": 2.1697, "step": 9864 }, { "epoch": 0.33, "grad_norm": 0.7407726049423218, "learning_rate": 1.5441679838362017e-05, "loss": 2.1573, "step": 9865 }, { "epoch": 0.33, "grad_norm": 0.7217987179756165, "learning_rate": 1.5440788065658593e-05, "loss": 2.1135, "step": 9866 }, { "epoch": 0.33, "grad_norm": 0.7374716997146606, "learning_rate": 1.5439896231488335e-05, "loss": 2.1197, "step": 9867 }, { "epoch": 0.33, "grad_norm": 0.7338510751724243, "learning_rate": 1.5439004335861313e-05, "loss": 2.0922, "step": 9868 }, { "epoch": 0.33, "grad_norm": 0.7532132863998413, "learning_rate": 1.5438112378787615e-05, "loss": 2.0739, "step": 9869 }, { "epoch": 0.33, "grad_norm": 0.7063016891479492, "learning_rate": 1.5437220360277302e-05, "loss": 2.1581, "step": 9870 }, { "epoch": 0.33, "grad_norm": 0.7058444023132324, "learning_rate": 1.5436328280340465e-05, "loss": 2.0959, "step": 9871 }, { "epoch": 0.33, "grad_norm": 0.7520977854728699, "learning_rate": 1.5435436138987173e-05, "loss": 2.1477, "step": 9872 }, { "epoch": 0.33, "grad_norm": 0.6891581416130066, "learning_rate": 1.5434543936227516e-05, "loss": 2.0731, "step": 9873 }, { "epoch": 0.33, "grad_norm": 0.7480459809303284, "learning_rate": 1.543365167207156e-05, "loss": 2.1227, "step": 9874 }, { "epoch": 0.33, "grad_norm": 0.789496898651123, "learning_rate": 1.5432759346529395e-05, "loss": 2.1305, "step": 9875 }, { "epoch": 0.33, "grad_norm": 0.7157832384109497, "learning_rate": 1.5431866959611098e-05, "loss": 2.0395, "step": 9876 }, { "epoch": 0.33, "grad_norm": 0.6972479224205017, "learning_rate": 1.5430974511326747e-05, "loss": 2.1008, "step": 9877 }, { "epoch": 0.33, "grad_norm": 0.6888520121574402, "learning_rate": 1.5430082001686436e-05, "loss": 2.1229, "step": 9878 }, { "epoch": 0.33, "grad_norm": 0.702393114566803, "learning_rate": 1.542918943070024e-05, "loss": 2.1191, "step": 9879 }, { "epoch": 0.33, "grad_norm": 0.7181735634803772, "learning_rate": 1.542829679837824e-05, "loss": 2.0763, "step": 9880 }, { "epoch": 0.33, "grad_norm": 0.7311133146286011, "learning_rate": 1.5427404104730526e-05, "loss": 2.1268, "step": 9881 }, { "epoch": 0.33, "grad_norm": 0.7658363580703735, "learning_rate": 1.542651134976718e-05, "loss": 2.1161, "step": 9882 }, { "epoch": 0.33, "grad_norm": 0.7795351147651672, "learning_rate": 1.5425618533498294e-05, "loss": 2.0841, "step": 9883 }, { "epoch": 0.33, "grad_norm": 0.7189720869064331, "learning_rate": 1.5424725655933942e-05, "loss": 2.1377, "step": 9884 }, { "epoch": 0.33, "grad_norm": 0.7353165745735168, "learning_rate": 1.5423832717084223e-05, "loss": 2.1416, "step": 9885 }, { "epoch": 0.33, "grad_norm": 0.730982780456543, "learning_rate": 1.542293971695922e-05, "loss": 2.1549, "step": 9886 }, { "epoch": 0.33, "grad_norm": 0.769044816493988, "learning_rate": 1.5422046655569022e-05, "loss": 2.1356, "step": 9887 }, { "epoch": 0.33, "grad_norm": 0.7480217218399048, "learning_rate": 1.5421153532923717e-05, "loss": 2.1525, "step": 9888 }, { "epoch": 0.33, "grad_norm": 0.7241945266723633, "learning_rate": 1.54202603490334e-05, "loss": 2.0659, "step": 9889 }, { "epoch": 0.33, "grad_norm": 0.6988205313682556, "learning_rate": 1.5419367103908157e-05, "loss": 2.1106, "step": 9890 }, { "epoch": 0.33, "grad_norm": 0.7037733197212219, "learning_rate": 1.541847379755808e-05, "loss": 2.1116, "step": 9891 }, { "epoch": 0.33, "grad_norm": 0.7654491066932678, "learning_rate": 1.541758042999326e-05, "loss": 2.1954, "step": 9892 }, { "epoch": 0.33, "grad_norm": 0.8013147711753845, "learning_rate": 1.5416687001223792e-05, "loss": 2.1497, "step": 9893 }, { "epoch": 0.33, "grad_norm": 0.7342007756233215, "learning_rate": 1.5415793511259773e-05, "loss": 2.1371, "step": 9894 }, { "epoch": 0.33, "grad_norm": 0.7167657613754272, "learning_rate": 1.5414899960111288e-05, "loss": 2.1428, "step": 9895 }, { "epoch": 0.33, "grad_norm": 0.7281323075294495, "learning_rate": 1.5414006347788436e-05, "loss": 2.0863, "step": 9896 }, { "epoch": 0.33, "grad_norm": 0.7209944128990173, "learning_rate": 1.541311267430132e-05, "loss": 2.135, "step": 9897 }, { "epoch": 0.33, "grad_norm": 0.7091156244277954, "learning_rate": 1.541221893966002e-05, "loss": 2.2141, "step": 9898 }, { "epoch": 0.33, "grad_norm": 0.7492038011550903, "learning_rate": 1.5411325143874646e-05, "loss": 2.0966, "step": 9899 }, { "epoch": 0.33, "grad_norm": 0.7224010825157166, "learning_rate": 1.5410431286955293e-05, "loss": 2.1006, "step": 9900 }, { "epoch": 0.33, "grad_norm": 0.7136358022689819, "learning_rate": 1.540953736891206e-05, "loss": 2.1388, "step": 9901 }, { "epoch": 0.33, "grad_norm": 0.7419625520706177, "learning_rate": 1.5408643389755043e-05, "loss": 2.1652, "step": 9902 }, { "epoch": 0.33, "grad_norm": 0.7292348742485046, "learning_rate": 1.5407749349494338e-05, "loss": 2.1878, "step": 9903 }, { "epoch": 0.33, "grad_norm": 0.6981943249702454, "learning_rate": 1.5406855248140057e-05, "loss": 2.0896, "step": 9904 }, { "epoch": 0.33, "grad_norm": 0.7167201042175293, "learning_rate": 1.540596108570229e-05, "loss": 2.1466, "step": 9905 }, { "epoch": 0.33, "grad_norm": 0.7755454778671265, "learning_rate": 1.5405066862191144e-05, "loss": 2.1449, "step": 9906 }, { "epoch": 0.33, "grad_norm": 0.7494082450866699, "learning_rate": 1.540417257761672e-05, "loss": 2.0588, "step": 9907 }, { "epoch": 0.33, "grad_norm": 0.7106969952583313, "learning_rate": 1.5403278231989123e-05, "loss": 2.0879, "step": 9908 }, { "epoch": 0.33, "grad_norm": 0.7271022200584412, "learning_rate": 1.540238382531845e-05, "loss": 2.0686, "step": 9909 }, { "epoch": 0.33, "grad_norm": 0.7402494549751282, "learning_rate": 1.5401489357614815e-05, "loss": 2.0353, "step": 9910 }, { "epoch": 0.33, "grad_norm": 0.7261745929718018, "learning_rate": 1.540059482888832e-05, "loss": 2.1017, "step": 9911 }, { "epoch": 0.33, "grad_norm": 0.7259093523025513, "learning_rate": 1.5399700239149067e-05, "loss": 2.1564, "step": 9912 }, { "epoch": 0.33, "grad_norm": 0.7282025218009949, "learning_rate": 1.5398805588407167e-05, "loss": 2.188, "step": 9913 }, { "epoch": 0.33, "grad_norm": 0.7093106508255005, "learning_rate": 1.5397910876672725e-05, "loss": 2.0741, "step": 9914 }, { "epoch": 0.33, "grad_norm": 0.7277908325195312, "learning_rate": 1.5397016103955848e-05, "loss": 2.0758, "step": 9915 }, { "epoch": 0.33, "grad_norm": 0.7237734198570251, "learning_rate": 1.539612127026665e-05, "loss": 2.1128, "step": 9916 }, { "epoch": 0.33, "grad_norm": 0.7731038331985474, "learning_rate": 1.539522637561523e-05, "loss": 2.1854, "step": 9917 }, { "epoch": 0.33, "grad_norm": 0.6891342997550964, "learning_rate": 1.5394331420011706e-05, "loss": 2.0909, "step": 9918 }, { "epoch": 0.33, "grad_norm": 0.7118052244186401, "learning_rate": 1.539343640346619e-05, "loss": 2.0269, "step": 9919 }, { "epoch": 0.33, "grad_norm": 0.7081072926521301, "learning_rate": 1.539254132598879e-05, "loss": 2.1136, "step": 9920 }, { "epoch": 0.33, "grad_norm": 0.7083466053009033, "learning_rate": 1.5391646187589618e-05, "loss": 2.1611, "step": 9921 }, { "epoch": 0.33, "grad_norm": 0.7518587708473206, "learning_rate": 1.539075098827879e-05, "loss": 2.1501, "step": 9922 }, { "epoch": 0.33, "grad_norm": 0.7448289394378662, "learning_rate": 1.5389855728066408e-05, "loss": 2.1538, "step": 9923 }, { "epoch": 0.33, "grad_norm": 0.7354655265808105, "learning_rate": 1.5388960406962602e-05, "loss": 2.1285, "step": 9924 }, { "epoch": 0.33, "grad_norm": 0.7223306894302368, "learning_rate": 1.5388065024977477e-05, "loss": 2.1777, "step": 9925 }, { "epoch": 0.33, "grad_norm": 0.7092664837837219, "learning_rate": 1.5387169582121153e-05, "loss": 2.1183, "step": 9926 }, { "epoch": 0.33, "grad_norm": 0.7027783989906311, "learning_rate": 1.5386274078403742e-05, "loss": 2.2018, "step": 9927 }, { "epoch": 0.33, "grad_norm": 0.7217532992362976, "learning_rate": 1.5385378513835366e-05, "loss": 2.1276, "step": 9928 }, { "epoch": 0.33, "grad_norm": 0.7172870635986328, "learning_rate": 1.5384482888426135e-05, "loss": 2.0906, "step": 9929 }, { "epoch": 0.33, "grad_norm": 0.751054584980011, "learning_rate": 1.5383587202186176e-05, "loss": 2.0431, "step": 9930 }, { "epoch": 0.33, "grad_norm": 0.7331206798553467, "learning_rate": 1.53826914551256e-05, "loss": 2.171, "step": 9931 }, { "epoch": 0.33, "grad_norm": 0.7286101579666138, "learning_rate": 1.5381795647254537e-05, "loss": 2.1675, "step": 9932 }, { "epoch": 0.33, "grad_norm": 0.7480999231338501, "learning_rate": 1.5380899778583094e-05, "loss": 2.0751, "step": 9933 }, { "epoch": 0.33, "grad_norm": 0.7488501071929932, "learning_rate": 1.5380003849121402e-05, "loss": 2.1048, "step": 9934 }, { "epoch": 0.33, "grad_norm": 0.7640261650085449, "learning_rate": 1.537910785887958e-05, "loss": 2.0773, "step": 9935 }, { "epoch": 0.33, "grad_norm": 0.732803463935852, "learning_rate": 1.537821180786775e-05, "loss": 2.1771, "step": 9936 }, { "epoch": 0.33, "grad_norm": 0.7260231971740723, "learning_rate": 1.5377315696096034e-05, "loss": 2.191, "step": 9937 }, { "epoch": 0.33, "grad_norm": 0.7418771982192993, "learning_rate": 1.5376419523574554e-05, "loss": 2.1695, "step": 9938 }, { "epoch": 0.33, "grad_norm": 0.7330632209777832, "learning_rate": 1.5375523290313443e-05, "loss": 2.1246, "step": 9939 }, { "epoch": 0.33, "grad_norm": 0.7042512893676758, "learning_rate": 1.5374626996322817e-05, "loss": 2.1056, "step": 9940 }, { "epoch": 0.33, "grad_norm": 0.7159243822097778, "learning_rate": 1.5373730641612804e-05, "loss": 2.1369, "step": 9941 }, { "epoch": 0.33, "grad_norm": 0.705363392829895, "learning_rate": 1.537283422619353e-05, "loss": 2.1225, "step": 9942 }, { "epoch": 0.33, "grad_norm": 0.8177266716957092, "learning_rate": 1.5371937750075128e-05, "loss": 2.0374, "step": 9943 }, { "epoch": 0.33, "grad_norm": 0.7357922792434692, "learning_rate": 1.5371041213267722e-05, "loss": 2.1691, "step": 9944 }, { "epoch": 0.33, "grad_norm": 0.7095556855201721, "learning_rate": 1.5370144615781434e-05, "loss": 2.1771, "step": 9945 }, { "epoch": 0.33, "grad_norm": 0.7182725667953491, "learning_rate": 1.5369247957626408e-05, "loss": 2.0836, "step": 9946 }, { "epoch": 0.33, "grad_norm": 0.7115985751152039, "learning_rate": 1.536835123881276e-05, "loss": 2.0627, "step": 9947 }, { "epoch": 0.33, "grad_norm": 0.7677529454231262, "learning_rate": 1.5367454459350625e-05, "loss": 2.1948, "step": 9948 }, { "epoch": 0.33, "grad_norm": 0.7491755485534668, "learning_rate": 1.5366557619250137e-05, "loss": 2.1202, "step": 9949 }, { "epoch": 0.33, "grad_norm": 0.7151673436164856, "learning_rate": 1.5365660718521425e-05, "loss": 2.1107, "step": 9950 }, { "epoch": 0.33, "grad_norm": 0.7090498805046082, "learning_rate": 1.5364763757174625e-05, "loss": 2.1193, "step": 9951 }, { "epoch": 0.33, "grad_norm": 0.7554393410682678, "learning_rate": 1.5363866735219866e-05, "loss": 2.1116, "step": 9952 }, { "epoch": 0.33, "grad_norm": 0.7136741280555725, "learning_rate": 1.5362969652667286e-05, "loss": 2.1138, "step": 9953 }, { "epoch": 0.33, "grad_norm": 0.7398972511291504, "learning_rate": 1.5362072509527015e-05, "loss": 2.1595, "step": 9954 }, { "epoch": 0.33, "grad_norm": 0.7205328941345215, "learning_rate": 1.5361175305809194e-05, "loss": 2.1002, "step": 9955 }, { "epoch": 0.33, "grad_norm": 0.7061589360237122, "learning_rate": 1.5360278041523953e-05, "loss": 2.1428, "step": 9956 }, { "epoch": 0.33, "grad_norm": 0.703381359577179, "learning_rate": 1.5359380716681437e-05, "loss": 2.111, "step": 9957 }, { "epoch": 0.33, "grad_norm": 0.7750102281570435, "learning_rate": 1.5358483331291776e-05, "loss": 2.111, "step": 9958 }, { "epoch": 0.33, "grad_norm": 0.7327236533164978, "learning_rate": 1.535758588536511e-05, "loss": 2.0984, "step": 9959 }, { "epoch": 0.33, "grad_norm": 0.7597827911376953, "learning_rate": 1.535668837891158e-05, "loss": 2.1211, "step": 9960 }, { "epoch": 0.33, "grad_norm": 0.7324711084365845, "learning_rate": 1.535579081194132e-05, "loss": 2.1065, "step": 9961 }, { "epoch": 0.33, "grad_norm": 0.7440337538719177, "learning_rate": 1.5354893184464482e-05, "loss": 2.1782, "step": 9962 }, { "epoch": 0.33, "grad_norm": 0.7318512201309204, "learning_rate": 1.5353995496491193e-05, "loss": 2.1907, "step": 9963 }, { "epoch": 0.33, "grad_norm": 0.6927915215492249, "learning_rate": 1.5353097748031603e-05, "loss": 2.1267, "step": 9964 }, { "epoch": 0.33, "grad_norm": 0.7288481593132019, "learning_rate": 1.535219993909585e-05, "loss": 2.0988, "step": 9965 }, { "epoch": 0.33, "grad_norm": 0.7203844785690308, "learning_rate": 1.535130206969408e-05, "loss": 2.0986, "step": 9966 }, { "epoch": 0.33, "grad_norm": 0.7276561260223389, "learning_rate": 1.5350404139836434e-05, "loss": 2.1159, "step": 9967 }, { "epoch": 0.33, "grad_norm": 0.7588036060333252, "learning_rate": 1.534950614953306e-05, "loss": 2.1008, "step": 9968 }, { "epoch": 0.33, "grad_norm": 0.7034308910369873, "learning_rate": 1.5348608098794097e-05, "loss": 2.0805, "step": 9969 }, { "epoch": 0.33, "grad_norm": 0.7161398530006409, "learning_rate": 1.53477099876297e-05, "loss": 2.0332, "step": 9970 }, { "epoch": 0.33, "grad_norm": 0.7189461588859558, "learning_rate": 1.5346811816050004e-05, "loss": 2.0768, "step": 9971 }, { "epoch": 0.33, "grad_norm": 0.7164156436920166, "learning_rate": 1.5345913584065166e-05, "loss": 2.1495, "step": 9972 }, { "epoch": 0.33, "grad_norm": 0.77699875831604, "learning_rate": 1.5345015291685327e-05, "loss": 2.2366, "step": 9973 }, { "epoch": 0.33, "grad_norm": 0.7561855912208557, "learning_rate": 1.5344116938920638e-05, "loss": 2.1528, "step": 9974 }, { "epoch": 0.33, "grad_norm": 0.7107703685760498, "learning_rate": 1.5343218525781247e-05, "loss": 2.1251, "step": 9975 }, { "epoch": 0.33, "grad_norm": 0.7027930617332458, "learning_rate": 1.5342320052277307e-05, "loss": 2.0786, "step": 9976 }, { "epoch": 0.33, "grad_norm": 0.6941022872924805, "learning_rate": 1.5341421518418963e-05, "loss": 2.0993, "step": 9977 }, { "epoch": 0.33, "grad_norm": 0.7118350863456726, "learning_rate": 1.534052292421637e-05, "loss": 2.0717, "step": 9978 }, { "epoch": 0.33, "grad_norm": 0.7233568429946899, "learning_rate": 1.533962426967968e-05, "loss": 2.1068, "step": 9979 }, { "epoch": 0.33, "grad_norm": 0.7118363380432129, "learning_rate": 1.5338725554819043e-05, "loss": 2.0943, "step": 9980 }, { "epoch": 0.33, "grad_norm": 0.7006356120109558, "learning_rate": 1.5337826779644617e-05, "loss": 2.0703, "step": 9981 }, { "epoch": 0.33, "grad_norm": 0.7241905331611633, "learning_rate": 1.5336927944166548e-05, "loss": 2.162, "step": 9982 }, { "epoch": 0.33, "grad_norm": 0.7173171043395996, "learning_rate": 1.5336029048394997e-05, "loss": 2.1389, "step": 9983 }, { "epoch": 0.33, "grad_norm": 0.7141885757446289, "learning_rate": 1.5335130092340117e-05, "loss": 2.1684, "step": 9984 }, { "epoch": 0.33, "grad_norm": 0.7098783254623413, "learning_rate": 1.5334231076012064e-05, "loss": 2.1473, "step": 9985 }, { "epoch": 0.33, "grad_norm": 0.7285926342010498, "learning_rate": 1.533333199942099e-05, "loss": 2.0833, "step": 9986 }, { "epoch": 0.33, "grad_norm": 0.7388181090354919, "learning_rate": 1.5332432862577062e-05, "loss": 2.1173, "step": 9987 }, { "epoch": 0.33, "grad_norm": 0.7213814854621887, "learning_rate": 1.533153366549043e-05, "loss": 2.1227, "step": 9988 }, { "epoch": 0.33, "grad_norm": 0.7296930551528931, "learning_rate": 1.5330634408171257e-05, "loss": 2.0856, "step": 9989 }, { "epoch": 0.33, "grad_norm": 0.7041009068489075, "learning_rate": 1.5329735090629702e-05, "loss": 2.1039, "step": 9990 }, { "epoch": 0.33, "grad_norm": 0.7245975732803345, "learning_rate": 1.532883571287592e-05, "loss": 2.1567, "step": 9991 }, { "epoch": 0.33, "grad_norm": 0.7275087833404541, "learning_rate": 1.5327936274920075e-05, "loss": 2.1653, "step": 9992 }, { "epoch": 0.33, "grad_norm": 0.7535735368728638, "learning_rate": 1.532703677677233e-05, "loss": 2.14, "step": 9993 }, { "epoch": 0.33, "grad_norm": 0.722087562084198, "learning_rate": 1.5326137218442845e-05, "loss": 2.1575, "step": 9994 }, { "epoch": 0.33, "grad_norm": 0.7065912485122681, "learning_rate": 1.5325237599941786e-05, "loss": 2.1354, "step": 9995 }, { "epoch": 0.33, "grad_norm": 0.6966012120246887, "learning_rate": 1.5324337921279308e-05, "loss": 2.1188, "step": 9996 }, { "epoch": 0.33, "grad_norm": 0.6988899111747742, "learning_rate": 1.5323438182465585e-05, "loss": 2.1296, "step": 9997 }, { "epoch": 0.33, "grad_norm": 0.7006117701530457, "learning_rate": 1.5322538383510774e-05, "loss": 2.0668, "step": 9998 }, { "epoch": 0.33, "grad_norm": 0.7583548426628113, "learning_rate": 1.5321638524425047e-05, "loss": 2.1825, "step": 9999 }, { "epoch": 0.33, "grad_norm": 0.7304947972297668, "learning_rate": 1.5320738605218564e-05, "loss": 2.0628, "step": 10000 }, { "epoch": 0.33, "grad_norm": 0.717784583568573, "learning_rate": 1.5319838625901497e-05, "loss": 2.0766, "step": 10001 }, { "epoch": 0.33, "grad_norm": 0.7098894119262695, "learning_rate": 1.5318938586484007e-05, "loss": 2.1641, "step": 10002 }, { "epoch": 0.33, "grad_norm": 0.7608881592750549, "learning_rate": 1.531803848697627e-05, "loss": 2.1629, "step": 10003 }, { "epoch": 0.33, "grad_norm": 0.7275397777557373, "learning_rate": 1.531713832738845e-05, "loss": 2.1159, "step": 10004 }, { "epoch": 0.33, "grad_norm": 0.7441413998603821, "learning_rate": 1.5316238107730717e-05, "loss": 2.1733, "step": 10005 }, { "epoch": 0.33, "grad_norm": 0.7365608811378479, "learning_rate": 1.5315337828013243e-05, "loss": 2.136, "step": 10006 }, { "epoch": 0.33, "grad_norm": 0.7113382816314697, "learning_rate": 1.531443748824619e-05, "loss": 2.1169, "step": 10007 }, { "epoch": 0.33, "grad_norm": 0.7332295179367065, "learning_rate": 1.5313537088439746e-05, "loss": 2.1024, "step": 10008 }, { "epoch": 0.33, "grad_norm": 0.7116252183914185, "learning_rate": 1.531263662860407e-05, "loss": 2.0703, "step": 10009 }, { "epoch": 0.33, "grad_norm": 0.7264032959938049, "learning_rate": 1.5311736108749337e-05, "loss": 2.1368, "step": 10010 }, { "epoch": 0.33, "grad_norm": 0.7282747030258179, "learning_rate": 1.5310835528885727e-05, "loss": 2.0475, "step": 10011 }, { "epoch": 0.33, "grad_norm": 0.7228419780731201, "learning_rate": 1.5309934889023406e-05, "loss": 2.1583, "step": 10012 }, { "epoch": 0.33, "grad_norm": 0.7247503399848938, "learning_rate": 1.5309034189172556e-05, "loss": 2.174, "step": 10013 }, { "epoch": 0.33, "grad_norm": 0.7345393896102905, "learning_rate": 1.5308133429343346e-05, "loss": 2.0899, "step": 10014 }, { "epoch": 0.33, "grad_norm": 0.7341177463531494, "learning_rate": 1.5307232609545958e-05, "loss": 2.1062, "step": 10015 }, { "epoch": 0.33, "grad_norm": 0.7032195329666138, "learning_rate": 1.530633172979056e-05, "loss": 2.0295, "step": 10016 }, { "epoch": 0.33, "grad_norm": 0.693708598613739, "learning_rate": 1.5305430790087345e-05, "loss": 2.0597, "step": 10017 }, { "epoch": 0.33, "grad_norm": 0.7378243803977966, "learning_rate": 1.5304529790446476e-05, "loss": 2.1099, "step": 10018 }, { "epoch": 0.33, "grad_norm": 0.7657470703125, "learning_rate": 1.530362873087814e-05, "loss": 2.14, "step": 10019 }, { "epoch": 0.33, "grad_norm": 0.7148919701576233, "learning_rate": 1.5302727611392517e-05, "loss": 2.0718, "step": 10020 }, { "epoch": 0.33, "grad_norm": 0.6995805501937866, "learning_rate": 1.530182643199978e-05, "loss": 2.1045, "step": 10021 }, { "epoch": 0.33, "grad_norm": 0.7176634669303894, "learning_rate": 1.530092519271012e-05, "loss": 2.0682, "step": 10022 }, { "epoch": 0.33, "grad_norm": 0.7703898549079895, "learning_rate": 1.530002389353371e-05, "loss": 2.1773, "step": 10023 }, { "epoch": 0.33, "grad_norm": 0.7362306118011475, "learning_rate": 1.5299122534480738e-05, "loss": 2.287, "step": 10024 }, { "epoch": 0.33, "grad_norm": 0.7134345173835754, "learning_rate": 1.5298221115561385e-05, "loss": 2.1785, "step": 10025 }, { "epoch": 0.33, "grad_norm": 0.7660397291183472, "learning_rate": 1.529731963678584e-05, "loss": 2.1399, "step": 10026 }, { "epoch": 0.33, "grad_norm": 0.7364994883537292, "learning_rate": 1.5296418098164275e-05, "loss": 2.2059, "step": 10027 }, { "epoch": 0.33, "grad_norm": 0.7345845103263855, "learning_rate": 1.5295516499706887e-05, "loss": 2.1169, "step": 10028 }, { "epoch": 0.33, "grad_norm": 0.7142937779426575, "learning_rate": 1.5294614841423854e-05, "loss": 2.0992, "step": 10029 }, { "epoch": 0.33, "grad_norm": 0.6972905993461609, "learning_rate": 1.5293713123325366e-05, "loss": 2.0863, "step": 10030 }, { "epoch": 0.33, "grad_norm": 0.7371915578842163, "learning_rate": 1.529281134542161e-05, "loss": 2.2024, "step": 10031 }, { "epoch": 0.33, "grad_norm": 0.7531124353408813, "learning_rate": 1.5291909507722773e-05, "loss": 2.1074, "step": 10032 }, { "epoch": 0.33, "grad_norm": 0.7074714303016663, "learning_rate": 1.5291007610239045e-05, "loss": 2.1263, "step": 10033 }, { "epoch": 0.33, "grad_norm": 0.7162356376647949, "learning_rate": 1.529010565298061e-05, "loss": 2.1638, "step": 10034 }, { "epoch": 0.33, "grad_norm": 0.7348289489746094, "learning_rate": 1.5289203635957667e-05, "loss": 2.0836, "step": 10035 }, { "epoch": 0.33, "grad_norm": 0.7351411581039429, "learning_rate": 1.5288301559180398e-05, "loss": 2.1134, "step": 10036 }, { "epoch": 0.33, "grad_norm": 0.7018687129020691, "learning_rate": 1.5287399422659e-05, "loss": 2.1286, "step": 10037 }, { "epoch": 0.33, "grad_norm": 0.7249342799186707, "learning_rate": 1.5286497226403655e-05, "loss": 2.1089, "step": 10038 }, { "epoch": 0.33, "grad_norm": 0.6925143003463745, "learning_rate": 1.5285594970424572e-05, "loss": 2.0753, "step": 10039 }, { "epoch": 0.33, "grad_norm": 0.7001458406448364, "learning_rate": 1.528469265473193e-05, "loss": 2.112, "step": 10040 }, { "epoch": 0.33, "grad_norm": 0.7512720227241516, "learning_rate": 1.5283790279335925e-05, "loss": 2.0943, "step": 10041 }, { "epoch": 0.33, "grad_norm": 0.7175402045249939, "learning_rate": 1.5282887844246758e-05, "loss": 2.1508, "step": 10042 }, { "epoch": 0.33, "grad_norm": 0.7568737268447876, "learning_rate": 1.5281985349474616e-05, "loss": 2.171, "step": 10043 }, { "epoch": 0.33, "grad_norm": 0.7141623497009277, "learning_rate": 1.5281082795029704e-05, "loss": 2.1291, "step": 10044 }, { "epoch": 0.33, "grad_norm": 0.7126940488815308, "learning_rate": 1.528018018092221e-05, "loss": 2.0879, "step": 10045 }, { "epoch": 0.33, "grad_norm": 0.7380043864250183, "learning_rate": 1.5279277507162337e-05, "loss": 2.1142, "step": 10046 }, { "epoch": 0.33, "grad_norm": 0.7129105925559998, "learning_rate": 1.527837477376028e-05, "loss": 2.1489, "step": 10047 }, { "epoch": 0.33, "grad_norm": 0.7440321445465088, "learning_rate": 1.527747198072624e-05, "loss": 2.1598, "step": 10048 }, { "epoch": 0.33, "grad_norm": 0.7014553546905518, "learning_rate": 1.527656912807041e-05, "loss": 2.1405, "step": 10049 }, { "epoch": 0.33, "grad_norm": 0.7701312303543091, "learning_rate": 1.5275666215803e-05, "loss": 2.1116, "step": 10050 }, { "epoch": 0.33, "grad_norm": 0.7119162678718567, "learning_rate": 1.5274763243934203e-05, "loss": 2.113, "step": 10051 }, { "epoch": 0.33, "grad_norm": 0.7237569689750671, "learning_rate": 1.527386021247422e-05, "loss": 2.0938, "step": 10052 }, { "epoch": 0.33, "grad_norm": 0.7306511998176575, "learning_rate": 1.527295712143326e-05, "loss": 2.0881, "step": 10053 }, { "epoch": 0.33, "grad_norm": 0.7547991871833801, "learning_rate": 1.527205397082151e-05, "loss": 2.1578, "step": 10054 }, { "epoch": 0.33, "grad_norm": 0.7612103223800659, "learning_rate": 1.5271150760649197e-05, "loss": 2.1034, "step": 10055 }, { "epoch": 0.33, "grad_norm": 0.7557827830314636, "learning_rate": 1.5270247490926503e-05, "loss": 2.0853, "step": 10056 }, { "epoch": 0.33, "grad_norm": 0.7160493731498718, "learning_rate": 1.5269344161663644e-05, "loss": 2.199, "step": 10057 }, { "epoch": 0.33, "grad_norm": 0.7116184830665588, "learning_rate": 1.5268440772870822e-05, "loss": 2.1476, "step": 10058 }, { "epoch": 0.33, "grad_norm": 0.7349880933761597, "learning_rate": 1.5267537324558248e-05, "loss": 2.1842, "step": 10059 }, { "epoch": 0.33, "grad_norm": 0.7381742596626282, "learning_rate": 1.526663381673612e-05, "loss": 2.1727, "step": 10060 }, { "epoch": 0.33, "grad_norm": 0.7480927109718323, "learning_rate": 1.5265730249414652e-05, "loss": 1.9984, "step": 10061 }, { "epoch": 0.33, "grad_norm": 0.7254616618156433, "learning_rate": 1.5264826622604047e-05, "loss": 2.1766, "step": 10062 }, { "epoch": 0.33, "grad_norm": 0.769919216632843, "learning_rate": 1.526392293631452e-05, "loss": 2.1294, "step": 10063 }, { "epoch": 0.33, "grad_norm": 0.7038577198982239, "learning_rate": 1.5263019190556275e-05, "loss": 2.061, "step": 10064 }, { "epoch": 0.33, "grad_norm": 0.7674484848976135, "learning_rate": 1.526211538533952e-05, "loss": 2.0606, "step": 10065 }, { "epoch": 0.33, "grad_norm": 0.7276782989501953, "learning_rate": 1.5261211520674475e-05, "loss": 2.0537, "step": 10066 }, { "epoch": 0.33, "grad_norm": 0.693509578704834, "learning_rate": 1.5260307596571342e-05, "loss": 2.0649, "step": 10067 }, { "epoch": 0.33, "grad_norm": 0.7392958402633667, "learning_rate": 1.525940361304034e-05, "loss": 2.1051, "step": 10068 }, { "epoch": 0.33, "grad_norm": 0.7127280235290527, "learning_rate": 1.5258499570091673e-05, "loss": 2.0899, "step": 10069 }, { "epoch": 0.34, "grad_norm": 0.73350590467453, "learning_rate": 1.5257595467735563e-05, "loss": 2.1438, "step": 10070 }, { "epoch": 0.34, "grad_norm": 0.6800039410591125, "learning_rate": 1.525669130598222e-05, "loss": 2.1154, "step": 10071 }, { "epoch": 0.34, "grad_norm": 0.7113328576087952, "learning_rate": 1.5255787084841863e-05, "loss": 2.0811, "step": 10072 }, { "epoch": 0.34, "grad_norm": 0.7271084189414978, "learning_rate": 1.5254882804324698e-05, "loss": 2.1298, "step": 10073 }, { "epoch": 0.34, "grad_norm": 0.7235487699508667, "learning_rate": 1.525397846444095e-05, "loss": 2.1552, "step": 10074 }, { "epoch": 0.34, "grad_norm": 0.7181670069694519, "learning_rate": 1.5253074065200832e-05, "loss": 2.1006, "step": 10075 }, { "epoch": 0.34, "grad_norm": 0.7539389729499817, "learning_rate": 1.525216960661456e-05, "loss": 2.199, "step": 10076 }, { "epoch": 0.34, "grad_norm": 0.715266227722168, "learning_rate": 1.5251265088692356e-05, "loss": 2.1355, "step": 10077 }, { "epoch": 0.34, "grad_norm": 0.6826164722442627, "learning_rate": 1.5250360511444436e-05, "loss": 2.0438, "step": 10078 }, { "epoch": 0.34, "grad_norm": 0.7137704491615295, "learning_rate": 1.5249455874881021e-05, "loss": 2.1742, "step": 10079 }, { "epoch": 0.34, "grad_norm": 0.6959431171417236, "learning_rate": 1.5248551179012327e-05, "loss": 2.1222, "step": 10080 }, { "epoch": 0.34, "grad_norm": 0.7458562850952148, "learning_rate": 1.524764642384858e-05, "loss": 2.1365, "step": 10081 }, { "epoch": 0.34, "grad_norm": 0.7086814641952515, "learning_rate": 1.5246741609399998e-05, "loss": 2.1047, "step": 10082 }, { "epoch": 0.34, "grad_norm": 0.7142043709754944, "learning_rate": 1.5245836735676806e-05, "loss": 2.0732, "step": 10083 }, { "epoch": 0.34, "grad_norm": 0.7026371955871582, "learning_rate": 1.5244931802689223e-05, "loss": 2.12, "step": 10084 }, { "epoch": 0.34, "grad_norm": 0.7161905169487, "learning_rate": 1.5244026810447476e-05, "loss": 2.151, "step": 10085 }, { "epoch": 0.34, "grad_norm": 0.7201579213142395, "learning_rate": 1.5243121758961787e-05, "loss": 2.0986, "step": 10086 }, { "epoch": 0.34, "grad_norm": 0.7146960496902466, "learning_rate": 1.5242216648242378e-05, "loss": 2.0986, "step": 10087 }, { "epoch": 0.34, "grad_norm": 0.7470773458480835, "learning_rate": 1.5241311478299482e-05, "loss": 2.0586, "step": 10088 }, { "epoch": 0.34, "grad_norm": 0.7542617917060852, "learning_rate": 1.5240406249143319e-05, "loss": 2.1768, "step": 10089 }, { "epoch": 0.34, "grad_norm": 0.7179585099220276, "learning_rate": 1.5239500960784118e-05, "loss": 2.1645, "step": 10090 }, { "epoch": 0.34, "grad_norm": 0.7630740404129028, "learning_rate": 1.5238595613232106e-05, "loss": 2.1987, "step": 10091 }, { "epoch": 0.34, "grad_norm": 0.7415053844451904, "learning_rate": 1.5237690206497509e-05, "loss": 2.1573, "step": 10092 }, { "epoch": 0.34, "grad_norm": 0.7079194188117981, "learning_rate": 1.5236784740590558e-05, "loss": 2.1964, "step": 10093 }, { "epoch": 0.34, "grad_norm": 0.7079851031303406, "learning_rate": 1.5235879215521486e-05, "loss": 2.0738, "step": 10094 }, { "epoch": 0.34, "grad_norm": 0.7330510020256042, "learning_rate": 1.5234973631300512e-05, "loss": 2.0947, "step": 10095 }, { "epoch": 0.34, "grad_norm": 0.7014070153236389, "learning_rate": 1.5234067987937878e-05, "loss": 2.1612, "step": 10096 }, { "epoch": 0.34, "grad_norm": 0.72809898853302, "learning_rate": 1.5233162285443813e-05, "loss": 2.096, "step": 10097 }, { "epoch": 0.34, "grad_norm": 0.7351396679878235, "learning_rate": 1.5232256523828542e-05, "loss": 2.1379, "step": 10098 }, { "epoch": 0.34, "grad_norm": 0.7281804084777832, "learning_rate": 1.5231350703102308e-05, "loss": 2.1177, "step": 10099 }, { "epoch": 0.34, "grad_norm": 0.7390543818473816, "learning_rate": 1.523044482327534e-05, "loss": 2.1655, "step": 10100 }, { "epoch": 0.34, "grad_norm": 0.7415804862976074, "learning_rate": 1.5229538884357869e-05, "loss": 2.1151, "step": 10101 }, { "epoch": 0.34, "grad_norm": 0.7407842874526978, "learning_rate": 1.522863288636013e-05, "loss": 2.0566, "step": 10102 }, { "epoch": 0.34, "grad_norm": 0.7362062335014343, "learning_rate": 1.5227726829292367e-05, "loss": 2.0746, "step": 10103 }, { "epoch": 0.34, "grad_norm": 0.7202370762825012, "learning_rate": 1.5226820713164807e-05, "loss": 2.1043, "step": 10104 }, { "epoch": 0.34, "grad_norm": 0.7315031886100769, "learning_rate": 1.5225914537987692e-05, "loss": 2.1437, "step": 10105 }, { "epoch": 0.34, "grad_norm": 0.7333627343177795, "learning_rate": 1.5225008303771254e-05, "loss": 2.1346, "step": 10106 }, { "epoch": 0.34, "grad_norm": 0.7167739272117615, "learning_rate": 1.5224102010525737e-05, "loss": 2.1236, "step": 10107 }, { "epoch": 0.34, "grad_norm": 0.7488238215446472, "learning_rate": 1.5223195658261375e-05, "loss": 2.1451, "step": 10108 }, { "epoch": 0.34, "grad_norm": 0.7118038535118103, "learning_rate": 1.5222289246988409e-05, "loss": 2.1629, "step": 10109 }, { "epoch": 0.34, "grad_norm": 0.7536574006080627, "learning_rate": 1.5221382776717084e-05, "loss": 2.1372, "step": 10110 }, { "epoch": 0.34, "grad_norm": 0.6996644139289856, "learning_rate": 1.5220476247457632e-05, "loss": 2.1374, "step": 10111 }, { "epoch": 0.34, "grad_norm": 0.7120413780212402, "learning_rate": 1.5219569659220299e-05, "loss": 2.171, "step": 10112 }, { "epoch": 0.34, "grad_norm": 0.7156165242195129, "learning_rate": 1.521866301201533e-05, "loss": 2.0741, "step": 10113 }, { "epoch": 0.34, "grad_norm": 0.7156989574432373, "learning_rate": 1.5217756305852962e-05, "loss": 2.1327, "step": 10114 }, { "epoch": 0.34, "grad_norm": 0.731597900390625, "learning_rate": 1.5216849540743442e-05, "loss": 2.2012, "step": 10115 }, { "epoch": 0.34, "grad_norm": 0.7137221693992615, "learning_rate": 1.5215942716697014e-05, "loss": 2.1428, "step": 10116 }, { "epoch": 0.34, "grad_norm": 0.7302429676055908, "learning_rate": 1.5215035833723922e-05, "loss": 2.1457, "step": 10117 }, { "epoch": 0.34, "grad_norm": 0.7069265246391296, "learning_rate": 1.521412889183441e-05, "loss": 2.1095, "step": 10118 }, { "epoch": 0.34, "grad_norm": 0.7193915843963623, "learning_rate": 1.5213221891038727e-05, "loss": 2.13, "step": 10119 }, { "epoch": 0.34, "grad_norm": 0.775750994682312, "learning_rate": 1.5212314831347117e-05, "loss": 2.0978, "step": 10120 }, { "epoch": 0.34, "grad_norm": 0.695470929145813, "learning_rate": 1.5211407712769832e-05, "loss": 2.1209, "step": 10121 }, { "epoch": 0.34, "grad_norm": 0.7211986780166626, "learning_rate": 1.5210500535317114e-05, "loss": 2.0966, "step": 10122 }, { "epoch": 0.34, "grad_norm": 0.7539097666740417, "learning_rate": 1.5209593298999215e-05, "loss": 2.1879, "step": 10123 }, { "epoch": 0.34, "grad_norm": 0.7180412411689758, "learning_rate": 1.5208686003826386e-05, "loss": 2.1687, "step": 10124 }, { "epoch": 0.34, "grad_norm": 0.7558460235595703, "learning_rate": 1.5207778649808871e-05, "loss": 2.0185, "step": 10125 }, { "epoch": 0.34, "grad_norm": 0.7342061400413513, "learning_rate": 1.5206871236956926e-05, "loss": 2.1148, "step": 10126 }, { "epoch": 0.34, "grad_norm": 0.7195091247558594, "learning_rate": 1.5205963765280802e-05, "loss": 2.1366, "step": 10127 }, { "epoch": 0.34, "grad_norm": 0.736455500125885, "learning_rate": 1.520505623479075e-05, "loss": 2.1394, "step": 10128 }, { "epoch": 0.34, "grad_norm": 0.7169816493988037, "learning_rate": 1.5204148645497023e-05, "loss": 2.1111, "step": 10129 }, { "epoch": 0.34, "grad_norm": 0.719291090965271, "learning_rate": 1.5203240997409881e-05, "loss": 2.055, "step": 10130 }, { "epoch": 0.34, "grad_norm": 0.7686059474945068, "learning_rate": 1.5202333290539562e-05, "loss": 2.1627, "step": 10131 }, { "epoch": 0.34, "grad_norm": 0.6843876838684082, "learning_rate": 1.5201425524896336e-05, "loss": 2.0869, "step": 10132 }, { "epoch": 0.34, "grad_norm": 0.6983750462532043, "learning_rate": 1.5200517700490451e-05, "loss": 2.1436, "step": 10133 }, { "epoch": 0.34, "grad_norm": 0.7205191850662231, "learning_rate": 1.5199609817332164e-05, "loss": 2.1617, "step": 10134 }, { "epoch": 0.34, "grad_norm": 0.7300103306770325, "learning_rate": 1.5198701875431734e-05, "loss": 2.1396, "step": 10135 }, { "epoch": 0.34, "grad_norm": 0.7241202592849731, "learning_rate": 1.5197793874799419e-05, "loss": 2.1648, "step": 10136 }, { "epoch": 0.34, "grad_norm": 0.7154372334480286, "learning_rate": 1.519688581544547e-05, "loss": 2.1443, "step": 10137 }, { "epoch": 0.34, "grad_norm": 0.7359454035758972, "learning_rate": 1.5195977697380152e-05, "loss": 2.1352, "step": 10138 }, { "epoch": 0.34, "grad_norm": 0.6980564594268799, "learning_rate": 1.5195069520613724e-05, "loss": 2.1059, "step": 10139 }, { "epoch": 0.34, "grad_norm": 0.7291387915611267, "learning_rate": 1.5194161285156446e-05, "loss": 2.116, "step": 10140 }, { "epoch": 0.34, "grad_norm": 0.7412888407707214, "learning_rate": 1.5193252991018578e-05, "loss": 2.0577, "step": 10141 }, { "epoch": 0.34, "grad_norm": 0.712284505367279, "learning_rate": 1.5192344638210382e-05, "loss": 2.1597, "step": 10142 }, { "epoch": 0.34, "grad_norm": 0.7140550017356873, "learning_rate": 1.5191436226742118e-05, "loss": 2.189, "step": 10143 }, { "epoch": 0.34, "grad_norm": 0.7818500995635986, "learning_rate": 1.5190527756624049e-05, "loss": 2.1182, "step": 10144 }, { "epoch": 0.34, "grad_norm": 0.7108513712882996, "learning_rate": 1.5189619227866442e-05, "loss": 2.1707, "step": 10145 }, { "epoch": 0.34, "grad_norm": 0.7283231019973755, "learning_rate": 1.5188710640479553e-05, "loss": 2.0942, "step": 10146 }, { "epoch": 0.34, "grad_norm": 0.7268214821815491, "learning_rate": 1.5187801994473658e-05, "loss": 2.1274, "step": 10147 }, { "epoch": 0.34, "grad_norm": 0.7127975821495056, "learning_rate": 1.5186893289859012e-05, "loss": 2.1966, "step": 10148 }, { "epoch": 0.34, "grad_norm": 0.7417986392974854, "learning_rate": 1.5185984526645889e-05, "loss": 2.0895, "step": 10149 }, { "epoch": 0.34, "grad_norm": 0.7573621869087219, "learning_rate": 1.518507570484455e-05, "loss": 2.0942, "step": 10150 }, { "epoch": 0.34, "grad_norm": 0.8116528987884521, "learning_rate": 1.5184166824465265e-05, "loss": 2.0429, "step": 10151 }, { "epoch": 0.34, "grad_norm": 0.7381929755210876, "learning_rate": 1.5183257885518304e-05, "loss": 2.0743, "step": 10152 }, { "epoch": 0.34, "grad_norm": 0.7514052987098694, "learning_rate": 1.5182348888013928e-05, "loss": 2.1545, "step": 10153 }, { "epoch": 0.34, "grad_norm": 0.7120821475982666, "learning_rate": 1.5181439831962417e-05, "loss": 2.0973, "step": 10154 }, { "epoch": 0.34, "grad_norm": 0.7056170701980591, "learning_rate": 1.518053071737403e-05, "loss": 2.1477, "step": 10155 }, { "epoch": 0.34, "grad_norm": 0.7355800271034241, "learning_rate": 1.5179621544259045e-05, "loss": 2.1324, "step": 10156 }, { "epoch": 0.34, "grad_norm": 0.7193593382835388, "learning_rate": 1.5178712312627732e-05, "loss": 2.0879, "step": 10157 }, { "epoch": 0.34, "grad_norm": 0.7022790312767029, "learning_rate": 1.517780302249036e-05, "loss": 2.0873, "step": 10158 }, { "epoch": 0.34, "grad_norm": 0.7303242087364197, "learning_rate": 1.517689367385721e-05, "loss": 2.1087, "step": 10159 }, { "epoch": 0.34, "grad_norm": 0.7229496240615845, "learning_rate": 1.5175984266738544e-05, "loss": 2.0533, "step": 10160 }, { "epoch": 0.34, "grad_norm": 0.7070238590240479, "learning_rate": 1.5175074801144643e-05, "loss": 2.0632, "step": 10161 }, { "epoch": 0.34, "grad_norm": 0.7178279161453247, "learning_rate": 1.5174165277085777e-05, "loss": 2.1639, "step": 10162 }, { "epoch": 0.34, "grad_norm": 0.7114453315734863, "learning_rate": 1.5173255694572229e-05, "loss": 2.1478, "step": 10163 }, { "epoch": 0.34, "grad_norm": 0.7243159413337708, "learning_rate": 1.5172346053614266e-05, "loss": 2.1299, "step": 10164 }, { "epoch": 0.34, "grad_norm": 0.7140703201293945, "learning_rate": 1.517143635422217e-05, "loss": 2.1942, "step": 10165 }, { "epoch": 0.34, "grad_norm": 0.7631360292434692, "learning_rate": 1.5170526596406214e-05, "loss": 2.1439, "step": 10166 }, { "epoch": 0.34, "grad_norm": 0.7161300778388977, "learning_rate": 1.5169616780176686e-05, "loss": 2.0916, "step": 10167 }, { "epoch": 0.34, "grad_norm": 0.7268704175949097, "learning_rate": 1.5168706905543853e-05, "loss": 2.0673, "step": 10168 }, { "epoch": 0.34, "grad_norm": 0.7197861075401306, "learning_rate": 1.5167796972517997e-05, "loss": 2.1099, "step": 10169 }, { "epoch": 0.34, "grad_norm": 0.7375487685203552, "learning_rate": 1.5166886981109402e-05, "loss": 2.1715, "step": 10170 }, { "epoch": 0.34, "grad_norm": 0.7384415864944458, "learning_rate": 1.5165976931328343e-05, "loss": 2.1104, "step": 10171 }, { "epoch": 0.34, "grad_norm": 0.7145796418190002, "learning_rate": 1.5165066823185106e-05, "loss": 2.1192, "step": 10172 }, { "epoch": 0.34, "grad_norm": 0.7070500254631042, "learning_rate": 1.516415665668997e-05, "loss": 2.2016, "step": 10173 }, { "epoch": 0.34, "grad_norm": 0.7318699359893799, "learning_rate": 1.5163246431853221e-05, "loss": 2.0483, "step": 10174 }, { "epoch": 0.34, "grad_norm": 0.7059455513954163, "learning_rate": 1.5162336148685137e-05, "loss": 2.0838, "step": 10175 }, { "epoch": 0.34, "grad_norm": 0.7218620181083679, "learning_rate": 1.5161425807196009e-05, "loss": 2.127, "step": 10176 }, { "epoch": 0.34, "grad_norm": 0.7254737615585327, "learning_rate": 1.516051540739611e-05, "loss": 2.1323, "step": 10177 }, { "epoch": 0.34, "grad_norm": 0.7498472929000854, "learning_rate": 1.5159604949295739e-05, "loss": 2.1462, "step": 10178 }, { "epoch": 0.34, "grad_norm": 0.7851585149765015, "learning_rate": 1.5158694432905173e-05, "loss": 2.1289, "step": 10179 }, { "epoch": 0.34, "grad_norm": 0.7177717685699463, "learning_rate": 1.5157783858234698e-05, "loss": 2.1086, "step": 10180 }, { "epoch": 0.34, "grad_norm": 0.7086904644966125, "learning_rate": 1.5156873225294608e-05, "loss": 2.1469, "step": 10181 }, { "epoch": 0.34, "grad_norm": 0.7533708810806274, "learning_rate": 1.5155962534095183e-05, "loss": 2.1736, "step": 10182 }, { "epoch": 0.34, "grad_norm": 0.7106229662895203, "learning_rate": 1.5155051784646717e-05, "loss": 2.1236, "step": 10183 }, { "epoch": 0.34, "grad_norm": 0.7571940422058105, "learning_rate": 1.5154140976959494e-05, "loss": 2.1568, "step": 10184 }, { "epoch": 0.34, "grad_norm": 0.7139760851860046, "learning_rate": 1.515323011104381e-05, "loss": 2.1698, "step": 10185 }, { "epoch": 0.34, "grad_norm": 0.7233843207359314, "learning_rate": 1.5152319186909952e-05, "loss": 2.1466, "step": 10186 }, { "epoch": 0.34, "grad_norm": 0.7144239544868469, "learning_rate": 1.5151408204568212e-05, "loss": 2.1446, "step": 10187 }, { "epoch": 0.34, "grad_norm": 0.7265833616256714, "learning_rate": 1.5150497164028877e-05, "loss": 2.0779, "step": 10188 }, { "epoch": 0.34, "grad_norm": 0.7402101159095764, "learning_rate": 1.5149586065302249e-05, "loss": 2.1751, "step": 10189 }, { "epoch": 0.34, "grad_norm": 0.7012652158737183, "learning_rate": 1.5148674908398616e-05, "loss": 2.0595, "step": 10190 }, { "epoch": 0.34, "grad_norm": 0.7403795123100281, "learning_rate": 1.5147763693328263e-05, "loss": 2.159, "step": 10191 }, { "epoch": 0.34, "grad_norm": 0.7063884735107422, "learning_rate": 1.51468524201015e-05, "loss": 2.0006, "step": 10192 }, { "epoch": 0.34, "grad_norm": 0.7251092195510864, "learning_rate": 1.5145941088728613e-05, "loss": 2.1587, "step": 10193 }, { "epoch": 0.34, "grad_norm": 0.7288483381271362, "learning_rate": 1.51450296992199e-05, "loss": 2.0203, "step": 10194 }, { "epoch": 0.34, "grad_norm": 0.7674341201782227, "learning_rate": 1.5144118251585655e-05, "loss": 2.1435, "step": 10195 }, { "epoch": 0.34, "grad_norm": 0.7153727412223816, "learning_rate": 1.5143206745836179e-05, "loss": 2.0925, "step": 10196 }, { "epoch": 0.34, "grad_norm": 0.7176985740661621, "learning_rate": 1.5142295181981765e-05, "loss": 2.1629, "step": 10197 }, { "epoch": 0.34, "grad_norm": 0.732745349407196, "learning_rate": 1.5141383560032717e-05, "loss": 2.1163, "step": 10198 }, { "epoch": 0.34, "grad_norm": 0.7387077212333679, "learning_rate": 1.5140471879999328e-05, "loss": 2.1483, "step": 10199 }, { "epoch": 0.34, "grad_norm": 0.714963972568512, "learning_rate": 1.5139560141891899e-05, "loss": 2.0191, "step": 10200 }, { "epoch": 0.34, "grad_norm": 0.7100968360900879, "learning_rate": 1.5138648345720737e-05, "loss": 2.0694, "step": 10201 }, { "epoch": 0.34, "grad_norm": 0.7089889049530029, "learning_rate": 1.5137736491496129e-05, "loss": 2.0732, "step": 10202 }, { "epoch": 0.34, "grad_norm": 0.7154504060745239, "learning_rate": 1.5136824579228393e-05, "loss": 2.1097, "step": 10203 }, { "epoch": 0.34, "grad_norm": 0.7036659121513367, "learning_rate": 1.5135912608927821e-05, "loss": 2.1226, "step": 10204 }, { "epoch": 0.34, "grad_norm": 0.7435508370399475, "learning_rate": 1.5135000580604717e-05, "loss": 2.0424, "step": 10205 }, { "epoch": 0.34, "grad_norm": 0.7407211661338806, "learning_rate": 1.5134088494269388e-05, "loss": 2.1433, "step": 10206 }, { "epoch": 0.34, "grad_norm": 0.7177254557609558, "learning_rate": 1.5133176349932133e-05, "loss": 2.0809, "step": 10207 }, { "epoch": 0.34, "grad_norm": 0.7436890006065369, "learning_rate": 1.5132264147603262e-05, "loss": 2.0775, "step": 10208 }, { "epoch": 0.34, "grad_norm": 0.7249823808670044, "learning_rate": 1.513135188729308e-05, "loss": 2.1762, "step": 10209 }, { "epoch": 0.34, "grad_norm": 0.7352937459945679, "learning_rate": 1.5130439569011887e-05, "loss": 2.0668, "step": 10210 }, { "epoch": 0.34, "grad_norm": 0.7332491278648376, "learning_rate": 1.5129527192769998e-05, "loss": 2.1216, "step": 10211 }, { "epoch": 0.34, "grad_norm": 0.8026663661003113, "learning_rate": 1.5128614758577716e-05, "loss": 2.1369, "step": 10212 }, { "epoch": 0.34, "grad_norm": 0.746737003326416, "learning_rate": 1.512770226644535e-05, "loss": 2.0801, "step": 10213 }, { "epoch": 0.34, "grad_norm": 0.7212234139442444, "learning_rate": 1.512678971638321e-05, "loss": 2.0859, "step": 10214 }, { "epoch": 0.34, "grad_norm": 0.7143864035606384, "learning_rate": 1.5125877108401604e-05, "loss": 2.0687, "step": 10215 }, { "epoch": 0.34, "grad_norm": 0.7290762066841125, "learning_rate": 1.5124964442510842e-05, "loss": 2.1229, "step": 10216 }, { "epoch": 0.34, "grad_norm": 0.7154545783996582, "learning_rate": 1.5124051718721236e-05, "loss": 2.0922, "step": 10217 }, { "epoch": 0.34, "grad_norm": 0.6948208808898926, "learning_rate": 1.5123138937043096e-05, "loss": 2.068, "step": 10218 }, { "epoch": 0.34, "grad_norm": 0.7190092206001282, "learning_rate": 1.5122226097486735e-05, "loss": 2.1497, "step": 10219 }, { "epoch": 0.34, "grad_norm": 0.7374728918075562, "learning_rate": 1.5121313200062466e-05, "loss": 2.102, "step": 10220 }, { "epoch": 0.34, "grad_norm": 0.7338109612464905, "learning_rate": 1.5120400244780597e-05, "loss": 2.1218, "step": 10221 }, { "epoch": 0.34, "grad_norm": 0.719439685344696, "learning_rate": 1.511948723165145e-05, "loss": 2.1292, "step": 10222 }, { "epoch": 0.34, "grad_norm": 0.7246389985084534, "learning_rate": 1.511857416068534e-05, "loss": 2.1318, "step": 10223 }, { "epoch": 0.34, "grad_norm": 0.735849142074585, "learning_rate": 1.5117661031892574e-05, "loss": 2.077, "step": 10224 }, { "epoch": 0.34, "grad_norm": 0.7003060579299927, "learning_rate": 1.511674784528348e-05, "loss": 2.0615, "step": 10225 }, { "epoch": 0.34, "grad_norm": 0.7314756512641907, "learning_rate": 1.5115834600868362e-05, "loss": 2.1126, "step": 10226 }, { "epoch": 0.34, "grad_norm": 0.7211246490478516, "learning_rate": 1.5114921298657543e-05, "loss": 2.123, "step": 10227 }, { "epoch": 0.34, "grad_norm": 0.7200993299484253, "learning_rate": 1.5114007938661342e-05, "loss": 2.0411, "step": 10228 }, { "epoch": 0.34, "grad_norm": 0.7090663909912109, "learning_rate": 1.5113094520890075e-05, "loss": 2.0869, "step": 10229 }, { "epoch": 0.34, "grad_norm": 0.7302254438400269, "learning_rate": 1.5112181045354065e-05, "loss": 2.1559, "step": 10230 }, { "epoch": 0.34, "grad_norm": 0.7411405444145203, "learning_rate": 1.5111267512063626e-05, "loss": 2.0763, "step": 10231 }, { "epoch": 0.34, "grad_norm": 0.7253177762031555, "learning_rate": 1.5110353921029087e-05, "loss": 2.1202, "step": 10232 }, { "epoch": 0.34, "grad_norm": 0.7203285098075867, "learning_rate": 1.5109440272260761e-05, "loss": 2.0994, "step": 10233 }, { "epoch": 0.34, "grad_norm": 0.731164276599884, "learning_rate": 1.5108526565768973e-05, "loss": 2.1118, "step": 10234 }, { "epoch": 0.34, "grad_norm": 0.7556511759757996, "learning_rate": 1.5107612801564044e-05, "loss": 2.1148, "step": 10235 }, { "epoch": 0.34, "grad_norm": 0.7315865755081177, "learning_rate": 1.5106698979656304e-05, "loss": 2.1263, "step": 10236 }, { "epoch": 0.34, "grad_norm": 0.730632483959198, "learning_rate": 1.510578510005607e-05, "loss": 2.0696, "step": 10237 }, { "epoch": 0.34, "grad_norm": 0.7701296806335449, "learning_rate": 1.5104871162773668e-05, "loss": 2.0765, "step": 10238 }, { "epoch": 0.34, "grad_norm": 0.6920897960662842, "learning_rate": 1.5103957167819423e-05, "loss": 2.0837, "step": 10239 }, { "epoch": 0.34, "grad_norm": 0.7125385403633118, "learning_rate": 1.5103043115203662e-05, "loss": 2.1019, "step": 10240 }, { "epoch": 0.34, "grad_norm": 0.7424910664558411, "learning_rate": 1.510212900493671e-05, "loss": 2.1277, "step": 10241 }, { "epoch": 0.34, "grad_norm": 0.7422478795051575, "learning_rate": 1.5101214837028892e-05, "loss": 2.158, "step": 10242 }, { "epoch": 0.34, "grad_norm": 0.7804239392280579, "learning_rate": 1.5100300611490539e-05, "loss": 2.0967, "step": 10243 }, { "epoch": 0.34, "grad_norm": 0.7511236667633057, "learning_rate": 1.5099386328331981e-05, "loss": 2.0986, "step": 10244 }, { "epoch": 0.34, "grad_norm": 0.7510626316070557, "learning_rate": 1.5098471987563548e-05, "loss": 2.048, "step": 10245 }, { "epoch": 0.34, "grad_norm": 0.7645931839942932, "learning_rate": 1.5097557589195559e-05, "loss": 2.1135, "step": 10246 }, { "epoch": 0.34, "grad_norm": 0.7372564673423767, "learning_rate": 1.5096643133238359e-05, "loss": 2.1448, "step": 10247 }, { "epoch": 0.34, "grad_norm": 0.7620286345481873, "learning_rate": 1.5095728619702267e-05, "loss": 2.0992, "step": 10248 }, { "epoch": 0.34, "grad_norm": 1.0034486055374146, "learning_rate": 1.509481404859762e-05, "loss": 2.021, "step": 10249 }, { "epoch": 0.34, "grad_norm": 0.7202479243278503, "learning_rate": 1.509389941993475e-05, "loss": 2.1456, "step": 10250 }, { "epoch": 0.34, "grad_norm": 0.7199007868766785, "learning_rate": 1.509298473372399e-05, "loss": 2.1081, "step": 10251 }, { "epoch": 0.34, "grad_norm": 0.7580626606941223, "learning_rate": 1.5092069989975675e-05, "loss": 2.0434, "step": 10252 }, { "epoch": 0.34, "grad_norm": 0.7029133439064026, "learning_rate": 1.5091155188700136e-05, "loss": 2.1337, "step": 10253 }, { "epoch": 0.34, "grad_norm": 0.739310085773468, "learning_rate": 1.5090240329907708e-05, "loss": 2.2218, "step": 10254 }, { "epoch": 0.34, "grad_norm": 0.7283275723457336, "learning_rate": 1.5089325413608729e-05, "loss": 2.1131, "step": 10255 }, { "epoch": 0.34, "grad_norm": 0.7090397477149963, "learning_rate": 1.5088410439813537e-05, "loss": 2.1414, "step": 10256 }, { "epoch": 0.34, "grad_norm": 0.7785203456878662, "learning_rate": 1.5087495408532461e-05, "loss": 2.1726, "step": 10257 }, { "epoch": 0.34, "grad_norm": 0.7300161719322205, "learning_rate": 1.5086580319775848e-05, "loss": 2.0917, "step": 10258 }, { "epoch": 0.34, "grad_norm": 0.7645954489707947, "learning_rate": 1.5085665173554026e-05, "loss": 2.1428, "step": 10259 }, { "epoch": 0.34, "grad_norm": 0.7352378368377686, "learning_rate": 1.5084749969877347e-05, "loss": 2.0755, "step": 10260 }, { "epoch": 0.34, "grad_norm": 0.7733477354049683, "learning_rate": 1.5083834708756136e-05, "loss": 2.0918, "step": 10261 }, { "epoch": 0.34, "grad_norm": 0.7641263008117676, "learning_rate": 1.5082919390200743e-05, "loss": 2.1255, "step": 10262 }, { "epoch": 0.34, "grad_norm": 0.7576525211334229, "learning_rate": 1.5082004014221505e-05, "loss": 2.1548, "step": 10263 }, { "epoch": 0.34, "grad_norm": 0.7089620232582092, "learning_rate": 1.5081088580828765e-05, "loss": 2.0619, "step": 10264 }, { "epoch": 0.34, "grad_norm": 0.7123631238937378, "learning_rate": 1.5080173090032861e-05, "loss": 2.1571, "step": 10265 }, { "epoch": 0.34, "grad_norm": 0.7292981743812561, "learning_rate": 1.5079257541844141e-05, "loss": 2.1569, "step": 10266 }, { "epoch": 0.34, "grad_norm": 0.6959831118583679, "learning_rate": 1.5078341936272947e-05, "loss": 2.1106, "step": 10267 }, { "epoch": 0.34, "grad_norm": 0.7832688689231873, "learning_rate": 1.5077426273329621e-05, "loss": 2.1802, "step": 10268 }, { "epoch": 0.34, "grad_norm": 0.7376165986061096, "learning_rate": 1.5076510553024509e-05, "loss": 2.0988, "step": 10269 }, { "epoch": 0.34, "grad_norm": 0.7325858473777771, "learning_rate": 1.5075594775367951e-05, "loss": 2.08, "step": 10270 }, { "epoch": 0.34, "grad_norm": 0.7741001844406128, "learning_rate": 1.5074678940370304e-05, "loss": 2.1681, "step": 10271 }, { "epoch": 0.34, "grad_norm": 0.7445299625396729, "learning_rate": 1.5073763048041908e-05, "loss": 2.1172, "step": 10272 }, { "epoch": 0.34, "grad_norm": 0.7177271246910095, "learning_rate": 1.5072847098393107e-05, "loss": 2.1041, "step": 10273 }, { "epoch": 0.34, "grad_norm": 0.7253889441490173, "learning_rate": 1.5071931091434256e-05, "loss": 2.1462, "step": 10274 }, { "epoch": 0.34, "grad_norm": 0.7408947348594666, "learning_rate": 1.50710150271757e-05, "loss": 2.1264, "step": 10275 }, { "epoch": 0.34, "grad_norm": 0.7105762362480164, "learning_rate": 1.5070098905627784e-05, "loss": 2.1616, "step": 10276 }, { "epoch": 0.34, "grad_norm": 0.7541555762290955, "learning_rate": 1.5069182726800864e-05, "loss": 2.0972, "step": 10277 }, { "epoch": 0.34, "grad_norm": 0.709560751914978, "learning_rate": 1.506826649070529e-05, "loss": 2.1854, "step": 10278 }, { "epoch": 0.34, "grad_norm": 0.7341075539588928, "learning_rate": 1.5067350197351412e-05, "loss": 2.112, "step": 10279 }, { "epoch": 0.34, "grad_norm": 0.7066092491149902, "learning_rate": 1.506643384674958e-05, "loss": 2.113, "step": 10280 }, { "epoch": 0.34, "grad_norm": 0.732146680355072, "learning_rate": 1.5065517438910147e-05, "loss": 2.0779, "step": 10281 }, { "epoch": 0.34, "grad_norm": 0.7378636002540588, "learning_rate": 1.5064600973843466e-05, "loss": 2.1643, "step": 10282 }, { "epoch": 0.34, "grad_norm": 0.6972429156303406, "learning_rate": 1.5063684451559892e-05, "loss": 2.1123, "step": 10283 }, { "epoch": 0.34, "grad_norm": 0.7225801944732666, "learning_rate": 1.5062767872069776e-05, "loss": 2.1024, "step": 10284 }, { "epoch": 0.34, "grad_norm": 0.7435677647590637, "learning_rate": 1.5061851235383481e-05, "loss": 2.1075, "step": 10285 }, { "epoch": 0.34, "grad_norm": 0.7366815209388733, "learning_rate": 1.5060934541511356e-05, "loss": 2.1467, "step": 10286 }, { "epoch": 0.34, "grad_norm": 0.7169926166534424, "learning_rate": 1.5060017790463758e-05, "loss": 2.1452, "step": 10287 }, { "epoch": 0.34, "grad_norm": 0.7394550442695618, "learning_rate": 1.5059100982251042e-05, "loss": 2.0776, "step": 10288 }, { "epoch": 0.34, "grad_norm": 0.7188639640808105, "learning_rate": 1.505818411688357e-05, "loss": 2.0668, "step": 10289 }, { "epoch": 0.34, "grad_norm": 0.7361927032470703, "learning_rate": 1.5057267194371698e-05, "loss": 2.0387, "step": 10290 }, { "epoch": 0.34, "grad_norm": 0.6998142004013062, "learning_rate": 1.5056350214725788e-05, "loss": 2.0848, "step": 10291 }, { "epoch": 0.34, "grad_norm": 0.734306812286377, "learning_rate": 1.5055433177956191e-05, "loss": 2.1236, "step": 10292 }, { "epoch": 0.34, "grad_norm": 0.7162696719169617, "learning_rate": 1.5054516084073278e-05, "loss": 2.1153, "step": 10293 }, { "epoch": 0.34, "grad_norm": 0.7276742458343506, "learning_rate": 1.5053598933087404e-05, "loss": 2.1398, "step": 10294 }, { "epoch": 0.34, "grad_norm": 0.7033008933067322, "learning_rate": 1.5052681725008924e-05, "loss": 2.0563, "step": 10295 }, { "epoch": 0.34, "grad_norm": 0.7212196588516235, "learning_rate": 1.5051764459848214e-05, "loss": 2.1485, "step": 10296 }, { "epoch": 0.34, "grad_norm": 0.7071161270141602, "learning_rate": 1.5050847137615629e-05, "loss": 2.145, "step": 10297 }, { "epoch": 0.34, "grad_norm": 0.7241649031639099, "learning_rate": 1.504992975832153e-05, "loss": 2.122, "step": 10298 }, { "epoch": 0.34, "grad_norm": 0.73282790184021, "learning_rate": 1.5049012321976285e-05, "loss": 2.1464, "step": 10299 }, { "epoch": 0.34, "grad_norm": 0.6962893009185791, "learning_rate": 1.5048094828590261e-05, "loss": 2.0795, "step": 10300 }, { "epoch": 0.34, "grad_norm": 0.7504282593727112, "learning_rate": 1.5047177278173818e-05, "loss": 2.1502, "step": 10301 }, { "epoch": 0.34, "grad_norm": 0.7163746953010559, "learning_rate": 1.5046259670737327e-05, "loss": 2.0661, "step": 10302 }, { "epoch": 0.34, "grad_norm": 0.7137919068336487, "learning_rate": 1.5045342006291145e-05, "loss": 2.0882, "step": 10303 }, { "epoch": 0.34, "grad_norm": 0.7386662364006042, "learning_rate": 1.5044424284845649e-05, "loss": 2.0709, "step": 10304 }, { "epoch": 0.34, "grad_norm": 0.7412835955619812, "learning_rate": 1.5043506506411208e-05, "loss": 2.0494, "step": 10305 }, { "epoch": 0.34, "grad_norm": 0.7208580374717712, "learning_rate": 1.5042588670998179e-05, "loss": 2.1164, "step": 10306 }, { "epoch": 0.34, "grad_norm": 0.7306938171386719, "learning_rate": 1.5041670778616944e-05, "loss": 2.1731, "step": 10307 }, { "epoch": 0.34, "grad_norm": 0.7092154622077942, "learning_rate": 1.5040752829277865e-05, "loss": 2.0894, "step": 10308 }, { "epoch": 0.34, "grad_norm": 0.7602466344833374, "learning_rate": 1.5039834822991317e-05, "loss": 2.1554, "step": 10309 }, { "epoch": 0.34, "grad_norm": 0.7629290223121643, "learning_rate": 1.5038916759767666e-05, "loss": 2.1429, "step": 10310 }, { "epoch": 0.34, "grad_norm": 0.7038528323173523, "learning_rate": 1.5037998639617286e-05, "loss": 2.0687, "step": 10311 }, { "epoch": 0.34, "grad_norm": 0.7083216905593872, "learning_rate": 1.5037080462550551e-05, "loss": 2.1193, "step": 10312 }, { "epoch": 0.34, "grad_norm": 0.7246485352516174, "learning_rate": 1.5036162228577836e-05, "loss": 2.1635, "step": 10313 }, { "epoch": 0.34, "grad_norm": 0.7515565156936646, "learning_rate": 1.5035243937709507e-05, "loss": 2.0849, "step": 10314 }, { "epoch": 0.34, "grad_norm": 0.7317960262298584, "learning_rate": 1.5034325589955945e-05, "loss": 2.1035, "step": 10315 }, { "epoch": 0.34, "grad_norm": 0.6860764622688293, "learning_rate": 1.5033407185327522e-05, "loss": 2.1132, "step": 10316 }, { "epoch": 0.34, "grad_norm": 0.7260316014289856, "learning_rate": 1.5032488723834613e-05, "loss": 2.1487, "step": 10317 }, { "epoch": 0.34, "grad_norm": 0.7630167007446289, "learning_rate": 1.50315702054876e-05, "loss": 2.1187, "step": 10318 }, { "epoch": 0.34, "grad_norm": 0.7175998687744141, "learning_rate": 1.5030651630296853e-05, "loss": 2.1117, "step": 10319 }, { "epoch": 0.34, "grad_norm": 0.7112496495246887, "learning_rate": 1.5029732998272754e-05, "loss": 2.1334, "step": 10320 }, { "epoch": 0.34, "grad_norm": 0.705259382724762, "learning_rate": 1.5028814309425678e-05, "loss": 2.1495, "step": 10321 }, { "epoch": 0.34, "grad_norm": 0.7591829299926758, "learning_rate": 1.5027895563766004e-05, "loss": 2.0832, "step": 10322 }, { "epoch": 0.34, "grad_norm": 0.7291409969329834, "learning_rate": 1.5026976761304112e-05, "loss": 2.1422, "step": 10323 }, { "epoch": 0.34, "grad_norm": 0.715740978717804, "learning_rate": 1.5026057902050389e-05, "loss": 2.1182, "step": 10324 }, { "epoch": 0.34, "grad_norm": 0.7589407563209534, "learning_rate": 1.5025138986015203e-05, "loss": 2.2218, "step": 10325 }, { "epoch": 0.34, "grad_norm": 0.702508807182312, "learning_rate": 1.5024220013208944e-05, "loss": 2.1066, "step": 10326 }, { "epoch": 0.34, "grad_norm": 0.7366430759429932, "learning_rate": 1.5023300983641991e-05, "loss": 2.1699, "step": 10327 }, { "epoch": 0.34, "grad_norm": 0.714024543762207, "learning_rate": 1.5022381897324726e-05, "loss": 2.1521, "step": 10328 }, { "epoch": 0.34, "grad_norm": 0.7490982413291931, "learning_rate": 1.5021462754267536e-05, "loss": 2.1693, "step": 10329 }, { "epoch": 0.34, "grad_norm": 0.7185238003730774, "learning_rate": 1.5020543554480802e-05, "loss": 2.1678, "step": 10330 }, { "epoch": 0.34, "grad_norm": 0.7415304780006409, "learning_rate": 1.5019624297974908e-05, "loss": 2.1903, "step": 10331 }, { "epoch": 0.34, "grad_norm": 0.714748203754425, "learning_rate": 1.5018704984760244e-05, "loss": 2.1323, "step": 10332 }, { "epoch": 0.34, "grad_norm": 0.7412862777709961, "learning_rate": 1.5017785614847189e-05, "loss": 2.1295, "step": 10333 }, { "epoch": 0.34, "grad_norm": 0.729502260684967, "learning_rate": 1.5016866188246132e-05, "loss": 2.1134, "step": 10334 }, { "epoch": 0.34, "grad_norm": 0.7605142593383789, "learning_rate": 1.5015946704967465e-05, "loss": 2.1107, "step": 10335 }, { "epoch": 0.34, "grad_norm": 0.7379709482192993, "learning_rate": 1.5015027165021568e-05, "loss": 2.1928, "step": 10336 }, { "epoch": 0.34, "grad_norm": 0.7751691341400146, "learning_rate": 1.5014107568418834e-05, "loss": 2.1, "step": 10337 }, { "epoch": 0.34, "grad_norm": 0.7118521928787231, "learning_rate": 1.5013187915169653e-05, "loss": 2.084, "step": 10338 }, { "epoch": 0.34, "grad_norm": 0.6936752796173096, "learning_rate": 1.5012268205284412e-05, "loss": 2.1154, "step": 10339 }, { "epoch": 0.34, "grad_norm": 0.7451189756393433, "learning_rate": 1.5011348438773503e-05, "loss": 2.1776, "step": 10340 }, { "epoch": 0.34, "grad_norm": 0.7534626722335815, "learning_rate": 1.5010428615647317e-05, "loss": 2.1622, "step": 10341 }, { "epoch": 0.34, "grad_norm": 0.726686418056488, "learning_rate": 1.5009508735916242e-05, "loss": 2.1138, "step": 10342 }, { "epoch": 0.34, "grad_norm": 0.7750912308692932, "learning_rate": 1.5008588799590675e-05, "loss": 2.0848, "step": 10343 }, { "epoch": 0.34, "grad_norm": 0.7680246829986572, "learning_rate": 1.5007668806681009e-05, "loss": 2.0902, "step": 10344 }, { "epoch": 0.34, "grad_norm": 0.7062492370605469, "learning_rate": 1.5006748757197632e-05, "loss": 2.0987, "step": 10345 }, { "epoch": 0.34, "grad_norm": 0.7390596866607666, "learning_rate": 1.5005828651150942e-05, "loss": 2.053, "step": 10346 }, { "epoch": 0.34, "grad_norm": 0.7019400000572205, "learning_rate": 1.5004908488551336e-05, "loss": 2.1106, "step": 10347 }, { "epoch": 0.34, "grad_norm": 0.7123686671257019, "learning_rate": 1.5003988269409205e-05, "loss": 2.1459, "step": 10348 }, { "epoch": 0.34, "grad_norm": 0.7197229862213135, "learning_rate": 1.5003067993734947e-05, "loss": 2.1655, "step": 10349 }, { "epoch": 0.34, "grad_norm": 0.7065096497535706, "learning_rate": 1.500214766153896e-05, "loss": 2.1101, "step": 10350 }, { "epoch": 0.34, "grad_norm": 0.6985183358192444, "learning_rate": 1.5001227272831642e-05, "loss": 2.1329, "step": 10351 }, { "epoch": 0.34, "grad_norm": 0.7091954946517944, "learning_rate": 1.5000306827623386e-05, "loss": 2.0944, "step": 10352 }, { "epoch": 0.34, "grad_norm": 0.7143648862838745, "learning_rate": 1.4999386325924596e-05, "loss": 2.0991, "step": 10353 }, { "epoch": 0.34, "grad_norm": 0.7221906781196594, "learning_rate": 1.4998465767745667e-05, "loss": 2.125, "step": 10354 }, { "epoch": 0.34, "grad_norm": 0.6930164098739624, "learning_rate": 1.4997545153097004e-05, "loss": 2.0814, "step": 10355 }, { "epoch": 0.34, "grad_norm": 0.7225677967071533, "learning_rate": 1.4996624481989001e-05, "loss": 2.0677, "step": 10356 }, { "epoch": 0.34, "grad_norm": 0.7153261303901672, "learning_rate": 1.4995703754432067e-05, "loss": 2.0675, "step": 10357 }, { "epoch": 0.34, "grad_norm": 0.6866571307182312, "learning_rate": 1.49947829704366e-05, "loss": 2.0981, "step": 10358 }, { "epoch": 0.34, "grad_norm": 0.7465630769729614, "learning_rate": 1.4993862130012998e-05, "loss": 2.1825, "step": 10359 }, { "epoch": 0.34, "grad_norm": 0.7296932339668274, "learning_rate": 1.4992941233171669e-05, "loss": 2.1167, "step": 10360 }, { "epoch": 0.34, "grad_norm": 0.7211138606071472, "learning_rate": 1.4992020279923018e-05, "loss": 2.0485, "step": 10361 }, { "epoch": 0.34, "grad_norm": 0.7493512034416199, "learning_rate": 1.499109927027745e-05, "loss": 2.059, "step": 10362 }, { "epoch": 0.34, "grad_norm": 0.7193006873130798, "learning_rate": 1.4990178204245362e-05, "loss": 2.165, "step": 10363 }, { "epoch": 0.34, "grad_norm": 0.6909987926483154, "learning_rate": 1.498925708183717e-05, "loss": 2.0426, "step": 10364 }, { "epoch": 0.34, "grad_norm": 0.7321874499320984, "learning_rate": 1.4988335903063273e-05, "loss": 2.1224, "step": 10365 }, { "epoch": 0.34, "grad_norm": 0.7004453539848328, "learning_rate": 1.498741466793408e-05, "loss": 2.0681, "step": 10366 }, { "epoch": 0.34, "grad_norm": 0.7139745354652405, "learning_rate": 1.498649337646e-05, "loss": 2.087, "step": 10367 }, { "epoch": 0.34, "grad_norm": 0.7002428770065308, "learning_rate": 1.4985572028651441e-05, "loss": 2.0921, "step": 10368 }, { "epoch": 0.34, "grad_norm": 0.7547236084938049, "learning_rate": 1.4984650624518809e-05, "loss": 2.1227, "step": 10369 }, { "epoch": 0.35, "grad_norm": 0.7229976654052734, "learning_rate": 1.4983729164072516e-05, "loss": 2.1896, "step": 10370 }, { "epoch": 0.35, "grad_norm": 0.7958829998970032, "learning_rate": 1.4982807647322972e-05, "loss": 2.1319, "step": 10371 }, { "epoch": 0.35, "grad_norm": 0.754949152469635, "learning_rate": 1.4981886074280588e-05, "loss": 2.1197, "step": 10372 }, { "epoch": 0.35, "grad_norm": 0.7111319899559021, "learning_rate": 1.4980964444955776e-05, "loss": 2.1018, "step": 10373 }, { "epoch": 0.35, "grad_norm": 0.748814046382904, "learning_rate": 1.4980042759358944e-05, "loss": 2.1299, "step": 10374 }, { "epoch": 0.35, "grad_norm": 0.7135460376739502, "learning_rate": 1.4979121017500512e-05, "loss": 2.1592, "step": 10375 }, { "epoch": 0.35, "grad_norm": 0.731593668460846, "learning_rate": 1.4978199219390884e-05, "loss": 2.1546, "step": 10376 }, { "epoch": 0.35, "grad_norm": 0.7230343818664551, "learning_rate": 1.4977277365040477e-05, "loss": 2.1055, "step": 10377 }, { "epoch": 0.35, "grad_norm": 0.7991068363189697, "learning_rate": 1.4976355454459712e-05, "loss": 2.133, "step": 10378 }, { "epoch": 0.35, "grad_norm": 0.7679474353790283, "learning_rate": 1.4975433487658998e-05, "loss": 2.0911, "step": 10379 }, { "epoch": 0.35, "grad_norm": 0.7250180840492249, "learning_rate": 1.4974511464648753e-05, "loss": 2.1292, "step": 10380 }, { "epoch": 0.35, "grad_norm": 0.736171543598175, "learning_rate": 1.4973589385439393e-05, "loss": 2.0646, "step": 10381 }, { "epoch": 0.35, "grad_norm": 0.7305517792701721, "learning_rate": 1.4972667250041332e-05, "loss": 2.0894, "step": 10382 }, { "epoch": 0.35, "grad_norm": 0.7441668510437012, "learning_rate": 1.4971745058464993e-05, "loss": 2.0548, "step": 10383 }, { "epoch": 0.35, "grad_norm": 0.7640621066093445, "learning_rate": 1.4970822810720793e-05, "loss": 2.1489, "step": 10384 }, { "epoch": 0.35, "grad_norm": 0.6901702880859375, "learning_rate": 1.4969900506819149e-05, "loss": 2.0988, "step": 10385 }, { "epoch": 0.35, "grad_norm": 0.7494155764579773, "learning_rate": 1.4968978146770483e-05, "loss": 2.0859, "step": 10386 }, { "epoch": 0.35, "grad_norm": 0.7437639832496643, "learning_rate": 1.4968055730585214e-05, "loss": 2.0612, "step": 10387 }, { "epoch": 0.35, "grad_norm": 0.716599702835083, "learning_rate": 1.4967133258273759e-05, "loss": 2.1612, "step": 10388 }, { "epoch": 0.35, "grad_norm": 0.7363559603691101, "learning_rate": 1.4966210729846547e-05, "loss": 2.115, "step": 10389 }, { "epoch": 0.35, "grad_norm": 0.7077569961547852, "learning_rate": 1.4965288145313994e-05, "loss": 2.0629, "step": 10390 }, { "epoch": 0.35, "grad_norm": 0.7086566090583801, "learning_rate": 1.4964365504686529e-05, "loss": 2.1083, "step": 10391 }, { "epoch": 0.35, "grad_norm": 0.698201060295105, "learning_rate": 1.496344280797457e-05, "loss": 2.1248, "step": 10392 }, { "epoch": 0.35, "grad_norm": 0.7233189940452576, "learning_rate": 1.4962520055188543e-05, "loss": 2.1615, "step": 10393 }, { "epoch": 0.35, "grad_norm": 0.7760753035545349, "learning_rate": 1.4961597246338873e-05, "loss": 2.2683, "step": 10394 }, { "epoch": 0.35, "grad_norm": 0.7282745242118835, "learning_rate": 1.4960674381435986e-05, "loss": 2.1014, "step": 10395 }, { "epoch": 0.35, "grad_norm": 0.7469928860664368, "learning_rate": 1.4959751460490303e-05, "loss": 2.0985, "step": 10396 }, { "epoch": 0.35, "grad_norm": 0.7423644661903381, "learning_rate": 1.4958828483512263e-05, "loss": 2.0822, "step": 10397 }, { "epoch": 0.35, "grad_norm": 0.6997212171554565, "learning_rate": 1.4957905450512278e-05, "loss": 2.0564, "step": 10398 }, { "epoch": 0.35, "grad_norm": 0.7488000988960266, "learning_rate": 1.4956982361500782e-05, "loss": 2.1356, "step": 10399 }, { "epoch": 0.35, "grad_norm": 0.6968668103218079, "learning_rate": 1.4956059216488208e-05, "loss": 2.0392, "step": 10400 }, { "epoch": 0.35, "grad_norm": 0.7572245597839355, "learning_rate": 1.495513601548498e-05, "loss": 2.1186, "step": 10401 }, { "epoch": 0.35, "grad_norm": 0.7399706840515137, "learning_rate": 1.4954212758501529e-05, "loss": 2.2061, "step": 10402 }, { "epoch": 0.35, "grad_norm": 0.7279837727546692, "learning_rate": 1.495328944554829e-05, "loss": 2.1012, "step": 10403 }, { "epoch": 0.35, "grad_norm": 0.7321770787239075, "learning_rate": 1.4952366076635685e-05, "loss": 2.1344, "step": 10404 }, { "epoch": 0.35, "grad_norm": 0.724321186542511, "learning_rate": 1.495144265177415e-05, "loss": 2.0893, "step": 10405 }, { "epoch": 0.35, "grad_norm": 0.7022022008895874, "learning_rate": 1.4950519170974123e-05, "loss": 2.1125, "step": 10406 }, { "epoch": 0.35, "grad_norm": 0.7245703339576721, "learning_rate": 1.4949595634246024e-05, "loss": 2.0896, "step": 10407 }, { "epoch": 0.35, "grad_norm": 0.7087785601615906, "learning_rate": 1.4948672041600298e-05, "loss": 2.1412, "step": 10408 }, { "epoch": 0.35, "grad_norm": 0.7188130617141724, "learning_rate": 1.494774839304738e-05, "loss": 2.0717, "step": 10409 }, { "epoch": 0.35, "grad_norm": 0.7065389752388, "learning_rate": 1.4946824688597694e-05, "loss": 2.0767, "step": 10410 }, { "epoch": 0.35, "grad_norm": 0.7327172756195068, "learning_rate": 1.4945900928261684e-05, "loss": 2.0997, "step": 10411 }, { "epoch": 0.35, "grad_norm": 0.7502834796905518, "learning_rate": 1.4944977112049786e-05, "loss": 2.089, "step": 10412 }, { "epoch": 0.35, "grad_norm": 0.7189309000968933, "learning_rate": 1.4944053239972431e-05, "loss": 2.0768, "step": 10413 }, { "epoch": 0.35, "grad_norm": 0.7220255732536316, "learning_rate": 1.4943129312040062e-05, "loss": 2.1049, "step": 10414 }, { "epoch": 0.35, "grad_norm": 0.7026408314704895, "learning_rate": 1.4942205328263114e-05, "loss": 2.1123, "step": 10415 }, { "epoch": 0.35, "grad_norm": 0.7300592064857483, "learning_rate": 1.4941281288652025e-05, "loss": 2.1302, "step": 10416 }, { "epoch": 0.35, "grad_norm": 0.7076727747917175, "learning_rate": 1.494035719321724e-05, "loss": 2.1603, "step": 10417 }, { "epoch": 0.35, "grad_norm": 0.7582526206970215, "learning_rate": 1.4939433041969192e-05, "loss": 2.2236, "step": 10418 }, { "epoch": 0.35, "grad_norm": 0.7354879975318909, "learning_rate": 1.4938508834918324e-05, "loss": 2.1324, "step": 10419 }, { "epoch": 0.35, "grad_norm": 0.7018381953239441, "learning_rate": 1.493758457207508e-05, "loss": 2.0883, "step": 10420 }, { "epoch": 0.35, "grad_norm": 0.7061810493469238, "learning_rate": 1.4936660253449898e-05, "loss": 2.1218, "step": 10421 }, { "epoch": 0.35, "grad_norm": 0.7005699872970581, "learning_rate": 1.4935735879053221e-05, "loss": 2.0894, "step": 10422 }, { "epoch": 0.35, "grad_norm": 0.7697465419769287, "learning_rate": 1.4934811448895495e-05, "loss": 2.1483, "step": 10423 }, { "epoch": 0.35, "grad_norm": 0.7204517722129822, "learning_rate": 1.4933886962987156e-05, "loss": 2.1694, "step": 10424 }, { "epoch": 0.35, "grad_norm": 0.722022294998169, "learning_rate": 1.4932962421338659e-05, "loss": 2.165, "step": 10425 }, { "epoch": 0.35, "grad_norm": 0.7072915434837341, "learning_rate": 1.493203782396044e-05, "loss": 2.1159, "step": 10426 }, { "epoch": 0.35, "grad_norm": 0.717031717300415, "learning_rate": 1.493111317086295e-05, "loss": 2.044, "step": 10427 }, { "epoch": 0.35, "grad_norm": 0.7328306436538696, "learning_rate": 1.4930188462056636e-05, "loss": 2.0956, "step": 10428 }, { "epoch": 0.35, "grad_norm": 0.7058218121528625, "learning_rate": 1.4929263697551937e-05, "loss": 2.134, "step": 10429 }, { "epoch": 0.35, "grad_norm": 0.7317168116569519, "learning_rate": 1.4928338877359308e-05, "loss": 2.1048, "step": 10430 }, { "epoch": 0.35, "grad_norm": 0.7269558906555176, "learning_rate": 1.4927414001489196e-05, "loss": 2.1082, "step": 10431 }, { "epoch": 0.35, "grad_norm": 0.7535553574562073, "learning_rate": 1.4926489069952043e-05, "loss": 2.0856, "step": 10432 }, { "epoch": 0.35, "grad_norm": 0.7451537251472473, "learning_rate": 1.492556408275831e-05, "loss": 2.1492, "step": 10433 }, { "epoch": 0.35, "grad_norm": 0.714319109916687, "learning_rate": 1.4924639039918436e-05, "loss": 2.0898, "step": 10434 }, { "epoch": 0.35, "grad_norm": 0.697678804397583, "learning_rate": 1.4923713941442878e-05, "loss": 2.1035, "step": 10435 }, { "epoch": 0.35, "grad_norm": 0.7103835940361023, "learning_rate": 1.4922788787342086e-05, "loss": 2.0643, "step": 10436 }, { "epoch": 0.35, "grad_norm": 0.7077265977859497, "learning_rate": 1.4921863577626513e-05, "loss": 2.1212, "step": 10437 }, { "epoch": 0.35, "grad_norm": 0.7205677032470703, "learning_rate": 1.4920938312306605e-05, "loss": 2.1544, "step": 10438 }, { "epoch": 0.35, "grad_norm": 0.7194802165031433, "learning_rate": 1.4920012991392826e-05, "loss": 2.0831, "step": 10439 }, { "epoch": 0.35, "grad_norm": 0.717306911945343, "learning_rate": 1.4919087614895618e-05, "loss": 2.1347, "step": 10440 }, { "epoch": 0.35, "grad_norm": 0.7321506142616272, "learning_rate": 1.4918162182825441e-05, "loss": 2.1455, "step": 10441 }, { "epoch": 0.35, "grad_norm": 0.7375169992446899, "learning_rate": 1.4917236695192756e-05, "loss": 2.0854, "step": 10442 }, { "epoch": 0.35, "grad_norm": 0.7321113348007202, "learning_rate": 1.4916311152008007e-05, "loss": 2.0741, "step": 10443 }, { "epoch": 0.35, "grad_norm": 0.7229140400886536, "learning_rate": 1.491538555328166e-05, "loss": 2.1295, "step": 10444 }, { "epoch": 0.35, "grad_norm": 0.6926165819168091, "learning_rate": 1.4914459899024165e-05, "loss": 2.0971, "step": 10445 }, { "epoch": 0.35, "grad_norm": 0.7356226444244385, "learning_rate": 1.4913534189245983e-05, "loss": 2.0913, "step": 10446 }, { "epoch": 0.35, "grad_norm": 0.6954305768013, "learning_rate": 1.4912608423957572e-05, "loss": 2.1465, "step": 10447 }, { "epoch": 0.35, "grad_norm": 0.7343324422836304, "learning_rate": 1.491168260316939e-05, "loss": 2.0841, "step": 10448 }, { "epoch": 0.35, "grad_norm": 0.7240501642227173, "learning_rate": 1.4910756726891892e-05, "loss": 2.0793, "step": 10449 }, { "epoch": 0.35, "grad_norm": 0.7118881344795227, "learning_rate": 1.4909830795135546e-05, "loss": 2.0981, "step": 10450 }, { "epoch": 0.35, "grad_norm": 0.7070509195327759, "learning_rate": 1.4908904807910811e-05, "loss": 2.0921, "step": 10451 }, { "epoch": 0.35, "grad_norm": 0.7430326342582703, "learning_rate": 1.4907978765228143e-05, "loss": 2.1043, "step": 10452 }, { "epoch": 0.35, "grad_norm": 0.7531911134719849, "learning_rate": 1.490705266709801e-05, "loss": 2.0278, "step": 10453 }, { "epoch": 0.35, "grad_norm": 0.7078771591186523, "learning_rate": 1.490612651353087e-05, "loss": 2.0929, "step": 10454 }, { "epoch": 0.35, "grad_norm": 0.7775567770004272, "learning_rate": 1.490520030453719e-05, "loss": 2.0864, "step": 10455 }, { "epoch": 0.35, "grad_norm": 0.7243703007698059, "learning_rate": 1.4904274040127428e-05, "loss": 2.0745, "step": 10456 }, { "epoch": 0.35, "grad_norm": 0.7435544729232788, "learning_rate": 1.4903347720312055e-05, "loss": 2.0954, "step": 10457 }, { "epoch": 0.35, "grad_norm": 0.7295030355453491, "learning_rate": 1.4902421345101533e-05, "loss": 2.0956, "step": 10458 }, { "epoch": 0.35, "grad_norm": 0.7206933498382568, "learning_rate": 1.490149491450633e-05, "loss": 2.1157, "step": 10459 }, { "epoch": 0.35, "grad_norm": 0.7240864634513855, "learning_rate": 1.4900568428536906e-05, "loss": 2.1242, "step": 10460 }, { "epoch": 0.35, "grad_norm": 0.7165495753288269, "learning_rate": 1.4899641887203733e-05, "loss": 2.128, "step": 10461 }, { "epoch": 0.35, "grad_norm": 0.7247999906539917, "learning_rate": 1.4898715290517276e-05, "loss": 2.1085, "step": 10462 }, { "epoch": 0.35, "grad_norm": 0.7359534502029419, "learning_rate": 1.4897788638488007e-05, "loss": 2.0735, "step": 10463 }, { "epoch": 0.35, "grad_norm": 0.7117490768432617, "learning_rate": 1.4896861931126391e-05, "loss": 2.0801, "step": 10464 }, { "epoch": 0.35, "grad_norm": 0.7139006853103638, "learning_rate": 1.48959351684429e-05, "loss": 2.1382, "step": 10465 }, { "epoch": 0.35, "grad_norm": 0.7375780344009399, "learning_rate": 1.4895008350448004e-05, "loss": 2.158, "step": 10466 }, { "epoch": 0.35, "grad_norm": 0.7131018042564392, "learning_rate": 1.4894081477152167e-05, "loss": 2.0598, "step": 10467 }, { "epoch": 0.35, "grad_norm": 0.7269315123558044, "learning_rate": 1.4893154548565871e-05, "loss": 2.1019, "step": 10468 }, { "epoch": 0.35, "grad_norm": 0.6978451609611511, "learning_rate": 1.4892227564699581e-05, "loss": 2.0587, "step": 10469 }, { "epoch": 0.35, "grad_norm": 0.7096911668777466, "learning_rate": 1.489130052556377e-05, "loss": 2.13, "step": 10470 }, { "epoch": 0.35, "grad_norm": 0.719523549079895, "learning_rate": 1.4890373431168913e-05, "loss": 2.0238, "step": 10471 }, { "epoch": 0.35, "grad_norm": 0.6851069927215576, "learning_rate": 1.4889446281525484e-05, "loss": 2.0578, "step": 10472 }, { "epoch": 0.35, "grad_norm": 0.7608821988105774, "learning_rate": 1.4888519076643954e-05, "loss": 2.1292, "step": 10473 }, { "epoch": 0.35, "grad_norm": 0.7138228416442871, "learning_rate": 1.4887591816534803e-05, "loss": 2.0917, "step": 10474 }, { "epoch": 0.35, "grad_norm": 0.7031155824661255, "learning_rate": 1.4886664501208503e-05, "loss": 2.1274, "step": 10475 }, { "epoch": 0.35, "grad_norm": 0.6930181980133057, "learning_rate": 1.488573713067553e-05, "loss": 2.0519, "step": 10476 }, { "epoch": 0.35, "grad_norm": 0.7102878093719482, "learning_rate": 1.4884809704946365e-05, "loss": 2.1132, "step": 10477 }, { "epoch": 0.35, "grad_norm": 0.7405447959899902, "learning_rate": 1.488388222403148e-05, "loss": 2.1209, "step": 10478 }, { "epoch": 0.35, "grad_norm": 0.7276825904846191, "learning_rate": 1.488295468794136e-05, "loss": 2.0838, "step": 10479 }, { "epoch": 0.35, "grad_norm": 0.7264253497123718, "learning_rate": 1.4882027096686476e-05, "loss": 2.1321, "step": 10480 }, { "epoch": 0.35, "grad_norm": 0.7306627035140991, "learning_rate": 1.488109945027731e-05, "loss": 2.1571, "step": 10481 }, { "epoch": 0.35, "grad_norm": 0.7229866981506348, "learning_rate": 1.4880171748724346e-05, "loss": 2.0787, "step": 10482 }, { "epoch": 0.35, "grad_norm": 0.7677871584892273, "learning_rate": 1.487924399203806e-05, "loss": 2.101, "step": 10483 }, { "epoch": 0.35, "grad_norm": 0.7415304183959961, "learning_rate": 1.4878316180228936e-05, "loss": 2.1051, "step": 10484 }, { "epoch": 0.35, "grad_norm": 0.7222515940666199, "learning_rate": 1.4877388313307453e-05, "loss": 2.0475, "step": 10485 }, { "epoch": 0.35, "grad_norm": 0.7173749804496765, "learning_rate": 1.4876460391284095e-05, "loss": 2.1455, "step": 10486 }, { "epoch": 0.35, "grad_norm": 0.7043889760971069, "learning_rate": 1.4875532414169347e-05, "loss": 2.1354, "step": 10487 }, { "epoch": 0.35, "grad_norm": 0.7284137606620789, "learning_rate": 1.4874604381973694e-05, "loss": 2.0635, "step": 10488 }, { "epoch": 0.35, "grad_norm": 0.7181965112686157, "learning_rate": 1.4873676294707612e-05, "loss": 2.0734, "step": 10489 }, { "epoch": 0.35, "grad_norm": 0.7312502861022949, "learning_rate": 1.4872748152381595e-05, "loss": 2.1388, "step": 10490 }, { "epoch": 0.35, "grad_norm": 0.7134490609169006, "learning_rate": 1.4871819955006123e-05, "loss": 2.0798, "step": 10491 }, { "epoch": 0.35, "grad_norm": 0.728509247303009, "learning_rate": 1.4870891702591684e-05, "loss": 2.104, "step": 10492 }, { "epoch": 0.35, "grad_norm": 0.7252039909362793, "learning_rate": 1.4869963395148768e-05, "loss": 2.1094, "step": 10493 }, { "epoch": 0.35, "grad_norm": 0.7166321277618408, "learning_rate": 1.4869035032687857e-05, "loss": 2.1168, "step": 10494 }, { "epoch": 0.35, "grad_norm": 0.7361246347427368, "learning_rate": 1.4868106615219444e-05, "loss": 2.1154, "step": 10495 }, { "epoch": 0.35, "grad_norm": 0.7485746741294861, "learning_rate": 1.4867178142754015e-05, "loss": 2.0781, "step": 10496 }, { "epoch": 0.35, "grad_norm": 0.7183189988136292, "learning_rate": 1.4866249615302057e-05, "loss": 2.1706, "step": 10497 }, { "epoch": 0.35, "grad_norm": 0.7119051218032837, "learning_rate": 1.4865321032874064e-05, "loss": 2.166, "step": 10498 }, { "epoch": 0.35, "grad_norm": 0.7493982315063477, "learning_rate": 1.486439239548053e-05, "loss": 2.0672, "step": 10499 }, { "epoch": 0.35, "grad_norm": 0.7383939623832703, "learning_rate": 1.4863463703131936e-05, "loss": 2.1507, "step": 10500 }, { "epoch": 0.35, "grad_norm": 0.7388848662376404, "learning_rate": 1.4862534955838781e-05, "loss": 2.1251, "step": 10501 }, { "epoch": 0.35, "grad_norm": 0.7299036383628845, "learning_rate": 1.4861606153611556e-05, "loss": 2.1879, "step": 10502 }, { "epoch": 0.35, "grad_norm": 0.7220633029937744, "learning_rate": 1.4860677296460751e-05, "loss": 2.1471, "step": 10503 }, { "epoch": 0.35, "grad_norm": 0.7154396176338196, "learning_rate": 1.4859748384396868e-05, "loss": 2.1351, "step": 10504 }, { "epoch": 0.35, "grad_norm": 0.7401567697525024, "learning_rate": 1.4858819417430392e-05, "loss": 2.0747, "step": 10505 }, { "epoch": 0.35, "grad_norm": 0.7309392690658569, "learning_rate": 1.4857890395571823e-05, "loss": 2.1357, "step": 10506 }, { "epoch": 0.35, "grad_norm": 0.7369203567504883, "learning_rate": 1.4856961318831655e-05, "loss": 2.1082, "step": 10507 }, { "epoch": 0.35, "grad_norm": 0.7487432956695557, "learning_rate": 1.4856032187220384e-05, "loss": 2.0868, "step": 10508 }, { "epoch": 0.35, "grad_norm": 0.7392724752426147, "learning_rate": 1.4855103000748507e-05, "loss": 2.0824, "step": 10509 }, { "epoch": 0.35, "grad_norm": 0.6983421444892883, "learning_rate": 1.4854173759426527e-05, "loss": 2.1034, "step": 10510 }, { "epoch": 0.35, "grad_norm": 0.7250156998634338, "learning_rate": 1.4853244463264929e-05, "loss": 2.184, "step": 10511 }, { "epoch": 0.35, "grad_norm": 0.7094531655311584, "learning_rate": 1.4852315112274225e-05, "loss": 2.1054, "step": 10512 }, { "epoch": 0.35, "grad_norm": 0.7636260390281677, "learning_rate": 1.4851385706464904e-05, "loss": 2.1546, "step": 10513 }, { "epoch": 0.35, "grad_norm": 0.7198039889335632, "learning_rate": 1.4850456245847472e-05, "loss": 2.0479, "step": 10514 }, { "epoch": 0.35, "grad_norm": 0.7443731427192688, "learning_rate": 1.4849526730432428e-05, "loss": 2.106, "step": 10515 }, { "epoch": 0.35, "grad_norm": 0.7250052690505981, "learning_rate": 1.4848597160230276e-05, "loss": 2.1323, "step": 10516 }, { "epoch": 0.35, "grad_norm": 0.7147414088249207, "learning_rate": 1.484766753525151e-05, "loss": 2.1115, "step": 10517 }, { "epoch": 0.35, "grad_norm": 0.7056184411048889, "learning_rate": 1.484673785550664e-05, "loss": 2.1591, "step": 10518 }, { "epoch": 0.35, "grad_norm": 0.7206652760505676, "learning_rate": 1.4845808121006166e-05, "loss": 2.0536, "step": 10519 }, { "epoch": 0.35, "grad_norm": 0.7376263737678528, "learning_rate": 1.4844878331760593e-05, "loss": 2.1601, "step": 10520 }, { "epoch": 0.35, "grad_norm": 0.7320221066474915, "learning_rate": 1.4843948487780425e-05, "loss": 2.0822, "step": 10521 }, { "epoch": 0.35, "grad_norm": 0.7012993693351746, "learning_rate": 1.484301858907616e-05, "loss": 2.1, "step": 10522 }, { "epoch": 0.35, "grad_norm": 0.7236185073852539, "learning_rate": 1.4842088635658312e-05, "loss": 2.0744, "step": 10523 }, { "epoch": 0.35, "grad_norm": 0.7262057065963745, "learning_rate": 1.484115862753739e-05, "loss": 2.102, "step": 10524 }, { "epoch": 0.35, "grad_norm": 0.7356593608856201, "learning_rate": 1.4840228564723887e-05, "loss": 2.1163, "step": 10525 }, { "epoch": 0.35, "grad_norm": 0.7310892939567566, "learning_rate": 1.483929844722832e-05, "loss": 2.1092, "step": 10526 }, { "epoch": 0.35, "grad_norm": 0.7332732081413269, "learning_rate": 1.48383682750612e-05, "loss": 2.1135, "step": 10527 }, { "epoch": 0.35, "grad_norm": 0.7348324060440063, "learning_rate": 1.4837438048233026e-05, "loss": 2.098, "step": 10528 }, { "epoch": 0.35, "grad_norm": 0.7708093523979187, "learning_rate": 1.4836507766754314e-05, "loss": 2.1329, "step": 10529 }, { "epoch": 0.35, "grad_norm": 0.7241346836090088, "learning_rate": 1.483557743063557e-05, "loss": 2.1412, "step": 10530 }, { "epoch": 0.35, "grad_norm": 0.7094256281852722, "learning_rate": 1.4834647039887307e-05, "loss": 2.1273, "step": 10531 }, { "epoch": 0.35, "grad_norm": 0.7067854404449463, "learning_rate": 1.4833716594520037e-05, "loss": 2.0856, "step": 10532 }, { "epoch": 0.35, "grad_norm": 0.7352094054222107, "learning_rate": 1.4832786094544267e-05, "loss": 2.1571, "step": 10533 }, { "epoch": 0.35, "grad_norm": 0.7399184107780457, "learning_rate": 1.4831855539970512e-05, "loss": 2.0595, "step": 10534 }, { "epoch": 0.35, "grad_norm": 0.7360782027244568, "learning_rate": 1.4830924930809288e-05, "loss": 2.1515, "step": 10535 }, { "epoch": 0.35, "grad_norm": 0.7636145949363708, "learning_rate": 1.4829994267071102e-05, "loss": 2.0853, "step": 10536 }, { "epoch": 0.35, "grad_norm": 0.7282500267028809, "learning_rate": 1.4829063548766474e-05, "loss": 2.1467, "step": 10537 }, { "epoch": 0.35, "grad_norm": 0.7503781318664551, "learning_rate": 1.4828132775905914e-05, "loss": 2.1371, "step": 10538 }, { "epoch": 0.35, "grad_norm": 0.7393903732299805, "learning_rate": 1.482720194849994e-05, "loss": 2.1011, "step": 10539 }, { "epoch": 0.35, "grad_norm": 0.733180046081543, "learning_rate": 1.4826271066559068e-05, "loss": 2.1936, "step": 10540 }, { "epoch": 0.35, "grad_norm": 0.7636940479278564, "learning_rate": 1.4825340130093815e-05, "loss": 2.1331, "step": 10541 }, { "epoch": 0.35, "grad_norm": 0.7593944072723389, "learning_rate": 1.4824409139114695e-05, "loss": 2.1394, "step": 10542 }, { "epoch": 0.35, "grad_norm": 0.7695443034172058, "learning_rate": 1.4823478093632231e-05, "loss": 2.1357, "step": 10543 }, { "epoch": 0.35, "grad_norm": 0.7230416536331177, "learning_rate": 1.4822546993656932e-05, "loss": 2.1202, "step": 10544 }, { "epoch": 0.35, "grad_norm": 0.722801923751831, "learning_rate": 1.4821615839199329e-05, "loss": 2.1665, "step": 10545 }, { "epoch": 0.35, "grad_norm": 0.7306049466133118, "learning_rate": 1.4820684630269936e-05, "loss": 2.1299, "step": 10546 }, { "epoch": 0.35, "grad_norm": 0.745830237865448, "learning_rate": 1.4819753366879268e-05, "loss": 2.1465, "step": 10547 }, { "epoch": 0.35, "grad_norm": 0.7438483834266663, "learning_rate": 1.4818822049037857e-05, "loss": 2.1491, "step": 10548 }, { "epoch": 0.35, "grad_norm": 0.7464824318885803, "learning_rate": 1.4817890676756215e-05, "loss": 2.0877, "step": 10549 }, { "epoch": 0.35, "grad_norm": 0.7562946081161499, "learning_rate": 1.4816959250044872e-05, "loss": 2.136, "step": 10550 }, { "epoch": 0.35, "grad_norm": 0.6988404989242554, "learning_rate": 1.4816027768914343e-05, "loss": 2.1708, "step": 10551 }, { "epoch": 0.35, "grad_norm": 0.7500683665275574, "learning_rate": 1.4815096233375155e-05, "loss": 2.1226, "step": 10552 }, { "epoch": 0.35, "grad_norm": 0.746777355670929, "learning_rate": 1.4814164643437832e-05, "loss": 2.0321, "step": 10553 }, { "epoch": 0.35, "grad_norm": 0.715187132358551, "learning_rate": 1.4813232999112898e-05, "loss": 2.1128, "step": 10554 }, { "epoch": 0.35, "grad_norm": 0.7032173871994019, "learning_rate": 1.4812301300410878e-05, "loss": 2.0912, "step": 10555 }, { "epoch": 0.35, "grad_norm": 0.7155032157897949, "learning_rate": 1.4811369547342298e-05, "loss": 2.1007, "step": 10556 }, { "epoch": 0.35, "grad_norm": 0.737968385219574, "learning_rate": 1.4810437739917687e-05, "loss": 2.0715, "step": 10557 }, { "epoch": 0.35, "grad_norm": 0.7057023644447327, "learning_rate": 1.4809505878147566e-05, "loss": 2.0921, "step": 10558 }, { "epoch": 0.35, "grad_norm": 0.7230681777000427, "learning_rate": 1.4808573962042471e-05, "loss": 2.0197, "step": 10559 }, { "epoch": 0.35, "grad_norm": 0.7395954132080078, "learning_rate": 1.480764199161292e-05, "loss": 2.0976, "step": 10560 }, { "epoch": 0.35, "grad_norm": 0.7191762328147888, "learning_rate": 1.4806709966869452e-05, "loss": 2.1201, "step": 10561 }, { "epoch": 0.35, "grad_norm": 0.7204675674438477, "learning_rate": 1.480577788782259e-05, "loss": 2.1215, "step": 10562 }, { "epoch": 0.35, "grad_norm": 0.7214027643203735, "learning_rate": 1.4804845754482865e-05, "loss": 2.124, "step": 10563 }, { "epoch": 0.35, "grad_norm": 0.754606306552887, "learning_rate": 1.4803913566860808e-05, "loss": 2.0976, "step": 10564 }, { "epoch": 0.35, "grad_norm": 0.7406073212623596, "learning_rate": 1.4802981324966953e-05, "loss": 2.1338, "step": 10565 }, { "epoch": 0.35, "grad_norm": 0.7012860178947449, "learning_rate": 1.480204902881183e-05, "loss": 2.0892, "step": 10566 }, { "epoch": 0.35, "grad_norm": 0.7526813745498657, "learning_rate": 1.4801116678405969e-05, "loss": 2.1355, "step": 10567 }, { "epoch": 0.35, "grad_norm": 0.7804391980171204, "learning_rate": 1.4800184273759907e-05, "loss": 2.1392, "step": 10568 }, { "epoch": 0.35, "grad_norm": 0.733037531375885, "learning_rate": 1.4799251814884176e-05, "loss": 2.217, "step": 10569 }, { "epoch": 0.35, "grad_norm": 0.7024804353713989, "learning_rate": 1.4798319301789312e-05, "loss": 2.1368, "step": 10570 }, { "epoch": 0.35, "grad_norm": 0.7046234607696533, "learning_rate": 1.4797386734485845e-05, "loss": 2.133, "step": 10571 }, { "epoch": 0.35, "grad_norm": 0.7400580644607544, "learning_rate": 1.479645411298432e-05, "loss": 2.1856, "step": 10572 }, { "epoch": 0.35, "grad_norm": 0.724421501159668, "learning_rate": 1.4795521437295264e-05, "loss": 2.171, "step": 10573 }, { "epoch": 0.35, "grad_norm": 0.7011397480964661, "learning_rate": 1.4794588707429217e-05, "loss": 2.0649, "step": 10574 }, { "epoch": 0.35, "grad_norm": 0.72963947057724, "learning_rate": 1.4793655923396717e-05, "loss": 2.1113, "step": 10575 }, { "epoch": 0.35, "grad_norm": 0.7208470702171326, "learning_rate": 1.47927230852083e-05, "loss": 2.183, "step": 10576 }, { "epoch": 0.35, "grad_norm": 0.743577778339386, "learning_rate": 1.4791790192874509e-05, "loss": 2.1312, "step": 10577 }, { "epoch": 0.35, "grad_norm": 0.7263944149017334, "learning_rate": 1.4790857246405879e-05, "loss": 2.1037, "step": 10578 }, { "epoch": 0.35, "grad_norm": 0.7544155716896057, "learning_rate": 1.4789924245812952e-05, "loss": 2.1417, "step": 10579 }, { "epoch": 0.35, "grad_norm": 0.7245422005653381, "learning_rate": 1.4788991191106268e-05, "loss": 2.1138, "step": 10580 }, { "epoch": 0.35, "grad_norm": 0.7459631562232971, "learning_rate": 1.4788058082296371e-05, "loss": 2.1463, "step": 10581 }, { "epoch": 0.35, "grad_norm": 0.716444730758667, "learning_rate": 1.4787124919393793e-05, "loss": 2.1114, "step": 10582 }, { "epoch": 0.35, "grad_norm": 0.7301832437515259, "learning_rate": 1.4786191702409091e-05, "loss": 2.0493, "step": 10583 }, { "epoch": 0.35, "grad_norm": 0.7089590430259705, "learning_rate": 1.4785258431352798e-05, "loss": 2.1109, "step": 10584 }, { "epoch": 0.35, "grad_norm": 0.7470178008079529, "learning_rate": 1.4784325106235457e-05, "loss": 2.1053, "step": 10585 }, { "epoch": 0.35, "grad_norm": 0.7108622789382935, "learning_rate": 1.4783391727067615e-05, "loss": 2.026, "step": 10586 }, { "epoch": 0.35, "grad_norm": 0.7535647749900818, "learning_rate": 1.4782458293859819e-05, "loss": 2.1831, "step": 10587 }, { "epoch": 0.35, "grad_norm": 0.6880736947059631, "learning_rate": 1.478152480662261e-05, "loss": 2.1105, "step": 10588 }, { "epoch": 0.35, "grad_norm": 0.7120999693870544, "learning_rate": 1.4780591265366538e-05, "loss": 2.1359, "step": 10589 }, { "epoch": 0.35, "grad_norm": 0.7136363983154297, "learning_rate": 1.4779657670102145e-05, "loss": 2.126, "step": 10590 }, { "epoch": 0.35, "grad_norm": 0.7094338536262512, "learning_rate": 1.4778724020839982e-05, "loss": 2.1197, "step": 10591 }, { "epoch": 0.35, "grad_norm": 0.7233152389526367, "learning_rate": 1.4777790317590596e-05, "loss": 2.116, "step": 10592 }, { "epoch": 0.35, "grad_norm": 0.7566646933555603, "learning_rate": 1.4776856560364535e-05, "loss": 2.0689, "step": 10593 }, { "epoch": 0.35, "grad_norm": 0.7554349899291992, "learning_rate": 1.477592274917235e-05, "loss": 2.1131, "step": 10594 }, { "epoch": 0.35, "grad_norm": 0.7407272458076477, "learning_rate": 1.4774988884024586e-05, "loss": 2.1729, "step": 10595 }, { "epoch": 0.35, "grad_norm": 0.7396735548973083, "learning_rate": 1.4774054964931794e-05, "loss": 2.129, "step": 10596 }, { "epoch": 0.35, "grad_norm": 0.7124592065811157, "learning_rate": 1.4773120991904533e-05, "loss": 2.1109, "step": 10597 }, { "epoch": 0.35, "grad_norm": 0.7445746064186096, "learning_rate": 1.4772186964953343e-05, "loss": 2.0262, "step": 10598 }, { "epoch": 0.35, "grad_norm": 0.7359894514083862, "learning_rate": 1.4771252884088783e-05, "loss": 2.1211, "step": 10599 }, { "epoch": 0.35, "grad_norm": 0.7731152176856995, "learning_rate": 1.4770318749321404e-05, "loss": 2.0924, "step": 10600 }, { "epoch": 0.35, "grad_norm": 0.6916806697845459, "learning_rate": 1.4769384560661761e-05, "loss": 2.1023, "step": 10601 }, { "epoch": 0.35, "grad_norm": 0.7274885177612305, "learning_rate": 1.4768450318120404e-05, "loss": 2.1491, "step": 10602 }, { "epoch": 0.35, "grad_norm": 0.7249237895011902, "learning_rate": 1.4767516021707895e-05, "loss": 2.1291, "step": 10603 }, { "epoch": 0.35, "grad_norm": 0.7154503464698792, "learning_rate": 1.4766581671434777e-05, "loss": 2.1047, "step": 10604 }, { "epoch": 0.35, "grad_norm": 0.7300462126731873, "learning_rate": 1.4765647267311618e-05, "loss": 2.1929, "step": 10605 }, { "epoch": 0.35, "grad_norm": 0.7393168210983276, "learning_rate": 1.4764712809348967e-05, "loss": 2.1085, "step": 10606 }, { "epoch": 0.35, "grad_norm": 0.7266584634780884, "learning_rate": 1.4763778297557381e-05, "loss": 2.1526, "step": 10607 }, { "epoch": 0.35, "grad_norm": 0.7454675436019897, "learning_rate": 1.4762843731947422e-05, "loss": 2.0567, "step": 10608 }, { "epoch": 0.35, "grad_norm": 0.7402105331420898, "learning_rate": 1.4761909112529646e-05, "loss": 2.0699, "step": 10609 }, { "epoch": 0.35, "grad_norm": 0.7279614210128784, "learning_rate": 1.4760974439314613e-05, "loss": 2.1363, "step": 10610 }, { "epoch": 0.35, "grad_norm": 0.719496488571167, "learning_rate": 1.476003971231288e-05, "loss": 2.1142, "step": 10611 }, { "epoch": 0.35, "grad_norm": 0.7673267126083374, "learning_rate": 1.4759104931535004e-05, "loss": 2.0291, "step": 10612 }, { "epoch": 0.35, "grad_norm": 0.7183021903038025, "learning_rate": 1.4758170096991552e-05, "loss": 2.1576, "step": 10613 }, { "epoch": 0.35, "grad_norm": 0.7354138493537903, "learning_rate": 1.4757235208693086e-05, "loss": 2.132, "step": 10614 }, { "epoch": 0.35, "grad_norm": 0.6772012710571289, "learning_rate": 1.4756300266650161e-05, "loss": 2.0775, "step": 10615 }, { "epoch": 0.35, "grad_norm": 0.7167621850967407, "learning_rate": 1.4755365270873348e-05, "loss": 2.1945, "step": 10616 }, { "epoch": 0.35, "grad_norm": 0.7289667725563049, "learning_rate": 1.47544302213732e-05, "loss": 2.1157, "step": 10617 }, { "epoch": 0.35, "grad_norm": 0.7327138781547546, "learning_rate": 1.4753495118160283e-05, "loss": 2.0014, "step": 10618 }, { "epoch": 0.35, "grad_norm": 0.726081371307373, "learning_rate": 1.4752559961245172e-05, "loss": 2.136, "step": 10619 }, { "epoch": 0.35, "grad_norm": 0.6932653188705444, "learning_rate": 1.4751624750638418e-05, "loss": 2.1021, "step": 10620 }, { "epoch": 0.35, "grad_norm": 0.738296389579773, "learning_rate": 1.4750689486350595e-05, "loss": 2.1061, "step": 10621 }, { "epoch": 0.35, "grad_norm": 0.6972613334655762, "learning_rate": 1.4749754168392266e-05, "loss": 2.1224, "step": 10622 }, { "epoch": 0.35, "grad_norm": 0.7374845743179321, "learning_rate": 1.4748818796773998e-05, "loss": 2.0677, "step": 10623 }, { "epoch": 0.35, "grad_norm": 0.7273114323616028, "learning_rate": 1.4747883371506357e-05, "loss": 2.1475, "step": 10624 }, { "epoch": 0.35, "grad_norm": 0.7235073447227478, "learning_rate": 1.4746947892599914e-05, "loss": 2.0495, "step": 10625 }, { "epoch": 0.35, "grad_norm": 0.6947516798973083, "learning_rate": 1.4746012360065234e-05, "loss": 2.1637, "step": 10626 }, { "epoch": 0.35, "grad_norm": 0.7210212349891663, "learning_rate": 1.4745076773912887e-05, "loss": 2.1589, "step": 10627 }, { "epoch": 0.35, "grad_norm": 0.741195559501648, "learning_rate": 1.4744141134153446e-05, "loss": 2.0898, "step": 10628 }, { "epoch": 0.35, "grad_norm": 0.7275981903076172, "learning_rate": 1.4743205440797477e-05, "loss": 2.141, "step": 10629 }, { "epoch": 0.35, "grad_norm": 0.733950138092041, "learning_rate": 1.4742269693855558e-05, "loss": 2.0787, "step": 10630 }, { "epoch": 0.35, "grad_norm": 0.7199733257293701, "learning_rate": 1.474133389333825e-05, "loss": 2.1381, "step": 10631 }, { "epoch": 0.35, "grad_norm": 0.7123814821243286, "learning_rate": 1.474039803925613e-05, "loss": 2.1302, "step": 10632 }, { "epoch": 0.35, "grad_norm": 0.7444413304328918, "learning_rate": 1.4739462131619773e-05, "loss": 2.1569, "step": 10633 }, { "epoch": 0.35, "grad_norm": 0.7282716631889343, "learning_rate": 1.4738526170439748e-05, "loss": 2.0456, "step": 10634 }, { "epoch": 0.35, "grad_norm": 0.7091349959373474, "learning_rate": 1.4737590155726633e-05, "loss": 2.1012, "step": 10635 }, { "epoch": 0.35, "grad_norm": 0.7262563705444336, "learning_rate": 1.4736654087491005e-05, "loss": 2.1402, "step": 10636 }, { "epoch": 0.35, "grad_norm": 0.7245771884918213, "learning_rate": 1.4735717965743427e-05, "loss": 2.1075, "step": 10637 }, { "epoch": 0.35, "grad_norm": 0.7241370677947998, "learning_rate": 1.4734781790494489e-05, "loss": 2.1261, "step": 10638 }, { "epoch": 0.35, "grad_norm": 0.7440412044525146, "learning_rate": 1.473384556175476e-05, "loss": 2.0871, "step": 10639 }, { "epoch": 0.35, "grad_norm": 0.70985347032547, "learning_rate": 1.4732909279534815e-05, "loss": 2.0531, "step": 10640 }, { "epoch": 0.35, "grad_norm": 0.727592408657074, "learning_rate": 1.473197294384524e-05, "loss": 2.1026, "step": 10641 }, { "epoch": 0.35, "grad_norm": 0.7280829548835754, "learning_rate": 1.47310365546966e-05, "loss": 2.153, "step": 10642 }, { "epoch": 0.35, "grad_norm": 0.7090317606925964, "learning_rate": 1.4730100112099491e-05, "loss": 2.0744, "step": 10643 }, { "epoch": 0.35, "grad_norm": 0.7340937256813049, "learning_rate": 1.4729163616064478e-05, "loss": 2.1438, "step": 10644 }, { "epoch": 0.35, "grad_norm": 0.7132500410079956, "learning_rate": 1.4728227066602146e-05, "loss": 2.1138, "step": 10645 }, { "epoch": 0.35, "grad_norm": 0.7084729075431824, "learning_rate": 1.4727290463723076e-05, "loss": 2.1024, "step": 10646 }, { "epoch": 0.35, "grad_norm": 0.7285545468330383, "learning_rate": 1.472635380743785e-05, "loss": 2.1535, "step": 10647 }, { "epoch": 0.35, "grad_norm": 0.7258221507072449, "learning_rate": 1.4725417097757046e-05, "loss": 2.0476, "step": 10648 }, { "epoch": 0.35, "grad_norm": 0.7319920063018799, "learning_rate": 1.4724480334691252e-05, "loss": 2.1264, "step": 10649 }, { "epoch": 0.35, "grad_norm": 0.7296038866043091, "learning_rate": 1.4723543518251044e-05, "loss": 2.1207, "step": 10650 }, { "epoch": 0.35, "grad_norm": 0.7396849989891052, "learning_rate": 1.4722606648447013e-05, "loss": 2.1328, "step": 10651 }, { "epoch": 0.35, "grad_norm": 0.7211018800735474, "learning_rate": 1.4721669725289742e-05, "loss": 2.0991, "step": 10652 }, { "epoch": 0.35, "grad_norm": 0.7875257730484009, "learning_rate": 1.4720732748789808e-05, "loss": 2.1144, "step": 10653 }, { "epoch": 0.35, "grad_norm": 0.7283000349998474, "learning_rate": 1.4719795718957807e-05, "loss": 2.0817, "step": 10654 }, { "epoch": 0.35, "grad_norm": 0.711953341960907, "learning_rate": 1.4718858635804317e-05, "loss": 2.1122, "step": 10655 }, { "epoch": 0.35, "grad_norm": 0.7097283601760864, "learning_rate": 1.4717921499339928e-05, "loss": 2.1411, "step": 10656 }, { "epoch": 0.35, "grad_norm": 0.7133876085281372, "learning_rate": 1.4716984309575229e-05, "loss": 2.0972, "step": 10657 }, { "epoch": 0.35, "grad_norm": 0.7025846838951111, "learning_rate": 1.4716047066520802e-05, "loss": 2.1515, "step": 10658 }, { "epoch": 0.35, "grad_norm": 0.694673478603363, "learning_rate": 1.4715109770187242e-05, "loss": 2.1083, "step": 10659 }, { "epoch": 0.35, "grad_norm": 0.7403780221939087, "learning_rate": 1.4714172420585134e-05, "loss": 2.1976, "step": 10660 }, { "epoch": 0.35, "grad_norm": 0.7139356136322021, "learning_rate": 1.4713235017725066e-05, "loss": 2.1342, "step": 10661 }, { "epoch": 0.35, "grad_norm": 0.740252673625946, "learning_rate": 1.4712297561617637e-05, "loss": 2.1538, "step": 10662 }, { "epoch": 0.35, "grad_norm": 0.7596855759620667, "learning_rate": 1.471136005227343e-05, "loss": 2.1501, "step": 10663 }, { "epoch": 0.35, "grad_norm": 0.7212527990341187, "learning_rate": 1.4710422489703036e-05, "loss": 2.1039, "step": 10664 }, { "epoch": 0.35, "grad_norm": 0.6830949783325195, "learning_rate": 1.470948487391705e-05, "loss": 2.1044, "step": 10665 }, { "epoch": 0.35, "grad_norm": 0.6985042691230774, "learning_rate": 1.4708547204926064e-05, "loss": 2.1154, "step": 10666 }, { "epoch": 0.35, "grad_norm": 0.7282301783561707, "learning_rate": 1.4707609482740673e-05, "loss": 2.0644, "step": 10667 }, { "epoch": 0.35, "grad_norm": 0.7447898387908936, "learning_rate": 1.4706671707371466e-05, "loss": 2.0883, "step": 10668 }, { "epoch": 0.35, "grad_norm": 0.7376669049263, "learning_rate": 1.4705733878829042e-05, "loss": 2.1139, "step": 10669 }, { "epoch": 0.35, "grad_norm": 0.7449215054512024, "learning_rate": 1.4704795997123995e-05, "loss": 2.0451, "step": 10670 }, { "epoch": 0.36, "grad_norm": 0.7168874740600586, "learning_rate": 1.470385806226692e-05, "loss": 2.0914, "step": 10671 }, { "epoch": 0.36, "grad_norm": 0.7354474663734436, "learning_rate": 1.4702920074268412e-05, "loss": 2.1448, "step": 10672 }, { "epoch": 0.36, "grad_norm": 0.7651484608650208, "learning_rate": 1.4701982033139071e-05, "loss": 2.1086, "step": 10673 }, { "epoch": 0.36, "grad_norm": 0.7756704688072205, "learning_rate": 1.4701043938889492e-05, "loss": 2.1092, "step": 10674 }, { "epoch": 0.36, "grad_norm": 0.7392194271087646, "learning_rate": 1.470010579153027e-05, "loss": 2.1041, "step": 10675 }, { "epoch": 0.36, "grad_norm": 0.7607390284538269, "learning_rate": 1.4699167591072013e-05, "loss": 2.1497, "step": 10676 }, { "epoch": 0.36, "grad_norm": 0.8281517624855042, "learning_rate": 1.4698229337525311e-05, "loss": 2.0652, "step": 10677 }, { "epoch": 0.36, "grad_norm": 0.7029891014099121, "learning_rate": 1.4697291030900771e-05, "loss": 2.2133, "step": 10678 }, { "epoch": 0.36, "grad_norm": 0.6847342848777771, "learning_rate": 1.4696352671208986e-05, "loss": 2.0701, "step": 10679 }, { "epoch": 0.36, "grad_norm": 0.714811384677887, "learning_rate": 1.4695414258460564e-05, "loss": 2.125, "step": 10680 }, { "epoch": 0.36, "grad_norm": 0.7215642929077148, "learning_rate": 1.4694475792666098e-05, "loss": 2.0745, "step": 10681 }, { "epoch": 0.36, "grad_norm": 0.7549083232879639, "learning_rate": 1.4693537273836201e-05, "loss": 2.0591, "step": 10682 }, { "epoch": 0.36, "grad_norm": 0.7193545699119568, "learning_rate": 1.4692598701981469e-05, "loss": 2.1202, "step": 10683 }, { "epoch": 0.36, "grad_norm": 0.7159538865089417, "learning_rate": 1.4691660077112507e-05, "loss": 2.1022, "step": 10684 }, { "epoch": 0.36, "grad_norm": 0.7144292593002319, "learning_rate": 1.469072139923992e-05, "loss": 2.1217, "step": 10685 }, { "epoch": 0.36, "grad_norm": 0.7088968753814697, "learning_rate": 1.4689782668374309e-05, "loss": 2.1863, "step": 10686 }, { "epoch": 0.36, "grad_norm": 0.7178087830543518, "learning_rate": 1.4688843884526285e-05, "loss": 2.0307, "step": 10687 }, { "epoch": 0.36, "grad_norm": 0.7296062707901001, "learning_rate": 1.4687905047706448e-05, "loss": 2.0879, "step": 10688 }, { "epoch": 0.36, "grad_norm": 0.699737548828125, "learning_rate": 1.4686966157925409e-05, "loss": 2.0873, "step": 10689 }, { "epoch": 0.36, "grad_norm": 0.7272664308547974, "learning_rate": 1.4686027215193772e-05, "loss": 2.075, "step": 10690 }, { "epoch": 0.36, "grad_norm": 0.7221621870994568, "learning_rate": 1.4685088219522147e-05, "loss": 2.0958, "step": 10691 }, { "epoch": 0.36, "grad_norm": 0.7861818671226501, "learning_rate": 1.468414917092114e-05, "loss": 2.1617, "step": 10692 }, { "epoch": 0.36, "grad_norm": 0.7191817164421082, "learning_rate": 1.4683210069401361e-05, "loss": 2.0327, "step": 10693 }, { "epoch": 0.36, "grad_norm": 0.711847722530365, "learning_rate": 1.468227091497342e-05, "loss": 2.0676, "step": 10694 }, { "epoch": 0.36, "grad_norm": 0.7502296566963196, "learning_rate": 1.4681331707647925e-05, "loss": 2.1205, "step": 10695 }, { "epoch": 0.36, "grad_norm": 0.7163909673690796, "learning_rate": 1.4680392447435492e-05, "loss": 2.0897, "step": 10696 }, { "epoch": 0.36, "grad_norm": 0.7378166913986206, "learning_rate": 1.4679453134346722e-05, "loss": 2.1559, "step": 10697 }, { "epoch": 0.36, "grad_norm": 0.760080873966217, "learning_rate": 1.4678513768392237e-05, "loss": 2.0706, "step": 10698 }, { "epoch": 0.36, "grad_norm": 0.7390359044075012, "learning_rate": 1.4677574349582644e-05, "loss": 2.1519, "step": 10699 }, { "epoch": 0.36, "grad_norm": 0.7275302410125732, "learning_rate": 1.4676634877928554e-05, "loss": 2.1615, "step": 10700 }, { "epoch": 0.36, "grad_norm": 0.715356707572937, "learning_rate": 1.4675695353440591e-05, "loss": 2.0973, "step": 10701 }, { "epoch": 0.36, "grad_norm": 0.7154173254966736, "learning_rate": 1.4674755776129357e-05, "loss": 2.0608, "step": 10702 }, { "epoch": 0.36, "grad_norm": 0.7507113218307495, "learning_rate": 1.4673816146005473e-05, "loss": 2.0811, "step": 10703 }, { "epoch": 0.36, "grad_norm": 0.7092428207397461, "learning_rate": 1.4672876463079555e-05, "loss": 2.11, "step": 10704 }, { "epoch": 0.36, "grad_norm": 0.7321333289146423, "learning_rate": 1.4671936727362214e-05, "loss": 2.1619, "step": 10705 }, { "epoch": 0.36, "grad_norm": 0.7313870191574097, "learning_rate": 1.4670996938864073e-05, "loss": 2.1284, "step": 10706 }, { "epoch": 0.36, "grad_norm": 0.7189551591873169, "learning_rate": 1.4670057097595747e-05, "loss": 2.1673, "step": 10707 }, { "epoch": 0.36, "grad_norm": 0.7239701747894287, "learning_rate": 1.4669117203567848e-05, "loss": 2.1397, "step": 10708 }, { "epoch": 0.36, "grad_norm": 0.7288504838943481, "learning_rate": 1.4668177256791003e-05, "loss": 2.1879, "step": 10709 }, { "epoch": 0.36, "grad_norm": 0.7403784394264221, "learning_rate": 1.4667237257275826e-05, "loss": 2.1761, "step": 10710 }, { "epoch": 0.36, "grad_norm": 0.6891387104988098, "learning_rate": 1.4666297205032935e-05, "loss": 2.0966, "step": 10711 }, { "epoch": 0.36, "grad_norm": 0.7422135472297668, "learning_rate": 1.466535710007296e-05, "loss": 2.1045, "step": 10712 }, { "epoch": 0.36, "grad_norm": 0.7195985913276672, "learning_rate": 1.466441694240651e-05, "loss": 2.0519, "step": 10713 }, { "epoch": 0.36, "grad_norm": 0.7184195518493652, "learning_rate": 1.466347673204421e-05, "loss": 2.1365, "step": 10714 }, { "epoch": 0.36, "grad_norm": 0.7611271142959595, "learning_rate": 1.4662536468996684e-05, "loss": 2.1449, "step": 10715 }, { "epoch": 0.36, "grad_norm": 0.7371851205825806, "learning_rate": 1.4661596153274555e-05, "loss": 2.1129, "step": 10716 }, { "epoch": 0.36, "grad_norm": 0.7106055021286011, "learning_rate": 1.4660655784888442e-05, "loss": 2.1347, "step": 10717 }, { "epoch": 0.36, "grad_norm": 0.7725675702095032, "learning_rate": 1.4659715363848976e-05, "loss": 2.1875, "step": 10718 }, { "epoch": 0.36, "grad_norm": 0.6984225511550903, "learning_rate": 1.4658774890166768e-05, "loss": 2.169, "step": 10719 }, { "epoch": 0.36, "grad_norm": 0.7144049406051636, "learning_rate": 1.465783436385246e-05, "loss": 2.0814, "step": 10720 }, { "epoch": 0.36, "grad_norm": 0.7294212579727173, "learning_rate": 1.4656893784916668e-05, "loss": 2.0594, "step": 10721 }, { "epoch": 0.36, "grad_norm": 0.7128740549087524, "learning_rate": 1.4655953153370014e-05, "loss": 2.1168, "step": 10722 }, { "epoch": 0.36, "grad_norm": 0.7078655362129211, "learning_rate": 1.4655012469223136e-05, "loss": 2.0562, "step": 10723 }, { "epoch": 0.36, "grad_norm": 0.7319775819778442, "learning_rate": 1.4654071732486652e-05, "loss": 2.1196, "step": 10724 }, { "epoch": 0.36, "grad_norm": 0.7235652804374695, "learning_rate": 1.4653130943171194e-05, "loss": 2.1462, "step": 10725 }, { "epoch": 0.36, "grad_norm": 0.7009215950965881, "learning_rate": 1.465219010128739e-05, "loss": 2.1263, "step": 10726 }, { "epoch": 0.36, "grad_norm": 0.7108410596847534, "learning_rate": 1.4651249206845868e-05, "loss": 2.0909, "step": 10727 }, { "epoch": 0.36, "grad_norm": 0.7604007124900818, "learning_rate": 1.4650308259857258e-05, "loss": 2.1175, "step": 10728 }, { "epoch": 0.36, "grad_norm": 0.746516227722168, "learning_rate": 1.4649367260332192e-05, "loss": 2.111, "step": 10729 }, { "epoch": 0.36, "grad_norm": 0.7076823711395264, "learning_rate": 1.4648426208281297e-05, "loss": 2.0996, "step": 10730 }, { "epoch": 0.36, "grad_norm": 0.7431108951568604, "learning_rate": 1.4647485103715207e-05, "loss": 2.0979, "step": 10731 }, { "epoch": 0.36, "grad_norm": 0.7201970219612122, "learning_rate": 1.4646543946644558e-05, "loss": 2.0842, "step": 10732 }, { "epoch": 0.36, "grad_norm": 0.7544375658035278, "learning_rate": 1.4645602737079974e-05, "loss": 2.1014, "step": 10733 }, { "epoch": 0.36, "grad_norm": 0.7263185381889343, "learning_rate": 1.4644661475032096e-05, "loss": 2.1331, "step": 10734 }, { "epoch": 0.36, "grad_norm": 0.730281412601471, "learning_rate": 1.4643720160511554e-05, "loss": 2.1958, "step": 10735 }, { "epoch": 0.36, "grad_norm": 0.7261035442352295, "learning_rate": 1.4642778793528983e-05, "loss": 2.2242, "step": 10736 }, { "epoch": 0.36, "grad_norm": 0.7320296168327332, "learning_rate": 1.4641837374095019e-05, "loss": 2.0946, "step": 10737 }, { "epoch": 0.36, "grad_norm": 0.7540250420570374, "learning_rate": 1.4640895902220293e-05, "loss": 2.1462, "step": 10738 }, { "epoch": 0.36, "grad_norm": 0.7177721261978149, "learning_rate": 1.4639954377915448e-05, "loss": 2.1154, "step": 10739 }, { "epoch": 0.36, "grad_norm": 0.703453004360199, "learning_rate": 1.4639012801191119e-05, "loss": 2.1231, "step": 10740 }, { "epoch": 0.36, "grad_norm": 0.7644084692001343, "learning_rate": 1.4638071172057937e-05, "loss": 2.1142, "step": 10741 }, { "epoch": 0.36, "grad_norm": 0.7115994691848755, "learning_rate": 1.463712949052655e-05, "loss": 2.0805, "step": 10742 }, { "epoch": 0.36, "grad_norm": 0.7387803792953491, "learning_rate": 1.4636187756607591e-05, "loss": 2.1379, "step": 10743 }, { "epoch": 0.36, "grad_norm": 0.7143697142601013, "learning_rate": 1.4635245970311696e-05, "loss": 2.0551, "step": 10744 }, { "epoch": 0.36, "grad_norm": 0.7333706021308899, "learning_rate": 1.4634304131649516e-05, "loss": 2.1594, "step": 10745 }, { "epoch": 0.36, "grad_norm": 0.7061779499053955, "learning_rate": 1.4633362240631675e-05, "loss": 2.0513, "step": 10746 }, { "epoch": 0.36, "grad_norm": 0.7650803327560425, "learning_rate": 1.463242029726883e-05, "loss": 2.1049, "step": 10747 }, { "epoch": 0.36, "grad_norm": 0.7329779267311096, "learning_rate": 1.463147830157161e-05, "loss": 2.156, "step": 10748 }, { "epoch": 0.36, "grad_norm": 0.7555022835731506, "learning_rate": 1.4630536253550666e-05, "loss": 2.1121, "step": 10749 }, { "epoch": 0.36, "grad_norm": 0.7738577723503113, "learning_rate": 1.4629594153216635e-05, "loss": 2.1724, "step": 10750 }, { "epoch": 0.36, "grad_norm": 0.7368938326835632, "learning_rate": 1.4628652000580163e-05, "loss": 2.1247, "step": 10751 }, { "epoch": 0.36, "grad_norm": 0.7199434041976929, "learning_rate": 1.4627709795651895e-05, "loss": 2.0774, "step": 10752 }, { "epoch": 0.36, "grad_norm": 0.7110752463340759, "learning_rate": 1.4626767538442473e-05, "loss": 2.1559, "step": 10753 }, { "epoch": 0.36, "grad_norm": 0.7562914490699768, "learning_rate": 1.4625825228962543e-05, "loss": 2.1231, "step": 10754 }, { "epoch": 0.36, "grad_norm": 0.7123403549194336, "learning_rate": 1.4624882867222749e-05, "loss": 2.0972, "step": 10755 }, { "epoch": 0.36, "grad_norm": 0.7152132391929626, "learning_rate": 1.4623940453233742e-05, "loss": 2.084, "step": 10756 }, { "epoch": 0.36, "grad_norm": 0.7218719124794006, "learning_rate": 1.4622997987006162e-05, "loss": 2.2151, "step": 10757 }, { "epoch": 0.36, "grad_norm": 0.7454134821891785, "learning_rate": 1.4622055468550663e-05, "loss": 2.1839, "step": 10758 }, { "epoch": 0.36, "grad_norm": 0.751155436038971, "learning_rate": 1.462111289787789e-05, "loss": 2.0662, "step": 10759 }, { "epoch": 0.36, "grad_norm": 0.7157754898071289, "learning_rate": 1.4620170274998488e-05, "loss": 2.1229, "step": 10760 }, { "epoch": 0.36, "grad_norm": 0.7539533972740173, "learning_rate": 1.4619227599923114e-05, "loss": 2.0677, "step": 10761 }, { "epoch": 0.36, "grad_norm": 0.7011374235153198, "learning_rate": 1.4618284872662412e-05, "loss": 2.1359, "step": 10762 }, { "epoch": 0.36, "grad_norm": 0.7207832336425781, "learning_rate": 1.4617342093227034e-05, "loss": 2.1337, "step": 10763 }, { "epoch": 0.36, "grad_norm": 0.7410914897918701, "learning_rate": 1.461639926162763e-05, "loss": 2.1402, "step": 10764 }, { "epoch": 0.36, "grad_norm": 0.7477924227714539, "learning_rate": 1.4615456377874854e-05, "loss": 2.1298, "step": 10765 }, { "epoch": 0.36, "grad_norm": 0.7417619824409485, "learning_rate": 1.4614513441979357e-05, "loss": 2.0995, "step": 10766 }, { "epoch": 0.36, "grad_norm": 0.7469790577888489, "learning_rate": 1.4613570453951794e-05, "loss": 2.0885, "step": 10767 }, { "epoch": 0.36, "grad_norm": 0.6892023086547852, "learning_rate": 1.4612627413802808e-05, "loss": 2.1173, "step": 10768 }, { "epoch": 0.36, "grad_norm": 0.7399068474769592, "learning_rate": 1.4611684321543069e-05, "loss": 2.1545, "step": 10769 }, { "epoch": 0.36, "grad_norm": 0.7480810880661011, "learning_rate": 1.461074117718322e-05, "loss": 2.0427, "step": 10770 }, { "epoch": 0.36, "grad_norm": 0.7500886917114258, "learning_rate": 1.460979798073392e-05, "loss": 2.1223, "step": 10771 }, { "epoch": 0.36, "grad_norm": 0.6938839554786682, "learning_rate": 1.4608854732205824e-05, "loss": 2.0547, "step": 10772 }, { "epoch": 0.36, "grad_norm": 0.7379195690155029, "learning_rate": 1.4607911431609587e-05, "loss": 2.139, "step": 10773 }, { "epoch": 0.36, "grad_norm": 0.7262565493583679, "learning_rate": 1.4606968078955868e-05, "loss": 2.0891, "step": 10774 }, { "epoch": 0.36, "grad_norm": 0.7446302771568298, "learning_rate": 1.4606024674255324e-05, "loss": 2.1435, "step": 10775 }, { "epoch": 0.36, "grad_norm": 0.749920666217804, "learning_rate": 1.4605081217518611e-05, "loss": 2.1642, "step": 10776 }, { "epoch": 0.36, "grad_norm": 0.7277040481567383, "learning_rate": 1.460413770875639e-05, "loss": 2.13, "step": 10777 }, { "epoch": 0.36, "grad_norm": 0.6896201968193054, "learning_rate": 1.4603194147979322e-05, "loss": 2.1219, "step": 10778 }, { "epoch": 0.36, "grad_norm": 0.706484317779541, "learning_rate": 1.460225053519806e-05, "loss": 2.1169, "step": 10779 }, { "epoch": 0.36, "grad_norm": 0.743334174156189, "learning_rate": 1.460130687042327e-05, "loss": 2.1258, "step": 10780 }, { "epoch": 0.36, "grad_norm": 0.7265442609786987, "learning_rate": 1.4600363153665613e-05, "loss": 2.1119, "step": 10781 }, { "epoch": 0.36, "grad_norm": 0.7329130172729492, "learning_rate": 1.4599419384935749e-05, "loss": 2.0479, "step": 10782 }, { "epoch": 0.36, "grad_norm": 0.7100732326507568, "learning_rate": 1.4598475564244341e-05, "loss": 2.1189, "step": 10783 }, { "epoch": 0.36, "grad_norm": 0.7102295160293579, "learning_rate": 1.459753169160205e-05, "loss": 2.0738, "step": 10784 }, { "epoch": 0.36, "grad_norm": 0.7193012833595276, "learning_rate": 1.4596587767019538e-05, "loss": 2.0452, "step": 10785 }, { "epoch": 0.36, "grad_norm": 0.7266778349876404, "learning_rate": 1.4595643790507475e-05, "loss": 2.1064, "step": 10786 }, { "epoch": 0.36, "grad_norm": 0.7419440150260925, "learning_rate": 1.4594699762076518e-05, "loss": 2.0794, "step": 10787 }, { "epoch": 0.36, "grad_norm": 0.7537574172019958, "learning_rate": 1.4593755681737338e-05, "loss": 2.0985, "step": 10788 }, { "epoch": 0.36, "grad_norm": 0.724121630191803, "learning_rate": 1.45928115495006e-05, "loss": 2.1709, "step": 10789 }, { "epoch": 0.36, "grad_norm": 0.7019773721694946, "learning_rate": 1.4591867365376965e-05, "loss": 2.164, "step": 10790 }, { "epoch": 0.36, "grad_norm": 0.7442371249198914, "learning_rate": 1.459092312937711e-05, "loss": 2.0974, "step": 10791 }, { "epoch": 0.36, "grad_norm": 0.7568175792694092, "learning_rate": 1.4589978841511692e-05, "loss": 2.1592, "step": 10792 }, { "epoch": 0.36, "grad_norm": 0.7406652569770813, "learning_rate": 1.4589034501791381e-05, "loss": 2.1429, "step": 10793 }, { "epoch": 0.36, "grad_norm": 0.7298339605331421, "learning_rate": 1.4588090110226852e-05, "loss": 2.1166, "step": 10794 }, { "epoch": 0.36, "grad_norm": 0.6889939308166504, "learning_rate": 1.4587145666828766e-05, "loss": 2.1064, "step": 10795 }, { "epoch": 0.36, "grad_norm": 0.7417524456977844, "learning_rate": 1.4586201171607796e-05, "loss": 2.1753, "step": 10796 }, { "epoch": 0.36, "grad_norm": 0.7101824283599854, "learning_rate": 1.4585256624574618e-05, "loss": 2.0869, "step": 10797 }, { "epoch": 0.36, "grad_norm": 0.7211170792579651, "learning_rate": 1.4584312025739893e-05, "loss": 2.1187, "step": 10798 }, { "epoch": 0.36, "grad_norm": 0.7860206961631775, "learning_rate": 1.4583367375114299e-05, "loss": 2.1442, "step": 10799 }, { "epoch": 0.36, "grad_norm": 0.7377907037734985, "learning_rate": 1.458242267270851e-05, "loss": 2.2027, "step": 10800 }, { "epoch": 0.36, "grad_norm": 0.6976315379142761, "learning_rate": 1.4581477918533188e-05, "loss": 2.08, "step": 10801 }, { "epoch": 0.36, "grad_norm": 0.7417261600494385, "learning_rate": 1.458053311259902e-05, "loss": 2.0475, "step": 10802 }, { "epoch": 0.36, "grad_norm": 0.7454881072044373, "learning_rate": 1.457958825491667e-05, "loss": 2.0978, "step": 10803 }, { "epoch": 0.36, "grad_norm": 0.7373024821281433, "learning_rate": 1.4578643345496815e-05, "loss": 2.1205, "step": 10804 }, { "epoch": 0.36, "grad_norm": 0.749143123626709, "learning_rate": 1.4577698384350135e-05, "loss": 2.1054, "step": 10805 }, { "epoch": 0.36, "grad_norm": 0.7391125559806824, "learning_rate": 1.4576753371487297e-05, "loss": 2.1247, "step": 10806 }, { "epoch": 0.36, "grad_norm": 0.8038753867149353, "learning_rate": 1.4575808306918984e-05, "loss": 2.063, "step": 10807 }, { "epoch": 0.36, "grad_norm": 0.7392237186431885, "learning_rate": 1.457486319065587e-05, "loss": 2.0919, "step": 10808 }, { "epoch": 0.36, "grad_norm": 0.7688574194908142, "learning_rate": 1.4573918022708634e-05, "loss": 2.0183, "step": 10809 }, { "epoch": 0.36, "grad_norm": 0.7133106589317322, "learning_rate": 1.457297280308795e-05, "loss": 2.0748, "step": 10810 }, { "epoch": 0.36, "grad_norm": 0.728495180606842, "learning_rate": 1.4572027531804502e-05, "loss": 2.1385, "step": 10811 }, { "epoch": 0.36, "grad_norm": 0.707040011882782, "learning_rate": 1.4571082208868963e-05, "loss": 2.1198, "step": 10812 }, { "epoch": 0.36, "grad_norm": 0.7182496190071106, "learning_rate": 1.4570136834292021e-05, "loss": 2.1119, "step": 10813 }, { "epoch": 0.36, "grad_norm": 0.7041711211204529, "learning_rate": 1.456919140808435e-05, "loss": 2.081, "step": 10814 }, { "epoch": 0.36, "grad_norm": 0.7287329435348511, "learning_rate": 1.4568245930256628e-05, "loss": 2.1193, "step": 10815 }, { "epoch": 0.36, "grad_norm": 0.7133411765098572, "learning_rate": 1.4567300400819547e-05, "loss": 2.1259, "step": 10816 }, { "epoch": 0.36, "grad_norm": 0.727554976940155, "learning_rate": 1.4566354819783776e-05, "loss": 2.0984, "step": 10817 }, { "epoch": 0.36, "grad_norm": 0.688666582107544, "learning_rate": 1.4565409187160011e-05, "loss": 2.0431, "step": 10818 }, { "epoch": 0.36, "grad_norm": 0.69767165184021, "learning_rate": 1.4564463502958926e-05, "loss": 2.0929, "step": 10819 }, { "epoch": 0.36, "grad_norm": 0.6941933631896973, "learning_rate": 1.4563517767191203e-05, "loss": 2.0815, "step": 10820 }, { "epoch": 0.36, "grad_norm": 0.7267068028450012, "learning_rate": 1.4562571979867536e-05, "loss": 2.0718, "step": 10821 }, { "epoch": 0.36, "grad_norm": 0.7111235857009888, "learning_rate": 1.4561626140998606e-05, "loss": 2.1077, "step": 10822 }, { "epoch": 0.36, "grad_norm": 0.779595136642456, "learning_rate": 1.4560680250595092e-05, "loss": 2.1109, "step": 10823 }, { "epoch": 0.36, "grad_norm": 0.7360061407089233, "learning_rate": 1.455973430866769e-05, "loss": 2.1407, "step": 10824 }, { "epoch": 0.36, "grad_norm": 0.745019257068634, "learning_rate": 1.455878831522708e-05, "loss": 2.1865, "step": 10825 }, { "epoch": 0.36, "grad_norm": 0.7326734066009521, "learning_rate": 1.455784227028395e-05, "loss": 2.135, "step": 10826 }, { "epoch": 0.36, "grad_norm": 0.725932776927948, "learning_rate": 1.4556896173848991e-05, "loss": 2.0618, "step": 10827 }, { "epoch": 0.36, "grad_norm": 0.7488042712211609, "learning_rate": 1.455595002593289e-05, "loss": 2.1003, "step": 10828 }, { "epoch": 0.36, "grad_norm": 0.7197643518447876, "learning_rate": 1.4555003826546334e-05, "loss": 2.0368, "step": 10829 }, { "epoch": 0.36, "grad_norm": 0.7350213527679443, "learning_rate": 1.4554057575700016e-05, "loss": 2.1598, "step": 10830 }, { "epoch": 0.36, "grad_norm": 0.7123099565505981, "learning_rate": 1.4553111273404624e-05, "loss": 2.1037, "step": 10831 }, { "epoch": 0.36, "grad_norm": 0.7257305383682251, "learning_rate": 1.4552164919670848e-05, "loss": 2.1229, "step": 10832 }, { "epoch": 0.36, "grad_norm": 0.7463681101799011, "learning_rate": 1.4551218514509382e-05, "loss": 2.1104, "step": 10833 }, { "epoch": 0.36, "grad_norm": 0.7100160121917725, "learning_rate": 1.4550272057930915e-05, "loss": 2.1429, "step": 10834 }, { "epoch": 0.36, "grad_norm": 0.7083027958869934, "learning_rate": 1.4549325549946143e-05, "loss": 2.0732, "step": 10835 }, { "epoch": 0.36, "grad_norm": 0.7189925312995911, "learning_rate": 1.4548378990565754e-05, "loss": 2.0574, "step": 10836 }, { "epoch": 0.36, "grad_norm": 0.7410835027694702, "learning_rate": 1.4547432379800446e-05, "loss": 2.1301, "step": 10837 }, { "epoch": 0.36, "grad_norm": 0.8349276781082153, "learning_rate": 1.4546485717660914e-05, "loss": 2.1316, "step": 10838 }, { "epoch": 0.36, "grad_norm": 0.7540861368179321, "learning_rate": 1.454553900415785e-05, "loss": 2.1142, "step": 10839 }, { "epoch": 0.36, "grad_norm": 0.71322101354599, "learning_rate": 1.4544592239301949e-05, "loss": 2.0422, "step": 10840 }, { "epoch": 0.36, "grad_norm": 0.7297841310501099, "learning_rate": 1.454364542310391e-05, "loss": 2.0814, "step": 10841 }, { "epoch": 0.36, "grad_norm": 0.7599559426307678, "learning_rate": 1.4542698555574428e-05, "loss": 2.0643, "step": 10842 }, { "epoch": 0.36, "grad_norm": 0.7415245175361633, "learning_rate": 1.4541751636724197e-05, "loss": 2.1242, "step": 10843 }, { "epoch": 0.36, "grad_norm": 0.7150170803070068, "learning_rate": 1.4540804666563922e-05, "loss": 2.1166, "step": 10844 }, { "epoch": 0.36, "grad_norm": 0.6866410374641418, "learning_rate": 1.4539857645104294e-05, "loss": 2.0342, "step": 10845 }, { "epoch": 0.36, "grad_norm": 0.7315654754638672, "learning_rate": 1.4538910572356015e-05, "loss": 2.1193, "step": 10846 }, { "epoch": 0.36, "grad_norm": 0.7464458346366882, "learning_rate": 1.4537963448329785e-05, "loss": 2.147, "step": 10847 }, { "epoch": 0.36, "grad_norm": 0.7058669328689575, "learning_rate": 1.4537016273036304e-05, "loss": 2.1424, "step": 10848 }, { "epoch": 0.36, "grad_norm": 0.7047792077064514, "learning_rate": 1.4536069046486274e-05, "loss": 2.061, "step": 10849 }, { "epoch": 0.36, "grad_norm": 0.7293535470962524, "learning_rate": 1.453512176869039e-05, "loss": 2.1038, "step": 10850 }, { "epoch": 0.36, "grad_norm": 0.7221577167510986, "learning_rate": 1.453417443965936e-05, "loss": 2.0948, "step": 10851 }, { "epoch": 0.36, "grad_norm": 0.708378255367279, "learning_rate": 1.4533227059403885e-05, "loss": 2.0588, "step": 10852 }, { "epoch": 0.36, "grad_norm": 0.7083930969238281, "learning_rate": 1.4532279627934667e-05, "loss": 2.1745, "step": 10853 }, { "epoch": 0.36, "grad_norm": 0.7348835468292236, "learning_rate": 1.4531332145262412e-05, "loss": 2.1056, "step": 10854 }, { "epoch": 0.36, "grad_norm": 0.7022838592529297, "learning_rate": 1.453038461139782e-05, "loss": 2.0842, "step": 10855 }, { "epoch": 0.36, "grad_norm": 0.6926270723342896, "learning_rate": 1.4529437026351596e-05, "loss": 2.1476, "step": 10856 }, { "epoch": 0.36, "grad_norm": 0.7325962781906128, "learning_rate": 1.4528489390134448e-05, "loss": 2.1105, "step": 10857 }, { "epoch": 0.36, "grad_norm": 0.7328006029129028, "learning_rate": 1.4527541702757082e-05, "loss": 2.1299, "step": 10858 }, { "epoch": 0.36, "grad_norm": 0.6998463273048401, "learning_rate": 1.4526593964230203e-05, "loss": 2.0754, "step": 10859 }, { "epoch": 0.36, "grad_norm": 0.7028898596763611, "learning_rate": 1.4525646174564518e-05, "loss": 2.1532, "step": 10860 }, { "epoch": 0.36, "grad_norm": 0.7503082752227783, "learning_rate": 1.4524698333770735e-05, "loss": 2.1873, "step": 10861 }, { "epoch": 0.36, "grad_norm": 0.7635589838027954, "learning_rate": 1.4523750441859563e-05, "loss": 2.1596, "step": 10862 }, { "epoch": 0.36, "grad_norm": 0.7186018228530884, "learning_rate": 1.4522802498841709e-05, "loss": 2.1475, "step": 10863 }, { "epoch": 0.36, "grad_norm": 0.7263177633285522, "learning_rate": 1.4521854504727882e-05, "loss": 2.115, "step": 10864 }, { "epoch": 0.36, "grad_norm": 0.7370553016662598, "learning_rate": 1.4520906459528792e-05, "loss": 2.1552, "step": 10865 }, { "epoch": 0.36, "grad_norm": 0.7102876901626587, "learning_rate": 1.4519958363255151e-05, "loss": 2.1163, "step": 10866 }, { "epoch": 0.36, "grad_norm": 0.7208355665206909, "learning_rate": 1.451901021591767e-05, "loss": 2.0658, "step": 10867 }, { "epoch": 0.36, "grad_norm": 0.7425570487976074, "learning_rate": 1.4518062017527062e-05, "loss": 2.1783, "step": 10868 }, { "epoch": 0.36, "grad_norm": 0.7198006510734558, "learning_rate": 1.4517113768094033e-05, "loss": 2.172, "step": 10869 }, { "epoch": 0.36, "grad_norm": 0.7048671245574951, "learning_rate": 1.4516165467629302e-05, "loss": 2.1586, "step": 10870 }, { "epoch": 0.36, "grad_norm": 0.7216575145721436, "learning_rate": 1.4515217116143583e-05, "loss": 2.1169, "step": 10871 }, { "epoch": 0.36, "grad_norm": 0.7407144904136658, "learning_rate": 1.4514268713647584e-05, "loss": 2.1412, "step": 10872 }, { "epoch": 0.36, "grad_norm": 0.7324246168136597, "learning_rate": 1.4513320260152024e-05, "loss": 2.1501, "step": 10873 }, { "epoch": 0.36, "grad_norm": 0.7167472243309021, "learning_rate": 1.4512371755667616e-05, "loss": 2.1128, "step": 10874 }, { "epoch": 0.36, "grad_norm": 0.7325664162635803, "learning_rate": 1.4511423200205074e-05, "loss": 2.0935, "step": 10875 }, { "epoch": 0.36, "grad_norm": 0.718949556350708, "learning_rate": 1.451047459377512e-05, "loss": 2.1462, "step": 10876 }, { "epoch": 0.36, "grad_norm": 0.7398171424865723, "learning_rate": 1.4509525936388466e-05, "loss": 2.1264, "step": 10877 }, { "epoch": 0.36, "grad_norm": 0.7043357491493225, "learning_rate": 1.450857722805583e-05, "loss": 2.1135, "step": 10878 }, { "epoch": 0.36, "grad_norm": 0.7486860752105713, "learning_rate": 1.4507628468787935e-05, "loss": 2.0986, "step": 10879 }, { "epoch": 0.36, "grad_norm": 0.7363168597221375, "learning_rate": 1.4506679658595491e-05, "loss": 2.1759, "step": 10880 }, { "epoch": 0.36, "grad_norm": 0.7130545973777771, "learning_rate": 1.4505730797489224e-05, "loss": 2.0961, "step": 10881 }, { "epoch": 0.36, "grad_norm": 0.7040376663208008, "learning_rate": 1.450478188547985e-05, "loss": 2.1021, "step": 10882 }, { "epoch": 0.36, "grad_norm": 0.7261214852333069, "learning_rate": 1.4503832922578089e-05, "loss": 2.1328, "step": 10883 }, { "epoch": 0.36, "grad_norm": 0.7242673635482788, "learning_rate": 1.4502883908794668e-05, "loss": 2.1511, "step": 10884 }, { "epoch": 0.36, "grad_norm": 0.7208804488182068, "learning_rate": 1.4501934844140298e-05, "loss": 2.0874, "step": 10885 }, { "epoch": 0.36, "grad_norm": 0.7144356369972229, "learning_rate": 1.450098572862571e-05, "loss": 2.1028, "step": 10886 }, { "epoch": 0.36, "grad_norm": 0.7056716084480286, "learning_rate": 1.450003656226162e-05, "loss": 2.1162, "step": 10887 }, { "epoch": 0.36, "grad_norm": 0.7253352403640747, "learning_rate": 1.4499087345058755e-05, "loss": 2.1318, "step": 10888 }, { "epoch": 0.36, "grad_norm": 0.7326635718345642, "learning_rate": 1.449813807702784e-05, "loss": 2.1288, "step": 10889 }, { "epoch": 0.36, "grad_norm": 0.7527998685836792, "learning_rate": 1.4497188758179594e-05, "loss": 2.0779, "step": 10890 }, { "epoch": 0.36, "grad_norm": 0.7145019769668579, "learning_rate": 1.4496239388524747e-05, "loss": 2.0462, "step": 10891 }, { "epoch": 0.36, "grad_norm": 0.7095299363136292, "learning_rate": 1.4495289968074023e-05, "loss": 2.0985, "step": 10892 }, { "epoch": 0.36, "grad_norm": 0.7044367790222168, "learning_rate": 1.4494340496838147e-05, "loss": 2.11, "step": 10893 }, { "epoch": 0.36, "grad_norm": 0.7180089354515076, "learning_rate": 1.4493390974827844e-05, "loss": 2.065, "step": 10894 }, { "epoch": 0.36, "grad_norm": 0.7672885060310364, "learning_rate": 1.4492441402053847e-05, "loss": 2.0492, "step": 10895 }, { "epoch": 0.36, "grad_norm": 0.7067762017250061, "learning_rate": 1.4491491778526876e-05, "loss": 2.1265, "step": 10896 }, { "epoch": 0.36, "grad_norm": 0.7342500686645508, "learning_rate": 1.4490542104257665e-05, "loss": 2.0786, "step": 10897 }, { "epoch": 0.36, "grad_norm": 0.7158714532852173, "learning_rate": 1.4489592379256943e-05, "loss": 2.1077, "step": 10898 }, { "epoch": 0.36, "grad_norm": 0.7045587301254272, "learning_rate": 1.4488642603535434e-05, "loss": 2.0991, "step": 10899 }, { "epoch": 0.36, "grad_norm": 0.7314188480377197, "learning_rate": 1.4487692777103872e-05, "loss": 2.0476, "step": 10900 }, { "epoch": 0.36, "grad_norm": 0.7130773663520813, "learning_rate": 1.4486742899972987e-05, "loss": 2.0436, "step": 10901 }, { "epoch": 0.36, "grad_norm": 0.713201642036438, "learning_rate": 1.4485792972153511e-05, "loss": 2.1237, "step": 10902 }, { "epoch": 0.36, "grad_norm": 0.6957569718360901, "learning_rate": 1.4484842993656175e-05, "loss": 2.0787, "step": 10903 }, { "epoch": 0.36, "grad_norm": 0.7366880178451538, "learning_rate": 1.4483892964491714e-05, "loss": 2.0914, "step": 10904 }, { "epoch": 0.36, "grad_norm": 0.7480309009552002, "learning_rate": 1.4482942884670853e-05, "loss": 2.1052, "step": 10905 }, { "epoch": 0.36, "grad_norm": 0.7176265120506287, "learning_rate": 1.4481992754204334e-05, "loss": 2.1913, "step": 10906 }, { "epoch": 0.36, "grad_norm": 0.7319594621658325, "learning_rate": 1.4481042573102887e-05, "loss": 2.1537, "step": 10907 }, { "epoch": 0.36, "grad_norm": 0.7314065098762512, "learning_rate": 1.4480092341377246e-05, "loss": 2.1417, "step": 10908 }, { "epoch": 0.36, "grad_norm": 0.726470410823822, "learning_rate": 1.4479142059038149e-05, "loss": 2.0456, "step": 10909 }, { "epoch": 0.36, "grad_norm": 0.7410440444946289, "learning_rate": 1.447819172609633e-05, "loss": 2.1008, "step": 10910 }, { "epoch": 0.36, "grad_norm": 0.7370637655258179, "learning_rate": 1.4477241342562523e-05, "loss": 2.1506, "step": 10911 }, { "epoch": 0.36, "grad_norm": 0.7347295880317688, "learning_rate": 1.4476290908447467e-05, "loss": 2.1154, "step": 10912 }, { "epoch": 0.36, "grad_norm": 0.7187429666519165, "learning_rate": 1.4475340423761903e-05, "loss": 2.1125, "step": 10913 }, { "epoch": 0.36, "grad_norm": 0.7010026574134827, "learning_rate": 1.4474389888516562e-05, "loss": 2.0997, "step": 10914 }, { "epoch": 0.36, "grad_norm": 0.7269919514656067, "learning_rate": 1.447343930272219e-05, "loss": 2.0665, "step": 10915 }, { "epoch": 0.36, "grad_norm": 0.7168606519699097, "learning_rate": 1.4472488666389518e-05, "loss": 2.0581, "step": 10916 }, { "epoch": 0.36, "grad_norm": 0.7325286269187927, "learning_rate": 1.4471537979529298e-05, "loss": 2.0813, "step": 10917 }, { "epoch": 0.36, "grad_norm": 0.7726890444755554, "learning_rate": 1.4470587242152257e-05, "loss": 2.0679, "step": 10918 }, { "epoch": 0.36, "grad_norm": 0.742205798625946, "learning_rate": 1.4469636454269138e-05, "loss": 2.1163, "step": 10919 }, { "epoch": 0.36, "grad_norm": 0.7359945774078369, "learning_rate": 1.4468685615890692e-05, "loss": 2.1488, "step": 10920 }, { "epoch": 0.36, "grad_norm": 0.7210713028907776, "learning_rate": 1.4467734727027652e-05, "loss": 2.0359, "step": 10921 }, { "epoch": 0.36, "grad_norm": 0.7315620183944702, "learning_rate": 1.4466783787690763e-05, "loss": 2.1334, "step": 10922 }, { "epoch": 0.36, "grad_norm": 0.7648641467094421, "learning_rate": 1.446583279789077e-05, "loss": 2.0994, "step": 10923 }, { "epoch": 0.36, "grad_norm": 0.787000298500061, "learning_rate": 1.4464881757638414e-05, "loss": 2.1573, "step": 10924 }, { "epoch": 0.36, "grad_norm": 0.7458527684211731, "learning_rate": 1.4463930666944442e-05, "loss": 2.1377, "step": 10925 }, { "epoch": 0.36, "grad_norm": 0.8063718676567078, "learning_rate": 1.4462979525819598e-05, "loss": 2.1465, "step": 10926 }, { "epoch": 0.36, "grad_norm": 0.7354551553726196, "learning_rate": 1.4462028334274622e-05, "loss": 2.0684, "step": 10927 }, { "epoch": 0.36, "grad_norm": 0.7330908179283142, "learning_rate": 1.446107709232027e-05, "loss": 2.1185, "step": 10928 }, { "epoch": 0.36, "grad_norm": 0.7285526990890503, "learning_rate": 1.4460125799967281e-05, "loss": 2.139, "step": 10929 }, { "epoch": 0.36, "grad_norm": 0.7355754971504211, "learning_rate": 1.4459174457226406e-05, "loss": 2.1123, "step": 10930 }, { "epoch": 0.36, "grad_norm": 0.7244279980659485, "learning_rate": 1.4458223064108393e-05, "loss": 2.0825, "step": 10931 }, { "epoch": 0.36, "grad_norm": 0.6986587643623352, "learning_rate": 1.4457271620623985e-05, "loss": 2.0643, "step": 10932 }, { "epoch": 0.36, "grad_norm": 0.7295422554016113, "learning_rate": 1.4456320126783936e-05, "loss": 2.1732, "step": 10933 }, { "epoch": 0.36, "grad_norm": 0.72148197889328, "learning_rate": 1.4455368582598995e-05, "loss": 2.0792, "step": 10934 }, { "epoch": 0.36, "grad_norm": 0.7096263766288757, "learning_rate": 1.4454416988079907e-05, "loss": 2.0761, "step": 10935 }, { "epoch": 0.36, "grad_norm": 0.7505578994750977, "learning_rate": 1.445346534323743e-05, "loss": 2.1351, "step": 10936 }, { "epoch": 0.36, "grad_norm": 0.7266875505447388, "learning_rate": 1.4452513648082311e-05, "loss": 2.0368, "step": 10937 }, { "epoch": 0.36, "grad_norm": 0.6972452402114868, "learning_rate": 1.4451561902625299e-05, "loss": 2.1115, "step": 10938 }, { "epoch": 0.36, "grad_norm": 0.740939736366272, "learning_rate": 1.4450610106877156e-05, "loss": 2.0487, "step": 10939 }, { "epoch": 0.36, "grad_norm": 0.7329394221305847, "learning_rate": 1.4449658260848623e-05, "loss": 2.1026, "step": 10940 }, { "epoch": 0.36, "grad_norm": 0.7096993923187256, "learning_rate": 1.444870636455046e-05, "loss": 2.1233, "step": 10941 }, { "epoch": 0.36, "grad_norm": 0.7099723815917969, "learning_rate": 1.444775441799342e-05, "loss": 2.1275, "step": 10942 }, { "epoch": 0.36, "grad_norm": 0.725796103477478, "learning_rate": 1.4446802421188255e-05, "loss": 2.0754, "step": 10943 }, { "epoch": 0.36, "grad_norm": 0.7467756271362305, "learning_rate": 1.4445850374145727e-05, "loss": 2.1314, "step": 10944 }, { "epoch": 0.36, "grad_norm": 0.7312129139900208, "learning_rate": 1.4444898276876584e-05, "loss": 2.1195, "step": 10945 }, { "epoch": 0.36, "grad_norm": 0.737076997756958, "learning_rate": 1.4443946129391586e-05, "loss": 2.081, "step": 10946 }, { "epoch": 0.36, "grad_norm": 0.7481940388679504, "learning_rate": 1.4442993931701488e-05, "loss": 2.1332, "step": 10947 }, { "epoch": 0.36, "grad_norm": 0.7531542181968689, "learning_rate": 1.444204168381705e-05, "loss": 2.0704, "step": 10948 }, { "epoch": 0.36, "grad_norm": 0.7210349440574646, "learning_rate": 1.4441089385749027e-05, "loss": 2.1961, "step": 10949 }, { "epoch": 0.36, "grad_norm": 0.7450878024101257, "learning_rate": 1.444013703750818e-05, "loss": 2.1039, "step": 10950 }, { "epoch": 0.36, "grad_norm": 0.7782377004623413, "learning_rate": 1.4439184639105266e-05, "loss": 2.1753, "step": 10951 }, { "epoch": 0.36, "grad_norm": 0.7599091529846191, "learning_rate": 1.4438232190551045e-05, "loss": 2.1296, "step": 10952 }, { "epoch": 0.36, "grad_norm": 0.7225616574287415, "learning_rate": 1.443727969185628e-05, "loss": 2.0826, "step": 10953 }, { "epoch": 0.36, "grad_norm": 0.7230786085128784, "learning_rate": 1.4436327143031728e-05, "loss": 2.106, "step": 10954 }, { "epoch": 0.36, "grad_norm": 0.8053635358810425, "learning_rate": 1.4435374544088152e-05, "loss": 2.0593, "step": 10955 }, { "epoch": 0.36, "grad_norm": 0.7252182960510254, "learning_rate": 1.4434421895036315e-05, "loss": 2.111, "step": 10956 }, { "epoch": 0.36, "grad_norm": 0.7089612483978271, "learning_rate": 1.4433469195886977e-05, "loss": 2.1092, "step": 10957 }, { "epoch": 0.36, "grad_norm": 0.7707215547561646, "learning_rate": 1.4432516446650902e-05, "loss": 2.1239, "step": 10958 }, { "epoch": 0.36, "grad_norm": 0.7279652953147888, "learning_rate": 1.4431563647338853e-05, "loss": 2.1468, "step": 10959 }, { "epoch": 0.36, "grad_norm": 0.7217979431152344, "learning_rate": 1.4430610797961597e-05, "loss": 2.1101, "step": 10960 }, { "epoch": 0.36, "grad_norm": 0.7599738836288452, "learning_rate": 1.4429657898529897e-05, "loss": 2.1634, "step": 10961 }, { "epoch": 0.36, "grad_norm": 0.7474250197410583, "learning_rate": 1.4428704949054516e-05, "loss": 2.169, "step": 10962 }, { "epoch": 0.36, "grad_norm": 0.737455427646637, "learning_rate": 1.4427751949546223e-05, "loss": 2.1138, "step": 10963 }, { "epoch": 0.36, "grad_norm": 0.723259449005127, "learning_rate": 1.4426798900015786e-05, "loss": 2.074, "step": 10964 }, { "epoch": 0.36, "grad_norm": 0.6938008666038513, "learning_rate": 1.4425845800473965e-05, "loss": 2.1703, "step": 10965 }, { "epoch": 0.36, "grad_norm": 0.7192168235778809, "learning_rate": 1.4424892650931535e-05, "loss": 2.1419, "step": 10966 }, { "epoch": 0.36, "grad_norm": 0.7198514938354492, "learning_rate": 1.4423939451399261e-05, "loss": 2.1041, "step": 10967 }, { "epoch": 0.36, "grad_norm": 0.7028201818466187, "learning_rate": 1.442298620188791e-05, "loss": 2.0459, "step": 10968 }, { "epoch": 0.36, "grad_norm": 0.7281674742698669, "learning_rate": 1.4422032902408254e-05, "loss": 2.0467, "step": 10969 }, { "epoch": 0.36, "grad_norm": 0.7188822031021118, "learning_rate": 1.442107955297106e-05, "loss": 2.1042, "step": 10970 }, { "epoch": 0.37, "grad_norm": 0.7240476012229919, "learning_rate": 1.4420126153587104e-05, "loss": 2.0655, "step": 10971 }, { "epoch": 0.37, "grad_norm": 0.7237669229507446, "learning_rate": 1.441917270426715e-05, "loss": 2.0923, "step": 10972 }, { "epoch": 0.37, "grad_norm": 0.7235566973686218, "learning_rate": 1.4418219205021972e-05, "loss": 2.1215, "step": 10973 }, { "epoch": 0.37, "grad_norm": 0.7344925403594971, "learning_rate": 1.4417265655862345e-05, "loss": 2.1361, "step": 10974 }, { "epoch": 0.37, "grad_norm": 0.746641218662262, "learning_rate": 1.441631205679904e-05, "loss": 2.1247, "step": 10975 }, { "epoch": 0.37, "grad_norm": 0.7065014839172363, "learning_rate": 1.4415358407842828e-05, "loss": 2.1162, "step": 10976 }, { "epoch": 0.37, "grad_norm": 0.7090362906455994, "learning_rate": 1.4414404709004486e-05, "loss": 2.1161, "step": 10977 }, { "epoch": 0.37, "grad_norm": 0.7639482021331787, "learning_rate": 1.4413450960294786e-05, "loss": 2.076, "step": 10978 }, { "epoch": 0.37, "grad_norm": 0.7675634622573853, "learning_rate": 1.4412497161724504e-05, "loss": 1.9846, "step": 10979 }, { "epoch": 0.37, "grad_norm": 0.723737359046936, "learning_rate": 1.4411543313304413e-05, "loss": 2.1005, "step": 10980 }, { "epoch": 0.37, "grad_norm": 0.7130969166755676, "learning_rate": 1.441058941504529e-05, "loss": 2.167, "step": 10981 }, { "epoch": 0.37, "grad_norm": 0.7220802307128906, "learning_rate": 1.4409635466957916e-05, "loss": 2.0338, "step": 10982 }, { "epoch": 0.37, "grad_norm": 0.7066482901573181, "learning_rate": 1.4408681469053063e-05, "loss": 2.0862, "step": 10983 }, { "epoch": 0.37, "grad_norm": 0.7124679088592529, "learning_rate": 1.4407727421341511e-05, "loss": 2.1052, "step": 10984 }, { "epoch": 0.37, "grad_norm": 0.6884941458702087, "learning_rate": 1.4406773323834038e-05, "loss": 2.0652, "step": 10985 }, { "epoch": 0.37, "grad_norm": 0.7236327528953552, "learning_rate": 1.4405819176541425e-05, "loss": 2.1254, "step": 10986 }, { "epoch": 0.37, "grad_norm": 0.7210729718208313, "learning_rate": 1.4404864979474444e-05, "loss": 2.072, "step": 10987 }, { "epoch": 0.37, "grad_norm": 0.7433964014053345, "learning_rate": 1.4403910732643884e-05, "loss": 2.1967, "step": 10988 }, { "epoch": 0.37, "grad_norm": 0.7166556715965271, "learning_rate": 1.440295643606052e-05, "loss": 2.0949, "step": 10989 }, { "epoch": 0.37, "grad_norm": 0.7392034530639648, "learning_rate": 1.4402002089735134e-05, "loss": 2.1939, "step": 10990 }, { "epoch": 0.37, "grad_norm": 0.7213878035545349, "learning_rate": 1.440104769367851e-05, "loss": 2.0616, "step": 10991 }, { "epoch": 0.37, "grad_norm": 0.7424634099006653, "learning_rate": 1.4400093247901426e-05, "loss": 2.0829, "step": 10992 }, { "epoch": 0.37, "grad_norm": 0.7225481867790222, "learning_rate": 1.4399138752414667e-05, "loss": 2.1214, "step": 10993 }, { "epoch": 0.37, "grad_norm": 0.7312317490577698, "learning_rate": 1.4398184207229018e-05, "loss": 2.1228, "step": 10994 }, { "epoch": 0.37, "grad_norm": 0.72078537940979, "learning_rate": 1.4397229612355262e-05, "loss": 2.126, "step": 10995 }, { "epoch": 0.37, "grad_norm": 0.7222729325294495, "learning_rate": 1.4396274967804182e-05, "loss": 2.0879, "step": 10996 }, { "epoch": 0.37, "grad_norm": 0.7208762168884277, "learning_rate": 1.4395320273586565e-05, "loss": 2.0235, "step": 10997 }, { "epoch": 0.37, "grad_norm": 0.7261767387390137, "learning_rate": 1.4394365529713195e-05, "loss": 2.0774, "step": 10998 }, { "epoch": 0.37, "grad_norm": 0.7472676038742065, "learning_rate": 1.439341073619486e-05, "loss": 2.1451, "step": 10999 }, { "epoch": 0.37, "grad_norm": 0.7569315433502197, "learning_rate": 1.439245589304234e-05, "loss": 2.0944, "step": 11000 }, { "epoch": 0.37, "grad_norm": 0.7565671801567078, "learning_rate": 1.4391501000266433e-05, "loss": 2.0761, "step": 11001 }, { "epoch": 0.37, "grad_norm": 0.7336362600326538, "learning_rate": 1.4390546057877919e-05, "loss": 2.0928, "step": 11002 }, { "epoch": 0.37, "grad_norm": 0.7405349016189575, "learning_rate": 1.438959106588759e-05, "loss": 2.1121, "step": 11003 }, { "epoch": 0.37, "grad_norm": 0.7202802896499634, "learning_rate": 1.4388636024306232e-05, "loss": 2.1496, "step": 11004 }, { "epoch": 0.37, "grad_norm": 0.7333552837371826, "learning_rate": 1.4387680933144637e-05, "loss": 2.0726, "step": 11005 }, { "epoch": 0.37, "grad_norm": 0.7481369972229004, "learning_rate": 1.4386725792413594e-05, "loss": 2.0554, "step": 11006 }, { "epoch": 0.37, "grad_norm": 0.7050867676734924, "learning_rate": 1.4385770602123894e-05, "loss": 2.0215, "step": 11007 }, { "epoch": 0.37, "grad_norm": 0.7272744178771973, "learning_rate": 1.4384815362286331e-05, "loss": 2.1404, "step": 11008 }, { "epoch": 0.37, "grad_norm": 0.7782060503959656, "learning_rate": 1.4383860072911687e-05, "loss": 2.1032, "step": 11009 }, { "epoch": 0.37, "grad_norm": 0.7342901825904846, "learning_rate": 1.4382904734010768e-05, "loss": 2.0913, "step": 11010 }, { "epoch": 0.37, "grad_norm": 0.7254053950309753, "learning_rate": 1.4381949345594355e-05, "loss": 2.1383, "step": 11011 }, { "epoch": 0.37, "grad_norm": 0.7419200539588928, "learning_rate": 1.4380993907673248e-05, "loss": 2.0881, "step": 11012 }, { "epoch": 0.37, "grad_norm": 0.7126465439796448, "learning_rate": 1.438003842025824e-05, "loss": 2.0379, "step": 11013 }, { "epoch": 0.37, "grad_norm": 0.738862156867981, "learning_rate": 1.4379082883360125e-05, "loss": 2.0451, "step": 11014 }, { "epoch": 0.37, "grad_norm": 0.7291884422302246, "learning_rate": 1.4378127296989698e-05, "loss": 2.1312, "step": 11015 }, { "epoch": 0.37, "grad_norm": 0.7657714486122131, "learning_rate": 1.4377171661157755e-05, "loss": 2.1354, "step": 11016 }, { "epoch": 0.37, "grad_norm": 0.7402278780937195, "learning_rate": 1.4376215975875089e-05, "loss": 2.097, "step": 11017 }, { "epoch": 0.37, "grad_norm": 0.7361637949943542, "learning_rate": 1.4375260241152503e-05, "loss": 2.0977, "step": 11018 }, { "epoch": 0.37, "grad_norm": 0.7245307564735413, "learning_rate": 1.4374304457000792e-05, "loss": 2.072, "step": 11019 }, { "epoch": 0.37, "grad_norm": 0.7223939895629883, "learning_rate": 1.4373348623430747e-05, "loss": 2.1585, "step": 11020 }, { "epoch": 0.37, "grad_norm": 0.6904551386833191, "learning_rate": 1.4372392740453178e-05, "loss": 2.0594, "step": 11021 }, { "epoch": 0.37, "grad_norm": 0.6923089623451233, "learning_rate": 1.4371436808078876e-05, "loss": 2.1211, "step": 11022 }, { "epoch": 0.37, "grad_norm": 0.7443510890007019, "learning_rate": 1.437048082631864e-05, "loss": 2.0624, "step": 11023 }, { "epoch": 0.37, "grad_norm": 0.7004593014717102, "learning_rate": 1.4369524795183278e-05, "loss": 2.0124, "step": 11024 }, { "epoch": 0.37, "grad_norm": 0.7271811962127686, "learning_rate": 1.4368568714683584e-05, "loss": 2.1124, "step": 11025 }, { "epoch": 0.37, "grad_norm": 0.7128680348396301, "learning_rate": 1.436761258483036e-05, "loss": 2.0778, "step": 11026 }, { "epoch": 0.37, "grad_norm": 0.7634636163711548, "learning_rate": 1.4366656405634408e-05, "loss": 2.1213, "step": 11027 }, { "epoch": 0.37, "grad_norm": 0.7193471193313599, "learning_rate": 1.4365700177106533e-05, "loss": 2.0803, "step": 11028 }, { "epoch": 0.37, "grad_norm": 0.7342803478240967, "learning_rate": 1.4364743899257534e-05, "loss": 2.1395, "step": 11029 }, { "epoch": 0.37, "grad_norm": 0.7173555493354797, "learning_rate": 1.436378757209822e-05, "loss": 2.1625, "step": 11030 }, { "epoch": 0.37, "grad_norm": 0.7119832038879395, "learning_rate": 1.4362831195639387e-05, "loss": 2.1487, "step": 11031 }, { "epoch": 0.37, "grad_norm": 0.7320874333381653, "learning_rate": 1.4361874769891849e-05, "loss": 2.0987, "step": 11032 }, { "epoch": 0.37, "grad_norm": 0.722071647644043, "learning_rate": 1.4360918294866402e-05, "loss": 2.084, "step": 11033 }, { "epoch": 0.37, "grad_norm": 0.7141579985618591, "learning_rate": 1.4359961770573854e-05, "loss": 2.1022, "step": 11034 }, { "epoch": 0.37, "grad_norm": 0.6977395415306091, "learning_rate": 1.4359005197025018e-05, "loss": 2.13, "step": 11035 }, { "epoch": 0.37, "grad_norm": 0.73216712474823, "learning_rate": 1.4358048574230693e-05, "loss": 2.1278, "step": 11036 }, { "epoch": 0.37, "grad_norm": 0.7248515486717224, "learning_rate": 1.4357091902201693e-05, "loss": 2.1612, "step": 11037 }, { "epoch": 0.37, "grad_norm": 0.7488477230072021, "learning_rate": 1.4356135180948818e-05, "loss": 2.1416, "step": 11038 }, { "epoch": 0.37, "grad_norm": 0.7104046940803528, "learning_rate": 1.4355178410482882e-05, "loss": 2.1407, "step": 11039 }, { "epoch": 0.37, "grad_norm": 0.7590029835700989, "learning_rate": 1.4354221590814693e-05, "loss": 2.1583, "step": 11040 }, { "epoch": 0.37, "grad_norm": 0.7558960318565369, "learning_rate": 1.4353264721955058e-05, "loss": 2.0891, "step": 11041 }, { "epoch": 0.37, "grad_norm": 0.7772329449653625, "learning_rate": 1.4352307803914793e-05, "loss": 2.0879, "step": 11042 }, { "epoch": 0.37, "grad_norm": 0.7465712428092957, "learning_rate": 1.4351350836704705e-05, "loss": 2.0408, "step": 11043 }, { "epoch": 0.37, "grad_norm": 0.732805073261261, "learning_rate": 1.4350393820335602e-05, "loss": 2.1925, "step": 11044 }, { "epoch": 0.37, "grad_norm": 0.6990310549736023, "learning_rate": 1.4349436754818302e-05, "loss": 2.1764, "step": 11045 }, { "epoch": 0.37, "grad_norm": 0.7209809422492981, "learning_rate": 1.4348479640163614e-05, "loss": 2.2235, "step": 11046 }, { "epoch": 0.37, "grad_norm": 0.7356286644935608, "learning_rate": 1.4347522476382349e-05, "loss": 2.1074, "step": 11047 }, { "epoch": 0.37, "grad_norm": 0.7203242182731628, "learning_rate": 1.4346565263485328e-05, "loss": 2.1255, "step": 11048 }, { "epoch": 0.37, "grad_norm": 0.7170785069465637, "learning_rate": 1.4345608001483357e-05, "loss": 2.0362, "step": 11049 }, { "epoch": 0.37, "grad_norm": 0.7259039282798767, "learning_rate": 1.4344650690387255e-05, "loss": 2.1136, "step": 11050 }, { "epoch": 0.37, "grad_norm": 0.739266574382782, "learning_rate": 1.4343693330207833e-05, "loss": 2.0615, "step": 11051 }, { "epoch": 0.37, "grad_norm": 0.7370533347129822, "learning_rate": 1.434273592095591e-05, "loss": 2.0629, "step": 11052 }, { "epoch": 0.37, "grad_norm": 0.7941794991493225, "learning_rate": 1.4341778462642305e-05, "loss": 2.1063, "step": 11053 }, { "epoch": 0.37, "grad_norm": 0.7523413300514221, "learning_rate": 1.4340820955277828e-05, "loss": 2.0863, "step": 11054 }, { "epoch": 0.37, "grad_norm": 0.7648608088493347, "learning_rate": 1.4339863398873302e-05, "loss": 2.1059, "step": 11055 }, { "epoch": 0.37, "grad_norm": 0.7154421806335449, "learning_rate": 1.4338905793439541e-05, "loss": 2.1347, "step": 11056 }, { "epoch": 0.37, "grad_norm": 0.7509371042251587, "learning_rate": 1.4337948138987368e-05, "loss": 2.116, "step": 11057 }, { "epoch": 0.37, "grad_norm": 0.7362762093544006, "learning_rate": 1.4336990435527593e-05, "loss": 2.0682, "step": 11058 }, { "epoch": 0.37, "grad_norm": 0.7995498180389404, "learning_rate": 1.4336032683071049e-05, "loss": 2.0501, "step": 11059 }, { "epoch": 0.37, "grad_norm": 0.7454381585121155, "learning_rate": 1.4335074881628546e-05, "loss": 2.0681, "step": 11060 }, { "epoch": 0.37, "grad_norm": 0.7226183414459229, "learning_rate": 1.4334117031210906e-05, "loss": 2.1514, "step": 11061 }, { "epoch": 0.37, "grad_norm": 0.7082606554031372, "learning_rate": 1.4333159131828953e-05, "loss": 2.0745, "step": 11062 }, { "epoch": 0.37, "grad_norm": 0.7274426221847534, "learning_rate": 1.4332201183493509e-05, "loss": 2.0408, "step": 11063 }, { "epoch": 0.37, "grad_norm": 0.7160465717315674, "learning_rate": 1.4331243186215393e-05, "loss": 2.0713, "step": 11064 }, { "epoch": 0.37, "grad_norm": 0.7047940492630005, "learning_rate": 1.4330285140005432e-05, "loss": 2.134, "step": 11065 }, { "epoch": 0.37, "grad_norm": 0.705583393573761, "learning_rate": 1.4329327044874444e-05, "loss": 2.0535, "step": 11066 }, { "epoch": 0.37, "grad_norm": 0.7313485145568848, "learning_rate": 1.4328368900833259e-05, "loss": 2.0484, "step": 11067 }, { "epoch": 0.37, "grad_norm": 0.7553391456604004, "learning_rate": 1.43274107078927e-05, "loss": 2.0883, "step": 11068 }, { "epoch": 0.37, "grad_norm": 0.7202082276344299, "learning_rate": 1.4326452466063585e-05, "loss": 2.0871, "step": 11069 }, { "epoch": 0.37, "grad_norm": 0.7087470889091492, "learning_rate": 1.432549417535675e-05, "loss": 2.082, "step": 11070 }, { "epoch": 0.37, "grad_norm": 0.7409905791282654, "learning_rate": 1.4324535835783017e-05, "loss": 2.1651, "step": 11071 }, { "epoch": 0.37, "grad_norm": 0.7737860083580017, "learning_rate": 1.4323577447353212e-05, "loss": 2.105, "step": 11072 }, { "epoch": 0.37, "grad_norm": 0.739077091217041, "learning_rate": 1.4322619010078163e-05, "loss": 2.1639, "step": 11073 }, { "epoch": 0.37, "grad_norm": 0.7896743416786194, "learning_rate": 1.4321660523968697e-05, "loss": 2.0989, "step": 11074 }, { "epoch": 0.37, "grad_norm": 0.7045981287956238, "learning_rate": 1.4320701989035643e-05, "loss": 2.0502, "step": 11075 }, { "epoch": 0.37, "grad_norm": 0.7547368407249451, "learning_rate": 1.4319743405289833e-05, "loss": 2.0714, "step": 11076 }, { "epoch": 0.37, "grad_norm": 0.7796988487243652, "learning_rate": 1.431878477274209e-05, "loss": 2.2137, "step": 11077 }, { "epoch": 0.37, "grad_norm": 0.7328726053237915, "learning_rate": 1.4317826091403249e-05, "loss": 2.0675, "step": 11078 }, { "epoch": 0.37, "grad_norm": 0.7560614347457886, "learning_rate": 1.4316867361284141e-05, "loss": 2.194, "step": 11079 }, { "epoch": 0.37, "grad_norm": 0.7340458631515503, "learning_rate": 1.431590858239559e-05, "loss": 2.0681, "step": 11080 }, { "epoch": 0.37, "grad_norm": 0.7251687049865723, "learning_rate": 1.431494975474844e-05, "loss": 2.1052, "step": 11081 }, { "epoch": 0.37, "grad_norm": 0.7459153532981873, "learning_rate": 1.4313990878353515e-05, "loss": 2.1627, "step": 11082 }, { "epoch": 0.37, "grad_norm": 0.7301740050315857, "learning_rate": 1.4313031953221646e-05, "loss": 2.0577, "step": 11083 }, { "epoch": 0.37, "grad_norm": 0.7670778632164001, "learning_rate": 1.4312072979363671e-05, "loss": 2.1043, "step": 11084 }, { "epoch": 0.37, "grad_norm": 0.7218716740608215, "learning_rate": 1.4311113956790423e-05, "loss": 2.1657, "step": 11085 }, { "epoch": 0.37, "grad_norm": 0.7021493315696716, "learning_rate": 1.4310154885512737e-05, "loss": 2.0981, "step": 11086 }, { "epoch": 0.37, "grad_norm": 0.7529250979423523, "learning_rate": 1.4309195765541445e-05, "loss": 2.0664, "step": 11087 }, { "epoch": 0.37, "grad_norm": 0.7032921314239502, "learning_rate": 1.4308236596887385e-05, "loss": 2.1013, "step": 11088 }, { "epoch": 0.37, "grad_norm": 0.7842296957969666, "learning_rate": 1.4307277379561394e-05, "loss": 2.0878, "step": 11089 }, { "epoch": 0.37, "grad_norm": 0.7105204463005066, "learning_rate": 1.4306318113574311e-05, "loss": 2.1299, "step": 11090 }, { "epoch": 0.37, "grad_norm": 0.7196194529533386, "learning_rate": 1.4305358798936963e-05, "loss": 2.0936, "step": 11091 }, { "epoch": 0.37, "grad_norm": 0.7477278709411621, "learning_rate": 1.4304399435660199e-05, "loss": 2.0721, "step": 11092 }, { "epoch": 0.37, "grad_norm": 0.7170090675354004, "learning_rate": 1.4303440023754851e-05, "loss": 2.1958, "step": 11093 }, { "epoch": 0.37, "grad_norm": 0.7261789441108704, "learning_rate": 1.4302480563231759e-05, "loss": 2.14, "step": 11094 }, { "epoch": 0.37, "grad_norm": 0.7644128203392029, "learning_rate": 1.4301521054101764e-05, "loss": 2.0635, "step": 11095 }, { "epoch": 0.37, "grad_norm": 0.7188597321510315, "learning_rate": 1.4300561496375705e-05, "loss": 2.1334, "step": 11096 }, { "epoch": 0.37, "grad_norm": 0.7074153423309326, "learning_rate": 1.4299601890064422e-05, "loss": 2.1319, "step": 11097 }, { "epoch": 0.37, "grad_norm": 0.7150753140449524, "learning_rate": 1.429864223517876e-05, "loss": 2.1304, "step": 11098 }, { "epoch": 0.37, "grad_norm": 0.7301631569862366, "learning_rate": 1.4297682531729553e-05, "loss": 2.1902, "step": 11099 }, { "epoch": 0.37, "grad_norm": 0.7252650856971741, "learning_rate": 1.429672277972765e-05, "loss": 2.1327, "step": 11100 }, { "epoch": 0.37, "grad_norm": 0.7567165493965149, "learning_rate": 1.4295762979183891e-05, "loss": 2.1784, "step": 11101 }, { "epoch": 0.37, "grad_norm": 0.7652906775474548, "learning_rate": 1.4294803130109117e-05, "loss": 2.1697, "step": 11102 }, { "epoch": 0.37, "grad_norm": 0.7174903750419617, "learning_rate": 1.4293843232514177e-05, "loss": 2.0361, "step": 11103 }, { "epoch": 0.37, "grad_norm": 0.7215195894241333, "learning_rate": 1.429288328640991e-05, "loss": 2.0586, "step": 11104 }, { "epoch": 0.37, "grad_norm": 0.7259606719017029, "learning_rate": 1.4291923291807166e-05, "loss": 2.0784, "step": 11105 }, { "epoch": 0.37, "grad_norm": 0.739876389503479, "learning_rate": 1.4290963248716788e-05, "loss": 2.0641, "step": 11106 }, { "epoch": 0.37, "grad_norm": 0.7501680850982666, "learning_rate": 1.429000315714962e-05, "loss": 2.1648, "step": 11107 }, { "epoch": 0.37, "grad_norm": 0.7290095686912537, "learning_rate": 1.428904301711651e-05, "loss": 2.0177, "step": 11108 }, { "epoch": 0.37, "grad_norm": 0.7456393837928772, "learning_rate": 1.4288082828628307e-05, "loss": 2.0828, "step": 11109 }, { "epoch": 0.37, "grad_norm": 0.7451617121696472, "learning_rate": 1.4287122591695859e-05, "loss": 2.1659, "step": 11110 }, { "epoch": 0.37, "grad_norm": 0.7310173511505127, "learning_rate": 1.4286162306330009e-05, "loss": 2.0502, "step": 11111 }, { "epoch": 0.37, "grad_norm": 0.7474257946014404, "learning_rate": 1.4285201972541613e-05, "loss": 2.0621, "step": 11112 }, { "epoch": 0.37, "grad_norm": 0.7389144897460938, "learning_rate": 1.4284241590341512e-05, "loss": 2.1135, "step": 11113 }, { "epoch": 0.37, "grad_norm": 0.7029172778129578, "learning_rate": 1.4283281159740565e-05, "loss": 2.1263, "step": 11114 }, { "epoch": 0.37, "grad_norm": 0.7077636122703552, "learning_rate": 1.4282320680749616e-05, "loss": 2.0843, "step": 11115 }, { "epoch": 0.37, "grad_norm": 0.7399885058403015, "learning_rate": 1.4281360153379517e-05, "loss": 2.1341, "step": 11116 }, { "epoch": 0.37, "grad_norm": 0.735608696937561, "learning_rate": 1.428039957764112e-05, "loss": 2.1334, "step": 11117 }, { "epoch": 0.37, "grad_norm": 0.735717236995697, "learning_rate": 1.4279438953545277e-05, "loss": 2.0716, "step": 11118 }, { "epoch": 0.37, "grad_norm": 0.7126065492630005, "learning_rate": 1.4278478281102841e-05, "loss": 2.1087, "step": 11119 }, { "epoch": 0.37, "grad_norm": 0.6903116106987, "learning_rate": 1.4277517560324663e-05, "loss": 2.0883, "step": 11120 }, { "epoch": 0.37, "grad_norm": 0.7217884659767151, "learning_rate": 1.42765567912216e-05, "loss": 2.1188, "step": 11121 }, { "epoch": 0.37, "grad_norm": 0.7285078763961792, "learning_rate": 1.4275595973804505e-05, "loss": 2.1336, "step": 11122 }, { "epoch": 0.37, "grad_norm": 0.7281102538108826, "learning_rate": 1.4274635108084236e-05, "loss": 2.071, "step": 11123 }, { "epoch": 0.37, "grad_norm": 0.7387408018112183, "learning_rate": 1.4273674194071639e-05, "loss": 2.0707, "step": 11124 }, { "epoch": 0.37, "grad_norm": 0.7337959408760071, "learning_rate": 1.427271323177758e-05, "loss": 2.0836, "step": 11125 }, { "epoch": 0.37, "grad_norm": 0.7263737320899963, "learning_rate": 1.4271752221212908e-05, "loss": 2.0969, "step": 11126 }, { "epoch": 0.37, "grad_norm": 0.718824565410614, "learning_rate": 1.427079116238848e-05, "loss": 2.1588, "step": 11127 }, { "epoch": 0.37, "grad_norm": 0.7114875912666321, "learning_rate": 1.4269830055315164e-05, "loss": 2.1052, "step": 11128 }, { "epoch": 0.37, "grad_norm": 0.6948983073234558, "learning_rate": 1.4268868900003803e-05, "loss": 2.0783, "step": 11129 }, { "epoch": 0.37, "grad_norm": 0.7300153374671936, "learning_rate": 1.4267907696465269e-05, "loss": 2.1526, "step": 11130 }, { "epoch": 0.37, "grad_norm": 0.6916389465332031, "learning_rate": 1.4266946444710412e-05, "loss": 2.1058, "step": 11131 }, { "epoch": 0.37, "grad_norm": 0.7249911427497864, "learning_rate": 1.4265985144750095e-05, "loss": 2.0656, "step": 11132 }, { "epoch": 0.37, "grad_norm": 0.6803973913192749, "learning_rate": 1.4265023796595179e-05, "loss": 2.0699, "step": 11133 }, { "epoch": 0.37, "grad_norm": 0.7414565086364746, "learning_rate": 1.4264062400256524e-05, "loss": 2.134, "step": 11134 }, { "epoch": 0.37, "grad_norm": 0.7456116080284119, "learning_rate": 1.4263100955744987e-05, "loss": 2.1763, "step": 11135 }, { "epoch": 0.37, "grad_norm": 0.7009689807891846, "learning_rate": 1.4262139463071439e-05, "loss": 2.1134, "step": 11136 }, { "epoch": 0.37, "grad_norm": 0.7296027541160583, "learning_rate": 1.4261177922246732e-05, "loss": 2.1365, "step": 11137 }, { "epoch": 0.37, "grad_norm": 0.6952452063560486, "learning_rate": 1.4260216333281739e-05, "loss": 2.1519, "step": 11138 }, { "epoch": 0.37, "grad_norm": 0.7457868456840515, "learning_rate": 1.4259254696187316e-05, "loss": 2.0657, "step": 11139 }, { "epoch": 0.37, "grad_norm": 0.7168306708335876, "learning_rate": 1.425829301097433e-05, "loss": 2.0244, "step": 11140 }, { "epoch": 0.37, "grad_norm": 0.767706036567688, "learning_rate": 1.4257331277653646e-05, "loss": 2.1113, "step": 11141 }, { "epoch": 0.37, "grad_norm": 0.710503876209259, "learning_rate": 1.4256369496236127e-05, "loss": 2.0871, "step": 11142 }, { "epoch": 0.37, "grad_norm": 0.7095200419425964, "learning_rate": 1.4255407666732642e-05, "loss": 2.1485, "step": 11143 }, { "epoch": 0.37, "grad_norm": 0.7167171835899353, "learning_rate": 1.4254445789154053e-05, "loss": 2.1488, "step": 11144 }, { "epoch": 0.37, "grad_norm": 0.7402951121330261, "learning_rate": 1.4253483863511228e-05, "loss": 2.1512, "step": 11145 }, { "epoch": 0.37, "grad_norm": 0.7066875100135803, "learning_rate": 1.4252521889815037e-05, "loss": 2.0603, "step": 11146 }, { "epoch": 0.37, "grad_norm": 0.7391627430915833, "learning_rate": 1.4251559868076348e-05, "loss": 2.1808, "step": 11147 }, { "epoch": 0.37, "grad_norm": 0.823256254196167, "learning_rate": 1.425059779830602e-05, "loss": 2.0949, "step": 11148 }, { "epoch": 0.37, "grad_norm": 0.6952913403511047, "learning_rate": 1.4249635680514936e-05, "loss": 2.0154, "step": 11149 }, { "epoch": 0.37, "grad_norm": 0.7182970643043518, "learning_rate": 1.4248673514713957e-05, "loss": 2.0283, "step": 11150 }, { "epoch": 0.37, "grad_norm": 0.7535386681556702, "learning_rate": 1.4247711300913953e-05, "loss": 2.0722, "step": 11151 }, { "epoch": 0.37, "grad_norm": 0.7249448299407959, "learning_rate": 1.42467490391258e-05, "loss": 2.0991, "step": 11152 }, { "epoch": 0.37, "grad_norm": 0.7416818737983704, "learning_rate": 1.4245786729360362e-05, "loss": 2.077, "step": 11153 }, { "epoch": 0.37, "grad_norm": 0.7161725163459778, "learning_rate": 1.4244824371628515e-05, "loss": 2.1532, "step": 11154 }, { "epoch": 0.37, "grad_norm": 0.7586443424224854, "learning_rate": 1.4243861965941127e-05, "loss": 2.0419, "step": 11155 }, { "epoch": 0.37, "grad_norm": 0.7333353757858276, "learning_rate": 1.4242899512309077e-05, "loss": 2.045, "step": 11156 }, { "epoch": 0.37, "grad_norm": 0.7093413472175598, "learning_rate": 1.4241937010743232e-05, "loss": 2.101, "step": 11157 }, { "epoch": 0.37, "grad_norm": 0.6973031759262085, "learning_rate": 1.4240974461254473e-05, "loss": 2.1135, "step": 11158 }, { "epoch": 0.37, "grad_norm": 0.7134810090065002, "learning_rate": 1.4240011863853664e-05, "loss": 2.1313, "step": 11159 }, { "epoch": 0.37, "grad_norm": 0.7472101449966431, "learning_rate": 1.4239049218551691e-05, "loss": 2.1091, "step": 11160 }, { "epoch": 0.37, "grad_norm": 0.7165377140045166, "learning_rate": 1.4238086525359424e-05, "loss": 2.1667, "step": 11161 }, { "epoch": 0.37, "grad_norm": 0.749644935131073, "learning_rate": 1.4237123784287734e-05, "loss": 2.1844, "step": 11162 }, { "epoch": 0.37, "grad_norm": 0.7497105002403259, "learning_rate": 1.423616099534751e-05, "loss": 2.1359, "step": 11163 }, { "epoch": 0.37, "grad_norm": 0.733754575252533, "learning_rate": 1.4235198158549618e-05, "loss": 2.127, "step": 11164 }, { "epoch": 0.37, "grad_norm": 0.7308863997459412, "learning_rate": 1.423423527390494e-05, "loss": 2.087, "step": 11165 }, { "epoch": 0.37, "grad_norm": 0.7337420582771301, "learning_rate": 1.4233272341424351e-05, "loss": 2.1412, "step": 11166 }, { "epoch": 0.37, "grad_norm": 0.727179765701294, "learning_rate": 1.4232309361118735e-05, "loss": 2.1399, "step": 11167 }, { "epoch": 0.37, "grad_norm": 0.7389503121376038, "learning_rate": 1.4231346332998965e-05, "loss": 2.1281, "step": 11168 }, { "epoch": 0.37, "grad_norm": 0.7143319249153137, "learning_rate": 1.4230383257075926e-05, "loss": 2.1064, "step": 11169 }, { "epoch": 0.37, "grad_norm": 0.7208426594734192, "learning_rate": 1.4229420133360497e-05, "loss": 2.0868, "step": 11170 }, { "epoch": 0.37, "grad_norm": 0.7088901996612549, "learning_rate": 1.4228456961863556e-05, "loss": 2.0777, "step": 11171 }, { "epoch": 0.37, "grad_norm": 0.7331553101539612, "learning_rate": 1.422749374259599e-05, "loss": 2.0266, "step": 11172 }, { "epoch": 0.37, "grad_norm": 0.7248064279556274, "learning_rate": 1.4226530475568673e-05, "loss": 2.0646, "step": 11173 }, { "epoch": 0.37, "grad_norm": 0.7564190626144409, "learning_rate": 1.4225567160792494e-05, "loss": 2.0066, "step": 11174 }, { "epoch": 0.37, "grad_norm": 0.7105141282081604, "learning_rate": 1.4224603798278333e-05, "loss": 2.0284, "step": 11175 }, { "epoch": 0.37, "grad_norm": 0.7353984713554382, "learning_rate": 1.4223640388037074e-05, "loss": 2.1149, "step": 11176 }, { "epoch": 0.37, "grad_norm": 0.7325558662414551, "learning_rate": 1.4222676930079603e-05, "loss": 2.1431, "step": 11177 }, { "epoch": 0.37, "grad_norm": 0.7351787686347961, "learning_rate": 1.42217134244168e-05, "loss": 2.1375, "step": 11178 }, { "epoch": 0.37, "grad_norm": 0.7309549450874329, "learning_rate": 1.4220749871059555e-05, "loss": 2.0433, "step": 11179 }, { "epoch": 0.37, "grad_norm": 0.7133582830429077, "learning_rate": 1.421978627001875e-05, "loss": 2.1523, "step": 11180 }, { "epoch": 0.37, "grad_norm": 0.740746021270752, "learning_rate": 1.4218822621305272e-05, "loss": 2.1162, "step": 11181 }, { "epoch": 0.37, "grad_norm": 0.749692440032959, "learning_rate": 1.4217858924930012e-05, "loss": 2.1137, "step": 11182 }, { "epoch": 0.37, "grad_norm": 0.7487459182739258, "learning_rate": 1.4216895180903853e-05, "loss": 2.127, "step": 11183 }, { "epoch": 0.37, "grad_norm": 0.7453940510749817, "learning_rate": 1.421593138923768e-05, "loss": 2.1659, "step": 11184 }, { "epoch": 0.37, "grad_norm": 0.6963984370231628, "learning_rate": 1.4214967549942389e-05, "loss": 2.0389, "step": 11185 }, { "epoch": 0.37, "grad_norm": 0.722974956035614, "learning_rate": 1.4214003663028864e-05, "loss": 2.0811, "step": 11186 }, { "epoch": 0.37, "grad_norm": 0.7186281085014343, "learning_rate": 1.4213039728507994e-05, "loss": 2.168, "step": 11187 }, { "epoch": 0.37, "grad_norm": 0.7200491428375244, "learning_rate": 1.421207574639067e-05, "loss": 2.0644, "step": 11188 }, { "epoch": 0.37, "grad_norm": 0.711393415927887, "learning_rate": 1.4211111716687782e-05, "loss": 2.1257, "step": 11189 }, { "epoch": 0.37, "grad_norm": 0.7236936688423157, "learning_rate": 1.4210147639410223e-05, "loss": 2.173, "step": 11190 }, { "epoch": 0.37, "grad_norm": 0.7565777897834778, "learning_rate": 1.4209183514568885e-05, "loss": 2.1224, "step": 11191 }, { "epoch": 0.37, "grad_norm": 0.7428798675537109, "learning_rate": 1.4208219342174655e-05, "loss": 2.1587, "step": 11192 }, { "epoch": 0.37, "grad_norm": 0.7308961153030396, "learning_rate": 1.4207255122238432e-05, "loss": 2.1311, "step": 11193 }, { "epoch": 0.37, "grad_norm": 0.7124369144439697, "learning_rate": 1.4206290854771107e-05, "loss": 2.1172, "step": 11194 }, { "epoch": 0.37, "grad_norm": 0.7443590760231018, "learning_rate": 1.420532653978357e-05, "loss": 2.1043, "step": 11195 }, { "epoch": 0.37, "grad_norm": 0.7128584980964661, "learning_rate": 1.4204362177286722e-05, "loss": 2.1073, "step": 11196 }, { "epoch": 0.37, "grad_norm": 0.7255983948707581, "learning_rate": 1.4203397767291452e-05, "loss": 2.1244, "step": 11197 }, { "epoch": 0.37, "grad_norm": 0.7299162149429321, "learning_rate": 1.4202433309808657e-05, "loss": 2.1315, "step": 11198 }, { "epoch": 0.37, "grad_norm": 0.7553777098655701, "learning_rate": 1.4201468804849233e-05, "loss": 2.0709, "step": 11199 }, { "epoch": 0.37, "grad_norm": 0.7665624618530273, "learning_rate": 1.4200504252424077e-05, "loss": 2.1495, "step": 11200 }, { "epoch": 0.37, "grad_norm": 0.7205923795700073, "learning_rate": 1.4199539652544086e-05, "loss": 2.1178, "step": 11201 }, { "epoch": 0.37, "grad_norm": 0.7153617143630981, "learning_rate": 1.4198575005220157e-05, "loss": 2.0675, "step": 11202 }, { "epoch": 0.37, "grad_norm": 0.7182059288024902, "learning_rate": 1.4197610310463188e-05, "loss": 2.117, "step": 11203 }, { "epoch": 0.37, "grad_norm": 0.7073665857315063, "learning_rate": 1.4196645568284078e-05, "loss": 2.1047, "step": 11204 }, { "epoch": 0.37, "grad_norm": 0.7404348254203796, "learning_rate": 1.4195680778693728e-05, "loss": 2.0887, "step": 11205 }, { "epoch": 0.37, "grad_norm": 0.766862690448761, "learning_rate": 1.4194715941703032e-05, "loss": 2.1858, "step": 11206 }, { "epoch": 0.37, "grad_norm": 0.719981849193573, "learning_rate": 1.4193751057322898e-05, "loss": 2.102, "step": 11207 }, { "epoch": 0.37, "grad_norm": 0.7175014019012451, "learning_rate": 1.4192786125564217e-05, "loss": 2.1391, "step": 11208 }, { "epoch": 0.37, "grad_norm": 0.7436515688896179, "learning_rate": 1.4191821146437898e-05, "loss": 2.1259, "step": 11209 }, { "epoch": 0.37, "grad_norm": 0.73243248462677, "learning_rate": 1.419085611995484e-05, "loss": 2.0753, "step": 11210 }, { "epoch": 0.37, "grad_norm": 0.7517950534820557, "learning_rate": 1.4189891046125948e-05, "loss": 2.0716, "step": 11211 }, { "epoch": 0.37, "grad_norm": 0.771217942237854, "learning_rate": 1.4188925924962119e-05, "loss": 2.1326, "step": 11212 }, { "epoch": 0.37, "grad_norm": 0.7113727331161499, "learning_rate": 1.4187960756474262e-05, "loss": 2.0873, "step": 11213 }, { "epoch": 0.37, "grad_norm": 0.7072215676307678, "learning_rate": 1.4186995540673278e-05, "loss": 2.1199, "step": 11214 }, { "epoch": 0.37, "grad_norm": 0.7198404669761658, "learning_rate": 1.418603027757007e-05, "loss": 2.136, "step": 11215 }, { "epoch": 0.37, "grad_norm": 0.7018731236457825, "learning_rate": 1.418506496717555e-05, "loss": 2.0726, "step": 11216 }, { "epoch": 0.37, "grad_norm": 0.732787013053894, "learning_rate": 1.4184099609500615e-05, "loss": 2.1254, "step": 11217 }, { "epoch": 0.37, "grad_norm": 0.713353157043457, "learning_rate": 1.418313420455618e-05, "loss": 2.1718, "step": 11218 }, { "epoch": 0.37, "grad_norm": 0.738911509513855, "learning_rate": 1.418216875235314e-05, "loss": 2.1517, "step": 11219 }, { "epoch": 0.37, "grad_norm": 0.7707206606864929, "learning_rate": 1.418120325290241e-05, "loss": 2.0662, "step": 11220 }, { "epoch": 0.37, "grad_norm": 0.6887251734733582, "learning_rate": 1.4180237706214897e-05, "loss": 2.0978, "step": 11221 }, { "epoch": 0.37, "grad_norm": 0.7137007117271423, "learning_rate": 1.4179272112301507e-05, "loss": 2.1184, "step": 11222 }, { "epoch": 0.37, "grad_norm": 0.7035083770751953, "learning_rate": 1.4178306471173154e-05, "loss": 2.0873, "step": 11223 }, { "epoch": 0.37, "grad_norm": 0.7317951321601868, "learning_rate": 1.4177340782840738e-05, "loss": 2.0584, "step": 11224 }, { "epoch": 0.37, "grad_norm": 0.7241039872169495, "learning_rate": 1.4176375047315176e-05, "loss": 2.1039, "step": 11225 }, { "epoch": 0.37, "grad_norm": 0.7058163285255432, "learning_rate": 1.4175409264607376e-05, "loss": 2.1532, "step": 11226 }, { "epoch": 0.37, "grad_norm": 0.7255133390426636, "learning_rate": 1.4174443434728251e-05, "loss": 2.1352, "step": 11227 }, { "epoch": 0.37, "grad_norm": 0.7064658403396606, "learning_rate": 1.4173477557688708e-05, "loss": 2.0906, "step": 11228 }, { "epoch": 0.37, "grad_norm": 0.7195213437080383, "learning_rate": 1.4172511633499663e-05, "loss": 2.0963, "step": 11229 }, { "epoch": 0.37, "grad_norm": 0.7163783311843872, "learning_rate": 1.4171545662172026e-05, "loss": 2.1103, "step": 11230 }, { "epoch": 0.37, "grad_norm": 0.7199503183364868, "learning_rate": 1.417057964371671e-05, "loss": 2.1444, "step": 11231 }, { "epoch": 0.37, "grad_norm": 0.7062727808952332, "learning_rate": 1.4169613578144635e-05, "loss": 2.0899, "step": 11232 }, { "epoch": 0.37, "grad_norm": 0.7281644344329834, "learning_rate": 1.4168647465466702e-05, "loss": 2.1675, "step": 11233 }, { "epoch": 0.37, "grad_norm": 0.740689754486084, "learning_rate": 1.416768130569384e-05, "loss": 2.014, "step": 11234 }, { "epoch": 0.37, "grad_norm": 0.7241379022598267, "learning_rate": 1.4166715098836951e-05, "loss": 2.0267, "step": 11235 }, { "epoch": 0.37, "grad_norm": 0.6982423663139343, "learning_rate": 1.416574884490696e-05, "loss": 2.1057, "step": 11236 }, { "epoch": 0.37, "grad_norm": 0.7420019507408142, "learning_rate": 1.416478254391478e-05, "loss": 2.2113, "step": 11237 }, { "epoch": 0.37, "grad_norm": 0.6994154453277588, "learning_rate": 1.4163816195871324e-05, "loss": 2.1112, "step": 11238 }, { "epoch": 0.37, "grad_norm": 0.757770299911499, "learning_rate": 1.4162849800787517e-05, "loss": 2.0877, "step": 11239 }, { "epoch": 0.37, "grad_norm": 0.7443302869796753, "learning_rate": 1.4161883358674273e-05, "loss": 2.086, "step": 11240 }, { "epoch": 0.37, "grad_norm": 0.7188851833343506, "learning_rate": 1.4160916869542505e-05, "loss": 2.1156, "step": 11241 }, { "epoch": 0.37, "grad_norm": 0.72759610414505, "learning_rate": 1.4159950333403139e-05, "loss": 2.1929, "step": 11242 }, { "epoch": 0.37, "grad_norm": 0.7391787171363831, "learning_rate": 1.4158983750267097e-05, "loss": 2.1368, "step": 11243 }, { "epoch": 0.37, "grad_norm": 0.722474217414856, "learning_rate": 1.4158017120145287e-05, "loss": 2.0946, "step": 11244 }, { "epoch": 0.37, "grad_norm": 0.7508598566055298, "learning_rate": 1.415705044304864e-05, "loss": 2.0879, "step": 11245 }, { "epoch": 0.37, "grad_norm": 0.7179402709007263, "learning_rate": 1.4156083718988071e-05, "loss": 2.1025, "step": 11246 }, { "epoch": 0.37, "grad_norm": 0.7372363805770874, "learning_rate": 1.4155116947974507e-05, "loss": 2.0202, "step": 11247 }, { "epoch": 0.37, "grad_norm": 0.7209445834159851, "learning_rate": 1.4154150130018867e-05, "loss": 2.1265, "step": 11248 }, { "epoch": 0.37, "grad_norm": 0.7012056708335876, "learning_rate": 1.415318326513207e-05, "loss": 2.0507, "step": 11249 }, { "epoch": 0.37, "grad_norm": 0.7258504629135132, "learning_rate": 1.4152216353325044e-05, "loss": 2.0767, "step": 11250 }, { "epoch": 0.37, "grad_norm": 0.7148690819740295, "learning_rate": 1.4151249394608715e-05, "loss": 2.0766, "step": 11251 }, { "epoch": 0.37, "grad_norm": 0.7151157855987549, "learning_rate": 1.4150282388993997e-05, "loss": 2.12, "step": 11252 }, { "epoch": 0.37, "grad_norm": 0.7250333428382874, "learning_rate": 1.4149315336491822e-05, "loss": 2.1482, "step": 11253 }, { "epoch": 0.37, "grad_norm": 0.7033488154411316, "learning_rate": 1.4148348237113117e-05, "loss": 2.158, "step": 11254 }, { "epoch": 0.37, "grad_norm": 0.7513361573219299, "learning_rate": 1.4147381090868801e-05, "loss": 2.1476, "step": 11255 }, { "epoch": 0.37, "grad_norm": 0.7150612473487854, "learning_rate": 1.4146413897769809e-05, "loss": 2.0776, "step": 11256 }, { "epoch": 0.37, "grad_norm": 0.7760404348373413, "learning_rate": 1.414544665782706e-05, "loss": 2.1538, "step": 11257 }, { "epoch": 0.37, "grad_norm": 0.7502290606498718, "learning_rate": 1.4144479371051482e-05, "loss": 2.1843, "step": 11258 }, { "epoch": 0.37, "grad_norm": 0.7119926810264587, "learning_rate": 1.414351203745401e-05, "loss": 2.1012, "step": 11259 }, { "epoch": 0.37, "grad_norm": 0.7729415893554688, "learning_rate": 1.4142544657045562e-05, "loss": 2.0562, "step": 11260 }, { "epoch": 0.37, "grad_norm": 0.7658978700637817, "learning_rate": 1.4141577229837075e-05, "loss": 2.098, "step": 11261 }, { "epoch": 0.37, "grad_norm": 0.7574605941772461, "learning_rate": 1.4140609755839479e-05, "loss": 2.1429, "step": 11262 }, { "epoch": 0.37, "grad_norm": 0.7060720324516296, "learning_rate": 1.4139642235063695e-05, "loss": 2.0942, "step": 11263 }, { "epoch": 0.37, "grad_norm": 0.7192293405532837, "learning_rate": 1.413867466752066e-05, "loss": 2.0803, "step": 11264 }, { "epoch": 0.37, "grad_norm": 0.7126860618591309, "learning_rate": 1.4137707053221307e-05, "loss": 2.0107, "step": 11265 }, { "epoch": 0.37, "grad_norm": 0.7545983195304871, "learning_rate": 1.4136739392176561e-05, "loss": 2.1046, "step": 11266 }, { "epoch": 0.37, "grad_norm": 0.7372099161148071, "learning_rate": 1.4135771684397361e-05, "loss": 2.182, "step": 11267 }, { "epoch": 0.37, "grad_norm": 0.7328610420227051, "learning_rate": 1.4134803929894635e-05, "loss": 2.161, "step": 11268 }, { "epoch": 0.37, "grad_norm": 0.697360634803772, "learning_rate": 1.4133836128679316e-05, "loss": 2.0497, "step": 11269 }, { "epoch": 0.37, "grad_norm": 0.7492145299911499, "learning_rate": 1.4132868280762342e-05, "loss": 2.1161, "step": 11270 }, { "epoch": 0.37, "grad_norm": 0.7282615900039673, "learning_rate": 1.4131900386154642e-05, "loss": 2.0528, "step": 11271 }, { "epoch": 0.38, "grad_norm": 0.7174212336540222, "learning_rate": 1.4130932444867153e-05, "loss": 2.1017, "step": 11272 }, { "epoch": 0.38, "grad_norm": 0.7297884821891785, "learning_rate": 1.4129964456910812e-05, "loss": 2.1593, "step": 11273 }, { "epoch": 0.38, "grad_norm": 0.7081218957901001, "learning_rate": 1.4128996422296552e-05, "loss": 2.1565, "step": 11274 }, { "epoch": 0.38, "grad_norm": 0.6938362121582031, "learning_rate": 1.412802834103531e-05, "loss": 2.0969, "step": 11275 }, { "epoch": 0.38, "grad_norm": 0.7617660164833069, "learning_rate": 1.4127060213138024e-05, "loss": 2.0674, "step": 11276 }, { "epoch": 0.38, "grad_norm": 0.721051812171936, "learning_rate": 1.4126092038615629e-05, "loss": 2.0496, "step": 11277 }, { "epoch": 0.38, "grad_norm": 0.7314443588256836, "learning_rate": 1.4125123817479066e-05, "loss": 2.0641, "step": 11278 }, { "epoch": 0.38, "grad_norm": 0.7625236511230469, "learning_rate": 1.4124155549739272e-05, "loss": 2.1138, "step": 11279 }, { "epoch": 0.38, "grad_norm": 0.7367079257965088, "learning_rate": 1.4123187235407182e-05, "loss": 2.0346, "step": 11280 }, { "epoch": 0.38, "grad_norm": 0.7756535410881042, "learning_rate": 1.4122218874493741e-05, "loss": 2.1311, "step": 11281 }, { "epoch": 0.38, "grad_norm": 0.7690381407737732, "learning_rate": 1.4121250467009886e-05, "loss": 2.1323, "step": 11282 }, { "epoch": 0.38, "grad_norm": 0.7657479643821716, "learning_rate": 1.4120282012966562e-05, "loss": 2.1407, "step": 11283 }, { "epoch": 0.38, "grad_norm": 0.6905273199081421, "learning_rate": 1.4119313512374703e-05, "loss": 2.0533, "step": 11284 }, { "epoch": 0.38, "grad_norm": 0.6848394870758057, "learning_rate": 1.4118344965245254e-05, "loss": 2.097, "step": 11285 }, { "epoch": 0.38, "grad_norm": 0.7616974115371704, "learning_rate": 1.411737637158916e-05, "loss": 2.1164, "step": 11286 }, { "epoch": 0.38, "grad_norm": 0.7437761425971985, "learning_rate": 1.4116407731417361e-05, "loss": 2.0913, "step": 11287 }, { "epoch": 0.38, "grad_norm": 0.737551748752594, "learning_rate": 1.4115439044740796e-05, "loss": 2.1029, "step": 11288 }, { "epoch": 0.38, "grad_norm": 0.8175996541976929, "learning_rate": 1.4114470311570417e-05, "loss": 2.1222, "step": 11289 }, { "epoch": 0.38, "grad_norm": 0.7244274020195007, "learning_rate": 1.4113501531917159e-05, "loss": 2.0865, "step": 11290 }, { "epoch": 0.38, "grad_norm": 0.7473612427711487, "learning_rate": 1.4112532705791975e-05, "loss": 2.0965, "step": 11291 }, { "epoch": 0.38, "grad_norm": 0.7113406658172607, "learning_rate": 1.4111563833205806e-05, "loss": 2.1322, "step": 11292 }, { "epoch": 0.38, "grad_norm": 0.7082083821296692, "learning_rate": 1.4110594914169597e-05, "loss": 2.1156, "step": 11293 }, { "epoch": 0.38, "grad_norm": 0.7392251491546631, "learning_rate": 1.4109625948694297e-05, "loss": 2.1376, "step": 11294 }, { "epoch": 0.38, "grad_norm": 0.7428208589553833, "learning_rate": 1.4108656936790852e-05, "loss": 2.0954, "step": 11295 }, { "epoch": 0.38, "grad_norm": 0.7477293014526367, "learning_rate": 1.4107687878470206e-05, "loss": 2.1105, "step": 11296 }, { "epoch": 0.38, "grad_norm": 0.7450971007347107, "learning_rate": 1.4106718773743312e-05, "loss": 2.192, "step": 11297 }, { "epoch": 0.38, "grad_norm": 0.7245600819587708, "learning_rate": 1.410574962262112e-05, "loss": 2.1028, "step": 11298 }, { "epoch": 0.38, "grad_norm": 0.7093858122825623, "learning_rate": 1.4104780425114565e-05, "loss": 2.1175, "step": 11299 }, { "epoch": 0.38, "grad_norm": 0.6904973387718201, "learning_rate": 1.4103811181234614e-05, "loss": 2.0277, "step": 11300 }, { "epoch": 0.38, "grad_norm": 0.7340632677078247, "learning_rate": 1.410284189099221e-05, "loss": 2.1226, "step": 11301 }, { "epoch": 0.38, "grad_norm": 0.7565782070159912, "learning_rate": 1.41018725543983e-05, "loss": 2.1351, "step": 11302 }, { "epoch": 0.38, "grad_norm": 0.7128376960754395, "learning_rate": 1.4100903171463838e-05, "loss": 2.2219, "step": 11303 }, { "epoch": 0.38, "grad_norm": 0.7159379720687866, "learning_rate": 1.4099933742199775e-05, "loss": 2.1717, "step": 11304 }, { "epoch": 0.38, "grad_norm": 0.7225620746612549, "learning_rate": 1.4098964266617064e-05, "loss": 2.0403, "step": 11305 }, { "epoch": 0.38, "grad_norm": 0.7462195158004761, "learning_rate": 1.4097994744726658e-05, "loss": 2.0894, "step": 11306 }, { "epoch": 0.38, "grad_norm": 0.7404658198356628, "learning_rate": 1.4097025176539506e-05, "loss": 2.0947, "step": 11307 }, { "epoch": 0.38, "grad_norm": 0.7252629995346069, "learning_rate": 1.4096055562066567e-05, "loss": 2.0514, "step": 11308 }, { "epoch": 0.38, "grad_norm": 0.7713181376457214, "learning_rate": 1.4095085901318793e-05, "loss": 2.1531, "step": 11309 }, { "epoch": 0.38, "grad_norm": 0.7235509157180786, "learning_rate": 1.4094116194307135e-05, "loss": 2.1111, "step": 11310 }, { "epoch": 0.38, "grad_norm": 0.7227891087532043, "learning_rate": 1.4093146441042556e-05, "loss": 2.1519, "step": 11311 }, { "epoch": 0.38, "grad_norm": 0.7395390868186951, "learning_rate": 1.4092176641536006e-05, "loss": 2.1223, "step": 11312 }, { "epoch": 0.38, "grad_norm": 0.7327308058738708, "learning_rate": 1.4091206795798443e-05, "loss": 2.141, "step": 11313 }, { "epoch": 0.38, "grad_norm": 0.7350775003433228, "learning_rate": 1.409023690384082e-05, "loss": 2.1248, "step": 11314 }, { "epoch": 0.38, "grad_norm": 0.7206475734710693, "learning_rate": 1.4089266965674102e-05, "loss": 2.0807, "step": 11315 }, { "epoch": 0.38, "grad_norm": 0.709550678730011, "learning_rate": 1.408829698130924e-05, "loss": 2.0873, "step": 11316 }, { "epoch": 0.38, "grad_norm": 0.6950903534889221, "learning_rate": 1.4087326950757194e-05, "loss": 2.1174, "step": 11317 }, { "epoch": 0.38, "grad_norm": 0.7297247052192688, "learning_rate": 1.4086356874028927e-05, "loss": 2.2129, "step": 11318 }, { "epoch": 0.38, "grad_norm": 0.7027513980865479, "learning_rate": 1.408538675113539e-05, "loss": 2.1113, "step": 11319 }, { "epoch": 0.38, "grad_norm": 0.7393156290054321, "learning_rate": 1.4084416582087554e-05, "loss": 2.1598, "step": 11320 }, { "epoch": 0.38, "grad_norm": 0.7525022625923157, "learning_rate": 1.4083446366896367e-05, "loss": 2.1294, "step": 11321 }, { "epoch": 0.38, "grad_norm": 0.7394075989723206, "learning_rate": 1.40824761055728e-05, "loss": 2.1739, "step": 11322 }, { "epoch": 0.38, "grad_norm": 0.7208836674690247, "learning_rate": 1.4081505798127809e-05, "loss": 2.1031, "step": 11323 }, { "epoch": 0.38, "grad_norm": 0.7147702574729919, "learning_rate": 1.4080535444572357e-05, "loss": 2.2089, "step": 11324 }, { "epoch": 0.38, "grad_norm": 0.7271811366081238, "learning_rate": 1.4079565044917408e-05, "loss": 2.0441, "step": 11325 }, { "epoch": 0.38, "grad_norm": 0.7279956340789795, "learning_rate": 1.407859459917392e-05, "loss": 2.1404, "step": 11326 }, { "epoch": 0.38, "grad_norm": 0.7437514662742615, "learning_rate": 1.4077624107352867e-05, "loss": 2.0868, "step": 11327 }, { "epoch": 0.38, "grad_norm": 0.7344135642051697, "learning_rate": 1.4076653569465204e-05, "loss": 2.1298, "step": 11328 }, { "epoch": 0.38, "grad_norm": 0.7050654292106628, "learning_rate": 1.4075682985521896e-05, "loss": 2.0802, "step": 11329 }, { "epoch": 0.38, "grad_norm": 0.727127730846405, "learning_rate": 1.4074712355533913e-05, "loss": 2.0194, "step": 11330 }, { "epoch": 0.38, "grad_norm": 0.7598231434822083, "learning_rate": 1.4073741679512218e-05, "loss": 2.106, "step": 11331 }, { "epoch": 0.38, "grad_norm": 0.755497395992279, "learning_rate": 1.4072770957467775e-05, "loss": 2.0838, "step": 11332 }, { "epoch": 0.38, "grad_norm": 0.7339121699333191, "learning_rate": 1.4071800189411552e-05, "loss": 2.1129, "step": 11333 }, { "epoch": 0.38, "grad_norm": 0.7392110228538513, "learning_rate": 1.4070829375354518e-05, "loss": 2.1172, "step": 11334 }, { "epoch": 0.38, "grad_norm": 0.739726185798645, "learning_rate": 1.4069858515307643e-05, "loss": 2.1291, "step": 11335 }, { "epoch": 0.38, "grad_norm": 0.7599457502365112, "learning_rate": 1.4068887609281888e-05, "loss": 2.0738, "step": 11336 }, { "epoch": 0.38, "grad_norm": 0.703784167766571, "learning_rate": 1.4067916657288221e-05, "loss": 2.1631, "step": 11337 }, { "epoch": 0.38, "grad_norm": 0.6943023204803467, "learning_rate": 1.4066945659337623e-05, "loss": 2.0617, "step": 11338 }, { "epoch": 0.38, "grad_norm": 0.7323131561279297, "learning_rate": 1.4065974615441054e-05, "loss": 2.0414, "step": 11339 }, { "epoch": 0.38, "grad_norm": 0.7876615524291992, "learning_rate": 1.4065003525609483e-05, "loss": 2.1557, "step": 11340 }, { "epoch": 0.38, "grad_norm": 0.739802360534668, "learning_rate": 1.406403238985389e-05, "loss": 2.1271, "step": 11341 }, { "epoch": 0.38, "grad_norm": 0.7265348434448242, "learning_rate": 1.4063061208185238e-05, "loss": 2.1282, "step": 11342 }, { "epoch": 0.38, "grad_norm": 0.7415464520454407, "learning_rate": 1.4062089980614504e-05, "loss": 2.0651, "step": 11343 }, { "epoch": 0.38, "grad_norm": 0.7040876150131226, "learning_rate": 1.4061118707152657e-05, "loss": 2.0781, "step": 11344 }, { "epoch": 0.38, "grad_norm": 0.732926607131958, "learning_rate": 1.4060147387810667e-05, "loss": 1.9949, "step": 11345 }, { "epoch": 0.38, "grad_norm": 0.7381318807601929, "learning_rate": 1.4059176022599515e-05, "loss": 2.1105, "step": 11346 }, { "epoch": 0.38, "grad_norm": 0.7351718544960022, "learning_rate": 1.4058204611530173e-05, "loss": 2.0902, "step": 11347 }, { "epoch": 0.38, "grad_norm": 0.7441126108169556, "learning_rate": 1.405723315461361e-05, "loss": 2.1913, "step": 11348 }, { "epoch": 0.38, "grad_norm": 0.7330016493797302, "learning_rate": 1.4056261651860808e-05, "loss": 2.0643, "step": 11349 }, { "epoch": 0.38, "grad_norm": 0.7363520264625549, "learning_rate": 1.4055290103282735e-05, "loss": 2.062, "step": 11350 }, { "epoch": 0.38, "grad_norm": 0.7020858526229858, "learning_rate": 1.4054318508890372e-05, "loss": 2.1009, "step": 11351 }, { "epoch": 0.38, "grad_norm": 0.7362831234931946, "learning_rate": 1.4053346868694697e-05, "loss": 2.0614, "step": 11352 }, { "epoch": 0.38, "grad_norm": 0.6958045959472656, "learning_rate": 1.4052375182706683e-05, "loss": 2.065, "step": 11353 }, { "epoch": 0.38, "grad_norm": 0.760219931602478, "learning_rate": 1.405140345093731e-05, "loss": 2.0969, "step": 11354 }, { "epoch": 0.38, "grad_norm": 0.7303688526153564, "learning_rate": 1.4050431673397556e-05, "loss": 2.1052, "step": 11355 }, { "epoch": 0.38, "grad_norm": 0.7132928371429443, "learning_rate": 1.4049459850098394e-05, "loss": 2.078, "step": 11356 }, { "epoch": 0.38, "grad_norm": 0.7101141214370728, "learning_rate": 1.404848798105081e-05, "loss": 2.0918, "step": 11357 }, { "epoch": 0.38, "grad_norm": 0.7626948952674866, "learning_rate": 1.4047516066265786e-05, "loss": 2.1307, "step": 11358 }, { "epoch": 0.38, "grad_norm": 0.7110300064086914, "learning_rate": 1.4046544105754292e-05, "loss": 2.0222, "step": 11359 }, { "epoch": 0.38, "grad_norm": 0.7119615077972412, "learning_rate": 1.4045572099527318e-05, "loss": 2.1136, "step": 11360 }, { "epoch": 0.38, "grad_norm": 0.7320970296859741, "learning_rate": 1.4044600047595838e-05, "loss": 2.1013, "step": 11361 }, { "epoch": 0.38, "grad_norm": 0.7220376133918762, "learning_rate": 1.404362794997084e-05, "loss": 2.1056, "step": 11362 }, { "epoch": 0.38, "grad_norm": 0.7294628024101257, "learning_rate": 1.4042655806663302e-05, "loss": 2.1514, "step": 11363 }, { "epoch": 0.38, "grad_norm": 0.7361676096916199, "learning_rate": 1.4041683617684206e-05, "loss": 2.1134, "step": 11364 }, { "epoch": 0.38, "grad_norm": 0.7277711033821106, "learning_rate": 1.4040711383044539e-05, "loss": 2.0383, "step": 11365 }, { "epoch": 0.38, "grad_norm": 0.7244346141815186, "learning_rate": 1.4039739102755285e-05, "loss": 2.1375, "step": 11366 }, { "epoch": 0.38, "grad_norm": 0.7274999618530273, "learning_rate": 1.4038766776827422e-05, "loss": 2.1807, "step": 11367 }, { "epoch": 0.38, "grad_norm": 0.756253719329834, "learning_rate": 1.4037794405271941e-05, "loss": 2.131, "step": 11368 }, { "epoch": 0.38, "grad_norm": 0.7286421060562134, "learning_rate": 1.4036821988099827e-05, "loss": 2.0942, "step": 11369 }, { "epoch": 0.38, "grad_norm": 0.7169977426528931, "learning_rate": 1.4035849525322058e-05, "loss": 2.0811, "step": 11370 }, { "epoch": 0.38, "grad_norm": 0.7374930381774902, "learning_rate": 1.4034877016949633e-05, "loss": 2.1252, "step": 11371 }, { "epoch": 0.38, "grad_norm": 0.7497426867485046, "learning_rate": 1.403390446299353e-05, "loss": 2.1562, "step": 11372 }, { "epoch": 0.38, "grad_norm": 0.7529228329658508, "learning_rate": 1.4032931863464736e-05, "loss": 2.1309, "step": 11373 }, { "epoch": 0.38, "grad_norm": 0.7152889370918274, "learning_rate": 1.4031959218374245e-05, "loss": 2.0716, "step": 11374 }, { "epoch": 0.38, "grad_norm": 0.7394862174987793, "learning_rate": 1.403098652773304e-05, "loss": 2.1411, "step": 11375 }, { "epoch": 0.38, "grad_norm": 0.7230199575424194, "learning_rate": 1.4030013791552111e-05, "loss": 2.0813, "step": 11376 }, { "epoch": 0.38, "grad_norm": 0.7785342335700989, "learning_rate": 1.4029041009842448e-05, "loss": 2.1173, "step": 11377 }, { "epoch": 0.38, "grad_norm": 0.7200108170509338, "learning_rate": 1.402806818261504e-05, "loss": 2.091, "step": 11378 }, { "epoch": 0.38, "grad_norm": 0.7376548051834106, "learning_rate": 1.4027095309880881e-05, "loss": 2.1798, "step": 11379 }, { "epoch": 0.38, "grad_norm": 0.7292722463607788, "learning_rate": 1.402612239165096e-05, "loss": 2.0076, "step": 11380 }, { "epoch": 0.38, "grad_norm": 0.7084988951683044, "learning_rate": 1.4025149427936264e-05, "loss": 2.072, "step": 11381 }, { "epoch": 0.38, "grad_norm": 0.745607316493988, "learning_rate": 1.4024176418747793e-05, "loss": 2.0998, "step": 11382 }, { "epoch": 0.38, "grad_norm": 0.7558834552764893, "learning_rate": 1.4023203364096531e-05, "loss": 2.1665, "step": 11383 }, { "epoch": 0.38, "grad_norm": 0.6955224275588989, "learning_rate": 1.4022230263993478e-05, "loss": 2.0801, "step": 11384 }, { "epoch": 0.38, "grad_norm": 0.7393018007278442, "learning_rate": 1.4021257118449622e-05, "loss": 2.1223, "step": 11385 }, { "epoch": 0.38, "grad_norm": 0.7001378536224365, "learning_rate": 1.4020283927475963e-05, "loss": 2.1294, "step": 11386 }, { "epoch": 0.38, "grad_norm": 0.728144109249115, "learning_rate": 1.401931069108349e-05, "loss": 2.0869, "step": 11387 }, { "epoch": 0.38, "grad_norm": 0.7182221412658691, "learning_rate": 1.40183374092832e-05, "loss": 2.0966, "step": 11388 }, { "epoch": 0.38, "grad_norm": 0.694695770740509, "learning_rate": 1.401736408208609e-05, "loss": 2.0954, "step": 11389 }, { "epoch": 0.38, "grad_norm": 0.7392402291297913, "learning_rate": 1.4016390709503156e-05, "loss": 2.0801, "step": 11390 }, { "epoch": 0.38, "grad_norm": 0.7085093855857849, "learning_rate": 1.4015417291545394e-05, "loss": 2.1285, "step": 11391 }, { "epoch": 0.38, "grad_norm": 0.7328885793685913, "learning_rate": 1.4014443828223798e-05, "loss": 2.1352, "step": 11392 }, { "epoch": 0.38, "grad_norm": 0.7181127071380615, "learning_rate": 1.4013470319549371e-05, "loss": 2.1056, "step": 11393 }, { "epoch": 0.38, "grad_norm": 0.701712965965271, "learning_rate": 1.4012496765533108e-05, "loss": 2.0286, "step": 11394 }, { "epoch": 0.38, "grad_norm": 0.7256366014480591, "learning_rate": 1.4011523166186007e-05, "loss": 2.1188, "step": 11395 }, { "epoch": 0.38, "grad_norm": 0.7266802787780762, "learning_rate": 1.401054952151907e-05, "loss": 2.1754, "step": 11396 }, { "epoch": 0.38, "grad_norm": 0.7212596535682678, "learning_rate": 1.4009575831543292e-05, "loss": 2.1239, "step": 11397 }, { "epoch": 0.38, "grad_norm": 0.7530649304389954, "learning_rate": 1.4008602096269677e-05, "loss": 2.1331, "step": 11398 }, { "epoch": 0.38, "grad_norm": 0.7259551882743835, "learning_rate": 1.4007628315709225e-05, "loss": 2.0715, "step": 11399 }, { "epoch": 0.38, "grad_norm": 0.7623724937438965, "learning_rate": 1.4006654489872937e-05, "loss": 2.1195, "step": 11400 }, { "epoch": 0.38, "grad_norm": 0.7286744713783264, "learning_rate": 1.4005680618771816e-05, "loss": 2.089, "step": 11401 }, { "epoch": 0.38, "grad_norm": 0.7333736419677734, "learning_rate": 1.4004706702416864e-05, "loss": 2.1449, "step": 11402 }, { "epoch": 0.38, "grad_norm": 0.7168421149253845, "learning_rate": 1.4003732740819078e-05, "loss": 2.0961, "step": 11403 }, { "epoch": 0.38, "grad_norm": 0.74763423204422, "learning_rate": 1.4002758733989473e-05, "loss": 2.0197, "step": 11404 }, { "epoch": 0.38, "grad_norm": 0.7334607243537903, "learning_rate": 1.400178468193904e-05, "loss": 2.1822, "step": 11405 }, { "epoch": 0.38, "grad_norm": 0.7560888528823853, "learning_rate": 1.4000810584678789e-05, "loss": 2.188, "step": 11406 }, { "epoch": 0.38, "grad_norm": 0.7041736841201782, "learning_rate": 1.3999836442219725e-05, "loss": 2.1567, "step": 11407 }, { "epoch": 0.38, "grad_norm": 0.7709736227989197, "learning_rate": 1.3998862254572854e-05, "loss": 2.1027, "step": 11408 }, { "epoch": 0.38, "grad_norm": 0.7523634433746338, "learning_rate": 1.3997888021749183e-05, "loss": 2.0818, "step": 11409 }, { "epoch": 0.38, "grad_norm": 0.7626747488975525, "learning_rate": 1.3996913743759712e-05, "loss": 2.1527, "step": 11410 }, { "epoch": 0.38, "grad_norm": 0.748218834400177, "learning_rate": 1.3995939420615455e-05, "loss": 2.1156, "step": 11411 }, { "epoch": 0.38, "grad_norm": 0.7393519282341003, "learning_rate": 1.3994965052327414e-05, "loss": 2.1081, "step": 11412 }, { "epoch": 0.38, "grad_norm": 0.7613183259963989, "learning_rate": 1.3993990638906603e-05, "loss": 2.1247, "step": 11413 }, { "epoch": 0.38, "grad_norm": 0.7168604731559753, "learning_rate": 1.3993016180364021e-05, "loss": 2.1126, "step": 11414 }, { "epoch": 0.38, "grad_norm": 0.7124350666999817, "learning_rate": 1.3992041676710686e-05, "loss": 2.0649, "step": 11415 }, { "epoch": 0.38, "grad_norm": 0.7400722503662109, "learning_rate": 1.3991067127957603e-05, "loss": 2.1009, "step": 11416 }, { "epoch": 0.38, "grad_norm": 0.7381260395050049, "learning_rate": 1.3990092534115783e-05, "loss": 2.0644, "step": 11417 }, { "epoch": 0.38, "grad_norm": 0.7363085746765137, "learning_rate": 1.3989117895196233e-05, "loss": 2.0769, "step": 11418 }, { "epoch": 0.38, "grad_norm": 0.7187110781669617, "learning_rate": 1.3988143211209968e-05, "loss": 2.0663, "step": 11419 }, { "epoch": 0.38, "grad_norm": 0.7486928105354309, "learning_rate": 1.3987168482168e-05, "loss": 2.1658, "step": 11420 }, { "epoch": 0.38, "grad_norm": 0.7433101534843445, "learning_rate": 1.3986193708081339e-05, "loss": 2.1435, "step": 11421 }, { "epoch": 0.38, "grad_norm": 0.7400829195976257, "learning_rate": 1.3985218888960999e-05, "loss": 2.0912, "step": 11422 }, { "epoch": 0.38, "grad_norm": 0.7167808413505554, "learning_rate": 1.3984244024817989e-05, "loss": 2.0938, "step": 11423 }, { "epoch": 0.38, "grad_norm": 0.7212663888931274, "learning_rate": 1.3983269115663327e-05, "loss": 2.0599, "step": 11424 }, { "epoch": 0.38, "grad_norm": 0.7064757943153381, "learning_rate": 1.3982294161508021e-05, "loss": 2.1403, "step": 11425 }, { "epoch": 0.38, "grad_norm": 0.7244797348976135, "learning_rate": 1.3981319162363094e-05, "loss": 2.087, "step": 11426 }, { "epoch": 0.38, "grad_norm": 0.726233184337616, "learning_rate": 1.3980344118239553e-05, "loss": 2.0482, "step": 11427 }, { "epoch": 0.38, "grad_norm": 0.7073317170143127, "learning_rate": 1.397936902914842e-05, "loss": 2.1363, "step": 11428 }, { "epoch": 0.38, "grad_norm": 0.7302148342132568, "learning_rate": 1.3978393895100706e-05, "loss": 2.1555, "step": 11429 }, { "epoch": 0.38, "grad_norm": 0.7258449792861938, "learning_rate": 1.3977418716107427e-05, "loss": 2.1651, "step": 11430 }, { "epoch": 0.38, "grad_norm": 0.7266464233398438, "learning_rate": 1.3976443492179607e-05, "loss": 2.0379, "step": 11431 }, { "epoch": 0.38, "grad_norm": 0.8782541751861572, "learning_rate": 1.3975468223328257e-05, "loss": 2.1166, "step": 11432 }, { "epoch": 0.38, "grad_norm": 0.7109593152999878, "learning_rate": 1.3974492909564397e-05, "loss": 2.0765, "step": 11433 }, { "epoch": 0.38, "grad_norm": 0.7213874459266663, "learning_rate": 1.3973517550899045e-05, "loss": 2.1173, "step": 11434 }, { "epoch": 0.38, "grad_norm": 0.7201820015907288, "learning_rate": 1.397254214734322e-05, "loss": 2.0872, "step": 11435 }, { "epoch": 0.38, "grad_norm": 0.719819962978363, "learning_rate": 1.3971566698907944e-05, "loss": 2.1451, "step": 11436 }, { "epoch": 0.38, "grad_norm": 0.7373226881027222, "learning_rate": 1.3970591205604234e-05, "loss": 2.0531, "step": 11437 }, { "epoch": 0.38, "grad_norm": 0.7376651763916016, "learning_rate": 1.3969615667443108e-05, "loss": 2.1365, "step": 11438 }, { "epoch": 0.38, "grad_norm": 0.7356284856796265, "learning_rate": 1.3968640084435597e-05, "loss": 2.1581, "step": 11439 }, { "epoch": 0.38, "grad_norm": 0.744534432888031, "learning_rate": 1.396766445659271e-05, "loss": 2.2055, "step": 11440 }, { "epoch": 0.38, "grad_norm": 0.7165676951408386, "learning_rate": 1.3966688783925478e-05, "loss": 2.1019, "step": 11441 }, { "epoch": 0.38, "grad_norm": 0.7214135527610779, "learning_rate": 1.3965713066444923e-05, "loss": 2.1822, "step": 11442 }, { "epoch": 0.38, "grad_norm": 0.7336829900741577, "learning_rate": 1.3964737304162064e-05, "loss": 2.1272, "step": 11443 }, { "epoch": 0.38, "grad_norm": 0.7009656429290771, "learning_rate": 1.3963761497087926e-05, "loss": 2.0848, "step": 11444 }, { "epoch": 0.38, "grad_norm": 0.7161441445350647, "learning_rate": 1.3962785645233533e-05, "loss": 2.0877, "step": 11445 }, { "epoch": 0.38, "grad_norm": 0.7274609208106995, "learning_rate": 1.3961809748609911e-05, "loss": 2.0588, "step": 11446 }, { "epoch": 0.38, "grad_norm": 0.7047138810157776, "learning_rate": 1.3960833807228085e-05, "loss": 2.1, "step": 11447 }, { "epoch": 0.38, "grad_norm": 0.7163825035095215, "learning_rate": 1.3959857821099081e-05, "loss": 2.1135, "step": 11448 }, { "epoch": 0.38, "grad_norm": 0.7072292566299438, "learning_rate": 1.395888179023392e-05, "loss": 2.1682, "step": 11449 }, { "epoch": 0.38, "grad_norm": 0.7029929161071777, "learning_rate": 1.3957905714643633e-05, "loss": 2.144, "step": 11450 }, { "epoch": 0.38, "grad_norm": 0.7170625329017639, "learning_rate": 1.3956929594339253e-05, "loss": 2.158, "step": 11451 }, { "epoch": 0.38, "grad_norm": 0.7440356612205505, "learning_rate": 1.3955953429331795e-05, "loss": 2.1039, "step": 11452 }, { "epoch": 0.38, "grad_norm": 0.7330176830291748, "learning_rate": 1.3954977219632297e-05, "loss": 2.0907, "step": 11453 }, { "epoch": 0.38, "grad_norm": 0.7226961255073547, "learning_rate": 1.3954000965251781e-05, "loss": 2.1209, "step": 11454 }, { "epoch": 0.38, "grad_norm": 0.7116376161575317, "learning_rate": 1.3953024666201282e-05, "loss": 2.1473, "step": 11455 }, { "epoch": 0.38, "grad_norm": 0.8063004016876221, "learning_rate": 1.3952048322491827e-05, "loss": 2.0179, "step": 11456 }, { "epoch": 0.38, "grad_norm": 0.7078288197517395, "learning_rate": 1.3951071934134446e-05, "loss": 2.0711, "step": 11457 }, { "epoch": 0.38, "grad_norm": 0.7183838486671448, "learning_rate": 1.3950095501140169e-05, "loss": 2.142, "step": 11458 }, { "epoch": 0.38, "grad_norm": 0.7276841402053833, "learning_rate": 1.394911902352003e-05, "loss": 2.1224, "step": 11459 }, { "epoch": 0.38, "grad_norm": 0.718867838382721, "learning_rate": 1.3948142501285056e-05, "loss": 2.1073, "step": 11460 }, { "epoch": 0.38, "grad_norm": 0.732370913028717, "learning_rate": 1.3947165934446283e-05, "loss": 2.1215, "step": 11461 }, { "epoch": 0.38, "grad_norm": 0.7546433806419373, "learning_rate": 1.3946189323014743e-05, "loss": 2.0714, "step": 11462 }, { "epoch": 0.38, "grad_norm": 0.7155033349990845, "learning_rate": 1.3945212667001467e-05, "loss": 2.044, "step": 11463 }, { "epoch": 0.38, "grad_norm": 0.7212733030319214, "learning_rate": 1.3944235966417491e-05, "loss": 2.0661, "step": 11464 }, { "epoch": 0.38, "grad_norm": 0.7089389562606812, "learning_rate": 1.3943259221273851e-05, "loss": 2.1174, "step": 11465 }, { "epoch": 0.38, "grad_norm": 0.6950744390487671, "learning_rate": 1.3942282431581577e-05, "loss": 2.082, "step": 11466 }, { "epoch": 0.38, "grad_norm": 0.7404822111129761, "learning_rate": 1.3941305597351709e-05, "loss": 2.1128, "step": 11467 }, { "epoch": 0.38, "grad_norm": 0.768186092376709, "learning_rate": 1.3940328718595277e-05, "loss": 2.1688, "step": 11468 }, { "epoch": 0.38, "grad_norm": 0.7197774648666382, "learning_rate": 1.3939351795323322e-05, "loss": 2.0765, "step": 11469 }, { "epoch": 0.38, "grad_norm": 0.7011984586715698, "learning_rate": 1.3938374827546879e-05, "loss": 2.0411, "step": 11470 }, { "epoch": 0.38, "grad_norm": 0.7435581088066101, "learning_rate": 1.3937397815276982e-05, "loss": 2.1283, "step": 11471 }, { "epoch": 0.38, "grad_norm": 0.7341912388801575, "learning_rate": 1.3936420758524678e-05, "loss": 2.0971, "step": 11472 }, { "epoch": 0.38, "grad_norm": 0.7510620355606079, "learning_rate": 1.3935443657300997e-05, "loss": 2.1302, "step": 11473 }, { "epoch": 0.38, "grad_norm": 0.7260511517524719, "learning_rate": 1.3934466511616977e-05, "loss": 2.2008, "step": 11474 }, { "epoch": 0.38, "grad_norm": 0.7165123224258423, "learning_rate": 1.3933489321483664e-05, "loss": 2.1922, "step": 11475 }, { "epoch": 0.38, "grad_norm": 0.7333325147628784, "learning_rate": 1.3932512086912093e-05, "loss": 2.0791, "step": 11476 }, { "epoch": 0.38, "grad_norm": 0.7265769243240356, "learning_rate": 1.3931534807913303e-05, "loss": 2.1234, "step": 11477 }, { "epoch": 0.38, "grad_norm": 0.718147873878479, "learning_rate": 1.3930557484498339e-05, "loss": 2.152, "step": 11478 }, { "epoch": 0.38, "grad_norm": 0.7242167592048645, "learning_rate": 1.3929580116678238e-05, "loss": 2.1463, "step": 11479 }, { "epoch": 0.38, "grad_norm": 0.7438389658927917, "learning_rate": 1.3928602704464047e-05, "loss": 2.09, "step": 11480 }, { "epoch": 0.38, "grad_norm": 0.7340410351753235, "learning_rate": 1.3927625247866803e-05, "loss": 2.1176, "step": 11481 }, { "epoch": 0.38, "grad_norm": 0.727824330329895, "learning_rate": 1.392664774689755e-05, "loss": 2.1639, "step": 11482 }, { "epoch": 0.38, "grad_norm": 0.7227712869644165, "learning_rate": 1.3925670201567333e-05, "loss": 2.1331, "step": 11483 }, { "epoch": 0.38, "grad_norm": 0.7353600263595581, "learning_rate": 1.3924692611887196e-05, "loss": 2.1199, "step": 11484 }, { "epoch": 0.38, "grad_norm": 0.7445943355560303, "learning_rate": 1.3923714977868179e-05, "loss": 2.0896, "step": 11485 }, { "epoch": 0.38, "grad_norm": 0.7453267574310303, "learning_rate": 1.3922737299521334e-05, "loss": 2.0772, "step": 11486 }, { "epoch": 0.38, "grad_norm": 0.7079179883003235, "learning_rate": 1.3921759576857698e-05, "loss": 2.085, "step": 11487 }, { "epoch": 0.38, "grad_norm": 0.726108193397522, "learning_rate": 1.3920781809888322e-05, "loss": 2.1079, "step": 11488 }, { "epoch": 0.38, "grad_norm": 0.7410142421722412, "learning_rate": 1.391980399862425e-05, "loss": 2.0856, "step": 11489 }, { "epoch": 0.38, "grad_norm": 0.7477012872695923, "learning_rate": 1.3918826143076532e-05, "loss": 2.1499, "step": 11490 }, { "epoch": 0.38, "grad_norm": 0.7122055292129517, "learning_rate": 1.3917848243256211e-05, "loss": 2.0947, "step": 11491 }, { "epoch": 0.38, "grad_norm": 0.7292513847351074, "learning_rate": 1.3916870299174336e-05, "loss": 2.1374, "step": 11492 }, { "epoch": 0.38, "grad_norm": 0.7271727323532104, "learning_rate": 1.3915892310841959e-05, "loss": 2.1032, "step": 11493 }, { "epoch": 0.38, "grad_norm": 0.7012867331504822, "learning_rate": 1.3914914278270121e-05, "loss": 2.1032, "step": 11494 }, { "epoch": 0.38, "grad_norm": 0.7849199175834656, "learning_rate": 1.3913936201469882e-05, "loss": 2.0916, "step": 11495 }, { "epoch": 0.38, "grad_norm": 0.7315781116485596, "learning_rate": 1.391295808045228e-05, "loss": 2.1162, "step": 11496 }, { "epoch": 0.38, "grad_norm": 0.7250478863716125, "learning_rate": 1.3911979915228374e-05, "loss": 2.1189, "step": 11497 }, { "epoch": 0.38, "grad_norm": 0.7302700281143188, "learning_rate": 1.3911001705809211e-05, "loss": 2.0142, "step": 11498 }, { "epoch": 0.38, "grad_norm": 0.7002418041229248, "learning_rate": 1.391002345220584e-05, "loss": 2.1169, "step": 11499 }, { "epoch": 0.38, "grad_norm": 0.7220374345779419, "learning_rate": 1.3909045154429319e-05, "loss": 2.1254, "step": 11500 }, { "epoch": 0.38, "grad_norm": 0.727595865726471, "learning_rate": 1.3908066812490695e-05, "loss": 2.0756, "step": 11501 }, { "epoch": 0.38, "grad_norm": 0.712340772151947, "learning_rate": 1.3907088426401024e-05, "loss": 2.1615, "step": 11502 }, { "epoch": 0.38, "grad_norm": 0.7316076755523682, "learning_rate": 1.3906109996171355e-05, "loss": 2.1092, "step": 11503 }, { "epoch": 0.38, "grad_norm": 0.7226830720901489, "learning_rate": 1.3905131521812746e-05, "loss": 2.136, "step": 11504 }, { "epoch": 0.38, "grad_norm": 0.7411084175109863, "learning_rate": 1.3904153003336252e-05, "loss": 2.0613, "step": 11505 }, { "epoch": 0.38, "grad_norm": 0.71602463722229, "learning_rate": 1.3903174440752925e-05, "loss": 2.1099, "step": 11506 }, { "epoch": 0.38, "grad_norm": 0.7252110242843628, "learning_rate": 1.3902195834073818e-05, "loss": 2.1546, "step": 11507 }, { "epoch": 0.38, "grad_norm": 0.7378067970275879, "learning_rate": 1.390121718330999e-05, "loss": 2.0927, "step": 11508 }, { "epoch": 0.38, "grad_norm": 0.7067298293113708, "learning_rate": 1.3900238488472498e-05, "loss": 2.0794, "step": 11509 }, { "epoch": 0.38, "grad_norm": 0.768795371055603, "learning_rate": 1.3899259749572397e-05, "loss": 2.0918, "step": 11510 }, { "epoch": 0.38, "grad_norm": 0.7537060976028442, "learning_rate": 1.389828096662074e-05, "loss": 2.1635, "step": 11511 }, { "epoch": 0.38, "grad_norm": 0.7030593156814575, "learning_rate": 1.3897302139628594e-05, "loss": 2.044, "step": 11512 }, { "epoch": 0.38, "grad_norm": 0.727529764175415, "learning_rate": 1.3896323268607013e-05, "loss": 2.0999, "step": 11513 }, { "epoch": 0.38, "grad_norm": 0.7152175903320312, "learning_rate": 1.3895344353567052e-05, "loss": 2.1882, "step": 11514 }, { "epoch": 0.38, "grad_norm": 0.7171653509140015, "learning_rate": 1.3894365394519776e-05, "loss": 2.1376, "step": 11515 }, { "epoch": 0.38, "grad_norm": 0.749289870262146, "learning_rate": 1.3893386391476243e-05, "loss": 2.0859, "step": 11516 }, { "epoch": 0.38, "grad_norm": 0.7324069142341614, "learning_rate": 1.389240734444751e-05, "loss": 2.0636, "step": 11517 }, { "epoch": 0.38, "grad_norm": 0.7270019054412842, "learning_rate": 1.3891428253444639e-05, "loss": 2.1318, "step": 11518 }, { "epoch": 0.38, "grad_norm": 0.7595897316932678, "learning_rate": 1.3890449118478695e-05, "loss": 2.0793, "step": 11519 }, { "epoch": 0.38, "grad_norm": 0.7341136336326599, "learning_rate": 1.3889469939560736e-05, "loss": 2.1393, "step": 11520 }, { "epoch": 0.38, "grad_norm": 0.7345200777053833, "learning_rate": 1.3888490716701824e-05, "loss": 2.1797, "step": 11521 }, { "epoch": 0.38, "grad_norm": 0.7249011397361755, "learning_rate": 1.3887511449913023e-05, "loss": 2.0732, "step": 11522 }, { "epoch": 0.38, "grad_norm": 0.6892530918121338, "learning_rate": 1.3886532139205395e-05, "loss": 2.0747, "step": 11523 }, { "epoch": 0.38, "grad_norm": 0.7325267195701599, "learning_rate": 1.3885552784590009e-05, "loss": 2.0627, "step": 11524 }, { "epoch": 0.38, "grad_norm": 0.732509195804596, "learning_rate": 1.388457338607792e-05, "loss": 2.154, "step": 11525 }, { "epoch": 0.38, "grad_norm": 0.7337095141410828, "learning_rate": 1.38835939436802e-05, "loss": 2.1187, "step": 11526 }, { "epoch": 0.38, "grad_norm": 0.7032434940338135, "learning_rate": 1.388261445740791e-05, "loss": 2.1416, "step": 11527 }, { "epoch": 0.38, "grad_norm": 0.7411068081855774, "learning_rate": 1.3881634927272118e-05, "loss": 2.0621, "step": 11528 }, { "epoch": 0.38, "grad_norm": 0.7256404161453247, "learning_rate": 1.3880655353283889e-05, "loss": 2.1571, "step": 11529 }, { "epoch": 0.38, "grad_norm": 0.7578774690628052, "learning_rate": 1.3879675735454295e-05, "loss": 2.1516, "step": 11530 }, { "epoch": 0.38, "grad_norm": 0.7206849455833435, "learning_rate": 1.3878696073794392e-05, "loss": 2.0701, "step": 11531 }, { "epoch": 0.38, "grad_norm": 0.7665100693702698, "learning_rate": 1.3877716368315257e-05, "loss": 2.0497, "step": 11532 }, { "epoch": 0.38, "grad_norm": 0.7228631377220154, "learning_rate": 1.3876736619027953e-05, "loss": 2.0951, "step": 11533 }, { "epoch": 0.38, "grad_norm": 0.740211546421051, "learning_rate": 1.387575682594355e-05, "loss": 2.0692, "step": 11534 }, { "epoch": 0.38, "grad_norm": 0.7295033931732178, "learning_rate": 1.3874776989073123e-05, "loss": 2.0018, "step": 11535 }, { "epoch": 0.38, "grad_norm": 0.7313127517700195, "learning_rate": 1.3873797108427731e-05, "loss": 2.1086, "step": 11536 }, { "epoch": 0.38, "grad_norm": 0.721352219581604, "learning_rate": 1.387281718401845e-05, "loss": 2.0851, "step": 11537 }, { "epoch": 0.38, "grad_norm": 0.7298734784126282, "learning_rate": 1.3871837215856351e-05, "loss": 2.1828, "step": 11538 }, { "epoch": 0.38, "grad_norm": 0.7168005108833313, "learning_rate": 1.3870857203952505e-05, "loss": 2.068, "step": 11539 }, { "epoch": 0.38, "grad_norm": 0.756931722164154, "learning_rate": 1.3869877148317983e-05, "loss": 2.1316, "step": 11540 }, { "epoch": 0.38, "grad_norm": 0.6979550123214722, "learning_rate": 1.3868897048963858e-05, "loss": 2.1556, "step": 11541 }, { "epoch": 0.38, "grad_norm": 0.7059472799301147, "learning_rate": 1.38679169059012e-05, "loss": 2.1597, "step": 11542 }, { "epoch": 0.38, "grad_norm": 0.7303836941719055, "learning_rate": 1.3866936719141084e-05, "loss": 2.0681, "step": 11543 }, { "epoch": 0.38, "grad_norm": 0.7224733233451843, "learning_rate": 1.3865956488694584e-05, "loss": 2.0979, "step": 11544 }, { "epoch": 0.38, "grad_norm": 0.7030598521232605, "learning_rate": 1.386497621457277e-05, "loss": 2.1062, "step": 11545 }, { "epoch": 0.38, "grad_norm": 0.7102269530296326, "learning_rate": 1.3863995896786723e-05, "loss": 2.074, "step": 11546 }, { "epoch": 0.38, "grad_norm": 0.7306323647499084, "learning_rate": 1.3863015535347515e-05, "loss": 2.0859, "step": 11547 }, { "epoch": 0.38, "grad_norm": 0.7136075496673584, "learning_rate": 1.3862035130266218e-05, "loss": 2.028, "step": 11548 }, { "epoch": 0.38, "grad_norm": 0.7416447401046753, "learning_rate": 1.3861054681553917e-05, "loss": 2.1362, "step": 11549 }, { "epoch": 0.38, "grad_norm": 0.7210537195205688, "learning_rate": 1.3860074189221677e-05, "loss": 2.1254, "step": 11550 }, { "epoch": 0.38, "grad_norm": 0.7629061937332153, "learning_rate": 1.3859093653280584e-05, "loss": 2.1234, "step": 11551 }, { "epoch": 0.38, "grad_norm": 0.7179463505744934, "learning_rate": 1.3858113073741715e-05, "loss": 2.1187, "step": 11552 }, { "epoch": 0.38, "grad_norm": 0.7118474841117859, "learning_rate": 1.385713245061614e-05, "loss": 2.0743, "step": 11553 }, { "epoch": 0.38, "grad_norm": 0.7272747755050659, "learning_rate": 1.3856151783914946e-05, "loss": 2.006, "step": 11554 }, { "epoch": 0.38, "grad_norm": 0.710779070854187, "learning_rate": 1.3855171073649211e-05, "loss": 2.1044, "step": 11555 }, { "epoch": 0.38, "grad_norm": 0.7099700570106506, "learning_rate": 1.385419031983001e-05, "loss": 2.0576, "step": 11556 }, { "epoch": 0.38, "grad_norm": 0.7408515810966492, "learning_rate": 1.3853209522468427e-05, "loss": 2.0831, "step": 11557 }, { "epoch": 0.38, "grad_norm": 0.6994704008102417, "learning_rate": 1.385222868157554e-05, "loss": 2.0659, "step": 11558 }, { "epoch": 0.38, "grad_norm": 0.7266060709953308, "learning_rate": 1.385124779716243e-05, "loss": 2.0474, "step": 11559 }, { "epoch": 0.38, "grad_norm": 0.7341500520706177, "learning_rate": 1.3850266869240181e-05, "loss": 2.127, "step": 11560 }, { "epoch": 0.38, "grad_norm": 0.7326970100402832, "learning_rate": 1.3849285897819873e-05, "loss": 2.0466, "step": 11561 }, { "epoch": 0.38, "grad_norm": 0.7227213978767395, "learning_rate": 1.3848304882912589e-05, "loss": 2.067, "step": 11562 }, { "epoch": 0.38, "grad_norm": 0.7275463938713074, "learning_rate": 1.3847323824529415e-05, "loss": 2.1088, "step": 11563 }, { "epoch": 0.38, "grad_norm": 0.7263650298118591, "learning_rate": 1.3846342722681426e-05, "loss": 2.08, "step": 11564 }, { "epoch": 0.38, "grad_norm": 0.7303856015205383, "learning_rate": 1.384536157737971e-05, "loss": 2.0516, "step": 11565 }, { "epoch": 0.38, "grad_norm": 0.696685254573822, "learning_rate": 1.3844380388635358e-05, "loss": 2.0779, "step": 11566 }, { "epoch": 0.38, "grad_norm": 0.7021346688270569, "learning_rate": 1.3843399156459444e-05, "loss": 2.068, "step": 11567 }, { "epoch": 0.38, "grad_norm": 0.7369007468223572, "learning_rate": 1.3842417880863063e-05, "loss": 2.082, "step": 11568 }, { "epoch": 0.38, "grad_norm": 0.7505294680595398, "learning_rate": 1.3841436561857297e-05, "loss": 2.1907, "step": 11569 }, { "epoch": 0.38, "grad_norm": 0.7150418162345886, "learning_rate": 1.384045519945323e-05, "loss": 2.0478, "step": 11570 }, { "epoch": 0.38, "grad_norm": 0.7252741456031799, "learning_rate": 1.383947379366195e-05, "loss": 2.0921, "step": 11571 }, { "epoch": 0.39, "grad_norm": 0.7577261924743652, "learning_rate": 1.3838492344494547e-05, "loss": 2.1021, "step": 11572 }, { "epoch": 0.39, "grad_norm": 0.6852173805236816, "learning_rate": 1.3837510851962106e-05, "loss": 2.0974, "step": 11573 }, { "epoch": 0.39, "grad_norm": 0.7020335793495178, "learning_rate": 1.383652931607572e-05, "loss": 2.0654, "step": 11574 }, { "epoch": 0.39, "grad_norm": 0.7773502469062805, "learning_rate": 1.3835547736846468e-05, "loss": 2.0196, "step": 11575 }, { "epoch": 0.39, "grad_norm": 0.7273013591766357, "learning_rate": 1.3834566114285448e-05, "loss": 2.1288, "step": 11576 }, { "epoch": 0.39, "grad_norm": 0.7291594743728638, "learning_rate": 1.3833584448403752e-05, "loss": 2.1428, "step": 11577 }, { "epoch": 0.39, "grad_norm": 0.7520511150360107, "learning_rate": 1.3832602739212457e-05, "loss": 2.111, "step": 11578 }, { "epoch": 0.39, "grad_norm": 0.704126238822937, "learning_rate": 1.383162098672267e-05, "loss": 2.1322, "step": 11579 }, { "epoch": 0.39, "grad_norm": 0.7477869391441345, "learning_rate": 1.3830639190945471e-05, "loss": 2.1438, "step": 11580 }, { "epoch": 0.39, "grad_norm": 0.7056574821472168, "learning_rate": 1.3829657351891957e-05, "loss": 2.035, "step": 11581 }, { "epoch": 0.39, "grad_norm": 0.7050648331642151, "learning_rate": 1.3828675469573216e-05, "loss": 2.0526, "step": 11582 }, { "epoch": 0.39, "grad_norm": 0.7426525354385376, "learning_rate": 1.3827693544000345e-05, "loss": 2.1328, "step": 11583 }, { "epoch": 0.39, "grad_norm": 0.7540075182914734, "learning_rate": 1.3826711575184436e-05, "loss": 2.0373, "step": 11584 }, { "epoch": 0.39, "grad_norm": 0.7662214040756226, "learning_rate": 1.3825729563136583e-05, "loss": 2.0748, "step": 11585 }, { "epoch": 0.39, "grad_norm": 0.75132155418396, "learning_rate": 1.3824747507867877e-05, "loss": 2.1016, "step": 11586 }, { "epoch": 0.39, "grad_norm": 0.7163492441177368, "learning_rate": 1.3823765409389417e-05, "loss": 2.0613, "step": 11587 }, { "epoch": 0.39, "grad_norm": 0.7279643416404724, "learning_rate": 1.3822783267712296e-05, "loss": 2.0861, "step": 11588 }, { "epoch": 0.39, "grad_norm": 0.7701489925384521, "learning_rate": 1.3821801082847608e-05, "loss": 2.0789, "step": 11589 }, { "epoch": 0.39, "grad_norm": 0.7818331122398376, "learning_rate": 1.3820818854806454e-05, "loss": 2.0695, "step": 11590 }, { "epoch": 0.39, "grad_norm": 0.7220224142074585, "learning_rate": 1.3819836583599929e-05, "loss": 2.0912, "step": 11591 }, { "epoch": 0.39, "grad_norm": 0.7788681387901306, "learning_rate": 1.3818854269239126e-05, "loss": 2.097, "step": 11592 }, { "epoch": 0.39, "grad_norm": 0.7736840844154358, "learning_rate": 1.3817871911735146e-05, "loss": 2.1502, "step": 11593 }, { "epoch": 0.39, "grad_norm": 0.7439195513725281, "learning_rate": 1.3816889511099087e-05, "loss": 2.1364, "step": 11594 }, { "epoch": 0.39, "grad_norm": 0.7504758238792419, "learning_rate": 1.3815907067342047e-05, "loss": 2.0641, "step": 11595 }, { "epoch": 0.39, "grad_norm": 0.7239766120910645, "learning_rate": 1.3814924580475126e-05, "loss": 2.1144, "step": 11596 }, { "epoch": 0.39, "grad_norm": 0.7589836120605469, "learning_rate": 1.3813942050509422e-05, "loss": 2.0981, "step": 11597 }, { "epoch": 0.39, "grad_norm": 0.754057765007019, "learning_rate": 1.3812959477456036e-05, "loss": 2.1734, "step": 11598 }, { "epoch": 0.39, "grad_norm": 0.7073413133621216, "learning_rate": 1.3811976861326071e-05, "loss": 2.0691, "step": 11599 }, { "epoch": 0.39, "grad_norm": 0.7381290793418884, "learning_rate": 1.381099420213062e-05, "loss": 2.088, "step": 11600 }, { "epoch": 0.39, "grad_norm": 0.7068790793418884, "learning_rate": 1.3810011499880795e-05, "loss": 2.1446, "step": 11601 }, { "epoch": 0.39, "grad_norm": 0.7349498867988586, "learning_rate": 1.380902875458769e-05, "loss": 2.1278, "step": 11602 }, { "epoch": 0.39, "grad_norm": 0.743887186050415, "learning_rate": 1.3808045966262413e-05, "loss": 2.1066, "step": 11603 }, { "epoch": 0.39, "grad_norm": 0.7581366300582886, "learning_rate": 1.3807063134916061e-05, "loss": 2.125, "step": 11604 }, { "epoch": 0.39, "grad_norm": 0.7287168502807617, "learning_rate": 1.3806080260559744e-05, "loss": 2.0938, "step": 11605 }, { "epoch": 0.39, "grad_norm": 0.7191795706748962, "learning_rate": 1.3805097343204561e-05, "loss": 2.1182, "step": 11606 }, { "epoch": 0.39, "grad_norm": 0.755765974521637, "learning_rate": 1.3804114382861617e-05, "loss": 2.11, "step": 11607 }, { "epoch": 0.39, "grad_norm": 0.7350980043411255, "learning_rate": 1.3803131379542021e-05, "loss": 2.0764, "step": 11608 }, { "epoch": 0.39, "grad_norm": 0.7042651772499084, "learning_rate": 1.3802148333256876e-05, "loss": 2.1245, "step": 11609 }, { "epoch": 0.39, "grad_norm": 0.7305948138237, "learning_rate": 1.3801165244017285e-05, "loss": 2.0893, "step": 11610 }, { "epoch": 0.39, "grad_norm": 0.7541106939315796, "learning_rate": 1.3800182111834358e-05, "loss": 2.1493, "step": 11611 }, { "epoch": 0.39, "grad_norm": 0.7800107598304749, "learning_rate": 1.37991989367192e-05, "loss": 2.1469, "step": 11612 }, { "epoch": 0.39, "grad_norm": 0.7313702702522278, "learning_rate": 1.379821571868292e-05, "loss": 2.051, "step": 11613 }, { "epoch": 0.39, "grad_norm": 0.7433415055274963, "learning_rate": 1.3797232457736623e-05, "loss": 2.1306, "step": 11614 }, { "epoch": 0.39, "grad_norm": 0.7259772419929504, "learning_rate": 1.3796249153891422e-05, "loss": 2.1154, "step": 11615 }, { "epoch": 0.39, "grad_norm": 0.7363418936729431, "learning_rate": 1.379526580715842e-05, "loss": 2.0794, "step": 11616 }, { "epoch": 0.39, "grad_norm": 0.7201186418533325, "learning_rate": 1.379428241754873e-05, "loss": 2.0937, "step": 11617 }, { "epoch": 0.39, "grad_norm": 0.7100079655647278, "learning_rate": 1.379329898507346e-05, "loss": 2.088, "step": 11618 }, { "epoch": 0.39, "grad_norm": 0.7562648057937622, "learning_rate": 1.3792315509743724e-05, "loss": 2.1664, "step": 11619 }, { "epoch": 0.39, "grad_norm": 0.7121405005455017, "learning_rate": 1.3791331991570627e-05, "loss": 2.0768, "step": 11620 }, { "epoch": 0.39, "grad_norm": 0.7101187705993652, "learning_rate": 1.3790348430565286e-05, "loss": 2.0737, "step": 11621 }, { "epoch": 0.39, "grad_norm": 0.7118765711784363, "learning_rate": 1.3789364826738804e-05, "loss": 2.1091, "step": 11622 }, { "epoch": 0.39, "grad_norm": 0.7277964949607849, "learning_rate": 1.3788381180102306e-05, "loss": 2.0694, "step": 11623 }, { "epoch": 0.39, "grad_norm": 0.7176958918571472, "learning_rate": 1.3787397490666892e-05, "loss": 2.0614, "step": 11624 }, { "epoch": 0.39, "grad_norm": 0.7238707542419434, "learning_rate": 1.3786413758443687e-05, "loss": 2.0781, "step": 11625 }, { "epoch": 0.39, "grad_norm": 0.7206467390060425, "learning_rate": 1.3785429983443793e-05, "loss": 2.1317, "step": 11626 }, { "epoch": 0.39, "grad_norm": 0.7189276218414307, "learning_rate": 1.3784446165678329e-05, "loss": 2.0996, "step": 11627 }, { "epoch": 0.39, "grad_norm": 0.7252731323242188, "learning_rate": 1.3783462305158416e-05, "loss": 2.1325, "step": 11628 }, { "epoch": 0.39, "grad_norm": 0.7328822016716003, "learning_rate": 1.3782478401895158e-05, "loss": 2.0293, "step": 11629 }, { "epoch": 0.39, "grad_norm": 0.7346433401107788, "learning_rate": 1.3781494455899677e-05, "loss": 2.1235, "step": 11630 }, { "epoch": 0.39, "grad_norm": 0.7207636833190918, "learning_rate": 1.3780510467183087e-05, "loss": 2.0963, "step": 11631 }, { "epoch": 0.39, "grad_norm": 0.7483453154563904, "learning_rate": 1.3779526435756507e-05, "loss": 2.1121, "step": 11632 }, { "epoch": 0.39, "grad_norm": 0.7184513807296753, "learning_rate": 1.3778542361631052e-05, "loss": 2.1231, "step": 11633 }, { "epoch": 0.39, "grad_norm": 0.7262873649597168, "learning_rate": 1.3777558244817841e-05, "loss": 2.0617, "step": 11634 }, { "epoch": 0.39, "grad_norm": 0.737218976020813, "learning_rate": 1.377657408532799e-05, "loss": 2.1204, "step": 11635 }, { "epoch": 0.39, "grad_norm": 0.7416977882385254, "learning_rate": 1.3775589883172618e-05, "loss": 2.1408, "step": 11636 }, { "epoch": 0.39, "grad_norm": 0.7427653670310974, "learning_rate": 1.3774605638362844e-05, "loss": 1.9906, "step": 11637 }, { "epoch": 0.39, "grad_norm": 0.73930823802948, "learning_rate": 1.3773621350909786e-05, "loss": 2.1392, "step": 11638 }, { "epoch": 0.39, "grad_norm": 0.7166590094566345, "learning_rate": 1.3772637020824568e-05, "loss": 2.1509, "step": 11639 }, { "epoch": 0.39, "grad_norm": 0.7091129422187805, "learning_rate": 1.3771652648118308e-05, "loss": 2.1837, "step": 11640 }, { "epoch": 0.39, "grad_norm": 0.775908350944519, "learning_rate": 1.3770668232802124e-05, "loss": 2.1563, "step": 11641 }, { "epoch": 0.39, "grad_norm": 0.7425082921981812, "learning_rate": 1.3769683774887144e-05, "loss": 2.1261, "step": 11642 }, { "epoch": 0.39, "grad_norm": 0.7394794821739197, "learning_rate": 1.3768699274384483e-05, "loss": 2.0882, "step": 11643 }, { "epoch": 0.39, "grad_norm": 0.725896954536438, "learning_rate": 1.3767714731305267e-05, "loss": 2.1013, "step": 11644 }, { "epoch": 0.39, "grad_norm": 0.7525447010993958, "learning_rate": 1.376673014566062e-05, "loss": 2.04, "step": 11645 }, { "epoch": 0.39, "grad_norm": 0.7347779273986816, "learning_rate": 1.376574551746166e-05, "loss": 2.1101, "step": 11646 }, { "epoch": 0.39, "grad_norm": 0.777912974357605, "learning_rate": 1.3764760846719518e-05, "loss": 2.0888, "step": 11647 }, { "epoch": 0.39, "grad_norm": 0.7194976806640625, "learning_rate": 1.376377613344531e-05, "loss": 2.0507, "step": 11648 }, { "epoch": 0.39, "grad_norm": 0.7134403586387634, "learning_rate": 1.3762791377650167e-05, "loss": 2.1727, "step": 11649 }, { "epoch": 0.39, "grad_norm": 0.72414630651474, "learning_rate": 1.3761806579345214e-05, "loss": 2.1516, "step": 11650 }, { "epoch": 0.39, "grad_norm": 0.7293637990951538, "learning_rate": 1.3760821738541572e-05, "loss": 2.0929, "step": 11651 }, { "epoch": 0.39, "grad_norm": 0.7099736332893372, "learning_rate": 1.3759836855250373e-05, "loss": 2.0895, "step": 11652 }, { "epoch": 0.39, "grad_norm": 0.7444831132888794, "learning_rate": 1.3758851929482737e-05, "loss": 2.1395, "step": 11653 }, { "epoch": 0.39, "grad_norm": 0.7232000827789307, "learning_rate": 1.3757866961249797e-05, "loss": 2.1274, "step": 11654 }, { "epoch": 0.39, "grad_norm": 0.7381769418716431, "learning_rate": 1.375688195056268e-05, "loss": 2.125, "step": 11655 }, { "epoch": 0.39, "grad_norm": 0.7343809604644775, "learning_rate": 1.3755896897432511e-05, "loss": 2.1176, "step": 11656 }, { "epoch": 0.39, "grad_norm": 0.7515524625778198, "learning_rate": 1.3754911801870417e-05, "loss": 2.0074, "step": 11657 }, { "epoch": 0.39, "grad_norm": 0.7542002201080322, "learning_rate": 1.3753926663887536e-05, "loss": 2.0765, "step": 11658 }, { "epoch": 0.39, "grad_norm": 0.7247751951217651, "learning_rate": 1.3752941483494987e-05, "loss": 2.1592, "step": 11659 }, { "epoch": 0.39, "grad_norm": 0.7217437624931335, "learning_rate": 1.3751956260703905e-05, "loss": 2.0725, "step": 11660 }, { "epoch": 0.39, "grad_norm": 0.7486657500267029, "learning_rate": 1.3750970995525421e-05, "loss": 2.0966, "step": 11661 }, { "epoch": 0.39, "grad_norm": 0.7318110466003418, "learning_rate": 1.3749985687970666e-05, "loss": 2.1677, "step": 11662 }, { "epoch": 0.39, "grad_norm": 0.7041610479354858, "learning_rate": 1.3749000338050768e-05, "loss": 2.0176, "step": 11663 }, { "epoch": 0.39, "grad_norm": 0.7950376868247986, "learning_rate": 1.3748014945776861e-05, "loss": 2.112, "step": 11664 }, { "epoch": 0.39, "grad_norm": 0.7559768557548523, "learning_rate": 1.3747029511160082e-05, "loss": 2.1106, "step": 11665 }, { "epoch": 0.39, "grad_norm": 0.7226019501686096, "learning_rate": 1.3746044034211556e-05, "loss": 2.1685, "step": 11666 }, { "epoch": 0.39, "grad_norm": 0.7206690311431885, "learning_rate": 1.3745058514942422e-05, "loss": 2.0802, "step": 11667 }, { "epoch": 0.39, "grad_norm": 0.7013974785804749, "learning_rate": 1.374407295336381e-05, "loss": 2.1606, "step": 11668 }, { "epoch": 0.39, "grad_norm": 0.7075404524803162, "learning_rate": 1.3743087349486855e-05, "loss": 2.0479, "step": 11669 }, { "epoch": 0.39, "grad_norm": 0.7521916031837463, "learning_rate": 1.3742101703322696e-05, "loss": 2.0927, "step": 11670 }, { "epoch": 0.39, "grad_norm": 0.774567186832428, "learning_rate": 1.374111601488246e-05, "loss": 2.0388, "step": 11671 }, { "epoch": 0.39, "grad_norm": 0.7391560673713684, "learning_rate": 1.3740130284177293e-05, "loss": 2.0977, "step": 11672 }, { "epoch": 0.39, "grad_norm": 0.6954547166824341, "learning_rate": 1.3739144511218325e-05, "loss": 2.0325, "step": 11673 }, { "epoch": 0.39, "grad_norm": 0.7475699186325073, "learning_rate": 1.3738158696016692e-05, "loss": 2.1308, "step": 11674 }, { "epoch": 0.39, "grad_norm": 0.7774897813796997, "learning_rate": 1.3737172838583534e-05, "loss": 2.0479, "step": 11675 }, { "epoch": 0.39, "grad_norm": 0.704092800617218, "learning_rate": 1.3736186938929987e-05, "loss": 2.1271, "step": 11676 }, { "epoch": 0.39, "grad_norm": 0.7230117917060852, "learning_rate": 1.373520099706719e-05, "loss": 2.1087, "step": 11677 }, { "epoch": 0.39, "grad_norm": 0.7377476692199707, "learning_rate": 1.3734215013006283e-05, "loss": 2.1425, "step": 11678 }, { "epoch": 0.39, "grad_norm": 0.737406313419342, "learning_rate": 1.3733228986758399e-05, "loss": 2.0811, "step": 11679 }, { "epoch": 0.39, "grad_norm": 0.7015998363494873, "learning_rate": 1.3732242918334682e-05, "loss": 2.0914, "step": 11680 }, { "epoch": 0.39, "grad_norm": 0.7315475940704346, "learning_rate": 1.3731256807746277e-05, "loss": 2.0347, "step": 11681 }, { "epoch": 0.39, "grad_norm": 0.7496551275253296, "learning_rate": 1.3730270655004314e-05, "loss": 2.1345, "step": 11682 }, { "epoch": 0.39, "grad_norm": 0.7160812020301819, "learning_rate": 1.3729284460119942e-05, "loss": 2.0269, "step": 11683 }, { "epoch": 0.39, "grad_norm": 0.7383368611335754, "learning_rate": 1.3728298223104299e-05, "loss": 2.1474, "step": 11684 }, { "epoch": 0.39, "grad_norm": 0.7193183898925781, "learning_rate": 1.372731194396853e-05, "loss": 2.0504, "step": 11685 }, { "epoch": 0.39, "grad_norm": 0.7306458950042725, "learning_rate": 1.372632562272377e-05, "loss": 2.0681, "step": 11686 }, { "epoch": 0.39, "grad_norm": 0.7010766863822937, "learning_rate": 1.3725339259381169e-05, "loss": 2.1072, "step": 11687 }, { "epoch": 0.39, "grad_norm": 0.7116683125495911, "learning_rate": 1.372435285395187e-05, "loss": 2.0939, "step": 11688 }, { "epoch": 0.39, "grad_norm": 0.7291589379310608, "learning_rate": 1.3723366406447017e-05, "loss": 2.0564, "step": 11689 }, { "epoch": 0.39, "grad_norm": 0.7191392183303833, "learning_rate": 1.3722379916877746e-05, "loss": 2.1154, "step": 11690 }, { "epoch": 0.39, "grad_norm": 0.7248224020004272, "learning_rate": 1.3721393385255212e-05, "loss": 2.1274, "step": 11691 }, { "epoch": 0.39, "grad_norm": 0.7120988965034485, "learning_rate": 1.372040681159056e-05, "loss": 2.0742, "step": 11692 }, { "epoch": 0.39, "grad_norm": 0.7146470546722412, "learning_rate": 1.3719420195894925e-05, "loss": 2.0074, "step": 11693 }, { "epoch": 0.39, "grad_norm": 0.734995424747467, "learning_rate": 1.3718433538179465e-05, "loss": 2.0324, "step": 11694 }, { "epoch": 0.39, "grad_norm": 0.7181507349014282, "learning_rate": 1.3717446838455322e-05, "loss": 2.053, "step": 11695 }, { "epoch": 0.39, "grad_norm": 0.7228471636772156, "learning_rate": 1.3716460096733641e-05, "loss": 2.0576, "step": 11696 }, { "epoch": 0.39, "grad_norm": 0.7376711964607239, "learning_rate": 1.3715473313025573e-05, "loss": 2.081, "step": 11697 }, { "epoch": 0.39, "grad_norm": 0.7009484171867371, "learning_rate": 1.3714486487342265e-05, "loss": 2.1181, "step": 11698 }, { "epoch": 0.39, "grad_norm": 0.7111235857009888, "learning_rate": 1.3713499619694865e-05, "loss": 2.11, "step": 11699 }, { "epoch": 0.39, "grad_norm": 0.7466190457344055, "learning_rate": 1.3712512710094524e-05, "loss": 2.179, "step": 11700 }, { "epoch": 0.39, "grad_norm": 0.6936691403388977, "learning_rate": 1.3711525758552391e-05, "loss": 2.0376, "step": 11701 }, { "epoch": 0.39, "grad_norm": 0.7580778002738953, "learning_rate": 1.3710538765079614e-05, "loss": 2.1115, "step": 11702 }, { "epoch": 0.39, "grad_norm": 0.7091009020805359, "learning_rate": 1.3709551729687345e-05, "loss": 2.1275, "step": 11703 }, { "epoch": 0.39, "grad_norm": 0.7427220344543457, "learning_rate": 1.370856465238673e-05, "loss": 2.0868, "step": 11704 }, { "epoch": 0.39, "grad_norm": 0.7189304828643799, "learning_rate": 1.3707577533188932e-05, "loss": 2.0795, "step": 11705 }, { "epoch": 0.39, "grad_norm": 0.680560827255249, "learning_rate": 1.3706590372105093e-05, "loss": 2.0343, "step": 11706 }, { "epoch": 0.39, "grad_norm": 0.7574985027313232, "learning_rate": 1.3705603169146367e-05, "loss": 2.1341, "step": 11707 }, { "epoch": 0.39, "grad_norm": 0.7180473208427429, "learning_rate": 1.370461592432391e-05, "loss": 2.0851, "step": 11708 }, { "epoch": 0.39, "grad_norm": 0.7354133725166321, "learning_rate": 1.3703628637648871e-05, "loss": 2.0651, "step": 11709 }, { "epoch": 0.39, "grad_norm": 0.7554914355278015, "learning_rate": 1.370264130913241e-05, "loss": 2.1047, "step": 11710 }, { "epoch": 0.39, "grad_norm": 0.7154842019081116, "learning_rate": 1.3701653938785673e-05, "loss": 2.085, "step": 11711 }, { "epoch": 0.39, "grad_norm": 0.7353411316871643, "learning_rate": 1.3700666526619822e-05, "loss": 2.1355, "step": 11712 }, { "epoch": 0.39, "grad_norm": 0.7104331851005554, "learning_rate": 1.3699679072646009e-05, "loss": 2.0757, "step": 11713 }, { "epoch": 0.39, "grad_norm": 0.7844958901405334, "learning_rate": 1.3698691576875392e-05, "loss": 2.1545, "step": 11714 }, { "epoch": 0.39, "grad_norm": 0.7634601593017578, "learning_rate": 1.3697704039319119e-05, "loss": 2.0961, "step": 11715 }, { "epoch": 0.39, "grad_norm": 0.7031099200248718, "learning_rate": 1.369671645998836e-05, "loss": 2.1251, "step": 11716 }, { "epoch": 0.39, "grad_norm": 0.7315368056297302, "learning_rate": 1.3695728838894258e-05, "loss": 2.0888, "step": 11717 }, { "epoch": 0.39, "grad_norm": 0.7246574759483337, "learning_rate": 1.3694741176047983e-05, "loss": 2.1324, "step": 11718 }, { "epoch": 0.39, "grad_norm": 0.733501672744751, "learning_rate": 1.3693753471460687e-05, "loss": 2.1518, "step": 11719 }, { "epoch": 0.39, "grad_norm": 0.7272926568984985, "learning_rate": 1.3692765725143527e-05, "loss": 2.1252, "step": 11720 }, { "epoch": 0.39, "grad_norm": 0.7733513116836548, "learning_rate": 1.3691777937107664e-05, "loss": 2.1165, "step": 11721 }, { "epoch": 0.39, "grad_norm": 0.8041664361953735, "learning_rate": 1.3690790107364257e-05, "loss": 2.1638, "step": 11722 }, { "epoch": 0.39, "grad_norm": 0.7449365258216858, "learning_rate": 1.3689802235924469e-05, "loss": 2.1418, "step": 11723 }, { "epoch": 0.39, "grad_norm": 0.7313663363456726, "learning_rate": 1.3688814322799457e-05, "loss": 2.1502, "step": 11724 }, { "epoch": 0.39, "grad_norm": 0.7385975122451782, "learning_rate": 1.3687826368000382e-05, "loss": 2.125, "step": 11725 }, { "epoch": 0.39, "grad_norm": 0.7681602239608765, "learning_rate": 1.3686838371538405e-05, "loss": 2.0685, "step": 11726 }, { "epoch": 0.39, "grad_norm": 0.733538031578064, "learning_rate": 1.3685850333424693e-05, "loss": 2.1005, "step": 11727 }, { "epoch": 0.39, "grad_norm": 0.7743049263954163, "learning_rate": 1.3684862253670397e-05, "loss": 2.0788, "step": 11728 }, { "epoch": 0.39, "grad_norm": 0.7338125705718994, "learning_rate": 1.3683874132286694e-05, "loss": 2.0682, "step": 11729 }, { "epoch": 0.39, "grad_norm": 0.7306209802627563, "learning_rate": 1.3682885969284736e-05, "loss": 2.1116, "step": 11730 }, { "epoch": 0.39, "grad_norm": 0.7301396727561951, "learning_rate": 1.3681897764675692e-05, "loss": 2.1278, "step": 11731 }, { "epoch": 0.39, "grad_norm": 0.7581589818000793, "learning_rate": 1.3680909518470725e-05, "loss": 2.132, "step": 11732 }, { "epoch": 0.39, "grad_norm": 0.7395617365837097, "learning_rate": 1.3679921230680997e-05, "loss": 2.1896, "step": 11733 }, { "epoch": 0.39, "grad_norm": 0.7247474193572998, "learning_rate": 1.3678932901317676e-05, "loss": 2.0744, "step": 11734 }, { "epoch": 0.39, "grad_norm": 0.727653443813324, "learning_rate": 1.3677944530391928e-05, "loss": 2.0326, "step": 11735 }, { "epoch": 0.39, "grad_norm": 0.7075906991958618, "learning_rate": 1.3676956117914919e-05, "loss": 2.0534, "step": 11736 }, { "epoch": 0.39, "grad_norm": 0.7425323724746704, "learning_rate": 1.3675967663897813e-05, "loss": 2.0904, "step": 11737 }, { "epoch": 0.39, "grad_norm": 0.7643874287605286, "learning_rate": 1.3674979168351782e-05, "loss": 2.0633, "step": 11738 }, { "epoch": 0.39, "grad_norm": 0.7117298245429993, "learning_rate": 1.3673990631287988e-05, "loss": 2.0662, "step": 11739 }, { "epoch": 0.39, "grad_norm": 0.6828161478042603, "learning_rate": 1.3673002052717603e-05, "loss": 2.0638, "step": 11740 }, { "epoch": 0.39, "grad_norm": 0.7172871232032776, "learning_rate": 1.3672013432651791e-05, "loss": 2.1276, "step": 11741 }, { "epoch": 0.39, "grad_norm": 0.6878655552864075, "learning_rate": 1.367102477110172e-05, "loss": 2.0929, "step": 11742 }, { "epoch": 0.39, "grad_norm": 0.7642264366149902, "learning_rate": 1.3670036068078569e-05, "loss": 2.1102, "step": 11743 }, { "epoch": 0.39, "grad_norm": 0.7359413504600525, "learning_rate": 1.3669047323593499e-05, "loss": 2.1085, "step": 11744 }, { "epoch": 0.39, "grad_norm": 0.6921376585960388, "learning_rate": 1.3668058537657682e-05, "loss": 2.1135, "step": 11745 }, { "epoch": 0.39, "grad_norm": 0.7098267078399658, "learning_rate": 1.3667069710282289e-05, "loss": 2.0348, "step": 11746 }, { "epoch": 0.39, "grad_norm": 0.7138068079948425, "learning_rate": 1.3666080841478493e-05, "loss": 2.1003, "step": 11747 }, { "epoch": 0.39, "grad_norm": 0.7155347466468811, "learning_rate": 1.3665091931257464e-05, "loss": 2.1124, "step": 11748 }, { "epoch": 0.39, "grad_norm": 0.7511319518089294, "learning_rate": 1.3664102979630374e-05, "loss": 2.1371, "step": 11749 }, { "epoch": 0.39, "grad_norm": 0.7296749353408813, "learning_rate": 1.3663113986608393e-05, "loss": 2.1696, "step": 11750 }, { "epoch": 0.39, "grad_norm": 0.7460076808929443, "learning_rate": 1.3662124952202703e-05, "loss": 2.1298, "step": 11751 }, { "epoch": 0.39, "grad_norm": 0.7265193462371826, "learning_rate": 1.3661135876424466e-05, "loss": 2.0483, "step": 11752 }, { "epoch": 0.39, "grad_norm": 0.7461183667182922, "learning_rate": 1.3660146759284863e-05, "loss": 2.0551, "step": 11753 }, { "epoch": 0.39, "grad_norm": 0.7553591728210449, "learning_rate": 1.3659157600795067e-05, "loss": 2.1206, "step": 11754 }, { "epoch": 0.39, "grad_norm": 0.7180603742599487, "learning_rate": 1.3658168400966254e-05, "loss": 2.0343, "step": 11755 }, { "epoch": 0.39, "grad_norm": 0.7250557541847229, "learning_rate": 1.3657179159809597e-05, "loss": 2.1298, "step": 11756 }, { "epoch": 0.39, "grad_norm": 0.7045844197273254, "learning_rate": 1.3656189877336274e-05, "loss": 2.0182, "step": 11757 }, { "epoch": 0.39, "grad_norm": 0.7270033359527588, "learning_rate": 1.3655200553557462e-05, "loss": 2.1452, "step": 11758 }, { "epoch": 0.39, "grad_norm": 0.7578840255737305, "learning_rate": 1.3654211188484333e-05, "loss": 2.1128, "step": 11759 }, { "epoch": 0.39, "grad_norm": 0.7205632925033569, "learning_rate": 1.3653221782128073e-05, "loss": 2.0798, "step": 11760 }, { "epoch": 0.39, "grad_norm": 0.7442613840103149, "learning_rate": 1.3652232334499847e-05, "loss": 2.0643, "step": 11761 }, { "epoch": 0.39, "grad_norm": 0.7242169380187988, "learning_rate": 1.3651242845610846e-05, "loss": 2.0407, "step": 11762 }, { "epoch": 0.39, "grad_norm": 0.7313669323921204, "learning_rate": 1.3650253315472241e-05, "loss": 2.1489, "step": 11763 }, { "epoch": 0.39, "grad_norm": 0.7545270323753357, "learning_rate": 1.3649263744095212e-05, "loss": 2.1099, "step": 11764 }, { "epoch": 0.39, "grad_norm": 0.7050315141677856, "learning_rate": 1.3648274131490942e-05, "loss": 2.0566, "step": 11765 }, { "epoch": 0.39, "grad_norm": 0.7374283075332642, "learning_rate": 1.3647284477670609e-05, "loss": 2.0849, "step": 11766 }, { "epoch": 0.39, "grad_norm": 0.7493853569030762, "learning_rate": 1.3646294782645393e-05, "loss": 2.1557, "step": 11767 }, { "epoch": 0.39, "grad_norm": 0.7397019267082214, "learning_rate": 1.3645305046426475e-05, "loss": 2.0967, "step": 11768 }, { "epoch": 0.39, "grad_norm": 0.7131369113922119, "learning_rate": 1.3644315269025037e-05, "loss": 2.0812, "step": 11769 }, { "epoch": 0.39, "grad_norm": 0.7269879579544067, "learning_rate": 1.364332545045226e-05, "loss": 2.1027, "step": 11770 }, { "epoch": 0.39, "grad_norm": 0.7159155011177063, "learning_rate": 1.3642335590719331e-05, "loss": 2.1522, "step": 11771 }, { "epoch": 0.39, "grad_norm": 0.7585508823394775, "learning_rate": 1.3641345689837424e-05, "loss": 2.0823, "step": 11772 }, { "epoch": 0.39, "grad_norm": 0.732868492603302, "learning_rate": 1.3640355747817731e-05, "loss": 2.0949, "step": 11773 }, { "epoch": 0.39, "grad_norm": 0.7338318228721619, "learning_rate": 1.3639365764671432e-05, "loss": 2.1353, "step": 11774 }, { "epoch": 0.39, "grad_norm": 0.7466054558753967, "learning_rate": 1.363837574040971e-05, "loss": 2.1409, "step": 11775 }, { "epoch": 0.39, "grad_norm": 0.7179787755012512, "learning_rate": 1.3637385675043753e-05, "loss": 2.0827, "step": 11776 }, { "epoch": 0.39, "grad_norm": 0.733527421951294, "learning_rate": 1.3636395568584744e-05, "loss": 2.153, "step": 11777 }, { "epoch": 0.39, "grad_norm": 0.7135565876960754, "learning_rate": 1.3635405421043868e-05, "loss": 2.0811, "step": 11778 }, { "epoch": 0.39, "grad_norm": 0.7211730480194092, "learning_rate": 1.3634415232432313e-05, "loss": 2.1196, "step": 11779 }, { "epoch": 0.39, "grad_norm": 0.7162937521934509, "learning_rate": 1.3633425002761262e-05, "loss": 2.0789, "step": 11780 }, { "epoch": 0.39, "grad_norm": 0.7458769679069519, "learning_rate": 1.3632434732041909e-05, "loss": 2.1288, "step": 11781 }, { "epoch": 0.39, "grad_norm": 0.7141501903533936, "learning_rate": 1.3631444420285436e-05, "loss": 2.1043, "step": 11782 }, { "epoch": 0.39, "grad_norm": 0.7233158946037292, "learning_rate": 1.3630454067503027e-05, "loss": 2.147, "step": 11783 }, { "epoch": 0.39, "grad_norm": 0.7284120917320251, "learning_rate": 1.362946367370588e-05, "loss": 2.1314, "step": 11784 }, { "epoch": 0.39, "grad_norm": 0.7159897685050964, "learning_rate": 1.3628473238905181e-05, "loss": 2.0526, "step": 11785 }, { "epoch": 0.39, "grad_norm": 0.7793608903884888, "learning_rate": 1.3627482763112115e-05, "loss": 2.0951, "step": 11786 }, { "epoch": 0.39, "grad_norm": 0.7869195938110352, "learning_rate": 1.3626492246337876e-05, "loss": 2.0777, "step": 11787 }, { "epoch": 0.39, "grad_norm": 0.8092279434204102, "learning_rate": 1.3625501688593654e-05, "loss": 2.0745, "step": 11788 }, { "epoch": 0.39, "grad_norm": 0.7590587735176086, "learning_rate": 1.3624511089890638e-05, "loss": 2.0162, "step": 11789 }, { "epoch": 0.39, "grad_norm": 0.7630215287208557, "learning_rate": 1.3623520450240018e-05, "loss": 2.1698, "step": 11790 }, { "epoch": 0.39, "grad_norm": 0.7258909344673157, "learning_rate": 1.3622529769652987e-05, "loss": 2.0457, "step": 11791 }, { "epoch": 0.39, "grad_norm": 0.7465696334838867, "learning_rate": 1.3621539048140741e-05, "loss": 2.1494, "step": 11792 }, { "epoch": 0.39, "grad_norm": 0.7444326877593994, "learning_rate": 1.3620548285714468e-05, "loss": 2.0528, "step": 11793 }, { "epoch": 0.39, "grad_norm": 0.7123231291770935, "learning_rate": 1.3619557482385362e-05, "loss": 2.0794, "step": 11794 }, { "epoch": 0.39, "grad_norm": 0.7381765246391296, "learning_rate": 1.3618566638164615e-05, "loss": 2.134, "step": 11795 }, { "epoch": 0.39, "grad_norm": 0.8029764890670776, "learning_rate": 1.3617575753063427e-05, "loss": 2.1034, "step": 11796 }, { "epoch": 0.39, "grad_norm": 0.7212918400764465, "learning_rate": 1.3616584827092984e-05, "loss": 2.0822, "step": 11797 }, { "epoch": 0.39, "grad_norm": 0.6852489113807678, "learning_rate": 1.361559386026449e-05, "loss": 2.0727, "step": 11798 }, { "epoch": 0.39, "grad_norm": 0.7195196747779846, "learning_rate": 1.3614602852589132e-05, "loss": 2.128, "step": 11799 }, { "epoch": 0.39, "grad_norm": 0.7447269558906555, "learning_rate": 1.3613611804078109e-05, "loss": 2.0551, "step": 11800 }, { "epoch": 0.39, "grad_norm": 0.70705646276474, "learning_rate": 1.3612620714742617e-05, "loss": 2.075, "step": 11801 }, { "epoch": 0.39, "grad_norm": 0.721500039100647, "learning_rate": 1.3611629584593856e-05, "loss": 2.0944, "step": 11802 }, { "epoch": 0.39, "grad_norm": 0.7489003539085388, "learning_rate": 1.3610638413643021e-05, "loss": 2.0642, "step": 11803 }, { "epoch": 0.39, "grad_norm": 0.7121352553367615, "learning_rate": 1.3609647201901305e-05, "loss": 2.1007, "step": 11804 }, { "epoch": 0.39, "grad_norm": 0.7280601859092712, "learning_rate": 1.3608655949379914e-05, "loss": 2.094, "step": 11805 }, { "epoch": 0.39, "grad_norm": 0.7160506844520569, "learning_rate": 1.3607664656090042e-05, "loss": 2.0419, "step": 11806 }, { "epoch": 0.39, "grad_norm": 0.7363958954811096, "learning_rate": 1.360667332204289e-05, "loss": 2.1453, "step": 11807 }, { "epoch": 0.39, "grad_norm": 0.7282476425170898, "learning_rate": 1.3605681947249654e-05, "loss": 2.131, "step": 11808 }, { "epoch": 0.39, "grad_norm": 0.717846155166626, "learning_rate": 1.360469053172154e-05, "loss": 2.1376, "step": 11809 }, { "epoch": 0.39, "grad_norm": 0.7346615195274353, "learning_rate": 1.3603699075469743e-05, "loss": 2.078, "step": 11810 }, { "epoch": 0.39, "grad_norm": 0.7356274127960205, "learning_rate": 1.3602707578505465e-05, "loss": 2.0375, "step": 11811 }, { "epoch": 0.39, "grad_norm": 0.750554621219635, "learning_rate": 1.360171604083991e-05, "loss": 2.149, "step": 11812 }, { "epoch": 0.39, "grad_norm": 0.7154950499534607, "learning_rate": 1.3600724462484276e-05, "loss": 2.0947, "step": 11813 }, { "epoch": 0.39, "grad_norm": 0.7440913915634155, "learning_rate": 1.359973284344977e-05, "loss": 2.1504, "step": 11814 }, { "epoch": 0.39, "grad_norm": 0.6969454884529114, "learning_rate": 1.3598741183747589e-05, "loss": 2.0589, "step": 11815 }, { "epoch": 0.39, "grad_norm": 0.7188594341278076, "learning_rate": 1.3597749483388941e-05, "loss": 2.1022, "step": 11816 }, { "epoch": 0.39, "grad_norm": 0.7729142904281616, "learning_rate": 1.3596757742385026e-05, "loss": 2.1188, "step": 11817 }, { "epoch": 0.39, "grad_norm": 0.7737226486206055, "learning_rate": 1.359576596074705e-05, "loss": 2.0984, "step": 11818 }, { "epoch": 0.39, "grad_norm": 0.7376880049705505, "learning_rate": 1.359477413848622e-05, "loss": 2.1221, "step": 11819 }, { "epoch": 0.39, "grad_norm": 0.7260102033615112, "learning_rate": 1.3593782275613738e-05, "loss": 2.0636, "step": 11820 }, { "epoch": 0.39, "grad_norm": 0.712247371673584, "learning_rate": 1.3592790372140807e-05, "loss": 2.1116, "step": 11821 }, { "epoch": 0.39, "grad_norm": 0.7451822757720947, "learning_rate": 1.359179842807864e-05, "loss": 2.0608, "step": 11822 }, { "epoch": 0.39, "grad_norm": 0.7092812061309814, "learning_rate": 1.3590806443438438e-05, "loss": 2.1458, "step": 11823 }, { "epoch": 0.39, "grad_norm": 0.7245237231254578, "learning_rate": 1.3589814418231408e-05, "loss": 2.0969, "step": 11824 }, { "epoch": 0.39, "grad_norm": 0.7172369956970215, "learning_rate": 1.3588822352468757e-05, "loss": 2.1557, "step": 11825 }, { "epoch": 0.39, "grad_norm": 0.7550198435783386, "learning_rate": 1.3587830246161699e-05, "loss": 2.1188, "step": 11826 }, { "epoch": 0.39, "grad_norm": 0.7420009970664978, "learning_rate": 1.3586838099321436e-05, "loss": 2.165, "step": 11827 }, { "epoch": 0.39, "grad_norm": 0.7174752354621887, "learning_rate": 1.3585845911959177e-05, "loss": 2.0657, "step": 11828 }, { "epoch": 0.39, "grad_norm": 0.7367106676101685, "learning_rate": 1.3584853684086132e-05, "loss": 2.1422, "step": 11829 }, { "epoch": 0.39, "grad_norm": 0.7473825812339783, "learning_rate": 1.358386141571351e-05, "loss": 2.1102, "step": 11830 }, { "epoch": 0.39, "grad_norm": 0.6973622441291809, "learning_rate": 1.3582869106852527e-05, "loss": 2.1118, "step": 11831 }, { "epoch": 0.39, "grad_norm": 0.7323245406150818, "learning_rate": 1.3581876757514383e-05, "loss": 2.0888, "step": 11832 }, { "epoch": 0.39, "grad_norm": 0.7049663066864014, "learning_rate": 1.35808843677103e-05, "loss": 2.099, "step": 11833 }, { "epoch": 0.39, "grad_norm": 0.7374347448348999, "learning_rate": 1.3579891937451479e-05, "loss": 2.0756, "step": 11834 }, { "epoch": 0.39, "grad_norm": 0.7212903499603271, "learning_rate": 1.357889946674914e-05, "loss": 2.1041, "step": 11835 }, { "epoch": 0.39, "grad_norm": 0.7214891314506531, "learning_rate": 1.3577906955614491e-05, "loss": 2.108, "step": 11836 }, { "epoch": 0.39, "grad_norm": 0.7283778190612793, "learning_rate": 1.3576914404058745e-05, "loss": 2.1127, "step": 11837 }, { "epoch": 0.39, "grad_norm": 0.7171215415000916, "learning_rate": 1.3575921812093118e-05, "loss": 2.0911, "step": 11838 }, { "epoch": 0.39, "grad_norm": 0.7304274439811707, "learning_rate": 1.3574929179728821e-05, "loss": 2.0783, "step": 11839 }, { "epoch": 0.39, "grad_norm": 0.7523946166038513, "learning_rate": 1.357393650697707e-05, "loss": 2.0963, "step": 11840 }, { "epoch": 0.39, "grad_norm": 0.7120776176452637, "learning_rate": 1.3572943793849081e-05, "loss": 2.0881, "step": 11841 }, { "epoch": 0.39, "grad_norm": 0.7136206030845642, "learning_rate": 1.3571951040356066e-05, "loss": 2.1311, "step": 11842 }, { "epoch": 0.39, "grad_norm": 0.7349733114242554, "learning_rate": 1.3570958246509238e-05, "loss": 2.0704, "step": 11843 }, { "epoch": 0.39, "grad_norm": 0.7401798367500305, "learning_rate": 1.3569965412319821e-05, "loss": 2.1256, "step": 11844 }, { "epoch": 0.39, "grad_norm": 0.7279854416847229, "learning_rate": 1.3568972537799023e-05, "loss": 2.0348, "step": 11845 }, { "epoch": 0.39, "grad_norm": 0.7442956566810608, "learning_rate": 1.3567979622958066e-05, "loss": 2.0971, "step": 11846 }, { "epoch": 0.39, "grad_norm": 0.7395093441009521, "learning_rate": 1.356698666780817e-05, "loss": 2.0695, "step": 11847 }, { "epoch": 0.39, "grad_norm": 0.7076857686042786, "learning_rate": 1.3565993672360545e-05, "loss": 2.0809, "step": 11848 }, { "epoch": 0.39, "grad_norm": 0.7494702339172363, "learning_rate": 1.3565000636626414e-05, "loss": 2.0849, "step": 11849 }, { "epoch": 0.39, "grad_norm": 0.7329906821250916, "learning_rate": 1.3564007560616994e-05, "loss": 2.1433, "step": 11850 }, { "epoch": 0.39, "grad_norm": 0.7262210845947266, "learning_rate": 1.3563014444343508e-05, "loss": 2.0718, "step": 11851 }, { "epoch": 0.39, "grad_norm": 0.7258151769638062, "learning_rate": 1.3562021287817171e-05, "loss": 2.1178, "step": 11852 }, { "epoch": 0.39, "grad_norm": 0.7193211317062378, "learning_rate": 1.3561028091049209e-05, "loss": 2.134, "step": 11853 }, { "epoch": 0.39, "grad_norm": 0.7477589249610901, "learning_rate": 1.3560034854050832e-05, "loss": 2.1748, "step": 11854 }, { "epoch": 0.39, "grad_norm": 0.7304595112800598, "learning_rate": 1.3559041576833275e-05, "loss": 2.1029, "step": 11855 }, { "epoch": 0.39, "grad_norm": 0.7145698070526123, "learning_rate": 1.3558048259407746e-05, "loss": 2.1139, "step": 11856 }, { "epoch": 0.39, "grad_norm": 0.7470434904098511, "learning_rate": 1.3557054901785474e-05, "loss": 2.0705, "step": 11857 }, { "epoch": 0.39, "grad_norm": 0.7447194457054138, "learning_rate": 1.3556061503977682e-05, "loss": 2.141, "step": 11858 }, { "epoch": 0.39, "grad_norm": 0.7773707509040833, "learning_rate": 1.3555068065995592e-05, "loss": 2.0955, "step": 11859 }, { "epoch": 0.39, "grad_norm": 0.7411269545555115, "learning_rate": 1.3554074587850423e-05, "loss": 2.1382, "step": 11860 }, { "epoch": 0.39, "grad_norm": 0.718757152557373, "learning_rate": 1.3553081069553404e-05, "loss": 2.131, "step": 11861 }, { "epoch": 0.39, "grad_norm": 0.7610073685646057, "learning_rate": 1.3552087511115758e-05, "loss": 2.1214, "step": 11862 }, { "epoch": 0.39, "grad_norm": 0.7159537076950073, "learning_rate": 1.355109391254871e-05, "loss": 2.086, "step": 11863 }, { "epoch": 0.39, "grad_norm": 0.723526120185852, "learning_rate": 1.3550100273863485e-05, "loss": 2.1387, "step": 11864 }, { "epoch": 0.39, "grad_norm": 0.7085689306259155, "learning_rate": 1.3549106595071301e-05, "loss": 2.1302, "step": 11865 }, { "epoch": 0.39, "grad_norm": 0.7286121845245361, "learning_rate": 1.3548112876183398e-05, "loss": 2.0922, "step": 11866 }, { "epoch": 0.39, "grad_norm": 0.7174853682518005, "learning_rate": 1.3547119117210991e-05, "loss": 2.1488, "step": 11867 }, { "epoch": 0.39, "grad_norm": 0.7198792695999146, "learning_rate": 1.3546125318165312e-05, "loss": 2.0757, "step": 11868 }, { "epoch": 0.39, "grad_norm": 0.7292694449424744, "learning_rate": 1.3545131479057588e-05, "loss": 2.0017, "step": 11869 }, { "epoch": 0.39, "grad_norm": 0.7365686893463135, "learning_rate": 1.3544137599899046e-05, "loss": 2.1266, "step": 11870 }, { "epoch": 0.39, "grad_norm": 0.7388505935668945, "learning_rate": 1.3543143680700915e-05, "loss": 2.1003, "step": 11871 }, { "epoch": 0.39, "grad_norm": 0.7343797087669373, "learning_rate": 1.3542149721474422e-05, "loss": 2.0624, "step": 11872 }, { "epoch": 0.4, "grad_norm": 0.7326977849006653, "learning_rate": 1.3541155722230798e-05, "loss": 2.1209, "step": 11873 }, { "epoch": 0.4, "grad_norm": 0.7104066610336304, "learning_rate": 1.3540161682981273e-05, "loss": 2.0663, "step": 11874 }, { "epoch": 0.4, "grad_norm": 0.7365987300872803, "learning_rate": 1.3539167603737076e-05, "loss": 2.1019, "step": 11875 }, { "epoch": 0.4, "grad_norm": 0.7244292497634888, "learning_rate": 1.3538173484509434e-05, "loss": 2.1085, "step": 11876 }, { "epoch": 0.4, "grad_norm": 0.7400307655334473, "learning_rate": 1.3537179325309584e-05, "loss": 2.1224, "step": 11877 }, { "epoch": 0.4, "grad_norm": 0.7349416017532349, "learning_rate": 1.3536185126148756e-05, "loss": 2.1256, "step": 11878 }, { "epoch": 0.4, "grad_norm": 0.7294501066207886, "learning_rate": 1.3535190887038176e-05, "loss": 2.1478, "step": 11879 }, { "epoch": 0.4, "grad_norm": 0.6963375806808472, "learning_rate": 1.3534196607989088e-05, "loss": 2.1318, "step": 11880 }, { "epoch": 0.4, "grad_norm": 0.7168729305267334, "learning_rate": 1.3533202289012715e-05, "loss": 2.1097, "step": 11881 }, { "epoch": 0.4, "grad_norm": 0.7568763494491577, "learning_rate": 1.3532207930120294e-05, "loss": 2.1822, "step": 11882 }, { "epoch": 0.4, "grad_norm": 0.7257993221282959, "learning_rate": 1.3531213531323055e-05, "loss": 2.0488, "step": 11883 }, { "epoch": 0.4, "grad_norm": 0.754767119884491, "learning_rate": 1.353021909263224e-05, "loss": 2.149, "step": 11884 }, { "epoch": 0.4, "grad_norm": 0.7038937211036682, "learning_rate": 1.3529224614059073e-05, "loss": 2.0851, "step": 11885 }, { "epoch": 0.4, "grad_norm": 0.7204558253288269, "learning_rate": 1.3528230095614801e-05, "loss": 2.0242, "step": 11886 }, { "epoch": 0.4, "grad_norm": 0.7038654088973999, "learning_rate": 1.3527235537310646e-05, "loss": 2.0865, "step": 11887 }, { "epoch": 0.4, "grad_norm": 0.7367478013038635, "learning_rate": 1.3526240939157855e-05, "loss": 2.1776, "step": 11888 }, { "epoch": 0.4, "grad_norm": 0.7200941443443298, "learning_rate": 1.3525246301167663e-05, "loss": 2.0742, "step": 11889 }, { "epoch": 0.4, "grad_norm": 0.7723875045776367, "learning_rate": 1.3524251623351297e-05, "loss": 2.1452, "step": 11890 }, { "epoch": 0.4, "grad_norm": 0.7163500785827637, "learning_rate": 1.3523256905720007e-05, "loss": 2.0615, "step": 11891 }, { "epoch": 0.4, "grad_norm": 0.7262644171714783, "learning_rate": 1.3522262148285025e-05, "loss": 2.0837, "step": 11892 }, { "epoch": 0.4, "grad_norm": 0.7281244993209839, "learning_rate": 1.3521267351057588e-05, "loss": 2.1059, "step": 11893 }, { "epoch": 0.4, "grad_norm": 0.7241297960281372, "learning_rate": 1.3520272514048937e-05, "loss": 2.147, "step": 11894 }, { "epoch": 0.4, "grad_norm": 0.7133578062057495, "learning_rate": 1.351927763727031e-05, "loss": 2.1647, "step": 11895 }, { "epoch": 0.4, "grad_norm": 0.7305067777633667, "learning_rate": 1.3518282720732947e-05, "loss": 2.095, "step": 11896 }, { "epoch": 0.4, "grad_norm": 0.7288134694099426, "learning_rate": 1.351728776444809e-05, "loss": 2.1072, "step": 11897 }, { "epoch": 0.4, "grad_norm": 0.723741888999939, "learning_rate": 1.3516292768426973e-05, "loss": 2.0583, "step": 11898 }, { "epoch": 0.4, "grad_norm": 0.7667611241340637, "learning_rate": 1.351529773268084e-05, "loss": 2.1075, "step": 11899 }, { "epoch": 0.4, "grad_norm": 0.7788020372390747, "learning_rate": 1.3514302657220939e-05, "loss": 2.0886, "step": 11900 }, { "epoch": 0.4, "grad_norm": 0.7150760293006897, "learning_rate": 1.3513307542058503e-05, "loss": 2.0834, "step": 11901 }, { "epoch": 0.4, "grad_norm": 0.7030854821205139, "learning_rate": 1.3512312387204778e-05, "loss": 2.1086, "step": 11902 }, { "epoch": 0.4, "grad_norm": 0.7624700665473938, "learning_rate": 1.3511317192671005e-05, "loss": 2.1323, "step": 11903 }, { "epoch": 0.4, "grad_norm": 0.7344890832901001, "learning_rate": 1.3510321958468428e-05, "loss": 2.106, "step": 11904 }, { "epoch": 0.4, "grad_norm": 0.7027426958084106, "learning_rate": 1.3509326684608292e-05, "loss": 2.1249, "step": 11905 }, { "epoch": 0.4, "grad_norm": 0.7222638726234436, "learning_rate": 1.350833137110184e-05, "loss": 2.1572, "step": 11906 }, { "epoch": 0.4, "grad_norm": 0.7451738715171814, "learning_rate": 1.3507336017960316e-05, "loss": 2.1378, "step": 11907 }, { "epoch": 0.4, "grad_norm": 0.7626450061798096, "learning_rate": 1.3506340625194967e-05, "loss": 2.1169, "step": 11908 }, { "epoch": 0.4, "grad_norm": 0.7147843837738037, "learning_rate": 1.3505345192817032e-05, "loss": 2.0305, "step": 11909 }, { "epoch": 0.4, "grad_norm": 0.7145608067512512, "learning_rate": 1.3504349720837762e-05, "loss": 2.0765, "step": 11910 }, { "epoch": 0.4, "grad_norm": 0.7192610502243042, "learning_rate": 1.3503354209268407e-05, "loss": 2.0684, "step": 11911 }, { "epoch": 0.4, "grad_norm": 0.7346543669700623, "learning_rate": 1.3502358658120205e-05, "loss": 2.1268, "step": 11912 }, { "epoch": 0.4, "grad_norm": 0.7392002940177917, "learning_rate": 1.3501363067404412e-05, "loss": 2.1106, "step": 11913 }, { "epoch": 0.4, "grad_norm": 0.7130147814750671, "learning_rate": 1.3500367437132264e-05, "loss": 2.0244, "step": 11914 }, { "epoch": 0.4, "grad_norm": 0.7438791990280151, "learning_rate": 1.3499371767315021e-05, "loss": 2.0854, "step": 11915 }, { "epoch": 0.4, "grad_norm": 0.7050479054450989, "learning_rate": 1.3498376057963927e-05, "loss": 2.1197, "step": 11916 }, { "epoch": 0.4, "grad_norm": 0.7010165452957153, "learning_rate": 1.349738030909023e-05, "loss": 2.1193, "step": 11917 }, { "epoch": 0.4, "grad_norm": 0.7917079329490662, "learning_rate": 1.349638452070518e-05, "loss": 2.1634, "step": 11918 }, { "epoch": 0.4, "grad_norm": 0.7302039265632629, "learning_rate": 1.3495388692820023e-05, "loss": 2.102, "step": 11919 }, { "epoch": 0.4, "grad_norm": 0.6980583667755127, "learning_rate": 1.3494392825446018e-05, "loss": 2.0532, "step": 11920 }, { "epoch": 0.4, "grad_norm": 0.7171124815940857, "learning_rate": 1.349339691859441e-05, "loss": 2.1219, "step": 11921 }, { "epoch": 0.4, "grad_norm": 0.7243660092353821, "learning_rate": 1.349240097227645e-05, "loss": 2.0254, "step": 11922 }, { "epoch": 0.4, "grad_norm": 0.7567772269248962, "learning_rate": 1.3491404986503388e-05, "loss": 2.0524, "step": 11923 }, { "epoch": 0.4, "grad_norm": 0.6997441649436951, "learning_rate": 1.3490408961286483e-05, "loss": 2.1235, "step": 11924 }, { "epoch": 0.4, "grad_norm": 0.7804303169250488, "learning_rate": 1.348941289663698e-05, "loss": 2.0649, "step": 11925 }, { "epoch": 0.4, "grad_norm": 0.716002881526947, "learning_rate": 1.3488416792566138e-05, "loss": 2.1837, "step": 11926 }, { "epoch": 0.4, "grad_norm": 0.7299790978431702, "learning_rate": 1.3487420649085205e-05, "loss": 2.1671, "step": 11927 }, { "epoch": 0.4, "grad_norm": 0.7241437435150146, "learning_rate": 1.3486424466205439e-05, "loss": 2.1574, "step": 11928 }, { "epoch": 0.4, "grad_norm": 0.7270132899284363, "learning_rate": 1.3485428243938092e-05, "loss": 2.1026, "step": 11929 }, { "epoch": 0.4, "grad_norm": 0.7270392775535583, "learning_rate": 1.348443198229442e-05, "loss": 2.1644, "step": 11930 }, { "epoch": 0.4, "grad_norm": 0.7319567799568176, "learning_rate": 1.3483435681285675e-05, "loss": 2.1369, "step": 11931 }, { "epoch": 0.4, "grad_norm": 0.7070012092590332, "learning_rate": 1.3482439340923118e-05, "loss": 2.0983, "step": 11932 }, { "epoch": 0.4, "grad_norm": 0.712510883808136, "learning_rate": 1.3481442961218e-05, "loss": 2.0888, "step": 11933 }, { "epoch": 0.4, "grad_norm": 0.7225126028060913, "learning_rate": 1.348044654218158e-05, "loss": 2.0894, "step": 11934 }, { "epoch": 0.4, "grad_norm": 0.7503625750541687, "learning_rate": 1.3479450083825118e-05, "loss": 2.1406, "step": 11935 }, { "epoch": 0.4, "grad_norm": 0.7262725830078125, "learning_rate": 1.3478453586159864e-05, "loss": 2.1001, "step": 11936 }, { "epoch": 0.4, "grad_norm": 0.7298868894577026, "learning_rate": 1.3477457049197083e-05, "loss": 2.122, "step": 11937 }, { "epoch": 0.4, "grad_norm": 0.6974429488182068, "learning_rate": 1.347646047294803e-05, "loss": 2.0752, "step": 11938 }, { "epoch": 0.4, "grad_norm": 0.7239841818809509, "learning_rate": 1.3475463857423963e-05, "loss": 2.1476, "step": 11939 }, { "epoch": 0.4, "grad_norm": 0.7338855862617493, "learning_rate": 1.3474467202636138e-05, "loss": 2.0711, "step": 11940 }, { "epoch": 0.4, "grad_norm": 0.7322626113891602, "learning_rate": 1.3473470508595826e-05, "loss": 1.9713, "step": 11941 }, { "epoch": 0.4, "grad_norm": 0.6999083161354065, "learning_rate": 1.3472473775314274e-05, "loss": 2.0507, "step": 11942 }, { "epoch": 0.4, "grad_norm": 0.7115495204925537, "learning_rate": 1.3471477002802753e-05, "loss": 2.059, "step": 11943 }, { "epoch": 0.4, "grad_norm": 0.7371779084205627, "learning_rate": 1.3470480191072518e-05, "loss": 2.0217, "step": 11944 }, { "epoch": 0.4, "grad_norm": 0.707308828830719, "learning_rate": 1.346948334013483e-05, "loss": 2.0485, "step": 11945 }, { "epoch": 0.4, "grad_norm": 0.735703706741333, "learning_rate": 1.3468486450000954e-05, "loss": 2.1268, "step": 11946 }, { "epoch": 0.4, "grad_norm": 0.7145293354988098, "learning_rate": 1.3467489520682148e-05, "loss": 2.0691, "step": 11947 }, { "epoch": 0.4, "grad_norm": 0.7297286987304688, "learning_rate": 1.3466492552189683e-05, "loss": 2.1146, "step": 11948 }, { "epoch": 0.4, "grad_norm": 0.7017959356307983, "learning_rate": 1.3465495544534812e-05, "loss": 2.0913, "step": 11949 }, { "epoch": 0.4, "grad_norm": 0.7871559858322144, "learning_rate": 1.3464498497728804e-05, "loss": 2.1557, "step": 11950 }, { "epoch": 0.4, "grad_norm": 0.7523760795593262, "learning_rate": 1.3463501411782924e-05, "loss": 2.1079, "step": 11951 }, { "epoch": 0.4, "grad_norm": 0.7428274750709534, "learning_rate": 1.3462504286708436e-05, "loss": 2.1154, "step": 11952 }, { "epoch": 0.4, "grad_norm": 0.7221001386642456, "learning_rate": 1.3461507122516601e-05, "loss": 2.1259, "step": 11953 }, { "epoch": 0.4, "grad_norm": 0.7462368011474609, "learning_rate": 1.3460509919218688e-05, "loss": 2.1448, "step": 11954 }, { "epoch": 0.4, "grad_norm": 0.7348706722259521, "learning_rate": 1.345951267682596e-05, "loss": 2.1367, "step": 11955 }, { "epoch": 0.4, "grad_norm": 0.7168831825256348, "learning_rate": 1.345851539534969e-05, "loss": 2.1278, "step": 11956 }, { "epoch": 0.4, "grad_norm": 0.7091690301895142, "learning_rate": 1.345751807480114e-05, "loss": 2.0625, "step": 11957 }, { "epoch": 0.4, "grad_norm": 0.749040424823761, "learning_rate": 1.345652071519157e-05, "loss": 2.1614, "step": 11958 }, { "epoch": 0.4, "grad_norm": 0.7181079387664795, "learning_rate": 1.3455523316532262e-05, "loss": 2.1376, "step": 11959 }, { "epoch": 0.4, "grad_norm": 0.7624078392982483, "learning_rate": 1.3454525878834472e-05, "loss": 2.0957, "step": 11960 }, { "epoch": 0.4, "grad_norm": 0.7558721303939819, "learning_rate": 1.3453528402109475e-05, "loss": 2.1243, "step": 11961 }, { "epoch": 0.4, "grad_norm": 0.7175999283790588, "learning_rate": 1.345253088636854e-05, "loss": 2.1014, "step": 11962 }, { "epoch": 0.4, "grad_norm": 0.7405567169189453, "learning_rate": 1.3451533331622933e-05, "loss": 2.1304, "step": 11963 }, { "epoch": 0.4, "grad_norm": 0.747665524482727, "learning_rate": 1.3450535737883923e-05, "loss": 2.0849, "step": 11964 }, { "epoch": 0.4, "grad_norm": 0.7248052954673767, "learning_rate": 1.3449538105162786e-05, "loss": 2.0836, "step": 11965 }, { "epoch": 0.4, "grad_norm": 0.7160945534706116, "learning_rate": 1.3448540433470784e-05, "loss": 2.0646, "step": 11966 }, { "epoch": 0.4, "grad_norm": 0.7389582395553589, "learning_rate": 1.3447542722819197e-05, "loss": 2.0902, "step": 11967 }, { "epoch": 0.4, "grad_norm": 0.7161489129066467, "learning_rate": 1.3446544973219294e-05, "loss": 2.1447, "step": 11968 }, { "epoch": 0.4, "grad_norm": 0.729282557964325, "learning_rate": 1.3445547184682341e-05, "loss": 2.0559, "step": 11969 }, { "epoch": 0.4, "grad_norm": 0.7201874256134033, "learning_rate": 1.344454935721962e-05, "loss": 2.0604, "step": 11970 }, { "epoch": 0.4, "grad_norm": 0.7166064381599426, "learning_rate": 1.34435514908424e-05, "loss": 2.0367, "step": 11971 }, { "epoch": 0.4, "grad_norm": 0.7325381636619568, "learning_rate": 1.3442553585561946e-05, "loss": 2.1063, "step": 11972 }, { "epoch": 0.4, "grad_norm": 0.7519670724868774, "learning_rate": 1.3441555641389548e-05, "loss": 2.1117, "step": 11973 }, { "epoch": 0.4, "grad_norm": 0.7265070080757141, "learning_rate": 1.3440557658336465e-05, "loss": 2.117, "step": 11974 }, { "epoch": 0.4, "grad_norm": 0.720403254032135, "learning_rate": 1.3439559636413982e-05, "loss": 1.9995, "step": 11975 }, { "epoch": 0.4, "grad_norm": 0.7042388916015625, "learning_rate": 1.3438561575633366e-05, "loss": 2.0648, "step": 11976 }, { "epoch": 0.4, "grad_norm": 0.7401152849197388, "learning_rate": 1.34375634760059e-05, "loss": 2.0547, "step": 11977 }, { "epoch": 0.4, "grad_norm": 0.7344746589660645, "learning_rate": 1.3436565337542855e-05, "loss": 2.1087, "step": 11978 }, { "epoch": 0.4, "grad_norm": 0.6882182359695435, "learning_rate": 1.343556716025551e-05, "loss": 2.0829, "step": 11979 }, { "epoch": 0.4, "grad_norm": 0.7547155618667603, "learning_rate": 1.3434568944155137e-05, "loss": 2.0933, "step": 11980 }, { "epoch": 0.4, "grad_norm": 0.7404521703720093, "learning_rate": 1.3433570689253021e-05, "loss": 2.1393, "step": 11981 }, { "epoch": 0.4, "grad_norm": 0.7344704270362854, "learning_rate": 1.3432572395560435e-05, "loss": 2.1244, "step": 11982 }, { "epoch": 0.4, "grad_norm": 0.736875593662262, "learning_rate": 1.3431574063088655e-05, "loss": 2.0785, "step": 11983 }, { "epoch": 0.4, "grad_norm": 0.736531674861908, "learning_rate": 1.3430575691848966e-05, "loss": 2.1712, "step": 11984 }, { "epoch": 0.4, "grad_norm": 0.732238233089447, "learning_rate": 1.3429577281852639e-05, "loss": 2.0907, "step": 11985 }, { "epoch": 0.4, "grad_norm": 0.7236172556877136, "learning_rate": 1.3428578833110962e-05, "loss": 2.1248, "step": 11986 }, { "epoch": 0.4, "grad_norm": 0.7297419905662537, "learning_rate": 1.342758034563521e-05, "loss": 2.0684, "step": 11987 }, { "epoch": 0.4, "grad_norm": 0.7087078094482422, "learning_rate": 1.3426581819436662e-05, "loss": 2.1134, "step": 11988 }, { "epoch": 0.4, "grad_norm": 0.7172901630401611, "learning_rate": 1.34255832545266e-05, "loss": 2.0963, "step": 11989 }, { "epoch": 0.4, "grad_norm": 0.7438780665397644, "learning_rate": 1.342458465091631e-05, "loss": 2.1603, "step": 11990 }, { "epoch": 0.4, "grad_norm": 0.6966897249221802, "learning_rate": 1.3423586008617066e-05, "loss": 2.144, "step": 11991 }, { "epoch": 0.4, "grad_norm": 0.7477492094039917, "learning_rate": 1.3422587327640156e-05, "loss": 2.1948, "step": 11992 }, { "epoch": 0.4, "grad_norm": 0.7102459669113159, "learning_rate": 1.342158860799686e-05, "loss": 2.044, "step": 11993 }, { "epoch": 0.4, "grad_norm": 0.6995379328727722, "learning_rate": 1.3420589849698458e-05, "loss": 2.0633, "step": 11994 }, { "epoch": 0.4, "grad_norm": 0.7036551833152771, "learning_rate": 1.341959105275624e-05, "loss": 2.1438, "step": 11995 }, { "epoch": 0.4, "grad_norm": 0.7439143657684326, "learning_rate": 1.3418592217181488e-05, "loss": 2.1255, "step": 11996 }, { "epoch": 0.4, "grad_norm": 0.7276679873466492, "learning_rate": 1.341759334298548e-05, "loss": 2.067, "step": 11997 }, { "epoch": 0.4, "grad_norm": 0.7191269397735596, "learning_rate": 1.3416594430179507e-05, "loss": 2.1727, "step": 11998 }, { "epoch": 0.4, "grad_norm": 0.7358136177062988, "learning_rate": 1.3415595478774852e-05, "loss": 2.0976, "step": 11999 }, { "epoch": 0.4, "grad_norm": 0.7344049215316772, "learning_rate": 1.3414596488782803e-05, "loss": 2.0877, "step": 12000 }, { "epoch": 0.4, "grad_norm": 0.7179331183433533, "learning_rate": 1.3413597460214645e-05, "loss": 2.0674, "step": 12001 }, { "epoch": 0.4, "grad_norm": 0.7403781414031982, "learning_rate": 1.3412598393081657e-05, "loss": 2.0537, "step": 12002 }, { "epoch": 0.4, "grad_norm": 0.7281312942504883, "learning_rate": 1.3411599287395138e-05, "loss": 2.0579, "step": 12003 }, { "epoch": 0.4, "grad_norm": 0.7021412253379822, "learning_rate": 1.341060014316637e-05, "loss": 2.1481, "step": 12004 }, { "epoch": 0.4, "grad_norm": 0.6905468106269836, "learning_rate": 1.3409600960406636e-05, "loss": 2.1128, "step": 12005 }, { "epoch": 0.4, "grad_norm": 0.764636754989624, "learning_rate": 1.3408601739127233e-05, "loss": 2.0651, "step": 12006 }, { "epoch": 0.4, "grad_norm": 0.7115615606307983, "learning_rate": 1.3407602479339444e-05, "loss": 2.1367, "step": 12007 }, { "epoch": 0.4, "grad_norm": 0.7363144755363464, "learning_rate": 1.3406603181054557e-05, "loss": 2.1003, "step": 12008 }, { "epoch": 0.4, "grad_norm": 0.7762720584869385, "learning_rate": 1.3405603844283865e-05, "loss": 2.191, "step": 12009 }, { "epoch": 0.4, "grad_norm": 0.7243630886077881, "learning_rate": 1.3404604469038658e-05, "loss": 2.1183, "step": 12010 }, { "epoch": 0.4, "grad_norm": 0.7665592432022095, "learning_rate": 1.3403605055330225e-05, "loss": 2.0819, "step": 12011 }, { "epoch": 0.4, "grad_norm": 0.7728646397590637, "learning_rate": 1.3402605603169856e-05, "loss": 2.1356, "step": 12012 }, { "epoch": 0.4, "grad_norm": 0.7162576913833618, "learning_rate": 1.3401606112568841e-05, "loss": 2.0983, "step": 12013 }, { "epoch": 0.4, "grad_norm": 0.7321494221687317, "learning_rate": 1.3400606583538477e-05, "loss": 2.1502, "step": 12014 }, { "epoch": 0.4, "grad_norm": 0.7294401526451111, "learning_rate": 1.3399607016090049e-05, "loss": 2.1794, "step": 12015 }, { "epoch": 0.4, "grad_norm": 0.7285364270210266, "learning_rate": 1.3398607410234856e-05, "loss": 2.0643, "step": 12016 }, { "epoch": 0.4, "grad_norm": 0.7331371903419495, "learning_rate": 1.339760776598419e-05, "loss": 2.1587, "step": 12017 }, { "epoch": 0.4, "grad_norm": 0.7316774129867554, "learning_rate": 1.3396608083349338e-05, "loss": 2.089, "step": 12018 }, { "epoch": 0.4, "grad_norm": 0.7160951495170593, "learning_rate": 1.3395608362341604e-05, "loss": 1.9923, "step": 12019 }, { "epoch": 0.4, "grad_norm": 0.7584801316261292, "learning_rate": 1.3394608602972273e-05, "loss": 2.1518, "step": 12020 }, { "epoch": 0.4, "grad_norm": 0.731639564037323, "learning_rate": 1.3393608805252642e-05, "loss": 2.1022, "step": 12021 }, { "epoch": 0.4, "grad_norm": 0.7300428748130798, "learning_rate": 1.339260896919401e-05, "loss": 2.0602, "step": 12022 }, { "epoch": 0.4, "grad_norm": 0.7085554003715515, "learning_rate": 1.339160909480767e-05, "loss": 2.1673, "step": 12023 }, { "epoch": 0.4, "grad_norm": 0.7818900942802429, "learning_rate": 1.339060918210492e-05, "loss": 2.0619, "step": 12024 }, { "epoch": 0.4, "grad_norm": 0.718970000743866, "learning_rate": 1.3389609231097049e-05, "loss": 2.0802, "step": 12025 }, { "epoch": 0.4, "grad_norm": 0.700148344039917, "learning_rate": 1.3388609241795366e-05, "loss": 2.0414, "step": 12026 }, { "epoch": 0.4, "grad_norm": 0.7287668585777283, "learning_rate": 1.3387609214211156e-05, "loss": 2.1654, "step": 12027 }, { "epoch": 0.4, "grad_norm": 0.7149532437324524, "learning_rate": 1.3386609148355726e-05, "loss": 2.0606, "step": 12028 }, { "epoch": 0.4, "grad_norm": 0.7514705657958984, "learning_rate": 1.3385609044240369e-05, "loss": 2.0765, "step": 12029 }, { "epoch": 0.4, "grad_norm": 0.7484687566757202, "learning_rate": 1.3384608901876388e-05, "loss": 2.2026, "step": 12030 }, { "epoch": 0.4, "grad_norm": 0.726549506187439, "learning_rate": 1.3383608721275077e-05, "loss": 2.141, "step": 12031 }, { "epoch": 0.4, "grad_norm": 0.7661895751953125, "learning_rate": 1.3382608502447738e-05, "loss": 2.0832, "step": 12032 }, { "epoch": 0.4, "grad_norm": 0.7306628227233887, "learning_rate": 1.3381608245405668e-05, "loss": 2.1097, "step": 12033 }, { "epoch": 0.4, "grad_norm": 0.7323367595672607, "learning_rate": 1.3380607950160174e-05, "loss": 2.0683, "step": 12034 }, { "epoch": 0.4, "grad_norm": 0.7466384172439575, "learning_rate": 1.337960761672255e-05, "loss": 2.041, "step": 12035 }, { "epoch": 0.4, "grad_norm": 0.7382234930992126, "learning_rate": 1.3378607245104101e-05, "loss": 2.1145, "step": 12036 }, { "epoch": 0.4, "grad_norm": 0.7467278838157654, "learning_rate": 1.3377606835316125e-05, "loss": 2.1153, "step": 12037 }, { "epoch": 0.4, "grad_norm": 0.7356166243553162, "learning_rate": 1.3376606387369928e-05, "loss": 1.9944, "step": 12038 }, { "epoch": 0.4, "grad_norm": 0.7498576045036316, "learning_rate": 1.3375605901276813e-05, "loss": 2.1375, "step": 12039 }, { "epoch": 0.4, "grad_norm": 0.7336161732673645, "learning_rate": 1.3374605377048078e-05, "loss": 2.0669, "step": 12040 }, { "epoch": 0.4, "grad_norm": 0.7627463340759277, "learning_rate": 1.3373604814695033e-05, "loss": 2.1358, "step": 12041 }, { "epoch": 0.4, "grad_norm": 0.715947687625885, "learning_rate": 1.3372604214228975e-05, "loss": 2.0842, "step": 12042 }, { "epoch": 0.4, "grad_norm": 0.7084338068962097, "learning_rate": 1.3371603575661212e-05, "loss": 2.1022, "step": 12043 }, { "epoch": 0.4, "grad_norm": 0.7530382871627808, "learning_rate": 1.3370602899003044e-05, "loss": 2.1372, "step": 12044 }, { "epoch": 0.4, "grad_norm": 0.7993972301483154, "learning_rate": 1.3369602184265784e-05, "loss": 2.1295, "step": 12045 }, { "epoch": 0.4, "grad_norm": 0.7330203652381897, "learning_rate": 1.3368601431460733e-05, "loss": 2.1008, "step": 12046 }, { "epoch": 0.4, "grad_norm": 0.7318942546844482, "learning_rate": 1.3367600640599196e-05, "loss": 2.0997, "step": 12047 }, { "epoch": 0.4, "grad_norm": 0.7715089917182922, "learning_rate": 1.336659981169248e-05, "loss": 2.1062, "step": 12048 }, { "epoch": 0.4, "grad_norm": 0.7368652820587158, "learning_rate": 1.3365598944751893e-05, "loss": 2.0819, "step": 12049 }, { "epoch": 0.4, "grad_norm": 0.7263789772987366, "learning_rate": 1.3364598039788743e-05, "loss": 2.0323, "step": 12050 }, { "epoch": 0.4, "grad_norm": 0.7159261703491211, "learning_rate": 1.3363597096814333e-05, "loss": 2.1078, "step": 12051 }, { "epoch": 0.4, "grad_norm": 0.7335701584815979, "learning_rate": 1.3362596115839979e-05, "loss": 2.0681, "step": 12052 }, { "epoch": 0.4, "grad_norm": 0.7015700340270996, "learning_rate": 1.3361595096876983e-05, "loss": 2.0875, "step": 12053 }, { "epoch": 0.4, "grad_norm": 0.704871654510498, "learning_rate": 1.3360594039936654e-05, "loss": 2.1055, "step": 12054 }, { "epoch": 0.4, "grad_norm": 0.7387040853500366, "learning_rate": 1.3359592945030304e-05, "loss": 2.0598, "step": 12055 }, { "epoch": 0.4, "grad_norm": 0.708371639251709, "learning_rate": 1.3358591812169242e-05, "loss": 2.1104, "step": 12056 }, { "epoch": 0.4, "grad_norm": 0.7675510048866272, "learning_rate": 1.3357590641364777e-05, "loss": 2.1269, "step": 12057 }, { "epoch": 0.4, "grad_norm": 0.7276513576507568, "learning_rate": 1.3356589432628222e-05, "loss": 2.0552, "step": 12058 }, { "epoch": 0.4, "grad_norm": 0.7227287292480469, "learning_rate": 1.3355588185970885e-05, "loss": 2.1679, "step": 12059 }, { "epoch": 0.4, "grad_norm": 0.7557696104049683, "learning_rate": 1.3354586901404081e-05, "loss": 2.1392, "step": 12060 }, { "epoch": 0.4, "grad_norm": 0.7497695088386536, "learning_rate": 1.3353585578939121e-05, "loss": 2.1092, "step": 12061 }, { "epoch": 0.4, "grad_norm": 0.742592453956604, "learning_rate": 1.3352584218587312e-05, "loss": 2.0385, "step": 12062 }, { "epoch": 0.4, "grad_norm": 0.6988925337791443, "learning_rate": 1.3351582820359976e-05, "loss": 2.0614, "step": 12063 }, { "epoch": 0.4, "grad_norm": 0.7404292225837708, "learning_rate": 1.335058138426842e-05, "loss": 2.0912, "step": 12064 }, { "epoch": 0.4, "grad_norm": 0.7507045269012451, "learning_rate": 1.3349579910323958e-05, "loss": 2.0967, "step": 12065 }, { "epoch": 0.4, "grad_norm": 0.7688963413238525, "learning_rate": 1.3348578398537905e-05, "loss": 2.1536, "step": 12066 }, { "epoch": 0.4, "grad_norm": 0.7715846300125122, "learning_rate": 1.334757684892158e-05, "loss": 2.0161, "step": 12067 }, { "epoch": 0.4, "grad_norm": 0.7235500812530518, "learning_rate": 1.334657526148629e-05, "loss": 2.0715, "step": 12068 }, { "epoch": 0.4, "grad_norm": 0.734069287776947, "learning_rate": 1.3345573636243355e-05, "loss": 2.1677, "step": 12069 }, { "epoch": 0.4, "grad_norm": 0.7506744861602783, "learning_rate": 1.3344571973204088e-05, "loss": 2.1195, "step": 12070 }, { "epoch": 0.4, "grad_norm": 0.7169836163520813, "learning_rate": 1.3343570272379807e-05, "loss": 2.1327, "step": 12071 }, { "epoch": 0.4, "grad_norm": 0.7448582649230957, "learning_rate": 1.3342568533781833e-05, "loss": 2.0265, "step": 12072 }, { "epoch": 0.4, "grad_norm": 0.7182772159576416, "learning_rate": 1.3341566757421474e-05, "loss": 2.0622, "step": 12073 }, { "epoch": 0.4, "grad_norm": 0.7488613128662109, "learning_rate": 1.3340564943310055e-05, "loss": 2.0701, "step": 12074 }, { "epoch": 0.4, "grad_norm": 0.7862052321434021, "learning_rate": 1.333956309145889e-05, "loss": 2.0725, "step": 12075 }, { "epoch": 0.4, "grad_norm": 0.7152290940284729, "learning_rate": 1.3338561201879295e-05, "loss": 2.0119, "step": 12076 }, { "epoch": 0.4, "grad_norm": 0.7075240612030029, "learning_rate": 1.3337559274582596e-05, "loss": 2.0566, "step": 12077 }, { "epoch": 0.4, "grad_norm": 0.7444201111793518, "learning_rate": 1.333655730958011e-05, "loss": 2.1379, "step": 12078 }, { "epoch": 0.4, "grad_norm": 0.7348806262016296, "learning_rate": 1.333555530688315e-05, "loss": 2.1088, "step": 12079 }, { "epoch": 0.4, "grad_norm": 0.757260799407959, "learning_rate": 1.3334553266503043e-05, "loss": 2.1445, "step": 12080 }, { "epoch": 0.4, "grad_norm": 0.7476964592933655, "learning_rate": 1.3333551188451109e-05, "loss": 2.1272, "step": 12081 }, { "epoch": 0.4, "grad_norm": 0.7153235077857971, "learning_rate": 1.3332549072738665e-05, "loss": 2.042, "step": 12082 }, { "epoch": 0.4, "grad_norm": 0.718843936920166, "learning_rate": 1.3331546919377039e-05, "loss": 2.0495, "step": 12083 }, { "epoch": 0.4, "grad_norm": 0.7286083698272705, "learning_rate": 1.333054472837754e-05, "loss": 2.1144, "step": 12084 }, { "epoch": 0.4, "grad_norm": 0.7480261325836182, "learning_rate": 1.3329542499751507e-05, "loss": 2.0668, "step": 12085 }, { "epoch": 0.4, "grad_norm": 0.7608886361122131, "learning_rate": 1.332854023351025e-05, "loss": 2.0802, "step": 12086 }, { "epoch": 0.4, "grad_norm": 0.7223628759384155, "learning_rate": 1.3327537929665096e-05, "loss": 2.0372, "step": 12087 }, { "epoch": 0.4, "grad_norm": 0.7423355579376221, "learning_rate": 1.3326535588227371e-05, "loss": 2.0686, "step": 12088 }, { "epoch": 0.4, "grad_norm": 0.7143242359161377, "learning_rate": 1.3325533209208395e-05, "loss": 2.0487, "step": 12089 }, { "epoch": 0.4, "grad_norm": 0.729502260684967, "learning_rate": 1.3324530792619494e-05, "loss": 2.1068, "step": 12090 }, { "epoch": 0.4, "grad_norm": 0.724786102771759, "learning_rate": 1.3323528338471991e-05, "loss": 2.1124, "step": 12091 }, { "epoch": 0.4, "grad_norm": 0.733322262763977, "learning_rate": 1.3322525846777214e-05, "loss": 2.0895, "step": 12092 }, { "epoch": 0.4, "grad_norm": 0.7134036421775818, "learning_rate": 1.3321523317546488e-05, "loss": 2.0739, "step": 12093 }, { "epoch": 0.4, "grad_norm": 0.7441741228103638, "learning_rate": 1.332052075079114e-05, "loss": 2.0501, "step": 12094 }, { "epoch": 0.4, "grad_norm": 0.7421995401382446, "learning_rate": 1.331951814652249e-05, "loss": 2.1202, "step": 12095 }, { "epoch": 0.4, "grad_norm": 0.7337427139282227, "learning_rate": 1.331851550475187e-05, "loss": 2.116, "step": 12096 }, { "epoch": 0.4, "grad_norm": 0.7612642049789429, "learning_rate": 1.3317512825490608e-05, "loss": 2.1247, "step": 12097 }, { "epoch": 0.4, "grad_norm": 0.7377074956893921, "learning_rate": 1.331651010875003e-05, "loss": 2.1202, "step": 12098 }, { "epoch": 0.4, "grad_norm": 0.7289406061172485, "learning_rate": 1.3315507354541467e-05, "loss": 2.0346, "step": 12099 }, { "epoch": 0.4, "grad_norm": 0.7488502860069275, "learning_rate": 1.331450456287624e-05, "loss": 2.0708, "step": 12100 }, { "epoch": 0.4, "grad_norm": 0.7228421568870544, "learning_rate": 1.3313501733765687e-05, "loss": 2.0734, "step": 12101 }, { "epoch": 0.4, "grad_norm": 0.7770979404449463, "learning_rate": 1.3312498867221133e-05, "loss": 2.1616, "step": 12102 }, { "epoch": 0.4, "grad_norm": 0.7226687669754028, "learning_rate": 1.3311495963253907e-05, "loss": 2.0697, "step": 12103 }, { "epoch": 0.4, "grad_norm": 0.7017569541931152, "learning_rate": 1.3310493021875339e-05, "loss": 2.0903, "step": 12104 }, { "epoch": 0.4, "grad_norm": 0.7225120067596436, "learning_rate": 1.3309490043096765e-05, "loss": 1.9901, "step": 12105 }, { "epoch": 0.4, "grad_norm": 0.7058452367782593, "learning_rate": 1.3308487026929506e-05, "loss": 2.0884, "step": 12106 }, { "epoch": 0.4, "grad_norm": 0.7359464168548584, "learning_rate": 1.3307483973384902e-05, "loss": 2.1107, "step": 12107 }, { "epoch": 0.4, "grad_norm": 0.7240065932273865, "learning_rate": 1.3306480882474287e-05, "loss": 2.1334, "step": 12108 }, { "epoch": 0.4, "grad_norm": 0.7140375375747681, "learning_rate": 1.3305477754208982e-05, "loss": 2.1181, "step": 12109 }, { "epoch": 0.4, "grad_norm": 0.7515427470207214, "learning_rate": 1.3304474588600332e-05, "loss": 2.1244, "step": 12110 }, { "epoch": 0.4, "grad_norm": 0.7220250368118286, "learning_rate": 1.330347138565966e-05, "loss": 2.0481, "step": 12111 }, { "epoch": 0.4, "grad_norm": 0.7371188402175903, "learning_rate": 1.3302468145398309e-05, "loss": 2.0658, "step": 12112 }, { "epoch": 0.4, "grad_norm": 0.7189971804618835, "learning_rate": 1.3301464867827606e-05, "loss": 2.0647, "step": 12113 }, { "epoch": 0.4, "grad_norm": 0.7000138163566589, "learning_rate": 1.3300461552958887e-05, "loss": 2.0953, "step": 12114 }, { "epoch": 0.4, "grad_norm": 0.7683405876159668, "learning_rate": 1.329945820080349e-05, "loss": 2.0594, "step": 12115 }, { "epoch": 0.4, "grad_norm": 0.7733820080757141, "learning_rate": 1.3298454811372746e-05, "loss": 2.1801, "step": 12116 }, { "epoch": 0.4, "grad_norm": 0.7567668557167053, "learning_rate": 1.329745138467799e-05, "loss": 2.1038, "step": 12117 }, { "epoch": 0.4, "grad_norm": 0.7354212999343872, "learning_rate": 1.3296447920730566e-05, "loss": 2.0977, "step": 12118 }, { "epoch": 0.4, "grad_norm": 0.7005856037139893, "learning_rate": 1.3295444419541804e-05, "loss": 2.0696, "step": 12119 }, { "epoch": 0.4, "grad_norm": 0.7595085501670837, "learning_rate": 1.3294440881123039e-05, "loss": 2.0424, "step": 12120 }, { "epoch": 0.4, "grad_norm": 0.7395510077476501, "learning_rate": 1.3293437305485617e-05, "loss": 2.0878, "step": 12121 }, { "epoch": 0.4, "grad_norm": 0.7587229013442993, "learning_rate": 1.3292433692640864e-05, "loss": 2.1559, "step": 12122 }, { "epoch": 0.4, "grad_norm": 0.7649913430213928, "learning_rate": 1.3291430042600131e-05, "loss": 2.157, "step": 12123 }, { "epoch": 0.4, "grad_norm": 0.7135988473892212, "learning_rate": 1.3290426355374747e-05, "loss": 2.0855, "step": 12124 }, { "epoch": 0.4, "grad_norm": 0.7831515073776245, "learning_rate": 1.3289422630976054e-05, "loss": 2.1327, "step": 12125 }, { "epoch": 0.4, "grad_norm": 0.7143268585205078, "learning_rate": 1.3288418869415394e-05, "loss": 2.0556, "step": 12126 }, { "epoch": 0.4, "grad_norm": 0.757780909538269, "learning_rate": 1.3287415070704102e-05, "loss": 2.0898, "step": 12127 }, { "epoch": 0.4, "grad_norm": 0.7206072211265564, "learning_rate": 1.3286411234853524e-05, "loss": 2.1595, "step": 12128 }, { "epoch": 0.4, "grad_norm": 0.758520781993866, "learning_rate": 1.3285407361874995e-05, "loss": 2.1164, "step": 12129 }, { "epoch": 0.4, "grad_norm": 0.7994642853736877, "learning_rate": 1.3284403451779862e-05, "loss": 2.1227, "step": 12130 }, { "epoch": 0.4, "grad_norm": 0.7674486637115479, "learning_rate": 1.3283399504579462e-05, "loss": 2.1092, "step": 12131 }, { "epoch": 0.4, "grad_norm": 0.730057954788208, "learning_rate": 1.3282395520285141e-05, "loss": 2.1024, "step": 12132 }, { "epoch": 0.4, "grad_norm": 0.7449609041213989, "learning_rate": 1.3281391498908235e-05, "loss": 2.0773, "step": 12133 }, { "epoch": 0.4, "grad_norm": 0.7412946224212646, "learning_rate": 1.3280387440460094e-05, "loss": 2.0648, "step": 12134 }, { "epoch": 0.4, "grad_norm": 0.7678388953208923, "learning_rate": 1.3279383344952057e-05, "loss": 2.119, "step": 12135 }, { "epoch": 0.4, "grad_norm": 0.7435850501060486, "learning_rate": 1.3278379212395469e-05, "loss": 2.1387, "step": 12136 }, { "epoch": 0.4, "grad_norm": 0.7180525660514832, "learning_rate": 1.3277375042801675e-05, "loss": 2.0609, "step": 12137 }, { "epoch": 0.4, "grad_norm": 0.7182612419128418, "learning_rate": 1.3276370836182018e-05, "loss": 2.0757, "step": 12138 }, { "epoch": 0.4, "grad_norm": 0.738795280456543, "learning_rate": 1.3275366592547843e-05, "loss": 2.1215, "step": 12139 }, { "epoch": 0.4, "grad_norm": 0.740969717502594, "learning_rate": 1.3274362311910497e-05, "loss": 2.081, "step": 12140 }, { "epoch": 0.4, "grad_norm": 0.7093883752822876, "learning_rate": 1.3273357994281322e-05, "loss": 2.0255, "step": 12141 }, { "epoch": 0.4, "grad_norm": 0.7246757745742798, "learning_rate": 1.3272353639671667e-05, "loss": 2.0168, "step": 12142 }, { "epoch": 0.4, "grad_norm": 0.7177174091339111, "learning_rate": 1.3271349248092882e-05, "loss": 2.0693, "step": 12143 }, { "epoch": 0.4, "grad_norm": 0.7357671856880188, "learning_rate": 1.3270344819556304e-05, "loss": 2.0559, "step": 12144 }, { "epoch": 0.4, "grad_norm": 0.7713660001754761, "learning_rate": 1.3269340354073295e-05, "loss": 2.0531, "step": 12145 }, { "epoch": 0.4, "grad_norm": 0.7783529162406921, "learning_rate": 1.326833585165519e-05, "loss": 2.1948, "step": 12146 }, { "epoch": 0.4, "grad_norm": 0.7201218008995056, "learning_rate": 1.326733131231334e-05, "loss": 2.1798, "step": 12147 }, { "epoch": 0.4, "grad_norm": 0.771898627281189, "learning_rate": 1.3266326736059098e-05, "loss": 2.1223, "step": 12148 }, { "epoch": 0.4, "grad_norm": 0.7134940028190613, "learning_rate": 1.3265322122903812e-05, "loss": 2.0542, "step": 12149 }, { "epoch": 0.4, "grad_norm": 0.7506200075149536, "learning_rate": 1.326431747285883e-05, "loss": 2.1791, "step": 12150 }, { "epoch": 0.4, "grad_norm": 0.7342522144317627, "learning_rate": 1.32633127859355e-05, "loss": 2.0727, "step": 12151 }, { "epoch": 0.4, "grad_norm": 0.7490595579147339, "learning_rate": 1.3262308062145178e-05, "loss": 2.0272, "step": 12152 }, { "epoch": 0.4, "grad_norm": 0.7310972809791565, "learning_rate": 1.3261303301499209e-05, "loss": 2.0318, "step": 12153 }, { "epoch": 0.4, "grad_norm": 0.7424598336219788, "learning_rate": 1.3260298504008948e-05, "loss": 2.1796, "step": 12154 }, { "epoch": 0.4, "grad_norm": 0.7339668869972229, "learning_rate": 1.3259293669685742e-05, "loss": 2.1373, "step": 12155 }, { "epoch": 0.4, "grad_norm": 0.7330799698829651, "learning_rate": 1.325828879854095e-05, "loss": 2.0477, "step": 12156 }, { "epoch": 0.4, "grad_norm": 0.7548856735229492, "learning_rate": 1.3257283890585919e-05, "loss": 2.1257, "step": 12157 }, { "epoch": 0.4, "grad_norm": 0.7211975455284119, "learning_rate": 1.3256278945832004e-05, "loss": 2.0301, "step": 12158 }, { "epoch": 0.4, "grad_norm": 0.7330728769302368, "learning_rate": 1.3255273964290559e-05, "loss": 2.0662, "step": 12159 }, { "epoch": 0.4, "grad_norm": 0.7182918190956116, "learning_rate": 1.3254268945972935e-05, "loss": 2.0601, "step": 12160 }, { "epoch": 0.4, "grad_norm": 0.7329309582710266, "learning_rate": 1.3253263890890486e-05, "loss": 2.144, "step": 12161 }, { "epoch": 0.4, "grad_norm": 0.7323415875434875, "learning_rate": 1.3252258799054572e-05, "loss": 2.1493, "step": 12162 }, { "epoch": 0.4, "grad_norm": 0.7347413897514343, "learning_rate": 1.325125367047654e-05, "loss": 2.0207, "step": 12163 }, { "epoch": 0.4, "grad_norm": 0.7081376910209656, "learning_rate": 1.3250248505167753e-05, "loss": 2.161, "step": 12164 }, { "epoch": 0.4, "grad_norm": 0.7458366751670837, "learning_rate": 1.3249243303139562e-05, "loss": 2.0663, "step": 12165 }, { "epoch": 0.4, "grad_norm": 0.7380081415176392, "learning_rate": 1.3248238064403322e-05, "loss": 2.149, "step": 12166 }, { "epoch": 0.4, "grad_norm": 0.739449143409729, "learning_rate": 1.3247232788970397e-05, "loss": 2.0985, "step": 12167 }, { "epoch": 0.4, "grad_norm": 0.737318754196167, "learning_rate": 1.3246227476852135e-05, "loss": 2.1306, "step": 12168 }, { "epoch": 0.4, "grad_norm": 0.7357999682426453, "learning_rate": 1.3245222128059898e-05, "loss": 2.1393, "step": 12169 }, { "epoch": 0.4, "grad_norm": 0.7420468926429749, "learning_rate": 1.3244216742605045e-05, "loss": 2.1582, "step": 12170 }, { "epoch": 0.4, "grad_norm": 0.7335938811302185, "learning_rate": 1.3243211320498931e-05, "loss": 2.0893, "step": 12171 }, { "epoch": 0.4, "grad_norm": 0.7411210536956787, "learning_rate": 1.3242205861752917e-05, "loss": 2.0188, "step": 12172 }, { "epoch": 0.4, "grad_norm": 0.734242856502533, "learning_rate": 1.3241200366378361e-05, "loss": 2.0853, "step": 12173 }, { "epoch": 0.41, "grad_norm": 0.761043131351471, "learning_rate": 1.3240194834386624e-05, "loss": 2.1214, "step": 12174 }, { "epoch": 0.41, "grad_norm": 0.7509834170341492, "learning_rate": 1.3239189265789063e-05, "loss": 2.1148, "step": 12175 }, { "epoch": 0.41, "grad_norm": 0.699530303478241, "learning_rate": 1.3238183660597042e-05, "loss": 2.1627, "step": 12176 }, { "epoch": 0.41, "grad_norm": 0.7220861911773682, "learning_rate": 1.3237178018821916e-05, "loss": 2.0795, "step": 12177 }, { "epoch": 0.41, "grad_norm": 0.7164317965507507, "learning_rate": 1.3236172340475055e-05, "loss": 2.0654, "step": 12178 }, { "epoch": 0.41, "grad_norm": 0.7442899346351624, "learning_rate": 1.3235166625567813e-05, "loss": 2.1254, "step": 12179 }, { "epoch": 0.41, "grad_norm": 0.7266655564308167, "learning_rate": 1.323416087411155e-05, "loss": 2.1311, "step": 12180 }, { "epoch": 0.41, "grad_norm": 0.7420675158500671, "learning_rate": 1.3233155086117639e-05, "loss": 2.1089, "step": 12181 }, { "epoch": 0.41, "grad_norm": 0.7587383389472961, "learning_rate": 1.3232149261597436e-05, "loss": 2.1242, "step": 12182 }, { "epoch": 0.41, "grad_norm": 0.7147238254547119, "learning_rate": 1.3231143400562303e-05, "loss": 2.1317, "step": 12183 }, { "epoch": 0.41, "grad_norm": 0.7093467116355896, "learning_rate": 1.3230137503023606e-05, "loss": 2.0587, "step": 12184 }, { "epoch": 0.41, "grad_norm": 0.7572020292282104, "learning_rate": 1.3229131568992708e-05, "loss": 2.1475, "step": 12185 }, { "epoch": 0.41, "grad_norm": 0.7225651741027832, "learning_rate": 1.3228125598480974e-05, "loss": 2.0567, "step": 12186 }, { "epoch": 0.41, "grad_norm": 0.7124921679496765, "learning_rate": 1.3227119591499771e-05, "loss": 2.0968, "step": 12187 }, { "epoch": 0.41, "grad_norm": 0.7775607705116272, "learning_rate": 1.3226113548060457e-05, "loss": 2.1322, "step": 12188 }, { "epoch": 0.41, "grad_norm": 0.7556065320968628, "learning_rate": 1.322510746817441e-05, "loss": 2.1538, "step": 12189 }, { "epoch": 0.41, "grad_norm": 0.7152615785598755, "learning_rate": 1.3224101351852986e-05, "loss": 2.1077, "step": 12190 }, { "epoch": 0.41, "grad_norm": 0.7348170280456543, "learning_rate": 1.3223095199107553e-05, "loss": 2.1235, "step": 12191 }, { "epoch": 0.41, "grad_norm": 0.7330371737480164, "learning_rate": 1.322208900994948e-05, "loss": 2.0824, "step": 12192 }, { "epoch": 0.41, "grad_norm": 0.752933144569397, "learning_rate": 1.3221082784390133e-05, "loss": 2.1588, "step": 12193 }, { "epoch": 0.41, "grad_norm": 0.7272346019744873, "learning_rate": 1.322007652244088e-05, "loss": 2.0777, "step": 12194 }, { "epoch": 0.41, "grad_norm": 0.7402298450469971, "learning_rate": 1.3219070224113093e-05, "loss": 2.0758, "step": 12195 }, { "epoch": 0.41, "grad_norm": 0.7377109527587891, "learning_rate": 1.3218063889418134e-05, "loss": 2.1124, "step": 12196 }, { "epoch": 0.41, "grad_norm": 0.7469210028648376, "learning_rate": 1.3217057518367375e-05, "loss": 2.0467, "step": 12197 }, { "epoch": 0.41, "grad_norm": 0.7463890314102173, "learning_rate": 1.321605111097219e-05, "loss": 2.1604, "step": 12198 }, { "epoch": 0.41, "grad_norm": 0.710898756980896, "learning_rate": 1.321504466724394e-05, "loss": 2.0383, "step": 12199 }, { "epoch": 0.41, "grad_norm": 0.7388975620269775, "learning_rate": 1.3214038187193998e-05, "loss": 2.0603, "step": 12200 }, { "epoch": 0.41, "grad_norm": 0.7719639539718628, "learning_rate": 1.321303167083374e-05, "loss": 2.125, "step": 12201 }, { "epoch": 0.41, "grad_norm": 0.7088715434074402, "learning_rate": 1.321202511817453e-05, "loss": 2.0342, "step": 12202 }, { "epoch": 0.41, "grad_norm": 0.7219780087471008, "learning_rate": 1.3211018529227748e-05, "loss": 2.0137, "step": 12203 }, { "epoch": 0.41, "grad_norm": 0.7552051544189453, "learning_rate": 1.3210011904004753e-05, "loss": 2.0909, "step": 12204 }, { "epoch": 0.41, "grad_norm": 0.7172917127609253, "learning_rate": 1.3209005242516933e-05, "loss": 2.0871, "step": 12205 }, { "epoch": 0.41, "grad_norm": 0.7143824100494385, "learning_rate": 1.3207998544775646e-05, "loss": 2.1013, "step": 12206 }, { "epoch": 0.41, "grad_norm": 0.7500854134559631, "learning_rate": 1.3206991810792276e-05, "loss": 2.0933, "step": 12207 }, { "epoch": 0.41, "grad_norm": 0.7154642343521118, "learning_rate": 1.3205985040578188e-05, "loss": 2.0465, "step": 12208 }, { "epoch": 0.41, "grad_norm": 0.7345568537712097, "learning_rate": 1.3204978234144763e-05, "loss": 2.1788, "step": 12209 }, { "epoch": 0.41, "grad_norm": 0.733279287815094, "learning_rate": 1.320397139150337e-05, "loss": 2.135, "step": 12210 }, { "epoch": 0.41, "grad_norm": 0.7153021693229675, "learning_rate": 1.3202964512665386e-05, "loss": 2.0769, "step": 12211 }, { "epoch": 0.41, "grad_norm": 0.7709648013114929, "learning_rate": 1.3201957597642188e-05, "loss": 2.132, "step": 12212 }, { "epoch": 0.41, "grad_norm": 0.7457278966903687, "learning_rate": 1.3200950646445148e-05, "loss": 2.0645, "step": 12213 }, { "epoch": 0.41, "grad_norm": 0.7288813591003418, "learning_rate": 1.3199943659085645e-05, "loss": 2.0466, "step": 12214 }, { "epoch": 0.41, "grad_norm": 0.7319962382316589, "learning_rate": 1.319893663557505e-05, "loss": 2.1492, "step": 12215 }, { "epoch": 0.41, "grad_norm": 0.7162802219390869, "learning_rate": 1.3197929575924749e-05, "loss": 2.168, "step": 12216 }, { "epoch": 0.41, "grad_norm": 0.7164115905761719, "learning_rate": 1.319692248014611e-05, "loss": 2.1021, "step": 12217 }, { "epoch": 0.41, "grad_norm": 0.7337713241577148, "learning_rate": 1.3195915348250516e-05, "loss": 2.0957, "step": 12218 }, { "epoch": 0.41, "grad_norm": 0.7305305600166321, "learning_rate": 1.3194908180249345e-05, "loss": 2.0436, "step": 12219 }, { "epoch": 0.41, "grad_norm": 0.7298184037208557, "learning_rate": 1.3193900976153971e-05, "loss": 2.1199, "step": 12220 }, { "epoch": 0.41, "grad_norm": 0.6861364841461182, "learning_rate": 1.3192893735975775e-05, "loss": 2.1148, "step": 12221 }, { "epoch": 0.41, "grad_norm": 0.745052695274353, "learning_rate": 1.319188645972614e-05, "loss": 2.0689, "step": 12222 }, { "epoch": 0.41, "grad_norm": 0.7128831744194031, "learning_rate": 1.319087914741644e-05, "loss": 2.0969, "step": 12223 }, { "epoch": 0.41, "grad_norm": 0.7533825039863586, "learning_rate": 1.3189871799058058e-05, "loss": 2.0121, "step": 12224 }, { "epoch": 0.41, "grad_norm": 0.764360785484314, "learning_rate": 1.3188864414662377e-05, "loss": 2.0778, "step": 12225 }, { "epoch": 0.41, "grad_norm": 0.7718948125839233, "learning_rate": 1.318785699424077e-05, "loss": 2.0817, "step": 12226 }, { "epoch": 0.41, "grad_norm": 0.7558844685554504, "learning_rate": 1.3186849537804625e-05, "loss": 2.091, "step": 12227 }, { "epoch": 0.41, "grad_norm": 0.7332499623298645, "learning_rate": 1.3185842045365323e-05, "loss": 2.0769, "step": 12228 }, { "epoch": 0.41, "grad_norm": 0.7730721235275269, "learning_rate": 1.3184834516934242e-05, "loss": 2.1087, "step": 12229 }, { "epoch": 0.41, "grad_norm": 0.7223451733589172, "learning_rate": 1.3183826952522766e-05, "loss": 2.052, "step": 12230 }, { "epoch": 0.41, "grad_norm": 0.7387200593948364, "learning_rate": 1.3182819352142281e-05, "loss": 2.1251, "step": 12231 }, { "epoch": 0.41, "grad_norm": 0.7380063533782959, "learning_rate": 1.3181811715804169e-05, "loss": 2.1268, "step": 12232 }, { "epoch": 0.41, "grad_norm": 0.7524533867835999, "learning_rate": 1.3180804043519813e-05, "loss": 2.1285, "step": 12233 }, { "epoch": 0.41, "grad_norm": 0.733268141746521, "learning_rate": 1.3179796335300594e-05, "loss": 2.1486, "step": 12234 }, { "epoch": 0.41, "grad_norm": 0.711117148399353, "learning_rate": 1.3178788591157903e-05, "loss": 2.0419, "step": 12235 }, { "epoch": 0.41, "grad_norm": 0.7309277653694153, "learning_rate": 1.3177780811103122e-05, "loss": 2.0573, "step": 12236 }, { "epoch": 0.41, "grad_norm": 0.7389529943466187, "learning_rate": 1.3176772995147631e-05, "loss": 2.0573, "step": 12237 }, { "epoch": 0.41, "grad_norm": 0.7231329083442688, "learning_rate": 1.3175765143302827e-05, "loss": 2.1174, "step": 12238 }, { "epoch": 0.41, "grad_norm": 0.7504206895828247, "learning_rate": 1.3174757255580086e-05, "loss": 2.104, "step": 12239 }, { "epoch": 0.41, "grad_norm": 0.7122098207473755, "learning_rate": 1.3173749331990797e-05, "loss": 2.0573, "step": 12240 }, { "epoch": 0.41, "grad_norm": 0.738332211971283, "learning_rate": 1.317274137254635e-05, "loss": 2.0993, "step": 12241 }, { "epoch": 0.41, "grad_norm": 0.7200453877449036, "learning_rate": 1.3171733377258129e-05, "loss": 2.0298, "step": 12242 }, { "epoch": 0.41, "grad_norm": 0.7603911757469177, "learning_rate": 1.3170725346137524e-05, "loss": 2.0486, "step": 12243 }, { "epoch": 0.41, "grad_norm": 0.6965784430503845, "learning_rate": 1.3169717279195922e-05, "loss": 2.081, "step": 12244 }, { "epoch": 0.41, "grad_norm": 0.728667140007019, "learning_rate": 1.3168709176444711e-05, "loss": 2.0689, "step": 12245 }, { "epoch": 0.41, "grad_norm": 0.7706298232078552, "learning_rate": 1.3167701037895282e-05, "loss": 2.0656, "step": 12246 }, { "epoch": 0.41, "grad_norm": 0.8332446217536926, "learning_rate": 1.3166692863559025e-05, "loss": 2.1349, "step": 12247 }, { "epoch": 0.41, "grad_norm": 0.7225379347801208, "learning_rate": 1.3165684653447324e-05, "loss": 2.1644, "step": 12248 }, { "epoch": 0.41, "grad_norm": 0.7151829600334167, "learning_rate": 1.3164676407571577e-05, "loss": 2.0665, "step": 12249 }, { "epoch": 0.41, "grad_norm": 0.7026948928833008, "learning_rate": 1.3163668125943167e-05, "loss": 2.0818, "step": 12250 }, { "epoch": 0.41, "grad_norm": 0.7538250684738159, "learning_rate": 1.316265980857349e-05, "loss": 2.0982, "step": 12251 }, { "epoch": 0.41, "grad_norm": 0.7229146957397461, "learning_rate": 1.3161651455473936e-05, "loss": 2.1051, "step": 12252 }, { "epoch": 0.41, "grad_norm": 0.7220286726951599, "learning_rate": 1.3160643066655897e-05, "loss": 2.1054, "step": 12253 }, { "epoch": 0.41, "grad_norm": 0.7010679841041565, "learning_rate": 1.3159634642130766e-05, "loss": 2.0999, "step": 12254 }, { "epoch": 0.41, "grad_norm": 0.709977924823761, "learning_rate": 1.3158626181909933e-05, "loss": 2.0737, "step": 12255 }, { "epoch": 0.41, "grad_norm": 0.7584277391433716, "learning_rate": 1.3157617686004792e-05, "loss": 2.1818, "step": 12256 }, { "epoch": 0.41, "grad_norm": 0.7256103157997131, "learning_rate": 1.3156609154426736e-05, "loss": 2.0949, "step": 12257 }, { "epoch": 0.41, "grad_norm": 0.6991605758666992, "learning_rate": 1.3155600587187163e-05, "loss": 2.1322, "step": 12258 }, { "epoch": 0.41, "grad_norm": 0.7396019101142883, "learning_rate": 1.3154591984297462e-05, "loss": 2.0982, "step": 12259 }, { "epoch": 0.41, "grad_norm": 0.7315061092376709, "learning_rate": 1.315358334576903e-05, "loss": 2.2019, "step": 12260 }, { "epoch": 0.41, "grad_norm": 0.7197700142860413, "learning_rate": 1.3152574671613263e-05, "loss": 2.114, "step": 12261 }, { "epoch": 0.41, "grad_norm": 0.7214488387107849, "learning_rate": 1.3151565961841552e-05, "loss": 2.0674, "step": 12262 }, { "epoch": 0.41, "grad_norm": 0.7106820940971375, "learning_rate": 1.3150557216465299e-05, "loss": 2.0424, "step": 12263 }, { "epoch": 0.41, "grad_norm": 0.7551635503768921, "learning_rate": 1.3149548435495894e-05, "loss": 2.1015, "step": 12264 }, { "epoch": 0.41, "grad_norm": 0.7381037473678589, "learning_rate": 1.3148539618944738e-05, "loss": 2.1013, "step": 12265 }, { "epoch": 0.41, "grad_norm": 0.7137131094932556, "learning_rate": 1.3147530766823224e-05, "loss": 2.1085, "step": 12266 }, { "epoch": 0.41, "grad_norm": 0.7035143971443176, "learning_rate": 1.3146521879142754e-05, "loss": 2.1048, "step": 12267 }, { "epoch": 0.41, "grad_norm": 0.7187929749488831, "learning_rate": 1.3145512955914726e-05, "loss": 2.0852, "step": 12268 }, { "epoch": 0.41, "grad_norm": 0.7122571468353271, "learning_rate": 1.3144503997150536e-05, "loss": 2.0894, "step": 12269 }, { "epoch": 0.41, "grad_norm": 0.7035654187202454, "learning_rate": 1.3143495002861579e-05, "loss": 2.0255, "step": 12270 }, { "epoch": 0.41, "grad_norm": 0.6940398216247559, "learning_rate": 1.314248597305926e-05, "loss": 2.0829, "step": 12271 }, { "epoch": 0.41, "grad_norm": 0.7191868424415588, "learning_rate": 1.3141476907754975e-05, "loss": 2.0492, "step": 12272 }, { "epoch": 0.41, "grad_norm": 0.770883321762085, "learning_rate": 1.3140467806960127e-05, "loss": 2.0887, "step": 12273 }, { "epoch": 0.41, "grad_norm": 0.7466403841972351, "learning_rate": 1.3139458670686114e-05, "loss": 2.0788, "step": 12274 }, { "epoch": 0.41, "grad_norm": 0.7153128385543823, "learning_rate": 1.3138449498944334e-05, "loss": 2.0958, "step": 12275 }, { "epoch": 0.41, "grad_norm": 0.7169919013977051, "learning_rate": 1.3137440291746192e-05, "loss": 2.047, "step": 12276 }, { "epoch": 0.41, "grad_norm": 0.7219117283821106, "learning_rate": 1.3136431049103091e-05, "loss": 2.1615, "step": 12277 }, { "epoch": 0.41, "grad_norm": 0.7174742817878723, "learning_rate": 1.3135421771026428e-05, "loss": 2.0657, "step": 12278 }, { "epoch": 0.41, "grad_norm": 0.7144287824630737, "learning_rate": 1.3134412457527608e-05, "loss": 2.0515, "step": 12279 }, { "epoch": 0.41, "grad_norm": 0.7230450510978699, "learning_rate": 1.3133403108618032e-05, "loss": 2.0, "step": 12280 }, { "epoch": 0.41, "grad_norm": 0.7241331338882446, "learning_rate": 1.3132393724309106e-05, "loss": 2.0619, "step": 12281 }, { "epoch": 0.41, "grad_norm": 0.7462328672409058, "learning_rate": 1.3131384304612229e-05, "loss": 2.1105, "step": 12282 }, { "epoch": 0.41, "grad_norm": 0.7317826747894287, "learning_rate": 1.3130374849538809e-05, "loss": 2.1196, "step": 12283 }, { "epoch": 0.41, "grad_norm": 0.719585120677948, "learning_rate": 1.3129365359100246e-05, "loss": 2.0432, "step": 12284 }, { "epoch": 0.41, "grad_norm": 0.7141981720924377, "learning_rate": 1.312835583330795e-05, "loss": 2.0894, "step": 12285 }, { "epoch": 0.41, "grad_norm": 0.7380064725875854, "learning_rate": 1.3127346272173322e-05, "loss": 2.1083, "step": 12286 }, { "epoch": 0.41, "grad_norm": 0.7349861264228821, "learning_rate": 1.3126336675707769e-05, "loss": 2.14, "step": 12287 }, { "epoch": 0.41, "grad_norm": 0.7635281085968018, "learning_rate": 1.3125327043922695e-05, "loss": 2.1023, "step": 12288 }, { "epoch": 0.41, "grad_norm": 0.7234963178634644, "learning_rate": 1.3124317376829509e-05, "loss": 2.1169, "step": 12289 }, { "epoch": 0.41, "grad_norm": 0.7389782071113586, "learning_rate": 1.3123307674439618e-05, "loss": 2.0732, "step": 12290 }, { "epoch": 0.41, "grad_norm": 0.7262029647827148, "learning_rate": 1.3122297936764428e-05, "loss": 2.0917, "step": 12291 }, { "epoch": 0.41, "grad_norm": 0.7388591170310974, "learning_rate": 1.312128816381534e-05, "loss": 2.178, "step": 12292 }, { "epoch": 0.41, "grad_norm": 0.7062615156173706, "learning_rate": 1.3120278355603773e-05, "loss": 2.0834, "step": 12293 }, { "epoch": 0.41, "grad_norm": 0.7468334436416626, "learning_rate": 1.3119268512141128e-05, "loss": 2.1267, "step": 12294 }, { "epoch": 0.41, "grad_norm": 0.730571985244751, "learning_rate": 1.3118258633438812e-05, "loss": 2.0835, "step": 12295 }, { "epoch": 0.41, "grad_norm": 0.7390456795692444, "learning_rate": 1.3117248719508244e-05, "loss": 2.0981, "step": 12296 }, { "epoch": 0.41, "grad_norm": 0.7342653274536133, "learning_rate": 1.3116238770360823e-05, "loss": 2.1061, "step": 12297 }, { "epoch": 0.41, "grad_norm": 0.7776727080345154, "learning_rate": 1.3115228786007963e-05, "loss": 2.1455, "step": 12298 }, { "epoch": 0.41, "grad_norm": 0.7341132164001465, "learning_rate": 1.3114218766461072e-05, "loss": 2.0983, "step": 12299 }, { "epoch": 0.41, "grad_norm": 0.7398295402526855, "learning_rate": 1.3113208711731567e-05, "loss": 2.1011, "step": 12300 }, { "epoch": 0.41, "grad_norm": 0.7450608015060425, "learning_rate": 1.3112198621830852e-05, "loss": 1.9763, "step": 12301 }, { "epoch": 0.41, "grad_norm": 0.7357621788978577, "learning_rate": 1.3111188496770342e-05, "loss": 2.1598, "step": 12302 }, { "epoch": 0.41, "grad_norm": 0.7268281579017639, "learning_rate": 1.3110178336561444e-05, "loss": 2.0508, "step": 12303 }, { "epoch": 0.41, "grad_norm": 0.7181071639060974, "learning_rate": 1.310916814121558e-05, "loss": 2.0474, "step": 12304 }, { "epoch": 0.41, "grad_norm": 0.7495671510696411, "learning_rate": 1.310815791074415e-05, "loss": 2.106, "step": 12305 }, { "epoch": 0.41, "grad_norm": 0.7158242464065552, "learning_rate": 1.3107147645158576e-05, "loss": 2.1299, "step": 12306 }, { "epoch": 0.41, "grad_norm": 0.7315487861633301, "learning_rate": 1.310613734447027e-05, "loss": 2.0266, "step": 12307 }, { "epoch": 0.41, "grad_norm": 0.7496597766876221, "learning_rate": 1.3105127008690644e-05, "loss": 2.1436, "step": 12308 }, { "epoch": 0.41, "grad_norm": 0.7076642513275146, "learning_rate": 1.3104116637831114e-05, "loss": 2.1448, "step": 12309 }, { "epoch": 0.41, "grad_norm": 0.7664644122123718, "learning_rate": 1.310310623190309e-05, "loss": 2.1678, "step": 12310 }, { "epoch": 0.41, "grad_norm": 0.7084061503410339, "learning_rate": 1.3102095790917994e-05, "loss": 2.0676, "step": 12311 }, { "epoch": 0.41, "grad_norm": 0.72226881980896, "learning_rate": 1.3101085314887237e-05, "loss": 2.0899, "step": 12312 }, { "epoch": 0.41, "grad_norm": 0.8006677031517029, "learning_rate": 1.3100074803822236e-05, "loss": 2.0548, "step": 12313 }, { "epoch": 0.41, "grad_norm": 0.715962290763855, "learning_rate": 1.3099064257734404e-05, "loss": 2.1122, "step": 12314 }, { "epoch": 0.41, "grad_norm": 0.7328554391860962, "learning_rate": 1.309805367663516e-05, "loss": 2.0859, "step": 12315 }, { "epoch": 0.41, "grad_norm": 0.7648824453353882, "learning_rate": 1.3097043060535926e-05, "loss": 2.13, "step": 12316 }, { "epoch": 0.41, "grad_norm": 0.7866998314857483, "learning_rate": 1.3096032409448109e-05, "loss": 1.9937, "step": 12317 }, { "epoch": 0.41, "grad_norm": 0.7532053589820862, "learning_rate": 1.3095021723383139e-05, "loss": 2.0001, "step": 12318 }, { "epoch": 0.41, "grad_norm": 0.7566047310829163, "learning_rate": 1.3094011002352421e-05, "loss": 2.1032, "step": 12319 }, { "epoch": 0.41, "grad_norm": 0.7170406579971313, "learning_rate": 1.3093000246367385e-05, "loss": 2.061, "step": 12320 }, { "epoch": 0.41, "grad_norm": 0.7680877447128296, "learning_rate": 1.309198945543944e-05, "loss": 2.0961, "step": 12321 }, { "epoch": 0.41, "grad_norm": 0.7140366435050964, "learning_rate": 1.3090978629580013e-05, "loss": 2.1093, "step": 12322 }, { "epoch": 0.41, "grad_norm": 0.7203481793403625, "learning_rate": 1.308996776880052e-05, "loss": 2.0624, "step": 12323 }, { "epoch": 0.41, "grad_norm": 0.7167226076126099, "learning_rate": 1.3088956873112381e-05, "loss": 2.0504, "step": 12324 }, { "epoch": 0.41, "grad_norm": 0.7650008201599121, "learning_rate": 1.308794594252702e-05, "loss": 2.0954, "step": 12325 }, { "epoch": 0.41, "grad_norm": 0.7596518397331238, "learning_rate": 1.3086934977055854e-05, "loss": 2.0559, "step": 12326 }, { "epoch": 0.41, "grad_norm": 0.7558705806732178, "learning_rate": 1.3085923976710306e-05, "loss": 2.1087, "step": 12327 }, { "epoch": 0.41, "grad_norm": 0.7355661392211914, "learning_rate": 1.3084912941501797e-05, "loss": 2.0683, "step": 12328 }, { "epoch": 0.41, "grad_norm": 0.7271652221679688, "learning_rate": 1.3083901871441753e-05, "loss": 2.1263, "step": 12329 }, { "epoch": 0.41, "grad_norm": 0.7198430895805359, "learning_rate": 1.3082890766541587e-05, "loss": 2.1222, "step": 12330 }, { "epoch": 0.41, "grad_norm": 0.7599000930786133, "learning_rate": 1.3081879626812735e-05, "loss": 2.1045, "step": 12331 }, { "epoch": 0.41, "grad_norm": 0.7240828275680542, "learning_rate": 1.3080868452266608e-05, "loss": 2.0743, "step": 12332 }, { "epoch": 0.41, "grad_norm": 0.7350494265556335, "learning_rate": 1.3079857242914635e-05, "loss": 2.1942, "step": 12333 }, { "epoch": 0.41, "grad_norm": 0.7203738689422607, "learning_rate": 1.3078845998768241e-05, "loss": 2.0572, "step": 12334 }, { "epoch": 0.41, "grad_norm": 0.7287986278533936, "learning_rate": 1.307783471983885e-05, "loss": 2.0391, "step": 12335 }, { "epoch": 0.41, "grad_norm": 0.7205066680908203, "learning_rate": 1.3076823406137885e-05, "loss": 2.0729, "step": 12336 }, { "epoch": 0.41, "grad_norm": 0.7623180150985718, "learning_rate": 1.3075812057676771e-05, "loss": 2.0729, "step": 12337 }, { "epoch": 0.41, "grad_norm": 0.7264131903648376, "learning_rate": 1.3074800674466936e-05, "loss": 2.0602, "step": 12338 }, { "epoch": 0.41, "grad_norm": 0.7383767366409302, "learning_rate": 1.3073789256519806e-05, "loss": 2.1407, "step": 12339 }, { "epoch": 0.41, "grad_norm": 0.7553867697715759, "learning_rate": 1.3072777803846808e-05, "loss": 2.171, "step": 12340 }, { "epoch": 0.41, "grad_norm": 0.7256442308425903, "learning_rate": 1.3071766316459362e-05, "loss": 2.1121, "step": 12341 }, { "epoch": 0.41, "grad_norm": 0.7372667193412781, "learning_rate": 1.3070754794368907e-05, "loss": 2.0319, "step": 12342 }, { "epoch": 0.41, "grad_norm": 0.7469893097877502, "learning_rate": 1.306974323758686e-05, "loss": 2.0926, "step": 12343 }, { "epoch": 0.41, "grad_norm": 0.7320919036865234, "learning_rate": 1.3068731646124652e-05, "loss": 2.0884, "step": 12344 }, { "epoch": 0.41, "grad_norm": 0.743634819984436, "learning_rate": 1.3067720019993715e-05, "loss": 2.0944, "step": 12345 }, { "epoch": 0.41, "grad_norm": 0.7345340847969055, "learning_rate": 1.3066708359205473e-05, "loss": 2.0338, "step": 12346 }, { "epoch": 0.41, "grad_norm": 0.7416863441467285, "learning_rate": 1.3065696663771359e-05, "loss": 2.1609, "step": 12347 }, { "epoch": 0.41, "grad_norm": 0.7155302166938782, "learning_rate": 1.30646849337028e-05, "loss": 2.1, "step": 12348 }, { "epoch": 0.41, "grad_norm": 0.7079076766967773, "learning_rate": 1.3063673169011226e-05, "loss": 2.0907, "step": 12349 }, { "epoch": 0.41, "grad_norm": 0.728448212146759, "learning_rate": 1.3062661369708068e-05, "loss": 2.0345, "step": 12350 }, { "epoch": 0.41, "grad_norm": 0.7596637010574341, "learning_rate": 1.306164953580476e-05, "loss": 2.0966, "step": 12351 }, { "epoch": 0.41, "grad_norm": 0.7685812711715698, "learning_rate": 1.3060637667312723e-05, "loss": 2.1784, "step": 12352 }, { "epoch": 0.41, "grad_norm": 0.7350772023200989, "learning_rate": 1.3059625764243403e-05, "loss": 2.0675, "step": 12353 }, { "epoch": 0.41, "grad_norm": 0.7163321375846863, "learning_rate": 1.305861382660822e-05, "loss": 2.0769, "step": 12354 }, { "epoch": 0.41, "grad_norm": 0.7666007280349731, "learning_rate": 1.3057601854418612e-05, "loss": 2.0864, "step": 12355 }, { "epoch": 0.41, "grad_norm": 0.7470929026603699, "learning_rate": 1.3056589847686008e-05, "loss": 2.1617, "step": 12356 }, { "epoch": 0.41, "grad_norm": 0.7122946381568909, "learning_rate": 1.3055577806421844e-05, "loss": 2.1232, "step": 12357 }, { "epoch": 0.41, "grad_norm": 0.7549747824668884, "learning_rate": 1.3054565730637554e-05, "loss": 2.1978, "step": 12358 }, { "epoch": 0.41, "grad_norm": 0.7279300689697266, "learning_rate": 1.305355362034457e-05, "loss": 2.0166, "step": 12359 }, { "epoch": 0.41, "grad_norm": 0.7320740818977356, "learning_rate": 1.3052541475554325e-05, "loss": 2.1422, "step": 12360 }, { "epoch": 0.41, "grad_norm": 0.7508188486099243, "learning_rate": 1.3051529296278259e-05, "loss": 1.9954, "step": 12361 }, { "epoch": 0.41, "grad_norm": 0.7478777170181274, "learning_rate": 1.3050517082527802e-05, "loss": 2.0834, "step": 12362 }, { "epoch": 0.41, "grad_norm": 0.722675621509552, "learning_rate": 1.3049504834314389e-05, "loss": 2.0601, "step": 12363 }, { "epoch": 0.41, "grad_norm": 0.7712547779083252, "learning_rate": 1.3048492551649459e-05, "loss": 2.0442, "step": 12364 }, { "epoch": 0.41, "grad_norm": 0.7088399529457092, "learning_rate": 1.3047480234544447e-05, "loss": 2.127, "step": 12365 }, { "epoch": 0.41, "grad_norm": 0.7515777945518494, "learning_rate": 1.3046467883010788e-05, "loss": 2.0906, "step": 12366 }, { "epoch": 0.41, "grad_norm": 0.7725820541381836, "learning_rate": 1.304545549705992e-05, "loss": 2.1274, "step": 12367 }, { "epoch": 0.41, "grad_norm": 0.732892632484436, "learning_rate": 1.3044443076703281e-05, "loss": 2.1339, "step": 12368 }, { "epoch": 0.41, "grad_norm": 0.739269495010376, "learning_rate": 1.304343062195231e-05, "loss": 2.066, "step": 12369 }, { "epoch": 0.41, "grad_norm": 0.757472813129425, "learning_rate": 1.3042418132818442e-05, "loss": 2.121, "step": 12370 }, { "epoch": 0.41, "grad_norm": 0.7038738131523132, "learning_rate": 1.3041405609313117e-05, "loss": 2.0845, "step": 12371 }, { "epoch": 0.41, "grad_norm": 0.7386584877967834, "learning_rate": 1.3040393051447776e-05, "loss": 2.1328, "step": 12372 }, { "epoch": 0.41, "grad_norm": 0.771976888179779, "learning_rate": 1.3039380459233855e-05, "loss": 2.0488, "step": 12373 }, { "epoch": 0.41, "grad_norm": 0.7317124009132385, "learning_rate": 1.3038367832682793e-05, "loss": 2.0415, "step": 12374 }, { "epoch": 0.41, "grad_norm": 0.7550515532493591, "learning_rate": 1.3037355171806034e-05, "loss": 2.1163, "step": 12375 }, { "epoch": 0.41, "grad_norm": 0.7270771265029907, "learning_rate": 1.3036342476615017e-05, "loss": 2.0651, "step": 12376 }, { "epoch": 0.41, "grad_norm": 0.719088613986969, "learning_rate": 1.3035329747121177e-05, "loss": 2.0914, "step": 12377 }, { "epoch": 0.41, "grad_norm": 0.7338764667510986, "learning_rate": 1.3034316983335966e-05, "loss": 2.0441, "step": 12378 }, { "epoch": 0.41, "grad_norm": 0.7195775508880615, "learning_rate": 1.3033304185270819e-05, "loss": 2.1247, "step": 12379 }, { "epoch": 0.41, "grad_norm": 0.7105481028556824, "learning_rate": 1.3032291352937176e-05, "loss": 2.115, "step": 12380 }, { "epoch": 0.41, "grad_norm": 0.7122864723205566, "learning_rate": 1.3031278486346484e-05, "loss": 2.1242, "step": 12381 }, { "epoch": 0.41, "grad_norm": 0.7632964253425598, "learning_rate": 1.3030265585510185e-05, "loss": 2.0858, "step": 12382 }, { "epoch": 0.41, "grad_norm": 0.7272965312004089, "learning_rate": 1.3029252650439722e-05, "loss": 2.1293, "step": 12383 }, { "epoch": 0.41, "grad_norm": 0.7279636263847351, "learning_rate": 1.3028239681146537e-05, "loss": 2.1043, "step": 12384 }, { "epoch": 0.41, "grad_norm": 0.7214242815971375, "learning_rate": 1.3027226677642073e-05, "loss": 2.1085, "step": 12385 }, { "epoch": 0.41, "grad_norm": 0.7254440784454346, "learning_rate": 1.3026213639937779e-05, "loss": 2.0777, "step": 12386 }, { "epoch": 0.41, "grad_norm": 0.8031516075134277, "learning_rate": 1.3025200568045096e-05, "loss": 2.1154, "step": 12387 }, { "epoch": 0.41, "grad_norm": 0.7618164420127869, "learning_rate": 1.3024187461975468e-05, "loss": 2.1237, "step": 12388 }, { "epoch": 0.41, "grad_norm": 0.7552929520606995, "learning_rate": 1.3023174321740344e-05, "loss": 2.044, "step": 12389 }, { "epoch": 0.41, "grad_norm": 0.7681983113288879, "learning_rate": 1.3022161147351168e-05, "loss": 2.0774, "step": 12390 }, { "epoch": 0.41, "grad_norm": 0.7132359147071838, "learning_rate": 1.3021147938819387e-05, "loss": 2.0364, "step": 12391 }, { "epoch": 0.41, "grad_norm": 0.75300133228302, "learning_rate": 1.3020134696156448e-05, "loss": 2.0942, "step": 12392 }, { "epoch": 0.41, "grad_norm": 0.7216118574142456, "learning_rate": 1.3019121419373794e-05, "loss": 2.1265, "step": 12393 }, { "epoch": 0.41, "grad_norm": 0.7107651829719543, "learning_rate": 1.301810810848288e-05, "loss": 2.1309, "step": 12394 }, { "epoch": 0.41, "grad_norm": 0.7452234625816345, "learning_rate": 1.3017094763495148e-05, "loss": 2.0988, "step": 12395 }, { "epoch": 0.41, "grad_norm": 0.7183559536933899, "learning_rate": 1.3016081384422044e-05, "loss": 2.0499, "step": 12396 }, { "epoch": 0.41, "grad_norm": 0.7661429643630981, "learning_rate": 1.3015067971275023e-05, "loss": 2.1108, "step": 12397 }, { "epoch": 0.41, "grad_norm": 0.7464975714683533, "learning_rate": 1.3014054524065531e-05, "loss": 2.1354, "step": 12398 }, { "epoch": 0.41, "grad_norm": 0.7326420545578003, "learning_rate": 1.3013041042805014e-05, "loss": 2.0899, "step": 12399 }, { "epoch": 0.41, "grad_norm": 0.7147065997123718, "learning_rate": 1.301202752750493e-05, "loss": 2.0265, "step": 12400 }, { "epoch": 0.41, "grad_norm": 0.7486024498939514, "learning_rate": 1.301101397817672e-05, "loss": 2.1461, "step": 12401 }, { "epoch": 0.41, "grad_norm": 0.739072322845459, "learning_rate": 1.3010000394831846e-05, "loss": 2.0902, "step": 12402 }, { "epoch": 0.41, "grad_norm": 0.7615841627120972, "learning_rate": 1.3008986777481744e-05, "loss": 2.131, "step": 12403 }, { "epoch": 0.41, "grad_norm": 0.7553632259368896, "learning_rate": 1.3007973126137876e-05, "loss": 2.1382, "step": 12404 }, { "epoch": 0.41, "grad_norm": 0.7141990065574646, "learning_rate": 1.3006959440811691e-05, "loss": 2.0961, "step": 12405 }, { "epoch": 0.41, "grad_norm": 0.721104085445404, "learning_rate": 1.3005945721514642e-05, "loss": 2.1062, "step": 12406 }, { "epoch": 0.41, "grad_norm": 0.7564157247543335, "learning_rate": 1.3004931968258174e-05, "loss": 2.0973, "step": 12407 }, { "epoch": 0.41, "grad_norm": 0.7303712964057922, "learning_rate": 1.300391818105375e-05, "loss": 2.0641, "step": 12408 }, { "epoch": 0.41, "grad_norm": 0.7411707043647766, "learning_rate": 1.3002904359912815e-05, "loss": 2.0741, "step": 12409 }, { "epoch": 0.41, "grad_norm": 0.7584968209266663, "learning_rate": 1.300189050484683e-05, "loss": 2.0971, "step": 12410 }, { "epoch": 0.41, "grad_norm": 0.7540577054023743, "learning_rate": 1.3000876615867246e-05, "loss": 2.0445, "step": 12411 }, { "epoch": 0.41, "grad_norm": 0.7482898831367493, "learning_rate": 1.2999862692985515e-05, "loss": 2.0558, "step": 12412 }, { "epoch": 0.41, "grad_norm": 0.7182303667068481, "learning_rate": 1.2998848736213094e-05, "loss": 2.0866, "step": 12413 }, { "epoch": 0.41, "grad_norm": 0.7315609455108643, "learning_rate": 1.2997834745561437e-05, "loss": 2.0639, "step": 12414 }, { "epoch": 0.41, "grad_norm": 0.7472220063209534, "learning_rate": 1.2996820721042001e-05, "loss": 2.1198, "step": 12415 }, { "epoch": 0.41, "grad_norm": 0.7642606496810913, "learning_rate": 1.299580666266624e-05, "loss": 2.1476, "step": 12416 }, { "epoch": 0.41, "grad_norm": 0.7121624946594238, "learning_rate": 1.2994792570445612e-05, "loss": 2.1061, "step": 12417 }, { "epoch": 0.41, "grad_norm": 0.7423768639564514, "learning_rate": 1.299377844439157e-05, "loss": 2.1004, "step": 12418 }, { "epoch": 0.41, "grad_norm": 0.7726824283599854, "learning_rate": 1.2992764284515575e-05, "loss": 2.0773, "step": 12419 }, { "epoch": 0.41, "grad_norm": 0.7338455319404602, "learning_rate": 1.2991750090829082e-05, "loss": 2.1518, "step": 12420 }, { "epoch": 0.41, "grad_norm": 0.7193387150764465, "learning_rate": 1.2990735863343552e-05, "loss": 2.1054, "step": 12421 }, { "epoch": 0.41, "grad_norm": 0.7375040054321289, "learning_rate": 1.2989721602070442e-05, "loss": 2.1476, "step": 12422 }, { "epoch": 0.41, "grad_norm": 0.7037714719772339, "learning_rate": 1.2988707307021207e-05, "loss": 2.0779, "step": 12423 }, { "epoch": 0.41, "grad_norm": 0.722993791103363, "learning_rate": 1.298769297820731e-05, "loss": 2.0913, "step": 12424 }, { "epoch": 0.41, "grad_norm": 0.7318497896194458, "learning_rate": 1.2986678615640208e-05, "loss": 2.1353, "step": 12425 }, { "epoch": 0.41, "grad_norm": 0.7400106191635132, "learning_rate": 1.298566421933136e-05, "loss": 2.107, "step": 12426 }, { "epoch": 0.41, "grad_norm": 0.7406516075134277, "learning_rate": 1.298464978929223e-05, "loss": 2.1031, "step": 12427 }, { "epoch": 0.41, "grad_norm": 0.7219787836074829, "learning_rate": 1.2983635325534273e-05, "loss": 2.1066, "step": 12428 }, { "epoch": 0.41, "grad_norm": 0.7212991714477539, "learning_rate": 1.2982620828068955e-05, "loss": 2.1884, "step": 12429 }, { "epoch": 0.41, "grad_norm": 0.7395643591880798, "learning_rate": 1.2981606296907733e-05, "loss": 2.0887, "step": 12430 }, { "epoch": 0.41, "grad_norm": 0.7339820861816406, "learning_rate": 1.2980591732062071e-05, "loss": 2.1066, "step": 12431 }, { "epoch": 0.41, "grad_norm": 0.7472740411758423, "learning_rate": 1.297957713354343e-05, "loss": 2.1136, "step": 12432 }, { "epoch": 0.41, "grad_norm": 0.7241846323013306, "learning_rate": 1.2978562501363276e-05, "loss": 2.0129, "step": 12433 }, { "epoch": 0.41, "grad_norm": 0.7235549092292786, "learning_rate": 1.2977547835533065e-05, "loss": 2.0751, "step": 12434 }, { "epoch": 0.41, "grad_norm": 0.7627942562103271, "learning_rate": 1.2976533136064266e-05, "loss": 2.0619, "step": 12435 }, { "epoch": 0.41, "grad_norm": 0.7333987355232239, "learning_rate": 1.297551840296834e-05, "loss": 2.1492, "step": 12436 }, { "epoch": 0.41, "grad_norm": 0.7314045429229736, "learning_rate": 1.2974503636256748e-05, "loss": 2.0264, "step": 12437 }, { "epoch": 0.41, "grad_norm": 0.7258691787719727, "learning_rate": 1.2973488835940957e-05, "loss": 2.0911, "step": 12438 }, { "epoch": 0.41, "grad_norm": 0.7175994515419006, "learning_rate": 1.2972474002032434e-05, "loss": 2.0402, "step": 12439 }, { "epoch": 0.41, "grad_norm": 0.7267522215843201, "learning_rate": 1.2971459134542641e-05, "loss": 2.1262, "step": 12440 }, { "epoch": 0.41, "grad_norm": 0.7042745351791382, "learning_rate": 1.2970444233483044e-05, "loss": 2.1337, "step": 12441 }, { "epoch": 0.41, "grad_norm": 0.7178748846054077, "learning_rate": 1.2969429298865109e-05, "loss": 2.0386, "step": 12442 }, { "epoch": 0.41, "grad_norm": 0.7408658266067505, "learning_rate": 1.2968414330700303e-05, "loss": 2.0807, "step": 12443 }, { "epoch": 0.41, "grad_norm": 0.7536001205444336, "learning_rate": 1.2967399329000092e-05, "loss": 2.0797, "step": 12444 }, { "epoch": 0.41, "grad_norm": 0.7605969309806824, "learning_rate": 1.2966384293775937e-05, "loss": 2.0311, "step": 12445 }, { "epoch": 0.41, "grad_norm": 0.7548078298568726, "learning_rate": 1.2965369225039318e-05, "loss": 2.1059, "step": 12446 }, { "epoch": 0.41, "grad_norm": 0.7265551686286926, "learning_rate": 1.2964354122801695e-05, "loss": 2.0705, "step": 12447 }, { "epoch": 0.41, "grad_norm": 0.7274618148803711, "learning_rate": 1.2963338987074531e-05, "loss": 2.0621, "step": 12448 }, { "epoch": 0.41, "grad_norm": 0.7528622150421143, "learning_rate": 1.2962323817869304e-05, "loss": 2.1558, "step": 12449 }, { "epoch": 0.41, "grad_norm": 0.7333722114562988, "learning_rate": 1.2961308615197476e-05, "loss": 2.0447, "step": 12450 }, { "epoch": 0.41, "grad_norm": 0.7431352138519287, "learning_rate": 1.2960293379070521e-05, "loss": 2.096, "step": 12451 }, { "epoch": 0.41, "grad_norm": 0.7008032202720642, "learning_rate": 1.2959278109499904e-05, "loss": 2.0824, "step": 12452 }, { "epoch": 0.41, "grad_norm": 0.7102967500686646, "learning_rate": 1.2958262806497097e-05, "loss": 2.0613, "step": 12453 }, { "epoch": 0.41, "grad_norm": 0.7483636736869812, "learning_rate": 1.2957247470073572e-05, "loss": 2.1896, "step": 12454 }, { "epoch": 0.41, "grad_norm": 0.7122631669044495, "learning_rate": 1.2956232100240802e-05, "loss": 2.1096, "step": 12455 }, { "epoch": 0.41, "grad_norm": 0.7482119798660278, "learning_rate": 1.2955216697010249e-05, "loss": 2.0603, "step": 12456 }, { "epoch": 0.41, "grad_norm": 0.7347743511199951, "learning_rate": 1.2954201260393391e-05, "loss": 2.0582, "step": 12457 }, { "epoch": 0.41, "grad_norm": 0.7447862029075623, "learning_rate": 1.2953185790401699e-05, "loss": 2.0154, "step": 12458 }, { "epoch": 0.41, "grad_norm": 0.7481386065483093, "learning_rate": 1.2952170287046644e-05, "loss": 2.101, "step": 12459 }, { "epoch": 0.41, "grad_norm": 0.7137046456336975, "learning_rate": 1.2951154750339701e-05, "loss": 2.0875, "step": 12460 }, { "epoch": 0.41, "grad_norm": 0.7311532497406006, "learning_rate": 1.2950139180292338e-05, "loss": 2.0994, "step": 12461 }, { "epoch": 0.41, "grad_norm": 0.7358998656272888, "learning_rate": 1.2949123576916033e-05, "loss": 2.1244, "step": 12462 }, { "epoch": 0.41, "grad_norm": 0.7009521126747131, "learning_rate": 1.2948107940222258e-05, "loss": 2.0667, "step": 12463 }, { "epoch": 0.41, "grad_norm": 0.7382879257202148, "learning_rate": 1.2947092270222487e-05, "loss": 2.1183, "step": 12464 }, { "epoch": 0.41, "grad_norm": 0.7537720203399658, "learning_rate": 1.2946076566928197e-05, "loss": 2.089, "step": 12465 }, { "epoch": 0.41, "grad_norm": 0.7354782223701477, "learning_rate": 1.294506083035086e-05, "loss": 2.0513, "step": 12466 }, { "epoch": 0.41, "grad_norm": 0.7709543704986572, "learning_rate": 1.2944045060501949e-05, "loss": 2.1218, "step": 12467 }, { "epoch": 0.41, "grad_norm": 0.7562302947044373, "learning_rate": 1.2943029257392946e-05, "loss": 2.1225, "step": 12468 }, { "epoch": 0.41, "grad_norm": 0.7235966324806213, "learning_rate": 1.294201342103532e-05, "loss": 2.0557, "step": 12469 }, { "epoch": 0.41, "grad_norm": 0.7397311329841614, "learning_rate": 1.294099755144055e-05, "loss": 2.1216, "step": 12470 }, { "epoch": 0.41, "grad_norm": 0.7242834568023682, "learning_rate": 1.2939981648620117e-05, "loss": 2.0255, "step": 12471 }, { "epoch": 0.41, "grad_norm": 0.7577300071716309, "learning_rate": 1.2938965712585491e-05, "loss": 2.1199, "step": 12472 }, { "epoch": 0.41, "grad_norm": 0.7869002819061279, "learning_rate": 1.2937949743348155e-05, "loss": 2.0693, "step": 12473 }, { "epoch": 0.42, "grad_norm": 0.772146463394165, "learning_rate": 1.2936933740919582e-05, "loss": 2.0179, "step": 12474 }, { "epoch": 0.42, "grad_norm": 0.7702578902244568, "learning_rate": 1.2935917705311254e-05, "loss": 2.0312, "step": 12475 }, { "epoch": 0.42, "grad_norm": 0.7136684060096741, "learning_rate": 1.293490163653465e-05, "loss": 2.0224, "step": 12476 }, { "epoch": 0.42, "grad_norm": 0.7049131393432617, "learning_rate": 1.2933885534601247e-05, "loss": 2.1108, "step": 12477 }, { "epoch": 0.42, "grad_norm": 0.7300074696540833, "learning_rate": 1.2932869399522524e-05, "loss": 2.0606, "step": 12478 }, { "epoch": 0.42, "grad_norm": 0.7547006607055664, "learning_rate": 1.2931853231309962e-05, "loss": 2.1243, "step": 12479 }, { "epoch": 0.42, "grad_norm": 0.7452551126480103, "learning_rate": 1.2930837029975039e-05, "loss": 2.1121, "step": 12480 }, { "epoch": 0.42, "grad_norm": 0.7739254236221313, "learning_rate": 1.2929820795529238e-05, "loss": 2.135, "step": 12481 }, { "epoch": 0.42, "grad_norm": 0.7449448108673096, "learning_rate": 1.2928804527984039e-05, "loss": 2.0873, "step": 12482 }, { "epoch": 0.42, "grad_norm": 0.7372896075248718, "learning_rate": 1.292778822735092e-05, "loss": 2.0538, "step": 12483 }, { "epoch": 0.42, "grad_norm": 0.7410143613815308, "learning_rate": 1.2926771893641367e-05, "loss": 2.0869, "step": 12484 }, { "epoch": 0.42, "grad_norm": 0.7250874042510986, "learning_rate": 1.2925755526866861e-05, "loss": 2.1315, "step": 12485 }, { "epoch": 0.42, "grad_norm": 0.7346044778823853, "learning_rate": 1.2924739127038884e-05, "loss": 2.0274, "step": 12486 }, { "epoch": 0.42, "grad_norm": 0.7323034405708313, "learning_rate": 1.2923722694168918e-05, "loss": 2.1207, "step": 12487 }, { "epoch": 0.42, "grad_norm": 0.7275946140289307, "learning_rate": 1.292270622826845e-05, "loss": 2.0034, "step": 12488 }, { "epoch": 0.42, "grad_norm": 0.7204136848449707, "learning_rate": 1.2921689729348951e-05, "loss": 2.0823, "step": 12489 }, { "epoch": 0.42, "grad_norm": 0.7274395823478699, "learning_rate": 1.2920673197421922e-05, "loss": 2.075, "step": 12490 }, { "epoch": 0.42, "grad_norm": 0.7710362672805786, "learning_rate": 1.2919656632498837e-05, "loss": 2.0882, "step": 12491 }, { "epoch": 0.42, "grad_norm": 0.752733051776886, "learning_rate": 1.2918640034591179e-05, "loss": 2.0999, "step": 12492 }, { "epoch": 0.42, "grad_norm": 0.7112500071525574, "learning_rate": 1.291762340371044e-05, "loss": 2.0026, "step": 12493 }, { "epoch": 0.42, "grad_norm": 0.774886965751648, "learning_rate": 1.2916606739868098e-05, "loss": 2.1162, "step": 12494 }, { "epoch": 0.42, "grad_norm": 0.7454021573066711, "learning_rate": 1.2915590043075647e-05, "loss": 2.0428, "step": 12495 }, { "epoch": 0.42, "grad_norm": 0.7235939502716064, "learning_rate": 1.2914573313344568e-05, "loss": 2.1094, "step": 12496 }, { "epoch": 0.42, "grad_norm": 0.7358099818229675, "learning_rate": 1.2913556550686344e-05, "loss": 2.1168, "step": 12497 }, { "epoch": 0.42, "grad_norm": 0.7191984057426453, "learning_rate": 1.2912539755112468e-05, "loss": 2.0134, "step": 12498 }, { "epoch": 0.42, "grad_norm": 0.7245826125144958, "learning_rate": 1.2911522926634427e-05, "loss": 2.1307, "step": 12499 }, { "epoch": 0.42, "grad_norm": 0.749428391456604, "learning_rate": 1.2910506065263701e-05, "loss": 2.1134, "step": 12500 }, { "epoch": 0.42, "grad_norm": 0.7430476546287537, "learning_rate": 1.2909489171011789e-05, "loss": 2.0918, "step": 12501 }, { "epoch": 0.42, "grad_norm": 0.7384743690490723, "learning_rate": 1.2908472243890166e-05, "loss": 2.0688, "step": 12502 }, { "epoch": 0.42, "grad_norm": 0.7587735652923584, "learning_rate": 1.2907455283910333e-05, "loss": 2.0363, "step": 12503 }, { "epoch": 0.42, "grad_norm": 0.7803107500076294, "learning_rate": 1.2906438291083777e-05, "loss": 1.9772, "step": 12504 }, { "epoch": 0.42, "grad_norm": 0.7342994213104248, "learning_rate": 1.2905421265421978e-05, "loss": 2.1213, "step": 12505 }, { "epoch": 0.42, "grad_norm": 0.7121531963348389, "learning_rate": 1.2904404206936438e-05, "loss": 2.0547, "step": 12506 }, { "epoch": 0.42, "grad_norm": 0.7410596013069153, "learning_rate": 1.2903387115638639e-05, "loss": 2.1044, "step": 12507 }, { "epoch": 0.42, "grad_norm": 0.7894978523254395, "learning_rate": 1.2902369991540074e-05, "loss": 2.0863, "step": 12508 }, { "epoch": 0.42, "grad_norm": 0.7759518027305603, "learning_rate": 1.2901352834652233e-05, "loss": 2.0598, "step": 12509 }, { "epoch": 0.42, "grad_norm": 0.6977453827857971, "learning_rate": 1.2900335644986607e-05, "loss": 2.0912, "step": 12510 }, { "epoch": 0.42, "grad_norm": 0.7241869568824768, "learning_rate": 1.2899318422554693e-05, "loss": 2.0525, "step": 12511 }, { "epoch": 0.42, "grad_norm": 0.7618778944015503, "learning_rate": 1.2898301167367977e-05, "loss": 2.1334, "step": 12512 }, { "epoch": 0.42, "grad_norm": 0.7546077370643616, "learning_rate": 1.2897283879437949e-05, "loss": 2.1207, "step": 12513 }, { "epoch": 0.42, "grad_norm": 0.7609128355979919, "learning_rate": 1.289626655877611e-05, "loss": 2.1855, "step": 12514 }, { "epoch": 0.42, "grad_norm": 0.7545419335365295, "learning_rate": 1.2895249205393947e-05, "loss": 2.07, "step": 12515 }, { "epoch": 0.42, "grad_norm": 0.7712535858154297, "learning_rate": 1.2894231819302953e-05, "loss": 2.0833, "step": 12516 }, { "epoch": 0.42, "grad_norm": 0.7289283275604248, "learning_rate": 1.289321440051463e-05, "loss": 2.078, "step": 12517 }, { "epoch": 0.42, "grad_norm": 0.7323263883590698, "learning_rate": 1.2892196949040463e-05, "loss": 2.126, "step": 12518 }, { "epoch": 0.42, "grad_norm": 0.7540423274040222, "learning_rate": 1.289117946489195e-05, "loss": 2.1426, "step": 12519 }, { "epoch": 0.42, "grad_norm": 0.7753336429595947, "learning_rate": 1.2890161948080587e-05, "loss": 2.1253, "step": 12520 }, { "epoch": 0.42, "grad_norm": 0.7441282272338867, "learning_rate": 1.2889144398617866e-05, "loss": 2.0572, "step": 12521 }, { "epoch": 0.42, "grad_norm": 0.8003175854682922, "learning_rate": 1.2888126816515286e-05, "loss": 2.1587, "step": 12522 }, { "epoch": 0.42, "grad_norm": 0.7630079984664917, "learning_rate": 1.288710920178434e-05, "loss": 2.1268, "step": 12523 }, { "epoch": 0.42, "grad_norm": 0.7639109492301941, "learning_rate": 1.2886091554436528e-05, "loss": 2.1387, "step": 12524 }, { "epoch": 0.42, "grad_norm": 0.7448859810829163, "learning_rate": 1.2885073874483345e-05, "loss": 2.069, "step": 12525 }, { "epoch": 0.42, "grad_norm": 0.7317858338356018, "learning_rate": 1.288405616193629e-05, "loss": 2.1252, "step": 12526 }, { "epoch": 0.42, "grad_norm": 0.7437266707420349, "learning_rate": 1.2883038416806852e-05, "loss": 2.1007, "step": 12527 }, { "epoch": 0.42, "grad_norm": 0.7373574376106262, "learning_rate": 1.2882020639106543e-05, "loss": 2.0574, "step": 12528 }, { "epoch": 0.42, "grad_norm": 0.7733877301216125, "learning_rate": 1.2881002828846851e-05, "loss": 2.1218, "step": 12529 }, { "epoch": 0.42, "grad_norm": 0.7624263763427734, "learning_rate": 1.2879984986039278e-05, "loss": 2.1774, "step": 12530 }, { "epoch": 0.42, "grad_norm": 0.7640353441238403, "learning_rate": 1.2878967110695322e-05, "loss": 2.0998, "step": 12531 }, { "epoch": 0.42, "grad_norm": 0.7497876286506653, "learning_rate": 1.2877949202826483e-05, "loss": 2.1312, "step": 12532 }, { "epoch": 0.42, "grad_norm": 0.7114462852478027, "learning_rate": 1.2876931262444262e-05, "loss": 2.0907, "step": 12533 }, { "epoch": 0.42, "grad_norm": 0.7434590458869934, "learning_rate": 1.2875913289560153e-05, "loss": 2.125, "step": 12534 }, { "epoch": 0.42, "grad_norm": 0.7647614479064941, "learning_rate": 1.2874895284185665e-05, "loss": 2.1343, "step": 12535 }, { "epoch": 0.42, "grad_norm": 0.7234348654747009, "learning_rate": 1.2873877246332293e-05, "loss": 2.1097, "step": 12536 }, { "epoch": 0.42, "grad_norm": 0.7357848286628723, "learning_rate": 1.2872859176011545e-05, "loss": 2.0357, "step": 12537 }, { "epoch": 0.42, "grad_norm": 0.7342125177383423, "learning_rate": 1.2871841073234909e-05, "loss": 2.0857, "step": 12538 }, { "epoch": 0.42, "grad_norm": 0.7367901802062988, "learning_rate": 1.2870822938013905e-05, "loss": 2.0725, "step": 12539 }, { "epoch": 0.42, "grad_norm": 0.7597434520721436, "learning_rate": 1.2869804770360022e-05, "loss": 2.0708, "step": 12540 }, { "epoch": 0.42, "grad_norm": 0.7530860900878906, "learning_rate": 1.2868786570284764e-05, "loss": 2.1025, "step": 12541 }, { "epoch": 0.42, "grad_norm": 0.7283613681793213, "learning_rate": 1.286776833779964e-05, "loss": 2.0568, "step": 12542 }, { "epoch": 0.42, "grad_norm": 0.7463346719741821, "learning_rate": 1.2866750072916147e-05, "loss": 2.1213, "step": 12543 }, { "epoch": 0.42, "grad_norm": 0.7241556644439697, "learning_rate": 1.2865731775645794e-05, "loss": 2.0233, "step": 12544 }, { "epoch": 0.42, "grad_norm": 0.7484334111213684, "learning_rate": 1.2864713446000082e-05, "loss": 2.1057, "step": 12545 }, { "epoch": 0.42, "grad_norm": 0.7291292548179626, "learning_rate": 1.2863695083990515e-05, "loss": 2.0629, "step": 12546 }, { "epoch": 0.42, "grad_norm": 0.7417165637016296, "learning_rate": 1.2862676689628602e-05, "loss": 2.0462, "step": 12547 }, { "epoch": 0.42, "grad_norm": 0.7339032292366028, "learning_rate": 1.2861658262925846e-05, "loss": 2.0988, "step": 12548 }, { "epoch": 0.42, "grad_norm": 0.7546550035476685, "learning_rate": 1.286063980389375e-05, "loss": 2.074, "step": 12549 }, { "epoch": 0.42, "grad_norm": 0.7336243391036987, "learning_rate": 1.2859621312543821e-05, "loss": 2.0354, "step": 12550 }, { "epoch": 0.42, "grad_norm": 0.7206490635871887, "learning_rate": 1.2858602788887569e-05, "loss": 2.0985, "step": 12551 }, { "epoch": 0.42, "grad_norm": 0.7196187973022461, "learning_rate": 1.2857584232936498e-05, "loss": 2.1298, "step": 12552 }, { "epoch": 0.42, "grad_norm": 0.7507184743881226, "learning_rate": 1.2856565644702112e-05, "loss": 2.0732, "step": 12553 }, { "epoch": 0.42, "grad_norm": 0.7262274026870728, "learning_rate": 1.2855547024195922e-05, "loss": 2.0221, "step": 12554 }, { "epoch": 0.42, "grad_norm": 0.703575849533081, "learning_rate": 1.2854528371429438e-05, "loss": 2.0877, "step": 12555 }, { "epoch": 0.42, "grad_norm": 0.7684732675552368, "learning_rate": 1.2853509686414163e-05, "loss": 2.1358, "step": 12556 }, { "epoch": 0.42, "grad_norm": 0.7479212284088135, "learning_rate": 1.2852490969161609e-05, "loss": 2.0925, "step": 12557 }, { "epoch": 0.42, "grad_norm": 0.7670333385467529, "learning_rate": 1.2851472219683283e-05, "loss": 2.1338, "step": 12558 }, { "epoch": 0.42, "grad_norm": 0.7229040861129761, "learning_rate": 1.2850453437990698e-05, "loss": 2.1174, "step": 12559 }, { "epoch": 0.42, "grad_norm": 0.7548657655715942, "learning_rate": 1.2849434624095357e-05, "loss": 2.0903, "step": 12560 }, { "epoch": 0.42, "grad_norm": 0.7260783314704895, "learning_rate": 1.2848415778008776e-05, "loss": 2.1035, "step": 12561 }, { "epoch": 0.42, "grad_norm": 0.767126202583313, "learning_rate": 1.284739689974246e-05, "loss": 2.1313, "step": 12562 }, { "epoch": 0.42, "grad_norm": 0.7501829862594604, "learning_rate": 1.2846377989307923e-05, "loss": 2.0471, "step": 12563 }, { "epoch": 0.42, "grad_norm": 0.7142001986503601, "learning_rate": 1.2845359046716676e-05, "loss": 2.1107, "step": 12564 }, { "epoch": 0.42, "grad_norm": 0.7691676616668701, "learning_rate": 1.2844340071980232e-05, "loss": 2.124, "step": 12565 }, { "epoch": 0.42, "grad_norm": 0.7425621747970581, "learning_rate": 1.28433210651101e-05, "loss": 2.0082, "step": 12566 }, { "epoch": 0.42, "grad_norm": 0.7841475605964661, "learning_rate": 1.2842302026117793e-05, "loss": 2.0725, "step": 12567 }, { "epoch": 0.42, "grad_norm": 0.726830244064331, "learning_rate": 1.2841282955014819e-05, "loss": 2.1484, "step": 12568 }, { "epoch": 0.42, "grad_norm": 0.717081606388092, "learning_rate": 1.28402638518127e-05, "loss": 2.1174, "step": 12569 }, { "epoch": 0.42, "grad_norm": 0.7229527831077576, "learning_rate": 1.2839244716522947e-05, "loss": 2.1188, "step": 12570 }, { "epoch": 0.42, "grad_norm": 0.7493440508842468, "learning_rate": 1.2838225549157066e-05, "loss": 2.022, "step": 12571 }, { "epoch": 0.42, "grad_norm": 0.7204367518424988, "learning_rate": 1.2837206349726578e-05, "loss": 2.0566, "step": 12572 }, { "epoch": 0.42, "grad_norm": 0.7157355546951294, "learning_rate": 1.2836187118242998e-05, "loss": 2.0886, "step": 12573 }, { "epoch": 0.42, "grad_norm": 0.7666046619415283, "learning_rate": 1.2835167854717833e-05, "loss": 2.1545, "step": 12574 }, { "epoch": 0.42, "grad_norm": 0.7181825041770935, "learning_rate": 1.2834148559162608e-05, "loss": 2.0887, "step": 12575 }, { "epoch": 0.42, "grad_norm": 0.7197605967521667, "learning_rate": 1.283312923158883e-05, "loss": 2.1795, "step": 12576 }, { "epoch": 0.42, "grad_norm": 0.7486099004745483, "learning_rate": 1.283210987200802e-05, "loss": 2.107, "step": 12577 }, { "epoch": 0.42, "grad_norm": 0.7456390857696533, "learning_rate": 1.2831090480431691e-05, "loss": 2.1254, "step": 12578 }, { "epoch": 0.42, "grad_norm": 0.7212749719619751, "learning_rate": 1.2830071056871363e-05, "loss": 2.1212, "step": 12579 }, { "epoch": 0.42, "grad_norm": 0.7251642942428589, "learning_rate": 1.2829051601338549e-05, "loss": 2.0992, "step": 12580 }, { "epoch": 0.42, "grad_norm": 0.7159357666969299, "learning_rate": 1.2828032113844771e-05, "loss": 2.0818, "step": 12581 }, { "epoch": 0.42, "grad_norm": 0.7305713295936584, "learning_rate": 1.2827012594401538e-05, "loss": 2.1023, "step": 12582 }, { "epoch": 0.42, "grad_norm": 0.7207646369934082, "learning_rate": 1.282599304302038e-05, "loss": 2.0929, "step": 12583 }, { "epoch": 0.42, "grad_norm": 0.7722252607345581, "learning_rate": 1.2824973459712803e-05, "loss": 2.1228, "step": 12584 }, { "epoch": 0.42, "grad_norm": 0.6945728659629822, "learning_rate": 1.2823953844490335e-05, "loss": 2.123, "step": 12585 }, { "epoch": 0.42, "grad_norm": 0.7388607859611511, "learning_rate": 1.2822934197364491e-05, "loss": 2.0776, "step": 12586 }, { "epoch": 0.42, "grad_norm": 0.7294972538948059, "learning_rate": 1.282191451834679e-05, "loss": 2.0525, "step": 12587 }, { "epoch": 0.42, "grad_norm": 0.7348666191101074, "learning_rate": 1.2820894807448751e-05, "loss": 2.1468, "step": 12588 }, { "epoch": 0.42, "grad_norm": 0.6959039568901062, "learning_rate": 1.28198750646819e-05, "loss": 2.1045, "step": 12589 }, { "epoch": 0.42, "grad_norm": 0.753452479839325, "learning_rate": 1.281885529005775e-05, "loss": 2.1281, "step": 12590 }, { "epoch": 0.42, "grad_norm": 0.744562566280365, "learning_rate": 1.2817835483587827e-05, "loss": 2.1277, "step": 12591 }, { "epoch": 0.42, "grad_norm": 0.7442547678947449, "learning_rate": 1.2816815645283648e-05, "loss": 2.0969, "step": 12592 }, { "epoch": 0.42, "grad_norm": 0.735022783279419, "learning_rate": 1.2815795775156736e-05, "loss": 2.0781, "step": 12593 }, { "epoch": 0.42, "grad_norm": 0.7144117951393127, "learning_rate": 1.2814775873218616e-05, "loss": 2.1296, "step": 12594 }, { "epoch": 0.42, "grad_norm": 0.7010131478309631, "learning_rate": 1.2813755939480808e-05, "loss": 2.1471, "step": 12595 }, { "epoch": 0.42, "grad_norm": 0.6953817009925842, "learning_rate": 1.2812735973954832e-05, "loss": 2.0686, "step": 12596 }, { "epoch": 0.42, "grad_norm": 0.758838951587677, "learning_rate": 1.2811715976652215e-05, "loss": 2.117, "step": 12597 }, { "epoch": 0.42, "grad_norm": 0.7543825507164001, "learning_rate": 1.2810695947584478e-05, "loss": 2.0829, "step": 12598 }, { "epoch": 0.42, "grad_norm": 0.7288152575492859, "learning_rate": 1.2809675886763147e-05, "loss": 2.1239, "step": 12599 }, { "epoch": 0.42, "grad_norm": 0.8003158569335938, "learning_rate": 1.2808655794199743e-05, "loss": 2.0807, "step": 12600 }, { "epoch": 0.42, "grad_norm": 0.7143562436103821, "learning_rate": 1.2807635669905791e-05, "loss": 2.0931, "step": 12601 }, { "epoch": 0.42, "grad_norm": 0.7711488604545593, "learning_rate": 1.2806615513892817e-05, "loss": 2.1051, "step": 12602 }, { "epoch": 0.42, "grad_norm": 0.7360863089561462, "learning_rate": 1.2805595326172347e-05, "loss": 2.1104, "step": 12603 }, { "epoch": 0.42, "grad_norm": 0.7283881306648254, "learning_rate": 1.2804575106755905e-05, "loss": 2.1218, "step": 12604 }, { "epoch": 0.42, "grad_norm": 0.7559407353401184, "learning_rate": 1.2803554855655019e-05, "loss": 2.1171, "step": 12605 }, { "epoch": 0.42, "grad_norm": 0.742595911026001, "learning_rate": 1.280253457288121e-05, "loss": 2.1065, "step": 12606 }, { "epoch": 0.42, "grad_norm": 0.759644627571106, "learning_rate": 1.2801514258446006e-05, "loss": 2.0869, "step": 12607 }, { "epoch": 0.42, "grad_norm": 0.7000303268432617, "learning_rate": 1.2800493912360942e-05, "loss": 2.052, "step": 12608 }, { "epoch": 0.42, "grad_norm": 0.7211377620697021, "learning_rate": 1.2799473534637535e-05, "loss": 2.1428, "step": 12609 }, { "epoch": 0.42, "grad_norm": 0.73008131980896, "learning_rate": 1.2798453125287317e-05, "loss": 2.1007, "step": 12610 }, { "epoch": 0.42, "grad_norm": 0.7594427466392517, "learning_rate": 1.2797432684321818e-05, "loss": 2.1128, "step": 12611 }, { "epoch": 0.42, "grad_norm": 0.7525646686553955, "learning_rate": 1.279641221175256e-05, "loss": 2.1618, "step": 12612 }, { "epoch": 0.42, "grad_norm": 0.7373979687690735, "learning_rate": 1.2795391707591078e-05, "loss": 2.1091, "step": 12613 }, { "epoch": 0.42, "grad_norm": 0.7182112336158752, "learning_rate": 1.2794371171848899e-05, "loss": 2.1101, "step": 12614 }, { "epoch": 0.42, "grad_norm": 0.7231495380401611, "learning_rate": 1.2793350604537552e-05, "loss": 2.1376, "step": 12615 }, { "epoch": 0.42, "grad_norm": 0.7288999557495117, "learning_rate": 1.2792330005668568e-05, "loss": 2.0933, "step": 12616 }, { "epoch": 0.42, "grad_norm": 0.717610239982605, "learning_rate": 1.2791309375253472e-05, "loss": 2.0764, "step": 12617 }, { "epoch": 0.42, "grad_norm": 0.7496227025985718, "learning_rate": 1.2790288713303803e-05, "loss": 2.1384, "step": 12618 }, { "epoch": 0.42, "grad_norm": 0.7234798073768616, "learning_rate": 1.2789268019831085e-05, "loss": 2.0489, "step": 12619 }, { "epoch": 0.42, "grad_norm": 0.7556368708610535, "learning_rate": 1.2788247294846849e-05, "loss": 2.1336, "step": 12620 }, { "epoch": 0.42, "grad_norm": 0.7156413793563843, "learning_rate": 1.2787226538362636e-05, "loss": 2.1565, "step": 12621 }, { "epoch": 0.42, "grad_norm": 0.6893162727355957, "learning_rate": 1.2786205750389966e-05, "loss": 2.0671, "step": 12622 }, { "epoch": 0.42, "grad_norm": 0.7370979189872742, "learning_rate": 1.2785184930940377e-05, "loss": 2.095, "step": 12623 }, { "epoch": 0.42, "grad_norm": 0.7006845474243164, "learning_rate": 1.2784164080025403e-05, "loss": 2.0302, "step": 12624 }, { "epoch": 0.42, "grad_norm": 0.7243999242782593, "learning_rate": 1.2783143197656574e-05, "loss": 2.1083, "step": 12625 }, { "epoch": 0.42, "grad_norm": 0.7104041576385498, "learning_rate": 1.2782122283845424e-05, "loss": 2.0912, "step": 12626 }, { "epoch": 0.42, "grad_norm": 0.7446256279945374, "learning_rate": 1.2781101338603487e-05, "loss": 2.1065, "step": 12627 }, { "epoch": 0.42, "grad_norm": 0.7413390874862671, "learning_rate": 1.2780080361942295e-05, "loss": 2.1584, "step": 12628 }, { "epoch": 0.42, "grad_norm": 0.7150620818138123, "learning_rate": 1.2779059353873385e-05, "loss": 2.0622, "step": 12629 }, { "epoch": 0.42, "grad_norm": 0.7796303033828735, "learning_rate": 1.2778038314408294e-05, "loss": 2.1759, "step": 12630 }, { "epoch": 0.42, "grad_norm": 0.7301186919212341, "learning_rate": 1.2777017243558549e-05, "loss": 2.0812, "step": 12631 }, { "epoch": 0.42, "grad_norm": 0.7088884711265564, "learning_rate": 1.2775996141335697e-05, "loss": 2.0692, "step": 12632 }, { "epoch": 0.42, "grad_norm": 0.7162004113197327, "learning_rate": 1.2774975007751265e-05, "loss": 2.0777, "step": 12633 }, { "epoch": 0.42, "grad_norm": 0.712647557258606, "learning_rate": 1.2773953842816791e-05, "loss": 2.042, "step": 12634 }, { "epoch": 0.42, "grad_norm": 0.7210012078285217, "learning_rate": 1.2772932646543811e-05, "loss": 2.0795, "step": 12635 }, { "epoch": 0.42, "grad_norm": 0.7615554928779602, "learning_rate": 1.2771911418943865e-05, "loss": 2.101, "step": 12636 }, { "epoch": 0.42, "grad_norm": 0.7578179240226746, "learning_rate": 1.2770890160028486e-05, "loss": 2.0935, "step": 12637 }, { "epoch": 0.42, "grad_norm": 0.7398737072944641, "learning_rate": 1.2769868869809216e-05, "loss": 2.0496, "step": 12638 }, { "epoch": 0.42, "grad_norm": 0.7361347675323486, "learning_rate": 1.2768847548297592e-05, "loss": 2.1154, "step": 12639 }, { "epoch": 0.42, "grad_norm": 0.7262876629829407, "learning_rate": 1.2767826195505148e-05, "loss": 2.0848, "step": 12640 }, { "epoch": 0.42, "grad_norm": 0.7332879900932312, "learning_rate": 1.276680481144343e-05, "loss": 2.0867, "step": 12641 }, { "epoch": 0.42, "grad_norm": 0.7368112802505493, "learning_rate": 1.2765783396123968e-05, "loss": 2.0892, "step": 12642 }, { "epoch": 0.42, "grad_norm": 0.7189192175865173, "learning_rate": 1.2764761949558308e-05, "loss": 2.0955, "step": 12643 }, { "epoch": 0.42, "grad_norm": 0.7539229989051819, "learning_rate": 1.2763740471757989e-05, "loss": 2.0989, "step": 12644 }, { "epoch": 0.42, "grad_norm": 0.748955488204956, "learning_rate": 1.2762718962734548e-05, "loss": 2.07, "step": 12645 }, { "epoch": 0.42, "grad_norm": 0.7497884035110474, "learning_rate": 1.2761697422499528e-05, "loss": 2.0855, "step": 12646 }, { "epoch": 0.42, "grad_norm": 0.7217252254486084, "learning_rate": 1.2760675851064468e-05, "loss": 2.0618, "step": 12647 }, { "epoch": 0.42, "grad_norm": 0.7172107696533203, "learning_rate": 1.2759654248440911e-05, "loss": 2.0552, "step": 12648 }, { "epoch": 0.42, "grad_norm": 0.7179107069969177, "learning_rate": 1.2758632614640398e-05, "loss": 2.1127, "step": 12649 }, { "epoch": 0.42, "grad_norm": 0.7248226404190063, "learning_rate": 1.275761094967447e-05, "loss": 2.0177, "step": 12650 }, { "epoch": 0.42, "grad_norm": 0.7487012147903442, "learning_rate": 1.275658925355467e-05, "loss": 2.1302, "step": 12651 }, { "epoch": 0.42, "grad_norm": 0.7389917969703674, "learning_rate": 1.2755567526292541e-05, "loss": 2.1286, "step": 12652 }, { "epoch": 0.42, "grad_norm": 0.7343252897262573, "learning_rate": 1.2754545767899622e-05, "loss": 2.0585, "step": 12653 }, { "epoch": 0.42, "grad_norm": 0.7418295741081238, "learning_rate": 1.2753523978387463e-05, "loss": 2.1479, "step": 12654 }, { "epoch": 0.42, "grad_norm": 0.7431178689002991, "learning_rate": 1.2752502157767604e-05, "loss": 2.1084, "step": 12655 }, { "epoch": 0.42, "grad_norm": 0.7074499130249023, "learning_rate": 1.2751480306051584e-05, "loss": 2.1276, "step": 12656 }, { "epoch": 0.42, "grad_norm": 0.712332546710968, "learning_rate": 1.2750458423250955e-05, "loss": 2.1186, "step": 12657 }, { "epoch": 0.42, "grad_norm": 0.7484898567199707, "learning_rate": 1.274943650937726e-05, "loss": 2.012, "step": 12658 }, { "epoch": 0.42, "grad_norm": 0.7371085286140442, "learning_rate": 1.2748414564442039e-05, "loss": 2.0341, "step": 12659 }, { "epoch": 0.42, "grad_norm": 0.7543914318084717, "learning_rate": 1.2747392588456844e-05, "loss": 2.1985, "step": 12660 }, { "epoch": 0.42, "grad_norm": 0.7225934863090515, "learning_rate": 1.2746370581433215e-05, "loss": 2.0547, "step": 12661 }, { "epoch": 0.42, "grad_norm": 0.7660501599311829, "learning_rate": 1.2745348543382702e-05, "loss": 2.0575, "step": 12662 }, { "epoch": 0.42, "grad_norm": 0.7421630024909973, "learning_rate": 1.2744326474316853e-05, "loss": 2.1751, "step": 12663 }, { "epoch": 0.42, "grad_norm": 0.7121289372444153, "learning_rate": 1.2743304374247208e-05, "loss": 2.0713, "step": 12664 }, { "epoch": 0.42, "grad_norm": 0.737197995185852, "learning_rate": 1.2742282243185322e-05, "loss": 2.1301, "step": 12665 }, { "epoch": 0.42, "grad_norm": 0.740676760673523, "learning_rate": 1.2741260081142734e-05, "loss": 2.1065, "step": 12666 }, { "epoch": 0.42, "grad_norm": 0.7201475501060486, "learning_rate": 1.2740237888130998e-05, "loss": 2.0979, "step": 12667 }, { "epoch": 0.42, "grad_norm": 0.7298979759216309, "learning_rate": 1.2739215664161659e-05, "loss": 2.0547, "step": 12668 }, { "epoch": 0.42, "grad_norm": 0.7261883020401001, "learning_rate": 1.273819340924627e-05, "loss": 2.0599, "step": 12669 }, { "epoch": 0.42, "grad_norm": 0.7303823232650757, "learning_rate": 1.2737171123396373e-05, "loss": 2.0918, "step": 12670 }, { "epoch": 0.42, "grad_norm": 0.7302249073982239, "learning_rate": 1.2736148806623522e-05, "loss": 2.0722, "step": 12671 }, { "epoch": 0.42, "grad_norm": 0.7708373069763184, "learning_rate": 1.2735126458939265e-05, "loss": 2.138, "step": 12672 }, { "epoch": 0.42, "grad_norm": 0.720122754573822, "learning_rate": 1.2734104080355153e-05, "loss": 2.1061, "step": 12673 }, { "epoch": 0.42, "grad_norm": 0.7400721311569214, "learning_rate": 1.2733081670882737e-05, "loss": 2.0741, "step": 12674 }, { "epoch": 0.42, "grad_norm": 0.7440530061721802, "learning_rate": 1.2732059230533561e-05, "loss": 2.0882, "step": 12675 }, { "epoch": 0.42, "grad_norm": 0.754288911819458, "learning_rate": 1.2731036759319186e-05, "loss": 2.0258, "step": 12676 }, { "epoch": 0.42, "grad_norm": 0.7512241005897522, "learning_rate": 1.2730014257251155e-05, "loss": 2.0688, "step": 12677 }, { "epoch": 0.42, "grad_norm": 0.7493742108345032, "learning_rate": 1.2728991724341024e-05, "loss": 2.0567, "step": 12678 }, { "epoch": 0.42, "grad_norm": 0.7561684846878052, "learning_rate": 1.2727969160600346e-05, "loss": 2.0758, "step": 12679 }, { "epoch": 0.42, "grad_norm": 0.7259445190429688, "learning_rate": 1.2726946566040668e-05, "loss": 2.1686, "step": 12680 }, { "epoch": 0.42, "grad_norm": 0.7286955714225769, "learning_rate": 1.2725923940673548e-05, "loss": 2.0844, "step": 12681 }, { "epoch": 0.42, "grad_norm": 0.7230685353279114, "learning_rate": 1.2724901284510535e-05, "loss": 2.016, "step": 12682 }, { "epoch": 0.42, "grad_norm": 0.7834388613700867, "learning_rate": 1.2723878597563186e-05, "loss": 2.0878, "step": 12683 }, { "epoch": 0.42, "grad_norm": 0.6985279321670532, "learning_rate": 1.272285587984305e-05, "loss": 2.0895, "step": 12684 }, { "epoch": 0.42, "grad_norm": 0.7152412533760071, "learning_rate": 1.2721833131361689e-05, "loss": 2.0616, "step": 12685 }, { "epoch": 0.42, "grad_norm": 0.7252992987632751, "learning_rate": 1.2720810352130649e-05, "loss": 2.1278, "step": 12686 }, { "epoch": 0.42, "grad_norm": 0.7453857064247131, "learning_rate": 1.271978754216149e-05, "loss": 2.1403, "step": 12687 }, { "epoch": 0.42, "grad_norm": 0.7167844176292419, "learning_rate": 1.2718764701465762e-05, "loss": 2.0571, "step": 12688 }, { "epoch": 0.42, "grad_norm": 0.6989026665687561, "learning_rate": 1.2717741830055026e-05, "loss": 2.0639, "step": 12689 }, { "epoch": 0.42, "grad_norm": 0.7144736647605896, "learning_rate": 1.2716718927940837e-05, "loss": 2.0657, "step": 12690 }, { "epoch": 0.42, "grad_norm": 0.7150135636329651, "learning_rate": 1.2715695995134744e-05, "loss": 2.0971, "step": 12691 }, { "epoch": 0.42, "grad_norm": 0.7528398633003235, "learning_rate": 1.2714673031648317e-05, "loss": 2.0996, "step": 12692 }, { "epoch": 0.42, "grad_norm": 0.7265036702156067, "learning_rate": 1.2713650037493102e-05, "loss": 2.0359, "step": 12693 }, { "epoch": 0.42, "grad_norm": 0.7068057656288147, "learning_rate": 1.2712627012680656e-05, "loss": 2.0662, "step": 12694 }, { "epoch": 0.42, "grad_norm": 0.7649387121200562, "learning_rate": 1.2711603957222542e-05, "loss": 2.0895, "step": 12695 }, { "epoch": 0.42, "grad_norm": 0.722756564617157, "learning_rate": 1.271058087113032e-05, "loss": 2.0955, "step": 12696 }, { "epoch": 0.42, "grad_norm": 0.7174360752105713, "learning_rate": 1.2709557754415536e-05, "loss": 2.1431, "step": 12697 }, { "epoch": 0.42, "grad_norm": 0.7663110494613647, "learning_rate": 1.2708534607089762e-05, "loss": 2.1089, "step": 12698 }, { "epoch": 0.42, "grad_norm": 0.7197758555412292, "learning_rate": 1.270751142916455e-05, "loss": 2.101, "step": 12699 }, { "epoch": 0.42, "grad_norm": 0.7626747488975525, "learning_rate": 1.2706488220651458e-05, "loss": 2.115, "step": 12700 }, { "epoch": 0.42, "grad_norm": 0.7441595792770386, "learning_rate": 1.2705464981562053e-05, "loss": 2.1371, "step": 12701 }, { "epoch": 0.42, "grad_norm": 0.7302168011665344, "learning_rate": 1.2704441711907887e-05, "loss": 2.0224, "step": 12702 }, { "epoch": 0.42, "grad_norm": 0.7157600522041321, "learning_rate": 1.2703418411700525e-05, "loss": 2.0739, "step": 12703 }, { "epoch": 0.42, "grad_norm": 0.7158370614051819, "learning_rate": 1.2702395080951523e-05, "loss": 2.1041, "step": 12704 }, { "epoch": 0.42, "grad_norm": 0.7379342913627625, "learning_rate": 1.270137171967245e-05, "loss": 2.056, "step": 12705 }, { "epoch": 0.42, "grad_norm": 0.7154335379600525, "learning_rate": 1.270034832787486e-05, "loss": 2.1051, "step": 12706 }, { "epoch": 0.42, "grad_norm": 0.7598645091056824, "learning_rate": 1.2699324905570316e-05, "loss": 2.0422, "step": 12707 }, { "epoch": 0.42, "grad_norm": 0.7315258979797363, "learning_rate": 1.2698301452770382e-05, "loss": 2.1084, "step": 12708 }, { "epoch": 0.42, "grad_norm": 0.722845733165741, "learning_rate": 1.2697277969486623e-05, "loss": 2.1356, "step": 12709 }, { "epoch": 0.42, "grad_norm": 0.7215485572814941, "learning_rate": 1.2696254455730592e-05, "loss": 2.0826, "step": 12710 }, { "epoch": 0.42, "grad_norm": 0.711340069770813, "learning_rate": 1.2695230911513861e-05, "loss": 2.1298, "step": 12711 }, { "epoch": 0.42, "grad_norm": 0.7505236268043518, "learning_rate": 1.2694207336847995e-05, "loss": 2.0924, "step": 12712 }, { "epoch": 0.42, "grad_norm": 0.7219468355178833, "learning_rate": 1.2693183731744547e-05, "loss": 2.1006, "step": 12713 }, { "epoch": 0.42, "grad_norm": 0.720144510269165, "learning_rate": 1.2692160096215092e-05, "loss": 2.1187, "step": 12714 }, { "epoch": 0.42, "grad_norm": 0.7063847780227661, "learning_rate": 1.2691136430271187e-05, "loss": 2.1038, "step": 12715 }, { "epoch": 0.42, "grad_norm": 0.7389441132545471, "learning_rate": 1.2690112733924403e-05, "loss": 2.1456, "step": 12716 }, { "epoch": 0.42, "grad_norm": 0.7234571576118469, "learning_rate": 1.2689089007186297e-05, "loss": 2.1394, "step": 12717 }, { "epoch": 0.42, "grad_norm": 0.7167448401451111, "learning_rate": 1.2688065250068442e-05, "loss": 2.0266, "step": 12718 }, { "epoch": 0.42, "grad_norm": 0.7269853949546814, "learning_rate": 1.2687041462582402e-05, "loss": 2.0827, "step": 12719 }, { "epoch": 0.42, "grad_norm": 0.7125935554504395, "learning_rate": 1.2686017644739743e-05, "loss": 2.087, "step": 12720 }, { "epoch": 0.42, "grad_norm": 0.7153884172439575, "learning_rate": 1.2684993796552027e-05, "loss": 2.0965, "step": 12721 }, { "epoch": 0.42, "grad_norm": 0.7356677651405334, "learning_rate": 1.2683969918030828e-05, "loss": 2.0973, "step": 12722 }, { "epoch": 0.42, "grad_norm": 0.7685341835021973, "learning_rate": 1.2682946009187711e-05, "loss": 2.0651, "step": 12723 }, { "epoch": 0.42, "grad_norm": 0.7065647840499878, "learning_rate": 1.2681922070034239e-05, "loss": 2.0572, "step": 12724 }, { "epoch": 0.42, "grad_norm": 0.7394136786460876, "learning_rate": 1.2680898100581986e-05, "loss": 2.0904, "step": 12725 }, { "epoch": 0.42, "grad_norm": 0.7436098456382751, "learning_rate": 1.2679874100842516e-05, "loss": 2.084, "step": 12726 }, { "epoch": 0.42, "grad_norm": 0.729438841342926, "learning_rate": 1.2678850070827397e-05, "loss": 2.0987, "step": 12727 }, { "epoch": 0.42, "grad_norm": 0.7214196920394897, "learning_rate": 1.2677826010548202e-05, "loss": 2.0792, "step": 12728 }, { "epoch": 0.42, "grad_norm": 0.7163324356079102, "learning_rate": 1.2676801920016497e-05, "loss": 2.0815, "step": 12729 }, { "epoch": 0.42, "grad_norm": 0.7306609749794006, "learning_rate": 1.2675777799243853e-05, "loss": 2.1288, "step": 12730 }, { "epoch": 0.42, "grad_norm": 0.7139286994934082, "learning_rate": 1.2674753648241844e-05, "loss": 2.0556, "step": 12731 }, { "epoch": 0.42, "grad_norm": 0.7436847686767578, "learning_rate": 1.2673729467022029e-05, "loss": 2.058, "step": 12732 }, { "epoch": 0.42, "grad_norm": 0.7410436272621155, "learning_rate": 1.267270525559599e-05, "loss": 2.0728, "step": 12733 }, { "epoch": 0.42, "grad_norm": 0.7239573001861572, "learning_rate": 1.2671681013975292e-05, "loss": 2.0907, "step": 12734 }, { "epoch": 0.42, "grad_norm": 0.7278544306755066, "learning_rate": 1.2670656742171505e-05, "loss": 2.1209, "step": 12735 }, { "epoch": 0.42, "grad_norm": 0.7375119924545288, "learning_rate": 1.2669632440196208e-05, "loss": 2.0564, "step": 12736 }, { "epoch": 0.42, "grad_norm": 0.7421109080314636, "learning_rate": 1.2668608108060966e-05, "loss": 2.0524, "step": 12737 }, { "epoch": 0.42, "grad_norm": 0.765608549118042, "learning_rate": 1.2667583745777354e-05, "loss": 2.0263, "step": 12738 }, { "epoch": 0.42, "grad_norm": 0.7103093266487122, "learning_rate": 1.2666559353356944e-05, "loss": 2.004, "step": 12739 }, { "epoch": 0.42, "grad_norm": 0.7310738563537598, "learning_rate": 1.2665534930811308e-05, "loss": 2.1353, "step": 12740 }, { "epoch": 0.42, "grad_norm": 0.7467474341392517, "learning_rate": 1.2664510478152021e-05, "loss": 2.0687, "step": 12741 }, { "epoch": 0.42, "grad_norm": 0.7339344620704651, "learning_rate": 1.2663485995390657e-05, "loss": 2.1003, "step": 12742 }, { "epoch": 0.42, "grad_norm": 0.7507628798484802, "learning_rate": 1.2662461482538788e-05, "loss": 2.0869, "step": 12743 }, { "epoch": 0.42, "grad_norm": 0.7471380233764648, "learning_rate": 1.2661436939607992e-05, "loss": 2.0552, "step": 12744 }, { "epoch": 0.42, "grad_norm": 0.7467623353004456, "learning_rate": 1.266041236660984e-05, "loss": 2.0478, "step": 12745 }, { "epoch": 0.42, "grad_norm": 0.7515903115272522, "learning_rate": 1.2659387763555906e-05, "loss": 1.9875, "step": 12746 }, { "epoch": 0.42, "grad_norm": 0.7452664971351624, "learning_rate": 1.2658363130457771e-05, "loss": 2.098, "step": 12747 }, { "epoch": 0.42, "grad_norm": 0.7219454050064087, "learning_rate": 1.2657338467327005e-05, "loss": 2.1158, "step": 12748 }, { "epoch": 0.42, "grad_norm": 0.7090778946876526, "learning_rate": 1.2656313774175186e-05, "loss": 2.0913, "step": 12749 }, { "epoch": 0.42, "grad_norm": 0.7454358339309692, "learning_rate": 1.2655289051013893e-05, "loss": 2.0386, "step": 12750 }, { "epoch": 0.42, "grad_norm": 0.7435495257377625, "learning_rate": 1.2654264297854699e-05, "loss": 2.132, "step": 12751 }, { "epoch": 0.42, "grad_norm": 0.7354375123977661, "learning_rate": 1.2653239514709184e-05, "loss": 2.0563, "step": 12752 }, { "epoch": 0.42, "grad_norm": 0.7272806763648987, "learning_rate": 1.265221470158892e-05, "loss": 2.0682, "step": 12753 }, { "epoch": 0.42, "grad_norm": 0.7368921041488647, "learning_rate": 1.2651189858505492e-05, "loss": 2.1298, "step": 12754 }, { "epoch": 0.42, "grad_norm": 0.7720046043395996, "learning_rate": 1.2650164985470475e-05, "loss": 2.0695, "step": 12755 }, { "epoch": 0.42, "grad_norm": 0.7313575744628906, "learning_rate": 1.2649140082495447e-05, "loss": 2.0726, "step": 12756 }, { "epoch": 0.42, "grad_norm": 0.7854654788970947, "learning_rate": 1.2648115149591984e-05, "loss": 2.15, "step": 12757 }, { "epoch": 0.42, "grad_norm": 0.7268454432487488, "learning_rate": 1.2647090186771673e-05, "loss": 2.0519, "step": 12758 }, { "epoch": 0.42, "grad_norm": 0.7306022047996521, "learning_rate": 1.2646065194046085e-05, "loss": 2.0747, "step": 12759 }, { "epoch": 0.42, "grad_norm": 0.7409251928329468, "learning_rate": 1.2645040171426803e-05, "loss": 2.0599, "step": 12760 }, { "epoch": 0.42, "grad_norm": 0.7362368702888489, "learning_rate": 1.2644015118925408e-05, "loss": 2.0861, "step": 12761 }, { "epoch": 0.42, "grad_norm": 0.7322105169296265, "learning_rate": 1.264299003655348e-05, "loss": 2.1156, "step": 12762 }, { "epoch": 0.42, "grad_norm": 0.7246610522270203, "learning_rate": 1.2641964924322598e-05, "loss": 2.1398, "step": 12763 }, { "epoch": 0.42, "grad_norm": 0.7239896655082703, "learning_rate": 1.2640939782244345e-05, "loss": 2.106, "step": 12764 }, { "epoch": 0.42, "grad_norm": 0.7168049216270447, "learning_rate": 1.2639914610330304e-05, "loss": 2.0629, "step": 12765 }, { "epoch": 0.42, "grad_norm": 0.7442251443862915, "learning_rate": 1.2638889408592055e-05, "loss": 2.061, "step": 12766 }, { "epoch": 0.42, "grad_norm": 0.7725180387496948, "learning_rate": 1.263786417704118e-05, "loss": 2.138, "step": 12767 }, { "epoch": 0.42, "grad_norm": 0.7543867230415344, "learning_rate": 1.2636838915689258e-05, "loss": 2.0952, "step": 12768 }, { "epoch": 0.42, "grad_norm": 0.7561289072036743, "learning_rate": 1.263581362454788e-05, "loss": 2.1423, "step": 12769 }, { "epoch": 0.42, "grad_norm": 0.7409908771514893, "learning_rate": 1.263478830362862e-05, "loss": 2.0512, "step": 12770 }, { "epoch": 0.42, "grad_norm": 0.7405576109886169, "learning_rate": 1.2633762952943067e-05, "loss": 2.1191, "step": 12771 }, { "epoch": 0.42, "grad_norm": 0.7161704897880554, "learning_rate": 1.2632737572502804e-05, "loss": 1.996, "step": 12772 }, { "epoch": 0.42, "grad_norm": 0.7001577019691467, "learning_rate": 1.2631712162319417e-05, "loss": 2.0985, "step": 12773 }, { "epoch": 0.42, "grad_norm": 0.8032430410385132, "learning_rate": 1.2630686722404486e-05, "loss": 2.0641, "step": 12774 }, { "epoch": 0.43, "grad_norm": 0.7699704170227051, "learning_rate": 1.2629661252769599e-05, "loss": 2.1456, "step": 12775 }, { "epoch": 0.43, "grad_norm": 0.7268160581588745, "learning_rate": 1.2628635753426339e-05, "loss": 2.0595, "step": 12776 }, { "epoch": 0.43, "grad_norm": 0.7381707429885864, "learning_rate": 1.2627610224386295e-05, "loss": 2.0731, "step": 12777 }, { "epoch": 0.43, "grad_norm": 0.7395838499069214, "learning_rate": 1.262658466566105e-05, "loss": 2.0963, "step": 12778 }, { "epoch": 0.43, "grad_norm": 0.7667689919471741, "learning_rate": 1.2625559077262188e-05, "loss": 2.1016, "step": 12779 }, { "epoch": 0.43, "grad_norm": 0.7195752859115601, "learning_rate": 1.2624533459201302e-05, "loss": 2.1036, "step": 12780 }, { "epoch": 0.43, "grad_norm": 0.7402536869049072, "learning_rate": 1.2623507811489974e-05, "loss": 2.0956, "step": 12781 }, { "epoch": 0.43, "grad_norm": 0.7418485283851624, "learning_rate": 1.2622482134139792e-05, "loss": 2.2098, "step": 12782 }, { "epoch": 0.43, "grad_norm": 0.7422534823417664, "learning_rate": 1.2621456427162345e-05, "loss": 2.1005, "step": 12783 }, { "epoch": 0.43, "grad_norm": 0.7154671549797058, "learning_rate": 1.2620430690569218e-05, "loss": 2.0454, "step": 12784 }, { "epoch": 0.43, "grad_norm": 0.7359423637390137, "learning_rate": 1.2619404924372001e-05, "loss": 2.0536, "step": 12785 }, { "epoch": 0.43, "grad_norm": 0.7370131015777588, "learning_rate": 1.2618379128582282e-05, "loss": 2.1466, "step": 12786 }, { "epoch": 0.43, "grad_norm": 0.7296870946884155, "learning_rate": 1.2617353303211651e-05, "loss": 2.1024, "step": 12787 }, { "epoch": 0.43, "grad_norm": 0.7170761227607727, "learning_rate": 1.2616327448271695e-05, "loss": 1.9707, "step": 12788 }, { "epoch": 0.43, "grad_norm": 0.7660859823226929, "learning_rate": 1.2615301563774007e-05, "loss": 2.1363, "step": 12789 }, { "epoch": 0.43, "grad_norm": 0.7266695499420166, "learning_rate": 1.2614275649730172e-05, "loss": 2.063, "step": 12790 }, { "epoch": 0.43, "grad_norm": 0.7092355489730835, "learning_rate": 1.2613249706151785e-05, "loss": 2.0569, "step": 12791 }, { "epoch": 0.43, "grad_norm": 0.7087313532829285, "learning_rate": 1.2612223733050431e-05, "loss": 2.067, "step": 12792 }, { "epoch": 0.43, "grad_norm": 0.7416631579399109, "learning_rate": 1.2611197730437709e-05, "loss": 2.0261, "step": 12793 }, { "epoch": 0.43, "grad_norm": 0.7330492734909058, "learning_rate": 1.2610171698325203e-05, "loss": 2.1244, "step": 12794 }, { "epoch": 0.43, "grad_norm": 0.7242865562438965, "learning_rate": 1.2609145636724505e-05, "loss": 2.17, "step": 12795 }, { "epoch": 0.43, "grad_norm": 0.7279788851737976, "learning_rate": 1.260811954564721e-05, "loss": 2.0504, "step": 12796 }, { "epoch": 0.43, "grad_norm": 0.7238636016845703, "learning_rate": 1.260709342510491e-05, "loss": 2.0592, "step": 12797 }, { "epoch": 0.43, "grad_norm": 0.738256573677063, "learning_rate": 1.2606067275109197e-05, "loss": 2.0793, "step": 12798 }, { "epoch": 0.43, "grad_norm": 0.7343834638595581, "learning_rate": 1.2605041095671663e-05, "loss": 2.1069, "step": 12799 }, { "epoch": 0.43, "grad_norm": 0.7321343421936035, "learning_rate": 1.2604014886803899e-05, "loss": 2.0975, "step": 12800 }, { "epoch": 0.43, "grad_norm": 0.7291301488876343, "learning_rate": 1.2602988648517503e-05, "loss": 2.109, "step": 12801 }, { "epoch": 0.43, "grad_norm": 0.7361016273498535, "learning_rate": 1.2601962380824067e-05, "loss": 2.0713, "step": 12802 }, { "epoch": 0.43, "grad_norm": 0.7367565631866455, "learning_rate": 1.2600936083735182e-05, "loss": 2.1107, "step": 12803 }, { "epoch": 0.43, "grad_norm": 0.7395138740539551, "learning_rate": 1.259990975726245e-05, "loss": 1.9857, "step": 12804 }, { "epoch": 0.43, "grad_norm": 0.7349457740783691, "learning_rate": 1.2598883401417456e-05, "loss": 2.0998, "step": 12805 }, { "epoch": 0.43, "grad_norm": 0.7453685402870178, "learning_rate": 1.2597857016211803e-05, "loss": 2.1413, "step": 12806 }, { "epoch": 0.43, "grad_norm": 0.7263466119766235, "learning_rate": 1.2596830601657086e-05, "loss": 2.1033, "step": 12807 }, { "epoch": 0.43, "grad_norm": 0.7682474851608276, "learning_rate": 1.2595804157764897e-05, "loss": 2.0659, "step": 12808 }, { "epoch": 0.43, "grad_norm": 0.7367715239524841, "learning_rate": 1.2594777684546833e-05, "loss": 2.1067, "step": 12809 }, { "epoch": 0.43, "grad_norm": 0.7507600784301758, "learning_rate": 1.259375118201449e-05, "loss": 2.1425, "step": 12810 }, { "epoch": 0.43, "grad_norm": 0.7448073029518127, "learning_rate": 1.2592724650179471e-05, "loss": 2.1449, "step": 12811 }, { "epoch": 0.43, "grad_norm": 0.7340902090072632, "learning_rate": 1.2591698089053366e-05, "loss": 2.0812, "step": 12812 }, { "epoch": 0.43, "grad_norm": 0.7985180616378784, "learning_rate": 1.2590671498647775e-05, "loss": 2.0712, "step": 12813 }, { "epoch": 0.43, "grad_norm": 0.7169725298881531, "learning_rate": 1.2589644878974295e-05, "loss": 2.0652, "step": 12814 }, { "epoch": 0.43, "grad_norm": 0.709062933921814, "learning_rate": 1.2588618230044522e-05, "loss": 2.0729, "step": 12815 }, { "epoch": 0.43, "grad_norm": 0.7615477442741394, "learning_rate": 1.2587591551870065e-05, "loss": 2.1306, "step": 12816 }, { "epoch": 0.43, "grad_norm": 0.7726173996925354, "learning_rate": 1.2586564844462508e-05, "loss": 2.1059, "step": 12817 }, { "epoch": 0.43, "grad_norm": 0.7393383979797363, "learning_rate": 1.2585538107833461e-05, "loss": 2.0618, "step": 12818 }, { "epoch": 0.43, "grad_norm": 0.746898889541626, "learning_rate": 1.2584511341994519e-05, "loss": 2.0532, "step": 12819 }, { "epoch": 0.43, "grad_norm": 0.743028998374939, "learning_rate": 1.258348454695728e-05, "loss": 2.0486, "step": 12820 }, { "epoch": 0.43, "grad_norm": 0.7561843991279602, "learning_rate": 1.258245772273335e-05, "loss": 2.087, "step": 12821 }, { "epoch": 0.43, "grad_norm": 0.711010217666626, "learning_rate": 1.2581430869334325e-05, "loss": 2.077, "step": 12822 }, { "epoch": 0.43, "grad_norm": 0.7202813029289246, "learning_rate": 1.2580403986771805e-05, "loss": 2.049, "step": 12823 }, { "epoch": 0.43, "grad_norm": 0.7540672421455383, "learning_rate": 1.25793770750574e-05, "loss": 2.0742, "step": 12824 }, { "epoch": 0.43, "grad_norm": 0.7332183718681335, "learning_rate": 1.2578350134202695e-05, "loss": 2.1099, "step": 12825 }, { "epoch": 0.43, "grad_norm": 0.7302794456481934, "learning_rate": 1.2577323164219306e-05, "loss": 2.0679, "step": 12826 }, { "epoch": 0.43, "grad_norm": 0.7282748818397522, "learning_rate": 1.2576296165118832e-05, "loss": 2.0665, "step": 12827 }, { "epoch": 0.43, "grad_norm": 0.7057153582572937, "learning_rate": 1.2575269136912869e-05, "loss": 2.0825, "step": 12828 }, { "epoch": 0.43, "grad_norm": 0.7553389072418213, "learning_rate": 1.2574242079613029e-05, "loss": 2.1098, "step": 12829 }, { "epoch": 0.43, "grad_norm": 0.7073999047279358, "learning_rate": 1.2573214993230909e-05, "loss": 2.0818, "step": 12830 }, { "epoch": 0.43, "grad_norm": 0.7208580374717712, "learning_rate": 1.2572187877778115e-05, "loss": 2.0355, "step": 12831 }, { "epoch": 0.43, "grad_norm": 0.7796158194541931, "learning_rate": 1.2571160733266248e-05, "loss": 2.1315, "step": 12832 }, { "epoch": 0.43, "grad_norm": 0.720184862613678, "learning_rate": 1.2570133559706914e-05, "loss": 2.0471, "step": 12833 }, { "epoch": 0.43, "grad_norm": 0.7150482535362244, "learning_rate": 1.2569106357111717e-05, "loss": 2.1031, "step": 12834 }, { "epoch": 0.43, "grad_norm": 0.7584150433540344, "learning_rate": 1.2568079125492265e-05, "loss": 2.1448, "step": 12835 }, { "epoch": 0.43, "grad_norm": 0.728193998336792, "learning_rate": 1.2567051864860156e-05, "loss": 2.1453, "step": 12836 }, { "epoch": 0.43, "grad_norm": 0.7228482961654663, "learning_rate": 1.2566024575227004e-05, "loss": 2.0512, "step": 12837 }, { "epoch": 0.43, "grad_norm": 0.7330294251441956, "learning_rate": 1.2564997256604407e-05, "loss": 2.1627, "step": 12838 }, { "epoch": 0.43, "grad_norm": 0.7319846749305725, "learning_rate": 1.2563969909003972e-05, "loss": 2.0857, "step": 12839 }, { "epoch": 0.43, "grad_norm": 0.7598676681518555, "learning_rate": 1.2562942532437314e-05, "loss": 2.103, "step": 12840 }, { "epoch": 0.43, "grad_norm": 0.7345932722091675, "learning_rate": 1.2561915126916029e-05, "loss": 2.0929, "step": 12841 }, { "epoch": 0.43, "grad_norm": 0.7150911688804626, "learning_rate": 1.2560887692451728e-05, "loss": 2.1287, "step": 12842 }, { "epoch": 0.43, "grad_norm": 0.7236700654029846, "learning_rate": 1.255986022905602e-05, "loss": 2.0395, "step": 12843 }, { "epoch": 0.43, "grad_norm": 0.7608015537261963, "learning_rate": 1.2558832736740511e-05, "loss": 2.087, "step": 12844 }, { "epoch": 0.43, "grad_norm": 0.7441733479499817, "learning_rate": 1.2557805215516809e-05, "loss": 2.1509, "step": 12845 }, { "epoch": 0.43, "grad_norm": 0.7206248641014099, "learning_rate": 1.2556777665396525e-05, "loss": 2.1258, "step": 12846 }, { "epoch": 0.43, "grad_norm": 0.7422816157341003, "learning_rate": 1.2555750086391263e-05, "loss": 2.1369, "step": 12847 }, { "epoch": 0.43, "grad_norm": 0.7217992544174194, "learning_rate": 1.2554722478512633e-05, "loss": 2.0931, "step": 12848 }, { "epoch": 0.43, "grad_norm": 0.7503949403762817, "learning_rate": 1.2553694841772251e-05, "loss": 2.1338, "step": 12849 }, { "epoch": 0.43, "grad_norm": 0.7418839335441589, "learning_rate": 1.2552667176181714e-05, "loss": 2.0373, "step": 12850 }, { "epoch": 0.43, "grad_norm": 0.7647033929824829, "learning_rate": 1.2551639481752647e-05, "loss": 2.1165, "step": 12851 }, { "epoch": 0.43, "grad_norm": 0.753045380115509, "learning_rate": 1.2550611758496647e-05, "loss": 2.1189, "step": 12852 }, { "epoch": 0.43, "grad_norm": 0.7317849397659302, "learning_rate": 1.2549584006425333e-05, "loss": 2.0848, "step": 12853 }, { "epoch": 0.43, "grad_norm": 0.7443662285804749, "learning_rate": 1.2548556225550312e-05, "loss": 2.1085, "step": 12854 }, { "epoch": 0.43, "grad_norm": 0.767545223236084, "learning_rate": 1.2547528415883195e-05, "loss": 2.0834, "step": 12855 }, { "epoch": 0.43, "grad_norm": 0.7341024279594421, "learning_rate": 1.2546500577435597e-05, "loss": 2.142, "step": 12856 }, { "epoch": 0.43, "grad_norm": 0.7362480163574219, "learning_rate": 1.2545472710219127e-05, "loss": 2.1704, "step": 12857 }, { "epoch": 0.43, "grad_norm": 0.7114051580429077, "learning_rate": 1.2544444814245398e-05, "loss": 2.0947, "step": 12858 }, { "epoch": 0.43, "grad_norm": 0.7444718480110168, "learning_rate": 1.2543416889526023e-05, "loss": 2.0388, "step": 12859 }, { "epoch": 0.43, "grad_norm": 0.7749068737030029, "learning_rate": 1.2542388936072616e-05, "loss": 2.0959, "step": 12860 }, { "epoch": 0.43, "grad_norm": 0.7396832704544067, "learning_rate": 1.2541360953896784e-05, "loss": 2.1142, "step": 12861 }, { "epoch": 0.43, "grad_norm": 0.7276213765144348, "learning_rate": 1.2540332943010152e-05, "loss": 2.1054, "step": 12862 }, { "epoch": 0.43, "grad_norm": 0.7538001537322998, "learning_rate": 1.2539304903424323e-05, "loss": 2.1327, "step": 12863 }, { "epoch": 0.43, "grad_norm": 0.7564075589179993, "learning_rate": 1.2538276835150916e-05, "loss": 2.0927, "step": 12864 }, { "epoch": 0.43, "grad_norm": 0.7238653302192688, "learning_rate": 1.2537248738201547e-05, "loss": 2.1118, "step": 12865 }, { "epoch": 0.43, "grad_norm": 0.7505905628204346, "learning_rate": 1.2536220612587824e-05, "loss": 2.0343, "step": 12866 }, { "epoch": 0.43, "grad_norm": 0.7649300694465637, "learning_rate": 1.253519245832137e-05, "loss": 2.0728, "step": 12867 }, { "epoch": 0.43, "grad_norm": 0.717523455619812, "learning_rate": 1.2534164275413799e-05, "loss": 2.0953, "step": 12868 }, { "epoch": 0.43, "grad_norm": 0.7451853156089783, "learning_rate": 1.253313606387672e-05, "loss": 2.0617, "step": 12869 }, { "epoch": 0.43, "grad_norm": 0.6994777321815491, "learning_rate": 1.2532107823721758e-05, "loss": 2.0686, "step": 12870 }, { "epoch": 0.43, "grad_norm": 0.7746158242225647, "learning_rate": 1.2531079554960527e-05, "loss": 2.0949, "step": 12871 }, { "epoch": 0.43, "grad_norm": 0.7163287401199341, "learning_rate": 1.2530051257604639e-05, "loss": 2.1098, "step": 12872 }, { "epoch": 0.43, "grad_norm": 0.7302113771438599, "learning_rate": 1.2529022931665718e-05, "loss": 2.1206, "step": 12873 }, { "epoch": 0.43, "grad_norm": 0.7289221286773682, "learning_rate": 1.2527994577155375e-05, "loss": 2.0449, "step": 12874 }, { "epoch": 0.43, "grad_norm": 0.7118273973464966, "learning_rate": 1.2526966194085236e-05, "loss": 2.0733, "step": 12875 }, { "epoch": 0.43, "grad_norm": 0.7065019607543945, "learning_rate": 1.2525937782466908e-05, "loss": 2.0951, "step": 12876 }, { "epoch": 0.43, "grad_norm": 0.7398509979248047, "learning_rate": 1.2524909342312017e-05, "loss": 2.1081, "step": 12877 }, { "epoch": 0.43, "grad_norm": 0.7744411826133728, "learning_rate": 1.2523880873632181e-05, "loss": 2.1407, "step": 12878 }, { "epoch": 0.43, "grad_norm": 0.7451678514480591, "learning_rate": 1.2522852376439016e-05, "loss": 2.0842, "step": 12879 }, { "epoch": 0.43, "grad_norm": 0.7644221782684326, "learning_rate": 1.2521823850744146e-05, "loss": 2.0714, "step": 12880 }, { "epoch": 0.43, "grad_norm": 0.7232552170753479, "learning_rate": 1.2520795296559188e-05, "loss": 2.0825, "step": 12881 }, { "epoch": 0.43, "grad_norm": 0.7300330996513367, "learning_rate": 1.2519766713895762e-05, "loss": 2.0915, "step": 12882 }, { "epoch": 0.43, "grad_norm": 0.700222373008728, "learning_rate": 1.2518738102765486e-05, "loss": 2.0966, "step": 12883 }, { "epoch": 0.43, "grad_norm": 0.709098756313324, "learning_rate": 1.2517709463179986e-05, "loss": 2.1471, "step": 12884 }, { "epoch": 0.43, "grad_norm": 0.7355266213417053, "learning_rate": 1.251668079515088e-05, "loss": 2.1091, "step": 12885 }, { "epoch": 0.43, "grad_norm": 0.7305822372436523, "learning_rate": 1.2515652098689785e-05, "loss": 2.1556, "step": 12886 }, { "epoch": 0.43, "grad_norm": 0.776351809501648, "learning_rate": 1.251462337380833e-05, "loss": 2.1657, "step": 12887 }, { "epoch": 0.43, "grad_norm": 0.7186362743377686, "learning_rate": 1.2513594620518133e-05, "loss": 2.1161, "step": 12888 }, { "epoch": 0.43, "grad_norm": 0.7281479239463806, "learning_rate": 1.251256583883082e-05, "loss": 2.048, "step": 12889 }, { "epoch": 0.43, "grad_norm": 0.728370726108551, "learning_rate": 1.2511537028758007e-05, "loss": 2.0607, "step": 12890 }, { "epoch": 0.43, "grad_norm": 0.7770498394966125, "learning_rate": 1.2510508190311322e-05, "loss": 2.1291, "step": 12891 }, { "epoch": 0.43, "grad_norm": 0.7721983194351196, "learning_rate": 1.2509479323502384e-05, "loss": 2.0398, "step": 12892 }, { "epoch": 0.43, "grad_norm": 0.7382127642631531, "learning_rate": 1.2508450428342824e-05, "loss": 2.0664, "step": 12893 }, { "epoch": 0.43, "grad_norm": 0.718222439289093, "learning_rate": 1.250742150484426e-05, "loss": 2.0573, "step": 12894 }, { "epoch": 0.43, "grad_norm": 0.7147057056427002, "learning_rate": 1.2506392553018319e-05, "loss": 2.0341, "step": 12895 }, { "epoch": 0.43, "grad_norm": 0.7591274976730347, "learning_rate": 1.2505363572876617e-05, "loss": 2.0766, "step": 12896 }, { "epoch": 0.43, "grad_norm": 0.747122049331665, "learning_rate": 1.2504334564430792e-05, "loss": 2.0198, "step": 12897 }, { "epoch": 0.43, "grad_norm": 0.7370492815971375, "learning_rate": 1.250330552769246e-05, "loss": 2.0957, "step": 12898 }, { "epoch": 0.43, "grad_norm": 0.7714354395866394, "learning_rate": 1.2502276462673248e-05, "loss": 2.078, "step": 12899 }, { "epoch": 0.43, "grad_norm": 0.7045102119445801, "learning_rate": 1.2501247369384788e-05, "loss": 2.0842, "step": 12900 }, { "epoch": 0.43, "grad_norm": 0.7832626700401306, "learning_rate": 1.2500218247838698e-05, "loss": 2.0752, "step": 12901 }, { "epoch": 0.43, "grad_norm": 0.7434380650520325, "learning_rate": 1.2499189098046607e-05, "loss": 2.1218, "step": 12902 }, { "epoch": 0.43, "grad_norm": 0.7311598658561707, "learning_rate": 1.2498159920020143e-05, "loss": 2.1231, "step": 12903 }, { "epoch": 0.43, "grad_norm": 0.7336952090263367, "learning_rate": 1.249713071377093e-05, "loss": 2.0299, "step": 12904 }, { "epoch": 0.43, "grad_norm": 0.7524884343147278, "learning_rate": 1.24961014793106e-05, "loss": 2.0857, "step": 12905 }, { "epoch": 0.43, "grad_norm": 0.7341942191123962, "learning_rate": 1.249507221665078e-05, "loss": 2.0875, "step": 12906 }, { "epoch": 0.43, "grad_norm": 0.7251594066619873, "learning_rate": 1.2494042925803092e-05, "loss": 2.1256, "step": 12907 }, { "epoch": 0.43, "grad_norm": 0.7407419681549072, "learning_rate": 1.2493013606779175e-05, "loss": 2.0983, "step": 12908 }, { "epoch": 0.43, "grad_norm": 0.7236939668655396, "learning_rate": 1.2491984259590646e-05, "loss": 2.0536, "step": 12909 }, { "epoch": 0.43, "grad_norm": 0.7367979288101196, "learning_rate": 1.2490954884249138e-05, "loss": 2.0309, "step": 12910 }, { "epoch": 0.43, "grad_norm": 0.7514662146568298, "learning_rate": 1.2489925480766288e-05, "loss": 2.1448, "step": 12911 }, { "epoch": 0.43, "grad_norm": 0.7420753240585327, "learning_rate": 1.2488896049153714e-05, "loss": 2.1275, "step": 12912 }, { "epoch": 0.43, "grad_norm": 0.736609935760498, "learning_rate": 1.2487866589423052e-05, "loss": 2.1294, "step": 12913 }, { "epoch": 0.43, "grad_norm": 0.7688855528831482, "learning_rate": 1.2486837101585932e-05, "loss": 2.0282, "step": 12914 }, { "epoch": 0.43, "grad_norm": 0.7054730653762817, "learning_rate": 1.2485807585653985e-05, "loss": 2.1025, "step": 12915 }, { "epoch": 0.43, "grad_norm": 0.7414841651916504, "learning_rate": 1.2484778041638838e-05, "loss": 2.1521, "step": 12916 }, { "epoch": 0.43, "grad_norm": 0.7420551776885986, "learning_rate": 1.2483748469552128e-05, "loss": 2.1072, "step": 12917 }, { "epoch": 0.43, "grad_norm": 0.7364272475242615, "learning_rate": 1.2482718869405479e-05, "loss": 2.11, "step": 12918 }, { "epoch": 0.43, "grad_norm": 0.7953136563301086, "learning_rate": 1.248168924121053e-05, "loss": 2.0976, "step": 12919 }, { "epoch": 0.43, "grad_norm": 0.7382527589797974, "learning_rate": 1.248065958497891e-05, "loss": 2.1382, "step": 12920 }, { "epoch": 0.43, "grad_norm": 0.7572237849235535, "learning_rate": 1.2479629900722252e-05, "loss": 2.1144, "step": 12921 }, { "epoch": 0.43, "grad_norm": 0.7328273057937622, "learning_rate": 1.247860018845219e-05, "loss": 2.0848, "step": 12922 }, { "epoch": 0.43, "grad_norm": 0.7206224203109741, "learning_rate": 1.2477570448180355e-05, "loss": 2.085, "step": 12923 }, { "epoch": 0.43, "grad_norm": 0.7551538944244385, "learning_rate": 1.247654067991838e-05, "loss": 2.1005, "step": 12924 }, { "epoch": 0.43, "grad_norm": 0.7376437187194824, "learning_rate": 1.2475510883677902e-05, "loss": 2.169, "step": 12925 }, { "epoch": 0.43, "grad_norm": 0.722365140914917, "learning_rate": 1.247448105947055e-05, "loss": 2.1536, "step": 12926 }, { "epoch": 0.43, "grad_norm": 0.7430427670478821, "learning_rate": 1.247345120730796e-05, "loss": 2.0885, "step": 12927 }, { "epoch": 0.43, "grad_norm": 0.7104294300079346, "learning_rate": 1.2472421327201774e-05, "loss": 2.135, "step": 12928 }, { "epoch": 0.43, "grad_norm": 0.7420622110366821, "learning_rate": 1.2471391419163615e-05, "loss": 2.1251, "step": 12929 }, { "epoch": 0.43, "grad_norm": 0.7666173577308655, "learning_rate": 1.2470361483205125e-05, "loss": 2.0975, "step": 12930 }, { "epoch": 0.43, "grad_norm": 0.7457684874534607, "learning_rate": 1.2469331519337942e-05, "loss": 2.188, "step": 12931 }, { "epoch": 0.43, "grad_norm": 0.7165634036064148, "learning_rate": 1.2468301527573694e-05, "loss": 2.1345, "step": 12932 }, { "epoch": 0.43, "grad_norm": 0.708013117313385, "learning_rate": 1.2467271507924026e-05, "loss": 2.1187, "step": 12933 }, { "epoch": 0.43, "grad_norm": 0.7162837386131287, "learning_rate": 1.2466241460400567e-05, "loss": 2.0705, "step": 12934 }, { "epoch": 0.43, "grad_norm": 0.7297375798225403, "learning_rate": 1.2465211385014961e-05, "loss": 2.1602, "step": 12935 }, { "epoch": 0.43, "grad_norm": 0.7404950857162476, "learning_rate": 1.246418128177884e-05, "loss": 2.1462, "step": 12936 }, { "epoch": 0.43, "grad_norm": 0.7483963370323181, "learning_rate": 1.246315115070384e-05, "loss": 2.0395, "step": 12937 }, { "epoch": 0.43, "grad_norm": 0.744726300239563, "learning_rate": 1.2462120991801605e-05, "loss": 2.0548, "step": 12938 }, { "epoch": 0.43, "grad_norm": 0.7292644381523132, "learning_rate": 1.2461090805083774e-05, "loss": 2.0595, "step": 12939 }, { "epoch": 0.43, "grad_norm": 0.7418462634086609, "learning_rate": 1.2460060590561972e-05, "loss": 2.122, "step": 12940 }, { "epoch": 0.43, "grad_norm": 0.7440269589424133, "learning_rate": 1.2459030348247853e-05, "loss": 2.1191, "step": 12941 }, { "epoch": 0.43, "grad_norm": 0.7234408259391785, "learning_rate": 1.2458000078153052e-05, "loss": 2.0026, "step": 12942 }, { "epoch": 0.43, "grad_norm": 0.7557561993598938, "learning_rate": 1.24569697802892e-05, "loss": 2.0467, "step": 12943 }, { "epoch": 0.43, "grad_norm": 0.734746515750885, "learning_rate": 1.245593945466795e-05, "loss": 2.1099, "step": 12944 }, { "epoch": 0.43, "grad_norm": 0.742408275604248, "learning_rate": 1.2454909101300934e-05, "loss": 2.1039, "step": 12945 }, { "epoch": 0.43, "grad_norm": 0.7103086113929749, "learning_rate": 1.2453878720199792e-05, "loss": 2.0695, "step": 12946 }, { "epoch": 0.43, "grad_norm": 0.7477011680603027, "learning_rate": 1.2452848311376167e-05, "loss": 2.0753, "step": 12947 }, { "epoch": 0.43, "grad_norm": 0.743648111820221, "learning_rate": 1.2451817874841698e-05, "loss": 2.0901, "step": 12948 }, { "epoch": 0.43, "grad_norm": 0.709757924079895, "learning_rate": 1.245078741060803e-05, "loss": 2.0919, "step": 12949 }, { "epoch": 0.43, "grad_norm": 0.7295517325401306, "learning_rate": 1.24497569186868e-05, "loss": 2.1181, "step": 12950 }, { "epoch": 0.43, "grad_norm": 0.7430495023727417, "learning_rate": 1.2448726399089652e-05, "loss": 2.0155, "step": 12951 }, { "epoch": 0.43, "grad_norm": 0.7456165552139282, "learning_rate": 1.244769585182823e-05, "loss": 2.0471, "step": 12952 }, { "epoch": 0.43, "grad_norm": 0.75956791639328, "learning_rate": 1.2446665276914174e-05, "loss": 2.1012, "step": 12953 }, { "epoch": 0.43, "grad_norm": 0.7669713497161865, "learning_rate": 1.2445634674359126e-05, "loss": 2.1117, "step": 12954 }, { "epoch": 0.43, "grad_norm": 0.7304967641830444, "learning_rate": 1.2444604044174734e-05, "loss": 2.1065, "step": 12955 }, { "epoch": 0.43, "grad_norm": 0.7389522790908813, "learning_rate": 1.2443573386372636e-05, "loss": 2.0986, "step": 12956 }, { "epoch": 0.43, "grad_norm": 0.7395791411399841, "learning_rate": 1.2442542700964477e-05, "loss": 2.1404, "step": 12957 }, { "epoch": 0.43, "grad_norm": 0.730171263217926, "learning_rate": 1.2441511987961906e-05, "loss": 2.0623, "step": 12958 }, { "epoch": 0.43, "grad_norm": 0.7415866851806641, "learning_rate": 1.2440481247376559e-05, "loss": 2.0379, "step": 12959 }, { "epoch": 0.43, "grad_norm": 0.741508960723877, "learning_rate": 1.2439450479220088e-05, "loss": 2.1165, "step": 12960 }, { "epoch": 0.43, "grad_norm": 0.7084594368934631, "learning_rate": 1.2438419683504133e-05, "loss": 2.0718, "step": 12961 }, { "epoch": 0.43, "grad_norm": 0.7417922019958496, "learning_rate": 1.2437388860240342e-05, "loss": 2.0931, "step": 12962 }, { "epoch": 0.43, "grad_norm": 0.733667254447937, "learning_rate": 1.2436358009440359e-05, "loss": 2.0493, "step": 12963 }, { "epoch": 0.43, "grad_norm": 0.742735743522644, "learning_rate": 1.2435327131115835e-05, "loss": 2.0443, "step": 12964 }, { "epoch": 0.43, "grad_norm": 0.7243116497993469, "learning_rate": 1.2434296225278409e-05, "loss": 2.0384, "step": 12965 }, { "epoch": 0.43, "grad_norm": 0.737555980682373, "learning_rate": 1.2433265291939734e-05, "loss": 2.073, "step": 12966 }, { "epoch": 0.43, "grad_norm": 0.7015740275382996, "learning_rate": 1.243223433111145e-05, "loss": 2.0985, "step": 12967 }, { "epoch": 0.43, "grad_norm": 0.7190524339675903, "learning_rate": 1.243120334280521e-05, "loss": 2.1368, "step": 12968 }, { "epoch": 0.43, "grad_norm": 0.7631261348724365, "learning_rate": 1.2430172327032658e-05, "loss": 2.0654, "step": 12969 }, { "epoch": 0.43, "grad_norm": 0.7269861102104187, "learning_rate": 1.2429141283805445e-05, "loss": 2.0432, "step": 12970 }, { "epoch": 0.43, "grad_norm": 0.7421794533729553, "learning_rate": 1.2428110213135215e-05, "loss": 2.1662, "step": 12971 }, { "epoch": 0.43, "grad_norm": 0.7735297679901123, "learning_rate": 1.2427079115033623e-05, "loss": 2.0923, "step": 12972 }, { "epoch": 0.43, "grad_norm": 0.7318286299705505, "learning_rate": 1.2426047989512308e-05, "loss": 2.0134, "step": 12973 }, { "epoch": 0.43, "grad_norm": 0.7278672456741333, "learning_rate": 1.2425016836582928e-05, "loss": 2.0582, "step": 12974 }, { "epoch": 0.43, "grad_norm": 0.7466635704040527, "learning_rate": 1.2423985656257132e-05, "loss": 2.0925, "step": 12975 }, { "epoch": 0.43, "grad_norm": 0.7345931529998779, "learning_rate": 1.242295444854656e-05, "loss": 2.1243, "step": 12976 }, { "epoch": 0.43, "grad_norm": 0.7147917151451111, "learning_rate": 1.2421923213462874e-05, "loss": 2.0957, "step": 12977 }, { "epoch": 0.43, "grad_norm": 0.7575796246528625, "learning_rate": 1.2420891951017717e-05, "loss": 2.1344, "step": 12978 }, { "epoch": 0.43, "grad_norm": 0.730445921421051, "learning_rate": 1.2419860661222743e-05, "loss": 2.1346, "step": 12979 }, { "epoch": 0.43, "grad_norm": 0.7233250737190247, "learning_rate": 1.2418829344089598e-05, "loss": 2.0304, "step": 12980 }, { "epoch": 0.43, "grad_norm": 0.7203359603881836, "learning_rate": 1.241779799962994e-05, "loss": 2.082, "step": 12981 }, { "epoch": 0.43, "grad_norm": 0.7810947299003601, "learning_rate": 1.2416766627855415e-05, "loss": 2.1888, "step": 12982 }, { "epoch": 0.43, "grad_norm": 0.7359386682510376, "learning_rate": 1.2415735228777676e-05, "loss": 1.9999, "step": 12983 }, { "epoch": 0.43, "grad_norm": 0.711155354976654, "learning_rate": 1.2414703802408378e-05, "loss": 2.0707, "step": 12984 }, { "epoch": 0.43, "grad_norm": 0.7029881477355957, "learning_rate": 1.2413672348759171e-05, "loss": 2.0587, "step": 12985 }, { "epoch": 0.43, "grad_norm": 0.7210840582847595, "learning_rate": 1.2412640867841712e-05, "loss": 2.0736, "step": 12986 }, { "epoch": 0.43, "grad_norm": 0.7426431179046631, "learning_rate": 1.2411609359667645e-05, "loss": 2.1838, "step": 12987 }, { "epoch": 0.43, "grad_norm": 0.7290809750556946, "learning_rate": 1.2410577824248632e-05, "loss": 2.0765, "step": 12988 }, { "epoch": 0.43, "grad_norm": 0.7348223328590393, "learning_rate": 1.2409546261596318e-05, "loss": 2.0899, "step": 12989 }, { "epoch": 0.43, "grad_norm": 0.7185832262039185, "learning_rate": 1.2408514671722372e-05, "loss": 2.0783, "step": 12990 }, { "epoch": 0.43, "grad_norm": 0.7139955759048462, "learning_rate": 1.2407483054638432e-05, "loss": 2.1295, "step": 12991 }, { "epoch": 0.43, "grad_norm": 0.744611918926239, "learning_rate": 1.240645141035616e-05, "loss": 2.081, "step": 12992 }, { "epoch": 0.43, "grad_norm": 0.7355293035507202, "learning_rate": 1.2405419738887213e-05, "loss": 2.089, "step": 12993 }, { "epoch": 0.43, "grad_norm": 0.7547792196273804, "learning_rate": 1.240438804024324e-05, "loss": 2.0476, "step": 12994 }, { "epoch": 0.43, "grad_norm": 0.7759978771209717, "learning_rate": 1.2403356314435901e-05, "loss": 2.1206, "step": 12995 }, { "epoch": 0.43, "grad_norm": 0.7384457588195801, "learning_rate": 1.2402324561476854e-05, "loss": 2.1033, "step": 12996 }, { "epoch": 0.43, "grad_norm": 0.7192955613136292, "learning_rate": 1.2401292781377749e-05, "loss": 2.1075, "step": 12997 }, { "epoch": 0.43, "grad_norm": 0.7396403551101685, "learning_rate": 1.2400260974150244e-05, "loss": 2.1064, "step": 12998 }, { "epoch": 0.43, "grad_norm": 0.7467607855796814, "learning_rate": 1.2399229139806004e-05, "loss": 2.0501, "step": 12999 }, { "epoch": 0.43, "grad_norm": 0.7314237952232361, "learning_rate": 1.2398197278356671e-05, "loss": 2.094, "step": 13000 }, { "epoch": 0.43, "grad_norm": 0.7604562044143677, "learning_rate": 1.2397165389813917e-05, "loss": 2.05, "step": 13001 }, { "epoch": 0.43, "grad_norm": 0.7421305775642395, "learning_rate": 1.239613347418939e-05, "loss": 2.0682, "step": 13002 }, { "epoch": 0.43, "grad_norm": 0.7043124437332153, "learning_rate": 1.2395101531494748e-05, "loss": 2.0826, "step": 13003 }, { "epoch": 0.43, "grad_norm": 0.7208982110023499, "learning_rate": 1.239406956174166e-05, "loss": 2.1075, "step": 13004 }, { "epoch": 0.43, "grad_norm": 0.7237157821655273, "learning_rate": 1.2393037564941773e-05, "loss": 2.1099, "step": 13005 }, { "epoch": 0.43, "grad_norm": 0.7213141918182373, "learning_rate": 1.239200554110675e-05, "loss": 2.0778, "step": 13006 }, { "epoch": 0.43, "grad_norm": 0.740760326385498, "learning_rate": 1.2390973490248253e-05, "loss": 2.0622, "step": 13007 }, { "epoch": 0.43, "grad_norm": 0.7101807594299316, "learning_rate": 1.2389941412377935e-05, "loss": 2.0304, "step": 13008 }, { "epoch": 0.43, "grad_norm": 0.7369447946548462, "learning_rate": 1.2388909307507462e-05, "loss": 2.0873, "step": 13009 }, { "epoch": 0.43, "grad_norm": 0.7201719880104065, "learning_rate": 1.2387877175648493e-05, "loss": 2.0637, "step": 13010 }, { "epoch": 0.43, "grad_norm": 0.735668420791626, "learning_rate": 1.238684501681268e-05, "loss": 2.0916, "step": 13011 }, { "epoch": 0.43, "grad_norm": 0.7282198667526245, "learning_rate": 1.2385812831011699e-05, "loss": 2.0079, "step": 13012 }, { "epoch": 0.43, "grad_norm": 0.733134925365448, "learning_rate": 1.23847806182572e-05, "loss": 2.0684, "step": 13013 }, { "epoch": 0.43, "grad_norm": 0.7027309536933899, "learning_rate": 1.2383748378560845e-05, "loss": 2.0627, "step": 13014 }, { "epoch": 0.43, "grad_norm": 0.7507836818695068, "learning_rate": 1.23827161119343e-05, "loss": 2.1422, "step": 13015 }, { "epoch": 0.43, "grad_norm": 0.7230039238929749, "learning_rate": 1.2381683818389226e-05, "loss": 2.0719, "step": 13016 }, { "epoch": 0.43, "grad_norm": 0.7450369000434875, "learning_rate": 1.2380651497937282e-05, "loss": 2.1228, "step": 13017 }, { "epoch": 0.43, "grad_norm": 0.7306612133979797, "learning_rate": 1.2379619150590131e-05, "loss": 2.0863, "step": 13018 }, { "epoch": 0.43, "grad_norm": 0.7152718901634216, "learning_rate": 1.237858677635944e-05, "loss": 2.0963, "step": 13019 }, { "epoch": 0.43, "grad_norm": 0.7400249242782593, "learning_rate": 1.237755437525687e-05, "loss": 2.1085, "step": 13020 }, { "epoch": 0.43, "grad_norm": 0.7396306991577148, "learning_rate": 1.2376521947294084e-05, "loss": 2.102, "step": 13021 }, { "epoch": 0.43, "grad_norm": 0.721130907535553, "learning_rate": 1.2375489492482743e-05, "loss": 2.0543, "step": 13022 }, { "epoch": 0.43, "grad_norm": 0.7092298269271851, "learning_rate": 1.2374457010834516e-05, "loss": 2.1064, "step": 13023 }, { "epoch": 0.43, "grad_norm": 0.7501256465911865, "learning_rate": 1.2373424502361067e-05, "loss": 2.1203, "step": 13024 }, { "epoch": 0.43, "grad_norm": 0.7504392862319946, "learning_rate": 1.2372391967074056e-05, "loss": 2.1429, "step": 13025 }, { "epoch": 0.43, "grad_norm": 0.7366483807563782, "learning_rate": 1.2371359404985152e-05, "loss": 2.1198, "step": 13026 }, { "epoch": 0.43, "grad_norm": 0.7139244079589844, "learning_rate": 1.237032681610602e-05, "loss": 2.0325, "step": 13027 }, { "epoch": 0.43, "grad_norm": 0.7296825051307678, "learning_rate": 1.2369294200448323e-05, "loss": 2.0789, "step": 13028 }, { "epoch": 0.43, "grad_norm": 0.7670575380325317, "learning_rate": 1.2368261558023732e-05, "loss": 2.0786, "step": 13029 }, { "epoch": 0.43, "grad_norm": 0.7251813411712646, "learning_rate": 1.2367228888843905e-05, "loss": 2.0218, "step": 13030 }, { "epoch": 0.43, "grad_norm": 0.7382007837295532, "learning_rate": 1.2366196192920516e-05, "loss": 2.122, "step": 13031 }, { "epoch": 0.43, "grad_norm": 0.7294771075248718, "learning_rate": 1.2365163470265232e-05, "loss": 2.1225, "step": 13032 }, { "epoch": 0.43, "grad_norm": 0.7454811334609985, "learning_rate": 1.236413072088971e-05, "loss": 2.1003, "step": 13033 }, { "epoch": 0.43, "grad_norm": 0.7262408137321472, "learning_rate": 1.2363097944805629e-05, "loss": 2.0271, "step": 13034 }, { "epoch": 0.43, "grad_norm": 0.7219258546829224, "learning_rate": 1.2362065142024654e-05, "loss": 2.1094, "step": 13035 }, { "epoch": 0.43, "grad_norm": 0.7461423277854919, "learning_rate": 1.2361032312558447e-05, "loss": 2.0645, "step": 13036 }, { "epoch": 0.43, "grad_norm": 0.759750485420227, "learning_rate": 1.2359999456418685e-05, "loss": 2.1249, "step": 13037 }, { "epoch": 0.43, "grad_norm": 0.7096912860870361, "learning_rate": 1.235896657361703e-05, "loss": 2.0923, "step": 13038 }, { "epoch": 0.43, "grad_norm": 0.7406792640686035, "learning_rate": 1.2357933664165154e-05, "loss": 2.0667, "step": 13039 }, { "epoch": 0.43, "grad_norm": 0.7547255754470825, "learning_rate": 1.2356900728074726e-05, "loss": 2.0844, "step": 13040 }, { "epoch": 0.43, "grad_norm": 0.7590304017066956, "learning_rate": 1.2355867765357412e-05, "loss": 2.1176, "step": 13041 }, { "epoch": 0.43, "grad_norm": 0.7223701477050781, "learning_rate": 1.2354834776024886e-05, "loss": 2.1008, "step": 13042 }, { "epoch": 0.43, "grad_norm": 0.7323155999183655, "learning_rate": 1.235380176008882e-05, "loss": 2.1232, "step": 13043 }, { "epoch": 0.43, "grad_norm": 0.6999065279960632, "learning_rate": 1.2352768717560875e-05, "loss": 2.102, "step": 13044 }, { "epoch": 0.43, "grad_norm": 0.7324925661087036, "learning_rate": 1.2351735648452732e-05, "loss": 2.0873, "step": 13045 }, { "epoch": 0.43, "grad_norm": 0.7423679232597351, "learning_rate": 1.235070255277606e-05, "loss": 2.0432, "step": 13046 }, { "epoch": 0.43, "grad_norm": 0.7264320254325867, "learning_rate": 1.2349669430542522e-05, "loss": 2.028, "step": 13047 }, { "epoch": 0.43, "grad_norm": 0.7267599701881409, "learning_rate": 1.2348636281763801e-05, "loss": 1.9875, "step": 13048 }, { "epoch": 0.43, "grad_norm": 0.7498978972434998, "learning_rate": 1.234760310645156e-05, "loss": 2.1167, "step": 13049 }, { "epoch": 0.43, "grad_norm": 0.710991621017456, "learning_rate": 1.2346569904617477e-05, "loss": 2.1737, "step": 13050 }, { "epoch": 0.43, "grad_norm": 0.7034395337104797, "learning_rate": 1.2345536676273222e-05, "loss": 2.0488, "step": 13051 }, { "epoch": 0.43, "grad_norm": 0.72297602891922, "learning_rate": 1.2344503421430469e-05, "loss": 2.0898, "step": 13052 }, { "epoch": 0.43, "grad_norm": 0.7451843619346619, "learning_rate": 1.234347014010089e-05, "loss": 2.0423, "step": 13053 }, { "epoch": 0.43, "grad_norm": 0.7730289697647095, "learning_rate": 1.234243683229616e-05, "loss": 2.0815, "step": 13054 }, { "epoch": 0.43, "grad_norm": 0.733791708946228, "learning_rate": 1.2341403498027948e-05, "loss": 2.1385, "step": 13055 }, { "epoch": 0.43, "grad_norm": 0.7163063287734985, "learning_rate": 1.2340370137307933e-05, "loss": 2.0514, "step": 13056 }, { "epoch": 0.43, "grad_norm": 0.7582456469535828, "learning_rate": 1.233933675014779e-05, "loss": 2.1049, "step": 13057 }, { "epoch": 0.43, "grad_norm": 0.7381325364112854, "learning_rate": 1.2338303336559187e-05, "loss": 2.1334, "step": 13058 }, { "epoch": 0.43, "grad_norm": 0.7408602833747864, "learning_rate": 1.233726989655381e-05, "loss": 2.0824, "step": 13059 }, { "epoch": 0.43, "grad_norm": 0.7487793564796448, "learning_rate": 1.2336236430143323e-05, "loss": 2.027, "step": 13060 }, { "epoch": 0.43, "grad_norm": 0.7571076154708862, "learning_rate": 1.2335202937339406e-05, "loss": 2.0531, "step": 13061 }, { "epoch": 0.43, "grad_norm": 0.7500000596046448, "learning_rate": 1.2334169418153733e-05, "loss": 2.0554, "step": 13062 }, { "epoch": 0.43, "grad_norm": 0.7484603524208069, "learning_rate": 1.2333135872597985e-05, "loss": 2.132, "step": 13063 }, { "epoch": 0.43, "grad_norm": 0.7276855111122131, "learning_rate": 1.2332102300683835e-05, "loss": 1.9919, "step": 13064 }, { "epoch": 0.43, "grad_norm": 0.7594392895698547, "learning_rate": 1.233106870242296e-05, "loss": 2.0513, "step": 13065 }, { "epoch": 0.43, "grad_norm": 0.7225670218467712, "learning_rate": 1.2330035077827033e-05, "loss": 2.0747, "step": 13066 }, { "epoch": 0.43, "grad_norm": 0.7556294202804565, "learning_rate": 1.232900142690774e-05, "loss": 2.0421, "step": 13067 }, { "epoch": 0.43, "grad_norm": 0.791875958442688, "learning_rate": 1.2327967749676755e-05, "loss": 2.1779, "step": 13068 }, { "epoch": 0.43, "grad_norm": 0.7489445209503174, "learning_rate": 1.2326934046145749e-05, "loss": 2.0885, "step": 13069 }, { "epoch": 0.43, "grad_norm": 0.7299328446388245, "learning_rate": 1.232590031632641e-05, "loss": 2.1004, "step": 13070 }, { "epoch": 0.43, "grad_norm": 0.7820027470588684, "learning_rate": 1.232486656023041e-05, "loss": 2.1207, "step": 13071 }, { "epoch": 0.43, "grad_norm": 0.7115002274513245, "learning_rate": 1.2323832777869434e-05, "loss": 2.0616, "step": 13072 }, { "epoch": 0.43, "grad_norm": 0.7216504216194153, "learning_rate": 1.2322798969255154e-05, "loss": 2.1014, "step": 13073 }, { "epoch": 0.43, "grad_norm": 0.7520599961280823, "learning_rate": 1.2321765134399253e-05, "loss": 2.0291, "step": 13074 }, { "epoch": 0.44, "grad_norm": 0.7540664076805115, "learning_rate": 1.2320731273313409e-05, "loss": 2.086, "step": 13075 }, { "epoch": 0.44, "grad_norm": 0.6993480324745178, "learning_rate": 1.2319697386009304e-05, "loss": 2.0171, "step": 13076 }, { "epoch": 0.44, "grad_norm": 0.7562140226364136, "learning_rate": 1.2318663472498617e-05, "loss": 2.1042, "step": 13077 }, { "epoch": 0.44, "grad_norm": 0.7461341023445129, "learning_rate": 1.2317629532793029e-05, "loss": 2.1262, "step": 13078 }, { "epoch": 0.44, "grad_norm": 0.7607473731040955, "learning_rate": 1.2316595566904223e-05, "loss": 2.1007, "step": 13079 }, { "epoch": 0.44, "grad_norm": 0.7403892278671265, "learning_rate": 1.2315561574843874e-05, "loss": 2.0591, "step": 13080 }, { "epoch": 0.44, "grad_norm": 0.7353152632713318, "learning_rate": 1.2314527556623674e-05, "loss": 2.0821, "step": 13081 }, { "epoch": 0.44, "grad_norm": 0.7451971173286438, "learning_rate": 1.231349351225529e-05, "loss": 1.9626, "step": 13082 }, { "epoch": 0.44, "grad_norm": 0.7552878856658936, "learning_rate": 1.2312459441750419e-05, "loss": 2.0459, "step": 13083 }, { "epoch": 0.44, "grad_norm": 0.7366985082626343, "learning_rate": 1.2311425345120734e-05, "loss": 2.1556, "step": 13084 }, { "epoch": 0.44, "grad_norm": 0.7105645537376404, "learning_rate": 1.231039122237792e-05, "loss": 2.1362, "step": 13085 }, { "epoch": 0.44, "grad_norm": 0.7527480125427246, "learning_rate": 1.230935707353366e-05, "loss": 2.1095, "step": 13086 }, { "epoch": 0.44, "grad_norm": 0.7542600631713867, "learning_rate": 1.2308322898599636e-05, "loss": 2.0581, "step": 13087 }, { "epoch": 0.44, "grad_norm": 0.8106100559234619, "learning_rate": 1.2307288697587532e-05, "loss": 2.0799, "step": 13088 }, { "epoch": 0.44, "grad_norm": 0.7616980075836182, "learning_rate": 1.2306254470509033e-05, "loss": 2.1161, "step": 13089 }, { "epoch": 0.44, "grad_norm": 0.7653668522834778, "learning_rate": 1.2305220217375824e-05, "loss": 2.1513, "step": 13090 }, { "epoch": 0.44, "grad_norm": 0.7590305805206299, "learning_rate": 1.2304185938199588e-05, "loss": 2.0824, "step": 13091 }, { "epoch": 0.44, "grad_norm": 0.7175247073173523, "learning_rate": 1.2303151632992011e-05, "loss": 2.0704, "step": 13092 }, { "epoch": 0.44, "grad_norm": 0.7056856155395508, "learning_rate": 1.2302117301764773e-05, "loss": 2.0668, "step": 13093 }, { "epoch": 0.44, "grad_norm": 0.7867297530174255, "learning_rate": 1.2301082944529567e-05, "loss": 2.0789, "step": 13094 }, { "epoch": 0.44, "grad_norm": 0.7226760387420654, "learning_rate": 1.2300048561298072e-05, "loss": 2.1711, "step": 13095 }, { "epoch": 0.44, "grad_norm": 0.7539404630661011, "learning_rate": 1.2299014152081974e-05, "loss": 2.0695, "step": 13096 }, { "epoch": 0.44, "grad_norm": 0.7063410878181458, "learning_rate": 1.2297979716892968e-05, "loss": 2.0958, "step": 13097 }, { "epoch": 0.44, "grad_norm": 0.7316232919692993, "learning_rate": 1.2296945255742727e-05, "loss": 2.092, "step": 13098 }, { "epoch": 0.44, "grad_norm": 0.7146007418632507, "learning_rate": 1.2295910768642948e-05, "loss": 2.1059, "step": 13099 }, { "epoch": 0.44, "grad_norm": 0.7345675230026245, "learning_rate": 1.2294876255605315e-05, "loss": 2.0916, "step": 13100 }, { "epoch": 0.44, "grad_norm": 0.7280321717262268, "learning_rate": 1.2293841716641513e-05, "loss": 2.0848, "step": 13101 }, { "epoch": 0.44, "grad_norm": 0.7396214604377747, "learning_rate": 1.2292807151763233e-05, "loss": 2.1429, "step": 13102 }, { "epoch": 0.44, "grad_norm": 0.733751654624939, "learning_rate": 1.2291772560982162e-05, "loss": 2.029, "step": 13103 }, { "epoch": 0.44, "grad_norm": 0.7273739576339722, "learning_rate": 1.2290737944309986e-05, "loss": 2.1023, "step": 13104 }, { "epoch": 0.44, "grad_norm": 0.7427027821540833, "learning_rate": 1.2289703301758398e-05, "loss": 2.1286, "step": 13105 }, { "epoch": 0.44, "grad_norm": 0.6845645904541016, "learning_rate": 1.2288668633339083e-05, "loss": 2.0917, "step": 13106 }, { "epoch": 0.44, "grad_norm": 0.7375481128692627, "learning_rate": 1.2287633939063729e-05, "loss": 2.0495, "step": 13107 }, { "epoch": 0.44, "grad_norm": 0.7013950943946838, "learning_rate": 1.2286599218944028e-05, "loss": 2.0518, "step": 13108 }, { "epoch": 0.44, "grad_norm": 0.749984085559845, "learning_rate": 1.2285564472991671e-05, "loss": 2.1493, "step": 13109 }, { "epoch": 0.44, "grad_norm": 0.7206103801727295, "learning_rate": 1.2284529701218345e-05, "loss": 2.1019, "step": 13110 }, { "epoch": 0.44, "grad_norm": 0.7308148741722107, "learning_rate": 1.228349490363574e-05, "loss": 2.0938, "step": 13111 }, { "epoch": 0.44, "grad_norm": 0.7367468476295471, "learning_rate": 1.2282460080255549e-05, "loss": 2.0537, "step": 13112 }, { "epoch": 0.44, "grad_norm": 0.7396413683891296, "learning_rate": 1.228142523108946e-05, "loss": 2.077, "step": 13113 }, { "epoch": 0.44, "grad_norm": 0.7315376996994019, "learning_rate": 1.2280390356149173e-05, "loss": 2.0684, "step": 13114 }, { "epoch": 0.44, "grad_norm": 0.7459783554077148, "learning_rate": 1.2279355455446363e-05, "loss": 2.147, "step": 13115 }, { "epoch": 0.44, "grad_norm": 0.7214423418045044, "learning_rate": 1.2278320528992738e-05, "loss": 2.0684, "step": 13116 }, { "epoch": 0.44, "grad_norm": 0.7454709410667419, "learning_rate": 1.2277285576799978e-05, "loss": 2.1455, "step": 13117 }, { "epoch": 0.44, "grad_norm": 0.7578288912773132, "learning_rate": 1.227625059887978e-05, "loss": 2.0969, "step": 13118 }, { "epoch": 0.44, "grad_norm": 0.7165650129318237, "learning_rate": 1.2275215595243838e-05, "loss": 2.0942, "step": 13119 }, { "epoch": 0.44, "grad_norm": 0.7278130650520325, "learning_rate": 1.2274180565903846e-05, "loss": 2.0724, "step": 13120 }, { "epoch": 0.44, "grad_norm": 0.7341870665550232, "learning_rate": 1.227314551087149e-05, "loss": 2.0838, "step": 13121 }, { "epoch": 0.44, "grad_norm": 0.7364649176597595, "learning_rate": 1.2272110430158472e-05, "loss": 2.0031, "step": 13122 }, { "epoch": 0.44, "grad_norm": 0.7159063220024109, "learning_rate": 1.2271075323776482e-05, "loss": 2.0438, "step": 13123 }, { "epoch": 0.44, "grad_norm": 0.7325496077537537, "learning_rate": 1.2270040191737214e-05, "loss": 2.1087, "step": 13124 }, { "epoch": 0.44, "grad_norm": 0.7154690623283386, "learning_rate": 1.2269005034052362e-05, "loss": 2.0633, "step": 13125 }, { "epoch": 0.44, "grad_norm": 0.6962517499923706, "learning_rate": 1.226796985073362e-05, "loss": 2.0617, "step": 13126 }, { "epoch": 0.44, "grad_norm": 0.7379069924354553, "learning_rate": 1.2266934641792682e-05, "loss": 2.1081, "step": 13127 }, { "epoch": 0.44, "grad_norm": 0.7530523538589478, "learning_rate": 1.2265899407241248e-05, "loss": 2.0473, "step": 13128 }, { "epoch": 0.44, "grad_norm": 0.7387613654136658, "learning_rate": 1.2264864147091007e-05, "loss": 2.1031, "step": 13129 }, { "epoch": 0.44, "grad_norm": 0.7239238023757935, "learning_rate": 1.2263828861353661e-05, "loss": 2.0882, "step": 13130 }, { "epoch": 0.44, "grad_norm": 0.7356334328651428, "learning_rate": 1.2262793550040903e-05, "loss": 2.044, "step": 13131 }, { "epoch": 0.44, "grad_norm": 0.7270652651786804, "learning_rate": 1.226175821316443e-05, "loss": 2.0914, "step": 13132 }, { "epoch": 0.44, "grad_norm": 0.7258672118186951, "learning_rate": 1.2260722850735938e-05, "loss": 2.079, "step": 13133 }, { "epoch": 0.44, "grad_norm": 0.7671827077865601, "learning_rate": 1.2259687462767125e-05, "loss": 2.1014, "step": 13134 }, { "epoch": 0.44, "grad_norm": 0.7275815606117249, "learning_rate": 1.2258652049269684e-05, "loss": 2.1284, "step": 13135 }, { "epoch": 0.44, "grad_norm": 0.7642878293991089, "learning_rate": 1.225761661025532e-05, "loss": 2.154, "step": 13136 }, { "epoch": 0.44, "grad_norm": 0.7268238067626953, "learning_rate": 1.2256581145735722e-05, "loss": 2.1164, "step": 13137 }, { "epoch": 0.44, "grad_norm": 0.7394101619720459, "learning_rate": 1.2255545655722595e-05, "loss": 2.069, "step": 13138 }, { "epoch": 0.44, "grad_norm": 0.7537820935249329, "learning_rate": 1.2254510140227638e-05, "loss": 2.0637, "step": 13139 }, { "epoch": 0.44, "grad_norm": 0.7274131774902344, "learning_rate": 1.2253474599262543e-05, "loss": 2.0899, "step": 13140 }, { "epoch": 0.44, "grad_norm": 0.7554284334182739, "learning_rate": 1.2252439032839015e-05, "loss": 2.1211, "step": 13141 }, { "epoch": 0.44, "grad_norm": 0.7334622144699097, "learning_rate": 1.225140344096875e-05, "loss": 2.0343, "step": 13142 }, { "epoch": 0.44, "grad_norm": 0.7156754732131958, "learning_rate": 1.2250367823663447e-05, "loss": 2.1048, "step": 13143 }, { "epoch": 0.44, "grad_norm": 0.7202135324478149, "learning_rate": 1.2249332180934807e-05, "loss": 2.0942, "step": 13144 }, { "epoch": 0.44, "grad_norm": 0.7382623553276062, "learning_rate": 1.2248296512794532e-05, "loss": 2.0994, "step": 13145 }, { "epoch": 0.44, "grad_norm": 0.7413188219070435, "learning_rate": 1.2247260819254322e-05, "loss": 2.093, "step": 13146 }, { "epoch": 0.44, "grad_norm": 0.7307173609733582, "learning_rate": 1.2246225100325875e-05, "loss": 2.0524, "step": 13147 }, { "epoch": 0.44, "grad_norm": 0.7691468000411987, "learning_rate": 1.224518935602089e-05, "loss": 2.1094, "step": 13148 }, { "epoch": 0.44, "grad_norm": 0.731715738773346, "learning_rate": 1.2244153586351074e-05, "loss": 2.1303, "step": 13149 }, { "epoch": 0.44, "grad_norm": 0.714421272277832, "learning_rate": 1.2243117791328127e-05, "loss": 2.0974, "step": 13150 }, { "epoch": 0.44, "grad_norm": 0.72210693359375, "learning_rate": 1.2242081970963746e-05, "loss": 2.0474, "step": 13151 }, { "epoch": 0.44, "grad_norm": 0.7014721632003784, "learning_rate": 1.2241046125269642e-05, "loss": 2.0614, "step": 13152 }, { "epoch": 0.44, "grad_norm": 0.7401075959205627, "learning_rate": 1.224001025425751e-05, "loss": 1.9836, "step": 13153 }, { "epoch": 0.44, "grad_norm": 0.7199180126190186, "learning_rate": 1.2238974357939056e-05, "loss": 2.1379, "step": 13154 }, { "epoch": 0.44, "grad_norm": 0.762093186378479, "learning_rate": 1.2237938436325978e-05, "loss": 2.0546, "step": 13155 }, { "epoch": 0.44, "grad_norm": 0.7383498549461365, "learning_rate": 1.2236902489429988e-05, "loss": 2.0278, "step": 13156 }, { "epoch": 0.44, "grad_norm": 0.7315771579742432, "learning_rate": 1.223586651726278e-05, "loss": 2.0895, "step": 13157 }, { "epoch": 0.44, "grad_norm": 0.7298175096511841, "learning_rate": 1.2234830519836067e-05, "loss": 2.0403, "step": 13158 }, { "epoch": 0.44, "grad_norm": 0.7435483932495117, "learning_rate": 1.2233794497161545e-05, "loss": 2.0874, "step": 13159 }, { "epoch": 0.44, "grad_norm": 0.7397019267082214, "learning_rate": 1.2232758449250923e-05, "loss": 2.1538, "step": 13160 }, { "epoch": 0.44, "grad_norm": 0.7528139352798462, "learning_rate": 1.2231722376115907e-05, "loss": 2.1492, "step": 13161 }, { "epoch": 0.44, "grad_norm": 0.7568425536155701, "learning_rate": 1.2230686277768195e-05, "loss": 2.1088, "step": 13162 }, { "epoch": 0.44, "grad_norm": 0.7638435363769531, "learning_rate": 1.2229650154219501e-05, "loss": 2.068, "step": 13163 }, { "epoch": 0.44, "grad_norm": 0.7074065208435059, "learning_rate": 1.2228614005481525e-05, "loss": 2.1211, "step": 13164 }, { "epoch": 0.44, "grad_norm": 0.7352027297019958, "learning_rate": 1.2227577831565973e-05, "loss": 2.1219, "step": 13165 }, { "epoch": 0.44, "grad_norm": 0.7174952030181885, "learning_rate": 1.2226541632484553e-05, "loss": 2.0801, "step": 13166 }, { "epoch": 0.44, "grad_norm": 0.7294105291366577, "learning_rate": 1.222550540824897e-05, "loss": 2.0844, "step": 13167 }, { "epoch": 0.44, "grad_norm": 0.7511965036392212, "learning_rate": 1.2224469158870931e-05, "loss": 2.1647, "step": 13168 }, { "epoch": 0.44, "grad_norm": 0.722353458404541, "learning_rate": 1.2223432884362143e-05, "loss": 2.0466, "step": 13169 }, { "epoch": 0.44, "grad_norm": 0.7366239428520203, "learning_rate": 1.2222396584734314e-05, "loss": 2.1238, "step": 13170 }, { "epoch": 0.44, "grad_norm": 0.7566250562667847, "learning_rate": 1.2221360259999149e-05, "loss": 2.1302, "step": 13171 }, { "epoch": 0.44, "grad_norm": 0.75490802526474, "learning_rate": 1.222032391016836e-05, "loss": 2.1209, "step": 13172 }, { "epoch": 0.44, "grad_norm": 0.7112650275230408, "learning_rate": 1.221928753525365e-05, "loss": 2.1355, "step": 13173 }, { "epoch": 0.44, "grad_norm": 0.7453727126121521, "learning_rate": 1.2218251135266734e-05, "loss": 2.1415, "step": 13174 }, { "epoch": 0.44, "grad_norm": 0.7279874086380005, "learning_rate": 1.2217214710219315e-05, "loss": 2.0881, "step": 13175 }, { "epoch": 0.44, "grad_norm": 0.683152437210083, "learning_rate": 1.2216178260123103e-05, "loss": 2.009, "step": 13176 }, { "epoch": 0.44, "grad_norm": 0.7088492512702942, "learning_rate": 1.2215141784989808e-05, "loss": 2.1278, "step": 13177 }, { "epoch": 0.44, "grad_norm": 0.709507167339325, "learning_rate": 1.221410528483114e-05, "loss": 2.0926, "step": 13178 }, { "epoch": 0.44, "grad_norm": 0.7219312787055969, "learning_rate": 1.2213068759658806e-05, "loss": 2.0901, "step": 13179 }, { "epoch": 0.44, "grad_norm": 0.7445300817489624, "learning_rate": 1.2212032209484521e-05, "loss": 2.1051, "step": 13180 }, { "epoch": 0.44, "grad_norm": 0.7414700984954834, "learning_rate": 1.2210995634319991e-05, "loss": 2.089, "step": 13181 }, { "epoch": 0.44, "grad_norm": 0.7115605473518372, "learning_rate": 1.2209959034176928e-05, "loss": 2.1008, "step": 13182 }, { "epoch": 0.44, "grad_norm": 0.7357711791992188, "learning_rate": 1.2208922409067045e-05, "loss": 2.0417, "step": 13183 }, { "epoch": 0.44, "grad_norm": 0.7760270833969116, "learning_rate": 1.2207885759002047e-05, "loss": 2.139, "step": 13184 }, { "epoch": 0.44, "grad_norm": 0.7291125059127808, "learning_rate": 1.2206849083993654e-05, "loss": 2.0478, "step": 13185 }, { "epoch": 0.44, "grad_norm": 0.7175349593162537, "learning_rate": 1.2205812384053568e-05, "loss": 2.0609, "step": 13186 }, { "epoch": 0.44, "grad_norm": 0.7666062116622925, "learning_rate": 1.2204775659193513e-05, "loss": 2.0536, "step": 13187 }, { "epoch": 0.44, "grad_norm": 0.7302495241165161, "learning_rate": 1.2203738909425191e-05, "loss": 2.0662, "step": 13188 }, { "epoch": 0.44, "grad_norm": 0.7195479869842529, "learning_rate": 1.2202702134760318e-05, "loss": 2.0583, "step": 13189 }, { "epoch": 0.44, "grad_norm": 0.7674867510795593, "learning_rate": 1.2201665335210609e-05, "loss": 2.0538, "step": 13190 }, { "epoch": 0.44, "grad_norm": 0.7009322047233582, "learning_rate": 1.2200628510787774e-05, "loss": 2.0639, "step": 13191 }, { "epoch": 0.44, "grad_norm": 0.7210790514945984, "learning_rate": 1.2199591661503527e-05, "loss": 2.057, "step": 13192 }, { "epoch": 0.44, "grad_norm": 0.7276585102081299, "learning_rate": 1.2198554787369586e-05, "loss": 2.0125, "step": 13193 }, { "epoch": 0.44, "grad_norm": 0.751298189163208, "learning_rate": 1.2197517888397657e-05, "loss": 2.1108, "step": 13194 }, { "epoch": 0.44, "grad_norm": 0.7229934334754944, "learning_rate": 1.2196480964599461e-05, "loss": 2.1031, "step": 13195 }, { "epoch": 0.44, "grad_norm": 0.7401496767997742, "learning_rate": 1.2195444015986712e-05, "loss": 2.1331, "step": 13196 }, { "epoch": 0.44, "grad_norm": 0.7509361505508423, "learning_rate": 1.219440704257112e-05, "loss": 2.1087, "step": 13197 }, { "epoch": 0.44, "grad_norm": 0.7497627139091492, "learning_rate": 1.2193370044364405e-05, "loss": 2.1031, "step": 13198 }, { "epoch": 0.44, "grad_norm": 0.7191805243492126, "learning_rate": 1.2192333021378282e-05, "loss": 2.0454, "step": 13199 }, { "epoch": 0.44, "grad_norm": 0.748418390750885, "learning_rate": 1.2191295973624463e-05, "loss": 2.1085, "step": 13200 }, { "epoch": 0.44, "grad_norm": 0.712529182434082, "learning_rate": 1.2190258901114667e-05, "loss": 2.1191, "step": 13201 }, { "epoch": 0.44, "grad_norm": 0.7392669320106506, "learning_rate": 1.2189221803860609e-05, "loss": 2.1181, "step": 13202 }, { "epoch": 0.44, "grad_norm": 0.7241291403770447, "learning_rate": 1.2188184681874007e-05, "loss": 2.129, "step": 13203 }, { "epoch": 0.44, "grad_norm": 0.769792914390564, "learning_rate": 1.2187147535166577e-05, "loss": 2.097, "step": 13204 }, { "epoch": 0.44, "grad_norm": 0.7727922797203064, "learning_rate": 1.2186110363750035e-05, "loss": 2.1007, "step": 13205 }, { "epoch": 0.44, "grad_norm": 0.7304590940475464, "learning_rate": 1.2185073167636098e-05, "loss": 2.0325, "step": 13206 }, { "epoch": 0.44, "grad_norm": 0.7333456873893738, "learning_rate": 1.218403594683649e-05, "loss": 2.0828, "step": 13207 }, { "epoch": 0.44, "grad_norm": 0.7269657850265503, "learning_rate": 1.2182998701362918e-05, "loss": 2.0368, "step": 13208 }, { "epoch": 0.44, "grad_norm": 0.7209774851799011, "learning_rate": 1.2181961431227112e-05, "loss": 2.0632, "step": 13209 }, { "epoch": 0.44, "grad_norm": 0.7555052042007446, "learning_rate": 1.218092413644078e-05, "loss": 2.0664, "step": 13210 }, { "epoch": 0.44, "grad_norm": 0.7058494091033936, "learning_rate": 1.2179886817015645e-05, "loss": 2.0956, "step": 13211 }, { "epoch": 0.44, "grad_norm": 0.8538835048675537, "learning_rate": 1.2178849472963428e-05, "loss": 2.1221, "step": 13212 }, { "epoch": 0.44, "grad_norm": 0.7394633889198303, "learning_rate": 1.2177812104295848e-05, "loss": 2.0115, "step": 13213 }, { "epoch": 0.44, "grad_norm": 0.7214871644973755, "learning_rate": 1.2176774711024618e-05, "loss": 2.0205, "step": 13214 }, { "epoch": 0.44, "grad_norm": 0.7459919452667236, "learning_rate": 1.217573729316147e-05, "loss": 2.1065, "step": 13215 }, { "epoch": 0.44, "grad_norm": 0.7378609776496887, "learning_rate": 1.2174699850718113e-05, "loss": 2.1565, "step": 13216 }, { "epoch": 0.44, "grad_norm": 0.7099727988243103, "learning_rate": 1.217366238370627e-05, "loss": 2.0685, "step": 13217 }, { "epoch": 0.44, "grad_norm": 0.7468857169151306, "learning_rate": 1.2172624892137668e-05, "loss": 2.1075, "step": 13218 }, { "epoch": 0.44, "grad_norm": 0.7311460971832275, "learning_rate": 1.2171587376024019e-05, "loss": 2.0701, "step": 13219 }, { "epoch": 0.44, "grad_norm": 0.7185695171356201, "learning_rate": 1.2170549835377053e-05, "loss": 2.1327, "step": 13220 }, { "epoch": 0.44, "grad_norm": 0.7469179630279541, "learning_rate": 1.2169512270208483e-05, "loss": 2.1314, "step": 13221 }, { "epoch": 0.44, "grad_norm": 0.7485544681549072, "learning_rate": 1.2168474680530034e-05, "loss": 2.1216, "step": 13222 }, { "epoch": 0.44, "grad_norm": 0.7138108015060425, "learning_rate": 1.2167437066353433e-05, "loss": 2.0641, "step": 13223 }, { "epoch": 0.44, "grad_norm": 0.7264711856842041, "learning_rate": 1.2166399427690396e-05, "loss": 2.0888, "step": 13224 }, { "epoch": 0.44, "grad_norm": 0.7644013166427612, "learning_rate": 1.2165361764552649e-05, "loss": 2.1206, "step": 13225 }, { "epoch": 0.44, "grad_norm": 0.7458553314208984, "learning_rate": 1.2164324076951912e-05, "loss": 2.0934, "step": 13226 }, { "epoch": 0.44, "grad_norm": 0.7324727177619934, "learning_rate": 1.216328636489991e-05, "loss": 2.0704, "step": 13227 }, { "epoch": 0.44, "grad_norm": 0.7355753779411316, "learning_rate": 1.2162248628408366e-05, "loss": 2.0964, "step": 13228 }, { "epoch": 0.44, "grad_norm": 0.7430371046066284, "learning_rate": 1.2161210867489008e-05, "loss": 2.0361, "step": 13229 }, { "epoch": 0.44, "grad_norm": 0.7671518325805664, "learning_rate": 1.2160173082153553e-05, "loss": 2.0983, "step": 13230 }, { "epoch": 0.44, "grad_norm": 0.7220532894134521, "learning_rate": 1.2159135272413732e-05, "loss": 2.1486, "step": 13231 }, { "epoch": 0.44, "grad_norm": 0.7248452305793762, "learning_rate": 1.2158097438281262e-05, "loss": 2.081, "step": 13232 }, { "epoch": 0.44, "grad_norm": 0.70583176612854, "learning_rate": 1.2157059579767871e-05, "loss": 2.0682, "step": 13233 }, { "epoch": 0.44, "grad_norm": 0.717170000076294, "learning_rate": 1.215602169688529e-05, "loss": 2.1011, "step": 13234 }, { "epoch": 0.44, "grad_norm": 0.735986053943634, "learning_rate": 1.2154983789645237e-05, "loss": 1.9876, "step": 13235 }, { "epoch": 0.44, "grad_norm": 0.790681004524231, "learning_rate": 1.215394585805944e-05, "loss": 2.0056, "step": 13236 }, { "epoch": 0.44, "grad_norm": 0.7352068424224854, "learning_rate": 1.2152907902139624e-05, "loss": 2.1499, "step": 13237 }, { "epoch": 0.44, "grad_norm": 0.7338637113571167, "learning_rate": 1.2151869921897517e-05, "loss": 2.0664, "step": 13238 }, { "epoch": 0.44, "grad_norm": 0.724999189376831, "learning_rate": 1.2150831917344843e-05, "loss": 2.1252, "step": 13239 }, { "epoch": 0.44, "grad_norm": 0.725937008857727, "learning_rate": 1.2149793888493336e-05, "loss": 2.0532, "step": 13240 }, { "epoch": 0.44, "grad_norm": 0.7111504077911377, "learning_rate": 1.214875583535471e-05, "loss": 2.0967, "step": 13241 }, { "epoch": 0.44, "grad_norm": 0.738692045211792, "learning_rate": 1.2147717757940704e-05, "loss": 2.116, "step": 13242 }, { "epoch": 0.44, "grad_norm": 0.7376387715339661, "learning_rate": 1.2146679656263043e-05, "loss": 2.1008, "step": 13243 }, { "epoch": 0.44, "grad_norm": 0.7186775207519531, "learning_rate": 1.2145641530333449e-05, "loss": 2.0921, "step": 13244 }, { "epoch": 0.44, "grad_norm": 0.7621871829032898, "learning_rate": 1.214460338016366e-05, "loss": 2.0266, "step": 13245 }, { "epoch": 0.44, "grad_norm": 0.7311782240867615, "learning_rate": 1.2143565205765395e-05, "loss": 2.094, "step": 13246 }, { "epoch": 0.44, "grad_norm": 0.7438217401504517, "learning_rate": 1.214252700715039e-05, "loss": 2.0656, "step": 13247 }, { "epoch": 0.44, "grad_norm": 0.7314299941062927, "learning_rate": 1.2141488784330367e-05, "loss": 2.0232, "step": 13248 }, { "epoch": 0.44, "grad_norm": 0.7191365361213684, "learning_rate": 1.214045053731706e-05, "loss": 2.0018, "step": 13249 }, { "epoch": 0.44, "grad_norm": 0.7546627521514893, "learning_rate": 1.2139412266122194e-05, "loss": 2.0866, "step": 13250 }, { "epoch": 0.44, "grad_norm": 0.7303929924964905, "learning_rate": 1.2138373970757508e-05, "loss": 2.0726, "step": 13251 }, { "epoch": 0.44, "grad_norm": 0.7280935645103455, "learning_rate": 1.2137335651234721e-05, "loss": 2.1202, "step": 13252 }, { "epoch": 0.44, "grad_norm": 0.7485957741737366, "learning_rate": 1.213629730756557e-05, "loss": 2.1531, "step": 13253 }, { "epoch": 0.44, "grad_norm": 0.7577044367790222, "learning_rate": 1.2135258939761787e-05, "loss": 2.1567, "step": 13254 }, { "epoch": 0.44, "grad_norm": 0.732736349105835, "learning_rate": 1.2134220547835096e-05, "loss": 2.1081, "step": 13255 }, { "epoch": 0.44, "grad_norm": 0.747572660446167, "learning_rate": 1.2133182131797234e-05, "loss": 2.0604, "step": 13256 }, { "epoch": 0.44, "grad_norm": 0.779145359992981, "learning_rate": 1.213214369165993e-05, "loss": 2.097, "step": 13257 }, { "epoch": 0.44, "grad_norm": 0.728493869304657, "learning_rate": 1.2131105227434916e-05, "loss": 2.0945, "step": 13258 }, { "epoch": 0.44, "grad_norm": 0.7282928228378296, "learning_rate": 1.2130066739133923e-05, "loss": 2.0741, "step": 13259 }, { "epoch": 0.44, "grad_norm": 0.7217252850532532, "learning_rate": 1.2129028226768686e-05, "loss": 2.0593, "step": 13260 }, { "epoch": 0.44, "grad_norm": 0.7185680866241455, "learning_rate": 1.2127989690350937e-05, "loss": 2.0446, "step": 13261 }, { "epoch": 0.44, "grad_norm": 0.7479565143585205, "learning_rate": 1.2126951129892406e-05, "loss": 2.1234, "step": 13262 }, { "epoch": 0.44, "grad_norm": 0.7399598956108093, "learning_rate": 1.2125912545404826e-05, "loss": 2.0747, "step": 13263 }, { "epoch": 0.44, "grad_norm": 0.794170081615448, "learning_rate": 1.2124873936899932e-05, "loss": 2.0976, "step": 13264 }, { "epoch": 0.44, "grad_norm": 0.7387720942497253, "learning_rate": 1.2123835304389462e-05, "loss": 2.0657, "step": 13265 }, { "epoch": 0.44, "grad_norm": 0.7227472066879272, "learning_rate": 1.212279664788514e-05, "loss": 2.1054, "step": 13266 }, { "epoch": 0.44, "grad_norm": 0.7274028658866882, "learning_rate": 1.2121757967398711e-05, "loss": 2.1271, "step": 13267 }, { "epoch": 0.44, "grad_norm": 0.8054218888282776, "learning_rate": 1.21207192629419e-05, "loss": 2.2246, "step": 13268 }, { "epoch": 0.44, "grad_norm": 0.7277166843414307, "learning_rate": 1.2119680534526447e-05, "loss": 2.1673, "step": 13269 }, { "epoch": 0.44, "grad_norm": 0.7338177561759949, "learning_rate": 1.2118641782164084e-05, "loss": 2.0853, "step": 13270 }, { "epoch": 0.44, "grad_norm": 0.7415596842765808, "learning_rate": 1.2117603005866549e-05, "loss": 2.1669, "step": 13271 }, { "epoch": 0.44, "grad_norm": 0.7345831990242004, "learning_rate": 1.2116564205645576e-05, "loss": 2.1641, "step": 13272 }, { "epoch": 0.44, "grad_norm": 0.7320764064788818, "learning_rate": 1.2115525381512902e-05, "loss": 1.9925, "step": 13273 }, { "epoch": 0.44, "grad_norm": 0.7361744046211243, "learning_rate": 1.211448653348026e-05, "loss": 2.0757, "step": 13274 }, { "epoch": 0.44, "grad_norm": 0.7120850086212158, "learning_rate": 1.211344766155939e-05, "loss": 2.0971, "step": 13275 }, { "epoch": 0.44, "grad_norm": 0.7386009097099304, "learning_rate": 1.211240876576203e-05, "loss": 2.0218, "step": 13276 }, { "epoch": 0.44, "grad_norm": 0.7775686383247375, "learning_rate": 1.2111369846099907e-05, "loss": 2.1285, "step": 13277 }, { "epoch": 0.44, "grad_norm": 0.7459964156150818, "learning_rate": 1.211033090258477e-05, "loss": 1.9862, "step": 13278 }, { "epoch": 0.44, "grad_norm": 0.743541955947876, "learning_rate": 1.2109291935228347e-05, "loss": 2.0848, "step": 13279 }, { "epoch": 0.44, "grad_norm": 0.7040044665336609, "learning_rate": 1.2108252944042385e-05, "loss": 2.13, "step": 13280 }, { "epoch": 0.44, "grad_norm": 0.7124125361442566, "learning_rate": 1.2107213929038615e-05, "loss": 2.0474, "step": 13281 }, { "epoch": 0.44, "grad_norm": 0.712355375289917, "learning_rate": 1.2106174890228775e-05, "loss": 1.9857, "step": 13282 }, { "epoch": 0.44, "grad_norm": 0.7673336863517761, "learning_rate": 1.2105135827624606e-05, "loss": 2.0899, "step": 13283 }, { "epoch": 0.44, "grad_norm": 0.7353999614715576, "learning_rate": 1.2104096741237847e-05, "loss": 2.1287, "step": 13284 }, { "epoch": 0.44, "grad_norm": 0.733677327632904, "learning_rate": 1.2103057631080236e-05, "loss": 2.0848, "step": 13285 }, { "epoch": 0.44, "grad_norm": 0.7404350638389587, "learning_rate": 1.2102018497163513e-05, "loss": 2.0185, "step": 13286 }, { "epoch": 0.44, "grad_norm": 0.7109466195106506, "learning_rate": 1.2100979339499415e-05, "loss": 2.0855, "step": 13287 }, { "epoch": 0.44, "grad_norm": 0.7178806066513062, "learning_rate": 1.2099940158099686e-05, "loss": 2.0765, "step": 13288 }, { "epoch": 0.44, "grad_norm": 0.728897750377655, "learning_rate": 1.2098900952976063e-05, "loss": 2.0555, "step": 13289 }, { "epoch": 0.44, "grad_norm": 0.7291553020477295, "learning_rate": 1.2097861724140286e-05, "loss": 2.0982, "step": 13290 }, { "epoch": 0.44, "grad_norm": 0.7267939448356628, "learning_rate": 1.2096822471604097e-05, "loss": 2.0674, "step": 13291 }, { "epoch": 0.44, "grad_norm": 0.7231267690658569, "learning_rate": 1.2095783195379237e-05, "loss": 2.1678, "step": 13292 }, { "epoch": 0.44, "grad_norm": 0.7178898453712463, "learning_rate": 1.2094743895477447e-05, "loss": 2.0928, "step": 13293 }, { "epoch": 0.44, "grad_norm": 0.7198132872581482, "learning_rate": 1.2093704571910468e-05, "loss": 2.122, "step": 13294 }, { "epoch": 0.44, "grad_norm": 0.7055746912956238, "learning_rate": 1.209266522469004e-05, "loss": 2.1212, "step": 13295 }, { "epoch": 0.44, "grad_norm": 0.7428348064422607, "learning_rate": 1.2091625853827911e-05, "loss": 2.0449, "step": 13296 }, { "epoch": 0.44, "grad_norm": 0.7168387174606323, "learning_rate": 1.2090586459335816e-05, "loss": 2.0194, "step": 13297 }, { "epoch": 0.44, "grad_norm": 0.7541288733482361, "learning_rate": 1.20895470412255e-05, "loss": 2.1057, "step": 13298 }, { "epoch": 0.44, "grad_norm": 0.7282351851463318, "learning_rate": 1.2088507599508707e-05, "loss": 2.0268, "step": 13299 }, { "epoch": 0.44, "grad_norm": 0.7244184017181396, "learning_rate": 1.208746813419718e-05, "loss": 2.1372, "step": 13300 }, { "epoch": 0.44, "grad_norm": 0.7184075713157654, "learning_rate": 1.2086428645302659e-05, "loss": 2.1176, "step": 13301 }, { "epoch": 0.44, "grad_norm": 0.7376933693885803, "learning_rate": 1.2085389132836893e-05, "loss": 2.1075, "step": 13302 }, { "epoch": 0.44, "grad_norm": 0.7401725053787231, "learning_rate": 1.208434959681162e-05, "loss": 2.0628, "step": 13303 }, { "epoch": 0.44, "grad_norm": 0.7182230949401855, "learning_rate": 1.208331003723859e-05, "loss": 2.0199, "step": 13304 }, { "epoch": 0.44, "grad_norm": 0.6960762143135071, "learning_rate": 1.208227045412954e-05, "loss": 2.0676, "step": 13305 }, { "epoch": 0.44, "grad_norm": 0.7502022981643677, "learning_rate": 1.2081230847496221e-05, "loss": 2.0893, "step": 13306 }, { "epoch": 0.44, "grad_norm": 0.7258266806602478, "learning_rate": 1.2080191217350374e-05, "loss": 2.0767, "step": 13307 }, { "epoch": 0.44, "grad_norm": 0.7505900859832764, "learning_rate": 1.2079151563703749e-05, "loss": 2.0079, "step": 13308 }, { "epoch": 0.44, "grad_norm": 0.7215785384178162, "learning_rate": 1.2078111886568085e-05, "loss": 2.0666, "step": 13309 }, { "epoch": 0.44, "grad_norm": 0.7636035084724426, "learning_rate": 1.2077072185955131e-05, "loss": 2.0765, "step": 13310 }, { "epoch": 0.44, "grad_norm": 0.7186139225959778, "learning_rate": 1.2076032461876636e-05, "loss": 2.1178, "step": 13311 }, { "epoch": 0.44, "grad_norm": 0.713874340057373, "learning_rate": 1.2074992714344338e-05, "loss": 2.0386, "step": 13312 }, { "epoch": 0.44, "grad_norm": 0.7320047616958618, "learning_rate": 1.2073952943369992e-05, "loss": 2.0103, "step": 13313 }, { "epoch": 0.44, "grad_norm": 0.7288681268692017, "learning_rate": 1.2072913148965341e-05, "loss": 2.0505, "step": 13314 }, { "epoch": 0.44, "grad_norm": 0.7776504158973694, "learning_rate": 1.207187333114213e-05, "loss": 2.0223, "step": 13315 }, { "epoch": 0.44, "grad_norm": 0.7459606528282166, "learning_rate": 1.207083348991211e-05, "loss": 2.2105, "step": 13316 }, { "epoch": 0.44, "grad_norm": 0.7721802592277527, "learning_rate": 1.2069793625287027e-05, "loss": 2.026, "step": 13317 }, { "epoch": 0.44, "grad_norm": 0.701431930065155, "learning_rate": 1.2068753737278626e-05, "loss": 2.0211, "step": 13318 }, { "epoch": 0.44, "grad_norm": 0.7196957468986511, "learning_rate": 1.2067713825898662e-05, "loss": 1.9918, "step": 13319 }, { "epoch": 0.44, "grad_norm": 0.7388178110122681, "learning_rate": 1.2066673891158875e-05, "loss": 2.058, "step": 13320 }, { "epoch": 0.44, "grad_norm": 0.7367233633995056, "learning_rate": 1.2065633933071019e-05, "loss": 2.099, "step": 13321 }, { "epoch": 0.44, "grad_norm": 0.7038823962211609, "learning_rate": 1.2064593951646843e-05, "loss": 2.0612, "step": 13322 }, { "epoch": 0.44, "grad_norm": 0.730548083782196, "learning_rate": 1.2063553946898092e-05, "loss": 2.0726, "step": 13323 }, { "epoch": 0.44, "grad_norm": 0.7345361709594727, "learning_rate": 1.206251391883652e-05, "loss": 2.088, "step": 13324 }, { "epoch": 0.44, "grad_norm": 0.7355608940124512, "learning_rate": 1.2061473867473874e-05, "loss": 2.0906, "step": 13325 }, { "epoch": 0.44, "grad_norm": 0.7406070232391357, "learning_rate": 1.2060433792821901e-05, "loss": 2.0383, "step": 13326 }, { "epoch": 0.44, "grad_norm": 0.7416023015975952, "learning_rate": 1.2059393694892361e-05, "loss": 2.013, "step": 13327 }, { "epoch": 0.44, "grad_norm": 0.7365932464599609, "learning_rate": 1.2058353573696995e-05, "loss": 2.0441, "step": 13328 }, { "epoch": 0.44, "grad_norm": 0.7340754270553589, "learning_rate": 1.2057313429247554e-05, "loss": 2.1135, "step": 13329 }, { "epoch": 0.44, "grad_norm": 0.7222080826759338, "learning_rate": 1.2056273261555793e-05, "loss": 2.179, "step": 13330 }, { "epoch": 0.44, "grad_norm": 0.7258617281913757, "learning_rate": 1.2055233070633464e-05, "loss": 2.0164, "step": 13331 }, { "epoch": 0.44, "grad_norm": 0.7366630434989929, "learning_rate": 1.2054192856492315e-05, "loss": 2.0369, "step": 13332 }, { "epoch": 0.44, "grad_norm": 0.7424752712249756, "learning_rate": 1.20531526191441e-05, "loss": 2.0194, "step": 13333 }, { "epoch": 0.44, "grad_norm": 0.7324604392051697, "learning_rate": 1.2052112358600565e-05, "loss": 2.2096, "step": 13334 }, { "epoch": 0.44, "grad_norm": 0.7110586166381836, "learning_rate": 1.2051072074873473e-05, "loss": 2.1446, "step": 13335 }, { "epoch": 0.44, "grad_norm": 0.7497795820236206, "learning_rate": 1.2050031767974568e-05, "loss": 2.11, "step": 13336 }, { "epoch": 0.44, "grad_norm": 0.7348743677139282, "learning_rate": 1.2048991437915605e-05, "loss": 2.0414, "step": 13337 }, { "epoch": 0.44, "grad_norm": 0.7378973960876465, "learning_rate": 1.204795108470834e-05, "loss": 2.0422, "step": 13338 }, { "epoch": 0.44, "grad_norm": 0.7241606712341309, "learning_rate": 1.2046910708364523e-05, "loss": 2.0843, "step": 13339 }, { "epoch": 0.44, "grad_norm": 0.704196572303772, "learning_rate": 1.2045870308895908e-05, "loss": 2.1158, "step": 13340 }, { "epoch": 0.44, "grad_norm": 0.7024517059326172, "learning_rate": 1.2044829886314249e-05, "loss": 2.0759, "step": 13341 }, { "epoch": 0.44, "grad_norm": 0.7401084899902344, "learning_rate": 1.2043789440631301e-05, "loss": 2.0879, "step": 13342 }, { "epoch": 0.44, "grad_norm": 0.7075070738792419, "learning_rate": 1.2042748971858816e-05, "loss": 2.0724, "step": 13343 }, { "epoch": 0.44, "grad_norm": 0.7374077439308167, "learning_rate": 1.2041708480008554e-05, "loss": 2.1015, "step": 13344 }, { "epoch": 0.44, "grad_norm": 0.7080702185630798, "learning_rate": 1.2040667965092262e-05, "loss": 2.1689, "step": 13345 }, { "epoch": 0.44, "grad_norm": 0.7485519647598267, "learning_rate": 1.2039627427121701e-05, "loss": 2.067, "step": 13346 }, { "epoch": 0.44, "grad_norm": 0.7322629690170288, "learning_rate": 1.2038586866108626e-05, "loss": 2.0579, "step": 13347 }, { "epoch": 0.44, "grad_norm": 0.7240492105484009, "learning_rate": 1.2037546282064787e-05, "loss": 2.0415, "step": 13348 }, { "epoch": 0.44, "grad_norm": 0.7376251816749573, "learning_rate": 1.2036505675001951e-05, "loss": 2.1102, "step": 13349 }, { "epoch": 0.44, "grad_norm": 0.7244545817375183, "learning_rate": 1.2035465044931862e-05, "loss": 2.0624, "step": 13350 }, { "epoch": 0.44, "grad_norm": 0.6985520720481873, "learning_rate": 1.2034424391866285e-05, "loss": 2.1079, "step": 13351 }, { "epoch": 0.44, "grad_norm": 0.7272088527679443, "learning_rate": 1.203338371581697e-05, "loss": 2.0419, "step": 13352 }, { "epoch": 0.44, "grad_norm": 0.7849928736686707, "learning_rate": 1.203234301679568e-05, "loss": 2.0514, "step": 13353 }, { "epoch": 0.44, "grad_norm": 0.6937972903251648, "learning_rate": 1.203130229481417e-05, "loss": 2.0466, "step": 13354 }, { "epoch": 0.44, "grad_norm": 0.7293304800987244, "learning_rate": 1.2030261549884197e-05, "loss": 2.1136, "step": 13355 }, { "epoch": 0.44, "grad_norm": 0.7527003288269043, "learning_rate": 1.2029220782017515e-05, "loss": 2.0399, "step": 13356 }, { "epoch": 0.44, "grad_norm": 0.731359601020813, "learning_rate": 1.2028179991225889e-05, "loss": 2.1017, "step": 13357 }, { "epoch": 0.44, "grad_norm": 0.7181694507598877, "learning_rate": 1.2027139177521074e-05, "loss": 2.0404, "step": 13358 }, { "epoch": 0.44, "grad_norm": 0.7249829173088074, "learning_rate": 1.2026098340914826e-05, "loss": 2.1439, "step": 13359 }, { "epoch": 0.44, "grad_norm": 0.7250382304191589, "learning_rate": 1.202505748141891e-05, "loss": 2.0003, "step": 13360 }, { "epoch": 0.44, "grad_norm": 0.7194114923477173, "learning_rate": 1.2024016599045083e-05, "loss": 2.032, "step": 13361 }, { "epoch": 0.44, "grad_norm": 0.7364223599433899, "learning_rate": 1.2022975693805099e-05, "loss": 2.0746, "step": 13362 }, { "epoch": 0.44, "grad_norm": 0.7428044080734253, "learning_rate": 1.2021934765710724e-05, "loss": 2.0517, "step": 13363 }, { "epoch": 0.44, "grad_norm": 0.7028313279151917, "learning_rate": 1.202089381477371e-05, "loss": 2.0246, "step": 13364 }, { "epoch": 0.44, "grad_norm": 0.7533397078514099, "learning_rate": 1.2019852841005825e-05, "loss": 2.0668, "step": 13365 }, { "epoch": 0.44, "grad_norm": 0.743570864200592, "learning_rate": 1.201881184441883e-05, "loss": 2.0481, "step": 13366 }, { "epoch": 0.44, "grad_norm": 0.7564150094985962, "learning_rate": 1.2017770825024475e-05, "loss": 2.103, "step": 13367 }, { "epoch": 0.44, "grad_norm": 0.6989673972129822, "learning_rate": 1.201672978283453e-05, "loss": 2.0965, "step": 13368 }, { "epoch": 0.44, "grad_norm": 0.7528740763664246, "learning_rate": 1.2015688717860758e-05, "loss": 2.0254, "step": 13369 }, { "epoch": 0.44, "grad_norm": 0.738219141960144, "learning_rate": 1.2014647630114911e-05, "loss": 2.0191, "step": 13370 }, { "epoch": 0.44, "grad_norm": 0.7300574779510498, "learning_rate": 1.2013606519608761e-05, "loss": 2.0921, "step": 13371 }, { "epoch": 0.44, "grad_norm": 0.7272834777832031, "learning_rate": 1.201256538635406e-05, "loss": 2.1256, "step": 13372 }, { "epoch": 0.44, "grad_norm": 0.7309862971305847, "learning_rate": 1.2011524230362576e-05, "loss": 2.052, "step": 13373 }, { "epoch": 0.44, "grad_norm": 0.7109898924827576, "learning_rate": 1.2010483051646072e-05, "loss": 2.0412, "step": 13374 }, { "epoch": 0.44, "grad_norm": 0.7418531179428101, "learning_rate": 1.2009441850216307e-05, "loss": 2.0788, "step": 13375 }, { "epoch": 0.45, "grad_norm": 0.7676542401313782, "learning_rate": 1.2008400626085047e-05, "loss": 2.083, "step": 13376 }, { "epoch": 0.45, "grad_norm": 0.7033101320266724, "learning_rate": 1.2007359379264051e-05, "loss": 2.1281, "step": 13377 }, { "epoch": 0.45, "grad_norm": 0.7425255179405212, "learning_rate": 1.2006318109765087e-05, "loss": 2.0863, "step": 13378 }, { "epoch": 0.45, "grad_norm": 0.7479313611984253, "learning_rate": 1.2005276817599915e-05, "loss": 2.1083, "step": 13379 }, { "epoch": 0.45, "grad_norm": 0.7301887273788452, "learning_rate": 1.20042355027803e-05, "loss": 2.0251, "step": 13380 }, { "epoch": 0.45, "grad_norm": 0.7358429431915283, "learning_rate": 1.2003194165318011e-05, "loss": 2.1119, "step": 13381 }, { "epoch": 0.45, "grad_norm": 0.7144620418548584, "learning_rate": 1.200215280522481e-05, "loss": 2.0898, "step": 13382 }, { "epoch": 0.45, "grad_norm": 0.7465035915374756, "learning_rate": 1.2001111422512453e-05, "loss": 2.1242, "step": 13383 }, { "epoch": 0.45, "grad_norm": 0.7254433631896973, "learning_rate": 1.2000070017192717e-05, "loss": 2.0923, "step": 13384 }, { "epoch": 0.45, "grad_norm": 0.7439807057380676, "learning_rate": 1.199902858927736e-05, "loss": 2.1265, "step": 13385 }, { "epoch": 0.45, "grad_norm": 0.7531620860099792, "learning_rate": 1.1997987138778151e-05, "loss": 2.1412, "step": 13386 }, { "epoch": 0.45, "grad_norm": 0.7350923418998718, "learning_rate": 1.1996945665706851e-05, "loss": 2.0927, "step": 13387 }, { "epoch": 0.45, "grad_norm": 0.7015329003334045, "learning_rate": 1.1995904170075233e-05, "loss": 2.0502, "step": 13388 }, { "epoch": 0.45, "grad_norm": 0.7923730611801147, "learning_rate": 1.1994862651895059e-05, "loss": 2.0946, "step": 13389 }, { "epoch": 0.45, "grad_norm": 0.7369385361671448, "learning_rate": 1.1993821111178092e-05, "loss": 2.0647, "step": 13390 }, { "epoch": 0.45, "grad_norm": 0.750055730342865, "learning_rate": 1.1992779547936107e-05, "loss": 2.107, "step": 13391 }, { "epoch": 0.45, "grad_norm": 0.7802562117576599, "learning_rate": 1.1991737962180863e-05, "loss": 2.1582, "step": 13392 }, { "epoch": 0.45, "grad_norm": 0.7317141890525818, "learning_rate": 1.1990696353924136e-05, "loss": 2.0508, "step": 13393 }, { "epoch": 0.45, "grad_norm": 0.7592988610267639, "learning_rate": 1.1989654723177681e-05, "loss": 2.0541, "step": 13394 }, { "epoch": 0.45, "grad_norm": 0.7209230661392212, "learning_rate": 1.198861306995328e-05, "loss": 2.1029, "step": 13395 }, { "epoch": 0.45, "grad_norm": 0.7682515382766724, "learning_rate": 1.198757139426269e-05, "loss": 2.1503, "step": 13396 }, { "epoch": 0.45, "grad_norm": 0.7309234142303467, "learning_rate": 1.1986529696117684e-05, "loss": 2.1562, "step": 13397 }, { "epoch": 0.45, "grad_norm": 0.7398276329040527, "learning_rate": 1.198548797553003e-05, "loss": 2.1394, "step": 13398 }, { "epoch": 0.45, "grad_norm": 0.7231976985931396, "learning_rate": 1.1984446232511495e-05, "loss": 2.0708, "step": 13399 }, { "epoch": 0.45, "grad_norm": 0.7282794117927551, "learning_rate": 1.198340446707385e-05, "loss": 2.0774, "step": 13400 }, { "epoch": 0.45, "grad_norm": 0.7439401745796204, "learning_rate": 1.1982362679228865e-05, "loss": 2.1004, "step": 13401 }, { "epoch": 0.45, "grad_norm": 0.7311477065086365, "learning_rate": 1.1981320868988309e-05, "loss": 2.098, "step": 13402 }, { "epoch": 0.45, "grad_norm": 0.7431143522262573, "learning_rate": 1.1980279036363948e-05, "loss": 2.1662, "step": 13403 }, { "epoch": 0.45, "grad_norm": 0.7629624605178833, "learning_rate": 1.197923718136756e-05, "loss": 2.0967, "step": 13404 }, { "epoch": 0.45, "grad_norm": 0.7716542482376099, "learning_rate": 1.1978195304010904e-05, "loss": 2.0398, "step": 13405 }, { "epoch": 0.45, "grad_norm": 0.7417488694190979, "learning_rate": 1.197715340430576e-05, "loss": 2.1021, "step": 13406 }, { "epoch": 0.45, "grad_norm": 0.7302343845367432, "learning_rate": 1.1976111482263898e-05, "loss": 2.0789, "step": 13407 }, { "epoch": 0.45, "grad_norm": 0.7542890310287476, "learning_rate": 1.1975069537897082e-05, "loss": 2.1509, "step": 13408 }, { "epoch": 0.45, "grad_norm": 0.7298617959022522, "learning_rate": 1.1974027571217091e-05, "loss": 2.0961, "step": 13409 }, { "epoch": 0.45, "grad_norm": 0.728975236415863, "learning_rate": 1.1972985582235692e-05, "loss": 2.0823, "step": 13410 }, { "epoch": 0.45, "grad_norm": 0.734054684638977, "learning_rate": 1.1971943570964656e-05, "loss": 2.155, "step": 13411 }, { "epoch": 0.45, "grad_norm": 0.7165126800537109, "learning_rate": 1.197090153741576e-05, "loss": 2.0734, "step": 13412 }, { "epoch": 0.45, "grad_norm": 0.7485402822494507, "learning_rate": 1.1969859481600774e-05, "loss": 2.1955, "step": 13413 }, { "epoch": 0.45, "grad_norm": 0.75748211145401, "learning_rate": 1.1968817403531468e-05, "loss": 2.0924, "step": 13414 }, { "epoch": 0.45, "grad_norm": 0.707868218421936, "learning_rate": 1.196777530321962e-05, "loss": 2.1114, "step": 13415 }, { "epoch": 0.45, "grad_norm": 0.7354319095611572, "learning_rate": 1.1966733180676995e-05, "loss": 2.102, "step": 13416 }, { "epoch": 0.45, "grad_norm": 0.735072910785675, "learning_rate": 1.1965691035915376e-05, "loss": 2.0612, "step": 13417 }, { "epoch": 0.45, "grad_norm": 0.705896258354187, "learning_rate": 1.1964648868946528e-05, "loss": 1.9996, "step": 13418 }, { "epoch": 0.45, "grad_norm": 0.7545985579490662, "learning_rate": 1.196360667978223e-05, "loss": 2.112, "step": 13419 }, { "epoch": 0.45, "grad_norm": 0.7155085802078247, "learning_rate": 1.1962564468434254e-05, "loss": 2.1007, "step": 13420 }, { "epoch": 0.45, "grad_norm": 0.7386749982833862, "learning_rate": 1.1961522234914375e-05, "loss": 2.1554, "step": 13421 }, { "epoch": 0.45, "grad_norm": 0.7164409160614014, "learning_rate": 1.196047997923437e-05, "loss": 2.0849, "step": 13422 }, { "epoch": 0.45, "grad_norm": 0.6822673678398132, "learning_rate": 1.1959437701406007e-05, "loss": 2.1013, "step": 13423 }, { "epoch": 0.45, "grad_norm": 0.74634850025177, "learning_rate": 1.1958395401441067e-05, "loss": 2.124, "step": 13424 }, { "epoch": 0.45, "grad_norm": 0.7154432535171509, "learning_rate": 1.1957353079351324e-05, "loss": 2.0637, "step": 13425 }, { "epoch": 0.45, "grad_norm": 0.7406798601150513, "learning_rate": 1.1956310735148555e-05, "loss": 2.1231, "step": 13426 }, { "epoch": 0.45, "grad_norm": 0.7359046339988708, "learning_rate": 1.1955268368844528e-05, "loss": 2.1022, "step": 13427 }, { "epoch": 0.45, "grad_norm": 0.7135004997253418, "learning_rate": 1.1954225980451031e-05, "loss": 2.0329, "step": 13428 }, { "epoch": 0.45, "grad_norm": 0.7093487977981567, "learning_rate": 1.1953183569979832e-05, "loss": 2.075, "step": 13429 }, { "epoch": 0.45, "grad_norm": 0.7442511320114136, "learning_rate": 1.1952141137442706e-05, "loss": 2.1143, "step": 13430 }, { "epoch": 0.45, "grad_norm": 0.7548267245292664, "learning_rate": 1.1951098682851439e-05, "loss": 2.1139, "step": 13431 }, { "epoch": 0.45, "grad_norm": 0.7108021974563599, "learning_rate": 1.19500562062178e-05, "loss": 2.1115, "step": 13432 }, { "epoch": 0.45, "grad_norm": 0.7433215379714966, "learning_rate": 1.1949013707553568e-05, "loss": 2.0845, "step": 13433 }, { "epoch": 0.45, "grad_norm": 0.7358103394508362, "learning_rate": 1.1947971186870522e-05, "loss": 2.0432, "step": 13434 }, { "epoch": 0.45, "grad_norm": 0.7236347198486328, "learning_rate": 1.194692864418044e-05, "loss": 2.1276, "step": 13435 }, { "epoch": 0.45, "grad_norm": 0.7393098473548889, "learning_rate": 1.19458860794951e-05, "loss": 2.0207, "step": 13436 }, { "epoch": 0.45, "grad_norm": 0.7302792072296143, "learning_rate": 1.1944843492826278e-05, "loss": 2.1426, "step": 13437 }, { "epoch": 0.45, "grad_norm": 0.7330463528633118, "learning_rate": 1.1943800884185753e-05, "loss": 2.0552, "step": 13438 }, { "epoch": 0.45, "grad_norm": 0.7458175420761108, "learning_rate": 1.1942758253585307e-05, "loss": 2.0665, "step": 13439 }, { "epoch": 0.45, "grad_norm": 0.7964898943901062, "learning_rate": 1.1941715601036716e-05, "loss": 2.1928, "step": 13440 }, { "epoch": 0.45, "grad_norm": 0.7129392623901367, "learning_rate": 1.1940672926551757e-05, "loss": 2.1013, "step": 13441 }, { "epoch": 0.45, "grad_norm": 0.7619794607162476, "learning_rate": 1.1939630230142218e-05, "loss": 2.0543, "step": 13442 }, { "epoch": 0.45, "grad_norm": 0.718312680721283, "learning_rate": 1.193858751181987e-05, "loss": 2.0593, "step": 13443 }, { "epoch": 0.45, "grad_norm": 0.6910481452941895, "learning_rate": 1.1937544771596497e-05, "loss": 2.0546, "step": 13444 }, { "epoch": 0.45, "grad_norm": 0.7477357983589172, "learning_rate": 1.193650200948388e-05, "loss": 2.1146, "step": 13445 }, { "epoch": 0.45, "grad_norm": 0.7275376915931702, "learning_rate": 1.1935459225493795e-05, "loss": 2.1487, "step": 13446 }, { "epoch": 0.45, "grad_norm": 0.7672938704490662, "learning_rate": 1.193441641963803e-05, "loss": 2.1407, "step": 13447 }, { "epoch": 0.45, "grad_norm": 0.730105459690094, "learning_rate": 1.1933373591928361e-05, "loss": 2.059, "step": 13448 }, { "epoch": 0.45, "grad_norm": 0.7491620182991028, "learning_rate": 1.1932330742376568e-05, "loss": 2.0867, "step": 13449 }, { "epoch": 0.45, "grad_norm": 0.7420753836631775, "learning_rate": 1.1931287870994437e-05, "loss": 2.1474, "step": 13450 }, { "epoch": 0.45, "grad_norm": 0.7357080578804016, "learning_rate": 1.1930244977793745e-05, "loss": 2.0952, "step": 13451 }, { "epoch": 0.45, "grad_norm": 0.7083612084388733, "learning_rate": 1.1929202062786278e-05, "loss": 2.0772, "step": 13452 }, { "epoch": 0.45, "grad_norm": 0.7223063707351685, "learning_rate": 1.1928159125983818e-05, "loss": 2.1145, "step": 13453 }, { "epoch": 0.45, "grad_norm": 0.7823801040649414, "learning_rate": 1.1927116167398146e-05, "loss": 2.0906, "step": 13454 }, { "epoch": 0.45, "grad_norm": 0.7294846177101135, "learning_rate": 1.1926073187041043e-05, "loss": 1.9763, "step": 13455 }, { "epoch": 0.45, "grad_norm": 0.7259325981140137, "learning_rate": 1.1925030184924293e-05, "loss": 2.1163, "step": 13456 }, { "epoch": 0.45, "grad_norm": 0.7335636615753174, "learning_rate": 1.1923987161059682e-05, "loss": 2.1241, "step": 13457 }, { "epoch": 0.45, "grad_norm": 0.7183225750923157, "learning_rate": 1.1922944115458993e-05, "loss": 2.0826, "step": 13458 }, { "epoch": 0.45, "grad_norm": 0.7281363606452942, "learning_rate": 1.1921901048134009e-05, "loss": 2.0605, "step": 13459 }, { "epoch": 0.45, "grad_norm": 0.7072030305862427, "learning_rate": 1.1920857959096508e-05, "loss": 2.0597, "step": 13460 }, { "epoch": 0.45, "grad_norm": 0.7302013635635376, "learning_rate": 1.1919814848358282e-05, "loss": 2.1012, "step": 13461 }, { "epoch": 0.45, "grad_norm": 0.7271750569343567, "learning_rate": 1.1918771715931116e-05, "loss": 2.0882, "step": 13462 }, { "epoch": 0.45, "grad_norm": 0.715904712677002, "learning_rate": 1.1917728561826787e-05, "loss": 2.1002, "step": 13463 }, { "epoch": 0.45, "grad_norm": 0.7084422707557678, "learning_rate": 1.191668538605709e-05, "loss": 2.0494, "step": 13464 }, { "epoch": 0.45, "grad_norm": 0.7305770516395569, "learning_rate": 1.19156421886338e-05, "loss": 2.0841, "step": 13465 }, { "epoch": 0.45, "grad_norm": 0.7397257089614868, "learning_rate": 1.191459896956871e-05, "loss": 2.1024, "step": 13466 }, { "epoch": 0.45, "grad_norm": 0.7399925589561462, "learning_rate": 1.19135557288736e-05, "loss": 2.088, "step": 13467 }, { "epoch": 0.45, "grad_norm": 0.761913001537323, "learning_rate": 1.1912512466560261e-05, "loss": 2.0867, "step": 13468 }, { "epoch": 0.45, "grad_norm": 0.7388766407966614, "learning_rate": 1.1911469182640478e-05, "loss": 2.0689, "step": 13469 }, { "epoch": 0.45, "grad_norm": 0.7390720248222351, "learning_rate": 1.1910425877126038e-05, "loss": 2.0879, "step": 13470 }, { "epoch": 0.45, "grad_norm": 0.7065528631210327, "learning_rate": 1.1909382550028719e-05, "loss": 2.0785, "step": 13471 }, { "epoch": 0.45, "grad_norm": 0.7561423182487488, "learning_rate": 1.1908339201360319e-05, "loss": 2.1433, "step": 13472 }, { "epoch": 0.45, "grad_norm": 0.7405491471290588, "learning_rate": 1.1907295831132624e-05, "loss": 2.1288, "step": 13473 }, { "epoch": 0.45, "grad_norm": 0.7585501670837402, "learning_rate": 1.1906252439357413e-05, "loss": 2.0703, "step": 13474 }, { "epoch": 0.45, "grad_norm": 0.734179675579071, "learning_rate": 1.1905209026046485e-05, "loss": 2.0892, "step": 13475 }, { "epoch": 0.45, "grad_norm": 0.7396822571754456, "learning_rate": 1.1904165591211616e-05, "loss": 2.0744, "step": 13476 }, { "epoch": 0.45, "grad_norm": 0.7383284568786621, "learning_rate": 1.1903122134864604e-05, "loss": 2.0331, "step": 13477 }, { "epoch": 0.45, "grad_norm": 0.7291897535324097, "learning_rate": 1.1902078657017234e-05, "loss": 2.0848, "step": 13478 }, { "epoch": 0.45, "grad_norm": 0.739812970161438, "learning_rate": 1.1901035157681291e-05, "loss": 2.0657, "step": 13479 }, { "epoch": 0.45, "grad_norm": 0.7075086832046509, "learning_rate": 1.1899991636868569e-05, "loss": 2.0577, "step": 13480 }, { "epoch": 0.45, "grad_norm": 0.6971482634544373, "learning_rate": 1.1898948094590854e-05, "loss": 2.0405, "step": 13481 }, { "epoch": 0.45, "grad_norm": 0.7240017056465149, "learning_rate": 1.1897904530859937e-05, "loss": 2.116, "step": 13482 }, { "epoch": 0.45, "grad_norm": 0.7471370100975037, "learning_rate": 1.1896860945687605e-05, "loss": 2.1012, "step": 13483 }, { "epoch": 0.45, "grad_norm": 0.7188327312469482, "learning_rate": 1.1895817339085651e-05, "loss": 2.0337, "step": 13484 }, { "epoch": 0.45, "grad_norm": 0.7193142175674438, "learning_rate": 1.1894773711065863e-05, "loss": 2.0526, "step": 13485 }, { "epoch": 0.45, "grad_norm": 0.7240602970123291, "learning_rate": 1.1893730061640036e-05, "loss": 2.0533, "step": 13486 }, { "epoch": 0.45, "grad_norm": 0.7409474849700928, "learning_rate": 1.1892686390819952e-05, "loss": 2.1813, "step": 13487 }, { "epoch": 0.45, "grad_norm": 0.7168980240821838, "learning_rate": 1.189164269861741e-05, "loss": 2.1133, "step": 13488 }, { "epoch": 0.45, "grad_norm": 0.7582558393478394, "learning_rate": 1.1890598985044195e-05, "loss": 2.1501, "step": 13489 }, { "epoch": 0.45, "grad_norm": 0.7557507157325745, "learning_rate": 1.1889555250112101e-05, "loss": 2.0906, "step": 13490 }, { "epoch": 0.45, "grad_norm": 0.7242085933685303, "learning_rate": 1.1888511493832919e-05, "loss": 2.1029, "step": 13491 }, { "epoch": 0.45, "grad_norm": 0.7394744157791138, "learning_rate": 1.1887467716218442e-05, "loss": 2.0242, "step": 13492 }, { "epoch": 0.45, "grad_norm": 0.7168545722961426, "learning_rate": 1.1886423917280459e-05, "loss": 1.9949, "step": 13493 }, { "epoch": 0.45, "grad_norm": 0.7620472311973572, "learning_rate": 1.1885380097030765e-05, "loss": 2.1004, "step": 13494 }, { "epoch": 0.45, "grad_norm": 0.7373746037483215, "learning_rate": 1.1884336255481152e-05, "loss": 2.0368, "step": 13495 }, { "epoch": 0.45, "grad_norm": 0.7282609939575195, "learning_rate": 1.188329239264341e-05, "loss": 2.1139, "step": 13496 }, { "epoch": 0.45, "grad_norm": 0.773597002029419, "learning_rate": 1.188224850852934e-05, "loss": 2.0988, "step": 13497 }, { "epoch": 0.45, "grad_norm": 0.7168407440185547, "learning_rate": 1.1881204603150725e-05, "loss": 2.0062, "step": 13498 }, { "epoch": 0.45, "grad_norm": 0.7512868046760559, "learning_rate": 1.1880160676519363e-05, "loss": 2.057, "step": 13499 }, { "epoch": 0.45, "grad_norm": 0.7379336357116699, "learning_rate": 1.1879116728647048e-05, "loss": 2.0415, "step": 13500 }, { "epoch": 0.45, "grad_norm": 0.7391049861907959, "learning_rate": 1.1878072759545576e-05, "loss": 2.1142, "step": 13501 }, { "epoch": 0.45, "grad_norm": 0.7618208527565002, "learning_rate": 1.1877028769226735e-05, "loss": 2.081, "step": 13502 }, { "epoch": 0.45, "grad_norm": 0.7453198432922363, "learning_rate": 1.1875984757702326e-05, "loss": 2.105, "step": 13503 }, { "epoch": 0.45, "grad_norm": 0.7550119161605835, "learning_rate": 1.1874940724984139e-05, "loss": 2.0907, "step": 13504 }, { "epoch": 0.45, "grad_norm": 0.732653021812439, "learning_rate": 1.187389667108397e-05, "loss": 2.0965, "step": 13505 }, { "epoch": 0.45, "grad_norm": 0.7220315337181091, "learning_rate": 1.1872852596013615e-05, "loss": 2.0584, "step": 13506 }, { "epoch": 0.45, "grad_norm": 0.7149839401245117, "learning_rate": 1.187180849978487e-05, "loss": 2.0255, "step": 13507 }, { "epoch": 0.45, "grad_norm": 0.7610925436019897, "learning_rate": 1.1870764382409529e-05, "loss": 2.0962, "step": 13508 }, { "epoch": 0.45, "grad_norm": 0.7505216598510742, "learning_rate": 1.1869720243899385e-05, "loss": 2.0925, "step": 13509 }, { "epoch": 0.45, "grad_norm": 0.7339593172073364, "learning_rate": 1.1868676084266244e-05, "loss": 2.0386, "step": 13510 }, { "epoch": 0.45, "grad_norm": 0.7054505348205566, "learning_rate": 1.1867631903521892e-05, "loss": 2.0927, "step": 13511 }, { "epoch": 0.45, "grad_norm": 0.730927050113678, "learning_rate": 1.186658770167813e-05, "loss": 2.1538, "step": 13512 }, { "epoch": 0.45, "grad_norm": 0.7303416728973389, "learning_rate": 1.1865543478746753e-05, "loss": 2.0691, "step": 13513 }, { "epoch": 0.45, "grad_norm": 0.7300906181335449, "learning_rate": 1.1864499234739559e-05, "loss": 2.1353, "step": 13514 }, { "epoch": 0.45, "grad_norm": 0.7153344750404358, "learning_rate": 1.1863454969668346e-05, "loss": 2.0678, "step": 13515 }, { "epoch": 0.45, "grad_norm": 0.7668852806091309, "learning_rate": 1.1862410683544912e-05, "loss": 2.0774, "step": 13516 }, { "epoch": 0.45, "grad_norm": 0.6971147656440735, "learning_rate": 1.1861366376381052e-05, "loss": 2.1003, "step": 13517 }, { "epoch": 0.45, "grad_norm": 0.7274404168128967, "learning_rate": 1.1860322048188566e-05, "loss": 2.0974, "step": 13518 }, { "epoch": 0.45, "grad_norm": 0.7207568883895874, "learning_rate": 1.1859277698979253e-05, "loss": 2.1015, "step": 13519 }, { "epoch": 0.45, "grad_norm": 0.7528194189071655, "learning_rate": 1.1858233328764908e-05, "loss": 2.0512, "step": 13520 }, { "epoch": 0.45, "grad_norm": 0.7192557454109192, "learning_rate": 1.1857188937557333e-05, "loss": 2.0556, "step": 13521 }, { "epoch": 0.45, "grad_norm": 0.7366397976875305, "learning_rate": 1.1856144525368327e-05, "loss": 2.056, "step": 13522 }, { "epoch": 0.45, "grad_norm": 0.7384868264198303, "learning_rate": 1.1855100092209683e-05, "loss": 2.0505, "step": 13523 }, { "epoch": 0.45, "grad_norm": 0.7295539379119873, "learning_rate": 1.1854055638093212e-05, "loss": 2.0907, "step": 13524 }, { "epoch": 0.45, "grad_norm": 0.7075151801109314, "learning_rate": 1.1853011163030703e-05, "loss": 2.081, "step": 13525 }, { "epoch": 0.45, "grad_norm": 0.7750862240791321, "learning_rate": 1.185196666703396e-05, "loss": 2.0959, "step": 13526 }, { "epoch": 0.45, "grad_norm": 0.698559045791626, "learning_rate": 1.1850922150114786e-05, "loss": 2.0639, "step": 13527 }, { "epoch": 0.45, "grad_norm": 0.7167655229568481, "learning_rate": 1.1849877612284974e-05, "loss": 2.0244, "step": 13528 }, { "epoch": 0.45, "grad_norm": 0.7359040975570679, "learning_rate": 1.1848833053556332e-05, "loss": 2.0873, "step": 13529 }, { "epoch": 0.45, "grad_norm": 0.7603029608726501, "learning_rate": 1.1847788473940658e-05, "loss": 2.1706, "step": 13530 }, { "epoch": 0.45, "grad_norm": 0.7462285161018372, "learning_rate": 1.184674387344975e-05, "loss": 2.0498, "step": 13531 }, { "epoch": 0.45, "grad_norm": 0.7745400071144104, "learning_rate": 1.1845699252095414e-05, "loss": 2.1679, "step": 13532 }, { "epoch": 0.45, "grad_norm": 0.791156530380249, "learning_rate": 1.184465460988945e-05, "loss": 2.1602, "step": 13533 }, { "epoch": 0.45, "grad_norm": 0.7173791527748108, "learning_rate": 1.1843609946843655e-05, "loss": 2.0552, "step": 13534 }, { "epoch": 0.45, "grad_norm": 0.7484079599380493, "learning_rate": 1.1842565262969842e-05, "loss": 2.05, "step": 13535 }, { "epoch": 0.45, "grad_norm": 0.7357771396636963, "learning_rate": 1.1841520558279802e-05, "loss": 2.1432, "step": 13536 }, { "epoch": 0.45, "grad_norm": 0.7360095977783203, "learning_rate": 1.1840475832785343e-05, "loss": 2.0778, "step": 13537 }, { "epoch": 0.45, "grad_norm": 0.767088770866394, "learning_rate": 1.1839431086498268e-05, "loss": 2.0112, "step": 13538 }, { "epoch": 0.45, "grad_norm": 0.7236528396606445, "learning_rate": 1.1838386319430377e-05, "loss": 2.0528, "step": 13539 }, { "epoch": 0.45, "grad_norm": 0.727060079574585, "learning_rate": 1.1837341531593473e-05, "loss": 2.092, "step": 13540 }, { "epoch": 0.45, "grad_norm": 0.7364929914474487, "learning_rate": 1.1836296722999364e-05, "loss": 2.1365, "step": 13541 }, { "epoch": 0.45, "grad_norm": 0.7439523339271545, "learning_rate": 1.1835251893659849e-05, "loss": 2.0503, "step": 13542 }, { "epoch": 0.45, "grad_norm": 0.7160428762435913, "learning_rate": 1.1834207043586738e-05, "loss": 2.1407, "step": 13543 }, { "epoch": 0.45, "grad_norm": 0.7623382210731506, "learning_rate": 1.1833162172791828e-05, "loss": 2.1538, "step": 13544 }, { "epoch": 0.45, "grad_norm": 0.7711104154586792, "learning_rate": 1.183211728128692e-05, "loss": 2.077, "step": 13545 }, { "epoch": 0.45, "grad_norm": 0.7471561431884766, "learning_rate": 1.1831072369083834e-05, "loss": 2.0149, "step": 13546 }, { "epoch": 0.45, "grad_norm": 0.7303157448768616, "learning_rate": 1.1830027436194362e-05, "loss": 2.0109, "step": 13547 }, { "epoch": 0.45, "grad_norm": 0.7227977514266968, "learning_rate": 1.1828982482630314e-05, "loss": 2.1047, "step": 13548 }, { "epoch": 0.45, "grad_norm": 0.744614839553833, "learning_rate": 1.182793750840349e-05, "loss": 2.1748, "step": 13549 }, { "epoch": 0.45, "grad_norm": 0.7837855815887451, "learning_rate": 1.1826892513525701e-05, "loss": 2.1153, "step": 13550 }, { "epoch": 0.45, "grad_norm": 0.7416403889656067, "learning_rate": 1.1825847498008752e-05, "loss": 2.0078, "step": 13551 }, { "epoch": 0.45, "grad_norm": 0.7315610647201538, "learning_rate": 1.1824802461864448e-05, "loss": 2.0833, "step": 13552 }, { "epoch": 0.45, "grad_norm": 0.7420401573181152, "learning_rate": 1.1823757405104594e-05, "loss": 2.1042, "step": 13553 }, { "epoch": 0.45, "grad_norm": 0.7715992331504822, "learning_rate": 1.1822712327740999e-05, "loss": 2.122, "step": 13554 }, { "epoch": 0.45, "grad_norm": 0.7241045236587524, "learning_rate": 1.182166722978547e-05, "loss": 2.0218, "step": 13555 }, { "epoch": 0.45, "grad_norm": 0.7550198435783386, "learning_rate": 1.1820622111249807e-05, "loss": 2.1166, "step": 13556 }, { "epoch": 0.45, "grad_norm": 0.7314906716346741, "learning_rate": 1.1819576972145828e-05, "loss": 2.0638, "step": 13557 }, { "epoch": 0.45, "grad_norm": 0.7201056480407715, "learning_rate": 1.181853181248533e-05, "loss": 2.0344, "step": 13558 }, { "epoch": 0.45, "grad_norm": 0.7708947062492371, "learning_rate": 1.1817486632280129e-05, "loss": 2.0434, "step": 13559 }, { "epoch": 0.45, "grad_norm": 0.8097667694091797, "learning_rate": 1.1816441431542026e-05, "loss": 2.0571, "step": 13560 }, { "epoch": 0.45, "grad_norm": 0.7036840915679932, "learning_rate": 1.1815396210282835e-05, "loss": 2.0917, "step": 13561 }, { "epoch": 0.45, "grad_norm": 0.7495243549346924, "learning_rate": 1.1814350968514358e-05, "loss": 2.1614, "step": 13562 }, { "epoch": 0.45, "grad_norm": 0.7221084833145142, "learning_rate": 1.1813305706248412e-05, "loss": 2.0914, "step": 13563 }, { "epoch": 0.45, "grad_norm": 0.7237715125083923, "learning_rate": 1.1812260423496795e-05, "loss": 2.0879, "step": 13564 }, { "epoch": 0.45, "grad_norm": 0.7475821375846863, "learning_rate": 1.1811215120271327e-05, "loss": 2.1245, "step": 13565 }, { "epoch": 0.45, "grad_norm": 0.7633784413337708, "learning_rate": 1.181016979658381e-05, "loss": 2.1713, "step": 13566 }, { "epoch": 0.45, "grad_norm": 0.7213771939277649, "learning_rate": 1.1809124452446055e-05, "loss": 2.1263, "step": 13567 }, { "epoch": 0.45, "grad_norm": 0.7430083155632019, "learning_rate": 1.1808079087869875e-05, "loss": 2.0524, "step": 13568 }, { "epoch": 0.45, "grad_norm": 0.7704249024391174, "learning_rate": 1.1807033702867071e-05, "loss": 2.0629, "step": 13569 }, { "epoch": 0.45, "grad_norm": 0.7276620268821716, "learning_rate": 1.1805988297449467e-05, "loss": 2.0724, "step": 13570 }, { "epoch": 0.45, "grad_norm": 0.736805260181427, "learning_rate": 1.1804942871628859e-05, "loss": 2.0693, "step": 13571 }, { "epoch": 0.45, "grad_norm": 0.7313312292098999, "learning_rate": 1.1803897425417067e-05, "loss": 2.067, "step": 13572 }, { "epoch": 0.45, "grad_norm": 0.7328804731369019, "learning_rate": 1.18028519588259e-05, "loss": 2.0928, "step": 13573 }, { "epoch": 0.45, "grad_norm": 0.7619591355323792, "learning_rate": 1.180180647186717e-05, "loss": 2.0579, "step": 13574 }, { "epoch": 0.45, "grad_norm": 0.7512155771255493, "learning_rate": 1.180076096455268e-05, "loss": 2.1167, "step": 13575 }, { "epoch": 0.45, "grad_norm": 0.7085748314857483, "learning_rate": 1.1799715436894252e-05, "loss": 2.0645, "step": 13576 }, { "epoch": 0.45, "grad_norm": 0.7317373156547546, "learning_rate": 1.1798669888903693e-05, "loss": 2.1281, "step": 13577 }, { "epoch": 0.45, "grad_norm": 0.7313659191131592, "learning_rate": 1.1797624320592817e-05, "loss": 2.0699, "step": 13578 }, { "epoch": 0.45, "grad_norm": 0.7449996471405029, "learning_rate": 1.1796578731973436e-05, "loss": 2.0624, "step": 13579 }, { "epoch": 0.45, "grad_norm": 0.7183303236961365, "learning_rate": 1.1795533123057356e-05, "loss": 2.0716, "step": 13580 }, { "epoch": 0.45, "grad_norm": 0.7231505513191223, "learning_rate": 1.1794487493856402e-05, "loss": 2.1245, "step": 13581 }, { "epoch": 0.45, "grad_norm": 0.704030454158783, "learning_rate": 1.1793441844382376e-05, "loss": 2.0786, "step": 13582 }, { "epoch": 0.45, "grad_norm": 0.7091351747512817, "learning_rate": 1.1792396174647096e-05, "loss": 2.0382, "step": 13583 }, { "epoch": 0.45, "grad_norm": 0.7370789647102356, "learning_rate": 1.1791350484662375e-05, "loss": 2.1172, "step": 13584 }, { "epoch": 0.45, "grad_norm": 0.7186959385871887, "learning_rate": 1.1790304774440022e-05, "loss": 2.093, "step": 13585 }, { "epoch": 0.45, "grad_norm": 0.7489336133003235, "learning_rate": 1.178925904399186e-05, "loss": 2.068, "step": 13586 }, { "epoch": 0.45, "grad_norm": 0.7123768329620361, "learning_rate": 1.1788213293329696e-05, "loss": 2.057, "step": 13587 }, { "epoch": 0.45, "grad_norm": 0.7339223623275757, "learning_rate": 1.1787167522465344e-05, "loss": 2.0773, "step": 13588 }, { "epoch": 0.45, "grad_norm": 0.7449429631233215, "learning_rate": 1.1786121731410622e-05, "loss": 2.0256, "step": 13589 }, { "epoch": 0.45, "grad_norm": 0.7440928220748901, "learning_rate": 1.1785075920177347e-05, "loss": 2.1163, "step": 13590 }, { "epoch": 0.45, "grad_norm": 0.7177662253379822, "learning_rate": 1.1784030088777325e-05, "loss": 1.9806, "step": 13591 }, { "epoch": 0.45, "grad_norm": 0.7182180881500244, "learning_rate": 1.1782984237222382e-05, "loss": 2.0412, "step": 13592 }, { "epoch": 0.45, "grad_norm": 0.6961660385131836, "learning_rate": 1.1781938365524326e-05, "loss": 2.0749, "step": 13593 }, { "epoch": 0.45, "grad_norm": 0.7375578284263611, "learning_rate": 1.1780892473694974e-05, "loss": 2.0391, "step": 13594 }, { "epoch": 0.45, "grad_norm": 0.7078100442886353, "learning_rate": 1.1779846561746143e-05, "loss": 2.0301, "step": 13595 }, { "epoch": 0.45, "grad_norm": 0.7513500452041626, "learning_rate": 1.1778800629689646e-05, "loss": 2.0775, "step": 13596 }, { "epoch": 0.45, "grad_norm": 0.8137655258178711, "learning_rate": 1.1777754677537306e-05, "loss": 2.1367, "step": 13597 }, { "epoch": 0.45, "grad_norm": 0.7182844877243042, "learning_rate": 1.1776708705300932e-05, "loss": 2.0454, "step": 13598 }, { "epoch": 0.45, "grad_norm": 0.7568352222442627, "learning_rate": 1.1775662712992346e-05, "loss": 2.1431, "step": 13599 }, { "epoch": 0.45, "grad_norm": 0.7061640024185181, "learning_rate": 1.1774616700623363e-05, "loss": 2.0685, "step": 13600 }, { "epoch": 0.45, "grad_norm": 0.739234209060669, "learning_rate": 1.1773570668205803e-05, "loss": 2.0834, "step": 13601 }, { "epoch": 0.45, "grad_norm": 0.7545337080955505, "learning_rate": 1.1772524615751477e-05, "loss": 2.0877, "step": 13602 }, { "epoch": 0.45, "grad_norm": 0.7233408093452454, "learning_rate": 1.177147854327221e-05, "loss": 2.0459, "step": 13603 }, { "epoch": 0.45, "grad_norm": 0.7200063467025757, "learning_rate": 1.1770432450779814e-05, "loss": 2.0533, "step": 13604 }, { "epoch": 0.45, "grad_norm": 0.7366318702697754, "learning_rate": 1.176938633828611e-05, "loss": 2.0898, "step": 13605 }, { "epoch": 0.45, "grad_norm": 0.740839421749115, "learning_rate": 1.1768340205802917e-05, "loss": 2.1812, "step": 13606 }, { "epoch": 0.45, "grad_norm": 0.7220223546028137, "learning_rate": 1.1767294053342053e-05, "loss": 2.0054, "step": 13607 }, { "epoch": 0.45, "grad_norm": 0.72603839635849, "learning_rate": 1.1766247880915335e-05, "loss": 2.0762, "step": 13608 }, { "epoch": 0.45, "grad_norm": 0.7728837728500366, "learning_rate": 1.1765201688534587e-05, "loss": 2.0701, "step": 13609 }, { "epoch": 0.45, "grad_norm": 0.7520168423652649, "learning_rate": 1.176415547621162e-05, "loss": 2.0575, "step": 13610 }, { "epoch": 0.45, "grad_norm": 0.7832136154174805, "learning_rate": 1.1763109243958261e-05, "loss": 2.1359, "step": 13611 }, { "epoch": 0.45, "grad_norm": 0.7365365028381348, "learning_rate": 1.176206299178633e-05, "loss": 2.0446, "step": 13612 }, { "epoch": 0.45, "grad_norm": 0.7437178492546082, "learning_rate": 1.176101671970764e-05, "loss": 2.112, "step": 13613 }, { "epoch": 0.45, "grad_norm": 0.7158846855163574, "learning_rate": 1.1759970427734017e-05, "loss": 2.0736, "step": 13614 }, { "epoch": 0.45, "grad_norm": 0.7176305651664734, "learning_rate": 1.175892411587728e-05, "loss": 2.112, "step": 13615 }, { "epoch": 0.45, "grad_norm": 0.7492138147354126, "learning_rate": 1.1757877784149245e-05, "loss": 2.1162, "step": 13616 }, { "epoch": 0.45, "grad_norm": 0.7546277046203613, "learning_rate": 1.1756831432561742e-05, "loss": 2.06, "step": 13617 }, { "epoch": 0.45, "grad_norm": 0.7384843826293945, "learning_rate": 1.1755785061126584e-05, "loss": 2.097, "step": 13618 }, { "epoch": 0.45, "grad_norm": 0.7283943295478821, "learning_rate": 1.1754738669855596e-05, "loss": 2.0687, "step": 13619 }, { "epoch": 0.45, "grad_norm": 0.7531116604804993, "learning_rate": 1.1753692258760599e-05, "loss": 2.1363, "step": 13620 }, { "epoch": 0.45, "grad_norm": 0.7325713038444519, "learning_rate": 1.1752645827853416e-05, "loss": 2.102, "step": 13621 }, { "epoch": 0.45, "grad_norm": 0.7368161082267761, "learning_rate": 1.1751599377145863e-05, "loss": 2.0616, "step": 13622 }, { "epoch": 0.45, "grad_norm": 0.741741955280304, "learning_rate": 1.1750552906649775e-05, "loss": 2.0974, "step": 13623 }, { "epoch": 0.45, "grad_norm": 0.7672288417816162, "learning_rate": 1.1749506416376956e-05, "loss": 2.0957, "step": 13624 }, { "epoch": 0.45, "grad_norm": 0.7456240653991699, "learning_rate": 1.1748459906339247e-05, "loss": 2.0949, "step": 13625 }, { "epoch": 0.45, "grad_norm": 0.7599456310272217, "learning_rate": 1.174741337654846e-05, "loss": 2.0673, "step": 13626 }, { "epoch": 0.45, "grad_norm": 0.7436365485191345, "learning_rate": 1.174636682701642e-05, "loss": 2.052, "step": 13627 }, { "epoch": 0.45, "grad_norm": 0.7147632241249084, "learning_rate": 1.1745320257754954e-05, "loss": 2.1098, "step": 13628 }, { "epoch": 0.45, "grad_norm": 0.7553607821464539, "learning_rate": 1.1744273668775878e-05, "loss": 2.0852, "step": 13629 }, { "epoch": 0.45, "grad_norm": 0.7371150255203247, "learning_rate": 1.1743227060091023e-05, "loss": 2.1183, "step": 13630 }, { "epoch": 0.45, "grad_norm": 0.7260974645614624, "learning_rate": 1.174218043171221e-05, "loss": 2.0854, "step": 13631 }, { "epoch": 0.45, "grad_norm": 0.7527048587799072, "learning_rate": 1.1741133783651261e-05, "loss": 2.0101, "step": 13632 }, { "epoch": 0.45, "grad_norm": 0.6959316730499268, "learning_rate": 1.1740087115920007e-05, "loss": 2.0863, "step": 13633 }, { "epoch": 0.45, "grad_norm": 0.719954252243042, "learning_rate": 1.1739040428530268e-05, "loss": 2.059, "step": 13634 }, { "epoch": 0.45, "grad_norm": 0.7341354489326477, "learning_rate": 1.1737993721493868e-05, "loss": 2.0851, "step": 13635 }, { "epoch": 0.45, "grad_norm": 0.7512224912643433, "learning_rate": 1.1736946994822636e-05, "loss": 2.0598, "step": 13636 }, { "epoch": 0.45, "grad_norm": 0.7037633657455444, "learning_rate": 1.1735900248528392e-05, "loss": 2.0953, "step": 13637 }, { "epoch": 0.45, "grad_norm": 0.7858691215515137, "learning_rate": 1.1734853482622961e-05, "loss": 2.1482, "step": 13638 }, { "epoch": 0.45, "grad_norm": 0.7320055365562439, "learning_rate": 1.1733806697118179e-05, "loss": 2.0523, "step": 13639 }, { "epoch": 0.45, "grad_norm": 0.7900111079216003, "learning_rate": 1.1732759892025862e-05, "loss": 2.1107, "step": 13640 }, { "epoch": 0.45, "grad_norm": 0.709962010383606, "learning_rate": 1.1731713067357839e-05, "loss": 2.05, "step": 13641 }, { "epoch": 0.45, "grad_norm": 0.7873455882072449, "learning_rate": 1.1730666223125937e-05, "loss": 2.0806, "step": 13642 }, { "epoch": 0.45, "grad_norm": 0.7152709364891052, "learning_rate": 1.1729619359341982e-05, "loss": 2.0529, "step": 13643 }, { "epoch": 0.45, "grad_norm": 0.7855849266052246, "learning_rate": 1.1728572476017802e-05, "loss": 2.1003, "step": 13644 }, { "epoch": 0.45, "grad_norm": 0.7602287530899048, "learning_rate": 1.1727525573165224e-05, "loss": 2.0402, "step": 13645 }, { "epoch": 0.45, "grad_norm": 0.76436448097229, "learning_rate": 1.172647865079607e-05, "loss": 2.1372, "step": 13646 }, { "epoch": 0.45, "grad_norm": 0.8220353126525879, "learning_rate": 1.1725431708922176e-05, "loss": 2.0536, "step": 13647 }, { "epoch": 0.45, "grad_norm": 0.7804812788963318, "learning_rate": 1.1724384747555367e-05, "loss": 2.0916, "step": 13648 }, { "epoch": 0.45, "grad_norm": 0.7357838153839111, "learning_rate": 1.1723337766707464e-05, "loss": 2.1634, "step": 13649 }, { "epoch": 0.45, "grad_norm": 0.7195533514022827, "learning_rate": 1.1722290766390305e-05, "loss": 2.1455, "step": 13650 }, { "epoch": 0.45, "grad_norm": 0.7323089241981506, "learning_rate": 1.1721243746615714e-05, "loss": 2.0436, "step": 13651 }, { "epoch": 0.45, "grad_norm": 0.753634512424469, "learning_rate": 1.172019670739552e-05, "loss": 2.0273, "step": 13652 }, { "epoch": 0.45, "grad_norm": 0.7407516837120056, "learning_rate": 1.171914964874155e-05, "loss": 2.1384, "step": 13653 }, { "epoch": 0.45, "grad_norm": 0.7272430658340454, "learning_rate": 1.1718102570665637e-05, "loss": 2.0942, "step": 13654 }, { "epoch": 0.45, "grad_norm": 0.7293431758880615, "learning_rate": 1.1717055473179606e-05, "loss": 2.0504, "step": 13655 }, { "epoch": 0.45, "grad_norm": 0.8003043532371521, "learning_rate": 1.1716008356295291e-05, "loss": 2.0668, "step": 13656 }, { "epoch": 0.45, "grad_norm": 0.7484285831451416, "learning_rate": 1.1714961220024517e-05, "loss": 2.112, "step": 13657 }, { "epoch": 0.45, "grad_norm": 0.7347952127456665, "learning_rate": 1.171391406437912e-05, "loss": 2.0792, "step": 13658 }, { "epoch": 0.45, "grad_norm": 0.7198622822761536, "learning_rate": 1.1712866889370921e-05, "loss": 2.0725, "step": 13659 }, { "epoch": 0.45, "grad_norm": 0.7431581020355225, "learning_rate": 1.1711819695011757e-05, "loss": 2.165, "step": 13660 }, { "epoch": 0.45, "grad_norm": 0.7139117121696472, "learning_rate": 1.1710772481313462e-05, "loss": 2.0748, "step": 13661 }, { "epoch": 0.45, "grad_norm": 0.7634978294372559, "learning_rate": 1.1709725248287858e-05, "loss": 2.0433, "step": 13662 }, { "epoch": 0.45, "grad_norm": 0.7221102714538574, "learning_rate": 1.170867799594678e-05, "loss": 2.1047, "step": 13663 }, { "epoch": 0.45, "grad_norm": 0.746594250202179, "learning_rate": 1.170763072430206e-05, "loss": 2.0648, "step": 13664 }, { "epoch": 0.45, "grad_norm": 0.7177837491035461, "learning_rate": 1.170658343336553e-05, "loss": 2.0794, "step": 13665 }, { "epoch": 0.45, "grad_norm": 0.7404343485832214, "learning_rate": 1.170553612314902e-05, "loss": 2.129, "step": 13666 }, { "epoch": 0.45, "grad_norm": 0.7583587169647217, "learning_rate": 1.1704488793664364e-05, "loss": 2.0697, "step": 13667 }, { "epoch": 0.45, "grad_norm": 0.7130071520805359, "learning_rate": 1.1703441444923387e-05, "loss": 2.0525, "step": 13668 }, { "epoch": 0.45, "grad_norm": 0.713004469871521, "learning_rate": 1.170239407693793e-05, "loss": 2.0594, "step": 13669 }, { "epoch": 0.45, "grad_norm": 0.7370579838752747, "learning_rate": 1.1701346689719823e-05, "loss": 2.138, "step": 13670 }, { "epoch": 0.45, "grad_norm": 0.7253836393356323, "learning_rate": 1.1700299283280899e-05, "loss": 2.0853, "step": 13671 }, { "epoch": 0.45, "grad_norm": 0.7241047024726868, "learning_rate": 1.1699251857632991e-05, "loss": 2.1016, "step": 13672 }, { "epoch": 0.45, "grad_norm": 0.7888206839561462, "learning_rate": 1.1698204412787927e-05, "loss": 2.1041, "step": 13673 }, { "epoch": 0.45, "grad_norm": 0.7230753898620605, "learning_rate": 1.1697156948757549e-05, "loss": 2.1162, "step": 13674 }, { "epoch": 0.45, "grad_norm": 0.7181246876716614, "learning_rate": 1.1696109465553685e-05, "loss": 2.0219, "step": 13675 }, { "epoch": 0.46, "grad_norm": 0.7416624426841736, "learning_rate": 1.169506196318817e-05, "loss": 2.0487, "step": 13676 }, { "epoch": 0.46, "grad_norm": 0.72171950340271, "learning_rate": 1.1694014441672838e-05, "loss": 1.9841, "step": 13677 }, { "epoch": 0.46, "grad_norm": 0.7187781929969788, "learning_rate": 1.1692966901019524e-05, "loss": 2.095, "step": 13678 }, { "epoch": 0.46, "grad_norm": 0.7483463883399963, "learning_rate": 1.1691919341240063e-05, "loss": 2.1678, "step": 13679 }, { "epoch": 0.46, "grad_norm": 0.7612974643707275, "learning_rate": 1.1690871762346287e-05, "loss": 2.0787, "step": 13680 }, { "epoch": 0.46, "grad_norm": 0.7629601955413818, "learning_rate": 1.1689824164350035e-05, "loss": 2.0825, "step": 13681 }, { "epoch": 0.46, "grad_norm": 0.7431178689002991, "learning_rate": 1.1688776547263137e-05, "loss": 2.0248, "step": 13682 }, { "epoch": 0.46, "grad_norm": 0.7509763240814209, "learning_rate": 1.1687728911097435e-05, "loss": 2.0657, "step": 13683 }, { "epoch": 0.46, "grad_norm": 0.7365239858627319, "learning_rate": 1.1686681255864758e-05, "loss": 2.0528, "step": 13684 }, { "epoch": 0.46, "grad_norm": 0.7138425707817078, "learning_rate": 1.1685633581576947e-05, "loss": 2.1661, "step": 13685 }, { "epoch": 0.46, "grad_norm": 0.7321451902389526, "learning_rate": 1.1684585888245834e-05, "loss": 2.0905, "step": 13686 }, { "epoch": 0.46, "grad_norm": 0.7448704838752747, "learning_rate": 1.1683538175883256e-05, "loss": 2.081, "step": 13687 }, { "epoch": 0.46, "grad_norm": 0.764478325843811, "learning_rate": 1.168249044450105e-05, "loss": 2.0542, "step": 13688 }, { "epoch": 0.46, "grad_norm": 0.7574368119239807, "learning_rate": 1.1681442694111055e-05, "loss": 2.0596, "step": 13689 }, { "epoch": 0.46, "grad_norm": 0.7530555725097656, "learning_rate": 1.1680394924725107e-05, "loss": 2.15, "step": 13690 }, { "epoch": 0.46, "grad_norm": 0.69953852891922, "learning_rate": 1.1679347136355039e-05, "loss": 2.0633, "step": 13691 }, { "epoch": 0.46, "grad_norm": 0.763239860534668, "learning_rate": 1.1678299329012693e-05, "loss": 2.0256, "step": 13692 }, { "epoch": 0.46, "grad_norm": 0.7498131394386292, "learning_rate": 1.1677251502709904e-05, "loss": 2.0631, "step": 13693 }, { "epoch": 0.46, "grad_norm": 0.7580260038375854, "learning_rate": 1.1676203657458513e-05, "loss": 2.1257, "step": 13694 }, { "epoch": 0.46, "grad_norm": 0.7232735753059387, "learning_rate": 1.167515579327035e-05, "loss": 2.1499, "step": 13695 }, { "epoch": 0.46, "grad_norm": 0.7194355130195618, "learning_rate": 1.1674107910157264e-05, "loss": 2.0418, "step": 13696 }, { "epoch": 0.46, "grad_norm": 0.7269057631492615, "learning_rate": 1.1673060008131085e-05, "loss": 2.0864, "step": 13697 }, { "epoch": 0.46, "grad_norm": 0.7373828887939453, "learning_rate": 1.1672012087203655e-05, "loss": 2.1178, "step": 13698 }, { "epoch": 0.46, "grad_norm": 0.7113621234893799, "learning_rate": 1.1670964147386815e-05, "loss": 2.0994, "step": 13699 }, { "epoch": 0.46, "grad_norm": 0.7409968376159668, "learning_rate": 1.1669916188692397e-05, "loss": 2.0597, "step": 13700 }, { "epoch": 0.46, "grad_norm": 0.7060233950614929, "learning_rate": 1.1668868211132247e-05, "loss": 2.0928, "step": 13701 }, { "epoch": 0.46, "grad_norm": 0.7202461957931519, "learning_rate": 1.16678202147182e-05, "loss": 2.0582, "step": 13702 }, { "epoch": 0.46, "grad_norm": 0.7405433654785156, "learning_rate": 1.16667721994621e-05, "loss": 2.1196, "step": 13703 }, { "epoch": 0.46, "grad_norm": 0.7499316334724426, "learning_rate": 1.1665724165375783e-05, "loss": 2.0728, "step": 13704 }, { "epoch": 0.46, "grad_norm": 0.7555602192878723, "learning_rate": 1.1664676112471094e-05, "loss": 2.0208, "step": 13705 }, { "epoch": 0.46, "grad_norm": 0.7762464284896851, "learning_rate": 1.1663628040759865e-05, "loss": 2.0387, "step": 13706 }, { "epoch": 0.46, "grad_norm": 0.7289209365844727, "learning_rate": 1.1662579950253944e-05, "loss": 2.0078, "step": 13707 }, { "epoch": 0.46, "grad_norm": 0.7297747731208801, "learning_rate": 1.166153184096517e-05, "loss": 2.137, "step": 13708 }, { "epoch": 0.46, "grad_norm": 0.7750729918479919, "learning_rate": 1.1660483712905381e-05, "loss": 2.1189, "step": 13709 }, { "epoch": 0.46, "grad_norm": 0.8182358145713806, "learning_rate": 1.1659435566086421e-05, "loss": 2.1063, "step": 13710 }, { "epoch": 0.46, "grad_norm": 0.7671381235122681, "learning_rate": 1.165838740052013e-05, "loss": 2.1015, "step": 13711 }, { "epoch": 0.46, "grad_norm": 0.7501659393310547, "learning_rate": 1.165733921621835e-05, "loss": 2.047, "step": 13712 }, { "epoch": 0.46, "grad_norm": 0.7266820669174194, "learning_rate": 1.1656291013192922e-05, "loss": 1.9885, "step": 13713 }, { "epoch": 0.46, "grad_norm": 0.7108304500579834, "learning_rate": 1.165524279145569e-05, "loss": 2.1447, "step": 13714 }, { "epoch": 0.46, "grad_norm": 0.7451035380363464, "learning_rate": 1.1654194551018496e-05, "loss": 2.1282, "step": 13715 }, { "epoch": 0.46, "grad_norm": 0.7590197920799255, "learning_rate": 1.1653146291893182e-05, "loss": 2.0399, "step": 13716 }, { "epoch": 0.46, "grad_norm": 0.743858277797699, "learning_rate": 1.1652098014091587e-05, "loss": 2.1689, "step": 13717 }, { "epoch": 0.46, "grad_norm": 0.7178841829299927, "learning_rate": 1.165104971762556e-05, "loss": 2.0946, "step": 13718 }, { "epoch": 0.46, "grad_norm": 0.7296066284179688, "learning_rate": 1.1650001402506939e-05, "loss": 2.0716, "step": 13719 }, { "epoch": 0.46, "grad_norm": 0.7324082255363464, "learning_rate": 1.1648953068747569e-05, "loss": 2.0448, "step": 13720 }, { "epoch": 0.46, "grad_norm": 0.7605100274085999, "learning_rate": 1.1647904716359292e-05, "loss": 2.015, "step": 13721 }, { "epoch": 0.46, "grad_norm": 0.768844485282898, "learning_rate": 1.1646856345353957e-05, "loss": 2.0263, "step": 13722 }, { "epoch": 0.46, "grad_norm": 0.7715038657188416, "learning_rate": 1.1645807955743402e-05, "loss": 2.1304, "step": 13723 }, { "epoch": 0.46, "grad_norm": 0.7381030321121216, "learning_rate": 1.1644759547539473e-05, "loss": 2.0975, "step": 13724 }, { "epoch": 0.46, "grad_norm": 0.751333475112915, "learning_rate": 1.1643711120754015e-05, "loss": 2.0958, "step": 13725 }, { "epoch": 0.46, "grad_norm": 0.7442734837532043, "learning_rate": 1.1642662675398872e-05, "loss": 1.9951, "step": 13726 }, { "epoch": 0.46, "grad_norm": 0.7160650491714478, "learning_rate": 1.1641614211485892e-05, "loss": 2.0888, "step": 13727 }, { "epoch": 0.46, "grad_norm": 0.7498159408569336, "learning_rate": 1.1640565729026912e-05, "loss": 2.1579, "step": 13728 }, { "epoch": 0.46, "grad_norm": 0.731899082660675, "learning_rate": 1.1639517228033786e-05, "loss": 2.136, "step": 13729 }, { "epoch": 0.46, "grad_norm": 0.7222269177436829, "learning_rate": 1.1638468708518352e-05, "loss": 2.0882, "step": 13730 }, { "epoch": 0.46, "grad_norm": 0.7352545857429504, "learning_rate": 1.163742017049246e-05, "loss": 2.1528, "step": 13731 }, { "epoch": 0.46, "grad_norm": 0.7229012846946716, "learning_rate": 1.1636371613967954e-05, "loss": 2.0229, "step": 13732 }, { "epoch": 0.46, "grad_norm": 0.7431833744049072, "learning_rate": 1.1635323038956678e-05, "loss": 2.1452, "step": 13733 }, { "epoch": 0.46, "grad_norm": 0.7188732028007507, "learning_rate": 1.1634274445470485e-05, "loss": 2.1309, "step": 13734 }, { "epoch": 0.46, "grad_norm": 0.7330019474029541, "learning_rate": 1.1633225833521216e-05, "loss": 2.0301, "step": 13735 }, { "epoch": 0.46, "grad_norm": 0.7540656924247742, "learning_rate": 1.1632177203120719e-05, "loss": 2.0855, "step": 13736 }, { "epoch": 0.46, "grad_norm": 0.6982877254486084, "learning_rate": 1.1631128554280837e-05, "loss": 2.074, "step": 13737 }, { "epoch": 0.46, "grad_norm": 0.7802829742431641, "learning_rate": 1.1630079887013426e-05, "loss": 2.0758, "step": 13738 }, { "epoch": 0.46, "grad_norm": 0.7080307006835938, "learning_rate": 1.1629031201330322e-05, "loss": 2.0532, "step": 13739 }, { "epoch": 0.46, "grad_norm": 0.7211573719978333, "learning_rate": 1.1627982497243384e-05, "loss": 1.9945, "step": 13740 }, { "epoch": 0.46, "grad_norm": 0.7203699350357056, "learning_rate": 1.1626933774764451e-05, "loss": 2.1082, "step": 13741 }, { "epoch": 0.46, "grad_norm": 0.6971467733383179, "learning_rate": 1.162588503390537e-05, "loss": 2.0702, "step": 13742 }, { "epoch": 0.46, "grad_norm": 0.7233619689941406, "learning_rate": 1.1624836274678e-05, "loss": 2.1116, "step": 13743 }, { "epoch": 0.46, "grad_norm": 0.7606395483016968, "learning_rate": 1.1623787497094177e-05, "loss": 2.0618, "step": 13744 }, { "epoch": 0.46, "grad_norm": 0.7250732779502869, "learning_rate": 1.1622738701165756e-05, "loss": 2.119, "step": 13745 }, { "epoch": 0.46, "grad_norm": 0.7453100085258484, "learning_rate": 1.1621689886904583e-05, "loss": 2.0983, "step": 13746 }, { "epoch": 0.46, "grad_norm": 0.757234513759613, "learning_rate": 1.162064105432251e-05, "loss": 2.1804, "step": 13747 }, { "epoch": 0.46, "grad_norm": 0.7453684210777283, "learning_rate": 1.1619592203431384e-05, "loss": 2.055, "step": 13748 }, { "epoch": 0.46, "grad_norm": 0.7019809484481812, "learning_rate": 1.1618543334243055e-05, "loss": 2.0598, "step": 13749 }, { "epoch": 0.46, "grad_norm": 0.7220568060874939, "learning_rate": 1.1617494446769368e-05, "loss": 2.0768, "step": 13750 }, { "epoch": 0.46, "grad_norm": 0.7256587743759155, "learning_rate": 1.1616445541022184e-05, "loss": 2.1179, "step": 13751 }, { "epoch": 0.46, "grad_norm": 0.7081303596496582, "learning_rate": 1.1615396617013339e-05, "loss": 2.0777, "step": 13752 }, { "epoch": 0.46, "grad_norm": 0.7434557676315308, "learning_rate": 1.161434767475469e-05, "loss": 2.1268, "step": 13753 }, { "epoch": 0.46, "grad_norm": 0.7596583366394043, "learning_rate": 1.161329871425809e-05, "loss": 2.1072, "step": 13754 }, { "epoch": 0.46, "grad_norm": 1.0023049116134644, "learning_rate": 1.1612249735535386e-05, "loss": 2.0781, "step": 13755 }, { "epoch": 0.46, "grad_norm": 0.7322414517402649, "learning_rate": 1.1611200738598429e-05, "loss": 2.2143, "step": 13756 }, { "epoch": 0.46, "grad_norm": 0.7430794835090637, "learning_rate": 1.1610151723459069e-05, "loss": 2.1117, "step": 13757 }, { "epoch": 0.46, "grad_norm": 0.7875491380691528, "learning_rate": 1.160910269012916e-05, "loss": 2.2122, "step": 13758 }, { "epoch": 0.46, "grad_norm": 0.7219997644424438, "learning_rate": 1.1608053638620551e-05, "loss": 2.0634, "step": 13759 }, { "epoch": 0.46, "grad_norm": 0.732057511806488, "learning_rate": 1.1607004568945097e-05, "loss": 2.0851, "step": 13760 }, { "epoch": 0.46, "grad_norm": 0.7197027802467346, "learning_rate": 1.1605955481114643e-05, "loss": 2.1158, "step": 13761 }, { "epoch": 0.46, "grad_norm": 0.7384375929832458, "learning_rate": 1.160490637514105e-05, "loss": 2.0889, "step": 13762 }, { "epoch": 0.46, "grad_norm": 0.7573885917663574, "learning_rate": 1.160385725103616e-05, "loss": 2.0544, "step": 13763 }, { "epoch": 0.46, "grad_norm": 0.726362407207489, "learning_rate": 1.1602808108811831e-05, "loss": 2.0627, "step": 13764 }, { "epoch": 0.46, "grad_norm": 0.7642941474914551, "learning_rate": 1.160175894847992e-05, "loss": 2.1119, "step": 13765 }, { "epoch": 0.46, "grad_norm": 0.7677561640739441, "learning_rate": 1.1600709770052272e-05, "loss": 2.0845, "step": 13766 }, { "epoch": 0.46, "grad_norm": 0.6975741982460022, "learning_rate": 1.1599660573540746e-05, "loss": 2.0672, "step": 13767 }, { "epoch": 0.46, "grad_norm": 0.7311582565307617, "learning_rate": 1.159861135895719e-05, "loss": 2.0461, "step": 13768 }, { "epoch": 0.46, "grad_norm": 0.7537286281585693, "learning_rate": 1.159756212631346e-05, "loss": 2.0727, "step": 13769 }, { "epoch": 0.46, "grad_norm": 0.7318214774131775, "learning_rate": 1.1596512875621408e-05, "loss": 2.0891, "step": 13770 }, { "epoch": 0.46, "grad_norm": 0.7135664224624634, "learning_rate": 1.1595463606892891e-05, "loss": 2.1078, "step": 13771 }, { "epoch": 0.46, "grad_norm": 0.7326236963272095, "learning_rate": 1.159441432013976e-05, "loss": 2.0932, "step": 13772 }, { "epoch": 0.46, "grad_norm": 0.726948082447052, "learning_rate": 1.1593365015373874e-05, "loss": 2.0788, "step": 13773 }, { "epoch": 0.46, "grad_norm": 0.7320414185523987, "learning_rate": 1.1592315692607078e-05, "loss": 2.0545, "step": 13774 }, { "epoch": 0.46, "grad_norm": 0.7315406203269958, "learning_rate": 1.1591266351851234e-05, "loss": 2.0611, "step": 13775 }, { "epoch": 0.46, "grad_norm": 0.7235338687896729, "learning_rate": 1.1590216993118199e-05, "loss": 2.1023, "step": 13776 }, { "epoch": 0.46, "grad_norm": 0.7381092309951782, "learning_rate": 1.158916761641982e-05, "loss": 2.0963, "step": 13777 }, { "epoch": 0.46, "grad_norm": 0.772896409034729, "learning_rate": 1.1588118221767962e-05, "loss": 2.1372, "step": 13778 }, { "epoch": 0.46, "grad_norm": 0.7314771413803101, "learning_rate": 1.1587068809174471e-05, "loss": 2.0575, "step": 13779 }, { "epoch": 0.46, "grad_norm": 0.7316055297851562, "learning_rate": 1.1586019378651208e-05, "loss": 2.1004, "step": 13780 }, { "epoch": 0.46, "grad_norm": 0.7309486865997314, "learning_rate": 1.1584969930210026e-05, "loss": 2.0272, "step": 13781 }, { "epoch": 0.46, "grad_norm": 0.7354961633682251, "learning_rate": 1.1583920463862783e-05, "loss": 2.0628, "step": 13782 }, { "epoch": 0.46, "grad_norm": 0.7436493039131165, "learning_rate": 1.1582870979621337e-05, "loss": 2.1886, "step": 13783 }, { "epoch": 0.46, "grad_norm": 0.7101261615753174, "learning_rate": 1.1581821477497538e-05, "loss": 2.0419, "step": 13784 }, { "epoch": 0.46, "grad_norm": 0.7517040371894836, "learning_rate": 1.1580771957503252e-05, "loss": 2.1498, "step": 13785 }, { "epoch": 0.46, "grad_norm": 0.7366485595703125, "learning_rate": 1.1579722419650328e-05, "loss": 2.0679, "step": 13786 }, { "epoch": 0.46, "grad_norm": 0.7682188153266907, "learning_rate": 1.1578672863950628e-05, "loss": 2.082, "step": 13787 }, { "epoch": 0.46, "grad_norm": 0.7242255806922913, "learning_rate": 1.1577623290416005e-05, "loss": 2.0692, "step": 13788 }, { "epoch": 0.46, "grad_norm": 0.7442821264266968, "learning_rate": 1.157657369905832e-05, "loss": 2.0902, "step": 13789 }, { "epoch": 0.46, "grad_norm": 0.7589248418807983, "learning_rate": 1.1575524089889429e-05, "loss": 2.0905, "step": 13790 }, { "epoch": 0.46, "grad_norm": 0.7484375834465027, "learning_rate": 1.157447446292119e-05, "loss": 2.069, "step": 13791 }, { "epoch": 0.46, "grad_norm": 0.7687254548072815, "learning_rate": 1.1573424818165462e-05, "loss": 2.1245, "step": 13792 }, { "epoch": 0.46, "grad_norm": 0.7781221866607666, "learning_rate": 1.1572375155634101e-05, "loss": 2.1161, "step": 13793 }, { "epoch": 0.46, "grad_norm": 0.724916934967041, "learning_rate": 1.1571325475338968e-05, "loss": 2.1169, "step": 13794 }, { "epoch": 0.46, "grad_norm": 0.7341775298118591, "learning_rate": 1.1570275777291919e-05, "loss": 2.076, "step": 13795 }, { "epoch": 0.46, "grad_norm": 0.710292398929596, "learning_rate": 1.1569226061504816e-05, "loss": 1.9933, "step": 13796 }, { "epoch": 0.46, "grad_norm": 0.7598239183425903, "learning_rate": 1.1568176327989517e-05, "loss": 2.1007, "step": 13797 }, { "epoch": 0.46, "grad_norm": 0.755302369594574, "learning_rate": 1.156712657675788e-05, "loss": 2.1054, "step": 13798 }, { "epoch": 0.46, "grad_norm": 0.7612534761428833, "learning_rate": 1.1566076807821766e-05, "loss": 2.0651, "step": 13799 }, { "epoch": 0.46, "grad_norm": 0.7229418754577637, "learning_rate": 1.1565027021193036e-05, "loss": 2.1513, "step": 13800 }, { "epoch": 0.46, "grad_norm": 0.7178006172180176, "learning_rate": 1.1563977216883544e-05, "loss": 2.0463, "step": 13801 }, { "epoch": 0.46, "grad_norm": 0.7258201241493225, "learning_rate": 1.1562927394905157e-05, "loss": 2.1088, "step": 13802 }, { "epoch": 0.46, "grad_norm": 0.6954020261764526, "learning_rate": 1.1561877555269729e-05, "loss": 2.0157, "step": 13803 }, { "epoch": 0.46, "grad_norm": 0.7243265509605408, "learning_rate": 1.1560827697989128e-05, "loss": 2.0818, "step": 13804 }, { "epoch": 0.46, "grad_norm": 0.7162607312202454, "learning_rate": 1.1559777823075206e-05, "loss": 2.0569, "step": 13805 }, { "epoch": 0.46, "grad_norm": 0.7211546897888184, "learning_rate": 1.155872793053983e-05, "loss": 2.0529, "step": 13806 }, { "epoch": 0.46, "grad_norm": 0.7309221029281616, "learning_rate": 1.155767802039486e-05, "loss": 2.0982, "step": 13807 }, { "epoch": 0.46, "grad_norm": 0.7461057901382446, "learning_rate": 1.1556628092652156e-05, "loss": 2.097, "step": 13808 }, { "epoch": 0.46, "grad_norm": 0.7538158297538757, "learning_rate": 1.1555578147323583e-05, "loss": 2.0875, "step": 13809 }, { "epoch": 0.46, "grad_norm": 0.727258563041687, "learning_rate": 1.1554528184420995e-05, "loss": 2.1033, "step": 13810 }, { "epoch": 0.46, "grad_norm": 0.7411683797836304, "learning_rate": 1.1553478203956264e-05, "loss": 2.017, "step": 13811 }, { "epoch": 0.46, "grad_norm": 0.7461369633674622, "learning_rate": 1.1552428205941241e-05, "loss": 2.0472, "step": 13812 }, { "epoch": 0.46, "grad_norm": 0.7368441820144653, "learning_rate": 1.1551378190387796e-05, "loss": 2.0617, "step": 13813 }, { "epoch": 0.46, "grad_norm": 0.7620907425880432, "learning_rate": 1.1550328157307791e-05, "loss": 2.1029, "step": 13814 }, { "epoch": 0.46, "grad_norm": 0.7447356581687927, "learning_rate": 1.1549278106713086e-05, "loss": 2.0507, "step": 13815 }, { "epoch": 0.46, "grad_norm": 0.7286354899406433, "learning_rate": 1.1548228038615545e-05, "loss": 2.0656, "step": 13816 }, { "epoch": 0.46, "grad_norm": 0.7376671433448792, "learning_rate": 1.1547177953027029e-05, "loss": 2.0964, "step": 13817 }, { "epoch": 0.46, "grad_norm": 0.7543604969978333, "learning_rate": 1.1546127849959405e-05, "loss": 2.1194, "step": 13818 }, { "epoch": 0.46, "grad_norm": 0.7598224878311157, "learning_rate": 1.1545077729424534e-05, "loss": 2.0952, "step": 13819 }, { "epoch": 0.46, "grad_norm": 0.7582297325134277, "learning_rate": 1.1544027591434283e-05, "loss": 2.0438, "step": 13820 }, { "epoch": 0.46, "grad_norm": 0.7243918776512146, "learning_rate": 1.1542977436000511e-05, "loss": 2.0731, "step": 13821 }, { "epoch": 0.46, "grad_norm": 0.7117231488227844, "learning_rate": 1.1541927263135087e-05, "loss": 2.0732, "step": 13822 }, { "epoch": 0.46, "grad_norm": 0.7152557969093323, "learning_rate": 1.1540877072849867e-05, "loss": 1.9554, "step": 13823 }, { "epoch": 0.46, "grad_norm": 0.7396102547645569, "learning_rate": 1.1539826865156725e-05, "loss": 2.0946, "step": 13824 }, { "epoch": 0.46, "grad_norm": 0.7315008640289307, "learning_rate": 1.1538776640067519e-05, "loss": 2.0017, "step": 13825 }, { "epoch": 0.46, "grad_norm": 0.7583364248275757, "learning_rate": 1.1537726397594119e-05, "loss": 2.1846, "step": 13826 }, { "epoch": 0.46, "grad_norm": 0.7367548942565918, "learning_rate": 1.1536676137748384e-05, "loss": 1.9949, "step": 13827 }, { "epoch": 0.46, "grad_norm": 0.7530710697174072, "learning_rate": 1.1535625860542186e-05, "loss": 2.1036, "step": 13828 }, { "epoch": 0.46, "grad_norm": 0.7300674319267273, "learning_rate": 1.1534575565987383e-05, "loss": 2.0043, "step": 13829 }, { "epoch": 0.46, "grad_norm": 0.7651640772819519, "learning_rate": 1.1533525254095848e-05, "loss": 2.0677, "step": 13830 }, { "epoch": 0.46, "grad_norm": 0.7363502383232117, "learning_rate": 1.1532474924879445e-05, "loss": 2.0626, "step": 13831 }, { "epoch": 0.46, "grad_norm": 0.7527604699134827, "learning_rate": 1.1531424578350032e-05, "loss": 2.116, "step": 13832 }, { "epoch": 0.46, "grad_norm": 0.7592442035675049, "learning_rate": 1.1530374214519489e-05, "loss": 2.1137, "step": 13833 }, { "epoch": 0.46, "grad_norm": 0.7953033447265625, "learning_rate": 1.152932383339967e-05, "loss": 2.0978, "step": 13834 }, { "epoch": 0.46, "grad_norm": 0.7347846031188965, "learning_rate": 1.1528273435002448e-05, "loss": 2.078, "step": 13835 }, { "epoch": 0.46, "grad_norm": 0.7502967715263367, "learning_rate": 1.1527223019339688e-05, "loss": 2.0442, "step": 13836 }, { "epoch": 0.46, "grad_norm": 0.7213720083236694, "learning_rate": 1.1526172586423259e-05, "loss": 2.0875, "step": 13837 }, { "epoch": 0.46, "grad_norm": 0.7581600546836853, "learning_rate": 1.1525122136265025e-05, "loss": 2.1345, "step": 13838 }, { "epoch": 0.46, "grad_norm": 0.7445322275161743, "learning_rate": 1.1524071668876856e-05, "loss": 2.1047, "step": 13839 }, { "epoch": 0.46, "grad_norm": 0.7370960712432861, "learning_rate": 1.1523021184270615e-05, "loss": 2.064, "step": 13840 }, { "epoch": 0.46, "grad_norm": 0.774368166923523, "learning_rate": 1.1521970682458176e-05, "loss": 2.1606, "step": 13841 }, { "epoch": 0.46, "grad_norm": 0.7363104820251465, "learning_rate": 1.1520920163451407e-05, "loss": 2.0227, "step": 13842 }, { "epoch": 0.46, "grad_norm": 0.7620216012001038, "learning_rate": 1.1519869627262168e-05, "loss": 2.1349, "step": 13843 }, { "epoch": 0.46, "grad_norm": 0.7250754237174988, "learning_rate": 1.1518819073902336e-05, "loss": 2.1687, "step": 13844 }, { "epoch": 0.46, "grad_norm": 0.73106849193573, "learning_rate": 1.1517768503383777e-05, "loss": 2.0818, "step": 13845 }, { "epoch": 0.46, "grad_norm": 0.74542236328125, "learning_rate": 1.1516717915718357e-05, "loss": 2.0703, "step": 13846 }, { "epoch": 0.46, "grad_norm": 0.7665259838104248, "learning_rate": 1.1515667310917946e-05, "loss": 2.0592, "step": 13847 }, { "epoch": 0.46, "grad_norm": 0.7540133595466614, "learning_rate": 1.1514616688994416e-05, "loss": 2.0475, "step": 13848 }, { "epoch": 0.46, "grad_norm": 0.7784125804901123, "learning_rate": 1.1513566049959634e-05, "loss": 2.1381, "step": 13849 }, { "epoch": 0.46, "grad_norm": 0.7082016468048096, "learning_rate": 1.151251539382547e-05, "loss": 2.0818, "step": 13850 }, { "epoch": 0.46, "grad_norm": 0.7291967272758484, "learning_rate": 1.1511464720603791e-05, "loss": 2.1883, "step": 13851 }, { "epoch": 0.46, "grad_norm": 0.714471161365509, "learning_rate": 1.1510414030306472e-05, "loss": 2.0319, "step": 13852 }, { "epoch": 0.46, "grad_norm": 0.7395991086959839, "learning_rate": 1.1509363322945381e-05, "loss": 1.9846, "step": 13853 }, { "epoch": 0.46, "grad_norm": 0.7501673698425293, "learning_rate": 1.1508312598532385e-05, "loss": 2.098, "step": 13854 }, { "epoch": 0.46, "grad_norm": 0.7246443033218384, "learning_rate": 1.1507261857079358e-05, "loss": 2.0764, "step": 13855 }, { "epoch": 0.46, "grad_norm": 0.7416405081748962, "learning_rate": 1.1506211098598172e-05, "loss": 2.0618, "step": 13856 }, { "epoch": 0.46, "grad_norm": 0.7348794341087341, "learning_rate": 1.1505160323100692e-05, "loss": 2.1011, "step": 13857 }, { "epoch": 0.46, "grad_norm": 0.7050432562828064, "learning_rate": 1.1504109530598797e-05, "loss": 2.0575, "step": 13858 }, { "epoch": 0.46, "grad_norm": 0.7333235740661621, "learning_rate": 1.1503058721104349e-05, "loss": 2.0726, "step": 13859 }, { "epoch": 0.46, "grad_norm": 0.7256713509559631, "learning_rate": 1.150200789462923e-05, "loss": 2.0544, "step": 13860 }, { "epoch": 0.46, "grad_norm": 0.7469145059585571, "learning_rate": 1.1500957051185304e-05, "loss": 2.1184, "step": 13861 }, { "epoch": 0.46, "grad_norm": 0.7442758679389954, "learning_rate": 1.1499906190784445e-05, "loss": 2.1784, "step": 13862 }, { "epoch": 0.46, "grad_norm": 0.7319467663764954, "learning_rate": 1.1498855313438524e-05, "loss": 2.0443, "step": 13863 }, { "epoch": 0.46, "grad_norm": 0.8317609429359436, "learning_rate": 1.1497804419159417e-05, "loss": 2.1619, "step": 13864 }, { "epoch": 0.46, "grad_norm": 0.7429776191711426, "learning_rate": 1.1496753507958988e-05, "loss": 2.1153, "step": 13865 }, { "epoch": 0.46, "grad_norm": 0.7224605679512024, "learning_rate": 1.149570257984912e-05, "loss": 2.0132, "step": 13866 }, { "epoch": 0.46, "grad_norm": 0.7159321904182434, "learning_rate": 1.1494651634841676e-05, "loss": 2.0076, "step": 13867 }, { "epoch": 0.46, "grad_norm": 0.7364002466201782, "learning_rate": 1.1493600672948537e-05, "loss": 2.0055, "step": 13868 }, { "epoch": 0.46, "grad_norm": 0.7353820204734802, "learning_rate": 1.1492549694181574e-05, "loss": 2.0486, "step": 13869 }, { "epoch": 0.46, "grad_norm": 0.7276679277420044, "learning_rate": 1.1491498698552657e-05, "loss": 2.067, "step": 13870 }, { "epoch": 0.46, "grad_norm": 0.711052656173706, "learning_rate": 1.1490447686073663e-05, "loss": 2.1002, "step": 13871 }, { "epoch": 0.46, "grad_norm": 0.7470738887786865, "learning_rate": 1.1489396656756463e-05, "loss": 2.1044, "step": 13872 }, { "epoch": 0.46, "grad_norm": 0.7476822137832642, "learning_rate": 1.1488345610612934e-05, "loss": 2.1049, "step": 13873 }, { "epoch": 0.46, "grad_norm": 0.7425014972686768, "learning_rate": 1.148729454765495e-05, "loss": 2.1098, "step": 13874 }, { "epoch": 0.46, "grad_norm": 0.7206026911735535, "learning_rate": 1.1486243467894381e-05, "loss": 2.0387, "step": 13875 }, { "epoch": 0.46, "grad_norm": 0.7091426849365234, "learning_rate": 1.1485192371343106e-05, "loss": 2.0376, "step": 13876 }, { "epoch": 0.46, "grad_norm": 0.715815007686615, "learning_rate": 1.1484141258012998e-05, "loss": 2.0714, "step": 13877 }, { "epoch": 0.46, "grad_norm": 0.7546079158782959, "learning_rate": 1.148309012791593e-05, "loss": 2.0836, "step": 13878 }, { "epoch": 0.46, "grad_norm": 0.7702943682670593, "learning_rate": 1.1482038981063778e-05, "loss": 2.1156, "step": 13879 }, { "epoch": 0.46, "grad_norm": 0.7444843649864197, "learning_rate": 1.1480987817468423e-05, "loss": 2.1323, "step": 13880 }, { "epoch": 0.46, "grad_norm": 0.7514033317565918, "learning_rate": 1.147993663714173e-05, "loss": 2.061, "step": 13881 }, { "epoch": 0.46, "grad_norm": 0.7395638823509216, "learning_rate": 1.1478885440095587e-05, "loss": 2.1083, "step": 13882 }, { "epoch": 0.46, "grad_norm": 0.758599042892456, "learning_rate": 1.1477834226341857e-05, "loss": 2.1821, "step": 13883 }, { "epoch": 0.46, "grad_norm": 0.7210294008255005, "learning_rate": 1.1476782995892424e-05, "loss": 2.0166, "step": 13884 }, { "epoch": 0.46, "grad_norm": 0.7398263216018677, "learning_rate": 1.1475731748759162e-05, "loss": 2.1801, "step": 13885 }, { "epoch": 0.46, "grad_norm": 0.764863908290863, "learning_rate": 1.147468048495395e-05, "loss": 2.0518, "step": 13886 }, { "epoch": 0.46, "grad_norm": 0.7324207425117493, "learning_rate": 1.1473629204488659e-05, "loss": 2.0582, "step": 13887 }, { "epoch": 0.46, "grad_norm": 0.7663314938545227, "learning_rate": 1.147257790737517e-05, "loss": 2.1097, "step": 13888 }, { "epoch": 0.46, "grad_norm": 0.7337262630462646, "learning_rate": 1.1471526593625358e-05, "loss": 2.0108, "step": 13889 }, { "epoch": 0.46, "grad_norm": 0.7179603576660156, "learning_rate": 1.1470475263251099e-05, "loss": 2.0722, "step": 13890 }, { "epoch": 0.46, "grad_norm": 0.7415421009063721, "learning_rate": 1.1469423916264277e-05, "loss": 2.066, "step": 13891 }, { "epoch": 0.46, "grad_norm": 0.7523535490036011, "learning_rate": 1.146837255267676e-05, "loss": 2.0712, "step": 13892 }, { "epoch": 0.46, "grad_norm": 0.7425236105918884, "learning_rate": 1.1467321172500437e-05, "loss": 2.086, "step": 13893 }, { "epoch": 0.46, "grad_norm": 0.777191698551178, "learning_rate": 1.1466269775747174e-05, "loss": 2.0477, "step": 13894 }, { "epoch": 0.46, "grad_norm": 0.7317134737968445, "learning_rate": 1.1465218362428856e-05, "loss": 2.0937, "step": 13895 }, { "epoch": 0.46, "grad_norm": 0.7797331809997559, "learning_rate": 1.1464166932557359e-05, "loss": 2.1333, "step": 13896 }, { "epoch": 0.46, "grad_norm": 0.751956045627594, "learning_rate": 1.1463115486144563e-05, "loss": 2.0396, "step": 13897 }, { "epoch": 0.46, "grad_norm": 0.7568763494491577, "learning_rate": 1.1462064023202345e-05, "loss": 2.0919, "step": 13898 }, { "epoch": 0.46, "grad_norm": 0.7560786604881287, "learning_rate": 1.1461012543742584e-05, "loss": 2.0439, "step": 13899 }, { "epoch": 0.46, "grad_norm": 0.7259364724159241, "learning_rate": 1.145996104777716e-05, "loss": 2.0688, "step": 13900 }, { "epoch": 0.46, "grad_norm": 0.7402641177177429, "learning_rate": 1.1458909535317953e-05, "loss": 2.1444, "step": 13901 }, { "epoch": 0.46, "grad_norm": 0.718134343624115, "learning_rate": 1.1457858006376841e-05, "loss": 2.086, "step": 13902 }, { "epoch": 0.46, "grad_norm": 0.7434369325637817, "learning_rate": 1.1456806460965701e-05, "loss": 2.059, "step": 13903 }, { "epoch": 0.46, "grad_norm": 0.7297135591506958, "learning_rate": 1.1455754899096419e-05, "loss": 2.157, "step": 13904 }, { "epoch": 0.46, "grad_norm": 0.7373976707458496, "learning_rate": 1.145470332078087e-05, "loss": 2.094, "step": 13905 }, { "epoch": 0.46, "grad_norm": 0.7307386994361877, "learning_rate": 1.1453651726030933e-05, "loss": 2.1151, "step": 13906 }, { "epoch": 0.46, "grad_norm": 0.7358463406562805, "learning_rate": 1.1452600114858492e-05, "loss": 2.0761, "step": 13907 }, { "epoch": 0.46, "grad_norm": 0.7153708934783936, "learning_rate": 1.1451548487275429e-05, "loss": 2.0131, "step": 13908 }, { "epoch": 0.46, "grad_norm": 0.7238354086875916, "learning_rate": 1.1450496843293618e-05, "loss": 2.0723, "step": 13909 }, { "epoch": 0.46, "grad_norm": 0.7462184429168701, "learning_rate": 1.1449445182924945e-05, "loss": 2.0467, "step": 13910 }, { "epoch": 0.46, "grad_norm": 0.7478886246681213, "learning_rate": 1.144839350618129e-05, "loss": 2.1195, "step": 13911 }, { "epoch": 0.46, "grad_norm": 0.7192968726158142, "learning_rate": 1.1447341813074533e-05, "loss": 2.0741, "step": 13912 }, { "epoch": 0.46, "grad_norm": 0.7380467653274536, "learning_rate": 1.144629010361656e-05, "loss": 2.0996, "step": 13913 }, { "epoch": 0.46, "grad_norm": 0.7404887080192566, "learning_rate": 1.1445238377819243e-05, "loss": 2.0496, "step": 13914 }, { "epoch": 0.46, "grad_norm": 0.7744516134262085, "learning_rate": 1.1444186635694476e-05, "loss": 2.0126, "step": 13915 }, { "epoch": 0.46, "grad_norm": 0.7374155521392822, "learning_rate": 1.1443134877254131e-05, "loss": 2.1088, "step": 13916 }, { "epoch": 0.46, "grad_norm": 0.7409254312515259, "learning_rate": 1.1442083102510096e-05, "loss": 2.0647, "step": 13917 }, { "epoch": 0.46, "grad_norm": 0.7647099494934082, "learning_rate": 1.144103131147425e-05, "loss": 2.1031, "step": 13918 }, { "epoch": 0.46, "grad_norm": 0.741031289100647, "learning_rate": 1.1439979504158476e-05, "loss": 2.0965, "step": 13919 }, { "epoch": 0.46, "grad_norm": 0.7317578792572021, "learning_rate": 1.1438927680574658e-05, "loss": 2.0238, "step": 13920 }, { "epoch": 0.46, "grad_norm": 0.7393238544464111, "learning_rate": 1.143787584073468e-05, "loss": 2.0677, "step": 13921 }, { "epoch": 0.46, "grad_norm": 0.7135379910469055, "learning_rate": 1.1436823984650422e-05, "loss": 2.051, "step": 13922 }, { "epoch": 0.46, "grad_norm": 0.7402812242507935, "learning_rate": 1.143577211233377e-05, "loss": 2.161, "step": 13923 }, { "epoch": 0.46, "grad_norm": 0.7004898190498352, "learning_rate": 1.1434720223796605e-05, "loss": 2.0466, "step": 13924 }, { "epoch": 0.46, "grad_norm": 0.7115724682807922, "learning_rate": 1.143366831905081e-05, "loss": 2.0585, "step": 13925 }, { "epoch": 0.46, "grad_norm": 0.7690955400466919, "learning_rate": 1.1432616398108274e-05, "loss": 2.1698, "step": 13926 }, { "epoch": 0.46, "grad_norm": 0.733790934085846, "learning_rate": 1.1431564460980877e-05, "loss": 2.0965, "step": 13927 }, { "epoch": 0.46, "grad_norm": 0.687777042388916, "learning_rate": 1.1430512507680503e-05, "loss": 2.0732, "step": 13928 }, { "epoch": 0.46, "grad_norm": 0.7394942045211792, "learning_rate": 1.1429460538219034e-05, "loss": 2.0993, "step": 13929 }, { "epoch": 0.46, "grad_norm": 0.7259914875030518, "learning_rate": 1.1428408552608361e-05, "loss": 2.0541, "step": 13930 }, { "epoch": 0.46, "grad_norm": 0.7330652475357056, "learning_rate": 1.1427356550860364e-05, "loss": 2.1504, "step": 13931 }, { "epoch": 0.46, "grad_norm": 0.7698226571083069, "learning_rate": 1.1426304532986929e-05, "loss": 2.118, "step": 13932 }, { "epoch": 0.46, "grad_norm": 0.7498550415039062, "learning_rate": 1.1425252498999944e-05, "loss": 2.1065, "step": 13933 }, { "epoch": 0.46, "grad_norm": 0.7705021500587463, "learning_rate": 1.1424200448911289e-05, "loss": 2.034, "step": 13934 }, { "epoch": 0.46, "grad_norm": 0.7328606247901917, "learning_rate": 1.1423148382732854e-05, "loss": 2.0827, "step": 13935 }, { "epoch": 0.46, "grad_norm": 0.7137869596481323, "learning_rate": 1.142209630047652e-05, "loss": 2.0482, "step": 13936 }, { "epoch": 0.46, "grad_norm": 0.7185385823249817, "learning_rate": 1.1421044202154179e-05, "loss": 2.1659, "step": 13937 }, { "epoch": 0.46, "grad_norm": 0.7574206590652466, "learning_rate": 1.141999208777771e-05, "loss": 2.1053, "step": 13938 }, { "epoch": 0.46, "grad_norm": 0.7163469195365906, "learning_rate": 1.1418939957359004e-05, "loss": 2.1074, "step": 13939 }, { "epoch": 0.46, "grad_norm": 0.7385785579681396, "learning_rate": 1.1417887810909944e-05, "loss": 2.1101, "step": 13940 }, { "epoch": 0.46, "grad_norm": 0.7270582914352417, "learning_rate": 1.1416835648442422e-05, "loss": 2.0727, "step": 13941 }, { "epoch": 0.46, "grad_norm": 0.7220864295959473, "learning_rate": 1.1415783469968318e-05, "loss": 2.0929, "step": 13942 }, { "epoch": 0.46, "grad_norm": 0.7149447798728943, "learning_rate": 1.1414731275499522e-05, "loss": 2.0499, "step": 13943 }, { "epoch": 0.46, "grad_norm": 0.72572261095047, "learning_rate": 1.1413679065047922e-05, "loss": 2.1075, "step": 13944 }, { "epoch": 0.46, "grad_norm": 0.762215256690979, "learning_rate": 1.1412626838625404e-05, "loss": 2.0324, "step": 13945 }, { "epoch": 0.46, "grad_norm": 0.7542533874511719, "learning_rate": 1.1411574596243859e-05, "loss": 2.015, "step": 13946 }, { "epoch": 0.46, "grad_norm": 0.7126274704933167, "learning_rate": 1.1410522337915169e-05, "loss": 2.1059, "step": 13947 }, { "epoch": 0.46, "grad_norm": 0.7149423956871033, "learning_rate": 1.1409470063651225e-05, "loss": 2.0672, "step": 13948 }, { "epoch": 0.46, "grad_norm": 0.748698353767395, "learning_rate": 1.1408417773463913e-05, "loss": 1.9922, "step": 13949 }, { "epoch": 0.46, "grad_norm": 0.7311465740203857, "learning_rate": 1.1407365467365124e-05, "loss": 2.0944, "step": 13950 }, { "epoch": 0.46, "grad_norm": 0.732067883014679, "learning_rate": 1.1406313145366742e-05, "loss": 2.1242, "step": 13951 }, { "epoch": 0.46, "grad_norm": 0.71977299451828, "learning_rate": 1.1405260807480662e-05, "loss": 2.0736, "step": 13952 }, { "epoch": 0.46, "grad_norm": 0.7463904619216919, "learning_rate": 1.1404208453718769e-05, "loss": 2.0105, "step": 13953 }, { "epoch": 0.46, "grad_norm": 0.7443024516105652, "learning_rate": 1.1403156084092947e-05, "loss": 2.0538, "step": 13954 }, { "epoch": 0.46, "grad_norm": 0.7316613793373108, "learning_rate": 1.1402103698615093e-05, "loss": 1.9684, "step": 13955 }, { "epoch": 0.46, "grad_norm": 0.7614690065383911, "learning_rate": 1.1401051297297095e-05, "loss": 2.1533, "step": 13956 }, { "epoch": 0.46, "grad_norm": 0.7414304614067078, "learning_rate": 1.1399998880150838e-05, "loss": 2.0698, "step": 13957 }, { "epoch": 0.46, "grad_norm": 0.754650890827179, "learning_rate": 1.1398946447188213e-05, "loss": 2.1399, "step": 13958 }, { "epoch": 0.46, "grad_norm": 0.7636027932167053, "learning_rate": 1.1397893998421115e-05, "loss": 2.0127, "step": 13959 }, { "epoch": 0.46, "grad_norm": 0.7040846347808838, "learning_rate": 1.1396841533861427e-05, "loss": 2.1173, "step": 13960 }, { "epoch": 0.46, "grad_norm": 0.7160289287567139, "learning_rate": 1.139578905352104e-05, "loss": 2.0297, "step": 13961 }, { "epoch": 0.46, "grad_norm": 0.7494525909423828, "learning_rate": 1.1394736557411852e-05, "loss": 2.1559, "step": 13962 }, { "epoch": 0.46, "grad_norm": 0.7337530255317688, "learning_rate": 1.1393684045545741e-05, "loss": 2.0356, "step": 13963 }, { "epoch": 0.46, "grad_norm": 0.7371634244918823, "learning_rate": 1.139263151793461e-05, "loss": 2.0973, "step": 13964 }, { "epoch": 0.46, "grad_norm": 0.767296552658081, "learning_rate": 1.1391578974590344e-05, "loss": 2.1593, "step": 13965 }, { "epoch": 0.46, "grad_norm": 0.7549008131027222, "learning_rate": 1.139052641552483e-05, "loss": 2.0054, "step": 13966 }, { "epoch": 0.46, "grad_norm": 0.7386720776557922, "learning_rate": 1.1389473840749965e-05, "loss": 2.0924, "step": 13967 }, { "epoch": 0.46, "grad_norm": 0.7300058603286743, "learning_rate": 1.1388421250277641e-05, "loss": 2.0955, "step": 13968 }, { "epoch": 0.46, "grad_norm": 0.7234599590301514, "learning_rate": 1.1387368644119745e-05, "loss": 2.1453, "step": 13969 }, { "epoch": 0.46, "grad_norm": 0.7075248956680298, "learning_rate": 1.1386316022288175e-05, "loss": 2.0352, "step": 13970 }, { "epoch": 0.46, "grad_norm": 0.7367042899131775, "learning_rate": 1.1385263384794813e-05, "loss": 2.131, "step": 13971 }, { "epoch": 0.46, "grad_norm": 0.7205585241317749, "learning_rate": 1.1384210731651562e-05, "loss": 2.1128, "step": 13972 }, { "epoch": 0.46, "grad_norm": 0.6995275616645813, "learning_rate": 1.138315806287031e-05, "loss": 2.0532, "step": 13973 }, { "epoch": 0.46, "grad_norm": 0.7629989385604858, "learning_rate": 1.1382105378462945e-05, "loss": 2.0868, "step": 13974 }, { "epoch": 0.46, "grad_norm": 0.740467369556427, "learning_rate": 1.1381052678441367e-05, "loss": 2.0842, "step": 13975 }, { "epoch": 0.46, "grad_norm": 0.712752640247345, "learning_rate": 1.1379999962817462e-05, "loss": 2.1317, "step": 13976 }, { "epoch": 0.47, "grad_norm": 0.7488521933555603, "learning_rate": 1.1378947231603128e-05, "loss": 2.1505, "step": 13977 }, { "epoch": 0.47, "grad_norm": 0.7209734320640564, "learning_rate": 1.1377894484810255e-05, "loss": 2.0598, "step": 13978 }, { "epoch": 0.47, "grad_norm": 0.7276514768600464, "learning_rate": 1.1376841722450738e-05, "loss": 2.1052, "step": 13979 }, { "epoch": 0.47, "grad_norm": 0.7672101855278015, "learning_rate": 1.137578894453647e-05, "loss": 1.9813, "step": 13980 }, { "epoch": 0.47, "grad_norm": 0.781242847442627, "learning_rate": 1.137473615107935e-05, "loss": 2.103, "step": 13981 }, { "epoch": 0.47, "grad_norm": 0.7273551821708679, "learning_rate": 1.1373683342091257e-05, "loss": 2.0676, "step": 13982 }, { "epoch": 0.47, "grad_norm": 0.7384310960769653, "learning_rate": 1.13726305175841e-05, "loss": 2.1079, "step": 13983 }, { "epoch": 0.47, "grad_norm": 0.7347680330276489, "learning_rate": 1.137157767756977e-05, "loss": 2.0556, "step": 13984 }, { "epoch": 0.47, "grad_norm": 0.7486788034439087, "learning_rate": 1.1370524822060154e-05, "loss": 2.0971, "step": 13985 }, { "epoch": 0.47, "grad_norm": 0.7360063791275024, "learning_rate": 1.1369471951067158e-05, "loss": 2.0756, "step": 13986 }, { "epoch": 0.47, "grad_norm": 0.7113722562789917, "learning_rate": 1.1368419064602666e-05, "loss": 2.0873, "step": 13987 }, { "epoch": 0.47, "grad_norm": 0.7143206000328064, "learning_rate": 1.1367366162678577e-05, "loss": 2.0529, "step": 13988 }, { "epoch": 0.47, "grad_norm": 0.7286527156829834, "learning_rate": 1.1366313245306788e-05, "loss": 2.0539, "step": 13989 }, { "epoch": 0.47, "grad_norm": 0.7297046184539795, "learning_rate": 1.1365260312499194e-05, "loss": 2.105, "step": 13990 }, { "epoch": 0.47, "grad_norm": 0.7451215982437134, "learning_rate": 1.1364207364267686e-05, "loss": 2.0773, "step": 13991 }, { "epoch": 0.47, "grad_norm": 0.7240462303161621, "learning_rate": 1.1363154400624166e-05, "loss": 2.0693, "step": 13992 }, { "epoch": 0.47, "grad_norm": 0.7205730080604553, "learning_rate": 1.1362101421580522e-05, "loss": 2.0684, "step": 13993 }, { "epoch": 0.47, "grad_norm": 0.7240166068077087, "learning_rate": 1.1361048427148657e-05, "loss": 2.0661, "step": 13994 }, { "epoch": 0.47, "grad_norm": 0.7666645050048828, "learning_rate": 1.1359995417340468e-05, "loss": 2.1364, "step": 13995 }, { "epoch": 0.47, "grad_norm": 0.7451803684234619, "learning_rate": 1.1358942392167842e-05, "loss": 2.0688, "step": 13996 }, { "epoch": 0.47, "grad_norm": 0.7014675140380859, "learning_rate": 1.1357889351642686e-05, "loss": 2.0438, "step": 13997 }, { "epoch": 0.47, "grad_norm": 0.7625283002853394, "learning_rate": 1.135683629577689e-05, "loss": 2.1134, "step": 13998 }, { "epoch": 0.47, "grad_norm": 0.7011498808860779, "learning_rate": 1.135578322458235e-05, "loss": 2.0442, "step": 13999 }, { "epoch": 0.47, "grad_norm": 0.719828724861145, "learning_rate": 1.135473013807097e-05, "loss": 2.103, "step": 14000 }, { "epoch": 0.47, "grad_norm": 0.7221359610557556, "learning_rate": 1.135367703625464e-05, "loss": 2.0582, "step": 14001 }, { "epoch": 0.47, "grad_norm": 0.7149716019630432, "learning_rate": 1.1352623919145263e-05, "loss": 2.0814, "step": 14002 }, { "epoch": 0.47, "grad_norm": 0.7294800877571106, "learning_rate": 1.1351570786754731e-05, "loss": 2.1489, "step": 14003 }, { "epoch": 0.47, "grad_norm": 0.7244287729263306, "learning_rate": 1.1350517639094945e-05, "loss": 2.0893, "step": 14004 }, { "epoch": 0.47, "grad_norm": 0.748668909072876, "learning_rate": 1.1349464476177801e-05, "loss": 2.1098, "step": 14005 }, { "epoch": 0.47, "grad_norm": 0.740935206413269, "learning_rate": 1.1348411298015203e-05, "loss": 2.0779, "step": 14006 }, { "epoch": 0.47, "grad_norm": 0.7441394329071045, "learning_rate": 1.1347358104619038e-05, "loss": 2.052, "step": 14007 }, { "epoch": 0.47, "grad_norm": 0.7490922808647156, "learning_rate": 1.1346304896001217e-05, "loss": 2.1348, "step": 14008 }, { "epoch": 0.47, "grad_norm": 0.7414029240608215, "learning_rate": 1.1345251672173628e-05, "loss": 2.0878, "step": 14009 }, { "epoch": 0.47, "grad_norm": 0.7330541610717773, "learning_rate": 1.1344198433148175e-05, "loss": 1.9649, "step": 14010 }, { "epoch": 0.47, "grad_norm": 0.7368197441101074, "learning_rate": 1.1343145178936757e-05, "loss": 2.0267, "step": 14011 }, { "epoch": 0.47, "grad_norm": 0.7594302296638489, "learning_rate": 1.134209190955127e-05, "loss": 2.1157, "step": 14012 }, { "epoch": 0.47, "grad_norm": 0.7266318202018738, "learning_rate": 1.1341038625003615e-05, "loss": 2.0607, "step": 14013 }, { "epoch": 0.47, "grad_norm": 0.7155597805976868, "learning_rate": 1.133998532530569e-05, "loss": 2.108, "step": 14014 }, { "epoch": 0.47, "grad_norm": 0.7481585144996643, "learning_rate": 1.1338932010469402e-05, "loss": 2.1388, "step": 14015 }, { "epoch": 0.47, "grad_norm": 0.7479772567749023, "learning_rate": 1.133787868050664e-05, "loss": 2.0908, "step": 14016 }, { "epoch": 0.47, "grad_norm": 0.7307004332542419, "learning_rate": 1.1336825335429314e-05, "loss": 2.1405, "step": 14017 }, { "epoch": 0.47, "grad_norm": 0.7523561716079712, "learning_rate": 1.1335771975249312e-05, "loss": 2.158, "step": 14018 }, { "epoch": 0.47, "grad_norm": 0.7081090211868286, "learning_rate": 1.1334718599978545e-05, "loss": 2.0943, "step": 14019 }, { "epoch": 0.47, "grad_norm": 0.7477535605430603, "learning_rate": 1.1333665209628908e-05, "loss": 2.1125, "step": 14020 }, { "epoch": 0.47, "grad_norm": 0.7332716584205627, "learning_rate": 1.1332611804212305e-05, "loss": 2.0454, "step": 14021 }, { "epoch": 0.47, "grad_norm": 0.767056941986084, "learning_rate": 1.1331558383740633e-05, "loss": 2.113, "step": 14022 }, { "epoch": 0.47, "grad_norm": 0.7267517447471619, "learning_rate": 1.1330504948225795e-05, "loss": 2.0748, "step": 14023 }, { "epoch": 0.47, "grad_norm": 0.7462465763092041, "learning_rate": 1.1329451497679692e-05, "loss": 2.0513, "step": 14024 }, { "epoch": 0.47, "grad_norm": 0.7530091404914856, "learning_rate": 1.1328398032114225e-05, "loss": 2.0737, "step": 14025 }, { "epoch": 0.47, "grad_norm": 0.7244776487350464, "learning_rate": 1.1327344551541295e-05, "loss": 2.1615, "step": 14026 }, { "epoch": 0.47, "grad_norm": 0.7228409647941589, "learning_rate": 1.1326291055972805e-05, "loss": 2.1467, "step": 14027 }, { "epoch": 0.47, "grad_norm": 0.7332518696784973, "learning_rate": 1.1325237545420658e-05, "loss": 2.1216, "step": 14028 }, { "epoch": 0.47, "grad_norm": 0.7250930666923523, "learning_rate": 1.1324184019896748e-05, "loss": 2.0935, "step": 14029 }, { "epoch": 0.47, "grad_norm": 0.7490986585617065, "learning_rate": 1.132313047941299e-05, "loss": 2.187, "step": 14030 }, { "epoch": 0.47, "grad_norm": 0.7438898086547852, "learning_rate": 1.1322076923981275e-05, "loss": 2.023, "step": 14031 }, { "epoch": 0.47, "grad_norm": 0.743503749370575, "learning_rate": 1.1321023353613511e-05, "loss": 2.1319, "step": 14032 }, { "epoch": 0.47, "grad_norm": 0.7419090270996094, "learning_rate": 1.13199697683216e-05, "loss": 2.1351, "step": 14033 }, { "epoch": 0.47, "grad_norm": 0.7308027148246765, "learning_rate": 1.1318916168117442e-05, "loss": 2.0453, "step": 14034 }, { "epoch": 0.47, "grad_norm": 0.7740827202796936, "learning_rate": 1.1317862553012944e-05, "loss": 2.1055, "step": 14035 }, { "epoch": 0.47, "grad_norm": 0.7577747106552124, "learning_rate": 1.1316808923020007e-05, "loss": 2.0984, "step": 14036 }, { "epoch": 0.47, "grad_norm": 0.742768406867981, "learning_rate": 1.1315755278150534e-05, "loss": 2.0944, "step": 14037 }, { "epoch": 0.47, "grad_norm": 0.7283887267112732, "learning_rate": 1.1314701618416429e-05, "loss": 2.0895, "step": 14038 }, { "epoch": 0.47, "grad_norm": 0.7511473298072815, "learning_rate": 1.1313647943829597e-05, "loss": 2.1151, "step": 14039 }, { "epoch": 0.47, "grad_norm": 0.7397477626800537, "learning_rate": 1.1312594254401937e-05, "loss": 1.961, "step": 14040 }, { "epoch": 0.47, "grad_norm": 0.73041170835495, "learning_rate": 1.131154055014536e-05, "loss": 2.1296, "step": 14041 }, { "epoch": 0.47, "grad_norm": 0.7607660889625549, "learning_rate": 1.1310486831071765e-05, "loss": 2.1117, "step": 14042 }, { "epoch": 0.47, "grad_norm": 0.7276760935783386, "learning_rate": 1.1309433097193057e-05, "loss": 2.0849, "step": 14043 }, { "epoch": 0.47, "grad_norm": 0.7549551129341125, "learning_rate": 1.1308379348521141e-05, "loss": 2.1102, "step": 14044 }, { "epoch": 0.47, "grad_norm": 0.7305091023445129, "learning_rate": 1.1307325585067923e-05, "loss": 2.1077, "step": 14045 }, { "epoch": 0.47, "grad_norm": 0.7362569570541382, "learning_rate": 1.1306271806845306e-05, "loss": 2.0761, "step": 14046 }, { "epoch": 0.47, "grad_norm": 0.7306249737739563, "learning_rate": 1.1305218013865198e-05, "loss": 1.9843, "step": 14047 }, { "epoch": 0.47, "grad_norm": 0.7770626544952393, "learning_rate": 1.1304164206139499e-05, "loss": 2.1324, "step": 14048 }, { "epoch": 0.47, "grad_norm": 0.7392917275428772, "learning_rate": 1.1303110383680119e-05, "loss": 2.0991, "step": 14049 }, { "epoch": 0.47, "grad_norm": 0.741791844367981, "learning_rate": 1.1302056546498964e-05, "loss": 2.0201, "step": 14050 }, { "epoch": 0.47, "grad_norm": 0.7629430294036865, "learning_rate": 1.130100269460793e-05, "loss": 2.0694, "step": 14051 }, { "epoch": 0.47, "grad_norm": 0.7539768218994141, "learning_rate": 1.1299948828018936e-05, "loss": 2.1713, "step": 14052 }, { "epoch": 0.47, "grad_norm": 0.7455495595932007, "learning_rate": 1.1298894946743878e-05, "loss": 2.0637, "step": 14053 }, { "epoch": 0.47, "grad_norm": 0.735785961151123, "learning_rate": 1.129784105079467e-05, "loss": 2.0646, "step": 14054 }, { "epoch": 0.47, "grad_norm": 0.6989507079124451, "learning_rate": 1.1296787140183212e-05, "loss": 2.0175, "step": 14055 }, { "epoch": 0.47, "grad_norm": 0.7415017485618591, "learning_rate": 1.1295733214921411e-05, "loss": 2.0424, "step": 14056 }, { "epoch": 0.47, "grad_norm": 0.6982277631759644, "learning_rate": 1.1294679275021179e-05, "loss": 1.9993, "step": 14057 }, { "epoch": 0.47, "grad_norm": 0.7305813431739807, "learning_rate": 1.129362532049442e-05, "loss": 2.109, "step": 14058 }, { "epoch": 0.47, "grad_norm": 0.7424694299697876, "learning_rate": 1.1292571351353037e-05, "loss": 2.1005, "step": 14059 }, { "epoch": 0.47, "grad_norm": 0.7935014963150024, "learning_rate": 1.1291517367608942e-05, "loss": 2.1004, "step": 14060 }, { "epoch": 0.47, "grad_norm": 0.727861225605011, "learning_rate": 1.1290463369274042e-05, "loss": 2.0406, "step": 14061 }, { "epoch": 0.47, "grad_norm": 0.7031880021095276, "learning_rate": 1.128940935636024e-05, "loss": 2.0641, "step": 14062 }, { "epoch": 0.47, "grad_norm": 0.7515766620635986, "learning_rate": 1.128835532887945e-05, "loss": 2.1094, "step": 14063 }, { "epoch": 0.47, "grad_norm": 0.7242578864097595, "learning_rate": 1.1287301286843573e-05, "loss": 2.1454, "step": 14064 }, { "epoch": 0.47, "grad_norm": 0.7245280146598816, "learning_rate": 1.1286247230264522e-05, "loss": 2.058, "step": 14065 }, { "epoch": 0.47, "grad_norm": 0.7270819544792175, "learning_rate": 1.1285193159154207e-05, "loss": 2.0514, "step": 14066 }, { "epoch": 0.47, "grad_norm": 0.7513290643692017, "learning_rate": 1.1284139073524528e-05, "loss": 2.0748, "step": 14067 }, { "epoch": 0.47, "grad_norm": 0.7444294691085815, "learning_rate": 1.1283084973387402e-05, "loss": 2.0319, "step": 14068 }, { "epoch": 0.47, "grad_norm": 0.7484315633773804, "learning_rate": 1.1282030858754731e-05, "loss": 2.0395, "step": 14069 }, { "epoch": 0.47, "grad_norm": 0.6898239850997925, "learning_rate": 1.1280976729638428e-05, "loss": 2.0262, "step": 14070 }, { "epoch": 0.47, "grad_norm": 0.723848283290863, "learning_rate": 1.12799225860504e-05, "loss": 2.0494, "step": 14071 }, { "epoch": 0.47, "grad_norm": 0.736810028553009, "learning_rate": 1.1278868428002559e-05, "loss": 2.0754, "step": 14072 }, { "epoch": 0.47, "grad_norm": 0.7575604319572449, "learning_rate": 1.127781425550681e-05, "loss": 2.0905, "step": 14073 }, { "epoch": 0.47, "grad_norm": 0.7576225399971008, "learning_rate": 1.1276760068575065e-05, "loss": 1.978, "step": 14074 }, { "epoch": 0.47, "grad_norm": 0.7554639577865601, "learning_rate": 1.1275705867219231e-05, "loss": 2.1882, "step": 14075 }, { "epoch": 0.47, "grad_norm": 0.7662122249603271, "learning_rate": 1.127465165145122e-05, "loss": 2.0803, "step": 14076 }, { "epoch": 0.47, "grad_norm": 0.7734922766685486, "learning_rate": 1.1273597421282946e-05, "loss": 2.0119, "step": 14077 }, { "epoch": 0.47, "grad_norm": 0.7401114702224731, "learning_rate": 1.127254317672631e-05, "loss": 2.0387, "step": 14078 }, { "epoch": 0.47, "grad_norm": 0.7506241202354431, "learning_rate": 1.1271488917793232e-05, "loss": 2.0169, "step": 14079 }, { "epoch": 0.47, "grad_norm": 0.7795040607452393, "learning_rate": 1.1270434644495614e-05, "loss": 2.1069, "step": 14080 }, { "epoch": 0.47, "grad_norm": 0.753847599029541, "learning_rate": 1.126938035684537e-05, "loss": 2.0981, "step": 14081 }, { "epoch": 0.47, "grad_norm": 0.7214574813842773, "learning_rate": 1.126832605485441e-05, "loss": 2.0556, "step": 14082 }, { "epoch": 0.47, "grad_norm": 0.711821973323822, "learning_rate": 1.1267271738534646e-05, "loss": 2.056, "step": 14083 }, { "epoch": 0.47, "grad_norm": 0.7408535480499268, "learning_rate": 1.1266217407897988e-05, "loss": 2.1032, "step": 14084 }, { "epoch": 0.47, "grad_norm": 0.7385123372077942, "learning_rate": 1.1265163062956353e-05, "loss": 2.054, "step": 14085 }, { "epoch": 0.47, "grad_norm": 0.7447160482406616, "learning_rate": 1.1264108703721638e-05, "loss": 2.1132, "step": 14086 }, { "epoch": 0.47, "grad_norm": 0.7687646746635437, "learning_rate": 1.1263054330205769e-05, "loss": 2.0824, "step": 14087 }, { "epoch": 0.47, "grad_norm": 0.7291406989097595, "learning_rate": 1.1261999942420653e-05, "loss": 2.0499, "step": 14088 }, { "epoch": 0.47, "grad_norm": 0.7002138495445251, "learning_rate": 1.1260945540378198e-05, "loss": 2.0523, "step": 14089 }, { "epoch": 0.47, "grad_norm": 0.7312231659889221, "learning_rate": 1.1259891124090326e-05, "loss": 2.083, "step": 14090 }, { "epoch": 0.47, "grad_norm": 0.7296798825263977, "learning_rate": 1.1258836693568937e-05, "loss": 2.1295, "step": 14091 }, { "epoch": 0.47, "grad_norm": 0.7545778155326843, "learning_rate": 1.125778224882595e-05, "loss": 2.0522, "step": 14092 }, { "epoch": 0.47, "grad_norm": 0.7274122834205627, "learning_rate": 1.1256727789873276e-05, "loss": 2.0464, "step": 14093 }, { "epoch": 0.47, "grad_norm": 0.7647008895874023, "learning_rate": 1.1255673316722828e-05, "loss": 2.1172, "step": 14094 }, { "epoch": 0.47, "grad_norm": 0.715890109539032, "learning_rate": 1.1254618829386518e-05, "loss": 2.0993, "step": 14095 }, { "epoch": 0.47, "grad_norm": 0.7222809791564941, "learning_rate": 1.1253564327876262e-05, "loss": 2.087, "step": 14096 }, { "epoch": 0.47, "grad_norm": 0.7518162727355957, "learning_rate": 1.125250981220397e-05, "loss": 2.0916, "step": 14097 }, { "epoch": 0.47, "grad_norm": 0.7373457551002502, "learning_rate": 1.1251455282381554e-05, "loss": 2.0645, "step": 14098 }, { "epoch": 0.47, "grad_norm": 0.7541444897651672, "learning_rate": 1.1250400738420933e-05, "loss": 2.08, "step": 14099 }, { "epoch": 0.47, "grad_norm": 0.7496023178100586, "learning_rate": 1.1249346180334012e-05, "loss": 2.1125, "step": 14100 }, { "epoch": 0.47, "grad_norm": 0.7495477795600891, "learning_rate": 1.1248291608132718e-05, "loss": 2.0757, "step": 14101 }, { "epoch": 0.47, "grad_norm": 0.7495827674865723, "learning_rate": 1.1247237021828951e-05, "loss": 2.0812, "step": 14102 }, { "epoch": 0.47, "grad_norm": 0.7537965178489685, "learning_rate": 1.1246182421434633e-05, "loss": 2.1164, "step": 14103 }, { "epoch": 0.47, "grad_norm": 0.7331140637397766, "learning_rate": 1.1245127806961676e-05, "loss": 2.0276, "step": 14104 }, { "epoch": 0.47, "grad_norm": 0.76673823595047, "learning_rate": 1.1244073178421996e-05, "loss": 2.082, "step": 14105 }, { "epoch": 0.47, "grad_norm": 0.7465325593948364, "learning_rate": 1.1243018535827503e-05, "loss": 2.0999, "step": 14106 }, { "epoch": 0.47, "grad_norm": 0.7604457139968872, "learning_rate": 1.1241963879190117e-05, "loss": 2.1277, "step": 14107 }, { "epoch": 0.47, "grad_norm": 0.7472529411315918, "learning_rate": 1.124090920852175e-05, "loss": 2.0806, "step": 14108 }, { "epoch": 0.47, "grad_norm": 0.7689916491508484, "learning_rate": 1.1239854523834319e-05, "loss": 2.147, "step": 14109 }, { "epoch": 0.47, "grad_norm": 0.7152977585792542, "learning_rate": 1.1238799825139741e-05, "loss": 2.0955, "step": 14110 }, { "epoch": 0.47, "grad_norm": 0.7335329055786133, "learning_rate": 1.1237745112449923e-05, "loss": 2.1087, "step": 14111 }, { "epoch": 0.47, "grad_norm": 0.7496398091316223, "learning_rate": 1.1236690385776792e-05, "loss": 2.0839, "step": 14112 }, { "epoch": 0.47, "grad_norm": 0.7283348441123962, "learning_rate": 1.1235635645132255e-05, "loss": 2.0566, "step": 14113 }, { "epoch": 0.47, "grad_norm": 0.7165517210960388, "learning_rate": 1.1234580890528229e-05, "loss": 2.1336, "step": 14114 }, { "epoch": 0.47, "grad_norm": 0.7048013210296631, "learning_rate": 1.1233526121976632e-05, "loss": 2.1134, "step": 14115 }, { "epoch": 0.47, "grad_norm": 0.7458078265190125, "learning_rate": 1.123247133948938e-05, "loss": 2.0285, "step": 14116 }, { "epoch": 0.47, "grad_norm": 0.7379661798477173, "learning_rate": 1.123141654307839e-05, "loss": 2.1385, "step": 14117 }, { "epoch": 0.47, "grad_norm": 0.7375317811965942, "learning_rate": 1.1230361732755579e-05, "loss": 2.1057, "step": 14118 }, { "epoch": 0.47, "grad_norm": 0.7448461651802063, "learning_rate": 1.122930690853286e-05, "loss": 2.062, "step": 14119 }, { "epoch": 0.47, "grad_norm": 0.7793803811073303, "learning_rate": 1.1228252070422152e-05, "loss": 2.1329, "step": 14120 }, { "epoch": 0.47, "grad_norm": 0.7436450719833374, "learning_rate": 1.1227197218435375e-05, "loss": 2.1152, "step": 14121 }, { "epoch": 0.47, "grad_norm": 0.7249694466590881, "learning_rate": 1.122614235258444e-05, "loss": 2.0756, "step": 14122 }, { "epoch": 0.47, "grad_norm": 0.7256861925125122, "learning_rate": 1.1225087472881269e-05, "loss": 2.0855, "step": 14123 }, { "epoch": 0.47, "grad_norm": 0.7595627903938293, "learning_rate": 1.1224032579337777e-05, "loss": 2.1777, "step": 14124 }, { "epoch": 0.47, "grad_norm": 0.7674539685249329, "learning_rate": 1.1222977671965882e-05, "loss": 2.0858, "step": 14125 }, { "epoch": 0.47, "grad_norm": 0.7489127516746521, "learning_rate": 1.12219227507775e-05, "loss": 2.0329, "step": 14126 }, { "epoch": 0.47, "grad_norm": 0.6948639750480652, "learning_rate": 1.1220867815784553e-05, "loss": 2.0531, "step": 14127 }, { "epoch": 0.47, "grad_norm": 0.7794910669326782, "learning_rate": 1.1219812866998959e-05, "loss": 2.1326, "step": 14128 }, { "epoch": 0.47, "grad_norm": 0.7348852753639221, "learning_rate": 1.1218757904432629e-05, "loss": 2.0992, "step": 14129 }, { "epoch": 0.47, "grad_norm": 0.7510849833488464, "learning_rate": 1.121770292809749e-05, "loss": 2.0313, "step": 14130 }, { "epoch": 0.47, "grad_norm": 0.7234457731246948, "learning_rate": 1.1216647938005455e-05, "loss": 2.0655, "step": 14131 }, { "epoch": 0.47, "grad_norm": 0.7223379015922546, "learning_rate": 1.121559293416845e-05, "loss": 2.0913, "step": 14132 }, { "epoch": 0.47, "grad_norm": 0.7217646837234497, "learning_rate": 1.121453791659838e-05, "loss": 2.0782, "step": 14133 }, { "epoch": 0.47, "grad_norm": 0.7575363516807556, "learning_rate": 1.1213482885307179e-05, "loss": 2.057, "step": 14134 }, { "epoch": 0.47, "grad_norm": 0.7553440928459167, "learning_rate": 1.1212427840306759e-05, "loss": 2.1164, "step": 14135 }, { "epoch": 0.47, "grad_norm": 0.7449776530265808, "learning_rate": 1.1211372781609038e-05, "loss": 2.0241, "step": 14136 }, { "epoch": 0.47, "grad_norm": 0.7748123407363892, "learning_rate": 1.1210317709225935e-05, "loss": 2.0277, "step": 14137 }, { "epoch": 0.47, "grad_norm": 0.7384259700775146, "learning_rate": 1.1209262623169375e-05, "loss": 2.0691, "step": 14138 }, { "epoch": 0.47, "grad_norm": 0.6911337375640869, "learning_rate": 1.1208207523451274e-05, "loss": 2.032, "step": 14139 }, { "epoch": 0.47, "grad_norm": 0.7353290915489197, "learning_rate": 1.1207152410083553e-05, "loss": 2.0862, "step": 14140 }, { "epoch": 0.47, "grad_norm": 0.7301750183105469, "learning_rate": 1.1206097283078131e-05, "loss": 2.1174, "step": 14141 }, { "epoch": 0.47, "grad_norm": 0.7042744159698486, "learning_rate": 1.1205042142446927e-05, "loss": 2.0093, "step": 14142 }, { "epoch": 0.47, "grad_norm": 0.7420377731323242, "learning_rate": 1.1203986988201867e-05, "loss": 2.0163, "step": 14143 }, { "epoch": 0.47, "grad_norm": 0.728172242641449, "learning_rate": 1.120293182035486e-05, "loss": 2.0896, "step": 14144 }, { "epoch": 0.47, "grad_norm": 0.7490823268890381, "learning_rate": 1.1201876638917843e-05, "loss": 2.1335, "step": 14145 }, { "epoch": 0.47, "grad_norm": 0.7358691096305847, "learning_rate": 1.1200821443902726e-05, "loss": 2.1055, "step": 14146 }, { "epoch": 0.47, "grad_norm": 0.7314212322235107, "learning_rate": 1.1199766235321429e-05, "loss": 2.1365, "step": 14147 }, { "epoch": 0.47, "grad_norm": 0.7496983408927917, "learning_rate": 1.1198711013185879e-05, "loss": 2.0748, "step": 14148 }, { "epoch": 0.47, "grad_norm": 0.7331315875053406, "learning_rate": 1.1197655777507991e-05, "loss": 2.0567, "step": 14149 }, { "epoch": 0.47, "grad_norm": 0.7236708998680115, "learning_rate": 1.1196600528299693e-05, "loss": 2.1185, "step": 14150 }, { "epoch": 0.47, "grad_norm": 0.7511530518531799, "learning_rate": 1.1195545265572903e-05, "loss": 2.1002, "step": 14151 }, { "epoch": 0.47, "grad_norm": 0.7367854714393616, "learning_rate": 1.1194489989339543e-05, "loss": 2.0731, "step": 14152 }, { "epoch": 0.47, "grad_norm": 0.7246856093406677, "learning_rate": 1.1193434699611533e-05, "loss": 2.0118, "step": 14153 }, { "epoch": 0.47, "grad_norm": 0.7418739199638367, "learning_rate": 1.1192379396400803e-05, "loss": 2.0615, "step": 14154 }, { "epoch": 0.47, "grad_norm": 0.773021399974823, "learning_rate": 1.1191324079719263e-05, "loss": 2.1079, "step": 14155 }, { "epoch": 0.47, "grad_norm": 0.734575092792511, "learning_rate": 1.1190268749578848e-05, "loss": 2.1141, "step": 14156 }, { "epoch": 0.47, "grad_norm": 0.712656557559967, "learning_rate": 1.1189213405991469e-05, "loss": 2.106, "step": 14157 }, { "epoch": 0.47, "grad_norm": 0.7313783764839172, "learning_rate": 1.1188158048969056e-05, "loss": 2.079, "step": 14158 }, { "epoch": 0.47, "grad_norm": 0.7257425785064697, "learning_rate": 1.1187102678523529e-05, "loss": 2.1187, "step": 14159 }, { "epoch": 0.47, "grad_norm": 0.7388837933540344, "learning_rate": 1.118604729466681e-05, "loss": 2.1224, "step": 14160 }, { "epoch": 0.47, "grad_norm": 0.771956741809845, "learning_rate": 1.1184991897410829e-05, "loss": 2.0956, "step": 14161 }, { "epoch": 0.47, "grad_norm": 0.7762402892112732, "learning_rate": 1.11839364867675e-05, "loss": 2.0883, "step": 14162 }, { "epoch": 0.47, "grad_norm": 0.7367829084396362, "learning_rate": 1.1182881062748749e-05, "loss": 2.0575, "step": 14163 }, { "epoch": 0.47, "grad_norm": 0.7327202558517456, "learning_rate": 1.1181825625366506e-05, "loss": 2.1224, "step": 14164 }, { "epoch": 0.47, "grad_norm": 0.7169814109802246, "learning_rate": 1.1180770174632684e-05, "loss": 2.0279, "step": 14165 }, { "epoch": 0.47, "grad_norm": 0.7503306269645691, "learning_rate": 1.1179714710559215e-05, "loss": 2.0068, "step": 14166 }, { "epoch": 0.47, "grad_norm": 0.7357821464538574, "learning_rate": 1.1178659233158024e-05, "loss": 2.0596, "step": 14167 }, { "epoch": 0.47, "grad_norm": 0.7679687738418579, "learning_rate": 1.1177603742441025e-05, "loss": 2.1336, "step": 14168 }, { "epoch": 0.47, "grad_norm": 0.745258629322052, "learning_rate": 1.1176548238420156e-05, "loss": 2.0968, "step": 14169 }, { "epoch": 0.47, "grad_norm": 0.7397328615188599, "learning_rate": 1.117549272110733e-05, "loss": 2.0678, "step": 14170 }, { "epoch": 0.47, "grad_norm": 0.7299311757087708, "learning_rate": 1.1174437190514475e-05, "loss": 2.0712, "step": 14171 }, { "epoch": 0.47, "grad_norm": 0.740551233291626, "learning_rate": 1.1173381646653523e-05, "loss": 2.109, "step": 14172 }, { "epoch": 0.47, "grad_norm": 0.7318323850631714, "learning_rate": 1.117232608953639e-05, "loss": 2.0391, "step": 14173 }, { "epoch": 0.47, "grad_norm": 0.749868631362915, "learning_rate": 1.1171270519175002e-05, "loss": 2.0989, "step": 14174 }, { "epoch": 0.47, "grad_norm": 0.7104763388633728, "learning_rate": 1.1170214935581287e-05, "loss": 2.1373, "step": 14175 }, { "epoch": 0.47, "grad_norm": 0.8010764122009277, "learning_rate": 1.116915933876717e-05, "loss": 2.0425, "step": 14176 }, { "epoch": 0.47, "grad_norm": 0.7307770252227783, "learning_rate": 1.1168103728744575e-05, "loss": 2.0634, "step": 14177 }, { "epoch": 0.47, "grad_norm": 0.7643880844116211, "learning_rate": 1.1167048105525431e-05, "loss": 2.0882, "step": 14178 }, { "epoch": 0.47, "grad_norm": 0.753108561038971, "learning_rate": 1.1165992469121659e-05, "loss": 2.1011, "step": 14179 }, { "epoch": 0.47, "grad_norm": 0.7784432172775269, "learning_rate": 1.1164936819545187e-05, "loss": 2.0803, "step": 14180 }, { "epoch": 0.47, "grad_norm": 0.7116439342498779, "learning_rate": 1.1163881156807945e-05, "loss": 2.0969, "step": 14181 }, { "epoch": 0.47, "grad_norm": 0.7272990942001343, "learning_rate": 1.116282548092185e-05, "loss": 2.0168, "step": 14182 }, { "epoch": 0.47, "grad_norm": 0.8080035448074341, "learning_rate": 1.116176979189884e-05, "loss": 2.1166, "step": 14183 }, { "epoch": 0.47, "grad_norm": 0.7456568479537964, "learning_rate": 1.1160714089750833e-05, "loss": 2.1534, "step": 14184 }, { "epoch": 0.47, "grad_norm": 0.7111576199531555, "learning_rate": 1.1159658374489759e-05, "loss": 2.1103, "step": 14185 }, { "epoch": 0.47, "grad_norm": 0.7288686633110046, "learning_rate": 1.1158602646127546e-05, "loss": 2.0632, "step": 14186 }, { "epoch": 0.47, "grad_norm": 0.7175378203392029, "learning_rate": 1.1157546904676115e-05, "loss": 2.0521, "step": 14187 }, { "epoch": 0.47, "grad_norm": 0.7293234467506409, "learning_rate": 1.1156491150147399e-05, "loss": 2.1176, "step": 14188 }, { "epoch": 0.47, "grad_norm": 0.7512077689170837, "learning_rate": 1.1155435382553327e-05, "loss": 2.0551, "step": 14189 }, { "epoch": 0.47, "grad_norm": 0.7169491052627563, "learning_rate": 1.115437960190582e-05, "loss": 2.0045, "step": 14190 }, { "epoch": 0.47, "grad_norm": 0.741670548915863, "learning_rate": 1.1153323808216809e-05, "loss": 2.0671, "step": 14191 }, { "epoch": 0.47, "grad_norm": 0.7526654005050659, "learning_rate": 1.1152268001498226e-05, "loss": 2.1611, "step": 14192 }, { "epoch": 0.47, "grad_norm": 0.7536620497703552, "learning_rate": 1.1151212181761988e-05, "loss": 2.0351, "step": 14193 }, { "epoch": 0.47, "grad_norm": 0.7254747152328491, "learning_rate": 1.1150156349020034e-05, "loss": 2.0792, "step": 14194 }, { "epoch": 0.47, "grad_norm": 0.7223042249679565, "learning_rate": 1.1149100503284285e-05, "loss": 1.9875, "step": 14195 }, { "epoch": 0.47, "grad_norm": 0.7163583636283875, "learning_rate": 1.1148044644566673e-05, "loss": 2.0846, "step": 14196 }, { "epoch": 0.47, "grad_norm": 0.7543470859527588, "learning_rate": 1.1146988772879123e-05, "loss": 1.9927, "step": 14197 }, { "epoch": 0.47, "grad_norm": 0.711875855922699, "learning_rate": 1.114593288823357e-05, "loss": 1.9932, "step": 14198 }, { "epoch": 0.47, "grad_norm": 0.7417489886283875, "learning_rate": 1.1144876990641937e-05, "loss": 2.0592, "step": 14199 }, { "epoch": 0.47, "grad_norm": 0.7545101642608643, "learning_rate": 1.1143821080116155e-05, "loss": 2.0807, "step": 14200 }, { "epoch": 0.47, "grad_norm": 0.749754011631012, "learning_rate": 1.1142765156668151e-05, "loss": 2.1095, "step": 14201 }, { "epoch": 0.47, "grad_norm": 0.7475979924201965, "learning_rate": 1.1141709220309858e-05, "loss": 2.1227, "step": 14202 }, { "epoch": 0.47, "grad_norm": 0.7298274040222168, "learning_rate": 1.1140653271053204e-05, "loss": 2.0631, "step": 14203 }, { "epoch": 0.47, "grad_norm": 0.7125329971313477, "learning_rate": 1.1139597308910115e-05, "loss": 2.0651, "step": 14204 }, { "epoch": 0.47, "grad_norm": 0.7100964784622192, "learning_rate": 1.1138541333892528e-05, "loss": 2.1247, "step": 14205 }, { "epoch": 0.47, "grad_norm": 0.749793529510498, "learning_rate": 1.1137485346012365e-05, "loss": 2.0522, "step": 14206 }, { "epoch": 0.47, "grad_norm": 0.7549937963485718, "learning_rate": 1.1136429345281558e-05, "loss": 2.0706, "step": 14207 }, { "epoch": 0.47, "grad_norm": 0.7523854970932007, "learning_rate": 1.113537333171204e-05, "loss": 2.1121, "step": 14208 }, { "epoch": 0.47, "grad_norm": 0.7278813719749451, "learning_rate": 1.1134317305315739e-05, "loss": 2.0391, "step": 14209 }, { "epoch": 0.47, "grad_norm": 0.741080641746521, "learning_rate": 1.1133261266104586e-05, "loss": 2.0419, "step": 14210 }, { "epoch": 0.47, "grad_norm": 0.722870409488678, "learning_rate": 1.1132205214090512e-05, "loss": 2.0673, "step": 14211 }, { "epoch": 0.47, "grad_norm": 0.779769778251648, "learning_rate": 1.1131149149285445e-05, "loss": 2.0892, "step": 14212 }, { "epoch": 0.47, "grad_norm": 0.7490190863609314, "learning_rate": 1.113009307170132e-05, "loss": 2.0157, "step": 14213 }, { "epoch": 0.47, "grad_norm": 0.7290025949478149, "learning_rate": 1.1129036981350068e-05, "loss": 2.0793, "step": 14214 }, { "epoch": 0.47, "grad_norm": 0.7280087471008301, "learning_rate": 1.1127980878243612e-05, "loss": 2.0995, "step": 14215 }, { "epoch": 0.47, "grad_norm": 0.7295913100242615, "learning_rate": 1.1126924762393894e-05, "loss": 2.0829, "step": 14216 }, { "epoch": 0.47, "grad_norm": 0.7469884753227234, "learning_rate": 1.112586863381284e-05, "loss": 2.1549, "step": 14217 }, { "epoch": 0.47, "grad_norm": 0.7080041170120239, "learning_rate": 1.112481249251238e-05, "loss": 2.0709, "step": 14218 }, { "epoch": 0.47, "grad_norm": 0.7416033148765564, "learning_rate": 1.1123756338504447e-05, "loss": 2.0324, "step": 14219 }, { "epoch": 0.47, "grad_norm": 0.7677333950996399, "learning_rate": 1.1122700171800975e-05, "loss": 2.0431, "step": 14220 }, { "epoch": 0.47, "grad_norm": 0.7225643396377563, "learning_rate": 1.1121643992413896e-05, "loss": 2.0588, "step": 14221 }, { "epoch": 0.47, "grad_norm": 0.7581294178962708, "learning_rate": 1.1120587800355138e-05, "loss": 2.1167, "step": 14222 }, { "epoch": 0.47, "grad_norm": 0.7548710703849792, "learning_rate": 1.1119531595636634e-05, "loss": 2.0471, "step": 14223 }, { "epoch": 0.47, "grad_norm": 0.7461514472961426, "learning_rate": 1.1118475378270322e-05, "loss": 2.0981, "step": 14224 }, { "epoch": 0.47, "grad_norm": 0.7237613797187805, "learning_rate": 1.1117419148268132e-05, "loss": 2.0392, "step": 14225 }, { "epoch": 0.47, "grad_norm": 0.7099583148956299, "learning_rate": 1.1116362905641988e-05, "loss": 2.0895, "step": 14226 }, { "epoch": 0.47, "grad_norm": 0.738778293132782, "learning_rate": 1.1115306650403839e-05, "loss": 2.1059, "step": 14227 }, { "epoch": 0.47, "grad_norm": 0.7312893867492676, "learning_rate": 1.1114250382565603e-05, "loss": 2.0993, "step": 14228 }, { "epoch": 0.47, "grad_norm": 0.748782753944397, "learning_rate": 1.1113194102139221e-05, "loss": 2.1583, "step": 14229 }, { "epoch": 0.47, "grad_norm": 0.7461559176445007, "learning_rate": 1.1112137809136625e-05, "loss": 2.1165, "step": 14230 }, { "epoch": 0.47, "grad_norm": 0.7195150852203369, "learning_rate": 1.1111081503569748e-05, "loss": 2.092, "step": 14231 }, { "epoch": 0.47, "grad_norm": 0.7435267567634583, "learning_rate": 1.1110025185450522e-05, "loss": 2.1278, "step": 14232 }, { "epoch": 0.47, "grad_norm": 0.7279480695724487, "learning_rate": 1.1108968854790882e-05, "loss": 2.0468, "step": 14233 }, { "epoch": 0.47, "grad_norm": 0.727445125579834, "learning_rate": 1.1107912511602762e-05, "loss": 2.0994, "step": 14234 }, { "epoch": 0.47, "grad_norm": 0.779285728931427, "learning_rate": 1.1106856155898096e-05, "loss": 2.1178, "step": 14235 }, { "epoch": 0.47, "grad_norm": 0.7448298931121826, "learning_rate": 1.110579978768882e-05, "loss": 2.0478, "step": 14236 }, { "epoch": 0.47, "grad_norm": 0.7249873876571655, "learning_rate": 1.1104743406986863e-05, "loss": 2.0938, "step": 14237 }, { "epoch": 0.47, "grad_norm": 0.7359350919723511, "learning_rate": 1.1103687013804165e-05, "loss": 2.0596, "step": 14238 }, { "epoch": 0.47, "grad_norm": 0.7265920042991638, "learning_rate": 1.1102630608152656e-05, "loss": 2.0913, "step": 14239 }, { "epoch": 0.47, "grad_norm": 0.7173481583595276, "learning_rate": 1.1101574190044274e-05, "loss": 2.0293, "step": 14240 }, { "epoch": 0.47, "grad_norm": 0.7362083792686462, "learning_rate": 1.110051775949095e-05, "loss": 2.0835, "step": 14241 }, { "epoch": 0.47, "grad_norm": 0.7416704893112183, "learning_rate": 1.1099461316504622e-05, "loss": 2.0422, "step": 14242 }, { "epoch": 0.47, "grad_norm": 0.729263424873352, "learning_rate": 1.1098404861097224e-05, "loss": 2.0986, "step": 14243 }, { "epoch": 0.47, "grad_norm": 0.7417550683021545, "learning_rate": 1.1097348393280693e-05, "loss": 2.0579, "step": 14244 }, { "epoch": 0.47, "grad_norm": 0.7671549320220947, "learning_rate": 1.1096291913066963e-05, "loss": 2.0377, "step": 14245 }, { "epoch": 0.47, "grad_norm": 0.781024694442749, "learning_rate": 1.1095235420467969e-05, "loss": 2.0717, "step": 14246 }, { "epoch": 0.47, "grad_norm": 0.7438501119613647, "learning_rate": 1.109417891549565e-05, "loss": 2.0973, "step": 14247 }, { "epoch": 0.47, "grad_norm": 0.7039564251899719, "learning_rate": 1.1093122398161936e-05, "loss": 2.0365, "step": 14248 }, { "epoch": 0.47, "grad_norm": 0.7803378105163574, "learning_rate": 1.1092065868478765e-05, "loss": 2.1019, "step": 14249 }, { "epoch": 0.47, "grad_norm": 0.772740364074707, "learning_rate": 1.1091009326458076e-05, "loss": 2.0728, "step": 14250 }, { "epoch": 0.47, "grad_norm": 0.7509992122650146, "learning_rate": 1.10899527721118e-05, "loss": 2.0917, "step": 14251 }, { "epoch": 0.47, "grad_norm": 0.7196308374404907, "learning_rate": 1.108889620545188e-05, "loss": 2.0937, "step": 14252 }, { "epoch": 0.47, "grad_norm": 0.7355828285217285, "learning_rate": 1.1087839626490244e-05, "loss": 2.1054, "step": 14253 }, { "epoch": 0.47, "grad_norm": 0.7692351937294006, "learning_rate": 1.1086783035238838e-05, "loss": 2.0726, "step": 14254 }, { "epoch": 0.47, "grad_norm": 0.7383742928504944, "learning_rate": 1.1085726431709594e-05, "loss": 2.1103, "step": 14255 }, { "epoch": 0.47, "grad_norm": 0.7565069198608398, "learning_rate": 1.1084669815914447e-05, "loss": 2.0934, "step": 14256 }, { "epoch": 0.47, "grad_norm": 0.7379568219184875, "learning_rate": 1.1083613187865337e-05, "loss": 2.0754, "step": 14257 }, { "epoch": 0.47, "grad_norm": 0.7448661923408508, "learning_rate": 1.10825565475742e-05, "loss": 2.0382, "step": 14258 }, { "epoch": 0.47, "grad_norm": 0.7181591391563416, "learning_rate": 1.1081499895052972e-05, "loss": 2.0113, "step": 14259 }, { "epoch": 0.47, "grad_norm": 0.7368479371070862, "learning_rate": 1.1080443230313595e-05, "loss": 2.076, "step": 14260 }, { "epoch": 0.47, "grad_norm": 0.7352670431137085, "learning_rate": 1.1079386553368001e-05, "loss": 2.1059, "step": 14261 }, { "epoch": 0.47, "grad_norm": 0.7263641953468323, "learning_rate": 1.1078329864228132e-05, "loss": 1.9485, "step": 14262 }, { "epoch": 0.47, "grad_norm": 0.7289211750030518, "learning_rate": 1.1077273162905924e-05, "loss": 2.1017, "step": 14263 }, { "epoch": 0.47, "grad_norm": 0.7218477129936218, "learning_rate": 1.1076216449413313e-05, "loss": 2.1144, "step": 14264 }, { "epoch": 0.47, "grad_norm": 0.72855144739151, "learning_rate": 1.1075159723762243e-05, "loss": 2.0301, "step": 14265 }, { "epoch": 0.47, "grad_norm": 0.7453488707542419, "learning_rate": 1.1074102985964645e-05, "loss": 2.1004, "step": 14266 }, { "epoch": 0.47, "grad_norm": 0.7261212468147278, "learning_rate": 1.1073046236032463e-05, "loss": 2.0811, "step": 14267 }, { "epoch": 0.47, "grad_norm": 0.7659101486206055, "learning_rate": 1.1071989473977633e-05, "loss": 2.1092, "step": 14268 }, { "epoch": 0.47, "grad_norm": 0.7164590358734131, "learning_rate": 1.1070932699812094e-05, "loss": 2.1276, "step": 14269 }, { "epoch": 0.47, "grad_norm": 0.7432991862297058, "learning_rate": 1.1069875913547785e-05, "loss": 1.9513, "step": 14270 }, { "epoch": 0.47, "grad_norm": 0.7367212772369385, "learning_rate": 1.1068819115196647e-05, "loss": 2.1209, "step": 14271 }, { "epoch": 0.47, "grad_norm": 0.753699779510498, "learning_rate": 1.1067762304770613e-05, "loss": 2.0827, "step": 14272 }, { "epoch": 0.47, "grad_norm": 0.7014368176460266, "learning_rate": 1.106670548228163e-05, "loss": 2.0153, "step": 14273 }, { "epoch": 0.47, "grad_norm": 0.7370536923408508, "learning_rate": 1.1065648647741634e-05, "loss": 2.0971, "step": 14274 }, { "epoch": 0.47, "grad_norm": 0.7757735252380371, "learning_rate": 1.1064591801162558e-05, "loss": 2.1051, "step": 14275 }, { "epoch": 0.47, "grad_norm": 0.7629368901252747, "learning_rate": 1.1063534942556356e-05, "loss": 2.0872, "step": 14276 }, { "epoch": 0.47, "grad_norm": 0.7460831999778748, "learning_rate": 1.1062478071934955e-05, "loss": 2.0856, "step": 14277 }, { "epoch": 0.48, "grad_norm": 0.7188703417778015, "learning_rate": 1.1061421189310298e-05, "loss": 2.0073, "step": 14278 }, { "epoch": 0.48, "grad_norm": 0.7412698268890381, "learning_rate": 1.1060364294694328e-05, "loss": 2.0473, "step": 14279 }, { "epoch": 0.48, "grad_norm": 0.7346656918525696, "learning_rate": 1.1059307388098984e-05, "loss": 2.0386, "step": 14280 }, { "epoch": 0.48, "grad_norm": 0.7299883961677551, "learning_rate": 1.1058250469536204e-05, "loss": 2.102, "step": 14281 }, { "epoch": 0.48, "grad_norm": 0.7499776482582092, "learning_rate": 1.1057193539017936e-05, "loss": 2.0454, "step": 14282 }, { "epoch": 0.48, "grad_norm": 0.7022654414176941, "learning_rate": 1.1056136596556109e-05, "loss": 2.1104, "step": 14283 }, { "epoch": 0.48, "grad_norm": 0.7361116409301758, "learning_rate": 1.1055079642162672e-05, "loss": 2.0841, "step": 14284 }, { "epoch": 0.48, "grad_norm": 0.727783739566803, "learning_rate": 1.1054022675849565e-05, "loss": 2.0352, "step": 14285 }, { "epoch": 0.48, "grad_norm": 0.758216142654419, "learning_rate": 1.1052965697628724e-05, "loss": 2.1149, "step": 14286 }, { "epoch": 0.48, "grad_norm": 0.6951219439506531, "learning_rate": 1.1051908707512098e-05, "loss": 2.0474, "step": 14287 }, { "epoch": 0.48, "grad_norm": 0.718856692314148, "learning_rate": 1.105085170551162e-05, "loss": 2.0779, "step": 14288 }, { "epoch": 0.48, "grad_norm": 0.723412036895752, "learning_rate": 1.1049794691639238e-05, "loss": 2.0386, "step": 14289 }, { "epoch": 0.48, "grad_norm": 0.7316255569458008, "learning_rate": 1.1048737665906887e-05, "loss": 2.108, "step": 14290 }, { "epoch": 0.48, "grad_norm": 0.7689874172210693, "learning_rate": 1.1047680628326516e-05, "loss": 2.1273, "step": 14291 }, { "epoch": 0.48, "grad_norm": 0.7342373132705688, "learning_rate": 1.1046623578910063e-05, "loss": 2.1002, "step": 14292 }, { "epoch": 0.48, "grad_norm": 0.7026596665382385, "learning_rate": 1.104556651766947e-05, "loss": 2.0714, "step": 14293 }, { "epoch": 0.48, "grad_norm": 0.7184266448020935, "learning_rate": 1.1044509444616678e-05, "loss": 2.0685, "step": 14294 }, { "epoch": 0.48, "grad_norm": 0.7516776919364929, "learning_rate": 1.1043452359763631e-05, "loss": 2.1051, "step": 14295 }, { "epoch": 0.48, "grad_norm": 0.7233201265335083, "learning_rate": 1.1042395263122272e-05, "loss": 2.1263, "step": 14296 }, { "epoch": 0.48, "grad_norm": 0.7389164566993713, "learning_rate": 1.104133815470454e-05, "loss": 2.0444, "step": 14297 }, { "epoch": 0.48, "grad_norm": 0.7377763986587524, "learning_rate": 1.1040281034522383e-05, "loss": 2.0929, "step": 14298 }, { "epoch": 0.48, "grad_norm": 0.7246180772781372, "learning_rate": 1.1039223902587736e-05, "loss": 2.0888, "step": 14299 }, { "epoch": 0.48, "grad_norm": 0.7131818532943726, "learning_rate": 1.1038166758912551e-05, "loss": 2.0968, "step": 14300 }, { "epoch": 0.48, "grad_norm": 0.7286971807479858, "learning_rate": 1.1037109603508764e-05, "loss": 2.1462, "step": 14301 }, { "epoch": 0.48, "grad_norm": 0.7298083901405334, "learning_rate": 1.103605243638832e-05, "loss": 2.0474, "step": 14302 }, { "epoch": 0.48, "grad_norm": 0.7260264158248901, "learning_rate": 1.1034995257563163e-05, "loss": 2.0769, "step": 14303 }, { "epoch": 0.48, "grad_norm": 0.7699926495552063, "learning_rate": 1.103393806704524e-05, "loss": 2.1027, "step": 14304 }, { "epoch": 0.48, "grad_norm": 0.7546269297599792, "learning_rate": 1.1032880864846485e-05, "loss": 2.107, "step": 14305 }, { "epoch": 0.48, "grad_norm": 0.7314380407333374, "learning_rate": 1.103182365097885e-05, "loss": 2.0534, "step": 14306 }, { "epoch": 0.48, "grad_norm": 0.7109289765357971, "learning_rate": 1.1030766425454278e-05, "loss": 2.1013, "step": 14307 }, { "epoch": 0.48, "grad_norm": 0.7165745496749878, "learning_rate": 1.1029709188284706e-05, "loss": 2.0598, "step": 14308 }, { "epoch": 0.48, "grad_norm": 0.7367250323295593, "learning_rate": 1.1028651939482086e-05, "loss": 2.0832, "step": 14309 }, { "epoch": 0.48, "grad_norm": 0.7513577342033386, "learning_rate": 1.102759467905836e-05, "loss": 2.0393, "step": 14310 }, { "epoch": 0.48, "grad_norm": 0.7990437150001526, "learning_rate": 1.102653740702547e-05, "loss": 2.087, "step": 14311 }, { "epoch": 0.48, "grad_norm": 0.7362295389175415, "learning_rate": 1.1025480123395362e-05, "loss": 2.0531, "step": 14312 }, { "epoch": 0.48, "grad_norm": 0.7576819062232971, "learning_rate": 1.1024422828179984e-05, "loss": 2.0929, "step": 14313 }, { "epoch": 0.48, "grad_norm": 0.7159005999565125, "learning_rate": 1.1023365521391274e-05, "loss": 2.0763, "step": 14314 }, { "epoch": 0.48, "grad_norm": 0.71181720495224, "learning_rate": 1.1022308203041178e-05, "loss": 2.0437, "step": 14315 }, { "epoch": 0.48, "grad_norm": 0.7493017315864563, "learning_rate": 1.1021250873141647e-05, "loss": 2.0566, "step": 14316 }, { "epoch": 0.48, "grad_norm": 0.704941987991333, "learning_rate": 1.102019353170462e-05, "loss": 2.1047, "step": 14317 }, { "epoch": 0.48, "grad_norm": 0.7419793009757996, "learning_rate": 1.1019136178742047e-05, "loss": 2.1184, "step": 14318 }, { "epoch": 0.48, "grad_norm": 0.7151005268096924, "learning_rate": 1.1018078814265868e-05, "loss": 2.0089, "step": 14319 }, { "epoch": 0.48, "grad_norm": 0.7453631162643433, "learning_rate": 1.1017021438288034e-05, "loss": 2.0218, "step": 14320 }, { "epoch": 0.48, "grad_norm": 0.7348095774650574, "learning_rate": 1.1015964050820485e-05, "loss": 2.1215, "step": 14321 }, { "epoch": 0.48, "grad_norm": 0.7228155732154846, "learning_rate": 1.101490665187517e-05, "loss": 2.0924, "step": 14322 }, { "epoch": 0.48, "grad_norm": 0.7173928618431091, "learning_rate": 1.1013849241464035e-05, "loss": 2.0314, "step": 14323 }, { "epoch": 0.48, "grad_norm": 0.7199174165725708, "learning_rate": 1.1012791819599025e-05, "loss": 2.0794, "step": 14324 }, { "epoch": 0.48, "grad_norm": 0.7311334013938904, "learning_rate": 1.1011734386292087e-05, "loss": 2.0624, "step": 14325 }, { "epoch": 0.48, "grad_norm": 0.749911367893219, "learning_rate": 1.1010676941555167e-05, "loss": 2.0884, "step": 14326 }, { "epoch": 0.48, "grad_norm": 0.7528603076934814, "learning_rate": 1.1009619485400209e-05, "loss": 2.1063, "step": 14327 }, { "epoch": 0.48, "grad_norm": 0.7235192060470581, "learning_rate": 1.1008562017839164e-05, "loss": 2.0645, "step": 14328 }, { "epoch": 0.48, "grad_norm": 0.7137499451637268, "learning_rate": 1.1007504538883979e-05, "loss": 2.0096, "step": 14329 }, { "epoch": 0.48, "grad_norm": 0.7254073023796082, "learning_rate": 1.1006447048546594e-05, "loss": 2.0687, "step": 14330 }, { "epoch": 0.48, "grad_norm": 0.7711024284362793, "learning_rate": 1.1005389546838963e-05, "loss": 2.0943, "step": 14331 }, { "epoch": 0.48, "grad_norm": 0.751362681388855, "learning_rate": 1.1004332033773028e-05, "loss": 2.0817, "step": 14332 }, { "epoch": 0.48, "grad_norm": 0.7168970108032227, "learning_rate": 1.1003274509360738e-05, "loss": 2.0688, "step": 14333 }, { "epoch": 0.48, "grad_norm": 0.7737463712692261, "learning_rate": 1.1002216973614042e-05, "loss": 2.1569, "step": 14334 }, { "epoch": 0.48, "grad_norm": 0.7361810803413391, "learning_rate": 1.1001159426544886e-05, "loss": 2.0896, "step": 14335 }, { "epoch": 0.48, "grad_norm": 0.7197995185852051, "learning_rate": 1.1000101868165216e-05, "loss": 2.1391, "step": 14336 }, { "epoch": 0.48, "grad_norm": 0.751809298992157, "learning_rate": 1.099904429848698e-05, "loss": 2.1495, "step": 14337 }, { "epoch": 0.48, "grad_norm": 0.7163493037223816, "learning_rate": 1.0997986717522128e-05, "loss": 2.0216, "step": 14338 }, { "epoch": 0.48, "grad_norm": 0.7354874610900879, "learning_rate": 1.099692912528261e-05, "loss": 1.9987, "step": 14339 }, { "epoch": 0.48, "grad_norm": 0.7600633502006531, "learning_rate": 1.0995871521780371e-05, "loss": 2.0085, "step": 14340 }, { "epoch": 0.48, "grad_norm": 0.7319091558456421, "learning_rate": 1.0994813907027355e-05, "loss": 2.0032, "step": 14341 }, { "epoch": 0.48, "grad_norm": 0.7334975004196167, "learning_rate": 1.099375628103552e-05, "loss": 1.9702, "step": 14342 }, { "epoch": 0.48, "grad_norm": 0.7139559388160706, "learning_rate": 1.0992698643816804e-05, "loss": 2.005, "step": 14343 }, { "epoch": 0.48, "grad_norm": 0.7769381999969482, "learning_rate": 1.0991640995383161e-05, "loss": 2.1367, "step": 14344 }, { "epoch": 0.48, "grad_norm": 0.716027557849884, "learning_rate": 1.0990583335746541e-05, "loss": 2.0542, "step": 14345 }, { "epoch": 0.48, "grad_norm": 0.7761984467506409, "learning_rate": 1.098952566491889e-05, "loss": 2.0923, "step": 14346 }, { "epoch": 0.48, "grad_norm": 0.739981472492218, "learning_rate": 1.0988467982912158e-05, "loss": 2.0699, "step": 14347 }, { "epoch": 0.48, "grad_norm": 0.7462652325630188, "learning_rate": 1.0987410289738294e-05, "loss": 2.0871, "step": 14348 }, { "epoch": 0.48, "grad_norm": 0.7528339624404907, "learning_rate": 1.0986352585409248e-05, "loss": 2.0772, "step": 14349 }, { "epoch": 0.48, "grad_norm": 0.7492295503616333, "learning_rate": 1.0985294869936969e-05, "loss": 2.0549, "step": 14350 }, { "epoch": 0.48, "grad_norm": 0.7841691374778748, "learning_rate": 1.0984237143333408e-05, "loss": 2.1091, "step": 14351 }, { "epoch": 0.48, "grad_norm": 0.7653245329856873, "learning_rate": 1.0983179405610506e-05, "loss": 2.1358, "step": 14352 }, { "epoch": 0.48, "grad_norm": 0.7500335574150085, "learning_rate": 1.0982121656780225e-05, "loss": 2.1296, "step": 14353 }, { "epoch": 0.48, "grad_norm": 0.7445665597915649, "learning_rate": 1.0981063896854505e-05, "loss": 1.9956, "step": 14354 }, { "epoch": 0.48, "grad_norm": 0.7375607490539551, "learning_rate": 1.0980006125845304e-05, "loss": 2.1364, "step": 14355 }, { "epoch": 0.48, "grad_norm": 0.7358099818229675, "learning_rate": 1.0978948343764566e-05, "loss": 2.0695, "step": 14356 }, { "epoch": 0.48, "grad_norm": 0.7630209922790527, "learning_rate": 1.0977890550624241e-05, "loss": 2.1038, "step": 14357 }, { "epoch": 0.48, "grad_norm": 0.7237311005592346, "learning_rate": 1.0976832746436286e-05, "loss": 2.0676, "step": 14358 }, { "epoch": 0.48, "grad_norm": 0.7421792149543762, "learning_rate": 1.0975774931212647e-05, "loss": 2.0875, "step": 14359 }, { "epoch": 0.48, "grad_norm": 0.7438628673553467, "learning_rate": 1.0974717104965271e-05, "loss": 2.1439, "step": 14360 }, { "epoch": 0.48, "grad_norm": 0.7470522522926331, "learning_rate": 1.0973659267706114e-05, "loss": 2.1254, "step": 14361 }, { "epoch": 0.48, "grad_norm": 0.7259917259216309, "learning_rate": 1.0972601419447126e-05, "loss": 2.049, "step": 14362 }, { "epoch": 0.48, "grad_norm": 0.7716514468193054, "learning_rate": 1.0971543560200257e-05, "loss": 2.0957, "step": 14363 }, { "epoch": 0.48, "grad_norm": 0.7350912690162659, "learning_rate": 1.0970485689977459e-05, "loss": 2.0722, "step": 14364 }, { "epoch": 0.48, "grad_norm": 0.7682210206985474, "learning_rate": 1.096942780879068e-05, "loss": 2.1348, "step": 14365 }, { "epoch": 0.48, "grad_norm": 0.744215726852417, "learning_rate": 1.0968369916651878e-05, "loss": 2.0874, "step": 14366 }, { "epoch": 0.48, "grad_norm": 0.7271391749382019, "learning_rate": 1.0967312013572997e-05, "loss": 1.9712, "step": 14367 }, { "epoch": 0.48, "grad_norm": 0.7200261950492859, "learning_rate": 1.096625409956599e-05, "loss": 2.1073, "step": 14368 }, { "epoch": 0.48, "grad_norm": 0.77424556016922, "learning_rate": 1.0965196174642814e-05, "loss": 2.0051, "step": 14369 }, { "epoch": 0.48, "grad_norm": 0.7318762540817261, "learning_rate": 1.0964138238815416e-05, "loss": 2.0531, "step": 14370 }, { "epoch": 0.48, "grad_norm": 0.730255663394928, "learning_rate": 1.096308029209575e-05, "loss": 2.1758, "step": 14371 }, { "epoch": 0.48, "grad_norm": 0.7182950377464294, "learning_rate": 1.0962022334495765e-05, "loss": 2.0916, "step": 14372 }, { "epoch": 0.48, "grad_norm": 0.7538824677467346, "learning_rate": 1.0960964366027418e-05, "loss": 2.1205, "step": 14373 }, { "epoch": 0.48, "grad_norm": 0.7706205248832703, "learning_rate": 1.0959906386702656e-05, "loss": 2.0821, "step": 14374 }, { "epoch": 0.48, "grad_norm": 0.7516716718673706, "learning_rate": 1.0958848396533438e-05, "loss": 2.0682, "step": 14375 }, { "epoch": 0.48, "grad_norm": 0.6984663009643555, "learning_rate": 1.095779039553171e-05, "loss": 2.0458, "step": 14376 }, { "epoch": 0.48, "grad_norm": 0.7279420495033264, "learning_rate": 1.0956732383709429e-05, "loss": 2.1155, "step": 14377 }, { "epoch": 0.48, "grad_norm": 0.7458705306053162, "learning_rate": 1.0955674361078546e-05, "loss": 1.9973, "step": 14378 }, { "epoch": 0.48, "grad_norm": 0.7171550393104553, "learning_rate": 1.0954616327651011e-05, "loss": 2.0765, "step": 14379 }, { "epoch": 0.48, "grad_norm": 0.7854146361351013, "learning_rate": 1.0953558283438783e-05, "loss": 2.1844, "step": 14380 }, { "epoch": 0.48, "grad_norm": 0.7266553640365601, "learning_rate": 1.0952500228453812e-05, "loss": 2.1075, "step": 14381 }, { "epoch": 0.48, "grad_norm": 0.7142643332481384, "learning_rate": 1.0951442162708053e-05, "loss": 2.0437, "step": 14382 }, { "epoch": 0.48, "grad_norm": 0.7138341069221497, "learning_rate": 1.0950384086213455e-05, "loss": 2.116, "step": 14383 }, { "epoch": 0.48, "grad_norm": 0.7354432940483093, "learning_rate": 1.0949325998981976e-05, "loss": 2.0782, "step": 14384 }, { "epoch": 0.48, "grad_norm": 0.7139890789985657, "learning_rate": 1.094826790102557e-05, "loss": 2.0422, "step": 14385 }, { "epoch": 0.48, "grad_norm": 0.7205191850662231, "learning_rate": 1.0947209792356188e-05, "loss": 2.0512, "step": 14386 }, { "epoch": 0.48, "grad_norm": 0.7609856128692627, "learning_rate": 1.0946151672985782e-05, "loss": 2.0829, "step": 14387 }, { "epoch": 0.48, "grad_norm": 0.7177435755729675, "learning_rate": 1.0945093542926312e-05, "loss": 2.1275, "step": 14388 }, { "epoch": 0.48, "grad_norm": 0.7493702173233032, "learning_rate": 1.094403540218973e-05, "loss": 2.0845, "step": 14389 }, { "epoch": 0.48, "grad_norm": 0.7312853336334229, "learning_rate": 1.0942977250787986e-05, "loss": 2.0646, "step": 14390 }, { "epoch": 0.48, "grad_norm": 0.793751060962677, "learning_rate": 1.094191908873304e-05, "loss": 2.088, "step": 14391 }, { "epoch": 0.48, "grad_norm": 0.7416241765022278, "learning_rate": 1.0940860916036843e-05, "loss": 2.0813, "step": 14392 }, { "epoch": 0.48, "grad_norm": 0.7198772430419922, "learning_rate": 1.093980273271135e-05, "loss": 2.101, "step": 14393 }, { "epoch": 0.48, "grad_norm": 0.7386717200279236, "learning_rate": 1.093874453876852e-05, "loss": 2.124, "step": 14394 }, { "epoch": 0.48, "grad_norm": 0.771170973777771, "learning_rate": 1.09376863342203e-05, "loss": 2.0349, "step": 14395 }, { "epoch": 0.48, "grad_norm": 0.7241373658180237, "learning_rate": 1.0936628119078651e-05, "loss": 2.0342, "step": 14396 }, { "epoch": 0.48, "grad_norm": 0.7185758352279663, "learning_rate": 1.0935569893355532e-05, "loss": 2.0642, "step": 14397 }, { "epoch": 0.48, "grad_norm": 0.7752938270568848, "learning_rate": 1.0934511657062883e-05, "loss": 2.0667, "step": 14398 }, { "epoch": 0.48, "grad_norm": 0.7349656224250793, "learning_rate": 1.0933453410212674e-05, "loss": 2.0726, "step": 14399 }, { "epoch": 0.48, "grad_norm": 0.7349641919136047, "learning_rate": 1.0932395152816856e-05, "loss": 2.079, "step": 14400 }, { "epoch": 0.48, "grad_norm": 0.7453962564468384, "learning_rate": 1.093133688488738e-05, "loss": 2.1127, "step": 14401 }, { "epoch": 0.48, "grad_norm": 0.7331302165985107, "learning_rate": 1.093027860643621e-05, "loss": 2.092, "step": 14402 }, { "epoch": 0.48, "grad_norm": 0.7489010095596313, "learning_rate": 1.0929220317475298e-05, "loss": 2.1371, "step": 14403 }, { "epoch": 0.48, "grad_norm": 0.7274237275123596, "learning_rate": 1.0928162018016597e-05, "loss": 2.0997, "step": 14404 }, { "epoch": 0.48, "grad_norm": 0.7037672996520996, "learning_rate": 1.0927103708072065e-05, "loss": 2.071, "step": 14405 }, { "epoch": 0.48, "grad_norm": 0.7324825525283813, "learning_rate": 1.092604538765366e-05, "loss": 2.0293, "step": 14406 }, { "epoch": 0.48, "grad_norm": 0.7871751189231873, "learning_rate": 1.0924987056773334e-05, "loss": 2.0793, "step": 14407 }, { "epoch": 0.48, "grad_norm": 0.7060885429382324, "learning_rate": 1.0923928715443052e-05, "loss": 2.0876, "step": 14408 }, { "epoch": 0.48, "grad_norm": 0.7304544448852539, "learning_rate": 1.092287036367476e-05, "loss": 2.0895, "step": 14409 }, { "epoch": 0.48, "grad_norm": 0.7364305853843689, "learning_rate": 1.0921812001480421e-05, "loss": 2.0912, "step": 14410 }, { "epoch": 0.48, "grad_norm": 0.7127228379249573, "learning_rate": 1.0920753628871992e-05, "loss": 2.022, "step": 14411 }, { "epoch": 0.48, "grad_norm": 0.7321669459342957, "learning_rate": 1.0919695245861426e-05, "loss": 2.1161, "step": 14412 }, { "epoch": 0.48, "grad_norm": 0.7178621292114258, "learning_rate": 1.0918636852460685e-05, "loss": 2.0904, "step": 14413 }, { "epoch": 0.48, "grad_norm": 0.7229382395744324, "learning_rate": 1.0917578448681718e-05, "loss": 2.0731, "step": 14414 }, { "epoch": 0.48, "grad_norm": 0.739997386932373, "learning_rate": 1.091652003453649e-05, "loss": 2.0058, "step": 14415 }, { "epoch": 0.48, "grad_norm": 0.7560994029045105, "learning_rate": 1.0915461610036956e-05, "loss": 2.0961, "step": 14416 }, { "epoch": 0.48, "grad_norm": 0.7425842881202698, "learning_rate": 1.0914403175195074e-05, "loss": 2.0943, "step": 14417 }, { "epoch": 0.48, "grad_norm": 0.7661288380622864, "learning_rate": 1.0913344730022801e-05, "loss": 2.1376, "step": 14418 }, { "epoch": 0.48, "grad_norm": 0.7817131876945496, "learning_rate": 1.0912286274532093e-05, "loss": 2.1068, "step": 14419 }, { "epoch": 0.48, "grad_norm": 0.733586847782135, "learning_rate": 1.091122780873491e-05, "loss": 2.0932, "step": 14420 }, { "epoch": 0.48, "grad_norm": 0.7024655938148499, "learning_rate": 1.0910169332643207e-05, "loss": 2.0376, "step": 14421 }, { "epoch": 0.48, "grad_norm": 0.7409464716911316, "learning_rate": 1.0909110846268952e-05, "loss": 2.139, "step": 14422 }, { "epoch": 0.48, "grad_norm": 0.7436284422874451, "learning_rate": 1.0908052349624086e-05, "loss": 2.0371, "step": 14423 }, { "epoch": 0.48, "grad_norm": 0.7409657835960388, "learning_rate": 1.0906993842720584e-05, "loss": 2.0584, "step": 14424 }, { "epoch": 0.48, "grad_norm": 0.7211195230484009, "learning_rate": 1.0905935325570392e-05, "loss": 2.1267, "step": 14425 }, { "epoch": 0.48, "grad_norm": 0.7863349914550781, "learning_rate": 1.0904876798185476e-05, "loss": 2.1386, "step": 14426 }, { "epoch": 0.48, "grad_norm": 0.735988974571228, "learning_rate": 1.0903818260577792e-05, "loss": 2.0587, "step": 14427 }, { "epoch": 0.48, "grad_norm": 0.7491430044174194, "learning_rate": 1.0902759712759297e-05, "loss": 2.0627, "step": 14428 }, { "epoch": 0.48, "grad_norm": 0.7436647415161133, "learning_rate": 1.0901701154741952e-05, "loss": 2.102, "step": 14429 }, { "epoch": 0.48, "grad_norm": 0.7171557545661926, "learning_rate": 1.0900642586537719e-05, "loss": 2.1349, "step": 14430 }, { "epoch": 0.48, "grad_norm": 0.7371664643287659, "learning_rate": 1.0899584008158553e-05, "loss": 2.0691, "step": 14431 }, { "epoch": 0.48, "grad_norm": 0.7271640300750732, "learning_rate": 1.0898525419616413e-05, "loss": 2.0522, "step": 14432 }, { "epoch": 0.48, "grad_norm": 0.7158271670341492, "learning_rate": 1.0897466820923262e-05, "loss": 2.029, "step": 14433 }, { "epoch": 0.48, "grad_norm": 0.7423985600471497, "learning_rate": 1.089640821209105e-05, "loss": 2.1821, "step": 14434 }, { "epoch": 0.48, "grad_norm": 0.7722052931785583, "learning_rate": 1.089534959313175e-05, "loss": 2.0516, "step": 14435 }, { "epoch": 0.48, "grad_norm": 0.7033466696739197, "learning_rate": 1.0894290964057313e-05, "loss": 2.047, "step": 14436 }, { "epoch": 0.48, "grad_norm": 0.7261670827865601, "learning_rate": 1.08932323248797e-05, "loss": 2.0294, "step": 14437 }, { "epoch": 0.48, "grad_norm": 0.7561299800872803, "learning_rate": 1.0892173675610873e-05, "loss": 2.0643, "step": 14438 }, { "epoch": 0.48, "grad_norm": 0.7200880646705627, "learning_rate": 1.089111501626279e-05, "loss": 1.9993, "step": 14439 }, { "epoch": 0.48, "grad_norm": 0.7535219192504883, "learning_rate": 1.089005634684741e-05, "loss": 2.0563, "step": 14440 }, { "epoch": 0.48, "grad_norm": 0.716543436050415, "learning_rate": 1.0888997667376697e-05, "loss": 2.1138, "step": 14441 }, { "epoch": 0.48, "grad_norm": 0.7296412587165833, "learning_rate": 1.088793897786261e-05, "loss": 2.077, "step": 14442 }, { "epoch": 0.48, "grad_norm": 0.7608610391616821, "learning_rate": 1.088688027831711e-05, "loss": 2.0565, "step": 14443 }, { "epoch": 0.48, "grad_norm": 0.7232423424720764, "learning_rate": 1.0885821568752155e-05, "loss": 2.0926, "step": 14444 }, { "epoch": 0.48, "grad_norm": 0.7583892345428467, "learning_rate": 1.0884762849179704e-05, "loss": 2.1186, "step": 14445 }, { "epoch": 0.48, "grad_norm": 0.7035171389579773, "learning_rate": 1.0883704119611725e-05, "loss": 2.0165, "step": 14446 }, { "epoch": 0.48, "grad_norm": 0.7235673069953918, "learning_rate": 1.0882645380060173e-05, "loss": 2.1529, "step": 14447 }, { "epoch": 0.48, "grad_norm": 0.7160201072692871, "learning_rate": 1.088158663053701e-05, "loss": 2.0459, "step": 14448 }, { "epoch": 0.48, "grad_norm": 0.7110678553581238, "learning_rate": 1.0880527871054198e-05, "loss": 2.0787, "step": 14449 }, { "epoch": 0.48, "grad_norm": 0.746077299118042, "learning_rate": 1.0879469101623701e-05, "loss": 2.1206, "step": 14450 }, { "epoch": 0.48, "grad_norm": 0.7280842661857605, "learning_rate": 1.0878410322257472e-05, "loss": 2.0939, "step": 14451 }, { "epoch": 0.48, "grad_norm": 0.7563570141792297, "learning_rate": 1.0877351532967484e-05, "loss": 2.1091, "step": 14452 }, { "epoch": 0.48, "grad_norm": 0.7215373516082764, "learning_rate": 1.087629273376569e-05, "loss": 2.0472, "step": 14453 }, { "epoch": 0.48, "grad_norm": 0.7527751326560974, "learning_rate": 1.0875233924664053e-05, "loss": 2.1465, "step": 14454 }, { "epoch": 0.48, "grad_norm": 0.7293099761009216, "learning_rate": 1.0874175105674536e-05, "loss": 2.1375, "step": 14455 }, { "epoch": 0.48, "grad_norm": 0.7809600234031677, "learning_rate": 1.0873116276809103e-05, "loss": 2.0034, "step": 14456 }, { "epoch": 0.48, "grad_norm": 0.7228170037269592, "learning_rate": 1.0872057438079716e-05, "loss": 2.0187, "step": 14457 }, { "epoch": 0.48, "grad_norm": 0.7572272419929504, "learning_rate": 1.0870998589498329e-05, "loss": 2.0769, "step": 14458 }, { "epoch": 0.48, "grad_norm": 0.7572263479232788, "learning_rate": 1.0869939731076916e-05, "loss": 2.1412, "step": 14459 }, { "epoch": 0.48, "grad_norm": 0.7432723641395569, "learning_rate": 1.086888086282743e-05, "loss": 2.0379, "step": 14460 }, { "epoch": 0.48, "grad_norm": 0.7562572956085205, "learning_rate": 1.0867821984761836e-05, "loss": 2.0951, "step": 14461 }, { "epoch": 0.48, "grad_norm": 0.7595495581626892, "learning_rate": 1.0866763096892103e-05, "loss": 2.063, "step": 14462 }, { "epoch": 0.48, "grad_norm": 0.7275890111923218, "learning_rate": 1.0865704199230187e-05, "loss": 2.0746, "step": 14463 }, { "epoch": 0.48, "grad_norm": 0.7534134984016418, "learning_rate": 1.0864645291788052e-05, "loss": 2.0974, "step": 14464 }, { "epoch": 0.48, "grad_norm": 0.724084198474884, "learning_rate": 1.086358637457766e-05, "loss": 2.1156, "step": 14465 }, { "epoch": 0.48, "grad_norm": 0.7541021704673767, "learning_rate": 1.0862527447610975e-05, "loss": 2.0722, "step": 14466 }, { "epoch": 0.48, "grad_norm": 0.753352165222168, "learning_rate": 1.0861468510899962e-05, "loss": 2.1408, "step": 14467 }, { "epoch": 0.48, "grad_norm": 0.7354586124420166, "learning_rate": 1.0860409564456584e-05, "loss": 2.064, "step": 14468 }, { "epoch": 0.48, "grad_norm": 0.7099581956863403, "learning_rate": 1.0859350608292797e-05, "loss": 1.9769, "step": 14469 }, { "epoch": 0.48, "grad_norm": 0.7177425622940063, "learning_rate": 1.0858291642420578e-05, "loss": 2.113, "step": 14470 }, { "epoch": 0.48, "grad_norm": 0.7127478122711182, "learning_rate": 1.0857232666851878e-05, "loss": 2.0088, "step": 14471 }, { "epoch": 0.48, "grad_norm": 0.7607955932617188, "learning_rate": 1.0856173681598665e-05, "loss": 2.0841, "step": 14472 }, { "epoch": 0.48, "grad_norm": 0.7667393684387207, "learning_rate": 1.0855114686672907e-05, "loss": 2.0871, "step": 14473 }, { "epoch": 0.48, "grad_norm": 0.7473248839378357, "learning_rate": 1.0854055682086564e-05, "loss": 2.1569, "step": 14474 }, { "epoch": 0.48, "grad_norm": 0.7220759987831116, "learning_rate": 1.08529966678516e-05, "loss": 2.144, "step": 14475 }, { "epoch": 0.48, "grad_norm": 0.7408268451690674, "learning_rate": 1.0851937643979978e-05, "loss": 2.1088, "step": 14476 }, { "epoch": 0.48, "grad_norm": 0.7305794954299927, "learning_rate": 1.0850878610483663e-05, "loss": 2.0707, "step": 14477 }, { "epoch": 0.48, "grad_norm": 0.7127835750579834, "learning_rate": 1.0849819567374623e-05, "loss": 1.9797, "step": 14478 }, { "epoch": 0.48, "grad_norm": 0.7302347421646118, "learning_rate": 1.084876051466482e-05, "loss": 2.0681, "step": 14479 }, { "epoch": 0.48, "grad_norm": 0.7055133581161499, "learning_rate": 1.0847701452366215e-05, "loss": 2.0967, "step": 14480 }, { "epoch": 0.48, "grad_norm": 0.7589823603630066, "learning_rate": 1.084664238049078e-05, "loss": 2.0769, "step": 14481 }, { "epoch": 0.48, "grad_norm": 0.7595403790473938, "learning_rate": 1.0845583299050471e-05, "loss": 2.0888, "step": 14482 }, { "epoch": 0.48, "grad_norm": 0.7433275580406189, "learning_rate": 1.0844524208057257e-05, "loss": 2.0595, "step": 14483 }, { "epoch": 0.48, "grad_norm": 0.7364020943641663, "learning_rate": 1.0843465107523107e-05, "loss": 2.1058, "step": 14484 }, { "epoch": 0.48, "grad_norm": 0.7449548840522766, "learning_rate": 1.0842405997459981e-05, "loss": 2.1129, "step": 14485 }, { "epoch": 0.48, "grad_norm": 0.7430303692817688, "learning_rate": 1.0841346877879847e-05, "loss": 2.0789, "step": 14486 }, { "epoch": 0.48, "grad_norm": 0.7459086179733276, "learning_rate": 1.0840287748794669e-05, "loss": 2.0754, "step": 14487 }, { "epoch": 0.48, "grad_norm": 0.7378416657447815, "learning_rate": 1.083922861021641e-05, "loss": 2.108, "step": 14488 }, { "epoch": 0.48, "grad_norm": 0.7368146777153015, "learning_rate": 1.083816946215704e-05, "loss": 2.1181, "step": 14489 }, { "epoch": 0.48, "grad_norm": 0.745969831943512, "learning_rate": 1.0837110304628524e-05, "loss": 2.077, "step": 14490 }, { "epoch": 0.48, "grad_norm": 0.7739499807357788, "learning_rate": 1.0836051137642822e-05, "loss": 2.1226, "step": 14491 }, { "epoch": 0.48, "grad_norm": 0.7658153772354126, "learning_rate": 1.0834991961211907e-05, "loss": 2.0104, "step": 14492 }, { "epoch": 0.48, "grad_norm": 0.7268418669700623, "learning_rate": 1.0833932775347745e-05, "loss": 2.0734, "step": 14493 }, { "epoch": 0.48, "grad_norm": 0.7200512886047363, "learning_rate": 1.0832873580062293e-05, "loss": 1.987, "step": 14494 }, { "epoch": 0.48, "grad_norm": 0.7354490756988525, "learning_rate": 1.0831814375367528e-05, "loss": 2.0619, "step": 14495 }, { "epoch": 0.48, "grad_norm": 0.7385833859443665, "learning_rate": 1.083075516127541e-05, "loss": 2.032, "step": 14496 }, { "epoch": 0.48, "grad_norm": 0.7234517931938171, "learning_rate": 1.0829695937797908e-05, "loss": 2.0088, "step": 14497 }, { "epoch": 0.48, "grad_norm": 0.7308991551399231, "learning_rate": 1.0828636704946987e-05, "loss": 2.078, "step": 14498 }, { "epoch": 0.48, "grad_norm": 0.72681725025177, "learning_rate": 1.0827577462734612e-05, "loss": 2.0715, "step": 14499 }, { "epoch": 0.48, "grad_norm": 0.7563591003417969, "learning_rate": 1.0826518211172755e-05, "loss": 2.0677, "step": 14500 }, { "epoch": 0.48, "grad_norm": 0.741085410118103, "learning_rate": 1.0825458950273382e-05, "loss": 2.1019, "step": 14501 }, { "epoch": 0.48, "grad_norm": 0.7280721068382263, "learning_rate": 1.0824399680048452e-05, "loss": 2.0044, "step": 14502 }, { "epoch": 0.48, "grad_norm": 0.7590895295143127, "learning_rate": 1.0823340400509939e-05, "loss": 2.0291, "step": 14503 }, { "epoch": 0.48, "grad_norm": 0.7302896976470947, "learning_rate": 1.082228111166981e-05, "loss": 2.0569, "step": 14504 }, { "epoch": 0.48, "grad_norm": 0.7266169190406799, "learning_rate": 1.0821221813540028e-05, "loss": 2.0741, "step": 14505 }, { "epoch": 0.48, "grad_norm": 0.7605879306793213, "learning_rate": 1.0820162506132567e-05, "loss": 2.1122, "step": 14506 }, { "epoch": 0.48, "grad_norm": 0.7141992449760437, "learning_rate": 1.081910318945939e-05, "loss": 1.9808, "step": 14507 }, { "epoch": 0.48, "grad_norm": 0.7256706953048706, "learning_rate": 1.0818043863532464e-05, "loss": 2.0349, "step": 14508 }, { "epoch": 0.48, "grad_norm": 0.7446910738945007, "learning_rate": 1.0816984528363758e-05, "loss": 2.1489, "step": 14509 }, { "epoch": 0.48, "grad_norm": 0.7351961731910706, "learning_rate": 1.0815925183965239e-05, "loss": 2.1343, "step": 14510 }, { "epoch": 0.48, "grad_norm": 0.7672762870788574, "learning_rate": 1.0814865830348878e-05, "loss": 2.0792, "step": 14511 }, { "epoch": 0.48, "grad_norm": 0.7373375296592712, "learning_rate": 1.081380646752664e-05, "loss": 2.1457, "step": 14512 }, { "epoch": 0.48, "grad_norm": 0.7743820548057556, "learning_rate": 1.0812747095510489e-05, "loss": 2.1224, "step": 14513 }, { "epoch": 0.48, "grad_norm": 0.7648014426231384, "learning_rate": 1.08116877143124e-05, "loss": 1.9845, "step": 14514 }, { "epoch": 0.48, "grad_norm": 0.7586275935173035, "learning_rate": 1.0810628323944343e-05, "loss": 2.0666, "step": 14515 }, { "epoch": 0.48, "grad_norm": 0.7430000305175781, "learning_rate": 1.0809568924418277e-05, "loss": 2.0849, "step": 14516 }, { "epoch": 0.48, "grad_norm": 0.7236108183860779, "learning_rate": 1.0808509515746179e-05, "loss": 2.0875, "step": 14517 }, { "epoch": 0.48, "grad_norm": 0.7681379318237305, "learning_rate": 1.0807450097940013e-05, "loss": 2.1311, "step": 14518 }, { "epoch": 0.48, "grad_norm": 0.7555314302444458, "learning_rate": 1.0806390671011748e-05, "loss": 2.021, "step": 14519 }, { "epoch": 0.48, "grad_norm": 0.7460755705833435, "learning_rate": 1.0805331234973354e-05, "loss": 2.1327, "step": 14520 }, { "epoch": 0.48, "grad_norm": 0.7329644560813904, "learning_rate": 1.0804271789836802e-05, "loss": 2.0831, "step": 14521 }, { "epoch": 0.48, "grad_norm": 0.7432982325553894, "learning_rate": 1.0803212335614057e-05, "loss": 2.0984, "step": 14522 }, { "epoch": 0.48, "grad_norm": 0.7142213582992554, "learning_rate": 1.080215287231709e-05, "loss": 2.0855, "step": 14523 }, { "epoch": 0.48, "grad_norm": 0.7374944090843201, "learning_rate": 1.080109339995787e-05, "loss": 2.1162, "step": 14524 }, { "epoch": 0.48, "grad_norm": 0.725470781326294, "learning_rate": 1.0800033918548364e-05, "loss": 2.1443, "step": 14525 }, { "epoch": 0.48, "grad_norm": 0.738006591796875, "learning_rate": 1.0798974428100548e-05, "loss": 2.0391, "step": 14526 }, { "epoch": 0.48, "grad_norm": 0.7326415777206421, "learning_rate": 1.0797914928626383e-05, "loss": 2.0639, "step": 14527 }, { "epoch": 0.48, "grad_norm": 0.7425469756126404, "learning_rate": 1.0796855420137846e-05, "loss": 2.1035, "step": 14528 }, { "epoch": 0.48, "grad_norm": 0.7605542540550232, "learning_rate": 1.0795795902646902e-05, "loss": 2.1137, "step": 14529 }, { "epoch": 0.48, "grad_norm": 0.7297346591949463, "learning_rate": 1.0794736376165523e-05, "loss": 2.0356, "step": 14530 }, { "epoch": 0.48, "grad_norm": 0.718613862991333, "learning_rate": 1.0793676840705678e-05, "loss": 2.0344, "step": 14531 }, { "epoch": 0.48, "grad_norm": 0.7361851334571838, "learning_rate": 1.0792617296279335e-05, "loss": 2.0215, "step": 14532 }, { "epoch": 0.48, "grad_norm": 0.7202955484390259, "learning_rate": 1.0791557742898469e-05, "loss": 2.0593, "step": 14533 }, { "epoch": 0.48, "grad_norm": 0.7353624701499939, "learning_rate": 1.0790498180575046e-05, "loss": 2.0801, "step": 14534 }, { "epoch": 0.48, "grad_norm": 0.7539271712303162, "learning_rate": 1.078943860932104e-05, "loss": 2.0801, "step": 14535 }, { "epoch": 0.48, "grad_norm": 0.7168089151382446, "learning_rate": 1.0788379029148417e-05, "loss": 2.1141, "step": 14536 }, { "epoch": 0.48, "grad_norm": 0.7230263948440552, "learning_rate": 1.078731944006915e-05, "loss": 2.0702, "step": 14537 }, { "epoch": 0.48, "grad_norm": 0.753931999206543, "learning_rate": 1.0786259842095208e-05, "loss": 2.1037, "step": 14538 }, { "epoch": 0.48, "grad_norm": 0.7379437685012817, "learning_rate": 1.0785200235238569e-05, "loss": 2.0617, "step": 14539 }, { "epoch": 0.48, "grad_norm": 0.7212793231010437, "learning_rate": 1.0784140619511192e-05, "loss": 2.0534, "step": 14540 }, { "epoch": 0.48, "grad_norm": 0.7412114143371582, "learning_rate": 1.0783080994925058e-05, "loss": 2.0414, "step": 14541 }, { "epoch": 0.48, "grad_norm": 0.7415284514427185, "learning_rate": 1.078202136149213e-05, "loss": 2.1183, "step": 14542 }, { "epoch": 0.48, "grad_norm": 0.7655936479568481, "learning_rate": 1.0780961719224384e-05, "loss": 2.0047, "step": 14543 }, { "epoch": 0.48, "grad_norm": 0.7588881850242615, "learning_rate": 1.0779902068133789e-05, "loss": 2.095, "step": 14544 }, { "epoch": 0.48, "grad_norm": 0.7264940142631531, "learning_rate": 1.0778842408232319e-05, "loss": 2.1196, "step": 14545 }, { "epoch": 0.48, "grad_norm": 0.7662963271141052, "learning_rate": 1.0777782739531945e-05, "loss": 2.1363, "step": 14546 }, { "epoch": 0.48, "grad_norm": 0.7407233119010925, "learning_rate": 1.0776723062044636e-05, "loss": 2.0421, "step": 14547 }, { "epoch": 0.48, "grad_norm": 0.7734267711639404, "learning_rate": 1.0775663375782369e-05, "loss": 2.1171, "step": 14548 }, { "epoch": 0.48, "grad_norm": 0.709658145904541, "learning_rate": 1.0774603680757105e-05, "loss": 2.1365, "step": 14549 }, { "epoch": 0.48, "grad_norm": 0.7338653802871704, "learning_rate": 1.0773543976980827e-05, "loss": 2.0883, "step": 14550 }, { "epoch": 0.48, "grad_norm": 0.7757417559623718, "learning_rate": 1.0772484264465499e-05, "loss": 2.0663, "step": 14551 }, { "epoch": 0.48, "grad_norm": 0.7110576033592224, "learning_rate": 1.07714245432231e-05, "loss": 2.1025, "step": 14552 }, { "epoch": 0.48, "grad_norm": 0.7363674640655518, "learning_rate": 1.0770364813265599e-05, "loss": 2.0375, "step": 14553 }, { "epoch": 0.48, "grad_norm": 0.7321013808250427, "learning_rate": 1.0769305074604967e-05, "loss": 2.041, "step": 14554 }, { "epoch": 0.48, "grad_norm": 0.7813422679901123, "learning_rate": 1.0768245327253176e-05, "loss": 2.0472, "step": 14555 }, { "epoch": 0.48, "grad_norm": 0.7532978653907776, "learning_rate": 1.07671855712222e-05, "loss": 2.1504, "step": 14556 }, { "epoch": 0.48, "grad_norm": 0.7305266857147217, "learning_rate": 1.076612580652401e-05, "loss": 1.9771, "step": 14557 }, { "epoch": 0.48, "grad_norm": 0.7696027159690857, "learning_rate": 1.0765066033170581e-05, "loss": 2.0586, "step": 14558 }, { "epoch": 0.48, "grad_norm": 0.7136980295181274, "learning_rate": 1.0764006251173884e-05, "loss": 2.0457, "step": 14559 }, { "epoch": 0.48, "grad_norm": 0.7092509865760803, "learning_rate": 1.0762946460545892e-05, "loss": 2.0526, "step": 14560 }, { "epoch": 0.48, "grad_norm": 0.7640079855918884, "learning_rate": 1.0761886661298579e-05, "loss": 2.1239, "step": 14561 }, { "epoch": 0.48, "grad_norm": 0.7479318976402283, "learning_rate": 1.0760826853443916e-05, "loss": 2.0975, "step": 14562 }, { "epoch": 0.48, "grad_norm": 0.7171921730041504, "learning_rate": 1.0759767036993877e-05, "loss": 2.0863, "step": 14563 }, { "epoch": 0.48, "grad_norm": 0.7298987507820129, "learning_rate": 1.0758707211960435e-05, "loss": 2.0352, "step": 14564 }, { "epoch": 0.48, "grad_norm": 0.7785048484802246, "learning_rate": 1.0757647378355562e-05, "loss": 2.1103, "step": 14565 }, { "epoch": 0.48, "grad_norm": 0.7254499793052673, "learning_rate": 1.0756587536191238e-05, "loss": 2.1137, "step": 14566 }, { "epoch": 0.48, "grad_norm": 0.748076319694519, "learning_rate": 1.0755527685479428e-05, "loss": 2.1113, "step": 14567 }, { "epoch": 0.48, "grad_norm": 0.7399908900260925, "learning_rate": 1.0754467826232108e-05, "loss": 2.1108, "step": 14568 }, { "epoch": 0.48, "grad_norm": 0.7155466079711914, "learning_rate": 1.0753407958461255e-05, "loss": 2.0481, "step": 14569 }, { "epoch": 0.48, "grad_norm": 0.7140225768089294, "learning_rate": 1.075234808217884e-05, "loss": 2.0565, "step": 14570 }, { "epoch": 0.48, "grad_norm": 0.7307649850845337, "learning_rate": 1.0751288197396836e-05, "loss": 2.0242, "step": 14571 }, { "epoch": 0.48, "grad_norm": 0.7213634848594666, "learning_rate": 1.0750228304127221e-05, "loss": 2.1171, "step": 14572 }, { "epoch": 0.48, "grad_norm": 0.7209476232528687, "learning_rate": 1.0749168402381963e-05, "loss": 2.1208, "step": 14573 }, { "epoch": 0.48, "grad_norm": 0.7196819186210632, "learning_rate": 1.0748108492173042e-05, "loss": 2.1133, "step": 14574 }, { "epoch": 0.48, "grad_norm": 0.7658464312553406, "learning_rate": 1.074704857351243e-05, "loss": 2.1002, "step": 14575 }, { "epoch": 0.48, "grad_norm": 0.7282299995422363, "learning_rate": 1.0745988646412095e-05, "loss": 2.0259, "step": 14576 }, { "epoch": 0.48, "grad_norm": 0.7416151165962219, "learning_rate": 1.0744928710884027e-05, "loss": 2.0864, "step": 14577 }, { "epoch": 0.49, "grad_norm": 0.7469478845596313, "learning_rate": 1.0743868766940185e-05, "loss": 2.1459, "step": 14578 }, { "epoch": 0.49, "grad_norm": 0.7472251057624817, "learning_rate": 1.074280881459255e-05, "loss": 1.9939, "step": 14579 }, { "epoch": 0.49, "grad_norm": 0.7251909375190735, "learning_rate": 1.0741748853853096e-05, "loss": 2.0804, "step": 14580 }, { "epoch": 0.49, "grad_norm": 0.7327628135681152, "learning_rate": 1.0740688884733798e-05, "loss": 2.1516, "step": 14581 }, { "epoch": 0.49, "grad_norm": 0.7315933108329773, "learning_rate": 1.0739628907246634e-05, "loss": 2.1176, "step": 14582 }, { "epoch": 0.49, "grad_norm": 0.7169124484062195, "learning_rate": 1.0738568921403577e-05, "loss": 2.0604, "step": 14583 }, { "epoch": 0.49, "grad_norm": 0.72947758436203, "learning_rate": 1.0737508927216593e-05, "loss": 2.0901, "step": 14584 }, { "epoch": 0.49, "grad_norm": 0.7218141555786133, "learning_rate": 1.0736448924697673e-05, "loss": 2.1156, "step": 14585 }, { "epoch": 0.49, "grad_norm": 0.7156152129173279, "learning_rate": 1.0735388913858784e-05, "loss": 2.0811, "step": 14586 }, { "epoch": 0.49, "grad_norm": 0.7359183430671692, "learning_rate": 1.0734328894711898e-05, "loss": 2.0208, "step": 14587 }, { "epoch": 0.49, "grad_norm": 0.6928489804267883, "learning_rate": 1.0733268867268998e-05, "loss": 2.0554, "step": 14588 }, { "epoch": 0.49, "grad_norm": 0.743375837802887, "learning_rate": 1.0732208831542055e-05, "loss": 2.0936, "step": 14589 }, { "epoch": 0.49, "grad_norm": 0.7146577835083008, "learning_rate": 1.0731148787543046e-05, "loss": 2.0956, "step": 14590 }, { "epoch": 0.49, "grad_norm": 0.741669774055481, "learning_rate": 1.0730088735283946e-05, "loss": 2.0339, "step": 14591 }, { "epoch": 0.49, "grad_norm": 0.7593550682067871, "learning_rate": 1.072902867477673e-05, "loss": 2.1198, "step": 14592 }, { "epoch": 0.49, "grad_norm": 0.750761091709137, "learning_rate": 1.0727968606033378e-05, "loss": 2.0423, "step": 14593 }, { "epoch": 0.49, "grad_norm": 0.7350996732711792, "learning_rate": 1.0726908529065865e-05, "loss": 2.1502, "step": 14594 }, { "epoch": 0.49, "grad_norm": 0.7488811612129211, "learning_rate": 1.072584844388616e-05, "loss": 2.0394, "step": 14595 }, { "epoch": 0.49, "grad_norm": 0.7655408978462219, "learning_rate": 1.072478835050625e-05, "loss": 2.0863, "step": 14596 }, { "epoch": 0.49, "grad_norm": 0.7548730373382568, "learning_rate": 1.0723728248938103e-05, "loss": 2.0056, "step": 14597 }, { "epoch": 0.49, "grad_norm": 0.7081469893455505, "learning_rate": 1.0722668139193696e-05, "loss": 2.0936, "step": 14598 }, { "epoch": 0.49, "grad_norm": 0.766166090965271, "learning_rate": 1.0721608021285012e-05, "loss": 2.0785, "step": 14599 }, { "epoch": 0.49, "grad_norm": 0.774308979511261, "learning_rate": 1.0720547895224023e-05, "loss": 2.0606, "step": 14600 }, { "epoch": 0.49, "grad_norm": 0.7536383271217346, "learning_rate": 1.0719487761022705e-05, "loss": 2.0379, "step": 14601 }, { "epoch": 0.49, "grad_norm": 0.7396448254585266, "learning_rate": 1.0718427618693035e-05, "loss": 2.0943, "step": 14602 }, { "epoch": 0.49, "grad_norm": 0.7359600067138672, "learning_rate": 1.0717367468246992e-05, "loss": 2.0329, "step": 14603 }, { "epoch": 0.49, "grad_norm": 0.7345253229141235, "learning_rate": 1.0716307309696552e-05, "loss": 2.0687, "step": 14604 }, { "epoch": 0.49, "grad_norm": 0.7935307621955872, "learning_rate": 1.0715247143053693e-05, "loss": 2.0726, "step": 14605 }, { "epoch": 0.49, "grad_norm": 0.7525728940963745, "learning_rate": 1.0714186968330386e-05, "loss": 2.0839, "step": 14606 }, { "epoch": 0.49, "grad_norm": 0.8106819987297058, "learning_rate": 1.0713126785538618e-05, "loss": 2.0749, "step": 14607 }, { "epoch": 0.49, "grad_norm": 0.7426064610481262, "learning_rate": 1.0712066594690362e-05, "loss": 2.1124, "step": 14608 }, { "epoch": 0.49, "grad_norm": 0.725161612033844, "learning_rate": 1.0711006395797593e-05, "loss": 2.0218, "step": 14609 }, { "epoch": 0.49, "grad_norm": 0.7419837117195129, "learning_rate": 1.070994618887229e-05, "loss": 2.0605, "step": 14610 }, { "epoch": 0.49, "grad_norm": 0.7481278777122498, "learning_rate": 1.0708885973926432e-05, "loss": 2.0176, "step": 14611 }, { "epoch": 0.49, "grad_norm": 0.7455757856369019, "learning_rate": 1.0707825750971994e-05, "loss": 2.1241, "step": 14612 }, { "epoch": 0.49, "grad_norm": 0.7478470206260681, "learning_rate": 1.0706765520020958e-05, "loss": 2.0784, "step": 14613 }, { "epoch": 0.49, "grad_norm": 0.7379599213600159, "learning_rate": 1.0705705281085297e-05, "loss": 2.1277, "step": 14614 }, { "epoch": 0.49, "grad_norm": 0.7394887804985046, "learning_rate": 1.0704645034176992e-05, "loss": 2.0614, "step": 14615 }, { "epoch": 0.49, "grad_norm": 0.7471224069595337, "learning_rate": 1.0703584779308023e-05, "loss": 2.1958, "step": 14616 }, { "epoch": 0.49, "grad_norm": 0.7275299429893494, "learning_rate": 1.0702524516490363e-05, "loss": 2.0863, "step": 14617 }, { "epoch": 0.49, "grad_norm": 0.7513114213943481, "learning_rate": 1.070146424573599e-05, "loss": 2.1102, "step": 14618 }, { "epoch": 0.49, "grad_norm": 0.7537183165550232, "learning_rate": 1.070040396705689e-05, "loss": 2.083, "step": 14619 }, { "epoch": 0.49, "grad_norm": 0.7381898760795593, "learning_rate": 1.0699343680465035e-05, "loss": 2.0606, "step": 14620 }, { "epoch": 0.49, "grad_norm": 0.7517684102058411, "learning_rate": 1.0698283385972405e-05, "loss": 2.1284, "step": 14621 }, { "epoch": 0.49, "grad_norm": 0.7099905014038086, "learning_rate": 1.0697223083590979e-05, "loss": 1.9619, "step": 14622 }, { "epoch": 0.49, "grad_norm": 0.7577435970306396, "learning_rate": 1.0696162773332735e-05, "loss": 2.0517, "step": 14623 }, { "epoch": 0.49, "grad_norm": 0.7741847634315491, "learning_rate": 1.0695102455209649e-05, "loss": 2.0292, "step": 14624 }, { "epoch": 0.49, "grad_norm": 0.745650053024292, "learning_rate": 1.0694042129233707e-05, "loss": 2.091, "step": 14625 }, { "epoch": 0.49, "grad_norm": 0.7720785737037659, "learning_rate": 1.0692981795416881e-05, "loss": 2.1599, "step": 14626 }, { "epoch": 0.49, "grad_norm": 0.7433773875236511, "learning_rate": 1.0691921453771155e-05, "loss": 2.1176, "step": 14627 }, { "epoch": 0.49, "grad_norm": 0.7450385093688965, "learning_rate": 1.0690861104308504e-05, "loss": 2.0064, "step": 14628 }, { "epoch": 0.49, "grad_norm": 0.7526825666427612, "learning_rate": 1.0689800747040908e-05, "loss": 2.1196, "step": 14629 }, { "epoch": 0.49, "grad_norm": 0.7309104800224304, "learning_rate": 1.0688740381980352e-05, "loss": 2.055, "step": 14630 }, { "epoch": 0.49, "grad_norm": 0.7464230060577393, "learning_rate": 1.0687680009138806e-05, "loss": 2.0825, "step": 14631 }, { "epoch": 0.49, "grad_norm": 0.7237047553062439, "learning_rate": 1.0686619628528256e-05, "loss": 2.0883, "step": 14632 }, { "epoch": 0.49, "grad_norm": 0.7204283475875854, "learning_rate": 1.068555924016068e-05, "loss": 2.0997, "step": 14633 }, { "epoch": 0.49, "grad_norm": 0.7293440699577332, "learning_rate": 1.068449884404806e-05, "loss": 2.0959, "step": 14634 }, { "epoch": 0.49, "grad_norm": 0.7542620301246643, "learning_rate": 1.068343844020237e-05, "loss": 2.0791, "step": 14635 }, { "epoch": 0.49, "grad_norm": 0.7173818945884705, "learning_rate": 1.0682378028635591e-05, "loss": 2.0866, "step": 14636 }, { "epoch": 0.49, "grad_norm": 0.7198214530944824, "learning_rate": 1.0681317609359709e-05, "loss": 2.0463, "step": 14637 }, { "epoch": 0.49, "grad_norm": 0.7311445474624634, "learning_rate": 1.06802571823867e-05, "loss": 2.0898, "step": 14638 }, { "epoch": 0.49, "grad_norm": 0.7399665117263794, "learning_rate": 1.0679196747728543e-05, "loss": 2.1081, "step": 14639 }, { "epoch": 0.49, "grad_norm": 0.7373688817024231, "learning_rate": 1.0678136305397218e-05, "loss": 2.0918, "step": 14640 }, { "epoch": 0.49, "grad_norm": 0.7440222501754761, "learning_rate": 1.0677075855404709e-05, "loss": 2.1241, "step": 14641 }, { "epoch": 0.49, "grad_norm": 0.7079523205757141, "learning_rate": 1.067601539776299e-05, "loss": 2.09, "step": 14642 }, { "epoch": 0.49, "grad_norm": 0.7419347763061523, "learning_rate": 1.067495493248405e-05, "loss": 2.151, "step": 14643 }, { "epoch": 0.49, "grad_norm": 0.740949809551239, "learning_rate": 1.0673894459579858e-05, "loss": 2.0791, "step": 14644 }, { "epoch": 0.49, "grad_norm": 0.7136651277542114, "learning_rate": 1.0672833979062406e-05, "loss": 2.0997, "step": 14645 }, { "epoch": 0.49, "grad_norm": 0.7348021864891052, "learning_rate": 1.067177349094367e-05, "loss": 2.134, "step": 14646 }, { "epoch": 0.49, "grad_norm": 0.7171558737754822, "learning_rate": 1.0670712995235631e-05, "loss": 2.0722, "step": 14647 }, { "epoch": 0.49, "grad_norm": 0.7304102182388306, "learning_rate": 1.0669652491950269e-05, "loss": 2.0041, "step": 14648 }, { "epoch": 0.49, "grad_norm": 0.7185641527175903, "learning_rate": 1.0668591981099566e-05, "loss": 2.1098, "step": 14649 }, { "epoch": 0.49, "grad_norm": 0.7144058346748352, "learning_rate": 1.0667531462695502e-05, "loss": 2.1243, "step": 14650 }, { "epoch": 0.49, "grad_norm": 0.7622905373573303, "learning_rate": 1.0666470936750057e-05, "loss": 2.0879, "step": 14651 }, { "epoch": 0.49, "grad_norm": 0.7399274110794067, "learning_rate": 1.0665410403275216e-05, "loss": 2.0914, "step": 14652 }, { "epoch": 0.49, "grad_norm": 0.7643499970436096, "learning_rate": 1.0664349862282958e-05, "loss": 2.1075, "step": 14653 }, { "epoch": 0.49, "grad_norm": 0.7139347195625305, "learning_rate": 1.0663289313785268e-05, "loss": 1.9994, "step": 14654 }, { "epoch": 0.49, "grad_norm": 0.7429953813552856, "learning_rate": 1.0662228757794117e-05, "loss": 2.085, "step": 14655 }, { "epoch": 0.49, "grad_norm": 0.7331598401069641, "learning_rate": 1.06611681943215e-05, "loss": 2.0623, "step": 14656 }, { "epoch": 0.49, "grad_norm": 0.7400999665260315, "learning_rate": 1.066010762337939e-05, "loss": 2.0318, "step": 14657 }, { "epoch": 0.49, "grad_norm": 0.7368826866149902, "learning_rate": 1.065904704497977e-05, "loss": 2.1126, "step": 14658 }, { "epoch": 0.49, "grad_norm": 0.7501544952392578, "learning_rate": 1.0657986459134621e-05, "loss": 2.133, "step": 14659 }, { "epoch": 0.49, "grad_norm": 0.7455234527587891, "learning_rate": 1.065692586585593e-05, "loss": 2.1171, "step": 14660 }, { "epoch": 0.49, "grad_norm": 0.7894590497016907, "learning_rate": 1.0655865265155673e-05, "loss": 2.1526, "step": 14661 }, { "epoch": 0.49, "grad_norm": 0.7913345098495483, "learning_rate": 1.0654804657045837e-05, "loss": 2.1424, "step": 14662 }, { "epoch": 0.49, "grad_norm": 0.7647777199745178, "learning_rate": 1.06537440415384e-05, "loss": 2.146, "step": 14663 }, { "epoch": 0.49, "grad_norm": 0.7644203305244446, "learning_rate": 1.0652683418645347e-05, "loss": 2.0871, "step": 14664 }, { "epoch": 0.49, "grad_norm": 0.7443122267723083, "learning_rate": 1.0651622788378662e-05, "loss": 2.058, "step": 14665 }, { "epoch": 0.49, "grad_norm": 0.754377543926239, "learning_rate": 1.0650562150750318e-05, "loss": 2.0506, "step": 14666 }, { "epoch": 0.49, "grad_norm": 0.7406039834022522, "learning_rate": 1.064950150577231e-05, "loss": 2.0777, "step": 14667 }, { "epoch": 0.49, "grad_norm": 0.7436974048614502, "learning_rate": 1.0648440853456612e-05, "loss": 2.1324, "step": 14668 }, { "epoch": 0.49, "grad_norm": 0.7229152917861938, "learning_rate": 1.0647380193815209e-05, "loss": 2.0307, "step": 14669 }, { "epoch": 0.49, "grad_norm": 0.7415510416030884, "learning_rate": 1.0646319526860086e-05, "loss": 2.067, "step": 14670 }, { "epoch": 0.49, "grad_norm": 0.7377073168754578, "learning_rate": 1.0645258852603222e-05, "loss": 2.102, "step": 14671 }, { "epoch": 0.49, "grad_norm": 0.7496103048324585, "learning_rate": 1.0644198171056601e-05, "loss": 2.1657, "step": 14672 }, { "epoch": 0.49, "grad_norm": 0.7456011772155762, "learning_rate": 1.0643137482232206e-05, "loss": 2.0973, "step": 14673 }, { "epoch": 0.49, "grad_norm": 0.7341430187225342, "learning_rate": 1.0642076786142024e-05, "loss": 2.1307, "step": 14674 }, { "epoch": 0.49, "grad_norm": 0.7097193002700806, "learning_rate": 1.0641016082798032e-05, "loss": 2.0745, "step": 14675 }, { "epoch": 0.49, "grad_norm": 0.7230064868927002, "learning_rate": 1.0639955372212218e-05, "loss": 2.0991, "step": 14676 }, { "epoch": 0.49, "grad_norm": 0.735785961151123, "learning_rate": 1.0638894654396562e-05, "loss": 2.0265, "step": 14677 }, { "epoch": 0.49, "grad_norm": 0.7457605004310608, "learning_rate": 1.0637833929363049e-05, "loss": 2.1327, "step": 14678 }, { "epoch": 0.49, "grad_norm": 0.7232545018196106, "learning_rate": 1.0636773197123661e-05, "loss": 2.071, "step": 14679 }, { "epoch": 0.49, "grad_norm": 0.7175222039222717, "learning_rate": 1.0635712457690382e-05, "loss": 2.1216, "step": 14680 }, { "epoch": 0.49, "grad_norm": 0.7136391997337341, "learning_rate": 1.0634651711075199e-05, "loss": 2.056, "step": 14681 }, { "epoch": 0.49, "grad_norm": 0.7620527148246765, "learning_rate": 1.0633590957290091e-05, "loss": 2.0689, "step": 14682 }, { "epoch": 0.49, "grad_norm": 0.7666252851486206, "learning_rate": 1.0632530196347046e-05, "loss": 2.1107, "step": 14683 }, { "epoch": 0.49, "grad_norm": 0.7668177485466003, "learning_rate": 1.0631469428258044e-05, "loss": 2.0176, "step": 14684 }, { "epoch": 0.49, "grad_norm": 0.7574915885925293, "learning_rate": 1.063040865303507e-05, "loss": 2.0771, "step": 14685 }, { "epoch": 0.49, "grad_norm": 0.7369412779808044, "learning_rate": 1.0629347870690108e-05, "loss": 2.0537, "step": 14686 }, { "epoch": 0.49, "grad_norm": 0.7470325827598572, "learning_rate": 1.0628287081235144e-05, "loss": 2.0076, "step": 14687 }, { "epoch": 0.49, "grad_norm": 0.7449122667312622, "learning_rate": 1.062722628468216e-05, "loss": 2.124, "step": 14688 }, { "epoch": 0.49, "grad_norm": 0.7190289497375488, "learning_rate": 1.0626165481043142e-05, "loss": 2.0568, "step": 14689 }, { "epoch": 0.49, "grad_norm": 0.7241438031196594, "learning_rate": 1.0625104670330074e-05, "loss": 2.1299, "step": 14690 }, { "epoch": 0.49, "grad_norm": 0.742878258228302, "learning_rate": 1.0624043852554934e-05, "loss": 2.1139, "step": 14691 }, { "epoch": 0.49, "grad_norm": 0.7262085676193237, "learning_rate": 1.0622983027729719e-05, "loss": 2.0705, "step": 14692 }, { "epoch": 0.49, "grad_norm": 0.7183705568313599, "learning_rate": 1.0621922195866404e-05, "loss": 2.0801, "step": 14693 }, { "epoch": 0.49, "grad_norm": 0.731928288936615, "learning_rate": 1.0620861356976977e-05, "loss": 2.067, "step": 14694 }, { "epoch": 0.49, "grad_norm": 0.7484527230262756, "learning_rate": 1.0619800511073422e-05, "loss": 2.1193, "step": 14695 }, { "epoch": 0.49, "grad_norm": 0.7143909335136414, "learning_rate": 1.0618739658167725e-05, "loss": 2.1202, "step": 14696 }, { "epoch": 0.49, "grad_norm": 0.7312271595001221, "learning_rate": 1.061767879827187e-05, "loss": 2.0424, "step": 14697 }, { "epoch": 0.49, "grad_norm": 0.738841712474823, "learning_rate": 1.0616617931397841e-05, "loss": 2.077, "step": 14698 }, { "epoch": 0.49, "grad_norm": 0.754411518573761, "learning_rate": 1.0615557057557621e-05, "loss": 2.1127, "step": 14699 }, { "epoch": 0.49, "grad_norm": 0.7421163320541382, "learning_rate": 1.0614496176763205e-05, "loss": 2.1467, "step": 14700 }, { "epoch": 0.49, "grad_norm": 0.7563844919204712, "learning_rate": 1.0613435289026566e-05, "loss": 2.0843, "step": 14701 }, { "epoch": 0.49, "grad_norm": 0.7339263558387756, "learning_rate": 1.0612374394359695e-05, "loss": 2.1495, "step": 14702 }, { "epoch": 0.49, "grad_norm": 0.7737752199172974, "learning_rate": 1.061131349277458e-05, "loss": 2.052, "step": 14703 }, { "epoch": 0.49, "grad_norm": 0.7618801593780518, "learning_rate": 1.0610252584283201e-05, "loss": 2.0892, "step": 14704 }, { "epoch": 0.49, "grad_norm": 0.785929262638092, "learning_rate": 1.0609191668897546e-05, "loss": 2.0841, "step": 14705 }, { "epoch": 0.49, "grad_norm": 0.723013162612915, "learning_rate": 1.0608130746629602e-05, "loss": 2.0553, "step": 14706 }, { "epoch": 0.49, "grad_norm": 0.7250922918319702, "learning_rate": 1.060706981749135e-05, "loss": 2.0306, "step": 14707 }, { "epoch": 0.49, "grad_norm": 0.722086489200592, "learning_rate": 1.0606008881494783e-05, "loss": 2.0614, "step": 14708 }, { "epoch": 0.49, "grad_norm": 0.7287055850028992, "learning_rate": 1.0604947938651882e-05, "loss": 2.0793, "step": 14709 }, { "epoch": 0.49, "grad_norm": 0.7173362970352173, "learning_rate": 1.0603886988974633e-05, "loss": 2.0832, "step": 14710 }, { "epoch": 0.49, "grad_norm": 0.727529764175415, "learning_rate": 1.060282603247502e-05, "loss": 2.0841, "step": 14711 }, { "epoch": 0.49, "grad_norm": 0.7330241203308105, "learning_rate": 1.0601765069165038e-05, "loss": 2.1248, "step": 14712 }, { "epoch": 0.49, "grad_norm": 0.6999135613441467, "learning_rate": 1.060070409905666e-05, "loss": 2.0929, "step": 14713 }, { "epoch": 0.49, "grad_norm": 0.7290116548538208, "learning_rate": 1.0599643122161884e-05, "loss": 2.1478, "step": 14714 }, { "epoch": 0.49, "grad_norm": 0.7333020567893982, "learning_rate": 1.059858213849269e-05, "loss": 2.0858, "step": 14715 }, { "epoch": 0.49, "grad_norm": 0.7391338348388672, "learning_rate": 1.0597521148061065e-05, "loss": 2.0842, "step": 14716 }, { "epoch": 0.49, "grad_norm": 0.7238151431083679, "learning_rate": 1.0596460150878997e-05, "loss": 2.0709, "step": 14717 }, { "epoch": 0.49, "grad_norm": 0.747388482093811, "learning_rate": 1.0595399146958472e-05, "loss": 2.0572, "step": 14718 }, { "epoch": 0.49, "grad_norm": 0.7397347092628479, "learning_rate": 1.0594338136311476e-05, "loss": 2.0958, "step": 14719 }, { "epoch": 0.49, "grad_norm": 0.7746142148971558, "learning_rate": 1.0593277118949997e-05, "loss": 2.1318, "step": 14720 }, { "epoch": 0.49, "grad_norm": 0.718159556388855, "learning_rate": 1.0592216094886019e-05, "loss": 2.0858, "step": 14721 }, { "epoch": 0.49, "grad_norm": 0.7231830358505249, "learning_rate": 1.059115506413153e-05, "loss": 2.0669, "step": 14722 }, { "epoch": 0.49, "grad_norm": 0.730810284614563, "learning_rate": 1.0590094026698522e-05, "loss": 2.0586, "step": 14723 }, { "epoch": 0.49, "grad_norm": 0.7129188179969788, "learning_rate": 1.0589032982598972e-05, "loss": 2.0543, "step": 14724 }, { "epoch": 0.49, "grad_norm": 0.7510318160057068, "learning_rate": 1.0587971931844876e-05, "loss": 2.1121, "step": 14725 }, { "epoch": 0.49, "grad_norm": 0.720358669757843, "learning_rate": 1.0586910874448218e-05, "loss": 2.1159, "step": 14726 }, { "epoch": 0.49, "grad_norm": 0.7271606922149658, "learning_rate": 1.0585849810420983e-05, "loss": 2.0892, "step": 14727 }, { "epoch": 0.49, "grad_norm": 0.757247269153595, "learning_rate": 1.058478873977516e-05, "loss": 2.0705, "step": 14728 }, { "epoch": 0.49, "grad_norm": 0.7254404425621033, "learning_rate": 1.0583727662522738e-05, "loss": 2.0867, "step": 14729 }, { "epoch": 0.49, "grad_norm": 0.7303285002708435, "learning_rate": 1.0582666578675703e-05, "loss": 2.0723, "step": 14730 }, { "epoch": 0.49, "grad_norm": 0.7081171870231628, "learning_rate": 1.0581605488246043e-05, "loss": 2.0343, "step": 14731 }, { "epoch": 0.49, "grad_norm": 0.7264071702957153, "learning_rate": 1.0580544391245742e-05, "loss": 2.1134, "step": 14732 }, { "epoch": 0.49, "grad_norm": 0.7303284406661987, "learning_rate": 1.0579483287686791e-05, "loss": 2.0114, "step": 14733 }, { "epoch": 0.49, "grad_norm": 0.7209147214889526, "learning_rate": 1.0578422177581183e-05, "loss": 2.0332, "step": 14734 }, { "epoch": 0.49, "grad_norm": 0.7522361278533936, "learning_rate": 1.0577361060940895e-05, "loss": 2.0763, "step": 14735 }, { "epoch": 0.49, "grad_norm": 0.748513400554657, "learning_rate": 1.0576299937777921e-05, "loss": 2.076, "step": 14736 }, { "epoch": 0.49, "grad_norm": 0.7712220549583435, "learning_rate": 1.0575238808104249e-05, "loss": 2.1357, "step": 14737 }, { "epoch": 0.49, "grad_norm": 0.7355931401252747, "learning_rate": 1.0574177671931865e-05, "loss": 2.1153, "step": 14738 }, { "epoch": 0.49, "grad_norm": 0.767103374004364, "learning_rate": 1.0573116529272758e-05, "loss": 2.0697, "step": 14739 }, { "epoch": 0.49, "grad_norm": 0.7569059133529663, "learning_rate": 1.0572055380138917e-05, "loss": 2.0828, "step": 14740 }, { "epoch": 0.49, "grad_norm": 0.7460962533950806, "learning_rate": 1.057099422454233e-05, "loss": 2.0737, "step": 14741 }, { "epoch": 0.49, "grad_norm": 0.8008043766021729, "learning_rate": 1.0569933062494984e-05, "loss": 2.0467, "step": 14742 }, { "epoch": 0.49, "grad_norm": 0.7106244564056396, "learning_rate": 1.0568871894008868e-05, "loss": 2.0353, "step": 14743 }, { "epoch": 0.49, "grad_norm": 0.7089235186576843, "learning_rate": 1.0567810719095973e-05, "loss": 2.053, "step": 14744 }, { "epoch": 0.49, "grad_norm": 0.7956158518791199, "learning_rate": 1.0566749537768281e-05, "loss": 2.1527, "step": 14745 }, { "epoch": 0.49, "grad_norm": 0.7136249542236328, "learning_rate": 1.0565688350037788e-05, "loss": 2.0314, "step": 14746 }, { "epoch": 0.49, "grad_norm": 0.7562602758407593, "learning_rate": 1.0564627155916483e-05, "loss": 2.0599, "step": 14747 }, { "epoch": 0.49, "grad_norm": 0.7685193419456482, "learning_rate": 1.0563565955416343e-05, "loss": 2.1565, "step": 14748 }, { "epoch": 0.49, "grad_norm": 0.8187074065208435, "learning_rate": 1.0562504748549372e-05, "loss": 2.1812, "step": 14749 }, { "epoch": 0.49, "grad_norm": 0.7400255799293518, "learning_rate": 1.056144353532755e-05, "loss": 2.081, "step": 14750 }, { "epoch": 0.49, "grad_norm": 0.7657455205917358, "learning_rate": 1.0560382315762867e-05, "loss": 2.1556, "step": 14751 }, { "epoch": 0.49, "grad_norm": 0.7442103624343872, "learning_rate": 1.0559321089867314e-05, "loss": 2.046, "step": 14752 }, { "epoch": 0.49, "grad_norm": 0.7319579720497131, "learning_rate": 1.0558259857652877e-05, "loss": 2.0833, "step": 14753 }, { "epoch": 0.49, "grad_norm": 0.754719614982605, "learning_rate": 1.055719861913155e-05, "loss": 2.0554, "step": 14754 }, { "epoch": 0.49, "grad_norm": 0.740059494972229, "learning_rate": 1.0556137374315318e-05, "loss": 2.0894, "step": 14755 }, { "epoch": 0.49, "grad_norm": 0.7332885265350342, "learning_rate": 1.0555076123216173e-05, "loss": 2.1334, "step": 14756 }, { "epoch": 0.49, "grad_norm": 0.7479389309883118, "learning_rate": 1.0554014865846102e-05, "loss": 2.0968, "step": 14757 }, { "epoch": 0.49, "grad_norm": 0.764297604560852, "learning_rate": 1.0552953602217097e-05, "loss": 2.0348, "step": 14758 }, { "epoch": 0.49, "grad_norm": 0.7222644090652466, "learning_rate": 1.0551892332341145e-05, "loss": 2.095, "step": 14759 }, { "epoch": 0.49, "grad_norm": 0.7213262915611267, "learning_rate": 1.055083105623024e-05, "loss": 2.0729, "step": 14760 }, { "epoch": 0.49, "grad_norm": 0.7323314547538757, "learning_rate": 1.0549769773896366e-05, "loss": 2.0795, "step": 14761 }, { "epoch": 0.49, "grad_norm": 0.7494660019874573, "learning_rate": 1.0548708485351515e-05, "loss": 2.1009, "step": 14762 }, { "epoch": 0.49, "grad_norm": 0.7454178333282471, "learning_rate": 1.0547647190607677e-05, "loss": 2.1049, "step": 14763 }, { "epoch": 0.49, "grad_norm": 0.7230193018913269, "learning_rate": 1.0546585889676842e-05, "loss": 1.9667, "step": 14764 }, { "epoch": 0.49, "grad_norm": 0.7430580854415894, "learning_rate": 1.0545524582571e-05, "loss": 2.0633, "step": 14765 }, { "epoch": 0.49, "grad_norm": 0.7117336392402649, "learning_rate": 1.0544463269302141e-05, "loss": 2.0982, "step": 14766 }, { "epoch": 0.49, "grad_norm": 0.7224259376525879, "learning_rate": 1.0543401949882255e-05, "loss": 2.0417, "step": 14767 }, { "epoch": 0.49, "grad_norm": 0.7265010476112366, "learning_rate": 1.0542340624323333e-05, "loss": 2.1235, "step": 14768 }, { "epoch": 0.49, "grad_norm": 0.7292789220809937, "learning_rate": 1.0541279292637365e-05, "loss": 2.109, "step": 14769 }, { "epoch": 0.49, "grad_norm": 0.7527442574501038, "learning_rate": 1.0540217954836337e-05, "loss": 2.1187, "step": 14770 }, { "epoch": 0.49, "grad_norm": 0.712405264377594, "learning_rate": 1.0539156610932251e-05, "loss": 2.0323, "step": 14771 }, { "epoch": 0.49, "grad_norm": 0.7256230115890503, "learning_rate": 1.0538095260937085e-05, "loss": 2.1081, "step": 14772 }, { "epoch": 0.49, "grad_norm": 0.7343063354492188, "learning_rate": 1.0537033904862832e-05, "loss": 2.1318, "step": 14773 }, { "epoch": 0.49, "grad_norm": 0.7543482184410095, "learning_rate": 1.0535972542721486e-05, "loss": 2.0812, "step": 14774 }, { "epoch": 0.49, "grad_norm": 0.7285898327827454, "learning_rate": 1.0534911174525038e-05, "loss": 2.025, "step": 14775 }, { "epoch": 0.49, "grad_norm": 0.7087196111679077, "learning_rate": 1.0533849800285473e-05, "loss": 2.0329, "step": 14776 }, { "epoch": 0.49, "grad_norm": 0.7079684734344482, "learning_rate": 1.053278842001479e-05, "loss": 2.0988, "step": 14777 }, { "epoch": 0.49, "grad_norm": 0.742074728012085, "learning_rate": 1.0531727033724974e-05, "loss": 2.1, "step": 14778 }, { "epoch": 0.49, "grad_norm": 0.7521358728408813, "learning_rate": 1.0530665641428017e-05, "loss": 2.0587, "step": 14779 }, { "epoch": 0.49, "grad_norm": 0.732659637928009, "learning_rate": 1.0529604243135914e-05, "loss": 2.1874, "step": 14780 }, { "epoch": 0.49, "grad_norm": 0.8465365767478943, "learning_rate": 1.0528542838860649e-05, "loss": 2.1838, "step": 14781 }, { "epoch": 0.49, "grad_norm": 0.7363940477371216, "learning_rate": 1.052748142861422e-05, "loss": 2.03, "step": 14782 }, { "epoch": 0.49, "grad_norm": 0.7460628151893616, "learning_rate": 1.0526420012408612e-05, "loss": 2.0515, "step": 14783 }, { "epoch": 0.49, "grad_norm": 0.7049014568328857, "learning_rate": 1.0525358590255817e-05, "loss": 2.025, "step": 14784 }, { "epoch": 0.49, "grad_norm": 0.7407488226890564, "learning_rate": 1.0524297162167834e-05, "loss": 2.0535, "step": 14785 }, { "epoch": 0.49, "grad_norm": 0.723631739616394, "learning_rate": 1.0523235728156647e-05, "loss": 2.0119, "step": 14786 }, { "epoch": 0.49, "grad_norm": 0.7573441863059998, "learning_rate": 1.0522174288234248e-05, "loss": 2.0564, "step": 14787 }, { "epoch": 0.49, "grad_norm": 0.7188177704811096, "learning_rate": 1.0521112842412631e-05, "loss": 2.0811, "step": 14788 }, { "epoch": 0.49, "grad_norm": 0.7122758030891418, "learning_rate": 1.0520051390703786e-05, "loss": 2.1229, "step": 14789 }, { "epoch": 0.49, "grad_norm": 0.7398770451545715, "learning_rate": 1.0518989933119705e-05, "loss": 2.1362, "step": 14790 }, { "epoch": 0.49, "grad_norm": 0.731458842754364, "learning_rate": 1.0517928469672383e-05, "loss": 2.0919, "step": 14791 }, { "epoch": 0.49, "grad_norm": 0.7129913568496704, "learning_rate": 1.0516867000373803e-05, "loss": 2.0775, "step": 14792 }, { "epoch": 0.49, "grad_norm": 0.7778252363204956, "learning_rate": 1.051580552523597e-05, "loss": 2.1144, "step": 14793 }, { "epoch": 0.49, "grad_norm": 0.7551724910736084, "learning_rate": 1.0514744044270861e-05, "loss": 2.0782, "step": 14794 }, { "epoch": 0.49, "grad_norm": 0.71817547082901, "learning_rate": 1.0513682557490477e-05, "loss": 2.0851, "step": 14795 }, { "epoch": 0.49, "grad_norm": 0.7446401715278625, "learning_rate": 1.0512621064906812e-05, "loss": 2.1078, "step": 14796 }, { "epoch": 0.49, "grad_norm": 0.7497610449790955, "learning_rate": 1.0511559566531853e-05, "loss": 2.0912, "step": 14797 }, { "epoch": 0.49, "grad_norm": 0.739708662033081, "learning_rate": 1.0510498062377595e-05, "loss": 2.1659, "step": 14798 }, { "epoch": 0.49, "grad_norm": 0.7608699798583984, "learning_rate": 1.0509436552456025e-05, "loss": 1.9546, "step": 14799 }, { "epoch": 0.49, "grad_norm": 0.7054779529571533, "learning_rate": 1.0508375036779142e-05, "loss": 2.0285, "step": 14800 }, { "epoch": 0.49, "grad_norm": 0.732172966003418, "learning_rate": 1.0507313515358937e-05, "loss": 2.1021, "step": 14801 }, { "epoch": 0.49, "grad_norm": 0.740344762802124, "learning_rate": 1.05062519882074e-05, "loss": 2.0766, "step": 14802 }, { "epoch": 0.49, "grad_norm": 0.746663510799408, "learning_rate": 1.0505190455336523e-05, "loss": 2.0438, "step": 14803 }, { "epoch": 0.49, "grad_norm": 0.733219563961029, "learning_rate": 1.0504128916758306e-05, "loss": 2.0896, "step": 14804 }, { "epoch": 0.49, "grad_norm": 0.7470428943634033, "learning_rate": 1.050306737248473e-05, "loss": 2.1443, "step": 14805 }, { "epoch": 0.49, "grad_norm": 0.682278037071228, "learning_rate": 1.0502005822527794e-05, "loss": 2.077, "step": 14806 }, { "epoch": 0.49, "grad_norm": 0.7123978137969971, "learning_rate": 1.0500944266899494e-05, "loss": 2.0967, "step": 14807 }, { "epoch": 0.49, "grad_norm": 0.7374909520149231, "learning_rate": 1.0499882705611816e-05, "loss": 2.0918, "step": 14808 }, { "epoch": 0.49, "grad_norm": 0.7331507205963135, "learning_rate": 1.0498821138676759e-05, "loss": 2.0707, "step": 14809 }, { "epoch": 0.49, "grad_norm": 0.7418730854988098, "learning_rate": 1.0497759566106311e-05, "loss": 2.075, "step": 14810 }, { "epoch": 0.49, "grad_norm": 0.7236738801002502, "learning_rate": 1.0496697987912467e-05, "loss": 2.1086, "step": 14811 }, { "epoch": 0.49, "grad_norm": 0.7181369662284851, "learning_rate": 1.0495636404107222e-05, "loss": 2.1007, "step": 14812 }, { "epoch": 0.49, "grad_norm": 0.7514585852622986, "learning_rate": 1.0494574814702567e-05, "loss": 2.0852, "step": 14813 }, { "epoch": 0.49, "grad_norm": 0.7502102851867676, "learning_rate": 1.0493513219710491e-05, "loss": 2.0993, "step": 14814 }, { "epoch": 0.49, "grad_norm": 0.7513480186462402, "learning_rate": 1.0492451619142996e-05, "loss": 2.0501, "step": 14815 }, { "epoch": 0.49, "grad_norm": 0.746101438999176, "learning_rate": 1.0491390013012075e-05, "loss": 2.0503, "step": 14816 }, { "epoch": 0.49, "grad_norm": 0.7273973226547241, "learning_rate": 1.049032840132971e-05, "loss": 1.9748, "step": 14817 }, { "epoch": 0.49, "grad_norm": 0.7350636720657349, "learning_rate": 1.0489266784107908e-05, "loss": 2.0512, "step": 14818 }, { "epoch": 0.49, "grad_norm": 0.7424909472465515, "learning_rate": 1.0488205161358653e-05, "loss": 2.0722, "step": 14819 }, { "epoch": 0.49, "grad_norm": 0.7231415510177612, "learning_rate": 1.0487143533093944e-05, "loss": 2.0642, "step": 14820 }, { "epoch": 0.49, "grad_norm": 0.7148972749710083, "learning_rate": 1.0486081899325772e-05, "loss": 2.0375, "step": 14821 }, { "epoch": 0.49, "grad_norm": 0.7788691520690918, "learning_rate": 1.0485020260066132e-05, "loss": 2.1145, "step": 14822 }, { "epoch": 0.49, "grad_norm": 0.7397133111953735, "learning_rate": 1.0483958615327017e-05, "loss": 2.0871, "step": 14823 }, { "epoch": 0.49, "grad_norm": 0.7532609701156616, "learning_rate": 1.0482896965120424e-05, "loss": 2.109, "step": 14824 }, { "epoch": 0.49, "grad_norm": 0.7499014735221863, "learning_rate": 1.048183530945834e-05, "loss": 2.1293, "step": 14825 }, { "epoch": 0.49, "grad_norm": 0.7129559516906738, "learning_rate": 1.0480773648352764e-05, "loss": 2.0304, "step": 14826 }, { "epoch": 0.49, "grad_norm": 0.7490065097808838, "learning_rate": 1.047971198181569e-05, "loss": 2.0992, "step": 14827 }, { "epoch": 0.49, "grad_norm": 0.751141369342804, "learning_rate": 1.047865030985911e-05, "loss": 2.0212, "step": 14828 }, { "epoch": 0.49, "grad_norm": 0.7428528070449829, "learning_rate": 1.0477588632495021e-05, "loss": 2.0472, "step": 14829 }, { "epoch": 0.49, "grad_norm": 0.7626310586929321, "learning_rate": 1.0476526949735414e-05, "loss": 2.0668, "step": 14830 }, { "epoch": 0.49, "grad_norm": 0.7268064618110657, "learning_rate": 1.0475465261592286e-05, "loss": 2.0644, "step": 14831 }, { "epoch": 0.49, "grad_norm": 0.7549756169319153, "learning_rate": 1.0474403568077629e-05, "loss": 2.068, "step": 14832 }, { "epoch": 0.49, "grad_norm": 0.7479764819145203, "learning_rate": 1.0473341869203439e-05, "loss": 2.07, "step": 14833 }, { "epoch": 0.49, "grad_norm": 0.7389526963233948, "learning_rate": 1.0472280164981711e-05, "loss": 2.0545, "step": 14834 }, { "epoch": 0.49, "grad_norm": 0.7164120078086853, "learning_rate": 1.0471218455424438e-05, "loss": 2.0708, "step": 14835 }, { "epoch": 0.49, "grad_norm": 0.7190315127372742, "learning_rate": 1.0470156740543613e-05, "loss": 2.1056, "step": 14836 }, { "epoch": 0.49, "grad_norm": 0.7541943192481995, "learning_rate": 1.0469095020351234e-05, "loss": 2.1135, "step": 14837 }, { "epoch": 0.49, "grad_norm": 0.7683500647544861, "learning_rate": 1.0468033294859297e-05, "loss": 2.0597, "step": 14838 }, { "epoch": 0.49, "grad_norm": 0.7602309584617615, "learning_rate": 1.0466971564079791e-05, "loss": 2.1157, "step": 14839 }, { "epoch": 0.49, "grad_norm": 0.7340739369392395, "learning_rate": 1.0465909828024717e-05, "loss": 2.0805, "step": 14840 }, { "epoch": 0.49, "grad_norm": 0.7412785887718201, "learning_rate": 1.0464848086706062e-05, "loss": 2.0509, "step": 14841 }, { "epoch": 0.49, "grad_norm": 0.7308230996131897, "learning_rate": 1.0463786340135829e-05, "loss": 2.0538, "step": 14842 }, { "epoch": 0.49, "grad_norm": 0.729729413986206, "learning_rate": 1.046272458832601e-05, "loss": 2.0844, "step": 14843 }, { "epoch": 0.49, "grad_norm": 0.7556081414222717, "learning_rate": 1.0461662831288597e-05, "loss": 1.9887, "step": 14844 }, { "epoch": 0.49, "grad_norm": 0.7030629515647888, "learning_rate": 1.046060106903559e-05, "loss": 2.0412, "step": 14845 }, { "epoch": 0.49, "grad_norm": 0.7378942966461182, "learning_rate": 1.0459539301578985e-05, "loss": 2.065, "step": 14846 }, { "epoch": 0.49, "grad_norm": 0.7397748827934265, "learning_rate": 1.0458477528930768e-05, "loss": 2.0969, "step": 14847 }, { "epoch": 0.49, "grad_norm": 0.72629714012146, "learning_rate": 1.0457415751102944e-05, "loss": 1.9918, "step": 14848 }, { "epoch": 0.49, "grad_norm": 0.7250366806983948, "learning_rate": 1.0456353968107505e-05, "loss": 2.0717, "step": 14849 }, { "epoch": 0.49, "grad_norm": 0.7371429800987244, "learning_rate": 1.0455292179956445e-05, "loss": 2.1163, "step": 14850 }, { "epoch": 0.49, "grad_norm": 0.7485713362693787, "learning_rate": 1.0454230386661763e-05, "loss": 2.0558, "step": 14851 }, { "epoch": 0.49, "grad_norm": 0.7368359565734863, "learning_rate": 1.045316858823545e-05, "loss": 2.0922, "step": 14852 }, { "epoch": 0.49, "grad_norm": 0.7014850378036499, "learning_rate": 1.0452106784689507e-05, "loss": 2.1043, "step": 14853 }, { "epoch": 0.49, "grad_norm": 0.7132440209388733, "learning_rate": 1.0451044976035922e-05, "loss": 2.0933, "step": 14854 }, { "epoch": 0.49, "grad_norm": 0.7075067162513733, "learning_rate": 1.0449983162286698e-05, "loss": 2.1263, "step": 14855 }, { "epoch": 0.49, "grad_norm": 0.7589308023452759, "learning_rate": 1.0448921343453828e-05, "loss": 2.1144, "step": 14856 }, { "epoch": 0.49, "grad_norm": 0.7201362252235413, "learning_rate": 1.0447859519549307e-05, "loss": 2.0986, "step": 14857 }, { "epoch": 0.49, "grad_norm": 0.7554084658622742, "learning_rate": 1.0446797690585132e-05, "loss": 1.9713, "step": 14858 }, { "epoch": 0.49, "grad_norm": 0.7545449733734131, "learning_rate": 1.0445735856573298e-05, "loss": 2.1074, "step": 14859 }, { "epoch": 0.49, "grad_norm": 0.7172949910163879, "learning_rate": 1.0444674017525802e-05, "loss": 2.1635, "step": 14860 }, { "epoch": 0.49, "grad_norm": 0.7619054317474365, "learning_rate": 1.0443612173454638e-05, "loss": 2.1584, "step": 14861 }, { "epoch": 0.49, "grad_norm": 0.7586904764175415, "learning_rate": 1.0442550324371808e-05, "loss": 2.0511, "step": 14862 }, { "epoch": 0.49, "grad_norm": 0.7278960347175598, "learning_rate": 1.0441488470289298e-05, "loss": 2.0518, "step": 14863 }, { "epoch": 0.49, "grad_norm": 0.7224172353744507, "learning_rate": 1.0440426611219114e-05, "loss": 2.0955, "step": 14864 }, { "epoch": 0.49, "grad_norm": 0.7188601493835449, "learning_rate": 1.0439364747173248e-05, "loss": 2.1007, "step": 14865 }, { "epoch": 0.49, "grad_norm": 0.7485142350196838, "learning_rate": 1.0438302878163695e-05, "loss": 2.0162, "step": 14866 }, { "epoch": 0.49, "grad_norm": 0.7297865152359009, "learning_rate": 1.0437241004202453e-05, "loss": 2.0578, "step": 14867 }, { "epoch": 0.49, "grad_norm": 0.7471247315406799, "learning_rate": 1.043617912530152e-05, "loss": 2.0313, "step": 14868 }, { "epoch": 0.49, "grad_norm": 0.7283530235290527, "learning_rate": 1.043511724147289e-05, "loss": 2.0541, "step": 14869 }, { "epoch": 0.49, "grad_norm": 0.7534549236297607, "learning_rate": 1.043405535272856e-05, "loss": 2.1377, "step": 14870 }, { "epoch": 0.49, "grad_norm": 0.7159428000450134, "learning_rate": 1.0432993459080527e-05, "loss": 2.1281, "step": 14871 }, { "epoch": 0.49, "grad_norm": 0.7389901280403137, "learning_rate": 1.043193156054079e-05, "loss": 2.1387, "step": 14872 }, { "epoch": 0.49, "grad_norm": 0.7569697499275208, "learning_rate": 1.043086965712134e-05, "loss": 2.0704, "step": 14873 }, { "epoch": 0.49, "grad_norm": 0.7161557078361511, "learning_rate": 1.0429807748834177e-05, "loss": 2.0791, "step": 14874 }, { "epoch": 0.49, "grad_norm": 0.7219780087471008, "learning_rate": 1.0428745835691304e-05, "loss": 2.0284, "step": 14875 }, { "epoch": 0.49, "grad_norm": 0.7340202927589417, "learning_rate": 1.0427683917704704e-05, "loss": 2.0737, "step": 14876 }, { "epoch": 0.49, "grad_norm": 0.7395620942115784, "learning_rate": 1.0426621994886385e-05, "loss": 2.076, "step": 14877 }, { "epoch": 0.49, "grad_norm": 0.7204892039299011, "learning_rate": 1.0425560067248342e-05, "loss": 2.0223, "step": 14878 }, { "epoch": 0.5, "grad_norm": 0.7494797706604004, "learning_rate": 1.042449813480257e-05, "loss": 2.1426, "step": 14879 }, { "epoch": 0.5, "grad_norm": 0.7590444684028625, "learning_rate": 1.0423436197561066e-05, "loss": 2.1084, "step": 14880 }, { "epoch": 0.5, "grad_norm": 0.7462781667709351, "learning_rate": 1.0422374255535828e-05, "loss": 2.0546, "step": 14881 }, { "epoch": 0.5, "grad_norm": 0.7948204874992371, "learning_rate": 1.0421312308738853e-05, "loss": 2.1134, "step": 14882 }, { "epoch": 0.5, "grad_norm": 0.7597406506538391, "learning_rate": 1.042025035718214e-05, "loss": 2.0733, "step": 14883 }, { "epoch": 0.5, "grad_norm": 0.7587404251098633, "learning_rate": 1.0419188400877684e-05, "loss": 2.0537, "step": 14884 }, { "epoch": 0.5, "grad_norm": 0.7504622340202332, "learning_rate": 1.0418126439837481e-05, "loss": 2.0388, "step": 14885 }, { "epoch": 0.5, "grad_norm": 0.749716579914093, "learning_rate": 1.0417064474073535e-05, "loss": 2.1063, "step": 14886 }, { "epoch": 0.5, "grad_norm": 0.7219901084899902, "learning_rate": 1.0416002503597835e-05, "loss": 2.0664, "step": 14887 }, { "epoch": 0.5, "grad_norm": 0.7423126697540283, "learning_rate": 1.0414940528422384e-05, "loss": 2.0935, "step": 14888 }, { "epoch": 0.5, "grad_norm": 0.7399823069572449, "learning_rate": 1.0413878548559179e-05, "loss": 2.0848, "step": 14889 }, { "epoch": 0.5, "grad_norm": 0.7641533613204956, "learning_rate": 1.0412816564020215e-05, "loss": 2.0497, "step": 14890 }, { "epoch": 0.5, "grad_norm": 0.7240563631057739, "learning_rate": 1.0411754574817492e-05, "loss": 2.0497, "step": 14891 }, { "epoch": 0.5, "grad_norm": 0.748987078666687, "learning_rate": 1.0410692580963007e-05, "loss": 2.0936, "step": 14892 }, { "epoch": 0.5, "grad_norm": 0.7537991404533386, "learning_rate": 1.0409630582468759e-05, "loss": 2.0376, "step": 14893 }, { "epoch": 0.5, "grad_norm": 0.753716766834259, "learning_rate": 1.0408568579346742e-05, "loss": 2.0471, "step": 14894 }, { "epoch": 0.5, "grad_norm": 0.7071313261985779, "learning_rate": 1.0407506571608961e-05, "loss": 2.0454, "step": 14895 }, { "epoch": 0.5, "grad_norm": 0.7359317541122437, "learning_rate": 1.0406444559267406e-05, "loss": 2.1734, "step": 14896 }, { "epoch": 0.5, "grad_norm": 0.7349830865859985, "learning_rate": 1.040538254233408e-05, "loss": 2.0682, "step": 14897 }, { "epoch": 0.5, "grad_norm": 0.7482216358184814, "learning_rate": 1.040432052082098e-05, "loss": 2.1106, "step": 14898 }, { "epoch": 0.5, "grad_norm": 0.7701519131660461, "learning_rate": 1.04032584947401e-05, "loss": 2.1622, "step": 14899 }, { "epoch": 0.5, "grad_norm": 0.7353580594062805, "learning_rate": 1.0402196464103449e-05, "loss": 2.0583, "step": 14900 }, { "epoch": 0.5, "grad_norm": 0.7098243236541748, "learning_rate": 1.0401134428923013e-05, "loss": 2.1072, "step": 14901 }, { "epoch": 0.5, "grad_norm": 0.7634531855583191, "learning_rate": 1.0400072389210796e-05, "loss": 2.1195, "step": 14902 }, { "epoch": 0.5, "grad_norm": 0.7568991184234619, "learning_rate": 1.0399010344978795e-05, "loss": 2.0333, "step": 14903 }, { "epoch": 0.5, "grad_norm": 0.7366955280303955, "learning_rate": 1.0397948296239011e-05, "loss": 2.0631, "step": 14904 }, { "epoch": 0.5, "grad_norm": 0.7325473427772522, "learning_rate": 1.039688624300344e-05, "loss": 2.1169, "step": 14905 }, { "epoch": 0.5, "grad_norm": 0.7596264481544495, "learning_rate": 1.039582418528408e-05, "loss": 2.1066, "step": 14906 }, { "epoch": 0.5, "grad_norm": 0.7336369752883911, "learning_rate": 1.0394762123092927e-05, "loss": 2.001, "step": 14907 }, { "epoch": 0.5, "grad_norm": 0.7136302590370178, "learning_rate": 1.0393700056441988e-05, "loss": 1.9942, "step": 14908 }, { "epoch": 0.5, "grad_norm": 0.7119197845458984, "learning_rate": 1.0392637985343257e-05, "loss": 2.1342, "step": 14909 }, { "epoch": 0.5, "grad_norm": 0.7322482466697693, "learning_rate": 1.0391575909808726e-05, "loss": 2.0638, "step": 14910 }, { "epoch": 0.5, "grad_norm": 0.7553600668907166, "learning_rate": 1.0390513829850407e-05, "loss": 2.0723, "step": 14911 }, { "epoch": 0.5, "grad_norm": 0.7396270632743835, "learning_rate": 1.0389451745480287e-05, "loss": 2.0935, "step": 14912 }, { "epoch": 0.5, "grad_norm": 0.7652947902679443, "learning_rate": 1.0388389656710372e-05, "loss": 2.0409, "step": 14913 }, { "epoch": 0.5, "grad_norm": 0.7241201996803284, "learning_rate": 1.0387327563552657e-05, "loss": 1.9566, "step": 14914 }, { "epoch": 0.5, "grad_norm": 0.7488453984260559, "learning_rate": 1.038626546601914e-05, "loss": 2.1189, "step": 14915 }, { "epoch": 0.5, "grad_norm": 0.7362661957740784, "learning_rate": 1.0385203364121825e-05, "loss": 2.1046, "step": 14916 }, { "epoch": 0.5, "grad_norm": 0.760785698890686, "learning_rate": 1.0384141257872711e-05, "loss": 2.0809, "step": 14917 }, { "epoch": 0.5, "grad_norm": 0.7182925343513489, "learning_rate": 1.0383079147283788e-05, "loss": 2.0664, "step": 14918 }, { "epoch": 0.5, "grad_norm": 0.8115835785865784, "learning_rate": 1.0382017032367065e-05, "loss": 2.1025, "step": 14919 }, { "epoch": 0.5, "grad_norm": 0.7542586326599121, "learning_rate": 1.0380954913134535e-05, "loss": 2.061, "step": 14920 }, { "epoch": 0.5, "grad_norm": 0.734032392501831, "learning_rate": 1.0379892789598201e-05, "loss": 2.1074, "step": 14921 }, { "epoch": 0.5, "grad_norm": 0.7570006251335144, "learning_rate": 1.0378830661770064e-05, "loss": 2.1267, "step": 14922 }, { "epoch": 0.5, "grad_norm": 0.7115615606307983, "learning_rate": 1.0377768529662116e-05, "loss": 2.005, "step": 14923 }, { "epoch": 0.5, "grad_norm": 0.7267482280731201, "learning_rate": 1.037670639328636e-05, "loss": 2.1589, "step": 14924 }, { "epoch": 0.5, "grad_norm": 0.7609515190124512, "learning_rate": 1.0375644252654797e-05, "loss": 2.0504, "step": 14925 }, { "epoch": 0.5, "grad_norm": 0.7393014430999756, "learning_rate": 1.0374582107779428e-05, "loss": 2.0218, "step": 14926 }, { "epoch": 0.5, "grad_norm": 0.7563897967338562, "learning_rate": 1.0373519958672247e-05, "loss": 2.0477, "step": 14927 }, { "epoch": 0.5, "grad_norm": 0.7553216814994812, "learning_rate": 1.037245780534526e-05, "loss": 2.0472, "step": 14928 }, { "epoch": 0.5, "grad_norm": 0.7543018460273743, "learning_rate": 1.0371395647810458e-05, "loss": 2.0794, "step": 14929 }, { "epoch": 0.5, "grad_norm": 0.7642726898193359, "learning_rate": 1.0370333486079847e-05, "loss": 2.0482, "step": 14930 }, { "epoch": 0.5, "grad_norm": 0.7337827682495117, "learning_rate": 1.0369271320165428e-05, "loss": 2.0773, "step": 14931 }, { "epoch": 0.5, "grad_norm": 0.7327796220779419, "learning_rate": 1.0368209150079193e-05, "loss": 2.0768, "step": 14932 }, { "epoch": 0.5, "grad_norm": 0.7250407934188843, "learning_rate": 1.0367146975833154e-05, "loss": 2.0812, "step": 14933 }, { "epoch": 0.5, "grad_norm": 0.7569813132286072, "learning_rate": 1.0366084797439297e-05, "loss": 2.0398, "step": 14934 }, { "epoch": 0.5, "grad_norm": 0.7283141016960144, "learning_rate": 1.0365022614909635e-05, "loss": 2.0673, "step": 14935 }, { "epoch": 0.5, "grad_norm": 0.7430052757263184, "learning_rate": 1.0363960428256157e-05, "loss": 2.1804, "step": 14936 }, { "epoch": 0.5, "grad_norm": 0.7416808009147644, "learning_rate": 1.0362898237490869e-05, "loss": 2.0851, "step": 14937 }, { "epoch": 0.5, "grad_norm": 0.7201119065284729, "learning_rate": 1.0361836042625766e-05, "loss": 1.9295, "step": 14938 }, { "epoch": 0.5, "grad_norm": 0.7499935626983643, "learning_rate": 1.0360773843672856e-05, "loss": 2.0678, "step": 14939 }, { "epoch": 0.5, "grad_norm": 0.7393625378608704, "learning_rate": 1.035971164064413e-05, "loss": 2.0804, "step": 14940 }, { "epoch": 0.5, "grad_norm": 0.7280915975570679, "learning_rate": 1.0358649433551595e-05, "loss": 2.1508, "step": 14941 }, { "epoch": 0.5, "grad_norm": 0.741487443447113, "learning_rate": 1.035758722240725e-05, "loss": 2.1156, "step": 14942 }, { "epoch": 0.5, "grad_norm": 0.7153503894805908, "learning_rate": 1.0356525007223092e-05, "loss": 2.1264, "step": 14943 }, { "epoch": 0.5, "grad_norm": 0.7580947875976562, "learning_rate": 1.0355462788011128e-05, "loss": 2.0357, "step": 14944 }, { "epoch": 0.5, "grad_norm": 0.7380011081695557, "learning_rate": 1.0354400564783347e-05, "loss": 2.0876, "step": 14945 }, { "epoch": 0.5, "grad_norm": 0.73245769739151, "learning_rate": 1.035333833755176e-05, "loss": 2.0588, "step": 14946 }, { "epoch": 0.5, "grad_norm": 0.7557364106178284, "learning_rate": 1.0352276106328365e-05, "loss": 2.0769, "step": 14947 }, { "epoch": 0.5, "grad_norm": 0.7831631302833557, "learning_rate": 1.0351213871125159e-05, "loss": 2.043, "step": 14948 }, { "epoch": 0.5, "grad_norm": 0.7449649572372437, "learning_rate": 1.0350151631954144e-05, "loss": 2.1206, "step": 14949 }, { "epoch": 0.5, "grad_norm": 0.7104489803314209, "learning_rate": 1.034908938882732e-05, "loss": 1.9836, "step": 14950 }, { "epoch": 0.5, "grad_norm": 0.7336370348930359, "learning_rate": 1.0348027141756692e-05, "loss": 2.1277, "step": 14951 }, { "epoch": 0.5, "grad_norm": 0.723856508731842, "learning_rate": 1.0346964890754255e-05, "loss": 2.0523, "step": 14952 }, { "epoch": 0.5, "grad_norm": 0.7445895075798035, "learning_rate": 1.0345902635832013e-05, "loss": 2.1037, "step": 14953 }, { "epoch": 0.5, "grad_norm": 0.7457610368728638, "learning_rate": 1.0344840377001963e-05, "loss": 2.052, "step": 14954 }, { "epoch": 0.5, "grad_norm": 0.7634707093238831, "learning_rate": 1.0343778114276116e-05, "loss": 2.0986, "step": 14955 }, { "epoch": 0.5, "grad_norm": 0.7595989108085632, "learning_rate": 1.0342715847666456e-05, "loss": 2.1089, "step": 14956 }, { "epoch": 0.5, "grad_norm": 0.7370497584342957, "learning_rate": 1.0341653577185e-05, "loss": 2.1221, "step": 14957 }, { "epoch": 0.5, "grad_norm": 0.7734804749488831, "learning_rate": 1.034059130284374e-05, "loss": 2.0986, "step": 14958 }, { "epoch": 0.5, "grad_norm": 0.7678103446960449, "learning_rate": 1.0339529024654677e-05, "loss": 2.0241, "step": 14959 }, { "epoch": 0.5, "grad_norm": 0.7381793856620789, "learning_rate": 1.0338466742629816e-05, "loss": 1.9941, "step": 14960 }, { "epoch": 0.5, "grad_norm": 0.7295925617218018, "learning_rate": 1.0337404456781155e-05, "loss": 2.0989, "step": 14961 }, { "epoch": 0.5, "grad_norm": 0.7533796429634094, "learning_rate": 1.0336342167120696e-05, "loss": 2.1011, "step": 14962 }, { "epoch": 0.5, "grad_norm": 0.7278109788894653, "learning_rate": 1.0335279873660443e-05, "loss": 2.0117, "step": 14963 }, { "epoch": 0.5, "grad_norm": 0.7220146059989929, "learning_rate": 1.033421757641239e-05, "loss": 2.0747, "step": 14964 }, { "epoch": 0.5, "grad_norm": 0.7226706147193909, "learning_rate": 1.0333155275388546e-05, "loss": 2.0671, "step": 14965 }, { "epoch": 0.5, "grad_norm": 0.7375779151916504, "learning_rate": 1.0332092970600911e-05, "loss": 2.0097, "step": 14966 }, { "epoch": 0.5, "grad_norm": 0.7444968819618225, "learning_rate": 1.0331030662061479e-05, "loss": 2.1171, "step": 14967 }, { "epoch": 0.5, "grad_norm": 0.73470139503479, "learning_rate": 1.0329968349782263e-05, "loss": 2.1067, "step": 14968 }, { "epoch": 0.5, "grad_norm": 0.7416480183601379, "learning_rate": 1.0328906033775252e-05, "loss": 2.0331, "step": 14969 }, { "epoch": 0.5, "grad_norm": 0.7530362010002136, "learning_rate": 1.0327843714052456e-05, "loss": 2.0737, "step": 14970 }, { "epoch": 0.5, "grad_norm": 0.7495905756950378, "learning_rate": 1.0326781390625873e-05, "loss": 2.0987, "step": 14971 }, { "epoch": 0.5, "grad_norm": 0.7729963064193726, "learning_rate": 1.0325719063507507e-05, "loss": 2.1022, "step": 14972 }, { "epoch": 0.5, "grad_norm": 0.77906733751297, "learning_rate": 1.0324656732709355e-05, "loss": 2.1234, "step": 14973 }, { "epoch": 0.5, "grad_norm": 0.7431879043579102, "learning_rate": 1.0323594398243424e-05, "loss": 2.0721, "step": 14974 }, { "epoch": 0.5, "grad_norm": 0.78221595287323, "learning_rate": 1.0322532060121713e-05, "loss": 2.0817, "step": 14975 }, { "epoch": 0.5, "grad_norm": 0.7753394842147827, "learning_rate": 1.0321469718356221e-05, "loss": 2.0719, "step": 14976 }, { "epoch": 0.5, "grad_norm": 0.7251288294792175, "learning_rate": 1.0320407372958959e-05, "loss": 2.128, "step": 14977 }, { "epoch": 0.5, "grad_norm": 0.73030686378479, "learning_rate": 1.0319345023941914e-05, "loss": 2.0274, "step": 14978 }, { "epoch": 0.5, "grad_norm": 0.7386972308158875, "learning_rate": 1.03182826713171e-05, "loss": 2.0903, "step": 14979 }, { "epoch": 0.5, "grad_norm": 0.7418166399002075, "learning_rate": 1.0317220315096517e-05, "loss": 2.0638, "step": 14980 }, { "epoch": 0.5, "grad_norm": 0.7402293682098389, "learning_rate": 1.0316157955292162e-05, "loss": 2.0901, "step": 14981 }, { "epoch": 0.5, "grad_norm": 0.7058587074279785, "learning_rate": 1.031509559191604e-05, "loss": 2.068, "step": 14982 }, { "epoch": 0.5, "grad_norm": 0.7270570397377014, "learning_rate": 1.0314033224980154e-05, "loss": 2.0533, "step": 14983 }, { "epoch": 0.5, "grad_norm": 0.743494987487793, "learning_rate": 1.0312970854496502e-05, "loss": 2.0321, "step": 14984 }, { "epoch": 0.5, "grad_norm": 0.7592753171920776, "learning_rate": 1.031190848047709e-05, "loss": 2.016, "step": 14985 }, { "epoch": 0.5, "grad_norm": 0.772149384021759, "learning_rate": 1.0310846102933921e-05, "loss": 2.1343, "step": 14986 }, { "epoch": 0.5, "grad_norm": 0.7260019183158875, "learning_rate": 1.0309783721878992e-05, "loss": 2.0509, "step": 14987 }, { "epoch": 0.5, "grad_norm": 0.7711973786354065, "learning_rate": 1.0308721337324313e-05, "loss": 2.1454, "step": 14988 }, { "epoch": 0.5, "grad_norm": 0.7398608922958374, "learning_rate": 1.0307658949281874e-05, "loss": 2.1267, "step": 14989 }, { "epoch": 0.5, "grad_norm": 0.7645648717880249, "learning_rate": 1.0306596557763689e-05, "loss": 2.074, "step": 14990 }, { "epoch": 0.5, "grad_norm": 0.7492560148239136, "learning_rate": 1.0305534162781755e-05, "loss": 2.0244, "step": 14991 }, { "epoch": 0.5, "grad_norm": 0.7266944646835327, "learning_rate": 1.0304471764348071e-05, "loss": 2.1695, "step": 14992 }, { "epoch": 0.5, "grad_norm": 0.7242462635040283, "learning_rate": 1.030340936247465e-05, "loss": 2.0263, "step": 14993 }, { "epoch": 0.5, "grad_norm": 0.7420892119407654, "learning_rate": 1.0302346957173485e-05, "loss": 2.054, "step": 14994 }, { "epoch": 0.5, "grad_norm": 0.749206006526947, "learning_rate": 1.0301284548456583e-05, "loss": 2.0831, "step": 14995 }, { "epoch": 0.5, "grad_norm": 0.7119378447532654, "learning_rate": 1.0300222136335942e-05, "loss": 2.0203, "step": 14996 }, { "epoch": 0.5, "grad_norm": 0.7122659683227539, "learning_rate": 1.0299159720823568e-05, "loss": 2.118, "step": 14997 }, { "epoch": 0.5, "grad_norm": 0.7372711896896362, "learning_rate": 1.0298097301931463e-05, "loss": 2.0184, "step": 14998 }, { "epoch": 0.5, "grad_norm": 0.7319211959838867, "learning_rate": 1.0297034879671632e-05, "loss": 2.1168, "step": 14999 }, { "epoch": 0.5, "grad_norm": 0.7794976830482483, "learning_rate": 1.029597245405607e-05, "loss": 2.1424, "step": 15000 }, { "epoch": 0.5, "grad_norm": 0.7259954810142517, "learning_rate": 1.029491002509679e-05, "loss": 2.1461, "step": 15001 }, { "epoch": 0.5, "grad_norm": 0.7392929792404175, "learning_rate": 1.0293847592805786e-05, "loss": 2.0404, "step": 15002 }, { "epoch": 0.5, "grad_norm": 0.7564783692359924, "learning_rate": 1.0292785157195063e-05, "loss": 2.1237, "step": 15003 }, { "epoch": 0.5, "grad_norm": 0.7409192323684692, "learning_rate": 1.0291722718276626e-05, "loss": 2.0812, "step": 15004 }, { "epoch": 0.5, "grad_norm": 0.7153927087783813, "learning_rate": 1.0290660276062478e-05, "loss": 2.1209, "step": 15005 }, { "epoch": 0.5, "grad_norm": 0.7136696577072144, "learning_rate": 1.028959783056462e-05, "loss": 1.9567, "step": 15006 }, { "epoch": 0.5, "grad_norm": 0.7844505906105042, "learning_rate": 1.0288535381795055e-05, "loss": 2.0603, "step": 15007 }, { "epoch": 0.5, "grad_norm": 0.7610125541687012, "learning_rate": 1.0287472929765787e-05, "loss": 2.0848, "step": 15008 }, { "epoch": 0.5, "grad_norm": 0.7249979376792908, "learning_rate": 1.0286410474488817e-05, "loss": 2.1011, "step": 15009 }, { "epoch": 0.5, "grad_norm": 0.7571722865104675, "learning_rate": 1.0285348015976154e-05, "loss": 2.0825, "step": 15010 }, { "epoch": 0.5, "grad_norm": 0.7320692539215088, "learning_rate": 1.0284285554239788e-05, "loss": 2.0592, "step": 15011 }, { "epoch": 0.5, "grad_norm": 0.735034167766571, "learning_rate": 1.0283223089291738e-05, "loss": 2.0401, "step": 15012 }, { "epoch": 0.5, "grad_norm": 0.7413105368614197, "learning_rate": 1.0282160621143995e-05, "loss": 2.0998, "step": 15013 }, { "epoch": 0.5, "grad_norm": 0.7590155005455017, "learning_rate": 1.0281098149808566e-05, "loss": 2.0922, "step": 15014 }, { "epoch": 0.5, "grad_norm": 0.7385783195495605, "learning_rate": 1.028003567529746e-05, "loss": 2.1231, "step": 15015 }, { "epoch": 0.5, "grad_norm": 0.7663057446479797, "learning_rate": 1.0278973197622672e-05, "loss": 2.12, "step": 15016 }, { "epoch": 0.5, "grad_norm": 0.7794922590255737, "learning_rate": 1.0277910716796208e-05, "loss": 2.0075, "step": 15017 }, { "epoch": 0.5, "grad_norm": 0.8342970609664917, "learning_rate": 1.0276848232830073e-05, "loss": 2.0844, "step": 15018 }, { "epoch": 0.5, "grad_norm": 0.730302095413208, "learning_rate": 1.0275785745736267e-05, "loss": 2.0895, "step": 15019 }, { "epoch": 0.5, "grad_norm": 0.7153183817863464, "learning_rate": 1.0274723255526795e-05, "loss": 1.9688, "step": 15020 }, { "epoch": 0.5, "grad_norm": 0.7469394207000732, "learning_rate": 1.0273660762213663e-05, "loss": 2.0676, "step": 15021 }, { "epoch": 0.5, "grad_norm": 0.7591396570205688, "learning_rate": 1.0272598265808871e-05, "loss": 2.0814, "step": 15022 }, { "epoch": 0.5, "grad_norm": 0.789517343044281, "learning_rate": 1.0271535766324425e-05, "loss": 2.0854, "step": 15023 }, { "epoch": 0.5, "grad_norm": 0.7499868869781494, "learning_rate": 1.0270473263772325e-05, "loss": 2.1533, "step": 15024 }, { "epoch": 0.5, "grad_norm": 0.7245014309883118, "learning_rate": 1.0269410758164576e-05, "loss": 2.0434, "step": 15025 }, { "epoch": 0.5, "grad_norm": 0.7627546787261963, "learning_rate": 1.0268348249513185e-05, "loss": 2.0336, "step": 15026 }, { "epoch": 0.5, "grad_norm": 0.7713609337806702, "learning_rate": 1.0267285737830151e-05, "loss": 2.0851, "step": 15027 }, { "epoch": 0.5, "grad_norm": 0.7164605855941772, "learning_rate": 1.0266223223127479e-05, "loss": 2.0451, "step": 15028 }, { "epoch": 0.5, "grad_norm": 0.7042728662490845, "learning_rate": 1.0265160705417172e-05, "loss": 2.0453, "step": 15029 }, { "epoch": 0.5, "grad_norm": 0.7053623795509338, "learning_rate": 1.0264098184711235e-05, "loss": 2.1062, "step": 15030 }, { "epoch": 0.5, "grad_norm": 0.7514421343803406, "learning_rate": 1.0263035661021673e-05, "loss": 2.0617, "step": 15031 }, { "epoch": 0.5, "grad_norm": 0.6990146040916443, "learning_rate": 1.026197313436049e-05, "loss": 2.0726, "step": 15032 }, { "epoch": 0.5, "grad_norm": 0.7161879539489746, "learning_rate": 1.0260910604739685e-05, "loss": 2.0757, "step": 15033 }, { "epoch": 0.5, "grad_norm": 0.7399678230285645, "learning_rate": 1.0259848072171265e-05, "loss": 2.0967, "step": 15034 }, { "epoch": 0.5, "grad_norm": 0.714238703250885, "learning_rate": 1.0258785536667237e-05, "loss": 2.0928, "step": 15035 }, { "epoch": 0.5, "grad_norm": 0.7681207656860352, "learning_rate": 1.0257722998239596e-05, "loss": 2.002, "step": 15036 }, { "epoch": 0.5, "grad_norm": 0.7067738771438599, "learning_rate": 1.0256660456900358e-05, "loss": 2.1206, "step": 15037 }, { "epoch": 0.5, "grad_norm": 0.7332746982574463, "learning_rate": 1.0255597912661515e-05, "loss": 1.9893, "step": 15038 }, { "epoch": 0.5, "grad_norm": 0.7434923648834229, "learning_rate": 1.0254535365535082e-05, "loss": 2.0891, "step": 15039 }, { "epoch": 0.5, "grad_norm": 0.7306588292121887, "learning_rate": 1.0253472815533052e-05, "loss": 2.064, "step": 15040 }, { "epoch": 0.5, "grad_norm": 0.6879561543464661, "learning_rate": 1.0252410262667439e-05, "loss": 2.0323, "step": 15041 }, { "epoch": 0.5, "grad_norm": 0.7288201451301575, "learning_rate": 1.025134770695024e-05, "loss": 2.0726, "step": 15042 }, { "epoch": 0.5, "grad_norm": 0.7382511496543884, "learning_rate": 1.0250285148393464e-05, "loss": 2.0406, "step": 15043 }, { "epoch": 0.5, "grad_norm": 0.734129786491394, "learning_rate": 1.0249222587009111e-05, "loss": 1.9874, "step": 15044 }, { "epoch": 0.5, "grad_norm": 0.7282660603523254, "learning_rate": 1.0248160022809188e-05, "loss": 2.1071, "step": 15045 }, { "epoch": 0.5, "grad_norm": 0.742565929889679, "learning_rate": 1.0247097455805699e-05, "loss": 2.1104, "step": 15046 }, { "epoch": 0.5, "grad_norm": 0.7477833032608032, "learning_rate": 1.0246034886010647e-05, "loss": 2.0561, "step": 15047 }, { "epoch": 0.5, "grad_norm": 0.7254829406738281, "learning_rate": 1.0244972313436039e-05, "loss": 2.03, "step": 15048 }, { "epoch": 0.5, "grad_norm": 0.7322990894317627, "learning_rate": 1.0243909738093876e-05, "loss": 2.0664, "step": 15049 }, { "epoch": 0.5, "grad_norm": 0.7462686896324158, "learning_rate": 1.0242847159996165e-05, "loss": 2.0078, "step": 15050 }, { "epoch": 0.5, "grad_norm": 0.7335122227668762, "learning_rate": 1.0241784579154907e-05, "loss": 2.1686, "step": 15051 }, { "epoch": 0.5, "grad_norm": 0.7716194987297058, "learning_rate": 1.024072199558211e-05, "loss": 2.059, "step": 15052 }, { "epoch": 0.5, "grad_norm": 0.7603804469108582, "learning_rate": 1.0239659409289775e-05, "loss": 2.0512, "step": 15053 }, { "epoch": 0.5, "grad_norm": 0.7174856662750244, "learning_rate": 1.023859682028991e-05, "loss": 2.0278, "step": 15054 }, { "epoch": 0.5, "grad_norm": 0.7241097688674927, "learning_rate": 1.0237534228594519e-05, "loss": 2.0836, "step": 15055 }, { "epoch": 0.5, "grad_norm": 0.7563875317573547, "learning_rate": 1.0236471634215604e-05, "loss": 2.0814, "step": 15056 }, { "epoch": 0.5, "grad_norm": 0.7260016202926636, "learning_rate": 1.023540903716517e-05, "loss": 2.0738, "step": 15057 }, { "epoch": 0.5, "grad_norm": 0.7824963331222534, "learning_rate": 1.0234346437455225e-05, "loss": 2.0687, "step": 15058 }, { "epoch": 0.5, "grad_norm": 0.7510564923286438, "learning_rate": 1.0233283835097771e-05, "loss": 2.0284, "step": 15059 }, { "epoch": 0.5, "grad_norm": 0.751591682434082, "learning_rate": 1.023222123010481e-05, "loss": 2.0995, "step": 15060 }, { "epoch": 0.5, "grad_norm": 0.7454843521118164, "learning_rate": 1.0231158622488355e-05, "loss": 2.0902, "step": 15061 }, { "epoch": 0.5, "grad_norm": 0.7171697616577148, "learning_rate": 1.0230096012260402e-05, "loss": 2.0762, "step": 15062 }, { "epoch": 0.5, "grad_norm": 0.7329590916633606, "learning_rate": 1.0229033399432959e-05, "loss": 2.046, "step": 15063 }, { "epoch": 0.5, "grad_norm": 0.7426868677139282, "learning_rate": 1.0227970784018032e-05, "loss": 2.1133, "step": 15064 }, { "epoch": 0.5, "grad_norm": 0.7365871071815491, "learning_rate": 1.0226908166027623e-05, "loss": 2.0647, "step": 15065 }, { "epoch": 0.5, "grad_norm": 0.7472206950187683, "learning_rate": 1.0225845545473739e-05, "loss": 2.0433, "step": 15066 }, { "epoch": 0.5, "grad_norm": 0.7443026900291443, "learning_rate": 1.0224782922368384e-05, "loss": 2.098, "step": 15067 }, { "epoch": 0.5, "grad_norm": 0.7591605186462402, "learning_rate": 1.0223720296723564e-05, "loss": 2.1284, "step": 15068 }, { "epoch": 0.5, "grad_norm": 0.7183951735496521, "learning_rate": 1.0222657668551284e-05, "loss": 2.104, "step": 15069 }, { "epoch": 0.5, "grad_norm": 0.7175899147987366, "learning_rate": 1.022159503786355e-05, "loss": 2.0853, "step": 15070 }, { "epoch": 0.5, "grad_norm": 0.7538208365440369, "learning_rate": 1.0220532404672358e-05, "loss": 2.053, "step": 15071 }, { "epoch": 0.5, "grad_norm": 0.7765169739723206, "learning_rate": 1.0219469768989726e-05, "loss": 2.0836, "step": 15072 }, { "epoch": 0.5, "grad_norm": 0.7378482222557068, "learning_rate": 1.0218407130827655e-05, "loss": 2.0727, "step": 15073 }, { "epoch": 0.5, "grad_norm": 0.7468685507774353, "learning_rate": 1.0217344490198143e-05, "loss": 2.1315, "step": 15074 }, { "epoch": 0.5, "grad_norm": 0.7393693327903748, "learning_rate": 1.0216281847113202e-05, "loss": 2.0738, "step": 15075 }, { "epoch": 0.5, "grad_norm": 0.7757687568664551, "learning_rate": 1.0215219201584836e-05, "loss": 2.1222, "step": 15076 }, { "epoch": 0.5, "grad_norm": 0.7412993311882019, "learning_rate": 1.021415655362505e-05, "loss": 2.113, "step": 15077 }, { "epoch": 0.5, "grad_norm": 0.7452883124351501, "learning_rate": 1.0213093903245848e-05, "loss": 2.0894, "step": 15078 }, { "epoch": 0.5, "grad_norm": 0.7222248315811157, "learning_rate": 1.0212031250459236e-05, "loss": 2.1022, "step": 15079 }, { "epoch": 0.5, "grad_norm": 0.7397889494895935, "learning_rate": 1.021096859527722e-05, "loss": 2.0902, "step": 15080 }, { "epoch": 0.5, "grad_norm": 0.7630834579467773, "learning_rate": 1.0209905937711806e-05, "loss": 2.0848, "step": 15081 }, { "epoch": 0.5, "grad_norm": 0.7141994833946228, "learning_rate": 1.020884327777499e-05, "loss": 2.0776, "step": 15082 }, { "epoch": 0.5, "grad_norm": 0.7081262469291687, "learning_rate": 1.0207780615478794e-05, "loss": 2.1083, "step": 15083 }, { "epoch": 0.5, "grad_norm": 0.7765079736709595, "learning_rate": 1.0206717950835212e-05, "loss": 2.0279, "step": 15084 }, { "epoch": 0.5, "grad_norm": 0.7433781623840332, "learning_rate": 1.0205655283856251e-05, "loss": 2.0705, "step": 15085 }, { "epoch": 0.5, "grad_norm": 0.7235702276229858, "learning_rate": 1.0204592614553917e-05, "loss": 2.0715, "step": 15086 }, { "epoch": 0.5, "grad_norm": 0.7098559141159058, "learning_rate": 1.0203529942940214e-05, "loss": 2.0702, "step": 15087 }, { "epoch": 0.5, "grad_norm": 0.7294248342514038, "learning_rate": 1.020246726902715e-05, "loss": 2.0861, "step": 15088 }, { "epoch": 0.5, "grad_norm": 0.7264123558998108, "learning_rate": 1.020140459282673e-05, "loss": 2.0442, "step": 15089 }, { "epoch": 0.5, "grad_norm": 0.7129672169685364, "learning_rate": 1.020034191435096e-05, "loss": 2.0229, "step": 15090 }, { "epoch": 0.5, "grad_norm": 0.7302761673927307, "learning_rate": 1.0199279233611843e-05, "loss": 2.0656, "step": 15091 }, { "epoch": 0.5, "grad_norm": 0.7293055653572083, "learning_rate": 1.0198216550621388e-05, "loss": 2.0566, "step": 15092 }, { "epoch": 0.5, "grad_norm": 0.7157041430473328, "learning_rate": 1.0197153865391593e-05, "loss": 2.1004, "step": 15093 }, { "epoch": 0.5, "grad_norm": 0.765071451663971, "learning_rate": 1.0196091177934476e-05, "loss": 2.1405, "step": 15094 }, { "epoch": 0.5, "grad_norm": 0.7433205246925354, "learning_rate": 1.0195028488262034e-05, "loss": 2.0846, "step": 15095 }, { "epoch": 0.5, "grad_norm": 0.7083059549331665, "learning_rate": 1.0193965796386271e-05, "loss": 2.0481, "step": 15096 }, { "epoch": 0.5, "grad_norm": 0.7131375670433044, "learning_rate": 1.0192903102319198e-05, "loss": 2.0603, "step": 15097 }, { "epoch": 0.5, "grad_norm": 0.7157630920410156, "learning_rate": 1.019184040607282e-05, "loss": 2.0362, "step": 15098 }, { "epoch": 0.5, "grad_norm": 0.7088899612426758, "learning_rate": 1.019077770765914e-05, "loss": 2.0728, "step": 15099 }, { "epoch": 0.5, "grad_norm": 0.733696460723877, "learning_rate": 1.0189715007090167e-05, "loss": 2.0708, "step": 15100 }, { "epoch": 0.5, "grad_norm": 0.758601725101471, "learning_rate": 1.0188652304377901e-05, "loss": 2.0514, "step": 15101 }, { "epoch": 0.5, "grad_norm": 0.7484540939331055, "learning_rate": 1.0187589599534356e-05, "loss": 2.0157, "step": 15102 }, { "epoch": 0.5, "grad_norm": 0.7373932003974915, "learning_rate": 1.0186526892571535e-05, "loss": 2.0475, "step": 15103 }, { "epoch": 0.5, "grad_norm": 0.7213883399963379, "learning_rate": 1.0185464183501437e-05, "loss": 2.1162, "step": 15104 }, { "epoch": 0.5, "grad_norm": 0.7763431072235107, "learning_rate": 1.0184401472336078e-05, "loss": 2.0045, "step": 15105 }, { "epoch": 0.5, "grad_norm": 0.7215366363525391, "learning_rate": 1.0183338759087458e-05, "loss": 2.0769, "step": 15106 }, { "epoch": 0.5, "grad_norm": 0.7565811276435852, "learning_rate": 1.018227604376758e-05, "loss": 2.1016, "step": 15107 }, { "epoch": 0.5, "grad_norm": 0.7278676629066467, "learning_rate": 1.0181213326388461e-05, "loss": 2.1173, "step": 15108 }, { "epoch": 0.5, "grad_norm": 0.7293820381164551, "learning_rate": 1.0180150606962097e-05, "loss": 2.0596, "step": 15109 }, { "epoch": 0.5, "grad_norm": 0.7558550834655762, "learning_rate": 1.0179087885500496e-05, "loss": 2.1054, "step": 15110 }, { "epoch": 0.5, "grad_norm": 0.7702699303627014, "learning_rate": 1.0178025162015666e-05, "loss": 2.0019, "step": 15111 }, { "epoch": 0.5, "grad_norm": 0.734424889087677, "learning_rate": 1.0176962436519612e-05, "loss": 2.0816, "step": 15112 }, { "epoch": 0.5, "grad_norm": 0.7536603212356567, "learning_rate": 1.0175899709024339e-05, "loss": 2.1009, "step": 15113 }, { "epoch": 0.5, "grad_norm": 0.7323282957077026, "learning_rate": 1.0174836979541858e-05, "loss": 2.1001, "step": 15114 }, { "epoch": 0.5, "grad_norm": 0.7491731643676758, "learning_rate": 1.0173774248084164e-05, "loss": 2.101, "step": 15115 }, { "epoch": 0.5, "grad_norm": 0.7597584128379822, "learning_rate": 1.0172711514663279e-05, "loss": 2.0307, "step": 15116 }, { "epoch": 0.5, "grad_norm": 0.7628453969955444, "learning_rate": 1.0171648779291197e-05, "loss": 2.1231, "step": 15117 }, { "epoch": 0.5, "grad_norm": 0.7514697909355164, "learning_rate": 1.0170586041979924e-05, "loss": 2.1547, "step": 15118 }, { "epoch": 0.5, "grad_norm": 0.7454550862312317, "learning_rate": 1.0169523302741476e-05, "loss": 2.0855, "step": 15119 }, { "epoch": 0.5, "grad_norm": 0.7269362211227417, "learning_rate": 1.0168460561587848e-05, "loss": 2.046, "step": 15120 }, { "epoch": 0.5, "grad_norm": 0.7522753477096558, "learning_rate": 1.0167397818531053e-05, "loss": 2.088, "step": 15121 }, { "epoch": 0.5, "grad_norm": 0.721161961555481, "learning_rate": 1.0166335073583096e-05, "loss": 2.0585, "step": 15122 }, { "epoch": 0.5, "grad_norm": 0.7396345138549805, "learning_rate": 1.016527232675598e-05, "loss": 2.0434, "step": 15123 }, { "epoch": 0.5, "grad_norm": 0.7660729885101318, "learning_rate": 1.0164209578061719e-05, "loss": 2.1389, "step": 15124 }, { "epoch": 0.5, "grad_norm": 0.7383466362953186, "learning_rate": 1.0163146827512314e-05, "loss": 2.0484, "step": 15125 }, { "epoch": 0.5, "grad_norm": 0.7397867441177368, "learning_rate": 1.016208407511977e-05, "loss": 2.1103, "step": 15126 }, { "epoch": 0.5, "grad_norm": 0.7015363574028015, "learning_rate": 1.0161021320896097e-05, "loss": 2.0816, "step": 15127 }, { "epoch": 0.5, "grad_norm": 0.7294539213180542, "learning_rate": 1.0159958564853297e-05, "loss": 2.0564, "step": 15128 }, { "epoch": 0.5, "grad_norm": 0.7421606183052063, "learning_rate": 1.0158895807003375e-05, "loss": 2.0636, "step": 15129 }, { "epoch": 0.5, "grad_norm": 0.7420483827590942, "learning_rate": 1.0157833047358347e-05, "loss": 2.1263, "step": 15130 }, { "epoch": 0.5, "grad_norm": 0.738120436668396, "learning_rate": 1.015677028593021e-05, "loss": 2.0523, "step": 15131 }, { "epoch": 0.5, "grad_norm": 0.732755184173584, "learning_rate": 1.015570752273098e-05, "loss": 2.0664, "step": 15132 }, { "epoch": 0.5, "grad_norm": 0.7340903282165527, "learning_rate": 1.0154644757772654e-05, "loss": 2.1405, "step": 15133 }, { "epoch": 0.5, "grad_norm": 0.7227667570114136, "learning_rate": 1.0153581991067243e-05, "loss": 2.0689, "step": 15134 }, { "epoch": 0.5, "grad_norm": 0.7309543490409851, "learning_rate": 1.015251922262675e-05, "loss": 2.0788, "step": 15135 }, { "epoch": 0.5, "grad_norm": 0.7164747714996338, "learning_rate": 1.0151456452463192e-05, "loss": 2.0419, "step": 15136 }, { "epoch": 0.5, "grad_norm": 0.773080587387085, "learning_rate": 1.0150393680588557e-05, "loss": 2.025, "step": 15137 }, { "epoch": 0.5, "grad_norm": 0.758573591709137, "learning_rate": 1.0149330907014867e-05, "loss": 2.0936, "step": 15138 }, { "epoch": 0.5, "grad_norm": 0.7612324357032776, "learning_rate": 1.0148268131754125e-05, "loss": 2.0881, "step": 15139 }, { "epoch": 0.5, "grad_norm": 0.7490122318267822, "learning_rate": 1.0147205354818334e-05, "loss": 1.9939, "step": 15140 }, { "epoch": 0.5, "grad_norm": 0.7673269510269165, "learning_rate": 1.0146142576219508e-05, "loss": 2.0337, "step": 15141 }, { "epoch": 0.5, "grad_norm": 0.7624891400337219, "learning_rate": 1.0145079795969644e-05, "loss": 2.0566, "step": 15142 }, { "epoch": 0.5, "grad_norm": 0.718422532081604, "learning_rate": 1.0144017014080759e-05, "loss": 2.0209, "step": 15143 }, { "epoch": 0.5, "grad_norm": 0.728847086429596, "learning_rate": 1.014295423056485e-05, "loss": 2.0575, "step": 15144 }, { "epoch": 0.5, "grad_norm": 0.7390760779380798, "learning_rate": 1.0141891445433926e-05, "loss": 2.0314, "step": 15145 }, { "epoch": 0.5, "grad_norm": 0.7592571973800659, "learning_rate": 1.0140828658699999e-05, "loss": 1.996, "step": 15146 }, { "epoch": 0.5, "grad_norm": 0.7184703350067139, "learning_rate": 1.0139765870375071e-05, "loss": 2.061, "step": 15147 }, { "epoch": 0.5, "grad_norm": 0.7111548781394958, "learning_rate": 1.013870308047115e-05, "loss": 2.0621, "step": 15148 }, { "epoch": 0.5, "grad_norm": 0.7391053438186646, "learning_rate": 1.0137640289000244e-05, "loss": 2.0417, "step": 15149 }, { "epoch": 0.5, "grad_norm": 0.7320195436477661, "learning_rate": 1.0136577495974358e-05, "loss": 2.0852, "step": 15150 }, { "epoch": 0.5, "grad_norm": 0.7583122849464417, "learning_rate": 1.01355147014055e-05, "loss": 2.0752, "step": 15151 }, { "epoch": 0.5, "grad_norm": 0.7353431582450867, "learning_rate": 1.0134451905305679e-05, "loss": 2.0692, "step": 15152 }, { "epoch": 0.5, "grad_norm": 0.7522952556610107, "learning_rate": 1.0133389107686894e-05, "loss": 2.0474, "step": 15153 }, { "epoch": 0.5, "grad_norm": 0.7363624572753906, "learning_rate": 1.013232630856116e-05, "loss": 2.0796, "step": 15154 }, { "epoch": 0.5, "grad_norm": 0.7328789234161377, "learning_rate": 1.0131263507940479e-05, "loss": 2.0889, "step": 15155 }, { "epoch": 0.5, "grad_norm": 0.7326347231864929, "learning_rate": 1.0130200705836861e-05, "loss": 2.1141, "step": 15156 }, { "epoch": 0.5, "grad_norm": 0.7435540556907654, "learning_rate": 1.0129137902262311e-05, "loss": 2.0389, "step": 15157 }, { "epoch": 0.5, "grad_norm": 0.719172477722168, "learning_rate": 1.0128075097228837e-05, "loss": 2.1025, "step": 15158 }, { "epoch": 0.5, "grad_norm": 0.7488683462142944, "learning_rate": 1.0127012290748446e-05, "loss": 2.1098, "step": 15159 }, { "epoch": 0.5, "grad_norm": 0.7347745299339294, "learning_rate": 1.0125949482833144e-05, "loss": 2.0382, "step": 15160 }, { "epoch": 0.5, "grad_norm": 0.718937337398529, "learning_rate": 1.0124886673494938e-05, "loss": 2.0751, "step": 15161 }, { "epoch": 0.5, "grad_norm": 0.7688394784927368, "learning_rate": 1.0123823862745836e-05, "loss": 2.0577, "step": 15162 }, { "epoch": 0.5, "grad_norm": 0.7382035851478577, "learning_rate": 1.012276105059785e-05, "loss": 2.1334, "step": 15163 }, { "epoch": 0.5, "grad_norm": 0.7622292041778564, "learning_rate": 1.0121698237062973e-05, "loss": 2.0474, "step": 15164 }, { "epoch": 0.5, "grad_norm": 0.7671157717704773, "learning_rate": 1.0120635422153227e-05, "loss": 2.1028, "step": 15165 }, { "epoch": 0.5, "grad_norm": 0.7590948343276978, "learning_rate": 1.0119572605880608e-05, "loss": 2.0489, "step": 15166 }, { "epoch": 0.5, "grad_norm": 0.7478145360946655, "learning_rate": 1.0118509788257129e-05, "loss": 2.0816, "step": 15167 }, { "epoch": 0.5, "grad_norm": 0.7441771626472473, "learning_rate": 1.0117446969294797e-05, "loss": 2.1215, "step": 15168 }, { "epoch": 0.5, "grad_norm": 0.7434021234512329, "learning_rate": 1.0116384149005618e-05, "loss": 2.0326, "step": 15169 }, { "epoch": 0.5, "grad_norm": 0.7349259853363037, "learning_rate": 1.0115321327401599e-05, "loss": 2.0524, "step": 15170 }, { "epoch": 0.5, "grad_norm": 0.7186653017997742, "learning_rate": 1.0114258504494747e-05, "loss": 2.0354, "step": 15171 }, { "epoch": 0.5, "grad_norm": 0.7231743931770325, "learning_rate": 1.0113195680297068e-05, "loss": 2.1182, "step": 15172 }, { "epoch": 0.5, "grad_norm": 0.7351239919662476, "learning_rate": 1.0112132854820573e-05, "loss": 2.1352, "step": 15173 }, { "epoch": 0.5, "grad_norm": 0.7919723391532898, "learning_rate": 1.0111070028077267e-05, "loss": 2.0923, "step": 15174 }, { "epoch": 0.5, "grad_norm": 0.7534822225570679, "learning_rate": 1.0110007200079152e-05, "loss": 2.0735, "step": 15175 }, { "epoch": 0.5, "grad_norm": 0.7378916144371033, "learning_rate": 1.0108944370838247e-05, "loss": 2.0864, "step": 15176 }, { "epoch": 0.5, "grad_norm": 0.7237681150436401, "learning_rate": 1.0107881540366549e-05, "loss": 2.0669, "step": 15177 }, { "epoch": 0.5, "grad_norm": 0.7312081456184387, "learning_rate": 1.0106818708676067e-05, "loss": 1.9855, "step": 15178 }, { "epoch": 0.51, "grad_norm": 0.7438679933547974, "learning_rate": 1.0105755875778814e-05, "loss": 2.0171, "step": 15179 }, { "epoch": 0.51, "grad_norm": 0.7269994616508484, "learning_rate": 1.0104693041686788e-05, "loss": 2.0821, "step": 15180 }, { "epoch": 0.51, "grad_norm": 0.7123048901557922, "learning_rate": 1.0103630206412005e-05, "loss": 2.1417, "step": 15181 }, { "epoch": 0.51, "grad_norm": 0.7697892189025879, "learning_rate": 1.0102567369966466e-05, "loss": 1.9883, "step": 15182 }, { "epoch": 0.51, "grad_norm": 0.7447414398193359, "learning_rate": 1.0101504532362183e-05, "loss": 2.0633, "step": 15183 }, { "epoch": 0.51, "grad_norm": 0.7448007464408875, "learning_rate": 1.010044169361116e-05, "loss": 2.0827, "step": 15184 }, { "epoch": 0.51, "grad_norm": 0.7365018725395203, "learning_rate": 1.009937885372541e-05, "loss": 2.0613, "step": 15185 }, { "epoch": 0.51, "grad_norm": 0.7608380317687988, "learning_rate": 1.0098316012716929e-05, "loss": 2.0395, "step": 15186 }, { "epoch": 0.51, "grad_norm": 0.7518879771232605, "learning_rate": 1.0097253170597737e-05, "loss": 1.9853, "step": 15187 }, { "epoch": 0.51, "grad_norm": 0.7416049242019653, "learning_rate": 1.0096190327379833e-05, "loss": 2.1062, "step": 15188 }, { "epoch": 0.51, "grad_norm": 0.721646249294281, "learning_rate": 1.0095127483075226e-05, "loss": 2.1055, "step": 15189 }, { "epoch": 0.51, "grad_norm": 0.7360051870346069, "learning_rate": 1.0094064637695926e-05, "loss": 2.1329, "step": 15190 }, { "epoch": 0.51, "grad_norm": 0.7046104669570923, "learning_rate": 1.0093001791253938e-05, "loss": 2.057, "step": 15191 }, { "epoch": 0.51, "grad_norm": 0.7525282502174377, "learning_rate": 1.0091938943761272e-05, "loss": 2.0985, "step": 15192 }, { "epoch": 0.51, "grad_norm": 0.7234799861907959, "learning_rate": 1.0090876095229932e-05, "loss": 2.0857, "step": 15193 }, { "epoch": 0.51, "grad_norm": 0.7559448480606079, "learning_rate": 1.0089813245671928e-05, "loss": 2.0604, "step": 15194 }, { "epoch": 0.51, "grad_norm": 0.7281912565231323, "learning_rate": 1.0088750395099268e-05, "loss": 2.0901, "step": 15195 }, { "epoch": 0.51, "grad_norm": 0.7190815806388855, "learning_rate": 1.0087687543523957e-05, "loss": 2.0004, "step": 15196 }, { "epoch": 0.51, "grad_norm": 0.7349309921264648, "learning_rate": 1.0086624690958e-05, "loss": 2.0891, "step": 15197 }, { "epoch": 0.51, "grad_norm": 0.7298219799995422, "learning_rate": 1.0085561837413413e-05, "loss": 2.0898, "step": 15198 }, { "epoch": 0.51, "grad_norm": 0.7425423264503479, "learning_rate": 1.0084498982902195e-05, "loss": 2.0662, "step": 15199 }, { "epoch": 0.51, "grad_norm": 0.7731768488883972, "learning_rate": 1.0083436127436359e-05, "loss": 2.0929, "step": 15200 }, { "epoch": 0.51, "grad_norm": 0.7267821431159973, "learning_rate": 1.008237327102791e-05, "loss": 2.1465, "step": 15201 }, { "epoch": 0.51, "grad_norm": 0.7411586046218872, "learning_rate": 1.0081310413688855e-05, "loss": 1.9804, "step": 15202 }, { "epoch": 0.51, "grad_norm": 0.7393783926963806, "learning_rate": 1.0080247555431204e-05, "loss": 2.075, "step": 15203 }, { "epoch": 0.51, "grad_norm": 0.7609144449234009, "learning_rate": 1.0079184696266964e-05, "loss": 2.0835, "step": 15204 }, { "epoch": 0.51, "grad_norm": 0.7264783382415771, "learning_rate": 1.0078121836208139e-05, "loss": 2.0801, "step": 15205 }, { "epoch": 0.51, "grad_norm": 0.7539568543434143, "learning_rate": 1.007705897526674e-05, "loss": 2.0123, "step": 15206 }, { "epoch": 0.51, "grad_norm": 0.7428175210952759, "learning_rate": 1.0075996113454778e-05, "loss": 2.0255, "step": 15207 }, { "epoch": 0.51, "grad_norm": 0.7531280517578125, "learning_rate": 1.0074933250784251e-05, "loss": 2.1658, "step": 15208 }, { "epoch": 0.51, "grad_norm": 0.7306881546974182, "learning_rate": 1.0073870387267175e-05, "loss": 2.1391, "step": 15209 }, { "epoch": 0.51, "grad_norm": 0.7184191942214966, "learning_rate": 1.0072807522915555e-05, "loss": 2.0463, "step": 15210 }, { "epoch": 0.51, "grad_norm": 0.7459871172904968, "learning_rate": 1.0071744657741393e-05, "loss": 2.0882, "step": 15211 }, { "epoch": 0.51, "grad_norm": 0.7540646195411682, "learning_rate": 1.0070681791756708e-05, "loss": 2.1714, "step": 15212 }, { "epoch": 0.51, "grad_norm": 0.7392327189445496, "learning_rate": 1.00696189249735e-05, "loss": 2.035, "step": 15213 }, { "epoch": 0.51, "grad_norm": 0.7537505626678467, "learning_rate": 1.0068556057403777e-05, "loss": 2.0894, "step": 15214 }, { "epoch": 0.51, "grad_norm": 0.7503833174705505, "learning_rate": 1.006749318905955e-05, "loss": 2.05, "step": 15215 }, { "epoch": 0.51, "grad_norm": 0.7317917346954346, "learning_rate": 1.0066430319952823e-05, "loss": 2.0767, "step": 15216 }, { "epoch": 0.51, "grad_norm": 0.7461367845535278, "learning_rate": 1.0065367450095605e-05, "loss": 2.0301, "step": 15217 }, { "epoch": 0.51, "grad_norm": 0.740315318107605, "learning_rate": 1.0064304579499905e-05, "loss": 2.0266, "step": 15218 }, { "epoch": 0.51, "grad_norm": 0.742792010307312, "learning_rate": 1.0063241708177726e-05, "loss": 2.115, "step": 15219 }, { "epoch": 0.51, "grad_norm": 0.7185328006744385, "learning_rate": 1.0062178836141083e-05, "loss": 2.1014, "step": 15220 }, { "epoch": 0.51, "grad_norm": 0.7664076685905457, "learning_rate": 1.006111596340198e-05, "loss": 2.0612, "step": 15221 }, { "epoch": 0.51, "grad_norm": 0.7740065455436707, "learning_rate": 1.0060053089972421e-05, "loss": 2.0792, "step": 15222 }, { "epoch": 0.51, "grad_norm": 0.732048511505127, "learning_rate": 1.0058990215864421e-05, "loss": 2.1611, "step": 15223 }, { "epoch": 0.51, "grad_norm": 0.72318434715271, "learning_rate": 1.0057927341089984e-05, "loss": 2.0056, "step": 15224 }, { "epoch": 0.51, "grad_norm": 0.754892885684967, "learning_rate": 1.0056864465661116e-05, "loss": 2.0849, "step": 15225 }, { "epoch": 0.51, "grad_norm": 0.7374746203422546, "learning_rate": 1.0055801589589826e-05, "loss": 2.0613, "step": 15226 }, { "epoch": 0.51, "grad_norm": 0.7219117283821106, "learning_rate": 1.0054738712888125e-05, "loss": 2.0746, "step": 15227 }, { "epoch": 0.51, "grad_norm": 0.7901682257652283, "learning_rate": 1.0053675835568017e-05, "loss": 2.098, "step": 15228 }, { "epoch": 0.51, "grad_norm": 0.7302582859992981, "learning_rate": 1.0052612957641512e-05, "loss": 2.0974, "step": 15229 }, { "epoch": 0.51, "grad_norm": 0.7356825470924377, "learning_rate": 1.0051550079120613e-05, "loss": 2.0163, "step": 15230 }, { "epoch": 0.51, "grad_norm": 0.7401964664459229, "learning_rate": 1.0050487200017336e-05, "loss": 2.0496, "step": 15231 }, { "epoch": 0.51, "grad_norm": 0.7328833937644958, "learning_rate": 1.004942432034368e-05, "loss": 2.0699, "step": 15232 }, { "epoch": 0.51, "grad_norm": 0.739920973777771, "learning_rate": 1.0048361440111659e-05, "loss": 2.1049, "step": 15233 }, { "epoch": 0.51, "grad_norm": 0.7351012825965881, "learning_rate": 1.0047298559333281e-05, "loss": 2.0481, "step": 15234 }, { "epoch": 0.51, "grad_norm": 0.7253711819648743, "learning_rate": 1.0046235678020546e-05, "loss": 2.0576, "step": 15235 }, { "epoch": 0.51, "grad_norm": 0.7943901419639587, "learning_rate": 1.0045172796185473e-05, "loss": 2.0545, "step": 15236 }, { "epoch": 0.51, "grad_norm": 0.7487738132476807, "learning_rate": 1.0044109913840061e-05, "loss": 2.0827, "step": 15237 }, { "epoch": 0.51, "grad_norm": 0.7283720970153809, "learning_rate": 1.0043047030996322e-05, "loss": 2.0863, "step": 15238 }, { "epoch": 0.51, "grad_norm": 0.7430052161216736, "learning_rate": 1.0041984147666263e-05, "loss": 2.0745, "step": 15239 }, { "epoch": 0.51, "grad_norm": 0.7278792262077332, "learning_rate": 1.0040921263861891e-05, "loss": 2.1126, "step": 15240 }, { "epoch": 0.51, "grad_norm": 0.7549926042556763, "learning_rate": 1.0039858379595215e-05, "loss": 2.0238, "step": 15241 }, { "epoch": 0.51, "grad_norm": 0.7231711745262146, "learning_rate": 1.0038795494878246e-05, "loss": 2.0809, "step": 15242 }, { "epoch": 0.51, "grad_norm": 0.7544825077056885, "learning_rate": 1.003773260972298e-05, "loss": 2.1058, "step": 15243 }, { "epoch": 0.51, "grad_norm": 0.7193359136581421, "learning_rate": 1.0036669724141438e-05, "loss": 2.0645, "step": 15244 }, { "epoch": 0.51, "grad_norm": 0.724247395992279, "learning_rate": 1.0035606838145626e-05, "loss": 2.0784, "step": 15245 }, { "epoch": 0.51, "grad_norm": 0.7380356788635254, "learning_rate": 1.0034543951747544e-05, "loss": 2.0535, "step": 15246 }, { "epoch": 0.51, "grad_norm": 0.7263193130493164, "learning_rate": 1.0033481064959207e-05, "loss": 2.0935, "step": 15247 }, { "epoch": 0.51, "grad_norm": 0.8255354166030884, "learning_rate": 1.003241817779262e-05, "loss": 2.1038, "step": 15248 }, { "epoch": 0.51, "grad_norm": 0.7594192624092102, "learning_rate": 1.0031355290259792e-05, "loss": 2.0508, "step": 15249 }, { "epoch": 0.51, "grad_norm": 0.7299487590789795, "learning_rate": 1.003029240237273e-05, "loss": 2.1225, "step": 15250 }, { "epoch": 0.51, "grad_norm": 0.7273380160331726, "learning_rate": 1.0029229514143442e-05, "loss": 2.072, "step": 15251 }, { "epoch": 0.51, "grad_norm": 0.7191823124885559, "learning_rate": 1.0028166625583936e-05, "loss": 2.0998, "step": 15252 }, { "epoch": 0.51, "grad_norm": 0.7502151727676392, "learning_rate": 1.0027103736706219e-05, "loss": 2.0606, "step": 15253 }, { "epoch": 0.51, "grad_norm": 0.7763063311576843, "learning_rate": 1.00260408475223e-05, "loss": 2.0743, "step": 15254 }, { "epoch": 0.51, "grad_norm": 0.7186775207519531, "learning_rate": 1.0024977958044186e-05, "loss": 2.088, "step": 15255 }, { "epoch": 0.51, "grad_norm": 0.7211713194847107, "learning_rate": 1.002391506828389e-05, "loss": 2.0633, "step": 15256 }, { "epoch": 0.51, "grad_norm": 0.7242795825004578, "learning_rate": 1.002285217825341e-05, "loss": 2.0531, "step": 15257 }, { "epoch": 0.51, "grad_norm": 0.743888795375824, "learning_rate": 1.0021789287964766e-05, "loss": 2.1296, "step": 15258 }, { "epoch": 0.51, "grad_norm": 0.7710480093955994, "learning_rate": 1.0020726397429954e-05, "loss": 2.103, "step": 15259 }, { "epoch": 0.51, "grad_norm": 0.7453544735908508, "learning_rate": 1.0019663506660988e-05, "loss": 2.1141, "step": 15260 }, { "epoch": 0.51, "grad_norm": 0.7427123785018921, "learning_rate": 1.0018600615669878e-05, "loss": 2.0837, "step": 15261 }, { "epoch": 0.51, "grad_norm": 0.7413340210914612, "learning_rate": 1.0017537724468626e-05, "loss": 2.0458, "step": 15262 }, { "epoch": 0.51, "grad_norm": 0.7319753766059875, "learning_rate": 1.0016474833069245e-05, "loss": 2.0056, "step": 15263 }, { "epoch": 0.51, "grad_norm": 0.7666097283363342, "learning_rate": 1.0015411941483739e-05, "loss": 2.0842, "step": 15264 }, { "epoch": 0.51, "grad_norm": 0.7362604737281799, "learning_rate": 1.001434904972412e-05, "loss": 2.0875, "step": 15265 }, { "epoch": 0.51, "grad_norm": 0.7272161841392517, "learning_rate": 1.0013286157802393e-05, "loss": 2.0756, "step": 15266 }, { "epoch": 0.51, "grad_norm": 0.7550996541976929, "learning_rate": 1.0012223265730568e-05, "loss": 2.0467, "step": 15267 }, { "epoch": 0.51, "grad_norm": 0.7245338559150696, "learning_rate": 1.0011160373520648e-05, "loss": 2.0044, "step": 15268 }, { "epoch": 0.51, "grad_norm": 0.7105261087417603, "learning_rate": 1.0010097481184648e-05, "loss": 2.0823, "step": 15269 }, { "epoch": 0.51, "grad_norm": 0.7497519254684448, "learning_rate": 1.0009034588734575e-05, "loss": 2.0664, "step": 15270 }, { "epoch": 0.51, "grad_norm": 0.724078357219696, "learning_rate": 1.0007971696182431e-05, "loss": 2.0628, "step": 15271 }, { "epoch": 0.51, "grad_norm": 0.7513481378555298, "learning_rate": 1.0006908803540225e-05, "loss": 2.0779, "step": 15272 }, { "epoch": 0.51, "grad_norm": 0.7692030072212219, "learning_rate": 1.0005845910819971e-05, "loss": 2.0697, "step": 15273 }, { "epoch": 0.51, "grad_norm": 0.7543445825576782, "learning_rate": 1.0004783018033673e-05, "loss": 2.1127, "step": 15274 }, { "epoch": 0.51, "grad_norm": 0.7230091094970703, "learning_rate": 1.0003720125193337e-05, "loss": 2.1016, "step": 15275 }, { "epoch": 0.51, "grad_norm": 0.6973388195037842, "learning_rate": 1.0002657232310975e-05, "loss": 2.0636, "step": 15276 }, { "epoch": 0.51, "grad_norm": 0.7234853506088257, "learning_rate": 1.0001594339398593e-05, "loss": 2.058, "step": 15277 }, { "epoch": 0.51, "grad_norm": 0.7300195097923279, "learning_rate": 1.0000531446468202e-05, "loss": 1.999, "step": 15278 }, { "epoch": 0.51, "grad_norm": 0.7298647165298462, "learning_rate": 9.999468553531801e-06, "loss": 2.0329, "step": 15279 }, { "epoch": 0.51, "grad_norm": 0.7457165718078613, "learning_rate": 9.998405660601407e-06, "loss": 2.0123, "step": 15280 }, { "epoch": 0.51, "grad_norm": 0.7942795157432556, "learning_rate": 9.997342767689028e-06, "loss": 2.0454, "step": 15281 }, { "epoch": 0.51, "grad_norm": 0.7216944694519043, "learning_rate": 9.996279874806665e-06, "loss": 2.0531, "step": 15282 }, { "epoch": 0.51, "grad_norm": 0.7550262808799744, "learning_rate": 9.99521698196633e-06, "loss": 2.1393, "step": 15283 }, { "epoch": 0.51, "grad_norm": 0.732352614402771, "learning_rate": 9.99415408918003e-06, "loss": 2.0892, "step": 15284 }, { "epoch": 0.51, "grad_norm": 0.7159614562988281, "learning_rate": 9.993091196459774e-06, "loss": 2.0502, "step": 15285 }, { "epoch": 0.51, "grad_norm": 0.7350059747695923, "learning_rate": 9.992028303817576e-06, "loss": 2.0457, "step": 15286 }, { "epoch": 0.51, "grad_norm": 0.7518167495727539, "learning_rate": 9.99096541126543e-06, "loss": 2.0672, "step": 15287 }, { "epoch": 0.51, "grad_norm": 0.7341758012771606, "learning_rate": 9.989902518815354e-06, "loss": 2.0402, "step": 15288 }, { "epoch": 0.51, "grad_norm": 0.7129610180854797, "learning_rate": 9.988839626479352e-06, "loss": 2.0887, "step": 15289 }, { "epoch": 0.51, "grad_norm": 0.7049269676208496, "learning_rate": 9.987776734269437e-06, "loss": 1.9986, "step": 15290 }, { "epoch": 0.51, "grad_norm": 0.7403312921524048, "learning_rate": 9.98671384219761e-06, "loss": 2.0856, "step": 15291 }, { "epoch": 0.51, "grad_norm": 0.7347792387008667, "learning_rate": 9.985650950275884e-06, "loss": 2.0887, "step": 15292 }, { "epoch": 0.51, "grad_norm": 0.7298508882522583, "learning_rate": 9.984588058516261e-06, "loss": 2.0803, "step": 15293 }, { "epoch": 0.51, "grad_norm": 0.7205401659011841, "learning_rate": 9.983525166930762e-06, "loss": 2.0711, "step": 15294 }, { "epoch": 0.51, "grad_norm": 0.7332109808921814, "learning_rate": 9.982462275531377e-06, "loss": 2.013, "step": 15295 }, { "epoch": 0.51, "grad_norm": 0.7301912307739258, "learning_rate": 9.981399384330125e-06, "loss": 2.0295, "step": 15296 }, { "epoch": 0.51, "grad_norm": 0.7130862474441528, "learning_rate": 9.980336493339014e-06, "loss": 2.0465, "step": 15297 }, { "epoch": 0.51, "grad_norm": 0.7575035095214844, "learning_rate": 9.979273602570049e-06, "loss": 2.1012, "step": 15298 }, { "epoch": 0.51, "grad_norm": 0.7416663765907288, "learning_rate": 9.97821071203524e-06, "loss": 1.9937, "step": 15299 }, { "epoch": 0.51, "grad_norm": 0.7405804395675659, "learning_rate": 9.977147821746593e-06, "loss": 2.1182, "step": 15300 }, { "epoch": 0.51, "grad_norm": 0.7277283072471619, "learning_rate": 9.976084931716112e-06, "loss": 2.1212, "step": 15301 }, { "epoch": 0.51, "grad_norm": 0.7182308435440063, "learning_rate": 9.975022041955812e-06, "loss": 2.0121, "step": 15302 }, { "epoch": 0.51, "grad_norm": 0.719869077205658, "learning_rate": 9.973959152477703e-06, "loss": 2.1158, "step": 15303 }, { "epoch": 0.51, "grad_norm": 0.7614814043045044, "learning_rate": 9.972896263293784e-06, "loss": 2.0855, "step": 15304 }, { "epoch": 0.51, "grad_norm": 0.7154949903488159, "learning_rate": 9.971833374416068e-06, "loss": 2.0485, "step": 15305 }, { "epoch": 0.51, "grad_norm": 0.7173162698745728, "learning_rate": 9.970770485856563e-06, "loss": 2.0618, "step": 15306 }, { "epoch": 0.51, "grad_norm": 0.7708456516265869, "learning_rate": 9.969707597627272e-06, "loss": 2.1038, "step": 15307 }, { "epoch": 0.51, "grad_norm": 0.7407368421554565, "learning_rate": 9.968644709740213e-06, "loss": 2.0913, "step": 15308 }, { "epoch": 0.51, "grad_norm": 0.7399535179138184, "learning_rate": 9.967581822207381e-06, "loss": 2.0952, "step": 15309 }, { "epoch": 0.51, "grad_norm": 0.7314403653144836, "learning_rate": 9.966518935040795e-06, "loss": 2.1047, "step": 15310 }, { "epoch": 0.51, "grad_norm": 0.7201424241065979, "learning_rate": 9.965456048252456e-06, "loss": 2.0688, "step": 15311 }, { "epoch": 0.51, "grad_norm": 0.7334617972373962, "learning_rate": 9.96439316185438e-06, "loss": 2.0652, "step": 15312 }, { "epoch": 0.51, "grad_norm": 0.7077397704124451, "learning_rate": 9.963330275858563e-06, "loss": 2.1227, "step": 15313 }, { "epoch": 0.51, "grad_norm": 0.744537353515625, "learning_rate": 9.962267390277021e-06, "loss": 2.1078, "step": 15314 }, { "epoch": 0.51, "grad_norm": 0.7356935739517212, "learning_rate": 9.961204505121757e-06, "loss": 2.0835, "step": 15315 }, { "epoch": 0.51, "grad_norm": 0.7220970392227173, "learning_rate": 9.960141620404785e-06, "loss": 2.1011, "step": 15316 }, { "epoch": 0.51, "grad_norm": 0.7231786847114563, "learning_rate": 9.95907873613811e-06, "loss": 2.0847, "step": 15317 }, { "epoch": 0.51, "grad_norm": 0.7488526701927185, "learning_rate": 9.958015852333738e-06, "loss": 2.077, "step": 15318 }, { "epoch": 0.51, "grad_norm": 0.715393602848053, "learning_rate": 9.956952969003681e-06, "loss": 1.974, "step": 15319 }, { "epoch": 0.51, "grad_norm": 0.768552839756012, "learning_rate": 9.955890086159939e-06, "loss": 2.0354, "step": 15320 }, { "epoch": 0.51, "grad_norm": 0.7585130929946899, "learning_rate": 9.954827203814532e-06, "loss": 2.0856, "step": 15321 }, { "epoch": 0.51, "grad_norm": 0.7729237079620361, "learning_rate": 9.953764321979457e-06, "loss": 2.1393, "step": 15322 }, { "epoch": 0.51, "grad_norm": 0.7574151754379272, "learning_rate": 9.952701440666722e-06, "loss": 2.0875, "step": 15323 }, { "epoch": 0.51, "grad_norm": 0.7221570014953613, "learning_rate": 9.951638559888341e-06, "loss": 2.1037, "step": 15324 }, { "epoch": 0.51, "grad_norm": 0.7477939128875732, "learning_rate": 9.950575679656322e-06, "loss": 2.0826, "step": 15325 }, { "epoch": 0.51, "grad_norm": 0.7464163899421692, "learning_rate": 9.949512799982669e-06, "loss": 2.1224, "step": 15326 }, { "epoch": 0.51, "grad_norm": 0.7142251133918762, "learning_rate": 9.948449920879389e-06, "loss": 2.0919, "step": 15327 }, { "epoch": 0.51, "grad_norm": 0.7431299686431885, "learning_rate": 9.94738704235849e-06, "loss": 2.0833, "step": 15328 }, { "epoch": 0.51, "grad_norm": 0.7188173532485962, "learning_rate": 9.946324164431984e-06, "loss": 1.9877, "step": 15329 }, { "epoch": 0.51, "grad_norm": 0.744729220867157, "learning_rate": 9.94526128711188e-06, "loss": 2.0454, "step": 15330 }, { "epoch": 0.51, "grad_norm": 0.7507574558258057, "learning_rate": 9.944198410410175e-06, "loss": 2.0925, "step": 15331 }, { "epoch": 0.51, "grad_norm": 0.7460050582885742, "learning_rate": 9.943135534338887e-06, "loss": 2.0664, "step": 15332 }, { "epoch": 0.51, "grad_norm": 0.7492886781692505, "learning_rate": 9.942072658910019e-06, "loss": 2.1096, "step": 15333 }, { "epoch": 0.51, "grad_norm": 0.7262297868728638, "learning_rate": 9.941009784135584e-06, "loss": 2.1055, "step": 15334 }, { "epoch": 0.51, "grad_norm": 0.7422386407852173, "learning_rate": 9.93994691002758e-06, "loss": 2.1267, "step": 15335 }, { "epoch": 0.51, "grad_norm": 0.7256451845169067, "learning_rate": 9.938884036598024e-06, "loss": 2.1021, "step": 15336 }, { "epoch": 0.51, "grad_norm": 0.7455830574035645, "learning_rate": 9.937821163858919e-06, "loss": 2.1173, "step": 15337 }, { "epoch": 0.51, "grad_norm": 0.7465831637382507, "learning_rate": 9.936758291822274e-06, "loss": 2.0572, "step": 15338 }, { "epoch": 0.51, "grad_norm": 0.7223896384239197, "learning_rate": 9.9356954205001e-06, "loss": 2.0221, "step": 15339 }, { "epoch": 0.51, "grad_norm": 0.74922776222229, "learning_rate": 9.9346325499044e-06, "loss": 2.1266, "step": 15340 }, { "epoch": 0.51, "grad_norm": 0.7232711911201477, "learning_rate": 9.93356968004718e-06, "loss": 2.0403, "step": 15341 }, { "epoch": 0.51, "grad_norm": 0.7061301469802856, "learning_rate": 9.932506810940451e-06, "loss": 2.0142, "step": 15342 }, { "epoch": 0.51, "grad_norm": 0.7734557390213013, "learning_rate": 9.931443942596228e-06, "loss": 2.1026, "step": 15343 }, { "epoch": 0.51, "grad_norm": 0.7426832318305969, "learning_rate": 9.930381075026503e-06, "loss": 1.9838, "step": 15344 }, { "epoch": 0.51, "grad_norm": 0.7451736927032471, "learning_rate": 9.929318208243293e-06, "loss": 2.0332, "step": 15345 }, { "epoch": 0.51, "grad_norm": 0.7048302888870239, "learning_rate": 9.928255342258607e-06, "loss": 2.0318, "step": 15346 }, { "epoch": 0.51, "grad_norm": 0.7207559943199158, "learning_rate": 9.927192477084448e-06, "loss": 2.1094, "step": 15347 }, { "epoch": 0.51, "grad_norm": 0.7534931898117065, "learning_rate": 9.92612961273283e-06, "loss": 2.0471, "step": 15348 }, { "epoch": 0.51, "grad_norm": 0.723344624042511, "learning_rate": 9.925066749215752e-06, "loss": 2.0498, "step": 15349 }, { "epoch": 0.51, "grad_norm": 0.7234449982643127, "learning_rate": 9.924003886545225e-06, "loss": 2.0775, "step": 15350 }, { "epoch": 0.51, "grad_norm": 0.7601335644721985, "learning_rate": 9.922941024733259e-06, "loss": 2.0446, "step": 15351 }, { "epoch": 0.51, "grad_norm": 0.7248831391334534, "learning_rate": 9.921878163791864e-06, "loss": 2.0859, "step": 15352 }, { "epoch": 0.51, "grad_norm": 0.7175817489624023, "learning_rate": 9.92081530373304e-06, "loss": 2.0436, "step": 15353 }, { "epoch": 0.51, "grad_norm": 0.7386490106582642, "learning_rate": 9.919752444568798e-06, "loss": 2.0243, "step": 15354 }, { "epoch": 0.51, "grad_norm": 0.7008812427520752, "learning_rate": 9.918689586311146e-06, "loss": 2.0074, "step": 15355 }, { "epoch": 0.51, "grad_norm": 0.740475594997406, "learning_rate": 9.917626728972095e-06, "loss": 2.068, "step": 15356 }, { "epoch": 0.51, "grad_norm": 0.7467914819717407, "learning_rate": 9.916563872563647e-06, "loss": 2.1123, "step": 15357 }, { "epoch": 0.51, "grad_norm": 0.719474196434021, "learning_rate": 9.915501017097807e-06, "loss": 2.0098, "step": 15358 }, { "epoch": 0.51, "grad_norm": 0.7158430814743042, "learning_rate": 9.91443816258659e-06, "loss": 2.0239, "step": 15359 }, { "epoch": 0.51, "grad_norm": 0.7373977899551392, "learning_rate": 9.913375309042001e-06, "loss": 2.0632, "step": 15360 }, { "epoch": 0.51, "grad_norm": 0.752083957195282, "learning_rate": 9.912312456476048e-06, "loss": 2.1362, "step": 15361 }, { "epoch": 0.51, "grad_norm": 0.7436091303825378, "learning_rate": 9.911249604900737e-06, "loss": 2.0635, "step": 15362 }, { "epoch": 0.51, "grad_norm": 0.7555418014526367, "learning_rate": 9.910186754328075e-06, "loss": 2.0264, "step": 15363 }, { "epoch": 0.51, "grad_norm": 0.7400103211402893, "learning_rate": 9.909123904770068e-06, "loss": 2.0187, "step": 15364 }, { "epoch": 0.51, "grad_norm": 0.712190568447113, "learning_rate": 9.908061056238733e-06, "loss": 2.0523, "step": 15365 }, { "epoch": 0.51, "grad_norm": 0.7998560070991516, "learning_rate": 9.906998208746064e-06, "loss": 2.1282, "step": 15366 }, { "epoch": 0.51, "grad_norm": 0.7256661057472229, "learning_rate": 9.905935362304076e-06, "loss": 2.1391, "step": 15367 }, { "epoch": 0.51, "grad_norm": 0.726402223110199, "learning_rate": 9.904872516924776e-06, "loss": 2.0804, "step": 15368 }, { "epoch": 0.51, "grad_norm": 0.7185222506523132, "learning_rate": 9.90380967262017e-06, "loss": 2.0465, "step": 15369 }, { "epoch": 0.51, "grad_norm": 0.7617346048355103, "learning_rate": 9.902746829402268e-06, "loss": 2.0661, "step": 15370 }, { "epoch": 0.51, "grad_norm": 0.7034574151039124, "learning_rate": 9.901683987283074e-06, "loss": 1.9937, "step": 15371 }, { "epoch": 0.51, "grad_norm": 0.7556415796279907, "learning_rate": 9.900621146274594e-06, "loss": 2.0638, "step": 15372 }, { "epoch": 0.51, "grad_norm": 0.7601121664047241, "learning_rate": 9.89955830638884e-06, "loss": 2.1071, "step": 15373 }, { "epoch": 0.51, "grad_norm": 0.7391757965087891, "learning_rate": 9.89849546763782e-06, "loss": 2.0533, "step": 15374 }, { "epoch": 0.51, "grad_norm": 0.7098444700241089, "learning_rate": 9.897432630033537e-06, "loss": 2.1017, "step": 15375 }, { "epoch": 0.51, "grad_norm": 0.7534666061401367, "learning_rate": 9.896369793587998e-06, "loss": 2.1028, "step": 15376 }, { "epoch": 0.51, "grad_norm": 0.7309271097183228, "learning_rate": 9.895306958313215e-06, "loss": 2.031, "step": 15377 }, { "epoch": 0.51, "grad_norm": 0.7496236562728882, "learning_rate": 9.894244124221188e-06, "loss": 2.0072, "step": 15378 }, { "epoch": 0.51, "grad_norm": 0.7571414709091187, "learning_rate": 9.893181291323936e-06, "loss": 2.0447, "step": 15379 }, { "epoch": 0.51, "grad_norm": 0.7489757537841797, "learning_rate": 9.892118459633454e-06, "loss": 2.0185, "step": 15380 }, { "epoch": 0.51, "grad_norm": 0.7407941222190857, "learning_rate": 9.891055629161756e-06, "loss": 2.0663, "step": 15381 }, { "epoch": 0.51, "grad_norm": 0.7439061999320984, "learning_rate": 9.889992799920848e-06, "loss": 2.0648, "step": 15382 }, { "epoch": 0.51, "grad_norm": 0.7580878138542175, "learning_rate": 9.888929971922738e-06, "loss": 2.0232, "step": 15383 }, { "epoch": 0.51, "grad_norm": 0.7839977145195007, "learning_rate": 9.88786714517943e-06, "loss": 2.0624, "step": 15384 }, { "epoch": 0.51, "grad_norm": 0.760627269744873, "learning_rate": 9.886804319702934e-06, "loss": 2.0722, "step": 15385 }, { "epoch": 0.51, "grad_norm": 0.7217568159103394, "learning_rate": 9.885741495505255e-06, "loss": 2.0922, "step": 15386 }, { "epoch": 0.51, "grad_norm": 0.7457327842712402, "learning_rate": 9.884678672598406e-06, "loss": 2.1363, "step": 15387 }, { "epoch": 0.51, "grad_norm": 0.7632504105567932, "learning_rate": 9.883615850994384e-06, "loss": 2.1028, "step": 15388 }, { "epoch": 0.51, "grad_norm": 0.7467462420463562, "learning_rate": 9.882553030705206e-06, "loss": 2.0979, "step": 15389 }, { "epoch": 0.51, "grad_norm": 0.7233224511146545, "learning_rate": 9.881490211742873e-06, "loss": 2.0171, "step": 15390 }, { "epoch": 0.51, "grad_norm": 0.7475437521934509, "learning_rate": 9.880427394119394e-06, "loss": 2.0718, "step": 15391 }, { "epoch": 0.51, "grad_norm": 0.7187190055847168, "learning_rate": 9.87936457784678e-06, "loss": 2.1157, "step": 15392 }, { "epoch": 0.51, "grad_norm": 0.7324836254119873, "learning_rate": 9.87830176293703e-06, "loss": 2.0941, "step": 15393 }, { "epoch": 0.51, "grad_norm": 0.7730200886726379, "learning_rate": 9.877238949402154e-06, "loss": 2.0695, "step": 15394 }, { "epoch": 0.51, "grad_norm": 0.7878044843673706, "learning_rate": 9.876176137254164e-06, "loss": 2.0871, "step": 15395 }, { "epoch": 0.51, "grad_norm": 0.7498047351837158, "learning_rate": 9.875113326505064e-06, "loss": 2.0561, "step": 15396 }, { "epoch": 0.51, "grad_norm": 0.7387615442276001, "learning_rate": 9.87405051716686e-06, "loss": 2.0795, "step": 15397 }, { "epoch": 0.51, "grad_norm": 0.7281858921051025, "learning_rate": 9.872987709251557e-06, "loss": 2.0904, "step": 15398 }, { "epoch": 0.51, "grad_norm": 0.7290946841239929, "learning_rate": 9.871924902771166e-06, "loss": 2.0523, "step": 15399 }, { "epoch": 0.51, "grad_norm": 0.7390920519828796, "learning_rate": 9.87086209773769e-06, "loss": 2.0774, "step": 15400 }, { "epoch": 0.51, "grad_norm": 0.7358347773551941, "learning_rate": 9.869799294163145e-06, "loss": 2.0841, "step": 15401 }, { "epoch": 0.51, "grad_norm": 0.7207438945770264, "learning_rate": 9.868736492059524e-06, "loss": 2.0145, "step": 15402 }, { "epoch": 0.51, "grad_norm": 0.7457445859909058, "learning_rate": 9.867673691438844e-06, "loss": 2.0136, "step": 15403 }, { "epoch": 0.51, "grad_norm": 0.7311504483222961, "learning_rate": 9.866610892313108e-06, "loss": 2.0981, "step": 15404 }, { "epoch": 0.51, "grad_norm": 0.7590753436088562, "learning_rate": 9.865548094694328e-06, "loss": 2.0612, "step": 15405 }, { "epoch": 0.51, "grad_norm": 0.7475900650024414, "learning_rate": 9.864485298594504e-06, "loss": 2.0596, "step": 15406 }, { "epoch": 0.51, "grad_norm": 0.7344780564308167, "learning_rate": 9.863422504025645e-06, "loss": 2.0409, "step": 15407 }, { "epoch": 0.51, "grad_norm": 0.7245326042175293, "learning_rate": 9.862359710999758e-06, "loss": 2.0726, "step": 15408 }, { "epoch": 0.51, "grad_norm": 0.7149578928947449, "learning_rate": 9.86129691952885e-06, "loss": 2.1089, "step": 15409 }, { "epoch": 0.51, "grad_norm": 0.7664355635643005, "learning_rate": 9.860234129624932e-06, "loss": 2.053, "step": 15410 }, { "epoch": 0.51, "grad_norm": 0.7224113941192627, "learning_rate": 9.859171341300003e-06, "loss": 2.1228, "step": 15411 }, { "epoch": 0.51, "grad_norm": 0.8034083247184753, "learning_rate": 9.858108554566076e-06, "loss": 2.0027, "step": 15412 }, { "epoch": 0.51, "grad_norm": 0.7370339632034302, "learning_rate": 9.857045769435155e-06, "loss": 2.0495, "step": 15413 }, { "epoch": 0.51, "grad_norm": 0.6925652623176575, "learning_rate": 9.855982985919246e-06, "loss": 2.0543, "step": 15414 }, { "epoch": 0.51, "grad_norm": 0.7595086097717285, "learning_rate": 9.854920204030358e-06, "loss": 2.0787, "step": 15415 }, { "epoch": 0.51, "grad_norm": 0.7260364294052124, "learning_rate": 9.853857423780493e-06, "loss": 2.0787, "step": 15416 }, { "epoch": 0.51, "grad_norm": 0.7198963761329651, "learning_rate": 9.852794645181666e-06, "loss": 2.0078, "step": 15417 }, { "epoch": 0.51, "grad_norm": 0.7344509363174438, "learning_rate": 9.851731868245877e-06, "loss": 2.0207, "step": 15418 }, { "epoch": 0.51, "grad_norm": 0.7224626541137695, "learning_rate": 9.850669092985136e-06, "loss": 2.1186, "step": 15419 }, { "epoch": 0.51, "grad_norm": 0.7282788157463074, "learning_rate": 9.849606319411445e-06, "loss": 2.1431, "step": 15420 }, { "epoch": 0.51, "grad_norm": 0.7216843962669373, "learning_rate": 9.848543547536813e-06, "loss": 2.0717, "step": 15421 }, { "epoch": 0.51, "grad_norm": 0.7225651144981384, "learning_rate": 9.84748077737325e-06, "loss": 2.0695, "step": 15422 }, { "epoch": 0.51, "grad_norm": 0.7397006154060364, "learning_rate": 9.846418008932762e-06, "loss": 2.0807, "step": 15423 }, { "epoch": 0.51, "grad_norm": 0.7230384349822998, "learning_rate": 9.84535524222735e-06, "loss": 1.9347, "step": 15424 }, { "epoch": 0.51, "grad_norm": 0.7308531403541565, "learning_rate": 9.844292477269023e-06, "loss": 2.003, "step": 15425 }, { "epoch": 0.51, "grad_norm": 0.7896338701248169, "learning_rate": 9.84322971406979e-06, "loss": 2.0688, "step": 15426 }, { "epoch": 0.51, "grad_norm": 0.74184650182724, "learning_rate": 9.842166952641656e-06, "loss": 2.0828, "step": 15427 }, { "epoch": 0.51, "grad_norm": 0.7163426280021667, "learning_rate": 9.841104192996627e-06, "loss": 2.0951, "step": 15428 }, { "epoch": 0.51, "grad_norm": 0.7500051259994507, "learning_rate": 9.840041435146708e-06, "loss": 2.0631, "step": 15429 }, { "epoch": 0.51, "grad_norm": 0.7333345413208008, "learning_rate": 9.838978679103908e-06, "loss": 1.9789, "step": 15430 }, { "epoch": 0.51, "grad_norm": 0.7438106536865234, "learning_rate": 9.837915924880232e-06, "loss": 2.0958, "step": 15431 }, { "epoch": 0.51, "grad_norm": 0.7173997163772583, "learning_rate": 9.83685317248769e-06, "loss": 2.0991, "step": 15432 }, { "epoch": 0.51, "grad_norm": 0.710135817527771, "learning_rate": 9.835790421938284e-06, "loss": 2.0511, "step": 15433 }, { "epoch": 0.51, "grad_norm": 0.7174953818321228, "learning_rate": 9.83472767324402e-06, "loss": 2.1526, "step": 15434 }, { "epoch": 0.51, "grad_norm": 0.7140005230903625, "learning_rate": 9.833664926416904e-06, "loss": 2.1002, "step": 15435 }, { "epoch": 0.51, "grad_norm": 0.7471922636032104, "learning_rate": 9.832602181468952e-06, "loss": 2.1362, "step": 15436 }, { "epoch": 0.51, "grad_norm": 0.7396267652511597, "learning_rate": 9.831539438412153e-06, "loss": 2.0854, "step": 15437 }, { "epoch": 0.51, "grad_norm": 0.7174857258796692, "learning_rate": 9.830476697258528e-06, "loss": 2.0772, "step": 15438 }, { "epoch": 0.51, "grad_norm": 0.752330482006073, "learning_rate": 9.829413958020078e-06, "loss": 2.1341, "step": 15439 }, { "epoch": 0.51, "grad_norm": 0.7354570031166077, "learning_rate": 9.828351220708807e-06, "loss": 2.1309, "step": 15440 }, { "epoch": 0.51, "grad_norm": 0.7684403657913208, "learning_rate": 9.827288485336726e-06, "loss": 2.0429, "step": 15441 }, { "epoch": 0.51, "grad_norm": 0.746667206287384, "learning_rate": 9.826225751915837e-06, "loss": 2.0474, "step": 15442 }, { "epoch": 0.51, "grad_norm": 0.7403775453567505, "learning_rate": 9.825163020458145e-06, "loss": 2.113, "step": 15443 }, { "epoch": 0.51, "grad_norm": 0.7618185877799988, "learning_rate": 9.824100290975661e-06, "loss": 2.0831, "step": 15444 }, { "epoch": 0.51, "grad_norm": 0.7202662229537964, "learning_rate": 9.823037563480391e-06, "loss": 2.0557, "step": 15445 }, { "epoch": 0.51, "grad_norm": 0.7305876016616821, "learning_rate": 9.821974837984337e-06, "loss": 2.028, "step": 15446 }, { "epoch": 0.51, "grad_norm": 0.7291291356086731, "learning_rate": 9.820912114499507e-06, "loss": 2.1397, "step": 15447 }, { "epoch": 0.51, "grad_norm": 0.7440987825393677, "learning_rate": 9.819849393037905e-06, "loss": 2.1266, "step": 15448 }, { "epoch": 0.51, "grad_norm": 0.7154314517974854, "learning_rate": 9.818786673611545e-06, "loss": 2.0856, "step": 15449 }, { "epoch": 0.51, "grad_norm": 0.7714183330535889, "learning_rate": 9.817723956232422e-06, "loss": 2.0478, "step": 15450 }, { "epoch": 0.51, "grad_norm": 0.7148356437683105, "learning_rate": 9.816661240912545e-06, "loss": 2.0562, "step": 15451 }, { "epoch": 0.51, "grad_norm": 0.7106906771659851, "learning_rate": 9.815598527663924e-06, "loss": 2.0265, "step": 15452 }, { "epoch": 0.51, "grad_norm": 0.7191160321235657, "learning_rate": 9.814535816498563e-06, "loss": 2.1333, "step": 15453 }, { "epoch": 0.51, "grad_norm": 0.7423055768013, "learning_rate": 9.81347310742847e-06, "loss": 2.1441, "step": 15454 }, { "epoch": 0.51, "grad_norm": 0.719419538974762, "learning_rate": 9.812410400465646e-06, "loss": 2.0527, "step": 15455 }, { "epoch": 0.51, "grad_norm": 0.7294454574584961, "learning_rate": 9.8113476956221e-06, "loss": 1.9811, "step": 15456 }, { "epoch": 0.51, "grad_norm": 0.7310861349105835, "learning_rate": 9.810284992909835e-06, "loss": 2.0688, "step": 15457 }, { "epoch": 0.51, "grad_norm": 0.7448965907096863, "learning_rate": 9.809222292340865e-06, "loss": 2.0857, "step": 15458 }, { "epoch": 0.51, "grad_norm": 0.6905385851860046, "learning_rate": 9.808159593927183e-06, "loss": 2.0013, "step": 15459 }, { "epoch": 0.51, "grad_norm": 0.7185955047607422, "learning_rate": 9.807096897680805e-06, "loss": 2.0877, "step": 15460 }, { "epoch": 0.51, "grad_norm": 0.735254168510437, "learning_rate": 9.80603420361373e-06, "loss": 2.0117, "step": 15461 }, { "epoch": 0.51, "grad_norm": 0.7139628529548645, "learning_rate": 9.804971511737971e-06, "loss": 2.0588, "step": 15462 }, { "epoch": 0.51, "grad_norm": 0.7532000541687012, "learning_rate": 9.80390882206553e-06, "loss": 1.9247, "step": 15463 }, { "epoch": 0.51, "grad_norm": 0.7532366514205933, "learning_rate": 9.802846134608409e-06, "loss": 2.1071, "step": 15464 }, { "epoch": 0.51, "grad_norm": 0.740577757358551, "learning_rate": 9.801783449378616e-06, "loss": 2.13, "step": 15465 }, { "epoch": 0.51, "grad_norm": 0.7298146486282349, "learning_rate": 9.800720766388159e-06, "loss": 2.0301, "step": 15466 }, { "epoch": 0.51, "grad_norm": 0.7567242383956909, "learning_rate": 9.799658085649045e-06, "loss": 2.1368, "step": 15467 }, { "epoch": 0.51, "grad_norm": 0.7340618371963501, "learning_rate": 9.798595407173272e-06, "loss": 1.9951, "step": 15468 }, { "epoch": 0.51, "grad_norm": 0.737190306186676, "learning_rate": 9.797532730972853e-06, "loss": 2.1096, "step": 15469 }, { "epoch": 0.51, "grad_norm": 0.7751051783561707, "learning_rate": 9.796470057059788e-06, "loss": 2.1154, "step": 15470 }, { "epoch": 0.51, "grad_norm": 0.7434642314910889, "learning_rate": 9.79540738544609e-06, "loss": 2.0401, "step": 15471 }, { "epoch": 0.51, "grad_norm": 0.7171930074691772, "learning_rate": 9.794344716143754e-06, "loss": 2.0856, "step": 15472 }, { "epoch": 0.51, "grad_norm": 0.737261950969696, "learning_rate": 9.793282049164791e-06, "loss": 2.1137, "step": 15473 }, { "epoch": 0.51, "grad_norm": 0.7464218735694885, "learning_rate": 9.792219384521207e-06, "loss": 2.117, "step": 15474 }, { "epoch": 0.51, "grad_norm": 0.7863865494728088, "learning_rate": 9.791156722225007e-06, "loss": 2.0856, "step": 15475 }, { "epoch": 0.51, "grad_norm": 0.7412142753601074, "learning_rate": 9.7900940622882e-06, "loss": 2.0507, "step": 15476 }, { "epoch": 0.51, "grad_norm": 0.7396461367607117, "learning_rate": 9.789031404722785e-06, "loss": 2.0501, "step": 15477 }, { "epoch": 0.51, "grad_norm": 0.7308434844017029, "learning_rate": 9.787968749540767e-06, "loss": 2.076, "step": 15478 }, { "epoch": 0.51, "grad_norm": 0.7044723629951477, "learning_rate": 9.786906096754154e-06, "loss": 1.9995, "step": 15479 }, { "epoch": 0.52, "grad_norm": 0.7285619378089905, "learning_rate": 9.785843446374955e-06, "loss": 2.1147, "step": 15480 }, { "epoch": 0.52, "grad_norm": 0.7292072772979736, "learning_rate": 9.784780798415167e-06, "loss": 2.027, "step": 15481 }, { "epoch": 0.52, "grad_norm": 0.7500810623168945, "learning_rate": 9.7837181528868e-06, "loss": 2.1657, "step": 15482 }, { "epoch": 0.52, "grad_norm": 0.7479604482650757, "learning_rate": 9.782655509801859e-06, "loss": 2.0836, "step": 15483 }, { "epoch": 0.52, "grad_norm": 0.7441113591194153, "learning_rate": 9.78159286917235e-06, "loss": 2.0057, "step": 15484 }, { "epoch": 0.52, "grad_norm": 0.6986281275749207, "learning_rate": 9.780530231010279e-06, "loss": 1.9845, "step": 15485 }, { "epoch": 0.52, "grad_norm": 0.7486786246299744, "learning_rate": 9.779467595327644e-06, "loss": 2.0646, "step": 15486 }, { "epoch": 0.52, "grad_norm": 0.7273546457290649, "learning_rate": 9.778404962136455e-06, "loss": 2.0678, "step": 15487 }, { "epoch": 0.52, "grad_norm": 0.7597822546958923, "learning_rate": 9.777342331448717e-06, "loss": 2.0996, "step": 15488 }, { "epoch": 0.52, "grad_norm": 0.7094641923904419, "learning_rate": 9.776279703276439e-06, "loss": 2.0453, "step": 15489 }, { "epoch": 0.52, "grad_norm": 0.7305936813354492, "learning_rate": 9.775217077631619e-06, "loss": 2.0448, "step": 15490 }, { "epoch": 0.52, "grad_norm": 0.7562252879142761, "learning_rate": 9.774154454526265e-06, "loss": 2.0429, "step": 15491 }, { "epoch": 0.52, "grad_norm": 0.7288805246353149, "learning_rate": 9.77309183397238e-06, "loss": 2.0711, "step": 15492 }, { "epoch": 0.52, "grad_norm": 0.7238330245018005, "learning_rate": 9.77202921598197e-06, "loss": 2.0819, "step": 15493 }, { "epoch": 0.52, "grad_norm": 0.7518173456192017, "learning_rate": 9.770966600567046e-06, "loss": 2.0907, "step": 15494 }, { "epoch": 0.52, "grad_norm": 0.7401343584060669, "learning_rate": 9.769903987739602e-06, "loss": 2.0953, "step": 15495 }, { "epoch": 0.52, "grad_norm": 0.7601081728935242, "learning_rate": 9.768841377511649e-06, "loss": 2.098, "step": 15496 }, { "epoch": 0.52, "grad_norm": 0.7254143953323364, "learning_rate": 9.76777876989519e-06, "loss": 2.1037, "step": 15497 }, { "epoch": 0.52, "grad_norm": 0.754649817943573, "learning_rate": 9.766716164902234e-06, "loss": 2.076, "step": 15498 }, { "epoch": 0.52, "grad_norm": 0.7536258697509766, "learning_rate": 9.765653562544779e-06, "loss": 2.0219, "step": 15499 }, { "epoch": 0.52, "grad_norm": 0.7242909669876099, "learning_rate": 9.764590962834832e-06, "loss": 2.0932, "step": 15500 }, { "epoch": 0.52, "grad_norm": 0.7305471301078796, "learning_rate": 9.763528365784397e-06, "loss": 2.0499, "step": 15501 }, { "epoch": 0.52, "grad_norm": 0.7612824440002441, "learning_rate": 9.762465771405488e-06, "loss": 2.0851, "step": 15502 }, { "epoch": 0.52, "grad_norm": 0.7543918490409851, "learning_rate": 9.761403179710092e-06, "loss": 2.0588, "step": 15503 }, { "epoch": 0.52, "grad_norm": 0.7554641962051392, "learning_rate": 9.760340590710227e-06, "loss": 2.0982, "step": 15504 }, { "epoch": 0.52, "grad_norm": 0.7310226559638977, "learning_rate": 9.759278004417893e-06, "loss": 1.9929, "step": 15505 }, { "epoch": 0.52, "grad_norm": 0.7380367517471313, "learning_rate": 9.758215420845095e-06, "loss": 2.0211, "step": 15506 }, { "epoch": 0.52, "grad_norm": 0.7349383234977722, "learning_rate": 9.75715284000384e-06, "loss": 1.9907, "step": 15507 }, { "epoch": 0.52, "grad_norm": 0.7183637022972107, "learning_rate": 9.756090261906129e-06, "loss": 2.0757, "step": 15508 }, { "epoch": 0.52, "grad_norm": 0.7491658926010132, "learning_rate": 9.755027686563963e-06, "loss": 2.103, "step": 15509 }, { "epoch": 0.52, "grad_norm": 0.7532989978790283, "learning_rate": 9.753965113989353e-06, "loss": 2.1514, "step": 15510 }, { "epoch": 0.52, "grad_norm": 0.7262265682220459, "learning_rate": 9.752902544194304e-06, "loss": 2.0946, "step": 15511 }, { "epoch": 0.52, "grad_norm": 0.7389784455299377, "learning_rate": 9.751839977190815e-06, "loss": 2.1384, "step": 15512 }, { "epoch": 0.52, "grad_norm": 0.7374013066291809, "learning_rate": 9.750777412990892e-06, "loss": 2.0292, "step": 15513 }, { "epoch": 0.52, "grad_norm": 0.7303351759910583, "learning_rate": 9.74971485160654e-06, "loss": 2.0003, "step": 15514 }, { "epoch": 0.52, "grad_norm": 0.7219321727752686, "learning_rate": 9.748652293049761e-06, "loss": 2.0752, "step": 15515 }, { "epoch": 0.52, "grad_norm": 0.7591698169708252, "learning_rate": 9.747589737332566e-06, "loss": 2.1115, "step": 15516 }, { "epoch": 0.52, "grad_norm": 0.7176534533500671, "learning_rate": 9.74652718446695e-06, "loss": 1.9945, "step": 15517 }, { "epoch": 0.52, "grad_norm": 0.7573475241661072, "learning_rate": 9.745464634464923e-06, "loss": 2.0965, "step": 15518 }, { "epoch": 0.52, "grad_norm": 0.7309889793395996, "learning_rate": 9.744402087338485e-06, "loss": 2.1003, "step": 15519 }, { "epoch": 0.52, "grad_norm": 0.7442038059234619, "learning_rate": 9.743339543099649e-06, "loss": 2.0572, "step": 15520 }, { "epoch": 0.52, "grad_norm": 0.7452618479728699, "learning_rate": 9.742277001760406e-06, "loss": 2.0766, "step": 15521 }, { "epoch": 0.52, "grad_norm": 0.7495506405830383, "learning_rate": 9.741214463332766e-06, "loss": 2.1218, "step": 15522 }, { "epoch": 0.52, "grad_norm": 0.7246361374855042, "learning_rate": 9.740151927828736e-06, "loss": 2.0471, "step": 15523 }, { "epoch": 0.52, "grad_norm": 0.7134158611297607, "learning_rate": 9.739089395260316e-06, "loss": 2.0002, "step": 15524 }, { "epoch": 0.52, "grad_norm": 0.7300854325294495, "learning_rate": 9.738026865639515e-06, "loss": 2.0589, "step": 15525 }, { "epoch": 0.52, "grad_norm": 0.7746339440345764, "learning_rate": 9.73696433897833e-06, "loss": 2.0503, "step": 15526 }, { "epoch": 0.52, "grad_norm": 0.7450358271598816, "learning_rate": 9.735901815288767e-06, "loss": 2.1289, "step": 15527 }, { "epoch": 0.52, "grad_norm": 0.7547447681427002, "learning_rate": 9.734839294582828e-06, "loss": 2.0906, "step": 15528 }, { "epoch": 0.52, "grad_norm": 0.7442801594734192, "learning_rate": 9.733776776872528e-06, "loss": 2.1284, "step": 15529 }, { "epoch": 0.52, "grad_norm": 0.7449796795845032, "learning_rate": 9.732714262169854e-06, "loss": 1.9984, "step": 15530 }, { "epoch": 0.52, "grad_norm": 0.7429680824279785, "learning_rate": 9.731651750486819e-06, "loss": 2.107, "step": 15531 }, { "epoch": 0.52, "grad_norm": 0.7331522107124329, "learning_rate": 9.730589241835426e-06, "loss": 2.0447, "step": 15532 }, { "epoch": 0.52, "grad_norm": 0.7749835848808289, "learning_rate": 9.72952673622768e-06, "loss": 2.0516, "step": 15533 }, { "epoch": 0.52, "grad_norm": 0.7580354809761047, "learning_rate": 9.72846423367558e-06, "loss": 2.0497, "step": 15534 }, { "epoch": 0.52, "grad_norm": 0.7220734357833862, "learning_rate": 9.727401734191134e-06, "loss": 2.0757, "step": 15535 }, { "epoch": 0.52, "grad_norm": 0.766269326210022, "learning_rate": 9.726339237786338e-06, "loss": 2.0983, "step": 15536 }, { "epoch": 0.52, "grad_norm": 0.7298220992088318, "learning_rate": 9.725276744473206e-06, "loss": 2.0778, "step": 15537 }, { "epoch": 0.52, "grad_norm": 0.7452158331871033, "learning_rate": 9.724214254263737e-06, "loss": 2.0963, "step": 15538 }, { "epoch": 0.52, "grad_norm": 0.7327550649642944, "learning_rate": 9.72315176716993e-06, "loss": 2.0423, "step": 15539 }, { "epoch": 0.52, "grad_norm": 0.7246344089508057, "learning_rate": 9.722089283203795e-06, "loss": 2.0486, "step": 15540 }, { "epoch": 0.52, "grad_norm": 0.7150371670722961, "learning_rate": 9.721026802377331e-06, "loss": 2.071, "step": 15541 }, { "epoch": 0.52, "grad_norm": 0.7200292944908142, "learning_rate": 9.719964324702545e-06, "loss": 2.0106, "step": 15542 }, { "epoch": 0.52, "grad_norm": 0.7637087106704712, "learning_rate": 9.718901850191435e-06, "loss": 2.0936, "step": 15543 }, { "epoch": 0.52, "grad_norm": 0.7801696062088013, "learning_rate": 9.717839378856006e-06, "loss": 2.1, "step": 15544 }, { "epoch": 0.52, "grad_norm": 0.7393117547035217, "learning_rate": 9.716776910708265e-06, "loss": 2.0983, "step": 15545 }, { "epoch": 0.52, "grad_norm": 0.7470100522041321, "learning_rate": 9.715714445760212e-06, "loss": 2.0617, "step": 15546 }, { "epoch": 0.52, "grad_norm": 0.7474226951599121, "learning_rate": 9.714651984023853e-06, "loss": 2.0158, "step": 15547 }, { "epoch": 0.52, "grad_norm": 0.7845041751861572, "learning_rate": 9.713589525511185e-06, "loss": 2.0418, "step": 15548 }, { "epoch": 0.52, "grad_norm": 0.7198737859725952, "learning_rate": 9.712527070234216e-06, "loss": 2.022, "step": 15549 }, { "epoch": 0.52, "grad_norm": 0.7450085282325745, "learning_rate": 9.711464618204945e-06, "loss": 2.0532, "step": 15550 }, { "epoch": 0.52, "grad_norm": 0.7316471338272095, "learning_rate": 9.710402169435385e-06, "loss": 2.1293, "step": 15551 }, { "epoch": 0.52, "grad_norm": 0.7153242826461792, "learning_rate": 9.709339723937524e-06, "loss": 2.1143, "step": 15552 }, { "epoch": 0.52, "grad_norm": 0.7542601823806763, "learning_rate": 9.708277281723375e-06, "loss": 2.1353, "step": 15553 }, { "epoch": 0.52, "grad_norm": 0.7349059581756592, "learning_rate": 9.707214842804939e-06, "loss": 2.1952, "step": 15554 }, { "epoch": 0.52, "grad_norm": 0.7501107454299927, "learning_rate": 9.706152407194217e-06, "loss": 2.105, "step": 15555 }, { "epoch": 0.52, "grad_norm": 0.727715253829956, "learning_rate": 9.705089974903216e-06, "loss": 2.0784, "step": 15556 }, { "epoch": 0.52, "grad_norm": 0.744004487991333, "learning_rate": 9.704027545943934e-06, "loss": 2.1167, "step": 15557 }, { "epoch": 0.52, "grad_norm": 0.7010319828987122, "learning_rate": 9.702965120328372e-06, "loss": 1.9665, "step": 15558 }, { "epoch": 0.52, "grad_norm": 0.731033444404602, "learning_rate": 9.701902698068539e-06, "loss": 2.0776, "step": 15559 }, { "epoch": 0.52, "grad_norm": 0.7611697912216187, "learning_rate": 9.700840279176435e-06, "loss": 2.1183, "step": 15560 }, { "epoch": 0.52, "grad_norm": 0.7328007817268372, "learning_rate": 9.699777863664062e-06, "loss": 2.0478, "step": 15561 }, { "epoch": 0.52, "grad_norm": 0.7671434283256531, "learning_rate": 9.698715451543422e-06, "loss": 2.1109, "step": 15562 }, { "epoch": 0.52, "grad_norm": 0.7581314444541931, "learning_rate": 9.697653042826516e-06, "loss": 2.0886, "step": 15563 }, { "epoch": 0.52, "grad_norm": 0.764369785785675, "learning_rate": 9.696590637525354e-06, "loss": 2.0315, "step": 15564 }, { "epoch": 0.52, "grad_norm": 0.731525719165802, "learning_rate": 9.69552823565193e-06, "loss": 2.0346, "step": 15565 }, { "epoch": 0.52, "grad_norm": 0.7416642308235168, "learning_rate": 9.694465837218247e-06, "loss": 2.0398, "step": 15566 }, { "epoch": 0.52, "grad_norm": 0.7409846782684326, "learning_rate": 9.693403442236313e-06, "loss": 2.0868, "step": 15567 }, { "epoch": 0.52, "grad_norm": 0.7339968681335449, "learning_rate": 9.692341050718126e-06, "loss": 2.1072, "step": 15568 }, { "epoch": 0.52, "grad_norm": 0.7538873553276062, "learning_rate": 9.691278662675694e-06, "loss": 2.0655, "step": 15569 }, { "epoch": 0.52, "grad_norm": 0.727290689945221, "learning_rate": 9.690216278121011e-06, "loss": 2.0388, "step": 15570 }, { "epoch": 0.52, "grad_norm": 0.7230735421180725, "learning_rate": 9.689153897066082e-06, "loss": 2.1174, "step": 15571 }, { "epoch": 0.52, "grad_norm": 0.7105750441551208, "learning_rate": 9.68809151952291e-06, "loss": 2.0501, "step": 15572 }, { "epoch": 0.52, "grad_norm": 0.7411559224128723, "learning_rate": 9.687029145503503e-06, "loss": 2.0669, "step": 15573 }, { "epoch": 0.52, "grad_norm": 0.7452191710472107, "learning_rate": 9.68596677501985e-06, "loss": 2.0551, "step": 15574 }, { "epoch": 0.52, "grad_norm": 0.7293837666511536, "learning_rate": 9.684904408083962e-06, "loss": 2.0482, "step": 15575 }, { "epoch": 0.52, "grad_norm": 0.7480900287628174, "learning_rate": 9.683842044707841e-06, "loss": 2.0255, "step": 15576 }, { "epoch": 0.52, "grad_norm": 0.7426881194114685, "learning_rate": 9.682779684903486e-06, "loss": 2.087, "step": 15577 }, { "epoch": 0.52, "grad_norm": 0.7375836968421936, "learning_rate": 9.681717328682904e-06, "loss": 2.0823, "step": 15578 }, { "epoch": 0.52, "grad_norm": 0.7787740230560303, "learning_rate": 9.68065497605809e-06, "loss": 2.1588, "step": 15579 }, { "epoch": 0.52, "grad_norm": 0.7724153399467468, "learning_rate": 9.679592627041046e-06, "loss": 2.1198, "step": 15580 }, { "epoch": 0.52, "grad_norm": 0.7504175901412964, "learning_rate": 9.678530281643779e-06, "loss": 2.0601, "step": 15581 }, { "epoch": 0.52, "grad_norm": 0.7513518929481506, "learning_rate": 9.677467939878292e-06, "loss": 2.0904, "step": 15582 }, { "epoch": 0.52, "grad_norm": 0.7302078604698181, "learning_rate": 9.67640560175658e-06, "loss": 2.096, "step": 15583 }, { "epoch": 0.52, "grad_norm": 0.7388531565666199, "learning_rate": 9.675343267290647e-06, "loss": 2.0967, "step": 15584 }, { "epoch": 0.52, "grad_norm": 0.7590838670730591, "learning_rate": 9.674280936492496e-06, "loss": 2.0323, "step": 15585 }, { "epoch": 0.52, "grad_norm": 0.75392085313797, "learning_rate": 9.673218609374127e-06, "loss": 2.0687, "step": 15586 }, { "epoch": 0.52, "grad_norm": 0.7369232177734375, "learning_rate": 9.672156285947549e-06, "loss": 2.0584, "step": 15587 }, { "epoch": 0.52, "grad_norm": 0.7518595457077026, "learning_rate": 9.671093966224749e-06, "loss": 2.1102, "step": 15588 }, { "epoch": 0.52, "grad_norm": 0.795049250125885, "learning_rate": 9.67003165021774e-06, "loss": 2.065, "step": 15589 }, { "epoch": 0.52, "grad_norm": 0.719650387763977, "learning_rate": 9.668969337938521e-06, "loss": 2.0489, "step": 15590 }, { "epoch": 0.52, "grad_norm": 0.7470910549163818, "learning_rate": 9.667907029399094e-06, "loss": 2.0173, "step": 15591 }, { "epoch": 0.52, "grad_norm": 0.7344533801078796, "learning_rate": 9.666844724611456e-06, "loss": 2.1142, "step": 15592 }, { "epoch": 0.52, "grad_norm": 0.7749903202056885, "learning_rate": 9.665782423587611e-06, "loss": 2.0897, "step": 15593 }, { "epoch": 0.52, "grad_norm": 0.7238495945930481, "learning_rate": 9.664720126339559e-06, "loss": 2.0848, "step": 15594 }, { "epoch": 0.52, "grad_norm": 0.7621783018112183, "learning_rate": 9.663657832879307e-06, "loss": 2.0491, "step": 15595 }, { "epoch": 0.52, "grad_norm": 0.8091264367103577, "learning_rate": 9.662595543218847e-06, "loss": 2.1907, "step": 15596 }, { "epoch": 0.52, "grad_norm": 0.7234330177307129, "learning_rate": 9.661533257370188e-06, "loss": 2.0327, "step": 15597 }, { "epoch": 0.52, "grad_norm": 0.7885359525680542, "learning_rate": 9.660470975345325e-06, "loss": 1.9826, "step": 15598 }, { "epoch": 0.52, "grad_norm": 0.822218656539917, "learning_rate": 9.659408697156264e-06, "loss": 2.0889, "step": 15599 }, { "epoch": 0.52, "grad_norm": 0.7173131704330444, "learning_rate": 9.658346422815006e-06, "loss": 2.0554, "step": 15600 }, { "epoch": 0.52, "grad_norm": 0.7182657122612, "learning_rate": 9.657284152333547e-06, "loss": 2.0971, "step": 15601 }, { "epoch": 0.52, "grad_norm": 0.8015856146812439, "learning_rate": 9.65622188572389e-06, "loss": 2.0837, "step": 15602 }, { "epoch": 0.52, "grad_norm": 0.7591501474380493, "learning_rate": 9.655159622998037e-06, "loss": 2.0357, "step": 15603 }, { "epoch": 0.52, "grad_norm": 0.7415878176689148, "learning_rate": 9.65409736416799e-06, "loss": 2.0732, "step": 15604 }, { "epoch": 0.52, "grad_norm": 0.7339306473731995, "learning_rate": 9.653035109245749e-06, "loss": 1.9814, "step": 15605 }, { "epoch": 0.52, "grad_norm": 0.7654830813407898, "learning_rate": 9.651972858243312e-06, "loss": 2.0572, "step": 15606 }, { "epoch": 0.52, "grad_norm": 0.7673883438110352, "learning_rate": 9.650910611172681e-06, "loss": 2.0857, "step": 15607 }, { "epoch": 0.52, "grad_norm": 0.7576623558998108, "learning_rate": 9.649848368045856e-06, "loss": 2.142, "step": 15608 }, { "epoch": 0.52, "grad_norm": 0.7089091539382935, "learning_rate": 9.648786128874846e-06, "loss": 2.0497, "step": 15609 }, { "epoch": 0.52, "grad_norm": 0.7996172308921814, "learning_rate": 9.647723893671639e-06, "loss": 2.0808, "step": 15610 }, { "epoch": 0.52, "grad_norm": 0.7176097631454468, "learning_rate": 9.646661662448241e-06, "loss": 2.0339, "step": 15611 }, { "epoch": 0.52, "grad_norm": 0.7384870052337646, "learning_rate": 9.645599435216653e-06, "loss": 2.0507, "step": 15612 }, { "epoch": 0.52, "grad_norm": 0.7355840802192688, "learning_rate": 9.644537211988877e-06, "loss": 2.0441, "step": 15613 }, { "epoch": 0.52, "grad_norm": 0.7313699126243591, "learning_rate": 9.64347499277691e-06, "loss": 2.0774, "step": 15614 }, { "epoch": 0.52, "grad_norm": 0.728209376335144, "learning_rate": 9.642412777592753e-06, "loss": 2.0528, "step": 15615 }, { "epoch": 0.52, "grad_norm": 0.7632866501808167, "learning_rate": 9.641350566448405e-06, "loss": 2.0603, "step": 15616 }, { "epoch": 0.52, "grad_norm": 0.7017009854316711, "learning_rate": 9.640288359355875e-06, "loss": 2.0926, "step": 15617 }, { "epoch": 0.52, "grad_norm": 0.729819118976593, "learning_rate": 9.639226156327149e-06, "loss": 2.0396, "step": 15618 }, { "epoch": 0.52, "grad_norm": 0.7357673645019531, "learning_rate": 9.638163957374238e-06, "loss": 2.0588, "step": 15619 }, { "epoch": 0.52, "grad_norm": 0.7222998738288879, "learning_rate": 9.637101762509136e-06, "loss": 2.0495, "step": 15620 }, { "epoch": 0.52, "grad_norm": 0.7392330765724182, "learning_rate": 9.636039571743845e-06, "loss": 2.1075, "step": 15621 }, { "epoch": 0.52, "grad_norm": 0.7758607864379883, "learning_rate": 9.634977385090372e-06, "loss": 2.0415, "step": 15622 }, { "epoch": 0.52, "grad_norm": 0.7159187197685242, "learning_rate": 9.633915202560705e-06, "loss": 2.029, "step": 15623 }, { "epoch": 0.52, "grad_norm": 0.7334617972373962, "learning_rate": 9.63285302416685e-06, "loss": 2.0291, "step": 15624 }, { "epoch": 0.52, "grad_norm": 0.7247999310493469, "learning_rate": 9.631790849920805e-06, "loss": 2.0225, "step": 15625 }, { "epoch": 0.52, "grad_norm": 0.7163276076316833, "learning_rate": 9.630728679834577e-06, "loss": 2.0576, "step": 15626 }, { "epoch": 0.52, "grad_norm": 0.7445126175880432, "learning_rate": 9.629666513920155e-06, "loss": 2.0651, "step": 15627 }, { "epoch": 0.52, "grad_norm": 0.776853621006012, "learning_rate": 9.628604352189546e-06, "loss": 2.086, "step": 15628 }, { "epoch": 0.52, "grad_norm": 0.738005518913269, "learning_rate": 9.627542194654743e-06, "loss": 2.079, "step": 15629 }, { "epoch": 0.52, "grad_norm": 0.7269394397735596, "learning_rate": 9.626480041327754e-06, "loss": 2.0773, "step": 15630 }, { "epoch": 0.52, "grad_norm": 0.7316299676895142, "learning_rate": 9.625417892220575e-06, "loss": 2.1224, "step": 15631 }, { "epoch": 0.52, "grad_norm": 0.7305302023887634, "learning_rate": 9.624355747345205e-06, "loss": 2.0894, "step": 15632 }, { "epoch": 0.52, "grad_norm": 0.7339333891868591, "learning_rate": 9.623293606713641e-06, "loss": 2.0949, "step": 15633 }, { "epoch": 0.52, "grad_norm": 0.7231512665748596, "learning_rate": 9.622231470337887e-06, "loss": 2.0842, "step": 15634 }, { "epoch": 0.52, "grad_norm": 0.7310609221458435, "learning_rate": 9.621169338229943e-06, "loss": 2.1719, "step": 15635 }, { "epoch": 0.52, "grad_norm": 0.7382004857063293, "learning_rate": 9.620107210401802e-06, "loss": 2.1282, "step": 15636 }, { "epoch": 0.52, "grad_norm": 0.7339346408843994, "learning_rate": 9.619045086865467e-06, "loss": 2.0786, "step": 15637 }, { "epoch": 0.52, "grad_norm": 0.7268326878547668, "learning_rate": 9.617982967632937e-06, "loss": 2.0701, "step": 15638 }, { "epoch": 0.52, "grad_norm": 0.7250897884368896, "learning_rate": 9.616920852716214e-06, "loss": 2.0398, "step": 15639 }, { "epoch": 0.52, "grad_norm": 0.7248212099075317, "learning_rate": 9.615858742127296e-06, "loss": 2.1378, "step": 15640 }, { "epoch": 0.52, "grad_norm": 0.725259006023407, "learning_rate": 9.614796635878178e-06, "loss": 2.0715, "step": 15641 }, { "epoch": 0.52, "grad_norm": 0.7079594731330872, "learning_rate": 9.613734533980861e-06, "loss": 2.1301, "step": 15642 }, { "epoch": 0.52, "grad_norm": 0.7052373886108398, "learning_rate": 9.612672436447345e-06, "loss": 2.071, "step": 15643 }, { "epoch": 0.52, "grad_norm": 0.7427425384521484, "learning_rate": 9.611610343289635e-06, "loss": 2.0805, "step": 15644 }, { "epoch": 0.52, "grad_norm": 0.7867558002471924, "learning_rate": 9.610548254519716e-06, "loss": 2.0804, "step": 15645 }, { "epoch": 0.52, "grad_norm": 0.7661941647529602, "learning_rate": 9.609486170149597e-06, "loss": 2.1064, "step": 15646 }, { "epoch": 0.52, "grad_norm": 0.7181205749511719, "learning_rate": 9.608424090191274e-06, "loss": 2.063, "step": 15647 }, { "epoch": 0.52, "grad_norm": 0.7304525375366211, "learning_rate": 9.60736201465675e-06, "loss": 2.105, "step": 15648 }, { "epoch": 0.52, "grad_norm": 0.7100653052330017, "learning_rate": 9.606299943558017e-06, "loss": 2.0914, "step": 15649 }, { "epoch": 0.52, "grad_norm": 0.7128437757492065, "learning_rate": 9.605237876907075e-06, "loss": 1.9821, "step": 15650 }, { "epoch": 0.52, "grad_norm": 0.7711062431335449, "learning_rate": 9.604175814715923e-06, "loss": 2.076, "step": 15651 }, { "epoch": 0.52, "grad_norm": 0.7840140461921692, "learning_rate": 9.603113756996562e-06, "loss": 2.0621, "step": 15652 }, { "epoch": 0.52, "grad_norm": 0.7471088767051697, "learning_rate": 9.602051703760994e-06, "loss": 2.0794, "step": 15653 }, { "epoch": 0.52, "grad_norm": 0.7418235540390015, "learning_rate": 9.600989655021208e-06, "loss": 2.0833, "step": 15654 }, { "epoch": 0.52, "grad_norm": 0.7122145891189575, "learning_rate": 9.599927610789207e-06, "loss": 2.0023, "step": 15655 }, { "epoch": 0.52, "grad_norm": 0.7504203915596008, "learning_rate": 9.59886557107699e-06, "loss": 2.0605, "step": 15656 }, { "epoch": 0.52, "grad_norm": 0.7434459924697876, "learning_rate": 9.597803535896558e-06, "loss": 2.0022, "step": 15657 }, { "epoch": 0.52, "grad_norm": 0.7607999444007874, "learning_rate": 9.596741505259903e-06, "loss": 2.0711, "step": 15658 }, { "epoch": 0.52, "grad_norm": 0.7218484282493591, "learning_rate": 9.595679479179023e-06, "loss": 2.1075, "step": 15659 }, { "epoch": 0.52, "grad_norm": 0.6917795538902283, "learning_rate": 9.594617457665921e-06, "loss": 1.9981, "step": 15660 }, { "epoch": 0.52, "grad_norm": 0.7257257699966431, "learning_rate": 9.593555440732595e-06, "loss": 2.0641, "step": 15661 }, { "epoch": 0.52, "grad_norm": 0.7253341674804688, "learning_rate": 9.592493428391042e-06, "loss": 2.0578, "step": 15662 }, { "epoch": 0.52, "grad_norm": 0.7403224110603333, "learning_rate": 9.59143142065326e-06, "loss": 2.0707, "step": 15663 }, { "epoch": 0.52, "grad_norm": 0.7456210851669312, "learning_rate": 9.590369417531245e-06, "loss": 2.059, "step": 15664 }, { "epoch": 0.52, "grad_norm": 0.7223693132400513, "learning_rate": 9.589307419036993e-06, "loss": 2.0749, "step": 15665 }, { "epoch": 0.52, "grad_norm": 0.7177839875221252, "learning_rate": 9.588245425182513e-06, "loss": 2.0554, "step": 15666 }, { "epoch": 0.52, "grad_norm": 0.741969645023346, "learning_rate": 9.587183435979788e-06, "loss": 2.0956, "step": 15667 }, { "epoch": 0.52, "grad_norm": 0.7246341109275818, "learning_rate": 9.586121451440824e-06, "loss": 2.1022, "step": 15668 }, { "epoch": 0.52, "grad_norm": 0.7434824109077454, "learning_rate": 9.585059471577618e-06, "loss": 2.0682, "step": 15669 }, { "epoch": 0.52, "grad_norm": 0.7317240834236145, "learning_rate": 9.583997496402166e-06, "loss": 2.0692, "step": 15670 }, { "epoch": 0.52, "grad_norm": 0.7391767501831055, "learning_rate": 9.58293552592647e-06, "loss": 2.0706, "step": 15671 }, { "epoch": 0.52, "grad_norm": 0.7675814032554626, "learning_rate": 9.58187356016252e-06, "loss": 2.066, "step": 15672 }, { "epoch": 0.52, "grad_norm": 0.7880501747131348, "learning_rate": 9.580811599122318e-06, "loss": 2.1229, "step": 15673 }, { "epoch": 0.52, "grad_norm": 0.8052398562431335, "learning_rate": 9.579749642817862e-06, "loss": 2.1123, "step": 15674 }, { "epoch": 0.52, "grad_norm": 0.7399470210075378, "learning_rate": 9.578687691261149e-06, "loss": 2.0813, "step": 15675 }, { "epoch": 0.52, "grad_norm": 0.699654757976532, "learning_rate": 9.577625744464176e-06, "loss": 2.0749, "step": 15676 }, { "epoch": 0.52, "grad_norm": 0.7200848460197449, "learning_rate": 9.576563802438937e-06, "loss": 2.0896, "step": 15677 }, { "epoch": 0.52, "grad_norm": 0.7724472284317017, "learning_rate": 9.575501865197433e-06, "loss": 2.0847, "step": 15678 }, { "epoch": 0.52, "grad_norm": 0.7234969735145569, "learning_rate": 9.574439932751663e-06, "loss": 2.1139, "step": 15679 }, { "epoch": 0.52, "grad_norm": 0.7368679046630859, "learning_rate": 9.573378005113618e-06, "loss": 2.0898, "step": 15680 }, { "epoch": 0.52, "grad_norm": 0.7394647002220154, "learning_rate": 9.572316082295297e-06, "loss": 2.1129, "step": 15681 }, { "epoch": 0.52, "grad_norm": 0.792491614818573, "learning_rate": 9.571254164308701e-06, "loss": 1.9983, "step": 15682 }, { "epoch": 0.52, "grad_norm": 0.7280409932136536, "learning_rate": 9.570192251165823e-06, "loss": 2.0351, "step": 15683 }, { "epoch": 0.52, "grad_norm": 0.7441772222518921, "learning_rate": 9.569130342878663e-06, "loss": 2.0424, "step": 15684 }, { "epoch": 0.52, "grad_norm": 0.7174262404441833, "learning_rate": 9.568068439459214e-06, "loss": 2.089, "step": 15685 }, { "epoch": 0.52, "grad_norm": 0.7425598502159119, "learning_rate": 9.567006540919475e-06, "loss": 2.0367, "step": 15686 }, { "epoch": 0.52, "grad_norm": 0.7531047463417053, "learning_rate": 9.56594464727144e-06, "loss": 2.1734, "step": 15687 }, { "epoch": 0.52, "grad_norm": 0.7682279348373413, "learning_rate": 9.564882758527115e-06, "loss": 2.0675, "step": 15688 }, { "epoch": 0.52, "grad_norm": 0.751871645450592, "learning_rate": 9.563820874698483e-06, "loss": 2.0384, "step": 15689 }, { "epoch": 0.52, "grad_norm": 0.7457606792449951, "learning_rate": 9.562758995797548e-06, "loss": 2.1188, "step": 15690 }, { "epoch": 0.52, "grad_norm": 0.7763810157775879, "learning_rate": 9.561697121836308e-06, "loss": 2.0363, "step": 15691 }, { "epoch": 0.52, "grad_norm": 0.7256711721420288, "learning_rate": 9.560635252826755e-06, "loss": 2.0025, "step": 15692 }, { "epoch": 0.52, "grad_norm": 0.7388671636581421, "learning_rate": 9.55957338878089e-06, "loss": 2.0253, "step": 15693 }, { "epoch": 0.52, "grad_norm": 0.7272011041641235, "learning_rate": 9.558511529710704e-06, "loss": 2.0942, "step": 15694 }, { "epoch": 0.52, "grad_norm": 0.7340438365936279, "learning_rate": 9.557449675628195e-06, "loss": 2.0918, "step": 15695 }, { "epoch": 0.52, "grad_norm": 0.7305469512939453, "learning_rate": 9.556387826545363e-06, "loss": 2.1108, "step": 15696 }, { "epoch": 0.52, "grad_norm": 0.7320858240127563, "learning_rate": 9.555325982474201e-06, "loss": 2.0539, "step": 15697 }, { "epoch": 0.52, "grad_norm": 0.757124662399292, "learning_rate": 9.554264143426706e-06, "loss": 2.0506, "step": 15698 }, { "epoch": 0.52, "grad_norm": 0.7323154807090759, "learning_rate": 9.553202309414872e-06, "loss": 2.0326, "step": 15699 }, { "epoch": 0.52, "grad_norm": 0.7463230490684509, "learning_rate": 9.552140480450695e-06, "loss": 2.0583, "step": 15700 }, { "epoch": 0.52, "grad_norm": 0.7544329166412354, "learning_rate": 9.551078656546173e-06, "loss": 1.9961, "step": 15701 }, { "epoch": 0.52, "grad_norm": 0.7124444246292114, "learning_rate": 9.550016837713307e-06, "loss": 2.0705, "step": 15702 }, { "epoch": 0.52, "grad_norm": 0.7511016726493835, "learning_rate": 9.54895502396408e-06, "loss": 2.0319, "step": 15703 }, { "epoch": 0.52, "grad_norm": 0.7437857985496521, "learning_rate": 9.547893215310498e-06, "loss": 2.1308, "step": 15704 }, { "epoch": 0.52, "grad_norm": 0.7489336729049683, "learning_rate": 9.546831411764552e-06, "loss": 2.1006, "step": 15705 }, { "epoch": 0.52, "grad_norm": 0.7377965450286865, "learning_rate": 9.545769613338242e-06, "loss": 2.032, "step": 15706 }, { "epoch": 0.52, "grad_norm": 0.7281315326690674, "learning_rate": 9.544707820043558e-06, "loss": 2.0589, "step": 15707 }, { "epoch": 0.52, "grad_norm": 0.7634711265563965, "learning_rate": 9.5436460318925e-06, "loss": 2.1049, "step": 15708 }, { "epoch": 0.52, "grad_norm": 0.7188063859939575, "learning_rate": 9.542584248897057e-06, "loss": 2.0291, "step": 15709 }, { "epoch": 0.52, "grad_norm": 0.7308862209320068, "learning_rate": 9.541522471069237e-06, "loss": 2.1017, "step": 15710 }, { "epoch": 0.52, "grad_norm": 0.7983438968658447, "learning_rate": 9.540460698421022e-06, "loss": 2.103, "step": 15711 }, { "epoch": 0.52, "grad_norm": 0.7809951901435852, "learning_rate": 9.539398930964412e-06, "loss": 2.0465, "step": 15712 }, { "epoch": 0.52, "grad_norm": 0.7638314962387085, "learning_rate": 9.538337168711405e-06, "loss": 2.1133, "step": 15713 }, { "epoch": 0.52, "grad_norm": 0.7468096017837524, "learning_rate": 9.537275411673992e-06, "loss": 2.1609, "step": 15714 }, { "epoch": 0.52, "grad_norm": 0.7424658536911011, "learning_rate": 9.536213659864176e-06, "loss": 2.0808, "step": 15715 }, { "epoch": 0.52, "grad_norm": 0.7466991543769836, "learning_rate": 9.535151913293942e-06, "loss": 2.0162, "step": 15716 }, { "epoch": 0.52, "grad_norm": 0.7339297533035278, "learning_rate": 9.534090171975286e-06, "loss": 2.0988, "step": 15717 }, { "epoch": 0.52, "grad_norm": 0.70721834897995, "learning_rate": 9.53302843592021e-06, "loss": 2.0169, "step": 15718 }, { "epoch": 0.52, "grad_norm": 0.7596811056137085, "learning_rate": 9.531966705140707e-06, "loss": 2.0618, "step": 15719 }, { "epoch": 0.52, "grad_norm": 0.7654244303703308, "learning_rate": 9.530904979648767e-06, "loss": 2.1149, "step": 15720 }, { "epoch": 0.52, "grad_norm": 0.7776036262512207, "learning_rate": 9.529843259456389e-06, "loss": 2.0832, "step": 15721 }, { "epoch": 0.52, "grad_norm": 0.7850261926651001, "learning_rate": 9.528781544575563e-06, "loss": 2.1057, "step": 15722 }, { "epoch": 0.52, "grad_norm": 0.7335439920425415, "learning_rate": 9.52771983501829e-06, "loss": 2.1113, "step": 15723 }, { "epoch": 0.52, "grad_norm": 0.7434214353561401, "learning_rate": 9.526658130796566e-06, "loss": 2.122, "step": 15724 }, { "epoch": 0.52, "grad_norm": 0.7806556820869446, "learning_rate": 9.525596431922372e-06, "loss": 2.1181, "step": 15725 }, { "epoch": 0.52, "grad_norm": 0.7381771206855774, "learning_rate": 9.524534738407716e-06, "loss": 2.091, "step": 15726 }, { "epoch": 0.52, "grad_norm": 0.7438307404518127, "learning_rate": 9.523473050264588e-06, "loss": 2.1019, "step": 15727 }, { "epoch": 0.52, "grad_norm": 0.7604383826255798, "learning_rate": 9.522411367504984e-06, "loss": 2.0668, "step": 15728 }, { "epoch": 0.52, "grad_norm": 0.7254573702812195, "learning_rate": 9.521349690140892e-06, "loss": 2.0621, "step": 15729 }, { "epoch": 0.52, "grad_norm": 0.7669029831886292, "learning_rate": 9.520288018184311e-06, "loss": 2.0862, "step": 15730 }, { "epoch": 0.52, "grad_norm": 0.7311447262763977, "learning_rate": 9.519226351647237e-06, "loss": 2.0101, "step": 15731 }, { "epoch": 0.52, "grad_norm": 0.8445455431938171, "learning_rate": 9.518164690541662e-06, "loss": 2.0962, "step": 15732 }, { "epoch": 0.52, "grad_norm": 0.724023699760437, "learning_rate": 9.51710303487958e-06, "loss": 2.093, "step": 15733 }, { "epoch": 0.52, "grad_norm": 0.7382174730300903, "learning_rate": 9.516041384672987e-06, "loss": 2.0085, "step": 15734 }, { "epoch": 0.52, "grad_norm": 0.734093964099884, "learning_rate": 9.51497973993387e-06, "loss": 1.9806, "step": 15735 }, { "epoch": 0.52, "grad_norm": 0.760665237903595, "learning_rate": 9.51391810067423e-06, "loss": 2.1102, "step": 15736 }, { "epoch": 0.52, "grad_norm": 0.7396479249000549, "learning_rate": 9.512856466906061e-06, "loss": 2.0624, "step": 15737 }, { "epoch": 0.52, "grad_norm": 0.7344732880592346, "learning_rate": 9.511794838641349e-06, "loss": 2.0368, "step": 15738 }, { "epoch": 0.52, "grad_norm": 0.7259513735771179, "learning_rate": 9.510733215892095e-06, "loss": 2.0857, "step": 15739 }, { "epoch": 0.52, "grad_norm": 0.7683669328689575, "learning_rate": 9.50967159867029e-06, "loss": 2.059, "step": 15740 }, { "epoch": 0.52, "grad_norm": 0.7500695586204529, "learning_rate": 9.50860998698793e-06, "loss": 2.1114, "step": 15741 }, { "epoch": 0.52, "grad_norm": 0.7788426280021667, "learning_rate": 9.507548380857006e-06, "loss": 2.1336, "step": 15742 }, { "epoch": 0.52, "grad_norm": 0.7230142951011658, "learning_rate": 9.50648678028951e-06, "loss": 2.0642, "step": 15743 }, { "epoch": 0.52, "grad_norm": 0.7244279980659485, "learning_rate": 9.505425185297436e-06, "loss": 2.1015, "step": 15744 }, { "epoch": 0.52, "grad_norm": 0.7430877089500427, "learning_rate": 9.50436359589278e-06, "loss": 2.0847, "step": 15745 }, { "epoch": 0.52, "grad_norm": 0.7482494711875916, "learning_rate": 9.503302012087536e-06, "loss": 2.0845, "step": 15746 }, { "epoch": 0.52, "grad_norm": 0.7594310641288757, "learning_rate": 9.502240433893692e-06, "loss": 2.0668, "step": 15747 }, { "epoch": 0.52, "grad_norm": 0.7254951596260071, "learning_rate": 9.501178861323245e-06, "loss": 2.0711, "step": 15748 }, { "epoch": 0.52, "grad_norm": 0.7483556270599365, "learning_rate": 9.500117294388185e-06, "loss": 2.1087, "step": 15749 }, { "epoch": 0.52, "grad_norm": 0.7409572005271912, "learning_rate": 9.499055733100511e-06, "loss": 2.0474, "step": 15750 }, { "epoch": 0.52, "grad_norm": 0.736400842666626, "learning_rate": 9.49799417747221e-06, "loss": 2.062, "step": 15751 }, { "epoch": 0.52, "grad_norm": 0.7645689249038696, "learning_rate": 9.496932627515273e-06, "loss": 2.0918, "step": 15752 }, { "epoch": 0.52, "grad_norm": 0.7306239604949951, "learning_rate": 9.4958710832417e-06, "loss": 1.9713, "step": 15753 }, { "epoch": 0.52, "grad_norm": 0.7504016160964966, "learning_rate": 9.494809544663477e-06, "loss": 2.1383, "step": 15754 }, { "epoch": 0.52, "grad_norm": 0.7311137318611145, "learning_rate": 9.493748011792604e-06, "loss": 2.0301, "step": 15755 }, { "epoch": 0.52, "grad_norm": 0.7563546895980835, "learning_rate": 9.492686484641068e-06, "loss": 2.0522, "step": 15756 }, { "epoch": 0.52, "grad_norm": 0.7652077674865723, "learning_rate": 9.49162496322086e-06, "loss": 2.0855, "step": 15757 }, { "epoch": 0.52, "grad_norm": 0.7568408250808716, "learning_rate": 9.490563447543975e-06, "loss": 2.0712, "step": 15758 }, { "epoch": 0.52, "grad_norm": 0.7727518081665039, "learning_rate": 9.489501937622412e-06, "loss": 2.12, "step": 15759 }, { "epoch": 0.52, "grad_norm": 0.7362053394317627, "learning_rate": 9.48844043346815e-06, "loss": 2.0788, "step": 15760 }, { "epoch": 0.52, "grad_norm": 0.742225170135498, "learning_rate": 9.48737893509319e-06, "loss": 2.0499, "step": 15761 }, { "epoch": 0.52, "grad_norm": 0.7362850904464722, "learning_rate": 9.486317442509523e-06, "loss": 2.1266, "step": 15762 }, { "epoch": 0.52, "grad_norm": 0.7402482032775879, "learning_rate": 9.48525595572914e-06, "loss": 2.1284, "step": 15763 }, { "epoch": 0.52, "grad_norm": 0.7439380884170532, "learning_rate": 9.484194474764037e-06, "loss": 2.0478, "step": 15764 }, { "epoch": 0.52, "grad_norm": 0.7478271722793579, "learning_rate": 9.483132999626199e-06, "loss": 2.0321, "step": 15765 }, { "epoch": 0.52, "grad_norm": 0.7143625617027283, "learning_rate": 9.48207153032762e-06, "loss": 2.0246, "step": 15766 }, { "epoch": 0.52, "grad_norm": 0.7555240988731384, "learning_rate": 9.481010066880295e-06, "loss": 2.0967, "step": 15767 }, { "epoch": 0.52, "grad_norm": 0.7884346842765808, "learning_rate": 9.479948609296217e-06, "loss": 2.1578, "step": 15768 }, { "epoch": 0.52, "grad_norm": 0.7755685448646545, "learning_rate": 9.478887157587372e-06, "loss": 2.1375, "step": 15769 }, { "epoch": 0.52, "grad_norm": 0.723800003528595, "learning_rate": 9.477825711765754e-06, "loss": 2.069, "step": 15770 }, { "epoch": 0.52, "grad_norm": 0.7671294808387756, "learning_rate": 9.476764271843356e-06, "loss": 2.0953, "step": 15771 }, { "epoch": 0.52, "grad_norm": 0.7486233115196228, "learning_rate": 9.47570283783217e-06, "loss": 2.0755, "step": 15772 }, { "epoch": 0.52, "grad_norm": 0.7416868209838867, "learning_rate": 9.474641409744185e-06, "loss": 2.0736, "step": 15773 }, { "epoch": 0.52, "grad_norm": 0.7744722962379456, "learning_rate": 9.473579987591391e-06, "loss": 2.0116, "step": 15774 }, { "epoch": 0.52, "grad_norm": 0.767002522945404, "learning_rate": 9.472518571385784e-06, "loss": 2.1284, "step": 15775 }, { "epoch": 0.52, "grad_norm": 0.7750516533851624, "learning_rate": 9.471457161139353e-06, "loss": 2.1571, "step": 15776 }, { "epoch": 0.52, "grad_norm": 0.7492559552192688, "learning_rate": 9.47039575686409e-06, "loss": 2.0626, "step": 15777 }, { "epoch": 0.52, "grad_norm": 0.7435364127159119, "learning_rate": 9.469334358571985e-06, "loss": 2.0488, "step": 15778 }, { "epoch": 0.52, "grad_norm": 0.7209482789039612, "learning_rate": 9.468272966275029e-06, "loss": 2.0208, "step": 15779 }, { "epoch": 0.53, "grad_norm": 0.7359265685081482, "learning_rate": 9.46721157998521e-06, "loss": 1.9891, "step": 15780 }, { "epoch": 0.53, "grad_norm": 0.735598087310791, "learning_rate": 9.46615019971453e-06, "loss": 2.0773, "step": 15781 }, { "epoch": 0.53, "grad_norm": 0.7395017743110657, "learning_rate": 9.465088825474967e-06, "loss": 2.0549, "step": 15782 }, { "epoch": 0.53, "grad_norm": 0.7272185683250427, "learning_rate": 9.464027457278516e-06, "loss": 2.0781, "step": 15783 }, { "epoch": 0.53, "grad_norm": 0.750967264175415, "learning_rate": 9.462966095137171e-06, "loss": 2.0924, "step": 15784 }, { "epoch": 0.53, "grad_norm": 0.7419408559799194, "learning_rate": 9.461904739062919e-06, "loss": 2.1077, "step": 15785 }, { "epoch": 0.53, "grad_norm": 0.7364767789840698, "learning_rate": 9.460843389067756e-06, "loss": 2.033, "step": 15786 }, { "epoch": 0.53, "grad_norm": 0.7381106615066528, "learning_rate": 9.459782045163664e-06, "loss": 2.0877, "step": 15787 }, { "epoch": 0.53, "grad_norm": 0.7438233494758606, "learning_rate": 9.458720707362636e-06, "loss": 2.1061, "step": 15788 }, { "epoch": 0.53, "grad_norm": 0.7786669731140137, "learning_rate": 9.457659375676668e-06, "loss": 2.0593, "step": 15789 }, { "epoch": 0.53, "grad_norm": 0.7632341384887695, "learning_rate": 9.456598050117747e-06, "loss": 2.132, "step": 15790 }, { "epoch": 0.53, "grad_norm": 0.7435451149940491, "learning_rate": 9.455536730697862e-06, "loss": 2.1111, "step": 15791 }, { "epoch": 0.53, "grad_norm": 0.7558166980743408, "learning_rate": 9.454475417429003e-06, "loss": 2.0168, "step": 15792 }, { "epoch": 0.53, "grad_norm": 0.7478861212730408, "learning_rate": 9.453414110323161e-06, "loss": 2.031, "step": 15793 }, { "epoch": 0.53, "grad_norm": 0.7423695921897888, "learning_rate": 9.45235280939233e-06, "loss": 2.0486, "step": 15794 }, { "epoch": 0.53, "grad_norm": 0.7125422358512878, "learning_rate": 9.45129151464849e-06, "loss": 2.0601, "step": 15795 }, { "epoch": 0.53, "grad_norm": 0.7318973541259766, "learning_rate": 9.450230226103638e-06, "loss": 2.0893, "step": 15796 }, { "epoch": 0.53, "grad_norm": 0.7244168519973755, "learning_rate": 9.449168943769765e-06, "loss": 2.0447, "step": 15797 }, { "epoch": 0.53, "grad_norm": 0.7325756549835205, "learning_rate": 9.448107667658856e-06, "loss": 2.0813, "step": 15798 }, { "epoch": 0.53, "grad_norm": 0.7265756130218506, "learning_rate": 9.447046397782907e-06, "loss": 2.0506, "step": 15799 }, { "epoch": 0.53, "grad_norm": 0.7130411267280579, "learning_rate": 9.445985134153901e-06, "loss": 2.0992, "step": 15800 }, { "epoch": 0.53, "grad_norm": 0.7140209674835205, "learning_rate": 9.44492387678383e-06, "loss": 2.065, "step": 15801 }, { "epoch": 0.53, "grad_norm": 0.7602997422218323, "learning_rate": 9.443862625684682e-06, "loss": 2.0608, "step": 15802 }, { "epoch": 0.53, "grad_norm": 0.7834575772285461, "learning_rate": 9.442801380868454e-06, "loss": 2.0289, "step": 15803 }, { "epoch": 0.53, "grad_norm": 0.7600589394569397, "learning_rate": 9.441740142347125e-06, "loss": 2.0771, "step": 15804 }, { "epoch": 0.53, "grad_norm": 0.729640543460846, "learning_rate": 9.44067891013269e-06, "loss": 2.1011, "step": 15805 }, { "epoch": 0.53, "grad_norm": 0.7252516746520996, "learning_rate": 9.439617684237135e-06, "loss": 2.0815, "step": 15806 }, { "epoch": 0.53, "grad_norm": 0.7101517915725708, "learning_rate": 9.43855646467245e-06, "loss": 2.0159, "step": 15807 }, { "epoch": 0.53, "grad_norm": 0.7258523106575012, "learning_rate": 9.437495251450633e-06, "loss": 2.0312, "step": 15808 }, { "epoch": 0.53, "grad_norm": 0.7389060258865356, "learning_rate": 9.436434044583658e-06, "loss": 2.1157, "step": 15809 }, { "epoch": 0.53, "grad_norm": 0.7136008143424988, "learning_rate": 9.43537284408352e-06, "loss": 2.0687, "step": 15810 }, { "epoch": 0.53, "grad_norm": 0.7355362176895142, "learning_rate": 9.434311649962211e-06, "loss": 2.0547, "step": 15811 }, { "epoch": 0.53, "grad_norm": 0.7053923010826111, "learning_rate": 9.43325046223172e-06, "loss": 2.0235, "step": 15812 }, { "epoch": 0.53, "grad_norm": 0.7544941306114197, "learning_rate": 9.43218928090403e-06, "loss": 2.1299, "step": 15813 }, { "epoch": 0.53, "grad_norm": 0.7230919599533081, "learning_rate": 9.431128105991134e-06, "loss": 2.0619, "step": 15814 }, { "epoch": 0.53, "grad_norm": 0.7529556751251221, "learning_rate": 9.430066937505018e-06, "loss": 2.1546, "step": 15815 }, { "epoch": 0.53, "grad_norm": 0.7836734056472778, "learning_rate": 9.429005775457672e-06, "loss": 2.0359, "step": 15816 }, { "epoch": 0.53, "grad_norm": 0.7046622633934021, "learning_rate": 9.427944619861088e-06, "loss": 2.0602, "step": 15817 }, { "epoch": 0.53, "grad_norm": 0.7204525470733643, "learning_rate": 9.426883470727245e-06, "loss": 2.0742, "step": 15818 }, { "epoch": 0.53, "grad_norm": 0.7767167091369629, "learning_rate": 9.425822328068138e-06, "loss": 2.0537, "step": 15819 }, { "epoch": 0.53, "grad_norm": 0.7305651307106018, "learning_rate": 9.424761191895755e-06, "loss": 2.1283, "step": 15820 }, { "epoch": 0.53, "grad_norm": 0.7382656931877136, "learning_rate": 9.423700062222084e-06, "loss": 2.1179, "step": 15821 }, { "epoch": 0.53, "grad_norm": 0.7248588800430298, "learning_rate": 9.42263893905911e-06, "loss": 2.0587, "step": 15822 }, { "epoch": 0.53, "grad_norm": 0.7596117854118347, "learning_rate": 9.421577822418822e-06, "loss": 2.037, "step": 15823 }, { "epoch": 0.53, "grad_norm": 0.7299769520759583, "learning_rate": 9.420516712313209e-06, "loss": 2.0938, "step": 15824 }, { "epoch": 0.53, "grad_norm": 0.7371147871017456, "learning_rate": 9.419455608754265e-06, "loss": 1.9524, "step": 15825 }, { "epoch": 0.53, "grad_norm": 0.7545364499092102, "learning_rate": 9.418394511753962e-06, "loss": 2.1051, "step": 15826 }, { "epoch": 0.53, "grad_norm": 0.7525614500045776, "learning_rate": 9.4173334213243e-06, "loss": 2.0947, "step": 15827 }, { "epoch": 0.53, "grad_norm": 0.7079751491546631, "learning_rate": 9.416272337477266e-06, "loss": 2.0313, "step": 15828 }, { "epoch": 0.53, "grad_norm": 0.7101505994796753, "learning_rate": 9.41521126022484e-06, "loss": 2.0589, "step": 15829 }, { "epoch": 0.53, "grad_norm": 0.698821485042572, "learning_rate": 9.414150189579022e-06, "loss": 2.0423, "step": 15830 }, { "epoch": 0.53, "grad_norm": 0.7328428030014038, "learning_rate": 9.413089125551785e-06, "loss": 2.0214, "step": 15831 }, { "epoch": 0.53, "grad_norm": 0.7246461510658264, "learning_rate": 9.412028068155126e-06, "loss": 2.1057, "step": 15832 }, { "epoch": 0.53, "grad_norm": 0.7135437726974487, "learning_rate": 9.41096701740103e-06, "loss": 2.1221, "step": 15833 }, { "epoch": 0.53, "grad_norm": 0.7545658349990845, "learning_rate": 9.409905973301483e-06, "loss": 2.0592, "step": 15834 }, { "epoch": 0.53, "grad_norm": 0.7284616231918335, "learning_rate": 9.408844935868473e-06, "loss": 2.1123, "step": 15835 }, { "epoch": 0.53, "grad_norm": 0.7177366614341736, "learning_rate": 9.407783905113985e-06, "loss": 2.0438, "step": 15836 }, { "epoch": 0.53, "grad_norm": 0.7558740377426147, "learning_rate": 9.406722881050005e-06, "loss": 2.0508, "step": 15837 }, { "epoch": 0.53, "grad_norm": 0.7324455380439758, "learning_rate": 9.405661863688526e-06, "loss": 2.0253, "step": 15838 }, { "epoch": 0.53, "grad_norm": 0.7695493102073669, "learning_rate": 9.404600853041531e-06, "loss": 2.1424, "step": 15839 }, { "epoch": 0.53, "grad_norm": 0.7359585165977478, "learning_rate": 9.403539849121006e-06, "loss": 2.0983, "step": 15840 }, { "epoch": 0.53, "grad_norm": 0.7323364019393921, "learning_rate": 9.402478851938938e-06, "loss": 2.0658, "step": 15841 }, { "epoch": 0.53, "grad_norm": 0.7378591299057007, "learning_rate": 9.401417861507313e-06, "loss": 2.082, "step": 15842 }, { "epoch": 0.53, "grad_norm": 0.7363995313644409, "learning_rate": 9.400356877838121e-06, "loss": 2.044, "step": 15843 }, { "epoch": 0.53, "grad_norm": 0.7286170125007629, "learning_rate": 9.399295900943343e-06, "loss": 2.0859, "step": 15844 }, { "epoch": 0.53, "grad_norm": 0.7152288556098938, "learning_rate": 9.398234930834966e-06, "loss": 2.0839, "step": 15845 }, { "epoch": 0.53, "grad_norm": 0.7402381300926208, "learning_rate": 9.397173967524981e-06, "loss": 2.1061, "step": 15846 }, { "epoch": 0.53, "grad_norm": 0.7478959560394287, "learning_rate": 9.396113011025369e-06, "loss": 2.045, "step": 15847 }, { "epoch": 0.53, "grad_norm": 0.7314088940620422, "learning_rate": 9.395052061348123e-06, "loss": 2.0655, "step": 15848 }, { "epoch": 0.53, "grad_norm": 0.7541019916534424, "learning_rate": 9.39399111850522e-06, "loss": 2.1193, "step": 15849 }, { "epoch": 0.53, "grad_norm": 0.7061830759048462, "learning_rate": 9.39293018250865e-06, "loss": 1.999, "step": 15850 }, { "epoch": 0.53, "grad_norm": 0.7455294132232666, "learning_rate": 9.3918692533704e-06, "loss": 2.1187, "step": 15851 }, { "epoch": 0.53, "grad_norm": 0.7494887113571167, "learning_rate": 9.390808331102459e-06, "loss": 2.0961, "step": 15852 }, { "epoch": 0.53, "grad_norm": 0.7483009099960327, "learning_rate": 9.389747415716802e-06, "loss": 2.0741, "step": 15853 }, { "epoch": 0.53, "grad_norm": 0.7065709829330444, "learning_rate": 9.388686507225423e-06, "loss": 2.1226, "step": 15854 }, { "epoch": 0.53, "grad_norm": 0.7431784272193909, "learning_rate": 9.387625605640305e-06, "loss": 2.1025, "step": 15855 }, { "epoch": 0.53, "grad_norm": 0.7415103316307068, "learning_rate": 9.386564710973438e-06, "loss": 2.0317, "step": 15856 }, { "epoch": 0.53, "grad_norm": 0.730280339717865, "learning_rate": 9.385503823236802e-06, "loss": 2.098, "step": 15857 }, { "epoch": 0.53, "grad_norm": 0.7357711791992188, "learning_rate": 9.38444294244238e-06, "loss": 2.0468, "step": 15858 }, { "epoch": 0.53, "grad_norm": 0.7279675602912903, "learning_rate": 9.383382068602162e-06, "loss": 2.0672, "step": 15859 }, { "epoch": 0.53, "grad_norm": 0.7332284450531006, "learning_rate": 9.382321201728132e-06, "loss": 1.9717, "step": 15860 }, { "epoch": 0.53, "grad_norm": 0.7530045509338379, "learning_rate": 9.381260341832279e-06, "loss": 2.043, "step": 15861 }, { "epoch": 0.53, "grad_norm": 0.7332090139389038, "learning_rate": 9.380199488926581e-06, "loss": 2.057, "step": 15862 }, { "epoch": 0.53, "grad_norm": 1.1976981163024902, "learning_rate": 9.379138643023026e-06, "loss": 2.1329, "step": 15863 }, { "epoch": 0.53, "grad_norm": 0.7334766387939453, "learning_rate": 9.378077804133598e-06, "loss": 2.0798, "step": 15864 }, { "epoch": 0.53, "grad_norm": 0.7234364151954651, "learning_rate": 9.377016972270286e-06, "loss": 1.9968, "step": 15865 }, { "epoch": 0.53, "grad_norm": 0.7135488986968994, "learning_rate": 9.37595614744507e-06, "loss": 2.0541, "step": 15866 }, { "epoch": 0.53, "grad_norm": 0.7311728596687317, "learning_rate": 9.374895329669931e-06, "loss": 2.0885, "step": 15867 }, { "epoch": 0.53, "grad_norm": 0.7306535243988037, "learning_rate": 9.373834518956861e-06, "loss": 2.0747, "step": 15868 }, { "epoch": 0.53, "grad_norm": 0.733503520488739, "learning_rate": 9.372773715317842e-06, "loss": 2.0493, "step": 15869 }, { "epoch": 0.53, "grad_norm": 0.7361902594566345, "learning_rate": 9.37171291876486e-06, "loss": 2.0461, "step": 15870 }, { "epoch": 0.53, "grad_norm": 0.7731547355651855, "learning_rate": 9.370652129309895e-06, "loss": 2.065, "step": 15871 }, { "epoch": 0.53, "grad_norm": 0.7021054625511169, "learning_rate": 9.369591346964934e-06, "loss": 2.0787, "step": 15872 }, { "epoch": 0.53, "grad_norm": 0.7471688389778137, "learning_rate": 9.368530571741958e-06, "loss": 2.0916, "step": 15873 }, { "epoch": 0.53, "grad_norm": 0.7331233024597168, "learning_rate": 9.36746980365296e-06, "loss": 2.0663, "step": 15874 }, { "epoch": 0.53, "grad_norm": 0.7613646388053894, "learning_rate": 9.36640904270991e-06, "loss": 2.0638, "step": 15875 }, { "epoch": 0.53, "grad_norm": 0.7453413605690002, "learning_rate": 9.365348288924803e-06, "loss": 2.0288, "step": 15876 }, { "epoch": 0.53, "grad_norm": 0.7449492812156677, "learning_rate": 9.36428754230962e-06, "loss": 1.9543, "step": 15877 }, { "epoch": 0.53, "grad_norm": 0.725737988948822, "learning_rate": 9.36322680287634e-06, "loss": 1.9944, "step": 15878 }, { "epoch": 0.53, "grad_norm": 0.7219983339309692, "learning_rate": 9.362166070636956e-06, "loss": 2.1017, "step": 15879 }, { "epoch": 0.53, "grad_norm": 0.7992630004882812, "learning_rate": 9.361105345603443e-06, "loss": 2.0734, "step": 15880 }, { "epoch": 0.53, "grad_norm": 0.7272497415542603, "learning_rate": 9.360044627787785e-06, "loss": 2.0692, "step": 15881 }, { "epoch": 0.53, "grad_norm": 0.7185360193252563, "learning_rate": 9.35898391720197e-06, "loss": 2.0602, "step": 15882 }, { "epoch": 0.53, "grad_norm": 0.71562659740448, "learning_rate": 9.357923213857981e-06, "loss": 2.1034, "step": 15883 }, { "epoch": 0.53, "grad_norm": 0.7642841935157776, "learning_rate": 9.356862517767796e-06, "loss": 2.0837, "step": 15884 }, { "epoch": 0.53, "grad_norm": 0.7575034499168396, "learning_rate": 9.355801828943402e-06, "loss": 2.0419, "step": 15885 }, { "epoch": 0.53, "grad_norm": 0.736322283744812, "learning_rate": 9.354741147396781e-06, "loss": 2.0731, "step": 15886 }, { "epoch": 0.53, "grad_norm": 0.7693501114845276, "learning_rate": 9.35368047313992e-06, "loss": 2.0783, "step": 15887 }, { "epoch": 0.53, "grad_norm": 0.8010561466217041, "learning_rate": 9.352619806184795e-06, "loss": 2.0822, "step": 15888 }, { "epoch": 0.53, "grad_norm": 0.7613589763641357, "learning_rate": 9.351559146543391e-06, "loss": 2.0257, "step": 15889 }, { "epoch": 0.53, "grad_norm": 0.7548536658287048, "learning_rate": 9.350498494227693e-06, "loss": 2.0373, "step": 15890 }, { "epoch": 0.53, "grad_norm": 0.738605260848999, "learning_rate": 9.349437849249683e-06, "loss": 2.0035, "step": 15891 }, { "epoch": 0.53, "grad_norm": 0.7517900466918945, "learning_rate": 9.348377211621345e-06, "loss": 2.0684, "step": 15892 }, { "epoch": 0.53, "grad_norm": 0.7380388379096985, "learning_rate": 9.347316581354655e-06, "loss": 2.0322, "step": 15893 }, { "epoch": 0.53, "grad_norm": 0.7132270932197571, "learning_rate": 9.346255958461603e-06, "loss": 2.0554, "step": 15894 }, { "epoch": 0.53, "grad_norm": 0.7483150959014893, "learning_rate": 9.345195342954163e-06, "loss": 2.0034, "step": 15895 }, { "epoch": 0.53, "grad_norm": 0.7544447183609009, "learning_rate": 9.344134734844332e-06, "loss": 2.0262, "step": 15896 }, { "epoch": 0.53, "grad_norm": 0.7355642318725586, "learning_rate": 9.343074134144072e-06, "loss": 2.0759, "step": 15897 }, { "epoch": 0.53, "grad_norm": 0.7268276214599609, "learning_rate": 9.34201354086538e-06, "loss": 2.1108, "step": 15898 }, { "epoch": 0.53, "grad_norm": 0.7692199945449829, "learning_rate": 9.340952955020234e-06, "loss": 2.1444, "step": 15899 }, { "epoch": 0.53, "grad_norm": 0.7469164133071899, "learning_rate": 9.339892376620611e-06, "loss": 2.0963, "step": 15900 }, { "epoch": 0.53, "grad_norm": 0.7846393585205078, "learning_rate": 9.338831805678506e-06, "loss": 2.0965, "step": 15901 }, { "epoch": 0.53, "grad_norm": 0.7346498966217041, "learning_rate": 9.337771242205886e-06, "loss": 2.1197, "step": 15902 }, { "epoch": 0.53, "grad_norm": 0.7151606678962708, "learning_rate": 9.336710686214735e-06, "loss": 2.0631, "step": 15903 }, { "epoch": 0.53, "grad_norm": 0.7369479537010193, "learning_rate": 9.335650137717043e-06, "loss": 2.0799, "step": 15904 }, { "epoch": 0.53, "grad_norm": 0.7005956768989563, "learning_rate": 9.334589596724786e-06, "loss": 2.0627, "step": 15905 }, { "epoch": 0.53, "grad_norm": 0.7518231272697449, "learning_rate": 9.333529063249945e-06, "loss": 2.0413, "step": 15906 }, { "epoch": 0.53, "grad_norm": 0.7471197247505188, "learning_rate": 9.332468537304501e-06, "loss": 2.1366, "step": 15907 }, { "epoch": 0.53, "grad_norm": 0.7283148169517517, "learning_rate": 9.331408018900436e-06, "loss": 2.0184, "step": 15908 }, { "epoch": 0.53, "grad_norm": 0.7611947655677795, "learning_rate": 9.330347508049731e-06, "loss": 2.122, "step": 15909 }, { "epoch": 0.53, "grad_norm": 0.7322114109992981, "learning_rate": 9.329287004764374e-06, "loss": 2.1117, "step": 15910 }, { "epoch": 0.53, "grad_norm": 0.7239534258842468, "learning_rate": 9.328226509056331e-06, "loss": 2.0801, "step": 15911 }, { "epoch": 0.53, "grad_norm": 0.7653972506523132, "learning_rate": 9.327166020937595e-06, "loss": 2.026, "step": 15912 }, { "epoch": 0.53, "grad_norm": 0.7509065270423889, "learning_rate": 9.326105540420143e-06, "loss": 2.0544, "step": 15913 }, { "epoch": 0.53, "grad_norm": 0.76719731092453, "learning_rate": 9.325045067515957e-06, "loss": 2.0404, "step": 15914 }, { "epoch": 0.53, "grad_norm": 0.7251100540161133, "learning_rate": 9.323984602237014e-06, "loss": 2.0965, "step": 15915 }, { "epoch": 0.53, "grad_norm": 0.7555171847343445, "learning_rate": 9.322924144595294e-06, "loss": 2.0828, "step": 15916 }, { "epoch": 0.53, "grad_norm": 0.7464106678962708, "learning_rate": 9.321863694602784e-06, "loss": 2.038, "step": 15917 }, { "epoch": 0.53, "grad_norm": 0.7507331967353821, "learning_rate": 9.320803252271464e-06, "loss": 2.0619, "step": 15918 }, { "epoch": 0.53, "grad_norm": 0.7521811127662659, "learning_rate": 9.319742817613304e-06, "loss": 2.0277, "step": 15919 }, { "epoch": 0.53, "grad_norm": 0.7191751599311829, "learning_rate": 9.318682390640293e-06, "loss": 2.0395, "step": 15920 }, { "epoch": 0.53, "grad_norm": 0.7472249865531921, "learning_rate": 9.31762197136441e-06, "loss": 2.0462, "step": 15921 }, { "epoch": 0.53, "grad_norm": 0.748084306716919, "learning_rate": 9.316561559797632e-06, "loss": 2.0716, "step": 15922 }, { "epoch": 0.53, "grad_norm": 0.7248259782791138, "learning_rate": 9.315501155951947e-06, "loss": 2.0381, "step": 15923 }, { "epoch": 0.53, "grad_norm": 0.7302417755126953, "learning_rate": 9.314440759839321e-06, "loss": 2.0538, "step": 15924 }, { "epoch": 0.53, "grad_norm": 0.7608217597007751, "learning_rate": 9.313380371471747e-06, "loss": 2.0259, "step": 15925 }, { "epoch": 0.53, "grad_norm": 0.7580153942108154, "learning_rate": 9.312319990861196e-06, "loss": 2.1632, "step": 15926 }, { "epoch": 0.53, "grad_norm": 0.731648325920105, "learning_rate": 9.311259618019653e-06, "loss": 2.1136, "step": 15927 }, { "epoch": 0.53, "grad_norm": 0.7302659749984741, "learning_rate": 9.310199252959095e-06, "loss": 1.9977, "step": 15928 }, { "epoch": 0.53, "grad_norm": 0.7995164394378662, "learning_rate": 9.309138895691501e-06, "loss": 2.1677, "step": 15929 }, { "epoch": 0.53, "grad_norm": 0.7981265187263489, "learning_rate": 9.308078546228849e-06, "loss": 2.0395, "step": 15930 }, { "epoch": 0.53, "grad_norm": 0.7587030529975891, "learning_rate": 9.30701820458312e-06, "loss": 2.0385, "step": 15931 }, { "epoch": 0.53, "grad_norm": 0.7124722003936768, "learning_rate": 9.305957870766297e-06, "loss": 2.0262, "step": 15932 }, { "epoch": 0.53, "grad_norm": 0.7316484451293945, "learning_rate": 9.304897544790353e-06, "loss": 2.1416, "step": 15933 }, { "epoch": 0.53, "grad_norm": 0.7431384325027466, "learning_rate": 9.303837226667269e-06, "loss": 2.1107, "step": 15934 }, { "epoch": 0.53, "grad_norm": 0.7530476450920105, "learning_rate": 9.302776916409024e-06, "loss": 2.1362, "step": 15935 }, { "epoch": 0.53, "grad_norm": 0.7857641577720642, "learning_rate": 9.301716614027599e-06, "loss": 2.1101, "step": 15936 }, { "epoch": 0.53, "grad_norm": 0.7001701593399048, "learning_rate": 9.300656319534968e-06, "loss": 2.0378, "step": 15937 }, { "epoch": 0.53, "grad_norm": 0.7270858883857727, "learning_rate": 9.299596032943111e-06, "loss": 2.102, "step": 15938 }, { "epoch": 0.53, "grad_norm": 0.7247528433799744, "learning_rate": 9.298535754264008e-06, "loss": 2.0579, "step": 15939 }, { "epoch": 0.53, "grad_norm": 0.7364323735237122, "learning_rate": 9.29747548350964e-06, "loss": 2.0386, "step": 15940 }, { "epoch": 0.53, "grad_norm": 0.7209014892578125, "learning_rate": 9.29641522069198e-06, "loss": 2.0716, "step": 15941 }, { "epoch": 0.53, "grad_norm": 0.7620095014572144, "learning_rate": 9.29535496582301e-06, "loss": 2.0582, "step": 15942 }, { "epoch": 0.53, "grad_norm": 0.7178986668586731, "learning_rate": 9.294294718914705e-06, "loss": 2.0447, "step": 15943 }, { "epoch": 0.53, "grad_norm": 0.7734183669090271, "learning_rate": 9.293234479979044e-06, "loss": 2.0757, "step": 15944 }, { "epoch": 0.53, "grad_norm": 0.7778571248054504, "learning_rate": 9.29217424902801e-06, "loss": 2.098, "step": 15945 }, { "epoch": 0.53, "grad_norm": 0.7628556489944458, "learning_rate": 9.291114026073571e-06, "loss": 2.0508, "step": 15946 }, { "epoch": 0.53, "grad_norm": 0.7472501993179321, "learning_rate": 9.290053811127713e-06, "loss": 2.0123, "step": 15947 }, { "epoch": 0.53, "grad_norm": 0.7359395623207092, "learning_rate": 9.288993604202409e-06, "loss": 2.0957, "step": 15948 }, { "epoch": 0.53, "grad_norm": 0.7122228741645813, "learning_rate": 9.287933405309643e-06, "loss": 2.0294, "step": 15949 }, { "epoch": 0.53, "grad_norm": 0.7597297430038452, "learning_rate": 9.286873214461385e-06, "loss": 2.0164, "step": 15950 }, { "epoch": 0.53, "grad_norm": 0.7028793692588806, "learning_rate": 9.285813031669616e-06, "loss": 2.0631, "step": 15951 }, { "epoch": 0.53, "grad_norm": 0.7687334418296814, "learning_rate": 9.284752856946309e-06, "loss": 2.1056, "step": 15952 }, { "epoch": 0.53, "grad_norm": 0.735640287399292, "learning_rate": 9.283692690303448e-06, "loss": 2.1401, "step": 15953 }, { "epoch": 0.53, "grad_norm": 0.7564795017242432, "learning_rate": 9.28263253175301e-06, "loss": 2.1223, "step": 15954 }, { "epoch": 0.53, "grad_norm": 0.7364903092384338, "learning_rate": 9.281572381306968e-06, "loss": 2.0563, "step": 15955 }, { "epoch": 0.53, "grad_norm": 0.7497307062149048, "learning_rate": 9.280512238977298e-06, "loss": 2.0999, "step": 15956 }, { "epoch": 0.53, "grad_norm": 0.7197167277336121, "learning_rate": 9.279452104775982e-06, "loss": 2.0195, "step": 15957 }, { "epoch": 0.53, "grad_norm": 0.7526372671127319, "learning_rate": 9.278391978714993e-06, "loss": 2.1083, "step": 15958 }, { "epoch": 0.53, "grad_norm": 0.7457206845283508, "learning_rate": 9.277331860806306e-06, "loss": 2.0435, "step": 15959 }, { "epoch": 0.53, "grad_norm": 0.7184691429138184, "learning_rate": 9.2762717510619e-06, "loss": 2.0582, "step": 15960 }, { "epoch": 0.53, "grad_norm": 0.7513689994812012, "learning_rate": 9.275211649493753e-06, "loss": 2.0727, "step": 15961 }, { "epoch": 0.53, "grad_norm": 0.7421584129333496, "learning_rate": 9.274151556113841e-06, "loss": 2.0529, "step": 15962 }, { "epoch": 0.53, "grad_norm": 0.7788191437721252, "learning_rate": 9.27309147093414e-06, "loss": 2.0449, "step": 15963 }, { "epoch": 0.53, "grad_norm": 0.7572394013404846, "learning_rate": 9.272031393966625e-06, "loss": 2.0837, "step": 15964 }, { "epoch": 0.53, "grad_norm": 0.7335614562034607, "learning_rate": 9.270971325223271e-06, "loss": 2.0115, "step": 15965 }, { "epoch": 0.53, "grad_norm": 0.7741708159446716, "learning_rate": 9.269911264716056e-06, "loss": 2.0438, "step": 15966 }, { "epoch": 0.53, "grad_norm": 0.735787034034729, "learning_rate": 9.268851212456959e-06, "loss": 2.0625, "step": 15967 }, { "epoch": 0.53, "grad_norm": 0.7941128611564636, "learning_rate": 9.267791168457946e-06, "loss": 2.0165, "step": 15968 }, { "epoch": 0.53, "grad_norm": 0.7362989187240601, "learning_rate": 9.266731132731003e-06, "loss": 2.0431, "step": 15969 }, { "epoch": 0.53, "grad_norm": 0.7495849132537842, "learning_rate": 9.265671105288104e-06, "loss": 2.006, "step": 15970 }, { "epoch": 0.53, "grad_norm": 0.7700591087341309, "learning_rate": 9.264611086141222e-06, "loss": 2.0796, "step": 15971 }, { "epoch": 0.53, "grad_norm": 0.7365755438804626, "learning_rate": 9.26355107530233e-06, "loss": 2.0562, "step": 15972 }, { "epoch": 0.53, "grad_norm": 0.7625305652618408, "learning_rate": 9.262491072783409e-06, "loss": 2.017, "step": 15973 }, { "epoch": 0.53, "grad_norm": 0.7295148968696594, "learning_rate": 9.261431078596428e-06, "loss": 2.0731, "step": 15974 }, { "epoch": 0.53, "grad_norm": 0.7187147736549377, "learning_rate": 9.260371092753368e-06, "loss": 2.0581, "step": 15975 }, { "epoch": 0.53, "grad_norm": 0.7318670749664307, "learning_rate": 9.259311115266205e-06, "loss": 2.0573, "step": 15976 }, { "epoch": 0.53, "grad_norm": 0.7463316321372986, "learning_rate": 9.258251146146907e-06, "loss": 2.1557, "step": 15977 }, { "epoch": 0.53, "grad_norm": 0.7334805130958557, "learning_rate": 9.257191185407454e-06, "loss": 2.0499, "step": 15978 }, { "epoch": 0.53, "grad_norm": 0.7716814875602722, "learning_rate": 9.256131233059819e-06, "loss": 2.0639, "step": 15979 }, { "epoch": 0.53, "grad_norm": 0.7400861978530884, "learning_rate": 9.25507128911598e-06, "loss": 2.1087, "step": 15980 }, { "epoch": 0.53, "grad_norm": 0.7209727764129639, "learning_rate": 9.254011353587907e-06, "loss": 2.1423, "step": 15981 }, { "epoch": 0.53, "grad_norm": 0.7437472939491272, "learning_rate": 9.252951426487574e-06, "loss": 2.0571, "step": 15982 }, { "epoch": 0.53, "grad_norm": 0.7227652668952942, "learning_rate": 9.25189150782696e-06, "loss": 2.0668, "step": 15983 }, { "epoch": 0.53, "grad_norm": 0.770972728729248, "learning_rate": 9.250831597618037e-06, "loss": 2.0623, "step": 15984 }, { "epoch": 0.53, "grad_norm": 0.7300300002098083, "learning_rate": 9.249771695872784e-06, "loss": 2.1043, "step": 15985 }, { "epoch": 0.53, "grad_norm": 0.732370913028717, "learning_rate": 9.248711802603166e-06, "loss": 2.1387, "step": 15986 }, { "epoch": 0.53, "grad_norm": 0.7470964789390564, "learning_rate": 9.247651917821163e-06, "loss": 2.0844, "step": 15987 }, { "epoch": 0.53, "grad_norm": 0.746906042098999, "learning_rate": 9.246592041538745e-06, "loss": 2.0946, "step": 15988 }, { "epoch": 0.53, "grad_norm": 0.7387294769287109, "learning_rate": 9.245532173767895e-06, "loss": 2.066, "step": 15989 }, { "epoch": 0.53, "grad_norm": 0.7141947746276855, "learning_rate": 9.244472314520573e-06, "loss": 2.0267, "step": 15990 }, { "epoch": 0.53, "grad_norm": 0.7304352521896362, "learning_rate": 9.243412463808765e-06, "loss": 1.9975, "step": 15991 }, { "epoch": 0.53, "grad_norm": 0.7589696049690247, "learning_rate": 9.242352621644438e-06, "loss": 2.0633, "step": 15992 }, { "epoch": 0.53, "grad_norm": 0.8823273777961731, "learning_rate": 9.241292788039566e-06, "loss": 2.0972, "step": 15993 }, { "epoch": 0.53, "grad_norm": 0.7484263777732849, "learning_rate": 9.240232963006128e-06, "loss": 2.0597, "step": 15994 }, { "epoch": 0.53, "grad_norm": 0.7398248910903931, "learning_rate": 9.23917314655609e-06, "loss": 2.0696, "step": 15995 }, { "epoch": 0.53, "grad_norm": 0.7671696543693542, "learning_rate": 9.238113338701422e-06, "loss": 2.1627, "step": 15996 }, { "epoch": 0.53, "grad_norm": 0.7640106678009033, "learning_rate": 9.237053539454108e-06, "loss": 2.0688, "step": 15997 }, { "epoch": 0.53, "grad_norm": 0.7214716672897339, "learning_rate": 9.235993748826118e-06, "loss": 1.9921, "step": 15998 }, { "epoch": 0.53, "grad_norm": 0.7194445133209229, "learning_rate": 9.23493396682942e-06, "loss": 1.9789, "step": 15999 }, { "epoch": 0.53, "grad_norm": 0.732318103313446, "learning_rate": 9.233874193475992e-06, "loss": 1.9512, "step": 16000 }, { "epoch": 0.53, "grad_norm": 0.7848606109619141, "learning_rate": 9.232814428777803e-06, "loss": 2.1032, "step": 16001 }, { "epoch": 0.53, "grad_norm": 0.7297263145446777, "learning_rate": 9.231754672746829e-06, "loss": 2.0383, "step": 16002 }, { "epoch": 0.53, "grad_norm": 0.7562658190727234, "learning_rate": 9.230694925395038e-06, "loss": 2.0156, "step": 16003 }, { "epoch": 0.53, "grad_norm": 0.7618056535720825, "learning_rate": 9.229635186734403e-06, "loss": 2.0539, "step": 16004 }, { "epoch": 0.53, "grad_norm": 0.7276254296302795, "learning_rate": 9.228575456776901e-06, "loss": 2.0325, "step": 16005 }, { "epoch": 0.53, "grad_norm": 0.752739429473877, "learning_rate": 9.2275157355345e-06, "loss": 2.0664, "step": 16006 }, { "epoch": 0.53, "grad_norm": 0.7278198599815369, "learning_rate": 9.226456023019178e-06, "loss": 2.0488, "step": 16007 }, { "epoch": 0.53, "grad_norm": 0.7573243379592896, "learning_rate": 9.225396319242898e-06, "loss": 2.0771, "step": 16008 }, { "epoch": 0.53, "grad_norm": 0.7025761604309082, "learning_rate": 9.224336624217635e-06, "loss": 2.0635, "step": 16009 }, { "epoch": 0.53, "grad_norm": 0.7310386300086975, "learning_rate": 9.223276937955364e-06, "loss": 2.0162, "step": 16010 }, { "epoch": 0.53, "grad_norm": 0.7539940476417542, "learning_rate": 9.22221726046806e-06, "loss": 2.1346, "step": 16011 }, { "epoch": 0.53, "grad_norm": 0.7575652599334717, "learning_rate": 9.221157591767683e-06, "loss": 2.1174, "step": 16012 }, { "epoch": 0.53, "grad_norm": 0.7140714526176453, "learning_rate": 9.220097931866213e-06, "loss": 2.0418, "step": 16013 }, { "epoch": 0.53, "grad_norm": 0.7271680235862732, "learning_rate": 9.21903828077562e-06, "loss": 2.0855, "step": 16014 }, { "epoch": 0.53, "grad_norm": 0.7224127054214478, "learning_rate": 9.217978638507871e-06, "loss": 2.0037, "step": 16015 }, { "epoch": 0.53, "grad_norm": 0.7882169485092163, "learning_rate": 9.216919005074949e-06, "loss": 2.0628, "step": 16016 }, { "epoch": 0.53, "grad_norm": 0.750856339931488, "learning_rate": 9.21585938048881e-06, "loss": 2.1081, "step": 16017 }, { "epoch": 0.53, "grad_norm": 0.7655377984046936, "learning_rate": 9.214799764761436e-06, "loss": 2.0696, "step": 16018 }, { "epoch": 0.53, "grad_norm": 0.7266107201576233, "learning_rate": 9.21374015790479e-06, "loss": 2.0761, "step": 16019 }, { "epoch": 0.53, "grad_norm": 0.7755805253982544, "learning_rate": 9.212680559930853e-06, "loss": 2.0336, "step": 16020 }, { "epoch": 0.53, "grad_norm": 0.7473196983337402, "learning_rate": 9.211620970851586e-06, "loss": 2.052, "step": 16021 }, { "epoch": 0.53, "grad_norm": 0.7513112425804138, "learning_rate": 9.210561390678964e-06, "loss": 2.1181, "step": 16022 }, { "epoch": 0.53, "grad_norm": 0.7416778802871704, "learning_rate": 9.209501819424953e-06, "loss": 2.0195, "step": 16023 }, { "epoch": 0.53, "grad_norm": 0.7785535454750061, "learning_rate": 9.208442257101531e-06, "loss": 2.0946, "step": 16024 }, { "epoch": 0.53, "grad_norm": 0.7548454403877258, "learning_rate": 9.207382703720666e-06, "loss": 2.0952, "step": 16025 }, { "epoch": 0.53, "grad_norm": 0.7580496668815613, "learning_rate": 9.206323159294325e-06, "loss": 2.0763, "step": 16026 }, { "epoch": 0.53, "grad_norm": 0.7459222078323364, "learning_rate": 9.205263623834479e-06, "loss": 2.065, "step": 16027 }, { "epoch": 0.53, "grad_norm": 0.7287431359291077, "learning_rate": 9.2042040973531e-06, "loss": 1.9973, "step": 16028 }, { "epoch": 0.53, "grad_norm": 0.7263702750205994, "learning_rate": 9.20314457986216e-06, "loss": 2.0102, "step": 16029 }, { "epoch": 0.53, "grad_norm": 0.7440166473388672, "learning_rate": 9.20208507137362e-06, "loss": 2.0551, "step": 16030 }, { "epoch": 0.53, "grad_norm": 0.720483660697937, "learning_rate": 9.201025571899455e-06, "loss": 1.9949, "step": 16031 }, { "epoch": 0.53, "grad_norm": 0.7443420886993408, "learning_rate": 9.199966081451636e-06, "loss": 2.1093, "step": 16032 }, { "epoch": 0.53, "grad_norm": 0.772274374961853, "learning_rate": 9.198906600042135e-06, "loss": 2.1204, "step": 16033 }, { "epoch": 0.53, "grad_norm": 0.7459427714347839, "learning_rate": 9.197847127682914e-06, "loss": 2.0704, "step": 16034 }, { "epoch": 0.53, "grad_norm": 0.752036988735199, "learning_rate": 9.196787664385946e-06, "loss": 2.0165, "step": 16035 }, { "epoch": 0.53, "grad_norm": 0.7689023613929749, "learning_rate": 9.195728210163202e-06, "loss": 2.036, "step": 16036 }, { "epoch": 0.53, "grad_norm": 0.7844099402427673, "learning_rate": 9.194668765026645e-06, "loss": 2.0233, "step": 16037 }, { "epoch": 0.53, "grad_norm": 0.7220128774642944, "learning_rate": 9.193609328988257e-06, "loss": 1.9683, "step": 16038 }, { "epoch": 0.53, "grad_norm": 0.732947826385498, "learning_rate": 9.19254990205999e-06, "loss": 2.016, "step": 16039 }, { "epoch": 0.53, "grad_norm": 0.7263383269309998, "learning_rate": 9.191490484253824e-06, "loss": 2.0834, "step": 16040 }, { "epoch": 0.53, "grad_norm": 0.7345051169395447, "learning_rate": 9.190431075581725e-06, "loss": 2.0499, "step": 16041 }, { "epoch": 0.53, "grad_norm": 0.771159827709198, "learning_rate": 9.189371676055663e-06, "loss": 2.001, "step": 16042 }, { "epoch": 0.53, "grad_norm": 0.7602362632751465, "learning_rate": 9.188312285687602e-06, "loss": 2.0196, "step": 16043 }, { "epoch": 0.53, "grad_norm": 0.7179849743843079, "learning_rate": 9.187252904489513e-06, "loss": 2.0573, "step": 16044 }, { "epoch": 0.53, "grad_norm": 0.7222908735275269, "learning_rate": 9.186193532473364e-06, "loss": 2.0258, "step": 16045 }, { "epoch": 0.53, "grad_norm": 0.7636646628379822, "learning_rate": 9.185134169651124e-06, "loss": 2.1187, "step": 16046 }, { "epoch": 0.53, "grad_norm": 0.7379689812660217, "learning_rate": 9.184074816034763e-06, "loss": 2.0459, "step": 16047 }, { "epoch": 0.53, "grad_norm": 0.7704395055770874, "learning_rate": 9.183015471636244e-06, "loss": 2.0527, "step": 16048 }, { "epoch": 0.53, "grad_norm": 0.7609572410583496, "learning_rate": 9.181956136467537e-06, "loss": 2.1446, "step": 16049 }, { "epoch": 0.53, "grad_norm": 0.7350201606750488, "learning_rate": 9.180896810540613e-06, "loss": 2.071, "step": 16050 }, { "epoch": 0.53, "grad_norm": 0.6975039839744568, "learning_rate": 9.179837493867438e-06, "loss": 2.116, "step": 16051 }, { "epoch": 0.53, "grad_norm": 0.7201418280601501, "learning_rate": 9.178778186459974e-06, "loss": 2.0475, "step": 16052 }, { "epoch": 0.53, "grad_norm": 0.7411181330680847, "learning_rate": 9.177718888330192e-06, "loss": 2.0896, "step": 16053 }, { "epoch": 0.53, "grad_norm": 0.7326714992523193, "learning_rate": 9.176659599490061e-06, "loss": 2.0152, "step": 16054 }, { "epoch": 0.53, "grad_norm": 0.7135183215141296, "learning_rate": 9.17560031995155e-06, "loss": 2.0945, "step": 16055 }, { "epoch": 0.53, "grad_norm": 0.7686595916748047, "learning_rate": 9.174541049726625e-06, "loss": 2.1613, "step": 16056 }, { "epoch": 0.53, "grad_norm": 0.7410893440246582, "learning_rate": 9.173481788827248e-06, "loss": 2.0826, "step": 16057 }, { "epoch": 0.53, "grad_norm": 0.7164680361747742, "learning_rate": 9.17242253726539e-06, "loss": 2.0463, "step": 16058 }, { "epoch": 0.53, "grad_norm": 0.7621414065361023, "learning_rate": 9.171363295053013e-06, "loss": 2.0786, "step": 16059 }, { "epoch": 0.53, "grad_norm": 0.709117591381073, "learning_rate": 9.170304062202097e-06, "loss": 2.0697, "step": 16060 }, { "epoch": 0.53, "grad_norm": 0.7376083731651306, "learning_rate": 9.169244838724591e-06, "loss": 2.0554, "step": 16061 }, { "epoch": 0.53, "grad_norm": 0.7514504194259644, "learning_rate": 9.168185624632474e-06, "loss": 2.0679, "step": 16062 }, { "epoch": 0.53, "grad_norm": 0.7439470291137695, "learning_rate": 9.167126419937707e-06, "loss": 2.0757, "step": 16063 }, { "epoch": 0.53, "grad_norm": 0.7106485962867737, "learning_rate": 9.16606722465226e-06, "loss": 2.099, "step": 16064 }, { "epoch": 0.53, "grad_norm": 0.7550050020217896, "learning_rate": 9.165008038788096e-06, "loss": 2.0571, "step": 16065 }, { "epoch": 0.53, "grad_norm": 0.7180540561676025, "learning_rate": 9.163948862357181e-06, "loss": 2.0725, "step": 16066 }, { "epoch": 0.53, "grad_norm": 0.7606554627418518, "learning_rate": 9.162889695371478e-06, "loss": 2.0486, "step": 16067 }, { "epoch": 0.53, "grad_norm": 0.7469570636749268, "learning_rate": 9.16183053784296e-06, "loss": 2.1457, "step": 16068 }, { "epoch": 0.53, "grad_norm": 0.7186725735664368, "learning_rate": 9.160771389783591e-06, "loss": 2.0632, "step": 16069 }, { "epoch": 0.53, "grad_norm": 0.7411295771598816, "learning_rate": 9.159712251205335e-06, "loss": 2.0708, "step": 16070 }, { "epoch": 0.53, "grad_norm": 0.7392745018005371, "learning_rate": 9.158653122120156e-06, "loss": 2.0583, "step": 16071 }, { "epoch": 0.53, "grad_norm": 0.7609171271324158, "learning_rate": 9.15759400254002e-06, "loss": 2.0359, "step": 16072 }, { "epoch": 0.53, "grad_norm": 0.7403454780578613, "learning_rate": 9.156534892476898e-06, "loss": 2.026, "step": 16073 }, { "epoch": 0.53, "grad_norm": 0.7484779953956604, "learning_rate": 9.155475791942745e-06, "loss": 2.0465, "step": 16074 }, { "epoch": 0.53, "grad_norm": 0.7578922510147095, "learning_rate": 9.15441670094953e-06, "loss": 2.1687, "step": 16075 }, { "epoch": 0.53, "grad_norm": 0.7346295118331909, "learning_rate": 9.153357619509225e-06, "loss": 2.1262, "step": 16076 }, { "epoch": 0.53, "grad_norm": 0.7380934953689575, "learning_rate": 9.152298547633787e-06, "loss": 2.1445, "step": 16077 }, { "epoch": 0.53, "grad_norm": 0.7431432008743286, "learning_rate": 9.151239485335184e-06, "loss": 2.1418, "step": 16078 }, { "epoch": 0.53, "grad_norm": 0.7498764991760254, "learning_rate": 9.150180432625379e-06, "loss": 2.0631, "step": 16079 }, { "epoch": 0.53, "grad_norm": 0.7398027181625366, "learning_rate": 9.149121389516338e-06, "loss": 2.0563, "step": 16080 }, { "epoch": 0.54, "grad_norm": 0.7248805165290833, "learning_rate": 9.148062356020024e-06, "loss": 2.0595, "step": 16081 }, { "epoch": 0.54, "grad_norm": 0.7447824478149414, "learning_rate": 9.147003332148406e-06, "loss": 2.1401, "step": 16082 }, { "epoch": 0.54, "grad_norm": 0.733701229095459, "learning_rate": 9.14594431791344e-06, "loss": 2.0748, "step": 16083 }, { "epoch": 0.54, "grad_norm": 0.7724853754043579, "learning_rate": 9.144885313327096e-06, "loss": 2.0811, "step": 16084 }, { "epoch": 0.54, "grad_norm": 0.7426878809928894, "learning_rate": 9.143826318401335e-06, "loss": 2.0711, "step": 16085 }, { "epoch": 0.54, "grad_norm": 0.711362361907959, "learning_rate": 9.142767333148126e-06, "loss": 2.0984, "step": 16086 }, { "epoch": 0.54, "grad_norm": 0.7430671453475952, "learning_rate": 9.141708357579427e-06, "loss": 2.0942, "step": 16087 }, { "epoch": 0.54, "grad_norm": 0.7391875982284546, "learning_rate": 9.140649391707204e-06, "loss": 2.047, "step": 16088 }, { "epoch": 0.54, "grad_norm": 0.7467110753059387, "learning_rate": 9.139590435543419e-06, "loss": 2.0372, "step": 16089 }, { "epoch": 0.54, "grad_norm": 0.7503875494003296, "learning_rate": 9.138531489100038e-06, "loss": 2.0905, "step": 16090 }, { "epoch": 0.54, "grad_norm": 0.740982711315155, "learning_rate": 9.137472552389027e-06, "loss": 2.0638, "step": 16091 }, { "epoch": 0.54, "grad_norm": 0.7608187198638916, "learning_rate": 9.136413625422343e-06, "loss": 2.0465, "step": 16092 }, { "epoch": 0.54, "grad_norm": 0.7321698665618896, "learning_rate": 9.135354708211952e-06, "loss": 2.0898, "step": 16093 }, { "epoch": 0.54, "grad_norm": 0.7443631291389465, "learning_rate": 9.134295800769817e-06, "loss": 2.0541, "step": 16094 }, { "epoch": 0.54, "grad_norm": 0.7146250605583191, "learning_rate": 9.1332369031079e-06, "loss": 2.0765, "step": 16095 }, { "epoch": 0.54, "grad_norm": 0.7480495572090149, "learning_rate": 9.132178015238165e-06, "loss": 2.0841, "step": 16096 }, { "epoch": 0.54, "grad_norm": 0.7449318170547485, "learning_rate": 9.131119137172571e-06, "loss": 2.0497, "step": 16097 }, { "epoch": 0.54, "grad_norm": 0.7293895483016968, "learning_rate": 9.130060268923086e-06, "loss": 2.0757, "step": 16098 }, { "epoch": 0.54, "grad_norm": 0.746681809425354, "learning_rate": 9.129001410501671e-06, "loss": 2.1067, "step": 16099 }, { "epoch": 0.54, "grad_norm": 0.7334829568862915, "learning_rate": 9.12794256192029e-06, "loss": 2.0069, "step": 16100 }, { "epoch": 0.54, "grad_norm": 0.738744854927063, "learning_rate": 9.1268837231909e-06, "loss": 2.0933, "step": 16101 }, { "epoch": 0.54, "grad_norm": 0.7283045649528503, "learning_rate": 9.125824894325465e-06, "loss": 2.0529, "step": 16102 }, { "epoch": 0.54, "grad_norm": 0.7260571718215942, "learning_rate": 9.124766075335949e-06, "loss": 1.9908, "step": 16103 }, { "epoch": 0.54, "grad_norm": 0.758544921875, "learning_rate": 9.123707266234317e-06, "loss": 2.0468, "step": 16104 }, { "epoch": 0.54, "grad_norm": 0.7505977749824524, "learning_rate": 9.12264846703252e-06, "loss": 2.1078, "step": 16105 }, { "epoch": 0.54, "grad_norm": 0.7734881043434143, "learning_rate": 9.12158967774253e-06, "loss": 2.073, "step": 16106 }, { "epoch": 0.54, "grad_norm": 0.7540022134780884, "learning_rate": 9.120530898376302e-06, "loss": 2.0999, "step": 16107 }, { "epoch": 0.54, "grad_norm": 0.741304874420166, "learning_rate": 9.1194721289458e-06, "loss": 2.0651, "step": 16108 }, { "epoch": 0.54, "grad_norm": 0.7225976586341858, "learning_rate": 9.118413369462994e-06, "loss": 2.1356, "step": 16109 }, { "epoch": 0.54, "grad_norm": 0.7370829582214355, "learning_rate": 9.11735461993983e-06, "loss": 2.0921, "step": 16110 }, { "epoch": 0.54, "grad_norm": 0.7164703607559204, "learning_rate": 9.116295880388277e-06, "loss": 2.1013, "step": 16111 }, { "epoch": 0.54, "grad_norm": 0.7514081001281738, "learning_rate": 9.115237150820298e-06, "loss": 2.0568, "step": 16112 }, { "epoch": 0.54, "grad_norm": 0.7405162453651428, "learning_rate": 9.114178431247851e-06, "loss": 2.0951, "step": 16113 }, { "epoch": 0.54, "grad_norm": 0.7289748787879944, "learning_rate": 9.113119721682896e-06, "loss": 2.0838, "step": 16114 }, { "epoch": 0.54, "grad_norm": 0.713518500328064, "learning_rate": 9.112061022137393e-06, "loss": 2.0805, "step": 16115 }, { "epoch": 0.54, "grad_norm": 0.7457652688026428, "learning_rate": 9.111002332623303e-06, "loss": 2.0737, "step": 16116 }, { "epoch": 0.54, "grad_norm": 0.7515103816986084, "learning_rate": 9.109943653152593e-06, "loss": 2.0728, "step": 16117 }, { "epoch": 0.54, "grad_norm": 0.7351611852645874, "learning_rate": 9.108884983737212e-06, "loss": 2.0241, "step": 16118 }, { "epoch": 0.54, "grad_norm": 0.7572402358055115, "learning_rate": 9.10782632438913e-06, "loss": 2.0374, "step": 16119 }, { "epoch": 0.54, "grad_norm": 0.7435814142227173, "learning_rate": 9.106767675120301e-06, "loss": 2.1318, "step": 16120 }, { "epoch": 0.54, "grad_norm": 0.7317203283309937, "learning_rate": 9.105709035942689e-06, "loss": 2.1107, "step": 16121 }, { "epoch": 0.54, "grad_norm": 0.7276882529258728, "learning_rate": 9.104650406868254e-06, "loss": 2.0933, "step": 16122 }, { "epoch": 0.54, "grad_norm": 0.7675588130950928, "learning_rate": 9.103591787908952e-06, "loss": 2.0925, "step": 16123 }, { "epoch": 0.54, "grad_norm": 0.731278657913208, "learning_rate": 9.102533179076743e-06, "loss": 2.1017, "step": 16124 }, { "epoch": 0.54, "grad_norm": 0.7704048156738281, "learning_rate": 9.101474580383589e-06, "loss": 2.0069, "step": 16125 }, { "epoch": 0.54, "grad_norm": 0.7406962513923645, "learning_rate": 9.100415991841452e-06, "loss": 2.0428, "step": 16126 }, { "epoch": 0.54, "grad_norm": 0.7625757455825806, "learning_rate": 9.099357413462284e-06, "loss": 2.0459, "step": 16127 }, { "epoch": 0.54, "grad_norm": 0.7589849829673767, "learning_rate": 9.098298845258049e-06, "loss": 2.1383, "step": 16128 }, { "epoch": 0.54, "grad_norm": 0.7585662007331848, "learning_rate": 9.097240287240705e-06, "loss": 2.05, "step": 16129 }, { "epoch": 0.54, "grad_norm": 0.7413753271102905, "learning_rate": 9.09618173942221e-06, "loss": 2.0668, "step": 16130 }, { "epoch": 0.54, "grad_norm": 0.718809962272644, "learning_rate": 9.095123201814529e-06, "loss": 2.0453, "step": 16131 }, { "epoch": 0.54, "grad_norm": 0.768153190612793, "learning_rate": 9.09406467442961e-06, "loss": 2.0457, "step": 16132 }, { "epoch": 0.54, "grad_norm": 0.7476561665534973, "learning_rate": 9.093006157279421e-06, "loss": 2.087, "step": 16133 }, { "epoch": 0.54, "grad_norm": 0.7554195523262024, "learning_rate": 9.091947650375916e-06, "loss": 2.0119, "step": 16134 }, { "epoch": 0.54, "grad_norm": 0.8251556754112244, "learning_rate": 9.090889153731055e-06, "loss": 2.1137, "step": 16135 }, { "epoch": 0.54, "grad_norm": 0.7942646145820618, "learning_rate": 9.089830667356794e-06, "loss": 2.0463, "step": 16136 }, { "epoch": 0.54, "grad_norm": 0.7700174450874329, "learning_rate": 9.088772191265093e-06, "loss": 2.0855, "step": 16137 }, { "epoch": 0.54, "grad_norm": 0.7480670809745789, "learning_rate": 9.087713725467909e-06, "loss": 2.0943, "step": 16138 }, { "epoch": 0.54, "grad_norm": 0.7469131350517273, "learning_rate": 9.0866552699772e-06, "loss": 2.0285, "step": 16139 }, { "epoch": 0.54, "grad_norm": 0.7369194030761719, "learning_rate": 9.085596824804928e-06, "loss": 2.0611, "step": 16140 }, { "epoch": 0.54, "grad_norm": 0.7487397193908691, "learning_rate": 9.084538389963045e-06, "loss": 2.0866, "step": 16141 }, { "epoch": 0.54, "grad_norm": 0.7577124834060669, "learning_rate": 9.083479965463511e-06, "loss": 2.1081, "step": 16142 }, { "epoch": 0.54, "grad_norm": 0.7707308530807495, "learning_rate": 9.082421551318284e-06, "loss": 2.1824, "step": 16143 }, { "epoch": 0.54, "grad_norm": 0.7502233386039734, "learning_rate": 9.081363147539322e-06, "loss": 2.1968, "step": 16144 }, { "epoch": 0.54, "grad_norm": 0.7382599115371704, "learning_rate": 9.080304754138577e-06, "loss": 2.0209, "step": 16145 }, { "epoch": 0.54, "grad_norm": 0.7302808165550232, "learning_rate": 9.07924637112801e-06, "loss": 2.0301, "step": 16146 }, { "epoch": 0.54, "grad_norm": 0.7310431599617004, "learning_rate": 9.078187998519579e-06, "loss": 2.0952, "step": 16147 }, { "epoch": 0.54, "grad_norm": 0.7677083611488342, "learning_rate": 9.077129636325242e-06, "loss": 2.1208, "step": 16148 }, { "epoch": 0.54, "grad_norm": 0.7403497099876404, "learning_rate": 9.076071284556953e-06, "loss": 2.0338, "step": 16149 }, { "epoch": 0.54, "grad_norm": 0.7446951866149902, "learning_rate": 9.075012943226667e-06, "loss": 2.0691, "step": 16150 }, { "epoch": 0.54, "grad_norm": 0.7945897579193115, "learning_rate": 9.073954612346342e-06, "loss": 2.111, "step": 16151 }, { "epoch": 0.54, "grad_norm": 0.7324787378311157, "learning_rate": 9.072896291927935e-06, "loss": 2.0523, "step": 16152 }, { "epoch": 0.54, "grad_norm": 0.7656278610229492, "learning_rate": 9.071837981983408e-06, "loss": 2.0457, "step": 16153 }, { "epoch": 0.54, "grad_norm": 0.7410079836845398, "learning_rate": 9.070779682524706e-06, "loss": 2.0802, "step": 16154 }, { "epoch": 0.54, "grad_norm": 0.7331940531730652, "learning_rate": 9.069721393563792e-06, "loss": 2.0261, "step": 16155 }, { "epoch": 0.54, "grad_norm": 0.7268899083137512, "learning_rate": 9.06866311511262e-06, "loss": 2.0287, "step": 16156 }, { "epoch": 0.54, "grad_norm": 0.7414692640304565, "learning_rate": 9.067604847183149e-06, "loss": 2.1066, "step": 16157 }, { "epoch": 0.54, "grad_norm": 0.7182986736297607, "learning_rate": 9.06654658978733e-06, "loss": 2.0423, "step": 16158 }, { "epoch": 0.54, "grad_norm": 0.7330798506736755, "learning_rate": 9.06548834293712e-06, "loss": 2.1183, "step": 16159 }, { "epoch": 0.54, "grad_norm": 0.7413531541824341, "learning_rate": 9.064430106644473e-06, "loss": 2.092, "step": 16160 }, { "epoch": 0.54, "grad_norm": 0.7121817469596863, "learning_rate": 9.063371880921347e-06, "loss": 2.0294, "step": 16161 }, { "epoch": 0.54, "grad_norm": 0.7239598035812378, "learning_rate": 9.062313665779701e-06, "loss": 2.0272, "step": 16162 }, { "epoch": 0.54, "grad_norm": 0.7395011186599731, "learning_rate": 9.061255461231484e-06, "loss": 2.0847, "step": 16163 }, { "epoch": 0.54, "grad_norm": 0.7319414019584656, "learning_rate": 9.060197267288651e-06, "loss": 2.0674, "step": 16164 }, { "epoch": 0.54, "grad_norm": 0.706632673740387, "learning_rate": 9.059139083963158e-06, "loss": 1.9886, "step": 16165 }, { "epoch": 0.54, "grad_norm": 0.7694850564002991, "learning_rate": 9.058080911266965e-06, "loss": 2.0172, "step": 16166 }, { "epoch": 0.54, "grad_norm": 0.7421568036079407, "learning_rate": 9.057022749212018e-06, "loss": 2.0633, "step": 16167 }, { "epoch": 0.54, "grad_norm": 0.7180240750312805, "learning_rate": 9.055964597810273e-06, "loss": 2.1059, "step": 16168 }, { "epoch": 0.54, "grad_norm": 0.7587441802024841, "learning_rate": 9.05490645707369e-06, "loss": 2.0178, "step": 16169 }, { "epoch": 0.54, "grad_norm": 0.7535413503646851, "learning_rate": 9.05384832701422e-06, "loss": 2.04, "step": 16170 }, { "epoch": 0.54, "grad_norm": 0.736610472202301, "learning_rate": 9.052790207643817e-06, "loss": 2.0879, "step": 16171 }, { "epoch": 0.54, "grad_norm": 0.7255251407623291, "learning_rate": 9.051732098974434e-06, "loss": 2.0414, "step": 16172 }, { "epoch": 0.54, "grad_norm": 0.7483540773391724, "learning_rate": 9.050674001018025e-06, "loss": 2.0733, "step": 16173 }, { "epoch": 0.54, "grad_norm": 0.7389726042747498, "learning_rate": 9.049615913786545e-06, "loss": 2.0048, "step": 16174 }, { "epoch": 0.54, "grad_norm": 0.7407342195510864, "learning_rate": 9.048557837291954e-06, "loss": 2.0505, "step": 16175 }, { "epoch": 0.54, "grad_norm": 0.7210568189620972, "learning_rate": 9.04749977154619e-06, "loss": 2.034, "step": 16176 }, { "epoch": 0.54, "grad_norm": 0.7721253037452698, "learning_rate": 9.046441716561219e-06, "loss": 2.0911, "step": 16177 }, { "epoch": 0.54, "grad_norm": 0.7353857159614563, "learning_rate": 9.04538367234899e-06, "loss": 2.0862, "step": 16178 }, { "epoch": 0.54, "grad_norm": 0.8708370923995972, "learning_rate": 9.044325638921461e-06, "loss": 1.9916, "step": 16179 }, { "epoch": 0.54, "grad_norm": 0.7454116940498352, "learning_rate": 9.043267616290576e-06, "loss": 2.0955, "step": 16180 }, { "epoch": 0.54, "grad_norm": 0.7351784706115723, "learning_rate": 9.042209604468294e-06, "loss": 2.0996, "step": 16181 }, { "epoch": 0.54, "grad_norm": 0.7427186965942383, "learning_rate": 9.041151603466565e-06, "loss": 2.0771, "step": 16182 }, { "epoch": 0.54, "grad_norm": 0.7075121402740479, "learning_rate": 9.040093613297344e-06, "loss": 2.0639, "step": 16183 }, { "epoch": 0.54, "grad_norm": 0.7156421542167664, "learning_rate": 9.039035633972585e-06, "loss": 2.0304, "step": 16184 }, { "epoch": 0.54, "grad_norm": 0.773158073425293, "learning_rate": 9.037977665504236e-06, "loss": 2.126, "step": 16185 }, { "epoch": 0.54, "grad_norm": 0.7273066639900208, "learning_rate": 9.036919707904254e-06, "loss": 2.0138, "step": 16186 }, { "epoch": 0.54, "grad_norm": 0.7115066051483154, "learning_rate": 9.035861761184587e-06, "loss": 2.1259, "step": 16187 }, { "epoch": 0.54, "grad_norm": 0.7173439860343933, "learning_rate": 9.034803825357191e-06, "loss": 2.0793, "step": 16188 }, { "epoch": 0.54, "grad_norm": 0.7285423874855042, "learning_rate": 9.033745900434013e-06, "loss": 2.0359, "step": 16189 }, { "epoch": 0.54, "grad_norm": 0.7512083053588867, "learning_rate": 9.032687986427006e-06, "loss": 2.0584, "step": 16190 }, { "epoch": 0.54, "grad_norm": 0.7425249814987183, "learning_rate": 9.031630083348125e-06, "loss": 2.111, "step": 16191 }, { "epoch": 0.54, "grad_norm": 0.7439605593681335, "learning_rate": 9.030572191209322e-06, "loss": 2.0396, "step": 16192 }, { "epoch": 0.54, "grad_norm": 0.7213541269302368, "learning_rate": 9.029514310022546e-06, "loss": 2.0104, "step": 16193 }, { "epoch": 0.54, "grad_norm": 0.7300660610198975, "learning_rate": 9.028456439799747e-06, "loss": 2.1314, "step": 16194 }, { "epoch": 0.54, "grad_norm": 0.7191729545593262, "learning_rate": 9.027398580552878e-06, "loss": 1.9987, "step": 16195 }, { "epoch": 0.54, "grad_norm": 0.7377094626426697, "learning_rate": 9.026340732293886e-06, "loss": 2.0191, "step": 16196 }, { "epoch": 0.54, "grad_norm": 0.7304640412330627, "learning_rate": 9.025282895034734e-06, "loss": 1.9988, "step": 16197 }, { "epoch": 0.54, "grad_norm": 0.7341344952583313, "learning_rate": 9.024225068787358e-06, "loss": 1.9537, "step": 16198 }, { "epoch": 0.54, "grad_norm": 0.7419582605361938, "learning_rate": 9.023167253563717e-06, "loss": 2.0686, "step": 16199 }, { "epoch": 0.54, "grad_norm": 0.7607742547988892, "learning_rate": 9.02210944937576e-06, "loss": 2.1546, "step": 16200 }, { "epoch": 0.54, "grad_norm": 0.7684308886528015, "learning_rate": 9.021051656235435e-06, "loss": 2.0936, "step": 16201 }, { "epoch": 0.54, "grad_norm": 0.7703185081481934, "learning_rate": 9.019993874154701e-06, "loss": 2.0552, "step": 16202 }, { "epoch": 0.54, "grad_norm": 0.7494840621948242, "learning_rate": 9.018936103145499e-06, "loss": 2.0515, "step": 16203 }, { "epoch": 0.54, "grad_norm": 0.7157339453697205, "learning_rate": 9.017878343219778e-06, "loss": 2.0611, "step": 16204 }, { "epoch": 0.54, "grad_norm": 0.7513536214828491, "learning_rate": 9.016820594389495e-06, "loss": 1.967, "step": 16205 }, { "epoch": 0.54, "grad_norm": 0.7592445015907288, "learning_rate": 9.015762856666599e-06, "loss": 2.0943, "step": 16206 }, { "epoch": 0.54, "grad_norm": 0.7481652498245239, "learning_rate": 9.014705130063035e-06, "loss": 2.0641, "step": 16207 }, { "epoch": 0.54, "grad_norm": 0.7607617974281311, "learning_rate": 9.013647414590755e-06, "loss": 2.0, "step": 16208 }, { "epoch": 0.54, "grad_norm": 0.7149009704589844, "learning_rate": 9.012589710261706e-06, "loss": 2.0335, "step": 16209 }, { "epoch": 0.54, "grad_norm": 0.7259677648544312, "learning_rate": 9.011532017087845e-06, "loss": 2.0787, "step": 16210 }, { "epoch": 0.54, "grad_norm": 0.7185119390487671, "learning_rate": 9.010474335081115e-06, "loss": 2.112, "step": 16211 }, { "epoch": 0.54, "grad_norm": 0.7556877732276917, "learning_rate": 9.009416664253462e-06, "loss": 2.0783, "step": 16212 }, { "epoch": 0.54, "grad_norm": 0.7617525458335876, "learning_rate": 9.00835900461684e-06, "loss": 2.1198, "step": 16213 }, { "epoch": 0.54, "grad_norm": 0.7449831366539001, "learning_rate": 9.0073013561832e-06, "loss": 2.0579, "step": 16214 }, { "epoch": 0.54, "grad_norm": 0.7478481531143188, "learning_rate": 9.006243718964487e-06, "loss": 1.9981, "step": 16215 }, { "epoch": 0.54, "grad_norm": 0.7514544725418091, "learning_rate": 9.005186092972647e-06, "loss": 2.0855, "step": 16216 }, { "epoch": 0.54, "grad_norm": 0.704127848148346, "learning_rate": 9.00412847821963e-06, "loss": 2.0071, "step": 16217 }, { "epoch": 0.54, "grad_norm": 0.7376036643981934, "learning_rate": 9.00307087471739e-06, "loss": 2.0599, "step": 16218 }, { "epoch": 0.54, "grad_norm": 0.7440323829650879, "learning_rate": 9.002013282477873e-06, "loss": 2.002, "step": 16219 }, { "epoch": 0.54, "grad_norm": 0.7418403625488281, "learning_rate": 9.000955701513022e-06, "loss": 2.1034, "step": 16220 }, { "epoch": 0.54, "grad_norm": 0.7720919251441956, "learning_rate": 8.999898131834788e-06, "loss": 2.1118, "step": 16221 }, { "epoch": 0.54, "grad_norm": 0.7663979530334473, "learning_rate": 8.998840573455119e-06, "loss": 2.0998, "step": 16222 }, { "epoch": 0.54, "grad_norm": 0.7163242101669312, "learning_rate": 8.99778302638596e-06, "loss": 2.1059, "step": 16223 }, { "epoch": 0.54, "grad_norm": 0.7514809966087341, "learning_rate": 8.996725490639267e-06, "loss": 2.0792, "step": 16224 }, { "epoch": 0.54, "grad_norm": 0.7812800407409668, "learning_rate": 8.995667966226975e-06, "loss": 2.0053, "step": 16225 }, { "epoch": 0.54, "grad_norm": 0.7364043593406677, "learning_rate": 8.99461045316104e-06, "loss": 2.0753, "step": 16226 }, { "epoch": 0.54, "grad_norm": 0.7410843372344971, "learning_rate": 8.993552951453407e-06, "loss": 2.0659, "step": 16227 }, { "epoch": 0.54, "grad_norm": 0.732086181640625, "learning_rate": 8.992495461116026e-06, "loss": 2.0275, "step": 16228 }, { "epoch": 0.54, "grad_norm": 0.7653730511665344, "learning_rate": 8.991437982160838e-06, "loss": 2.0618, "step": 16229 }, { "epoch": 0.54, "grad_norm": 0.7546468377113342, "learning_rate": 8.990380514599793e-06, "loss": 2.0155, "step": 16230 }, { "epoch": 0.54, "grad_norm": 0.787878692150116, "learning_rate": 8.989323058444835e-06, "loss": 2.0889, "step": 16231 }, { "epoch": 0.54, "grad_norm": 0.7119103074073792, "learning_rate": 8.988265613707914e-06, "loss": 2.0806, "step": 16232 }, { "epoch": 0.54, "grad_norm": 0.7557964324951172, "learning_rate": 8.987208180400978e-06, "loss": 2.1351, "step": 16233 }, { "epoch": 0.54, "grad_norm": 0.763425886631012, "learning_rate": 8.986150758535967e-06, "loss": 2.0304, "step": 16234 }, { "epoch": 0.54, "grad_norm": 0.7396345138549805, "learning_rate": 8.985093348124833e-06, "loss": 2.0696, "step": 16235 }, { "epoch": 0.54, "grad_norm": 0.7193009257316589, "learning_rate": 8.984035949179518e-06, "loss": 2.0742, "step": 16236 }, { "epoch": 0.54, "grad_norm": 0.7525489926338196, "learning_rate": 8.982978561711973e-06, "loss": 2.078, "step": 16237 }, { "epoch": 0.54, "grad_norm": 0.7478075623512268, "learning_rate": 8.981921185734136e-06, "loss": 2.1097, "step": 16238 }, { "epoch": 0.54, "grad_norm": 0.7698614001274109, "learning_rate": 8.980863821257956e-06, "loss": 2.076, "step": 16239 }, { "epoch": 0.54, "grad_norm": 0.7490207552909851, "learning_rate": 8.97980646829538e-06, "loss": 2.0975, "step": 16240 }, { "epoch": 0.54, "grad_norm": 0.709922730922699, "learning_rate": 8.978749126858356e-06, "loss": 1.9925, "step": 16241 }, { "epoch": 0.54, "grad_norm": 0.7205783724784851, "learning_rate": 8.977691796958823e-06, "loss": 2.1036, "step": 16242 }, { "epoch": 0.54, "grad_norm": 0.7257975935935974, "learning_rate": 8.97663447860873e-06, "loss": 2.0634, "step": 16243 }, { "epoch": 0.54, "grad_norm": 0.7788102030754089, "learning_rate": 8.97557717182002e-06, "loss": 2.0918, "step": 16244 }, { "epoch": 0.54, "grad_norm": 0.700707733631134, "learning_rate": 8.974519876604637e-06, "loss": 2.0723, "step": 16245 }, { "epoch": 0.54, "grad_norm": 0.7587103247642517, "learning_rate": 8.973462592974535e-06, "loss": 2.0106, "step": 16246 }, { "epoch": 0.54, "grad_norm": 0.7439965009689331, "learning_rate": 8.972405320941644e-06, "loss": 2.0749, "step": 16247 }, { "epoch": 0.54, "grad_norm": 0.7697708606719971, "learning_rate": 8.971348060517915e-06, "loss": 2.1124, "step": 16248 }, { "epoch": 0.54, "grad_norm": 0.7248289585113525, "learning_rate": 8.970290811715296e-06, "loss": 2.0574, "step": 16249 }, { "epoch": 0.54, "grad_norm": 0.7576391696929932, "learning_rate": 8.969233574545729e-06, "loss": 2.0449, "step": 16250 }, { "epoch": 0.54, "grad_norm": 0.7459337115287781, "learning_rate": 8.968176349021153e-06, "loss": 2.1233, "step": 16251 }, { "epoch": 0.54, "grad_norm": 0.7211782932281494, "learning_rate": 8.967119135153519e-06, "loss": 2.0372, "step": 16252 }, { "epoch": 0.54, "grad_norm": 0.7357760071754456, "learning_rate": 8.966061932954762e-06, "loss": 2.1153, "step": 16253 }, { "epoch": 0.54, "grad_norm": 0.726985514163971, "learning_rate": 8.965004742436837e-06, "loss": 2.069, "step": 16254 }, { "epoch": 0.54, "grad_norm": 0.7001355290412903, "learning_rate": 8.963947563611682e-06, "loss": 2.0795, "step": 16255 }, { "epoch": 0.54, "grad_norm": 0.7488248348236084, "learning_rate": 8.96289039649124e-06, "loss": 2.0716, "step": 16256 }, { "epoch": 0.54, "grad_norm": 0.7550780773162842, "learning_rate": 8.961833241087452e-06, "loss": 2.0653, "step": 16257 }, { "epoch": 0.54, "grad_norm": 0.7502903938293457, "learning_rate": 8.960776097412265e-06, "loss": 2.1059, "step": 16258 }, { "epoch": 0.54, "grad_norm": 0.7185998558998108, "learning_rate": 8.959718965477622e-06, "loss": 2.0884, "step": 16259 }, { "epoch": 0.54, "grad_norm": 0.7165513038635254, "learning_rate": 8.958661845295464e-06, "loss": 2.032, "step": 16260 }, { "epoch": 0.54, "grad_norm": 0.7208660244941711, "learning_rate": 8.95760473687773e-06, "loss": 2.0507, "step": 16261 }, { "epoch": 0.54, "grad_norm": 0.7690922021865845, "learning_rate": 8.95654764023637e-06, "loss": 2.079, "step": 16262 }, { "epoch": 0.54, "grad_norm": 0.7364292740821838, "learning_rate": 8.955490555383325e-06, "loss": 2.0876, "step": 16263 }, { "epoch": 0.54, "grad_norm": 0.7338917255401611, "learning_rate": 8.954433482330534e-06, "loss": 2.027, "step": 16264 }, { "epoch": 0.54, "grad_norm": 0.7267216444015503, "learning_rate": 8.95337642108994e-06, "loss": 2.1407, "step": 16265 }, { "epoch": 0.54, "grad_norm": 0.7158786058425903, "learning_rate": 8.952319371673486e-06, "loss": 2.0967, "step": 16266 }, { "epoch": 0.54, "grad_norm": 0.7620810866355896, "learning_rate": 8.951262334093111e-06, "loss": 2.0181, "step": 16267 }, { "epoch": 0.54, "grad_norm": 0.7380293011665344, "learning_rate": 8.950205308360767e-06, "loss": 2.0822, "step": 16268 }, { "epoch": 0.54, "grad_norm": 0.7258612513542175, "learning_rate": 8.949148294488382e-06, "loss": 2.1114, "step": 16269 }, { "epoch": 0.54, "grad_norm": 0.7434398531913757, "learning_rate": 8.948091292487905e-06, "loss": 2.0492, "step": 16270 }, { "epoch": 0.54, "grad_norm": 0.7498160004615784, "learning_rate": 8.947034302371278e-06, "loss": 2.0411, "step": 16271 }, { "epoch": 0.54, "grad_norm": 0.75923752784729, "learning_rate": 8.94597732415044e-06, "loss": 2.0712, "step": 16272 }, { "epoch": 0.54, "grad_norm": 0.7656378746032715, "learning_rate": 8.944920357837332e-06, "loss": 2.0592, "step": 16273 }, { "epoch": 0.54, "grad_norm": 0.7707496881484985, "learning_rate": 8.943863403443895e-06, "loss": 2.0515, "step": 16274 }, { "epoch": 0.54, "grad_norm": 0.7154850959777832, "learning_rate": 8.942806460982066e-06, "loss": 2.0965, "step": 16275 }, { "epoch": 0.54, "grad_norm": 0.7067455649375916, "learning_rate": 8.941749530463794e-06, "loss": 2.0236, "step": 16276 }, { "epoch": 0.54, "grad_norm": 0.7330570816993713, "learning_rate": 8.940692611901018e-06, "loss": 2.0634, "step": 16277 }, { "epoch": 0.54, "grad_norm": 0.7584925889968872, "learning_rate": 8.939635705305674e-06, "loss": 2.0718, "step": 16278 }, { "epoch": 0.54, "grad_norm": 0.7464617490768433, "learning_rate": 8.938578810689704e-06, "loss": 2.0745, "step": 16279 }, { "epoch": 0.54, "grad_norm": 0.7350733876228333, "learning_rate": 8.93752192806505e-06, "loss": 2.0708, "step": 16280 }, { "epoch": 0.54, "grad_norm": 0.7704419493675232, "learning_rate": 8.936465057443651e-06, "loss": 2.1003, "step": 16281 }, { "epoch": 0.54, "grad_norm": 0.7061072587966919, "learning_rate": 8.935408198837445e-06, "loss": 1.9995, "step": 16282 }, { "epoch": 0.54, "grad_norm": 0.7205577492713928, "learning_rate": 8.93435135225837e-06, "loss": 2.1138, "step": 16283 }, { "epoch": 0.54, "grad_norm": 0.7163249850273132, "learning_rate": 8.933294517718372e-06, "loss": 2.0614, "step": 16284 }, { "epoch": 0.54, "grad_norm": 0.7564570307731628, "learning_rate": 8.932237695229388e-06, "loss": 2.0772, "step": 16285 }, { "epoch": 0.54, "grad_norm": 0.715981125831604, "learning_rate": 8.931180884803358e-06, "loss": 2.0685, "step": 16286 }, { "epoch": 0.54, "grad_norm": 0.7391754984855652, "learning_rate": 8.930124086452218e-06, "loss": 2.0779, "step": 16287 }, { "epoch": 0.54, "grad_norm": 0.7761326432228088, "learning_rate": 8.929067300187908e-06, "loss": 2.1213, "step": 16288 }, { "epoch": 0.54, "grad_norm": 0.7507399916648865, "learning_rate": 8.928010526022367e-06, "loss": 2.1064, "step": 16289 }, { "epoch": 0.54, "grad_norm": 0.7556437253952026, "learning_rate": 8.926953763967542e-06, "loss": 2.0909, "step": 16290 }, { "epoch": 0.54, "grad_norm": 0.7838396430015564, "learning_rate": 8.925897014035357e-06, "loss": 2.0516, "step": 16291 }, { "epoch": 0.54, "grad_norm": 0.7268619537353516, "learning_rate": 8.92484027623776e-06, "loss": 2.0941, "step": 16292 }, { "epoch": 0.54, "grad_norm": 0.7410727739334106, "learning_rate": 8.923783550586687e-06, "loss": 2.0805, "step": 16293 }, { "epoch": 0.54, "grad_norm": 0.753060519695282, "learning_rate": 8.92272683709408e-06, "loss": 2.0235, "step": 16294 }, { "epoch": 0.54, "grad_norm": 0.7532007098197937, "learning_rate": 8.921670135771873e-06, "loss": 2.0638, "step": 16295 }, { "epoch": 0.54, "grad_norm": 0.7364959120750427, "learning_rate": 8.920613446632002e-06, "loss": 2.1032, "step": 16296 }, { "epoch": 0.54, "grad_norm": 0.7531649470329285, "learning_rate": 8.919556769686408e-06, "loss": 2.0525, "step": 16297 }, { "epoch": 0.54, "grad_norm": 0.7300714254379272, "learning_rate": 8.91850010494703e-06, "loss": 2.0844, "step": 16298 }, { "epoch": 0.54, "grad_norm": 0.7341073155403137, "learning_rate": 8.917443452425804e-06, "loss": 2.0725, "step": 16299 }, { "epoch": 0.54, "grad_norm": 0.7265954613685608, "learning_rate": 8.916386812134668e-06, "loss": 2.0515, "step": 16300 }, { "epoch": 0.54, "grad_norm": 0.7453778386116028, "learning_rate": 8.915330184085556e-06, "loss": 2.0457, "step": 16301 }, { "epoch": 0.54, "grad_norm": 0.760808527469635, "learning_rate": 8.914273568290408e-06, "loss": 2.0789, "step": 16302 }, { "epoch": 0.54, "grad_norm": 0.7292805910110474, "learning_rate": 8.913216964761167e-06, "loss": 2.0363, "step": 16303 }, { "epoch": 0.54, "grad_norm": 0.7843396067619324, "learning_rate": 8.912160373509759e-06, "loss": 2.0192, "step": 16304 }, { "epoch": 0.54, "grad_norm": 0.7307671904563904, "learning_rate": 8.911103794548124e-06, "loss": 2.0508, "step": 16305 }, { "epoch": 0.54, "grad_norm": 0.7470925450325012, "learning_rate": 8.910047227888202e-06, "loss": 2.148, "step": 16306 }, { "epoch": 0.54, "grad_norm": 0.7362359762191772, "learning_rate": 8.908990673541928e-06, "loss": 2.0578, "step": 16307 }, { "epoch": 0.54, "grad_norm": 0.7493798732757568, "learning_rate": 8.907934131521238e-06, "loss": 2.0248, "step": 16308 }, { "epoch": 0.54, "grad_norm": 0.7416777014732361, "learning_rate": 8.90687760183807e-06, "loss": 2.0812, "step": 16309 }, { "epoch": 0.54, "grad_norm": 0.7177509069442749, "learning_rate": 8.905821084504353e-06, "loss": 2.1011, "step": 16310 }, { "epoch": 0.54, "grad_norm": 0.7297325730323792, "learning_rate": 8.904764579532031e-06, "loss": 1.9583, "step": 16311 }, { "epoch": 0.54, "grad_norm": 0.7191773653030396, "learning_rate": 8.90370808693304e-06, "loss": 2.0743, "step": 16312 }, { "epoch": 0.54, "grad_norm": 0.7441610097885132, "learning_rate": 8.902651606719308e-06, "loss": 2.0935, "step": 16313 }, { "epoch": 0.54, "grad_norm": 0.7911031246185303, "learning_rate": 8.901595138902777e-06, "loss": 2.1091, "step": 16314 }, { "epoch": 0.54, "grad_norm": 0.7798601984977722, "learning_rate": 8.90053868349538e-06, "loss": 2.093, "step": 16315 }, { "epoch": 0.54, "grad_norm": 0.7286093235015869, "learning_rate": 8.89948224050905e-06, "loss": 2.0557, "step": 16316 }, { "epoch": 0.54, "grad_norm": 0.7270519137382507, "learning_rate": 8.898425809955731e-06, "loss": 2.0851, "step": 16317 }, { "epoch": 0.54, "grad_norm": 0.731543779373169, "learning_rate": 8.897369391847347e-06, "loss": 2.1046, "step": 16318 }, { "epoch": 0.54, "grad_norm": 0.7244449853897095, "learning_rate": 8.896312986195837e-06, "loss": 2.0213, "step": 16319 }, { "epoch": 0.54, "grad_norm": 0.7579661011695862, "learning_rate": 8.895256593013139e-06, "loss": 2.1528, "step": 16320 }, { "epoch": 0.54, "grad_norm": 0.7737211585044861, "learning_rate": 8.894200212311185e-06, "loss": 2.033, "step": 16321 }, { "epoch": 0.54, "grad_norm": 0.7177257537841797, "learning_rate": 8.893143844101906e-06, "loss": 2.0686, "step": 16322 }, { "epoch": 0.54, "grad_norm": 0.7622888088226318, "learning_rate": 8.892087488397241e-06, "loss": 2.0876, "step": 16323 }, { "epoch": 0.54, "grad_norm": 0.7203890085220337, "learning_rate": 8.891031145209118e-06, "loss": 2.0547, "step": 16324 }, { "epoch": 0.54, "grad_norm": 0.7218117117881775, "learning_rate": 8.889974814549483e-06, "loss": 1.9708, "step": 16325 }, { "epoch": 0.54, "grad_norm": 0.7412674427032471, "learning_rate": 8.888918496430255e-06, "loss": 2.1429, "step": 16326 }, { "epoch": 0.54, "grad_norm": 0.7095749974250793, "learning_rate": 8.887862190863378e-06, "loss": 2.0996, "step": 16327 }, { "epoch": 0.54, "grad_norm": 0.7347303032875061, "learning_rate": 8.88680589786078e-06, "loss": 2.0658, "step": 16328 }, { "epoch": 0.54, "grad_norm": 0.7229824066162109, "learning_rate": 8.885749617434399e-06, "loss": 2.0856, "step": 16329 }, { "epoch": 0.54, "grad_norm": 0.7854555249214172, "learning_rate": 8.884693349596168e-06, "loss": 1.9825, "step": 16330 }, { "epoch": 0.54, "grad_norm": 0.7309315800666809, "learning_rate": 8.883637094358014e-06, "loss": 2.1037, "step": 16331 }, { "epoch": 0.54, "grad_norm": 0.750438392162323, "learning_rate": 8.882580851731872e-06, "loss": 2.1121, "step": 16332 }, { "epoch": 0.54, "grad_norm": 0.7555505633354187, "learning_rate": 8.88152462172968e-06, "loss": 2.0725, "step": 16333 }, { "epoch": 0.54, "grad_norm": 0.7532895803451538, "learning_rate": 8.880468404363368e-06, "loss": 2.0532, "step": 16334 }, { "epoch": 0.54, "grad_norm": 0.7346996068954468, "learning_rate": 8.879412199644866e-06, "loss": 2.0612, "step": 16335 }, { "epoch": 0.54, "grad_norm": 0.7353106141090393, "learning_rate": 8.87835600758611e-06, "loss": 2.0661, "step": 16336 }, { "epoch": 0.54, "grad_norm": 0.7091019749641418, "learning_rate": 8.877299828199027e-06, "loss": 2.0991, "step": 16337 }, { "epoch": 0.54, "grad_norm": 0.7403883934020996, "learning_rate": 8.876243661495553e-06, "loss": 2.0793, "step": 16338 }, { "epoch": 0.54, "grad_norm": 0.7267485857009888, "learning_rate": 8.875187507487626e-06, "loss": 2.0315, "step": 16339 }, { "epoch": 0.54, "grad_norm": 0.735567569732666, "learning_rate": 8.874131366187165e-06, "loss": 2.0554, "step": 16340 }, { "epoch": 0.54, "grad_norm": 0.7663167715072632, "learning_rate": 8.873075237606108e-06, "loss": 2.0465, "step": 16341 }, { "epoch": 0.54, "grad_norm": 0.7100227475166321, "learning_rate": 8.872019121756388e-06, "loss": 2.1102, "step": 16342 }, { "epoch": 0.54, "grad_norm": 0.7401384711265564, "learning_rate": 8.870963018649937e-06, "loss": 2.0062, "step": 16343 }, { "epoch": 0.54, "grad_norm": 0.7168064117431641, "learning_rate": 8.869906928298683e-06, "loss": 2.0104, "step": 16344 }, { "epoch": 0.54, "grad_norm": 0.7407863736152649, "learning_rate": 8.868850850714556e-06, "loss": 2.0572, "step": 16345 }, { "epoch": 0.54, "grad_norm": 0.7458263635635376, "learning_rate": 8.86779478590949e-06, "loss": 2.0737, "step": 16346 }, { "epoch": 0.54, "grad_norm": 0.7497252821922302, "learning_rate": 8.866738733895415e-06, "loss": 2.0545, "step": 16347 }, { "epoch": 0.54, "grad_norm": 0.7619100213050842, "learning_rate": 8.865682694684263e-06, "loss": 2.0858, "step": 16348 }, { "epoch": 0.54, "grad_norm": 0.7518855929374695, "learning_rate": 8.864626668287963e-06, "loss": 2.0811, "step": 16349 }, { "epoch": 0.54, "grad_norm": 0.7021903395652771, "learning_rate": 8.863570654718444e-06, "loss": 2.0255, "step": 16350 }, { "epoch": 0.54, "grad_norm": 0.7539703845977783, "learning_rate": 8.862514653987639e-06, "loss": 1.997, "step": 16351 }, { "epoch": 0.54, "grad_norm": 0.7241875529289246, "learning_rate": 8.861458666107479e-06, "loss": 2.0855, "step": 16352 }, { "epoch": 0.54, "grad_norm": 0.7283573746681213, "learning_rate": 8.860402691089888e-06, "loss": 2.0164, "step": 16353 }, { "epoch": 0.54, "grad_norm": 0.7650176286697388, "learning_rate": 8.859346728946797e-06, "loss": 2.0927, "step": 16354 }, { "epoch": 0.54, "grad_norm": 0.7596832513809204, "learning_rate": 8.858290779690142e-06, "loss": 2.1711, "step": 16355 }, { "epoch": 0.54, "grad_norm": 0.7846308350563049, "learning_rate": 8.857234843331852e-06, "loss": 2.0723, "step": 16356 }, { "epoch": 0.54, "grad_norm": 0.7709128260612488, "learning_rate": 8.856178919883849e-06, "loss": 2.0588, "step": 16357 }, { "epoch": 0.54, "grad_norm": 0.7674776315689087, "learning_rate": 8.855123009358066e-06, "loss": 2.1107, "step": 16358 }, { "epoch": 0.54, "grad_norm": 0.734981894493103, "learning_rate": 8.854067111766433e-06, "loss": 2.0562, "step": 16359 }, { "epoch": 0.54, "grad_norm": 0.7432228326797485, "learning_rate": 8.853011227120875e-06, "loss": 2.0074, "step": 16360 }, { "epoch": 0.54, "grad_norm": 0.7195361256599426, "learning_rate": 8.851955355433332e-06, "loss": 2.1244, "step": 16361 }, { "epoch": 0.54, "grad_norm": 0.7259693741798401, "learning_rate": 8.850899496715718e-06, "loss": 2.0712, "step": 16362 }, { "epoch": 0.54, "grad_norm": 0.7404330968856812, "learning_rate": 8.84984365097997e-06, "loss": 2.0101, "step": 16363 }, { "epoch": 0.54, "grad_norm": 0.7575764060020447, "learning_rate": 8.848787818238013e-06, "loss": 2.0347, "step": 16364 }, { "epoch": 0.54, "grad_norm": 0.7332408428192139, "learning_rate": 8.847731998501781e-06, "loss": 2.0897, "step": 16365 }, { "epoch": 0.54, "grad_norm": 0.7336429953575134, "learning_rate": 8.846676191783193e-06, "loss": 2.1056, "step": 16366 }, { "epoch": 0.54, "grad_norm": 0.7381898164749146, "learning_rate": 8.845620398094184e-06, "loss": 2.0875, "step": 16367 }, { "epoch": 0.54, "grad_norm": 0.7727584838867188, "learning_rate": 8.844564617446674e-06, "loss": 2.0834, "step": 16368 }, { "epoch": 0.54, "grad_norm": 0.7485928535461426, "learning_rate": 8.8435088498526e-06, "loss": 2.0019, "step": 16369 }, { "epoch": 0.54, "grad_norm": 0.753943920135498, "learning_rate": 8.842453095323887e-06, "loss": 2.0938, "step": 16370 }, { "epoch": 0.54, "grad_norm": 0.7791716456413269, "learning_rate": 8.841397353872459e-06, "loss": 2.0941, "step": 16371 }, { "epoch": 0.54, "grad_norm": 0.7774559259414673, "learning_rate": 8.840341625510243e-06, "loss": 2.0225, "step": 16372 }, { "epoch": 0.54, "grad_norm": 0.7509011030197144, "learning_rate": 8.839285910249168e-06, "loss": 2.0519, "step": 16373 }, { "epoch": 0.54, "grad_norm": 0.76008141040802, "learning_rate": 8.838230208101165e-06, "loss": 2.0619, "step": 16374 }, { "epoch": 0.54, "grad_norm": 0.7372307181358337, "learning_rate": 8.837174519078153e-06, "loss": 2.048, "step": 16375 }, { "epoch": 0.54, "grad_norm": 0.7336348295211792, "learning_rate": 8.836118843192059e-06, "loss": 2.0739, "step": 16376 }, { "epoch": 0.54, "grad_norm": 0.731756865978241, "learning_rate": 8.835063180454815e-06, "loss": 2.0933, "step": 16377 }, { "epoch": 0.54, "grad_norm": 0.726236879825592, "learning_rate": 8.834007530878343e-06, "loss": 2.1016, "step": 16378 }, { "epoch": 0.54, "grad_norm": 0.7615071535110474, "learning_rate": 8.832951894474574e-06, "loss": 1.9864, "step": 16379 }, { "epoch": 0.54, "grad_norm": 0.7209869623184204, "learning_rate": 8.831896271255429e-06, "loss": 2.1012, "step": 16380 }, { "epoch": 0.54, "grad_norm": 0.7409300208091736, "learning_rate": 8.830840661232833e-06, "loss": 2.0608, "step": 16381 }, { "epoch": 0.55, "grad_norm": 0.7446460127830505, "learning_rate": 8.829785064418713e-06, "loss": 2.0783, "step": 16382 }, { "epoch": 0.55, "grad_norm": 0.7276361584663391, "learning_rate": 8.828729480825003e-06, "loss": 2.0929, "step": 16383 }, { "epoch": 0.55, "grad_norm": 0.7155804634094238, "learning_rate": 8.827673910463614e-06, "loss": 1.9797, "step": 16384 }, { "epoch": 0.55, "grad_norm": 0.7350172400474548, "learning_rate": 8.82661835334648e-06, "loss": 2.0749, "step": 16385 }, { "epoch": 0.55, "grad_norm": 0.7399376630783081, "learning_rate": 8.825562809485523e-06, "loss": 2.0603, "step": 16386 }, { "epoch": 0.55, "grad_norm": 0.7186457514762878, "learning_rate": 8.824507278892673e-06, "loss": 2.0803, "step": 16387 }, { "epoch": 0.55, "grad_norm": 0.7316640019416809, "learning_rate": 8.82345176157985e-06, "loss": 2.0311, "step": 16388 }, { "epoch": 0.55, "grad_norm": 0.7412613034248352, "learning_rate": 8.822396257558976e-06, "loss": 2.1119, "step": 16389 }, { "epoch": 0.55, "grad_norm": 0.7315983772277832, "learning_rate": 8.821340766841979e-06, "loss": 2.0947, "step": 16390 }, { "epoch": 0.55, "grad_norm": 0.7742661237716675, "learning_rate": 8.820285289440784e-06, "loss": 2.0993, "step": 16391 }, { "epoch": 0.55, "grad_norm": 0.7630462050437927, "learning_rate": 8.819229825367318e-06, "loss": 2.1406, "step": 16392 }, { "epoch": 0.55, "grad_norm": 0.9673565626144409, "learning_rate": 8.8181743746335e-06, "loss": 2.0757, "step": 16393 }, { "epoch": 0.55, "grad_norm": 0.7499637007713318, "learning_rate": 8.817118937251253e-06, "loss": 2.1382, "step": 16394 }, { "epoch": 0.55, "grad_norm": 0.7543171644210815, "learning_rate": 8.816063513232502e-06, "loss": 2.01, "step": 16395 }, { "epoch": 0.55, "grad_norm": 0.7438796758651733, "learning_rate": 8.815008102589178e-06, "loss": 2.0917, "step": 16396 }, { "epoch": 0.55, "grad_norm": 0.7333088517189026, "learning_rate": 8.813952705333193e-06, "loss": 2.1347, "step": 16397 }, { "epoch": 0.55, "grad_norm": 0.7183871865272522, "learning_rate": 8.812897321476473e-06, "loss": 2.0638, "step": 16398 }, { "epoch": 0.55, "grad_norm": 0.7447047829627991, "learning_rate": 8.811841951030946e-06, "loss": 2.0168, "step": 16399 }, { "epoch": 0.55, "grad_norm": 0.7352822422981262, "learning_rate": 8.810786594008533e-06, "loss": 1.9715, "step": 16400 }, { "epoch": 0.55, "grad_norm": 0.7336325645446777, "learning_rate": 8.809731250421157e-06, "loss": 2.0754, "step": 16401 }, { "epoch": 0.55, "grad_norm": 0.727049708366394, "learning_rate": 8.808675920280739e-06, "loss": 2.1251, "step": 16402 }, { "epoch": 0.55, "grad_norm": 0.7498889565467834, "learning_rate": 8.8076206035992e-06, "loss": 2.0328, "step": 16403 }, { "epoch": 0.55, "grad_norm": 0.7203955054283142, "learning_rate": 8.806565300388465e-06, "loss": 1.9741, "step": 16404 }, { "epoch": 0.55, "grad_norm": 0.7142823934555054, "learning_rate": 8.805510010660462e-06, "loss": 2.0366, "step": 16405 }, { "epoch": 0.55, "grad_norm": 0.7268344163894653, "learning_rate": 8.804454734427099e-06, "loss": 2.0161, "step": 16406 }, { "epoch": 0.55, "grad_norm": 0.7361729741096497, "learning_rate": 8.803399471700309e-06, "loss": 2.0878, "step": 16407 }, { "epoch": 0.55, "grad_norm": 0.7373544573783875, "learning_rate": 8.80234422249201e-06, "loss": 2.1055, "step": 16408 }, { "epoch": 0.55, "grad_norm": 0.762712836265564, "learning_rate": 8.801288986814123e-06, "loss": 2.0677, "step": 16409 }, { "epoch": 0.55, "grad_norm": 0.7404637932777405, "learning_rate": 8.800233764678574e-06, "loss": 1.9704, "step": 16410 }, { "epoch": 0.55, "grad_norm": 0.721305787563324, "learning_rate": 8.799178556097278e-06, "loss": 2.0269, "step": 16411 }, { "epoch": 0.55, "grad_norm": 0.7558053731918335, "learning_rate": 8.798123361082159e-06, "loss": 2.0937, "step": 16412 }, { "epoch": 0.55, "grad_norm": 0.7867981195449829, "learning_rate": 8.797068179645139e-06, "loss": 2.1392, "step": 16413 }, { "epoch": 0.55, "grad_norm": 0.7282512187957764, "learning_rate": 8.79601301179814e-06, "loss": 2.0511, "step": 16414 }, { "epoch": 0.55, "grad_norm": 0.7018710374832153, "learning_rate": 8.794957857553076e-06, "loss": 2.0441, "step": 16415 }, { "epoch": 0.55, "grad_norm": 0.7805810570716858, "learning_rate": 8.793902716921874e-06, "loss": 2.0117, "step": 16416 }, { "epoch": 0.55, "grad_norm": 0.757597804069519, "learning_rate": 8.792847589916449e-06, "loss": 2.0694, "step": 16417 }, { "epoch": 0.55, "grad_norm": 0.7313772439956665, "learning_rate": 8.791792476548731e-06, "loss": 2.0916, "step": 16418 }, { "epoch": 0.55, "grad_norm": 0.7542209625244141, "learning_rate": 8.790737376830628e-06, "loss": 2.0449, "step": 16419 }, { "epoch": 0.55, "grad_norm": 0.7357569932937622, "learning_rate": 8.789682290774067e-06, "loss": 2.0325, "step": 16420 }, { "epoch": 0.55, "grad_norm": 0.749394953250885, "learning_rate": 8.788627218390965e-06, "loss": 2.053, "step": 16421 }, { "epoch": 0.55, "grad_norm": 0.7415332794189453, "learning_rate": 8.787572159693245e-06, "loss": 2.0296, "step": 16422 }, { "epoch": 0.55, "grad_norm": 0.7371897101402283, "learning_rate": 8.786517114692826e-06, "loss": 2.0258, "step": 16423 }, { "epoch": 0.55, "grad_norm": 0.7630943655967712, "learning_rate": 8.785462083401622e-06, "loss": 2.1062, "step": 16424 }, { "epoch": 0.55, "grad_norm": 0.7523031830787659, "learning_rate": 8.784407065831554e-06, "loss": 2.1208, "step": 16425 }, { "epoch": 0.55, "grad_norm": 0.7253754734992981, "learning_rate": 8.783352061994545e-06, "loss": 2.0302, "step": 16426 }, { "epoch": 0.55, "grad_norm": 0.7486311197280884, "learning_rate": 8.782297071902512e-06, "loss": 2.1541, "step": 16427 }, { "epoch": 0.55, "grad_norm": 0.7621978521347046, "learning_rate": 8.781242095567373e-06, "loss": 2.1069, "step": 16428 }, { "epoch": 0.55, "grad_norm": 0.7580153346061707, "learning_rate": 8.780187133001046e-06, "loss": 2.085, "step": 16429 }, { "epoch": 0.55, "grad_norm": 0.7255485653877258, "learning_rate": 8.779132184215449e-06, "loss": 1.9676, "step": 16430 }, { "epoch": 0.55, "grad_norm": 0.7207717895507812, "learning_rate": 8.7780772492225e-06, "loss": 2.064, "step": 16431 }, { "epoch": 0.55, "grad_norm": 0.73396235704422, "learning_rate": 8.777022328034123e-06, "loss": 2.1004, "step": 16432 }, { "epoch": 0.55, "grad_norm": 0.7451662421226501, "learning_rate": 8.775967420662227e-06, "loss": 2.1047, "step": 16433 }, { "epoch": 0.55, "grad_norm": 0.7580092549324036, "learning_rate": 8.774912527118734e-06, "loss": 2.0853, "step": 16434 }, { "epoch": 0.55, "grad_norm": 0.7154918909072876, "learning_rate": 8.773857647415562e-06, "loss": 2.0127, "step": 16435 }, { "epoch": 0.55, "grad_norm": 0.759900689125061, "learning_rate": 8.77280278156463e-06, "loss": 2.0175, "step": 16436 }, { "epoch": 0.55, "grad_norm": 0.7003214955329895, "learning_rate": 8.77174792957785e-06, "loss": 2.0018, "step": 16437 }, { "epoch": 0.55, "grad_norm": 0.7213950753211975, "learning_rate": 8.770693091467142e-06, "loss": 2.0408, "step": 16438 }, { "epoch": 0.55, "grad_norm": 0.7741171717643738, "learning_rate": 8.769638267244423e-06, "loss": 2.0574, "step": 16439 }, { "epoch": 0.55, "grad_norm": 0.7192155718803406, "learning_rate": 8.768583456921613e-06, "loss": 2.0653, "step": 16440 }, { "epoch": 0.55, "grad_norm": 0.7209720611572266, "learning_rate": 8.76752866051062e-06, "loss": 1.9983, "step": 16441 }, { "epoch": 0.55, "grad_norm": 0.7691717147827148, "learning_rate": 8.76647387802337e-06, "loss": 2.1055, "step": 16442 }, { "epoch": 0.55, "grad_norm": 0.7311484217643738, "learning_rate": 8.765419109471773e-06, "loss": 2.0706, "step": 16443 }, { "epoch": 0.55, "grad_norm": 0.7514926195144653, "learning_rate": 8.76436435486775e-06, "loss": 2.032, "step": 16444 }, { "epoch": 0.55, "grad_norm": 0.7472614645957947, "learning_rate": 8.763309614223215e-06, "loss": 1.9625, "step": 16445 }, { "epoch": 0.55, "grad_norm": 0.7446077466011047, "learning_rate": 8.76225488755008e-06, "loss": 2.0227, "step": 16446 }, { "epoch": 0.55, "grad_norm": 0.7139190435409546, "learning_rate": 8.761200174860262e-06, "loss": 2.0222, "step": 16447 }, { "epoch": 0.55, "grad_norm": 0.7562984228134155, "learning_rate": 8.760145476165681e-06, "loss": 2.0708, "step": 16448 }, { "epoch": 0.55, "grad_norm": 0.7522115111351013, "learning_rate": 8.759090791478253e-06, "loss": 2.0889, "step": 16449 }, { "epoch": 0.55, "grad_norm": 0.7712689638137817, "learning_rate": 8.758036120809887e-06, "loss": 2.0916, "step": 16450 }, { "epoch": 0.55, "grad_norm": 0.7339121699333191, "learning_rate": 8.7569814641725e-06, "loss": 2.1025, "step": 16451 }, { "epoch": 0.55, "grad_norm": 0.7472110986709595, "learning_rate": 8.75592682157801e-06, "loss": 2.0213, "step": 16452 }, { "epoch": 0.55, "grad_norm": 0.7393128275871277, "learning_rate": 8.754872193038326e-06, "loss": 2.0774, "step": 16453 }, { "epoch": 0.55, "grad_norm": 0.7281447649002075, "learning_rate": 8.753817578565372e-06, "loss": 2.0659, "step": 16454 }, { "epoch": 0.55, "grad_norm": 0.7336798310279846, "learning_rate": 8.752762978171052e-06, "loss": 2.0236, "step": 16455 }, { "epoch": 0.55, "grad_norm": 0.7374682426452637, "learning_rate": 8.751708391867286e-06, "loss": 2.0149, "step": 16456 }, { "epoch": 0.55, "grad_norm": 0.8017622828483582, "learning_rate": 8.750653819665987e-06, "loss": 2.04, "step": 16457 }, { "epoch": 0.55, "grad_norm": 0.7338774800300598, "learning_rate": 8.749599261579072e-06, "loss": 2.1439, "step": 16458 }, { "epoch": 0.55, "grad_norm": 0.7582857608795166, "learning_rate": 8.748544717618449e-06, "loss": 2.1613, "step": 16459 }, { "epoch": 0.55, "grad_norm": 0.7346146702766418, "learning_rate": 8.747490187796035e-06, "loss": 2.0802, "step": 16460 }, { "epoch": 0.55, "grad_norm": 0.7700697183609009, "learning_rate": 8.74643567212374e-06, "loss": 2.0834, "step": 16461 }, { "epoch": 0.55, "grad_norm": 0.7341357469558716, "learning_rate": 8.745381170613483e-06, "loss": 2.0131, "step": 16462 }, { "epoch": 0.55, "grad_norm": 0.7856462597846985, "learning_rate": 8.744326683277175e-06, "loss": 2.0972, "step": 16463 }, { "epoch": 0.55, "grad_norm": 0.7200462818145752, "learning_rate": 8.743272210126728e-06, "loss": 2.0518, "step": 16464 }, { "epoch": 0.55, "grad_norm": 0.7624671459197998, "learning_rate": 8.742217751174053e-06, "loss": 2.1662, "step": 16465 }, { "epoch": 0.55, "grad_norm": 0.7486549019813538, "learning_rate": 8.741163306431066e-06, "loss": 2.0891, "step": 16466 }, { "epoch": 0.55, "grad_norm": 0.7333300113677979, "learning_rate": 8.74010887590968e-06, "loss": 2.106, "step": 16467 }, { "epoch": 0.55, "grad_norm": 0.7472153306007385, "learning_rate": 8.739054459621803e-06, "loss": 2.1254, "step": 16468 }, { "epoch": 0.55, "grad_norm": 0.7572233080863953, "learning_rate": 8.738000057579348e-06, "loss": 2.0237, "step": 16469 }, { "epoch": 0.55, "grad_norm": 0.7770799398422241, "learning_rate": 8.736945669794231e-06, "loss": 2.1385, "step": 16470 }, { "epoch": 0.55, "grad_norm": 0.7321068048477173, "learning_rate": 8.735891296278363e-06, "loss": 2.084, "step": 16471 }, { "epoch": 0.55, "grad_norm": 0.7369509339332581, "learning_rate": 8.734836937043654e-06, "loss": 2.0601, "step": 16472 }, { "epoch": 0.55, "grad_norm": 0.7463334798812866, "learning_rate": 8.733782592102014e-06, "loss": 2.0082, "step": 16473 }, { "epoch": 0.55, "grad_norm": 0.7468870282173157, "learning_rate": 8.732728261465356e-06, "loss": 2.0951, "step": 16474 }, { "epoch": 0.55, "grad_norm": 0.7458130717277527, "learning_rate": 8.731673945145591e-06, "loss": 2.0892, "step": 16475 }, { "epoch": 0.55, "grad_norm": 0.7328843474388123, "learning_rate": 8.730619643154637e-06, "loss": 1.9699, "step": 16476 }, { "epoch": 0.55, "grad_norm": 0.7309742569923401, "learning_rate": 8.72956535550439e-06, "loss": 2.1138, "step": 16477 }, { "epoch": 0.55, "grad_norm": 0.761681854724884, "learning_rate": 8.728511082206771e-06, "loss": 2.0866, "step": 16478 }, { "epoch": 0.55, "grad_norm": 0.7546001076698303, "learning_rate": 8.72745682327369e-06, "loss": 2.0924, "step": 16479 }, { "epoch": 0.55, "grad_norm": 0.7201979756355286, "learning_rate": 8.726402578717057e-06, "loss": 2.068, "step": 16480 }, { "epoch": 0.55, "grad_norm": 0.7547780871391296, "learning_rate": 8.725348348548781e-06, "loss": 2.0795, "step": 16481 }, { "epoch": 0.55, "grad_norm": 0.7751436233520508, "learning_rate": 8.724294132780772e-06, "loss": 2.0831, "step": 16482 }, { "epoch": 0.55, "grad_norm": 0.7417458295822144, "learning_rate": 8.723239931424937e-06, "loss": 2.0504, "step": 16483 }, { "epoch": 0.55, "grad_norm": 0.7622350454330444, "learning_rate": 8.722185744493192e-06, "loss": 2.031, "step": 16484 }, { "epoch": 0.55, "grad_norm": 0.758143424987793, "learning_rate": 8.721131571997446e-06, "loss": 2.0416, "step": 16485 }, { "epoch": 0.55, "grad_norm": 0.7258217334747314, "learning_rate": 8.720077413949601e-06, "loss": 2.1094, "step": 16486 }, { "epoch": 0.55, "grad_norm": 0.7180361151695251, "learning_rate": 8.719023270361575e-06, "loss": 2.1265, "step": 16487 }, { "epoch": 0.55, "grad_norm": 0.7318562865257263, "learning_rate": 8.717969141245272e-06, "loss": 2.0622, "step": 16488 }, { "epoch": 0.55, "grad_norm": 0.7662108540534973, "learning_rate": 8.716915026612603e-06, "loss": 2.0706, "step": 16489 }, { "epoch": 0.55, "grad_norm": 0.7394196391105652, "learning_rate": 8.715860926475476e-06, "loss": 2.0144, "step": 16490 }, { "epoch": 0.55, "grad_norm": 0.7161291241645813, "learning_rate": 8.714806840845797e-06, "loss": 2.0527, "step": 16491 }, { "epoch": 0.55, "grad_norm": 0.7171561121940613, "learning_rate": 8.713752769735478e-06, "loss": 2.0639, "step": 16492 }, { "epoch": 0.55, "grad_norm": 0.7340219020843506, "learning_rate": 8.712698713156427e-06, "loss": 2.046, "step": 16493 }, { "epoch": 0.55, "grad_norm": 0.7242436408996582, "learning_rate": 8.711644671120555e-06, "loss": 2.0917, "step": 16494 }, { "epoch": 0.55, "grad_norm": 0.7369041442871094, "learning_rate": 8.710590643639762e-06, "loss": 1.9866, "step": 16495 }, { "epoch": 0.55, "grad_norm": 0.7433868646621704, "learning_rate": 8.709536630725961e-06, "loss": 1.9983, "step": 16496 }, { "epoch": 0.55, "grad_norm": 0.7507178783416748, "learning_rate": 8.708482632391058e-06, "loss": 1.962, "step": 16497 }, { "epoch": 0.55, "grad_norm": 0.7221167683601379, "learning_rate": 8.707428648646968e-06, "loss": 2.0879, "step": 16498 }, { "epoch": 0.55, "grad_norm": 0.718222439289093, "learning_rate": 8.706374679505584e-06, "loss": 2.048, "step": 16499 }, { "epoch": 0.55, "grad_norm": 0.7409595847129822, "learning_rate": 8.705320724978822e-06, "loss": 2.1006, "step": 16500 }, { "epoch": 0.55, "grad_norm": 0.7389588952064514, "learning_rate": 8.70426678507859e-06, "loss": 2.0059, "step": 16501 }, { "epoch": 0.55, "grad_norm": 0.7479934692382812, "learning_rate": 8.703212859816793e-06, "loss": 2.1182, "step": 16502 }, { "epoch": 0.55, "grad_norm": 0.8031801581382751, "learning_rate": 8.702158949205336e-06, "loss": 2.0686, "step": 16503 }, { "epoch": 0.55, "grad_norm": 0.7791239023208618, "learning_rate": 8.701105053256123e-06, "loss": 2.0796, "step": 16504 }, { "epoch": 0.55, "grad_norm": 0.7407656908035278, "learning_rate": 8.700051171981067e-06, "loss": 2.0367, "step": 16505 }, { "epoch": 0.55, "grad_norm": 0.7432065606117249, "learning_rate": 8.698997305392072e-06, "loss": 2.1034, "step": 16506 }, { "epoch": 0.55, "grad_norm": 0.7563015818595886, "learning_rate": 8.697943453501043e-06, "loss": 2.0432, "step": 16507 }, { "epoch": 0.55, "grad_norm": 0.7101261019706726, "learning_rate": 8.696889616319885e-06, "loss": 2.0622, "step": 16508 }, { "epoch": 0.55, "grad_norm": 0.7513152360916138, "learning_rate": 8.695835793860505e-06, "loss": 2.0544, "step": 16509 }, { "epoch": 0.55, "grad_norm": 0.7645315527915955, "learning_rate": 8.694781986134803e-06, "loss": 2.0479, "step": 16510 }, { "epoch": 0.55, "grad_norm": 0.7208690643310547, "learning_rate": 8.693728193154697e-06, "loss": 2.0857, "step": 16511 }, { "epoch": 0.55, "grad_norm": 0.7870786190032959, "learning_rate": 8.692674414932079e-06, "loss": 2.1685, "step": 16512 }, { "epoch": 0.55, "grad_norm": 0.7334544062614441, "learning_rate": 8.69162065147886e-06, "loss": 2.1058, "step": 16513 }, { "epoch": 0.55, "grad_norm": 0.7286974191665649, "learning_rate": 8.690566902806946e-06, "loss": 2.0788, "step": 16514 }, { "epoch": 0.55, "grad_norm": 0.7775580883026123, "learning_rate": 8.689513168928239e-06, "loss": 2.1163, "step": 16515 }, { "epoch": 0.55, "grad_norm": 0.7246006727218628, "learning_rate": 8.688459449854644e-06, "loss": 2.1215, "step": 16516 }, { "epoch": 0.55, "grad_norm": 0.7750433087348938, "learning_rate": 8.687405745598066e-06, "loss": 2.0104, "step": 16517 }, { "epoch": 0.55, "grad_norm": 0.7462099194526672, "learning_rate": 8.686352056170406e-06, "loss": 2.0742, "step": 16518 }, { "epoch": 0.55, "grad_norm": 0.729267954826355, "learning_rate": 8.685298381583573e-06, "loss": 2.0872, "step": 16519 }, { "epoch": 0.55, "grad_norm": 0.7599778175354004, "learning_rate": 8.68424472184947e-06, "loss": 2.0934, "step": 16520 }, { "epoch": 0.55, "grad_norm": 0.7321904897689819, "learning_rate": 8.683191076979996e-06, "loss": 2.0766, "step": 16521 }, { "epoch": 0.55, "grad_norm": 0.7425832152366638, "learning_rate": 8.68213744698706e-06, "loss": 2.0943, "step": 16522 }, { "epoch": 0.55, "grad_norm": 0.7381272912025452, "learning_rate": 8.68108383188256e-06, "loss": 1.9724, "step": 16523 }, { "epoch": 0.55, "grad_norm": 0.739753007888794, "learning_rate": 8.6800302316784e-06, "loss": 2.0667, "step": 16524 }, { "epoch": 0.55, "grad_norm": 0.7081009149551392, "learning_rate": 8.678976646386494e-06, "loss": 2.0266, "step": 16525 }, { "epoch": 0.55, "grad_norm": 0.7296431064605713, "learning_rate": 8.677923076018727e-06, "loss": 2.1299, "step": 16526 }, { "epoch": 0.55, "grad_norm": 0.7543759346008301, "learning_rate": 8.676869520587012e-06, "loss": 2.0355, "step": 16527 }, { "epoch": 0.55, "grad_norm": 0.7315911054611206, "learning_rate": 8.67581598010325e-06, "loss": 2.0542, "step": 16528 }, { "epoch": 0.55, "grad_norm": 0.7469914555549622, "learning_rate": 8.674762454579347e-06, "loss": 1.9833, "step": 16529 }, { "epoch": 0.55, "grad_norm": 0.7629496455192566, "learning_rate": 8.673708944027196e-06, "loss": 2.0736, "step": 16530 }, { "epoch": 0.55, "grad_norm": 0.7489063143730164, "learning_rate": 8.672655448458707e-06, "loss": 2.0969, "step": 16531 }, { "epoch": 0.55, "grad_norm": 0.7569689154624939, "learning_rate": 8.671601967885775e-06, "loss": 2.172, "step": 16532 }, { "epoch": 0.55, "grad_norm": 0.7368515729904175, "learning_rate": 8.670548502320313e-06, "loss": 2.084, "step": 16533 }, { "epoch": 0.55, "grad_norm": 0.7274659872055054, "learning_rate": 8.669495051774208e-06, "loss": 2.0868, "step": 16534 }, { "epoch": 0.55, "grad_norm": 0.7237684726715088, "learning_rate": 8.66844161625937e-06, "loss": 2.0621, "step": 16535 }, { "epoch": 0.55, "grad_norm": 0.7403201460838318, "learning_rate": 8.667388195787698e-06, "loss": 2.0858, "step": 16536 }, { "epoch": 0.55, "grad_norm": 0.7313265800476074, "learning_rate": 8.666334790371093e-06, "loss": 2.0689, "step": 16537 }, { "epoch": 0.55, "grad_norm": 0.7565664649009705, "learning_rate": 8.66528140002146e-06, "loss": 2.0156, "step": 16538 }, { "epoch": 0.55, "grad_norm": 0.7245146036148071, "learning_rate": 8.664228024750691e-06, "loss": 2.1686, "step": 16539 }, { "epoch": 0.55, "grad_norm": 0.7347381711006165, "learning_rate": 8.663174664570691e-06, "loss": 2.0121, "step": 16540 }, { "epoch": 0.55, "grad_norm": 0.7291240692138672, "learning_rate": 8.662121319493359e-06, "loss": 2.058, "step": 16541 }, { "epoch": 0.55, "grad_norm": 0.7480359077453613, "learning_rate": 8.661067989530602e-06, "loss": 2.0085, "step": 16542 }, { "epoch": 0.55, "grad_norm": 0.7226346135139465, "learning_rate": 8.660014674694311e-06, "loss": 2.0989, "step": 16543 }, { "epoch": 0.55, "grad_norm": 0.7640408873558044, "learning_rate": 8.658961374996388e-06, "loss": 2.0177, "step": 16544 }, { "epoch": 0.55, "grad_norm": 0.7345245480537415, "learning_rate": 8.657908090448734e-06, "loss": 2.0824, "step": 16545 }, { "epoch": 0.55, "grad_norm": 0.7203503251075745, "learning_rate": 8.656854821063245e-06, "loss": 1.9963, "step": 16546 }, { "epoch": 0.55, "grad_norm": 0.7438231110572815, "learning_rate": 8.65580156685183e-06, "loss": 2.0159, "step": 16547 }, { "epoch": 0.55, "grad_norm": 0.7365139126777649, "learning_rate": 8.654748327826374e-06, "loss": 2.0422, "step": 16548 }, { "epoch": 0.55, "grad_norm": 0.7612596154212952, "learning_rate": 8.653695103998788e-06, "loss": 2.1934, "step": 16549 }, { "epoch": 0.55, "grad_norm": 0.746025562286377, "learning_rate": 8.652641895380962e-06, "loss": 2.034, "step": 16550 }, { "epoch": 0.55, "grad_norm": 0.7477189898490906, "learning_rate": 8.651588701984804e-06, "loss": 2.1124, "step": 16551 }, { "epoch": 0.55, "grad_norm": 0.7265787720680237, "learning_rate": 8.6505355238222e-06, "loss": 2.0113, "step": 16552 }, { "epoch": 0.55, "grad_norm": 0.7614309191703796, "learning_rate": 8.649482360905058e-06, "loss": 2.1932, "step": 16553 }, { "epoch": 0.55, "grad_norm": 0.7433967590332031, "learning_rate": 8.64842921324527e-06, "loss": 2.019, "step": 16554 }, { "epoch": 0.55, "grad_norm": 0.7171213626861572, "learning_rate": 8.647376080854738e-06, "loss": 1.9965, "step": 16555 }, { "epoch": 0.55, "grad_norm": 0.7529215216636658, "learning_rate": 8.646322963745362e-06, "loss": 1.9999, "step": 16556 }, { "epoch": 0.55, "grad_norm": 0.7579878568649292, "learning_rate": 8.645269861929033e-06, "loss": 1.9896, "step": 16557 }, { "epoch": 0.55, "grad_norm": 0.7403212189674377, "learning_rate": 8.644216775417651e-06, "loss": 2.021, "step": 16558 }, { "epoch": 0.55, "grad_norm": 0.7308504581451416, "learning_rate": 8.643163704223114e-06, "loss": 2.0088, "step": 16559 }, { "epoch": 0.55, "grad_norm": 0.7473870515823364, "learning_rate": 8.64211064835732e-06, "loss": 2.0071, "step": 16560 }, { "epoch": 0.55, "grad_norm": 0.7900104522705078, "learning_rate": 8.641057607832161e-06, "loss": 2.0624, "step": 16561 }, { "epoch": 0.55, "grad_norm": 0.7551008462905884, "learning_rate": 8.640004582659534e-06, "loss": 2.0343, "step": 16562 }, { "epoch": 0.55, "grad_norm": 0.7325859665870667, "learning_rate": 8.638951572851343e-06, "loss": 2.0203, "step": 16563 }, { "epoch": 0.55, "grad_norm": 0.740439772605896, "learning_rate": 8.637898578419479e-06, "loss": 2.1324, "step": 16564 }, { "epoch": 0.55, "grad_norm": 0.7063793540000916, "learning_rate": 8.636845599375838e-06, "loss": 1.9648, "step": 16565 }, { "epoch": 0.55, "grad_norm": 0.7512066960334778, "learning_rate": 8.635792635732316e-06, "loss": 2.0641, "step": 16566 }, { "epoch": 0.55, "grad_norm": 0.7407785654067993, "learning_rate": 8.634739687500811e-06, "loss": 2.015, "step": 16567 }, { "epoch": 0.55, "grad_norm": 0.7464678883552551, "learning_rate": 8.633686754693212e-06, "loss": 2.089, "step": 16568 }, { "epoch": 0.55, "grad_norm": 0.7471652030944824, "learning_rate": 8.632633837321426e-06, "loss": 2.0426, "step": 16569 }, { "epoch": 0.55, "grad_norm": 0.7374445796012878, "learning_rate": 8.631580935397336e-06, "loss": 2.1322, "step": 16570 }, { "epoch": 0.55, "grad_norm": 0.7281451225280762, "learning_rate": 8.630528048932847e-06, "loss": 2.1281, "step": 16571 }, { "epoch": 0.55, "grad_norm": 0.7499199509620667, "learning_rate": 8.629475177939846e-06, "loss": 2.1031, "step": 16572 }, { "epoch": 0.55, "grad_norm": 0.7474506497383118, "learning_rate": 8.628422322430236e-06, "loss": 2.047, "step": 16573 }, { "epoch": 0.55, "grad_norm": 0.7745090126991272, "learning_rate": 8.627369482415902e-06, "loss": 2.0617, "step": 16574 }, { "epoch": 0.55, "grad_norm": 0.715404212474823, "learning_rate": 8.626316657908745e-06, "loss": 2.0683, "step": 16575 }, { "epoch": 0.55, "grad_norm": 0.7193914651870728, "learning_rate": 8.625263848920656e-06, "loss": 2.0772, "step": 16576 }, { "epoch": 0.55, "grad_norm": 0.7458266019821167, "learning_rate": 8.62421105546353e-06, "loss": 2.0706, "step": 16577 }, { "epoch": 0.55, "grad_norm": 0.7418307065963745, "learning_rate": 8.623158277549265e-06, "loss": 2.0929, "step": 16578 }, { "epoch": 0.55, "grad_norm": 0.747020423412323, "learning_rate": 8.622105515189749e-06, "loss": 2.0343, "step": 16579 }, { "epoch": 0.55, "grad_norm": 0.7525926828384399, "learning_rate": 8.621052768396877e-06, "loss": 2.0387, "step": 16580 }, { "epoch": 0.55, "grad_norm": 0.7497417330741882, "learning_rate": 8.620000037182541e-06, "loss": 2.095, "step": 16581 }, { "epoch": 0.55, "grad_norm": 0.797443687915802, "learning_rate": 8.61894732155864e-06, "loss": 1.9991, "step": 16582 }, { "epoch": 0.55, "grad_norm": 0.7296102643013, "learning_rate": 8.61789462153706e-06, "loss": 2.0531, "step": 16583 }, { "epoch": 0.55, "grad_norm": 0.7121030688285828, "learning_rate": 8.616841937129695e-06, "loss": 2.0351, "step": 16584 }, { "epoch": 0.55, "grad_norm": 0.736751139163971, "learning_rate": 8.61578926834844e-06, "loss": 2.0211, "step": 16585 }, { "epoch": 0.55, "grad_norm": 0.7401643395423889, "learning_rate": 8.614736615205189e-06, "loss": 2.0125, "step": 16586 }, { "epoch": 0.55, "grad_norm": 0.7396181225776672, "learning_rate": 8.61368397771183e-06, "loss": 2.0437, "step": 16587 }, { "epoch": 0.55, "grad_norm": 0.7332243919372559, "learning_rate": 8.612631355880259e-06, "loss": 2.0368, "step": 16588 }, { "epoch": 0.55, "grad_norm": 0.741874635219574, "learning_rate": 8.611578749722362e-06, "loss": 2.1056, "step": 16589 }, { "epoch": 0.55, "grad_norm": 0.7566730380058289, "learning_rate": 8.610526159250034e-06, "loss": 2.0775, "step": 16590 }, { "epoch": 0.55, "grad_norm": 0.7542139887809753, "learning_rate": 8.609473584475176e-06, "loss": 2.0487, "step": 16591 }, { "epoch": 0.55, "grad_norm": 0.7411539554595947, "learning_rate": 8.608421025409662e-06, "loss": 2.0714, "step": 16592 }, { "epoch": 0.55, "grad_norm": 0.7386788129806519, "learning_rate": 8.607368482065394e-06, "loss": 2.063, "step": 16593 }, { "epoch": 0.55, "grad_norm": 0.7344232201576233, "learning_rate": 8.606315954454259e-06, "loss": 2.1, "step": 16594 }, { "epoch": 0.55, "grad_norm": 0.7509559392929077, "learning_rate": 8.605263442588155e-06, "loss": 2.092, "step": 16595 }, { "epoch": 0.55, "grad_norm": 0.7308575510978699, "learning_rate": 8.604210946478963e-06, "loss": 2.0769, "step": 16596 }, { "epoch": 0.55, "grad_norm": 0.7411999106407166, "learning_rate": 8.603158466138576e-06, "loss": 2.0129, "step": 16597 }, { "epoch": 0.55, "grad_norm": 0.7192003726959229, "learning_rate": 8.602106001578888e-06, "loss": 2.0662, "step": 16598 }, { "epoch": 0.55, "grad_norm": 0.711035966873169, "learning_rate": 8.601053552811789e-06, "loss": 2.0574, "step": 16599 }, { "epoch": 0.55, "grad_norm": 0.7375175952911377, "learning_rate": 8.600001119849167e-06, "loss": 2.1135, "step": 16600 }, { "epoch": 0.55, "grad_norm": 0.7306444644927979, "learning_rate": 8.59894870270291e-06, "loss": 2.0144, "step": 16601 }, { "epoch": 0.55, "grad_norm": 0.7241631746292114, "learning_rate": 8.597896301384909e-06, "loss": 2.0483, "step": 16602 }, { "epoch": 0.55, "grad_norm": 0.7347139716148376, "learning_rate": 8.596843915907053e-06, "loss": 2.0448, "step": 16603 }, { "epoch": 0.55, "grad_norm": 0.7405506372451782, "learning_rate": 8.595791546281238e-06, "loss": 2.0824, "step": 16604 }, { "epoch": 0.55, "grad_norm": 0.7392181158065796, "learning_rate": 8.594739192519341e-06, "loss": 2.0736, "step": 16605 }, { "epoch": 0.55, "grad_norm": 0.7310424447059631, "learning_rate": 8.593686854633259e-06, "loss": 2.0808, "step": 16606 }, { "epoch": 0.55, "grad_norm": 0.8175680041313171, "learning_rate": 8.592634532634877e-06, "loss": 2.0383, "step": 16607 }, { "epoch": 0.55, "grad_norm": 0.7483745813369751, "learning_rate": 8.591582226536089e-06, "loss": 2.1011, "step": 16608 }, { "epoch": 0.55, "grad_norm": 0.713896632194519, "learning_rate": 8.59052993634878e-06, "loss": 2.0045, "step": 16609 }, { "epoch": 0.55, "grad_norm": 0.741944432258606, "learning_rate": 8.589477662084835e-06, "loss": 2.039, "step": 16610 }, { "epoch": 0.55, "grad_norm": 0.7125879526138306, "learning_rate": 8.588425403756143e-06, "loss": 2.0402, "step": 16611 }, { "epoch": 0.55, "grad_norm": 0.7613575458526611, "learning_rate": 8.587373161374594e-06, "loss": 2.0654, "step": 16612 }, { "epoch": 0.55, "grad_norm": 0.7500796914100647, "learning_rate": 8.58632093495208e-06, "loss": 2.0999, "step": 16613 }, { "epoch": 0.55, "grad_norm": 0.7684041857719421, "learning_rate": 8.58526872450048e-06, "loss": 2.0826, "step": 16614 }, { "epoch": 0.55, "grad_norm": 0.7503336668014526, "learning_rate": 8.584216530031685e-06, "loss": 2.0957, "step": 16615 }, { "epoch": 0.55, "grad_norm": 0.7542755603790283, "learning_rate": 8.583164351557582e-06, "loss": 2.1096, "step": 16616 }, { "epoch": 0.55, "grad_norm": 0.7351455688476562, "learning_rate": 8.582112189090061e-06, "loss": 2.0249, "step": 16617 }, { "epoch": 0.55, "grad_norm": 0.7745035886764526, "learning_rate": 8.581060042641001e-06, "loss": 2.0259, "step": 16618 }, { "epoch": 0.55, "grad_norm": 0.7146393060684204, "learning_rate": 8.580007912222294e-06, "loss": 2.0509, "step": 16619 }, { "epoch": 0.55, "grad_norm": 0.7604787349700928, "learning_rate": 8.578955797845824e-06, "loss": 2.1236, "step": 16620 }, { "epoch": 0.55, "grad_norm": 0.7524462938308716, "learning_rate": 8.577903699523482e-06, "loss": 2.099, "step": 16621 }, { "epoch": 0.55, "grad_norm": 0.7318755388259888, "learning_rate": 8.576851617267151e-06, "loss": 2.0673, "step": 16622 }, { "epoch": 0.55, "grad_norm": 0.7297762632369995, "learning_rate": 8.575799551088713e-06, "loss": 2.0698, "step": 16623 }, { "epoch": 0.55, "grad_norm": 0.740635097026825, "learning_rate": 8.574747501000059e-06, "loss": 2.0259, "step": 16624 }, { "epoch": 0.55, "grad_norm": 0.7482772469520569, "learning_rate": 8.573695467013071e-06, "loss": 2.0438, "step": 16625 }, { "epoch": 0.55, "grad_norm": 0.743240237236023, "learning_rate": 8.57264344913964e-06, "loss": 2.1062, "step": 16626 }, { "epoch": 0.55, "grad_norm": 0.7569797039031982, "learning_rate": 8.571591447391642e-06, "loss": 2.077, "step": 16627 }, { "epoch": 0.55, "grad_norm": 0.7482658624649048, "learning_rate": 8.570539461780967e-06, "loss": 2.08, "step": 16628 }, { "epoch": 0.55, "grad_norm": 0.7489868998527527, "learning_rate": 8.569487492319502e-06, "loss": 2.0798, "step": 16629 }, { "epoch": 0.55, "grad_norm": 0.7610114216804504, "learning_rate": 8.568435539019126e-06, "loss": 2.1593, "step": 16630 }, { "epoch": 0.55, "grad_norm": 0.7612504363059998, "learning_rate": 8.56738360189173e-06, "loss": 2.0845, "step": 16631 }, { "epoch": 0.55, "grad_norm": 0.7707653045654297, "learning_rate": 8.566331680949193e-06, "loss": 1.998, "step": 16632 }, { "epoch": 0.55, "grad_norm": 0.7285487651824951, "learning_rate": 8.565279776203397e-06, "loss": 2.0798, "step": 16633 }, { "epoch": 0.55, "grad_norm": 0.7132567763328552, "learning_rate": 8.564227887666231e-06, "loss": 2.0647, "step": 16634 }, { "epoch": 0.55, "grad_norm": 0.7683488130569458, "learning_rate": 8.563176015349581e-06, "loss": 2.1337, "step": 16635 }, { "epoch": 0.55, "grad_norm": 0.7454658150672913, "learning_rate": 8.562124159265323e-06, "loss": 2.0341, "step": 16636 }, { "epoch": 0.55, "grad_norm": 0.7358244061470032, "learning_rate": 8.561072319425344e-06, "loss": 2.0537, "step": 16637 }, { "epoch": 0.55, "grad_norm": 0.7353538274765015, "learning_rate": 8.560020495841526e-06, "loss": 2.1103, "step": 16638 }, { "epoch": 0.55, "grad_norm": 0.7644364237785339, "learning_rate": 8.55896868852575e-06, "loss": 2.009, "step": 16639 }, { "epoch": 0.55, "grad_norm": 0.7569608092308044, "learning_rate": 8.557916897489909e-06, "loss": 2.0949, "step": 16640 }, { "epoch": 0.55, "grad_norm": 0.74721759557724, "learning_rate": 8.55686512274587e-06, "loss": 2.1578, "step": 16641 }, { "epoch": 0.55, "grad_norm": 0.73298579454422, "learning_rate": 8.555813364305526e-06, "loss": 2.1098, "step": 16642 }, { "epoch": 0.55, "grad_norm": 0.7489504814147949, "learning_rate": 8.554761622180757e-06, "loss": 2.0194, "step": 16643 }, { "epoch": 0.55, "grad_norm": 0.7614023089408875, "learning_rate": 8.553709896383445e-06, "loss": 2.0604, "step": 16644 }, { "epoch": 0.55, "grad_norm": 0.7530896067619324, "learning_rate": 8.552658186925469e-06, "loss": 2.0376, "step": 16645 }, { "epoch": 0.55, "grad_norm": 0.7655874490737915, "learning_rate": 8.551606493818713e-06, "loss": 2.0129, "step": 16646 }, { "epoch": 0.55, "grad_norm": 0.7612796425819397, "learning_rate": 8.550554817075057e-06, "loss": 2.0055, "step": 16647 }, { "epoch": 0.55, "grad_norm": 0.7210971117019653, "learning_rate": 8.549503156706387e-06, "loss": 2.1089, "step": 16648 }, { "epoch": 0.55, "grad_norm": 0.772261917591095, "learning_rate": 8.548451512724576e-06, "loss": 2.1435, "step": 16649 }, { "epoch": 0.55, "grad_norm": 0.7566694021224976, "learning_rate": 8.547399885141511e-06, "loss": 2.0296, "step": 16650 }, { "epoch": 0.55, "grad_norm": 0.7337427735328674, "learning_rate": 8.546348273969069e-06, "loss": 2.0955, "step": 16651 }, { "epoch": 0.55, "grad_norm": 0.7413740158081055, "learning_rate": 8.545296679219136e-06, "loss": 2.0966, "step": 16652 }, { "epoch": 0.55, "grad_norm": 0.7298580408096313, "learning_rate": 8.544245100903586e-06, "loss": 2.0029, "step": 16653 }, { "epoch": 0.55, "grad_norm": 0.7241822481155396, "learning_rate": 8.543193539034302e-06, "loss": 2.0111, "step": 16654 }, { "epoch": 0.55, "grad_norm": 0.7459873557090759, "learning_rate": 8.542141993623162e-06, "loss": 2.08, "step": 16655 }, { "epoch": 0.55, "grad_norm": 0.736303985118866, "learning_rate": 8.541090464682049e-06, "loss": 2.1133, "step": 16656 }, { "epoch": 0.55, "grad_norm": 0.7282065153121948, "learning_rate": 8.540038952222842e-06, "loss": 2.0373, "step": 16657 }, { "epoch": 0.55, "grad_norm": 0.7662880420684814, "learning_rate": 8.538987456257418e-06, "loss": 2.1136, "step": 16658 }, { "epoch": 0.55, "grad_norm": 0.7325481176376343, "learning_rate": 8.537935976797657e-06, "loss": 2.0477, "step": 16659 }, { "epoch": 0.55, "grad_norm": 0.7756228446960449, "learning_rate": 8.53688451385544e-06, "loss": 2.0828, "step": 16660 }, { "epoch": 0.55, "grad_norm": 0.7225051522254944, "learning_rate": 8.535833067442641e-06, "loss": 2.0759, "step": 16661 }, { "epoch": 0.55, "grad_norm": 0.7147581577301025, "learning_rate": 8.534781637571149e-06, "loss": 2.1547, "step": 16662 }, { "epoch": 0.55, "grad_norm": 0.7189888954162598, "learning_rate": 8.533730224252828e-06, "loss": 2.0441, "step": 16663 }, { "epoch": 0.55, "grad_norm": 0.7515743970870972, "learning_rate": 8.532678827499566e-06, "loss": 2.1121, "step": 16664 }, { "epoch": 0.55, "grad_norm": 0.7584514021873474, "learning_rate": 8.531627447323238e-06, "loss": 2.0439, "step": 16665 }, { "epoch": 0.55, "grad_norm": 0.763821542263031, "learning_rate": 8.530576083735726e-06, "loss": 2.0424, "step": 16666 }, { "epoch": 0.55, "grad_norm": 0.7620757818222046, "learning_rate": 8.529524736748903e-06, "loss": 2.1903, "step": 16667 }, { "epoch": 0.55, "grad_norm": 0.7464105486869812, "learning_rate": 8.528473406374645e-06, "loss": 2.0711, "step": 16668 }, { "epoch": 0.55, "grad_norm": 0.7380724549293518, "learning_rate": 8.527422092624832e-06, "loss": 2.0598, "step": 16669 }, { "epoch": 0.55, "grad_norm": 0.7136291265487671, "learning_rate": 8.526370795511343e-06, "loss": 2.0428, "step": 16670 }, { "epoch": 0.55, "grad_norm": 0.7326642274856567, "learning_rate": 8.525319515046054e-06, "loss": 2.0691, "step": 16671 }, { "epoch": 0.55, "grad_norm": 0.7272912263870239, "learning_rate": 8.524268251240841e-06, "loss": 2.025, "step": 16672 }, { "epoch": 0.55, "grad_norm": 0.73006671667099, "learning_rate": 8.523217004107578e-06, "loss": 2.0448, "step": 16673 }, { "epoch": 0.55, "grad_norm": 0.7180224061012268, "learning_rate": 8.522165773658146e-06, "loss": 2.0934, "step": 16674 }, { "epoch": 0.55, "grad_norm": 0.7334069609642029, "learning_rate": 8.52111455990442e-06, "loss": 2.0006, "step": 16675 }, { "epoch": 0.55, "grad_norm": 0.7747009992599487, "learning_rate": 8.520063362858271e-06, "loss": 2.0802, "step": 16676 }, { "epoch": 0.55, "grad_norm": 0.7535221576690674, "learning_rate": 8.519012182531579e-06, "loss": 2.0041, "step": 16677 }, { "epoch": 0.55, "grad_norm": 0.7366111278533936, "learning_rate": 8.517961018936222e-06, "loss": 2.0318, "step": 16678 }, { "epoch": 0.55, "grad_norm": 0.7255215644836426, "learning_rate": 8.516909872084073e-06, "loss": 2.1056, "step": 16679 }, { "epoch": 0.55, "grad_norm": 0.7402520179748535, "learning_rate": 8.515858741987007e-06, "loss": 2.1018, "step": 16680 }, { "epoch": 0.55, "grad_norm": 0.7555060386657715, "learning_rate": 8.514807628656899e-06, "loss": 2.0613, "step": 16681 }, { "epoch": 0.56, "grad_norm": 0.7324411869049072, "learning_rate": 8.51375653210562e-06, "loss": 2.0786, "step": 16682 }, { "epoch": 0.56, "grad_norm": 0.7216768860816956, "learning_rate": 8.512705452345051e-06, "loss": 2.0308, "step": 16683 }, { "epoch": 0.56, "grad_norm": 0.7511988282203674, "learning_rate": 8.51165438938707e-06, "loss": 2.057, "step": 16684 }, { "epoch": 0.56, "grad_norm": 0.7523396611213684, "learning_rate": 8.510603343243538e-06, "loss": 2.1045, "step": 16685 }, { "epoch": 0.56, "grad_norm": 0.7295357584953308, "learning_rate": 8.509552313926339e-06, "loss": 2.0954, "step": 16686 }, { "epoch": 0.56, "grad_norm": 0.7326708436012268, "learning_rate": 8.508501301447345e-06, "loss": 2.081, "step": 16687 }, { "epoch": 0.56, "grad_norm": 0.7313787937164307, "learning_rate": 8.50745030581843e-06, "loss": 2.0415, "step": 16688 }, { "epoch": 0.56, "grad_norm": 0.744006872177124, "learning_rate": 8.506399327051465e-06, "loss": 2.0998, "step": 16689 }, { "epoch": 0.56, "grad_norm": 0.7513835430145264, "learning_rate": 8.505348365158325e-06, "loss": 2.0719, "step": 16690 }, { "epoch": 0.56, "grad_norm": 0.7507015466690063, "learning_rate": 8.504297420150882e-06, "loss": 2.0537, "step": 16691 }, { "epoch": 0.56, "grad_norm": 0.7489732503890991, "learning_rate": 8.503246492041013e-06, "loss": 2.0194, "step": 16692 }, { "epoch": 0.56, "grad_norm": 0.772464394569397, "learning_rate": 8.502195580840588e-06, "loss": 2.0088, "step": 16693 }, { "epoch": 0.56, "grad_norm": 0.7171416878700256, "learning_rate": 8.501144686561479e-06, "loss": 2.0699, "step": 16694 }, { "epoch": 0.56, "grad_norm": 0.6983522772789001, "learning_rate": 8.500093809215558e-06, "loss": 1.9862, "step": 16695 }, { "epoch": 0.56, "grad_norm": 0.7744280099868774, "learning_rate": 8.499042948814696e-06, "loss": 1.9997, "step": 16696 }, { "epoch": 0.56, "grad_norm": 0.7388389706611633, "learning_rate": 8.497992105370774e-06, "loss": 1.9827, "step": 16697 }, { "epoch": 0.56, "grad_norm": 0.7499402165412903, "learning_rate": 8.496941278895653e-06, "loss": 2.0952, "step": 16698 }, { "epoch": 0.56, "grad_norm": 0.7866347432136536, "learning_rate": 8.495890469401204e-06, "loss": 2.0504, "step": 16699 }, { "epoch": 0.56, "grad_norm": 0.7501490116119385, "learning_rate": 8.494839676899307e-06, "loss": 2.071, "step": 16700 }, { "epoch": 0.56, "grad_norm": 0.7905648350715637, "learning_rate": 8.493788901401831e-06, "loss": 2.0114, "step": 16701 }, { "epoch": 0.56, "grad_norm": 0.7692969441413879, "learning_rate": 8.492738142920645e-06, "loss": 2.0612, "step": 16702 }, { "epoch": 0.56, "grad_norm": 0.7460446953773499, "learning_rate": 8.491687401467618e-06, "loss": 2.0701, "step": 16703 }, { "epoch": 0.56, "grad_norm": 0.7274276614189148, "learning_rate": 8.49063667705462e-06, "loss": 2.0593, "step": 16704 }, { "epoch": 0.56, "grad_norm": 0.7628127336502075, "learning_rate": 8.48958596969353e-06, "loss": 2.0783, "step": 16705 }, { "epoch": 0.56, "grad_norm": 0.7190506458282471, "learning_rate": 8.488535279396212e-06, "loss": 2.0829, "step": 16706 }, { "epoch": 0.56, "grad_norm": 0.7803504467010498, "learning_rate": 8.487484606174534e-06, "loss": 2.0335, "step": 16707 }, { "epoch": 0.56, "grad_norm": 0.7428827881813049, "learning_rate": 8.486433950040369e-06, "loss": 2.0164, "step": 16708 }, { "epoch": 0.56, "grad_norm": 0.7503796815872192, "learning_rate": 8.485383311005586e-06, "loss": 2.0557, "step": 16709 }, { "epoch": 0.56, "grad_norm": 0.7121695280075073, "learning_rate": 8.484332689082057e-06, "loss": 2.0376, "step": 16710 }, { "epoch": 0.56, "grad_norm": 0.7454421520233154, "learning_rate": 8.483282084281648e-06, "loss": 2.1091, "step": 16711 }, { "epoch": 0.56, "grad_norm": 0.7335931658744812, "learning_rate": 8.482231496616226e-06, "loss": 2.0761, "step": 16712 }, { "epoch": 0.56, "grad_norm": 0.7471561431884766, "learning_rate": 8.481180926097665e-06, "loss": 2.0228, "step": 16713 }, { "epoch": 0.56, "grad_norm": 0.7073904275894165, "learning_rate": 8.480130372737832e-06, "loss": 2.0438, "step": 16714 }, { "epoch": 0.56, "grad_norm": 0.7536607384681702, "learning_rate": 8.479079836548598e-06, "loss": 2.1102, "step": 16715 }, { "epoch": 0.56, "grad_norm": 0.7500789761543274, "learning_rate": 8.478029317541825e-06, "loss": 2.0857, "step": 16716 }, { "epoch": 0.56, "grad_norm": 0.7411984801292419, "learning_rate": 8.476978815729386e-06, "loss": 2.0995, "step": 16717 }, { "epoch": 0.56, "grad_norm": 0.7154254913330078, "learning_rate": 8.475928331123146e-06, "loss": 2.0338, "step": 16718 }, { "epoch": 0.56, "grad_norm": 0.7668420076370239, "learning_rate": 8.47487786373498e-06, "loss": 2.0844, "step": 16719 }, { "epoch": 0.56, "grad_norm": 0.7501354813575745, "learning_rate": 8.473827413576746e-06, "loss": 2.1359, "step": 16720 }, { "epoch": 0.56, "grad_norm": 0.7358644604682922, "learning_rate": 8.472776980660315e-06, "loss": 2.0128, "step": 16721 }, { "epoch": 0.56, "grad_norm": 0.8009878396987915, "learning_rate": 8.471726564997554e-06, "loss": 2.0536, "step": 16722 }, { "epoch": 0.56, "grad_norm": 0.7800041437149048, "learning_rate": 8.470676166600333e-06, "loss": 2.0113, "step": 16723 }, { "epoch": 0.56, "grad_norm": 0.7271307110786438, "learning_rate": 8.469625785480518e-06, "loss": 2.0392, "step": 16724 }, { "epoch": 0.56, "grad_norm": 0.76762455701828, "learning_rate": 8.468575421649971e-06, "loss": 2.1025, "step": 16725 }, { "epoch": 0.56, "grad_norm": 0.7439972162246704, "learning_rate": 8.467525075120558e-06, "loss": 2.048, "step": 16726 }, { "epoch": 0.56, "grad_norm": 0.8246400356292725, "learning_rate": 8.466474745904154e-06, "loss": 2.0461, "step": 16727 }, { "epoch": 0.56, "grad_norm": 0.7291239500045776, "learning_rate": 8.465424434012619e-06, "loss": 2.0845, "step": 16728 }, { "epoch": 0.56, "grad_norm": 0.7072829008102417, "learning_rate": 8.464374139457819e-06, "loss": 2.0243, "step": 16729 }, { "epoch": 0.56, "grad_norm": 0.7153077721595764, "learning_rate": 8.463323862251619e-06, "loss": 2.0363, "step": 16730 }, { "epoch": 0.56, "grad_norm": 0.7212857604026794, "learning_rate": 8.462273602405885e-06, "loss": 2.0708, "step": 16731 }, { "epoch": 0.56, "grad_norm": 0.7523965239524841, "learning_rate": 8.461223359932481e-06, "loss": 2.0362, "step": 16732 }, { "epoch": 0.56, "grad_norm": 0.8195013999938965, "learning_rate": 8.460173134843282e-06, "loss": 2.079, "step": 16733 }, { "epoch": 0.56, "grad_norm": 0.7346334457397461, "learning_rate": 8.459122927150135e-06, "loss": 2.0538, "step": 16734 }, { "epoch": 0.56, "grad_norm": 0.7347535490989685, "learning_rate": 8.458072736864918e-06, "loss": 2.0074, "step": 16735 }, { "epoch": 0.56, "grad_norm": 0.7356991171836853, "learning_rate": 8.45702256399949e-06, "loss": 2.0384, "step": 16736 }, { "epoch": 0.56, "grad_norm": 0.763713538646698, "learning_rate": 8.455972408565722e-06, "loss": 1.9293, "step": 16737 }, { "epoch": 0.56, "grad_norm": 0.7369728684425354, "learning_rate": 8.454922270575467e-06, "loss": 2.088, "step": 16738 }, { "epoch": 0.56, "grad_norm": 0.7270092964172363, "learning_rate": 8.453872150040598e-06, "loss": 2.0698, "step": 16739 }, { "epoch": 0.56, "grad_norm": 0.7209954261779785, "learning_rate": 8.452822046972971e-06, "loss": 1.9942, "step": 16740 }, { "epoch": 0.56, "grad_norm": 0.7575922012329102, "learning_rate": 8.45177196138446e-06, "loss": 2.0961, "step": 16741 }, { "epoch": 0.56, "grad_norm": 0.7480920553207397, "learning_rate": 8.450721893286917e-06, "loss": 2.0809, "step": 16742 }, { "epoch": 0.56, "grad_norm": 0.7328463196754456, "learning_rate": 8.449671842692212e-06, "loss": 2.0936, "step": 16743 }, { "epoch": 0.56, "grad_norm": 0.7560030221939087, "learning_rate": 8.448621809612205e-06, "loss": 2.0891, "step": 16744 }, { "epoch": 0.56, "grad_norm": 0.7235105037689209, "learning_rate": 8.44757179405876e-06, "loss": 1.9919, "step": 16745 }, { "epoch": 0.56, "grad_norm": 0.7721200585365295, "learning_rate": 8.446521796043743e-06, "loss": 2.1198, "step": 16746 }, { "epoch": 0.56, "grad_norm": 0.7190041542053223, "learning_rate": 8.445471815579009e-06, "loss": 2.0472, "step": 16747 }, { "epoch": 0.56, "grad_norm": 0.7581098079681396, "learning_rate": 8.44442185267642e-06, "loss": 2.0574, "step": 16748 }, { "epoch": 0.56, "grad_norm": 0.7667680382728577, "learning_rate": 8.443371907347844e-06, "loss": 2.0652, "step": 16749 }, { "epoch": 0.56, "grad_norm": 0.7571572065353394, "learning_rate": 8.442321979605143e-06, "loss": 2.1043, "step": 16750 }, { "epoch": 0.56, "grad_norm": 0.7084314227104187, "learning_rate": 8.441272069460171e-06, "loss": 2.0475, "step": 16751 }, { "epoch": 0.56, "grad_norm": 0.736884355545044, "learning_rate": 8.440222176924796e-06, "loss": 2.0691, "step": 16752 }, { "epoch": 0.56, "grad_norm": 0.7467279434204102, "learning_rate": 8.439172302010877e-06, "loss": 2.121, "step": 16753 }, { "epoch": 0.56, "grad_norm": 0.7498335838317871, "learning_rate": 8.43812244473027e-06, "loss": 2.0418, "step": 16754 }, { "epoch": 0.56, "grad_norm": 0.7791133522987366, "learning_rate": 8.43707260509485e-06, "loss": 2.0628, "step": 16755 }, { "epoch": 0.56, "grad_norm": 0.7558866739273071, "learning_rate": 8.436022783116458e-06, "loss": 2.0166, "step": 16756 }, { "epoch": 0.56, "grad_norm": 0.7172505855560303, "learning_rate": 8.434972978806967e-06, "loss": 2.0932, "step": 16757 }, { "epoch": 0.56, "grad_norm": 0.7278936505317688, "learning_rate": 8.433923192178235e-06, "loss": 2.0228, "step": 16758 }, { "epoch": 0.56, "grad_norm": 0.7596557140350342, "learning_rate": 8.432873423242123e-06, "loss": 2.1009, "step": 16759 }, { "epoch": 0.56, "grad_norm": 0.7843927145004272, "learning_rate": 8.431823672010486e-06, "loss": 2.1353, "step": 16760 }, { "epoch": 0.56, "grad_norm": 0.7333033084869385, "learning_rate": 8.430773938495187e-06, "loss": 2.0425, "step": 16761 }, { "epoch": 0.56, "grad_norm": 0.740612268447876, "learning_rate": 8.429724222708081e-06, "loss": 2.0036, "step": 16762 }, { "epoch": 0.56, "grad_norm": 0.7729616761207581, "learning_rate": 8.428674524661039e-06, "loss": 2.0484, "step": 16763 }, { "epoch": 0.56, "grad_norm": 0.7519281506538391, "learning_rate": 8.427624844365902e-06, "loss": 2.0905, "step": 16764 }, { "epoch": 0.56, "grad_norm": 0.7422950863838196, "learning_rate": 8.426575181834541e-06, "loss": 2.0446, "step": 16765 }, { "epoch": 0.56, "grad_norm": 0.7161850929260254, "learning_rate": 8.425525537078812e-06, "loss": 2.0582, "step": 16766 }, { "epoch": 0.56, "grad_norm": 0.6909325122833252, "learning_rate": 8.424475910110573e-06, "loss": 2.0624, "step": 16767 }, { "epoch": 0.56, "grad_norm": 0.7443349957466125, "learning_rate": 8.423426300941684e-06, "loss": 2.1091, "step": 16768 }, { "epoch": 0.56, "grad_norm": 0.7540214657783508, "learning_rate": 8.422376709584e-06, "loss": 2.0487, "step": 16769 }, { "epoch": 0.56, "grad_norm": 0.7226146459579468, "learning_rate": 8.421327136049374e-06, "loss": 2.0376, "step": 16770 }, { "epoch": 0.56, "grad_norm": 0.7182632088661194, "learning_rate": 8.420277580349672e-06, "loss": 2.0894, "step": 16771 }, { "epoch": 0.56, "grad_norm": 0.7491751909255981, "learning_rate": 8.419228042496751e-06, "loss": 2.0657, "step": 16772 }, { "epoch": 0.56, "grad_norm": 0.7263699769973755, "learning_rate": 8.418178522502463e-06, "loss": 2.048, "step": 16773 }, { "epoch": 0.56, "grad_norm": 0.7348998188972473, "learning_rate": 8.417129020378668e-06, "loss": 2.0409, "step": 16774 }, { "epoch": 0.56, "grad_norm": 0.7435441613197327, "learning_rate": 8.416079536137219e-06, "loss": 2.0701, "step": 16775 }, { "epoch": 0.56, "grad_norm": 0.7647241353988647, "learning_rate": 8.415030069789974e-06, "loss": 2.0818, "step": 16776 }, { "epoch": 0.56, "grad_norm": 0.721744179725647, "learning_rate": 8.413980621348799e-06, "loss": 2.102, "step": 16777 }, { "epoch": 0.56, "grad_norm": 0.7339802980422974, "learning_rate": 8.412931190825532e-06, "loss": 2.0859, "step": 16778 }, { "epoch": 0.56, "grad_norm": 0.7314200401306152, "learning_rate": 8.411881778232042e-06, "loss": 2.0393, "step": 16779 }, { "epoch": 0.56, "grad_norm": 0.7117515206336975, "learning_rate": 8.410832383580181e-06, "loss": 2.1135, "step": 16780 }, { "epoch": 0.56, "grad_norm": 0.7433462738990784, "learning_rate": 8.409783006881806e-06, "loss": 2.0337, "step": 16781 }, { "epoch": 0.56, "grad_norm": 0.7403644323348999, "learning_rate": 8.408733648148768e-06, "loss": 2.0422, "step": 16782 }, { "epoch": 0.56, "grad_norm": 0.7490825057029724, "learning_rate": 8.407684307392924e-06, "loss": 2.0423, "step": 16783 }, { "epoch": 0.56, "grad_norm": 0.7862105965614319, "learning_rate": 8.40663498462613e-06, "loss": 2.1463, "step": 16784 }, { "epoch": 0.56, "grad_norm": 0.701542854309082, "learning_rate": 8.405585679860241e-06, "loss": 2.0197, "step": 16785 }, { "epoch": 0.56, "grad_norm": 0.7154602408409119, "learning_rate": 8.404536393107112e-06, "loss": 2.0021, "step": 16786 }, { "epoch": 0.56, "grad_norm": 0.7079073190689087, "learning_rate": 8.403487124378595e-06, "loss": 2.0782, "step": 16787 }, { "epoch": 0.56, "grad_norm": 0.742972195148468, "learning_rate": 8.402437873686544e-06, "loss": 1.9964, "step": 16788 }, { "epoch": 0.56, "grad_norm": 0.7355003356933594, "learning_rate": 8.401388641042811e-06, "loss": 2.0615, "step": 16789 }, { "epoch": 0.56, "grad_norm": 0.748282790184021, "learning_rate": 8.400339426459259e-06, "loss": 2.1343, "step": 16790 }, { "epoch": 0.56, "grad_norm": 0.7776012420654297, "learning_rate": 8.399290229947733e-06, "loss": 2.0817, "step": 16791 }, { "epoch": 0.56, "grad_norm": 0.7180848717689514, "learning_rate": 8.398241051520082e-06, "loss": 2.1021, "step": 16792 }, { "epoch": 0.56, "grad_norm": 0.7320981025695801, "learning_rate": 8.397191891188169e-06, "loss": 2.1219, "step": 16793 }, { "epoch": 0.56, "grad_norm": 0.7596744298934937, "learning_rate": 8.396142748963844e-06, "loss": 2.0453, "step": 16794 }, { "epoch": 0.56, "grad_norm": 0.723743200302124, "learning_rate": 8.395093624858956e-06, "loss": 2.0344, "step": 16795 }, { "epoch": 0.56, "grad_norm": 0.7396324872970581, "learning_rate": 8.39404451888536e-06, "loss": 2.1052, "step": 16796 }, { "epoch": 0.56, "grad_norm": 0.7423840761184692, "learning_rate": 8.392995431054906e-06, "loss": 2.1527, "step": 16797 }, { "epoch": 0.56, "grad_norm": 0.7974815368652344, "learning_rate": 8.391946361379449e-06, "loss": 2.0576, "step": 16798 }, { "epoch": 0.56, "grad_norm": 0.7525286078453064, "learning_rate": 8.390897309870845e-06, "loss": 2.1107, "step": 16799 }, { "epoch": 0.56, "grad_norm": 0.7498737573623657, "learning_rate": 8.389848276540933e-06, "loss": 2.1305, "step": 16800 }, { "epoch": 0.56, "grad_norm": 0.7181726694107056, "learning_rate": 8.388799261401575e-06, "loss": 2.0903, "step": 16801 }, { "epoch": 0.56, "grad_norm": 0.7593972682952881, "learning_rate": 8.387750264464617e-06, "loss": 2.1285, "step": 16802 }, { "epoch": 0.56, "grad_norm": 0.7410340905189514, "learning_rate": 8.386701285741914e-06, "loss": 2.0362, "step": 16803 }, { "epoch": 0.56, "grad_norm": 0.7696154117584229, "learning_rate": 8.385652325245312e-06, "loss": 2.0687, "step": 16804 }, { "epoch": 0.56, "grad_norm": 0.778201699256897, "learning_rate": 8.384603382986663e-06, "loss": 2.0366, "step": 16805 }, { "epoch": 0.56, "grad_norm": 0.7688080668449402, "learning_rate": 8.383554458977821e-06, "loss": 2.0876, "step": 16806 }, { "epoch": 0.56, "grad_norm": 0.7319440245628357, "learning_rate": 8.382505553230632e-06, "loss": 2.0209, "step": 16807 }, { "epoch": 0.56, "grad_norm": 0.7023982405662537, "learning_rate": 8.38145666575695e-06, "loss": 2.0602, "step": 16808 }, { "epoch": 0.56, "grad_norm": 0.7453293204307556, "learning_rate": 8.38040779656862e-06, "loss": 2.0923, "step": 16809 }, { "epoch": 0.56, "grad_norm": 0.7665380239486694, "learning_rate": 8.379358945677493e-06, "loss": 2.0588, "step": 16810 }, { "epoch": 0.56, "grad_norm": 0.7308092713356018, "learning_rate": 8.378310113095415e-06, "loss": 2.1175, "step": 16811 }, { "epoch": 0.56, "grad_norm": 0.7042902112007141, "learning_rate": 8.377261298834249e-06, "loss": 2.0307, "step": 16812 }, { "epoch": 0.56, "grad_norm": 0.7634798288345337, "learning_rate": 8.376212502905825e-06, "loss": 2.1133, "step": 16813 }, { "epoch": 0.56, "grad_norm": 0.7541706562042236, "learning_rate": 8.375163725322005e-06, "loss": 2.0484, "step": 16814 }, { "epoch": 0.56, "grad_norm": 0.7624711394309998, "learning_rate": 8.374114966094629e-06, "loss": 2.0673, "step": 16815 }, { "epoch": 0.56, "grad_norm": 0.7456276416778564, "learning_rate": 8.373066225235552e-06, "loss": 2.0279, "step": 16816 }, { "epoch": 0.56, "grad_norm": 0.7726129293441772, "learning_rate": 8.372017502756621e-06, "loss": 2.0446, "step": 16817 }, { "epoch": 0.56, "grad_norm": 0.7053190469741821, "learning_rate": 8.370968798669681e-06, "loss": 2.0323, "step": 16818 }, { "epoch": 0.56, "grad_norm": 0.7628491520881653, "learning_rate": 8.369920112986578e-06, "loss": 2.0779, "step": 16819 }, { "epoch": 0.56, "grad_norm": 0.7345894575119019, "learning_rate": 8.368871445719163e-06, "loss": 2.1608, "step": 16820 }, { "epoch": 0.56, "grad_norm": 0.731438159942627, "learning_rate": 8.367822796879286e-06, "loss": 2.0797, "step": 16821 }, { "epoch": 0.56, "grad_norm": 0.7195285558700562, "learning_rate": 8.366774166478788e-06, "loss": 2.0273, "step": 16822 }, { "epoch": 0.56, "grad_norm": 0.7461681365966797, "learning_rate": 8.365725554529518e-06, "loss": 2.0355, "step": 16823 }, { "epoch": 0.56, "grad_norm": 0.7426743507385254, "learning_rate": 8.364676961043324e-06, "loss": 2.0694, "step": 16824 }, { "epoch": 0.56, "grad_norm": 0.7170631289482117, "learning_rate": 8.363628386032053e-06, "loss": 2.1099, "step": 16825 }, { "epoch": 0.56, "grad_norm": 0.7615997791290283, "learning_rate": 8.362579829507547e-06, "loss": 2.029, "step": 16826 }, { "epoch": 0.56, "grad_norm": 0.7627228498458862, "learning_rate": 8.361531291481651e-06, "loss": 2.0628, "step": 16827 }, { "epoch": 0.56, "grad_norm": 0.7607656717300415, "learning_rate": 8.360482771966219e-06, "loss": 2.0749, "step": 16828 }, { "epoch": 0.56, "grad_norm": 0.7418903708457947, "learning_rate": 8.35943427097309e-06, "loss": 2.0521, "step": 16829 }, { "epoch": 0.56, "grad_norm": 0.7557323575019836, "learning_rate": 8.358385788514114e-06, "loss": 2.1258, "step": 16830 }, { "epoch": 0.56, "grad_norm": 0.7228327989578247, "learning_rate": 8.35733732460113e-06, "loss": 2.0049, "step": 16831 }, { "epoch": 0.56, "grad_norm": 0.7489004135131836, "learning_rate": 8.356288879245987e-06, "loss": 2.079, "step": 16832 }, { "epoch": 0.56, "grad_norm": 0.7277175784111023, "learning_rate": 8.355240452460527e-06, "loss": 2.0942, "step": 16833 }, { "epoch": 0.56, "grad_norm": 0.7584128379821777, "learning_rate": 8.354192044256604e-06, "loss": 2.0604, "step": 16834 }, { "epoch": 0.56, "grad_norm": 0.7833227515220642, "learning_rate": 8.353143654646046e-06, "loss": 2.0542, "step": 16835 }, { "epoch": 0.56, "grad_norm": 0.7734096050262451, "learning_rate": 8.35209528364071e-06, "loss": 2.0645, "step": 16836 }, { "epoch": 0.56, "grad_norm": 0.7233988642692566, "learning_rate": 8.351046931252435e-06, "loss": 1.9765, "step": 16837 }, { "epoch": 0.56, "grad_norm": 0.7298857569694519, "learning_rate": 8.349998597493064e-06, "loss": 2.1032, "step": 16838 }, { "epoch": 0.56, "grad_norm": 0.7337982654571533, "learning_rate": 8.348950282374446e-06, "loss": 2.0606, "step": 16839 }, { "epoch": 0.56, "grad_norm": 0.7481353282928467, "learning_rate": 8.347901985908417e-06, "loss": 2.0919, "step": 16840 }, { "epoch": 0.56, "grad_norm": 0.7400870323181152, "learning_rate": 8.346853708106821e-06, "loss": 2.0091, "step": 16841 }, { "epoch": 0.56, "grad_norm": 0.7478057742118835, "learning_rate": 8.345805448981505e-06, "loss": 2.0961, "step": 16842 }, { "epoch": 0.56, "grad_norm": 0.7515630722045898, "learning_rate": 8.344757208544312e-06, "loss": 2.0283, "step": 16843 }, { "epoch": 0.56, "grad_norm": 0.7632972002029419, "learning_rate": 8.34370898680708e-06, "loss": 2.0906, "step": 16844 }, { "epoch": 0.56, "grad_norm": 0.7500972151756287, "learning_rate": 8.342660783781653e-06, "loss": 2.0673, "step": 16845 }, { "epoch": 0.56, "grad_norm": 0.7498136162757874, "learning_rate": 8.341612599479873e-06, "loss": 2.0166, "step": 16846 }, { "epoch": 0.56, "grad_norm": 0.7213338613510132, "learning_rate": 8.34056443391358e-06, "loss": 2.0521, "step": 16847 }, { "epoch": 0.56, "grad_norm": 0.7353786826133728, "learning_rate": 8.339516287094625e-06, "loss": 2.0746, "step": 16848 }, { "epoch": 0.56, "grad_norm": 0.7712132334709167, "learning_rate": 8.338468159034834e-06, "loss": 2.1227, "step": 16849 }, { "epoch": 0.56, "grad_norm": 0.7551206946372986, "learning_rate": 8.337420049746058e-06, "loss": 2.0359, "step": 16850 }, { "epoch": 0.56, "grad_norm": 0.6947364211082458, "learning_rate": 8.336371959240136e-06, "loss": 2.0123, "step": 16851 }, { "epoch": 0.56, "grad_norm": 0.7283193469047546, "learning_rate": 8.33532388752891e-06, "loss": 2.0639, "step": 16852 }, { "epoch": 0.56, "grad_norm": 0.7416529059410095, "learning_rate": 8.334275834624219e-06, "loss": 2.0277, "step": 16853 }, { "epoch": 0.56, "grad_norm": 0.7300173044204712, "learning_rate": 8.333227800537903e-06, "loss": 2.0887, "step": 16854 }, { "epoch": 0.56, "grad_norm": 0.7377007603645325, "learning_rate": 8.3321797852818e-06, "loss": 2.0785, "step": 16855 }, { "epoch": 0.56, "grad_norm": 0.7681763172149658, "learning_rate": 8.33113178886776e-06, "loss": 2.0793, "step": 16856 }, { "epoch": 0.56, "grad_norm": 0.791058361530304, "learning_rate": 8.330083811307605e-06, "loss": 2.0324, "step": 16857 }, { "epoch": 0.56, "grad_norm": 0.7533602714538574, "learning_rate": 8.32903585261319e-06, "loss": 2.0952, "step": 16858 }, { "epoch": 0.56, "grad_norm": 0.7043353319168091, "learning_rate": 8.327987912796346e-06, "loss": 2.03, "step": 16859 }, { "epoch": 0.56, "grad_norm": 0.7328832149505615, "learning_rate": 8.326939991868918e-06, "loss": 2.1701, "step": 16860 }, { "epoch": 0.56, "grad_norm": 0.7266687154769897, "learning_rate": 8.32589208984274e-06, "loss": 2.034, "step": 16861 }, { "epoch": 0.56, "grad_norm": 0.7840631008148193, "learning_rate": 8.324844206729652e-06, "loss": 2.1154, "step": 16862 }, { "epoch": 0.56, "grad_norm": 0.73016756772995, "learning_rate": 8.32379634254149e-06, "loss": 2.0607, "step": 16863 }, { "epoch": 0.56, "grad_norm": 0.7633053660392761, "learning_rate": 8.322748497290098e-06, "loss": 2.0643, "step": 16864 }, { "epoch": 0.56, "grad_norm": 0.7259398102760315, "learning_rate": 8.32170067098731e-06, "loss": 2.083, "step": 16865 }, { "epoch": 0.56, "grad_norm": 0.7380948662757874, "learning_rate": 8.320652863644963e-06, "loss": 2.0741, "step": 16866 }, { "epoch": 0.56, "grad_norm": 0.7427602410316467, "learning_rate": 8.319605075274898e-06, "loss": 2.0679, "step": 16867 }, { "epoch": 0.56, "grad_norm": 0.7542269825935364, "learning_rate": 8.318557305888947e-06, "loss": 2.0334, "step": 16868 }, { "epoch": 0.56, "grad_norm": 0.7096551060676575, "learning_rate": 8.31750955549895e-06, "loss": 2.0085, "step": 16869 }, { "epoch": 0.56, "grad_norm": 0.7602319717407227, "learning_rate": 8.316461824116748e-06, "loss": 2.0604, "step": 16870 }, { "epoch": 0.56, "grad_norm": 0.7765876650810242, "learning_rate": 8.31541411175417e-06, "loss": 1.9951, "step": 16871 }, { "epoch": 0.56, "grad_norm": 0.7706303000450134, "learning_rate": 8.314366418423056e-06, "loss": 2.0864, "step": 16872 }, { "epoch": 0.56, "grad_norm": 0.7454278469085693, "learning_rate": 8.313318744135244e-06, "loss": 2.0434, "step": 16873 }, { "epoch": 0.56, "grad_norm": 0.7712646126747131, "learning_rate": 8.31227108890257e-06, "loss": 2.1017, "step": 16874 }, { "epoch": 0.56, "grad_norm": 0.7353293299674988, "learning_rate": 8.311223452736865e-06, "loss": 2.0812, "step": 16875 }, { "epoch": 0.56, "grad_norm": 0.739406168460846, "learning_rate": 8.31017583564997e-06, "loss": 2.1037, "step": 16876 }, { "epoch": 0.56, "grad_norm": 0.7452880144119263, "learning_rate": 8.309128237653714e-06, "loss": 2.0022, "step": 16877 }, { "epoch": 0.56, "grad_norm": 0.7778764963150024, "learning_rate": 8.308080658759939e-06, "loss": 2.0519, "step": 16878 }, { "epoch": 0.56, "grad_norm": 0.7428486943244934, "learning_rate": 8.30703309898048e-06, "loss": 2.0704, "step": 16879 }, { "epoch": 0.56, "grad_norm": 0.7345752120018005, "learning_rate": 8.305985558327165e-06, "loss": 2.1008, "step": 16880 }, { "epoch": 0.56, "grad_norm": 0.7461683750152588, "learning_rate": 8.304938036811833e-06, "loss": 2.0814, "step": 16881 }, { "epoch": 0.56, "grad_norm": 0.746198832988739, "learning_rate": 8.303890534446315e-06, "loss": 1.9922, "step": 16882 }, { "epoch": 0.56, "grad_norm": 0.7936160564422607, "learning_rate": 8.302843051242455e-06, "loss": 2.1035, "step": 16883 }, { "epoch": 0.56, "grad_norm": 0.7319876551628113, "learning_rate": 8.301795587212076e-06, "loss": 2.1279, "step": 16884 }, { "epoch": 0.56, "grad_norm": 0.738196074962616, "learning_rate": 8.300748142367012e-06, "loss": 2.0397, "step": 16885 }, { "epoch": 0.56, "grad_norm": 0.7211881279945374, "learning_rate": 8.299700716719103e-06, "loss": 2.0697, "step": 16886 }, { "epoch": 0.56, "grad_norm": 0.7151875495910645, "learning_rate": 8.29865331028018e-06, "loss": 2.102, "step": 16887 }, { "epoch": 0.56, "grad_norm": 0.7350781559944153, "learning_rate": 8.297605923062073e-06, "loss": 2.0098, "step": 16888 }, { "epoch": 0.56, "grad_norm": 0.7104495763778687, "learning_rate": 8.296558555076614e-06, "loss": 2.0207, "step": 16889 }, { "epoch": 0.56, "grad_norm": 0.7670384049415588, "learning_rate": 8.29551120633564e-06, "loss": 2.037, "step": 16890 }, { "epoch": 0.56, "grad_norm": 0.735596776008606, "learning_rate": 8.29446387685098e-06, "loss": 2.0576, "step": 16891 }, { "epoch": 0.56, "grad_norm": 0.7880738973617554, "learning_rate": 8.293416566634475e-06, "loss": 2.0586, "step": 16892 }, { "epoch": 0.56, "grad_norm": 0.7320733070373535, "learning_rate": 8.292369275697942e-06, "loss": 2.082, "step": 16893 }, { "epoch": 0.56, "grad_norm": 0.7433901429176331, "learning_rate": 8.291322004053222e-06, "loss": 1.9847, "step": 16894 }, { "epoch": 0.56, "grad_norm": 0.766225278377533, "learning_rate": 8.290274751712143e-06, "loss": 2.0901, "step": 16895 }, { "epoch": 0.56, "grad_norm": 0.7629501223564148, "learning_rate": 8.289227518686543e-06, "loss": 2.0475, "step": 16896 }, { "epoch": 0.56, "grad_norm": 0.7677200436592102, "learning_rate": 8.288180304988245e-06, "loss": 2.155, "step": 16897 }, { "epoch": 0.56, "grad_norm": 0.7372588515281677, "learning_rate": 8.28713311062908e-06, "loss": 2.1331, "step": 16898 }, { "epoch": 0.56, "grad_norm": 0.7421506643295288, "learning_rate": 8.286085935620883e-06, "loss": 2.073, "step": 16899 }, { "epoch": 0.56, "grad_norm": 0.7920910120010376, "learning_rate": 8.285038779975483e-06, "loss": 2.0234, "step": 16900 }, { "epoch": 0.56, "grad_norm": 0.723716676235199, "learning_rate": 8.283991643704712e-06, "loss": 2.0479, "step": 16901 }, { "epoch": 0.56, "grad_norm": 0.7653703093528748, "learning_rate": 8.282944526820395e-06, "loss": 2.0149, "step": 16902 }, { "epoch": 0.56, "grad_norm": 0.7333273887634277, "learning_rate": 8.281897429334366e-06, "loss": 2.0412, "step": 16903 }, { "epoch": 0.56, "grad_norm": 0.728277325630188, "learning_rate": 8.28085035125845e-06, "loss": 2.0808, "step": 16904 }, { "epoch": 0.56, "grad_norm": 0.7482560873031616, "learning_rate": 8.279803292604485e-06, "loss": 2.1631, "step": 16905 }, { "epoch": 0.56, "grad_norm": 0.7058760523796082, "learning_rate": 8.278756253384288e-06, "loss": 1.9954, "step": 16906 }, { "epoch": 0.56, "grad_norm": 0.7321876883506775, "learning_rate": 8.277709233609696e-06, "loss": 2.0602, "step": 16907 }, { "epoch": 0.56, "grad_norm": 0.7453299760818481, "learning_rate": 8.276662233292538e-06, "loss": 2.0692, "step": 16908 }, { "epoch": 0.56, "grad_norm": 0.7306625843048096, "learning_rate": 8.275615252444638e-06, "loss": 2.0439, "step": 16909 }, { "epoch": 0.56, "grad_norm": 0.75696861743927, "learning_rate": 8.274568291077829e-06, "loss": 2.0065, "step": 16910 }, { "epoch": 0.56, "grad_norm": 0.758575439453125, "learning_rate": 8.273521349203934e-06, "loss": 2.0737, "step": 16911 }, { "epoch": 0.56, "grad_norm": 0.7682310342788696, "learning_rate": 8.27247442683478e-06, "loss": 2.1188, "step": 16912 }, { "epoch": 0.56, "grad_norm": 0.7318697571754456, "learning_rate": 8.2714275239822e-06, "loss": 2.0379, "step": 16913 }, { "epoch": 0.56, "grad_norm": 0.7258166670799255, "learning_rate": 8.270380640658021e-06, "loss": 2.072, "step": 16914 }, { "epoch": 0.56, "grad_norm": 0.7358026504516602, "learning_rate": 8.269333776874066e-06, "loss": 2.1335, "step": 16915 }, { "epoch": 0.56, "grad_norm": 0.7216898798942566, "learning_rate": 8.268286932642164e-06, "loss": 2.062, "step": 16916 }, { "epoch": 0.56, "grad_norm": 0.7271497845649719, "learning_rate": 8.267240107974141e-06, "loss": 2.0766, "step": 16917 }, { "epoch": 0.56, "grad_norm": 0.7113763689994812, "learning_rate": 8.266193302881826e-06, "loss": 2.0822, "step": 16918 }, { "epoch": 0.56, "grad_norm": 0.7349677681922913, "learning_rate": 8.26514651737704e-06, "loss": 2.0254, "step": 16919 }, { "epoch": 0.56, "grad_norm": 0.7567934989929199, "learning_rate": 8.264099751471613e-06, "loss": 2.0973, "step": 16920 }, { "epoch": 0.56, "grad_norm": 0.7469909191131592, "learning_rate": 8.263053005177369e-06, "loss": 2.0803, "step": 16921 }, { "epoch": 0.56, "grad_norm": 0.7613385915756226, "learning_rate": 8.262006278506133e-06, "loss": 2.0349, "step": 16922 }, { "epoch": 0.56, "grad_norm": 0.7174393534660339, "learning_rate": 8.260959571469737e-06, "loss": 2.0547, "step": 16923 }, { "epoch": 0.56, "grad_norm": 0.7459761500358582, "learning_rate": 8.259912884079996e-06, "loss": 2.1424, "step": 16924 }, { "epoch": 0.56, "grad_norm": 0.740079939365387, "learning_rate": 8.25886621634874e-06, "loss": 2.0921, "step": 16925 }, { "epoch": 0.56, "grad_norm": 0.7167969346046448, "learning_rate": 8.25781956828779e-06, "loss": 2.0584, "step": 16926 }, { "epoch": 0.56, "grad_norm": 0.732787013053894, "learning_rate": 8.256772939908982e-06, "loss": 2.1102, "step": 16927 }, { "epoch": 0.56, "grad_norm": 0.7419447898864746, "learning_rate": 8.255726331224124e-06, "loss": 2.068, "step": 16928 }, { "epoch": 0.56, "grad_norm": 0.7233572602272034, "learning_rate": 8.254679742245051e-06, "loss": 2.0828, "step": 16929 }, { "epoch": 0.56, "grad_norm": 0.7445202469825745, "learning_rate": 8.253633172983581e-06, "loss": 2.0283, "step": 16930 }, { "epoch": 0.56, "grad_norm": 0.7320336103439331, "learning_rate": 8.252586623451541e-06, "loss": 2.1411, "step": 16931 }, { "epoch": 0.56, "grad_norm": 0.7277160286903381, "learning_rate": 8.251540093660758e-06, "loss": 2.0408, "step": 16932 }, { "epoch": 0.56, "grad_norm": 0.7278395295143127, "learning_rate": 8.250493583623045e-06, "loss": 2.0484, "step": 16933 }, { "epoch": 0.56, "grad_norm": 0.7649754285812378, "learning_rate": 8.249447093350228e-06, "loss": 2.0965, "step": 16934 }, { "epoch": 0.56, "grad_norm": 0.7683017253875732, "learning_rate": 8.248400622854137e-06, "loss": 1.9578, "step": 16935 }, { "epoch": 0.56, "grad_norm": 0.7508137822151184, "learning_rate": 8.24735417214659e-06, "loss": 2.0555, "step": 16936 }, { "epoch": 0.56, "grad_norm": 0.7491703033447266, "learning_rate": 8.246307741239405e-06, "loss": 2.0967, "step": 16937 }, { "epoch": 0.56, "grad_norm": 0.7342215180397034, "learning_rate": 8.245261330144407e-06, "loss": 2.0774, "step": 16938 }, { "epoch": 0.56, "grad_norm": 0.7476543188095093, "learning_rate": 8.24421493887342e-06, "loss": 2.1278, "step": 16939 }, { "epoch": 0.56, "grad_norm": 0.7282251119613647, "learning_rate": 8.243168567438264e-06, "loss": 2.081, "step": 16940 }, { "epoch": 0.56, "grad_norm": 0.7234349846839905, "learning_rate": 8.242122215850758e-06, "loss": 2.0022, "step": 16941 }, { "epoch": 0.56, "grad_norm": 0.8079465627670288, "learning_rate": 8.241075884122724e-06, "loss": 2.0284, "step": 16942 }, { "epoch": 0.56, "grad_norm": 0.7177997827529907, "learning_rate": 8.240029572265986e-06, "loss": 2.1089, "step": 16943 }, { "epoch": 0.56, "grad_norm": 0.7278389930725098, "learning_rate": 8.238983280292362e-06, "loss": 2.0877, "step": 16944 }, { "epoch": 0.56, "grad_norm": 0.7399302124977112, "learning_rate": 8.237937008213674e-06, "loss": 2.0688, "step": 16945 }, { "epoch": 0.56, "grad_norm": 0.7312920689582825, "learning_rate": 8.23689075604174e-06, "loss": 2.0857, "step": 16946 }, { "epoch": 0.56, "grad_norm": 0.7458975315093994, "learning_rate": 8.235844523788382e-06, "loss": 2.0802, "step": 16947 }, { "epoch": 0.56, "grad_norm": 0.7342796325683594, "learning_rate": 8.234798311465415e-06, "loss": 2.1266, "step": 16948 }, { "epoch": 0.56, "grad_norm": 0.7531648278236389, "learning_rate": 8.233752119084668e-06, "loss": 2.0133, "step": 16949 }, { "epoch": 0.56, "grad_norm": 0.7985450029373169, "learning_rate": 8.232705946657949e-06, "loss": 2.1012, "step": 16950 }, { "epoch": 0.56, "grad_norm": 0.7235446572303772, "learning_rate": 8.231659794197084e-06, "loss": 2.1286, "step": 16951 }, { "epoch": 0.56, "grad_norm": 0.7931275963783264, "learning_rate": 8.230613661713891e-06, "loss": 2.0993, "step": 16952 }, { "epoch": 0.56, "grad_norm": 0.7526986598968506, "learning_rate": 8.229567549220188e-06, "loss": 2.023, "step": 16953 }, { "epoch": 0.56, "grad_norm": 0.7853092551231384, "learning_rate": 8.228521456727795e-06, "loss": 1.9906, "step": 16954 }, { "epoch": 0.56, "grad_norm": 0.7526300549507141, "learning_rate": 8.227475384248526e-06, "loss": 2.0009, "step": 16955 }, { "epoch": 0.56, "grad_norm": 0.7730827331542969, "learning_rate": 8.2264293317942e-06, "loss": 1.9941, "step": 16956 }, { "epoch": 0.56, "grad_norm": 0.7484804391860962, "learning_rate": 8.225383299376639e-06, "loss": 2.0655, "step": 16957 }, { "epoch": 0.56, "grad_norm": 0.7525172233581543, "learning_rate": 8.224337287007658e-06, "loss": 2.0637, "step": 16958 }, { "epoch": 0.56, "grad_norm": 0.7320743203163147, "learning_rate": 8.223291294699071e-06, "loss": 2.026, "step": 16959 }, { "epoch": 0.56, "grad_norm": 0.7296802401542664, "learning_rate": 8.222245322462699e-06, "loss": 2.0024, "step": 16960 }, { "epoch": 0.56, "grad_norm": 0.7238101959228516, "learning_rate": 8.221199370310357e-06, "loss": 2.0389, "step": 16961 }, { "epoch": 0.56, "grad_norm": 0.7531610727310181, "learning_rate": 8.220153438253859e-06, "loss": 2.0138, "step": 16962 }, { "epoch": 0.56, "grad_norm": 0.7388815879821777, "learning_rate": 8.219107526305033e-06, "loss": 2.012, "step": 16963 }, { "epoch": 0.56, "grad_norm": 0.7233477234840393, "learning_rate": 8.218061634475677e-06, "loss": 2.0401, "step": 16964 }, { "epoch": 0.56, "grad_norm": 0.729682207107544, "learning_rate": 8.21701576277762e-06, "loss": 2.0812, "step": 16965 }, { "epoch": 0.56, "grad_norm": 0.7398176193237305, "learning_rate": 8.215969911222674e-06, "loss": 2.0782, "step": 16966 }, { "epoch": 0.56, "grad_norm": 0.7357902526855469, "learning_rate": 8.214924079822658e-06, "loss": 2.0281, "step": 16967 }, { "epoch": 0.56, "grad_norm": 0.7481693625450134, "learning_rate": 8.21387826858938e-06, "loss": 2.0839, "step": 16968 }, { "epoch": 0.56, "grad_norm": 0.7237067222595215, "learning_rate": 8.212832477534658e-06, "loss": 2.0631, "step": 16969 }, { "epoch": 0.56, "grad_norm": 0.7454030513763428, "learning_rate": 8.211786706670306e-06, "loss": 2.1423, "step": 16970 }, { "epoch": 0.56, "grad_norm": 0.7432722449302673, "learning_rate": 8.210740956008146e-06, "loss": 2.0932, "step": 16971 }, { "epoch": 0.56, "grad_norm": 0.7561458349227905, "learning_rate": 8.20969522555998e-06, "loss": 2.151, "step": 16972 }, { "epoch": 0.56, "grad_norm": 0.7348697185516357, "learning_rate": 8.20864951533763e-06, "loss": 2.0763, "step": 16973 }, { "epoch": 0.56, "grad_norm": 0.7251679301261902, "learning_rate": 8.207603825352908e-06, "loss": 2.1079, "step": 16974 }, { "epoch": 0.56, "grad_norm": 0.7202357053756714, "learning_rate": 8.206558155617625e-06, "loss": 1.9972, "step": 16975 }, { "epoch": 0.56, "grad_norm": 0.7224113941192627, "learning_rate": 8.205512506143603e-06, "loss": 2.0432, "step": 16976 }, { "epoch": 0.56, "grad_norm": 0.7455394268035889, "learning_rate": 8.204466876942646e-06, "loss": 2.0503, "step": 16977 }, { "epoch": 0.56, "grad_norm": 0.7600259780883789, "learning_rate": 8.203421268026567e-06, "loss": 2.0265, "step": 16978 }, { "epoch": 0.56, "grad_norm": 0.732416570186615, "learning_rate": 8.202375679407185e-06, "loss": 2.0655, "step": 16979 }, { "epoch": 0.56, "grad_norm": 0.7577841281890869, "learning_rate": 8.201330111096309e-06, "loss": 2.102, "step": 16980 }, { "epoch": 0.56, "grad_norm": 0.7591338157653809, "learning_rate": 8.20028456310575e-06, "loss": 2.1058, "step": 16981 }, { "epoch": 0.56, "grad_norm": 0.7448843717575073, "learning_rate": 8.199239035447322e-06, "loss": 2.0668, "step": 16982 }, { "epoch": 0.57, "grad_norm": 0.7480595707893372, "learning_rate": 8.198193528132833e-06, "loss": 2.074, "step": 16983 }, { "epoch": 0.57, "grad_norm": 0.7089710831642151, "learning_rate": 8.1971480411741e-06, "loss": 2.0148, "step": 16984 }, { "epoch": 0.57, "grad_norm": 0.707197368144989, "learning_rate": 8.196102574582936e-06, "loss": 1.9972, "step": 16985 }, { "epoch": 0.57, "grad_norm": 0.7421472072601318, "learning_rate": 8.195057128371143e-06, "loss": 2.0908, "step": 16986 }, { "epoch": 0.57, "grad_norm": 0.7494401931762695, "learning_rate": 8.194011702550538e-06, "loss": 2.0302, "step": 16987 }, { "epoch": 0.57, "grad_norm": 0.7262296080589294, "learning_rate": 8.19296629713293e-06, "loss": 2.0961, "step": 16988 }, { "epoch": 0.57, "grad_norm": 0.7473929524421692, "learning_rate": 8.191920912130131e-06, "loss": 2.0867, "step": 16989 }, { "epoch": 0.57, "grad_norm": 0.7438567280769348, "learning_rate": 8.190875547553949e-06, "loss": 2.0854, "step": 16990 }, { "epoch": 0.57, "grad_norm": 0.7884774804115295, "learning_rate": 8.189830203416192e-06, "loss": 2.0066, "step": 16991 }, { "epoch": 0.57, "grad_norm": 0.7366700172424316, "learning_rate": 8.188784879728675e-06, "loss": 2.0468, "step": 16992 }, { "epoch": 0.57, "grad_norm": 0.7352626919746399, "learning_rate": 8.187739576503204e-06, "loss": 2.007, "step": 16993 }, { "epoch": 0.57, "grad_norm": 0.7432831525802612, "learning_rate": 8.186694293751593e-06, "loss": 2.1443, "step": 16994 }, { "epoch": 0.57, "grad_norm": 0.7388004064559937, "learning_rate": 8.185649031485643e-06, "loss": 1.9779, "step": 16995 }, { "epoch": 0.57, "grad_norm": 0.802349865436554, "learning_rate": 8.18460378971717e-06, "loss": 2.1357, "step": 16996 }, { "epoch": 0.57, "grad_norm": 0.7232075929641724, "learning_rate": 8.183558568457974e-06, "loss": 2.1164, "step": 16997 }, { "epoch": 0.57, "grad_norm": 0.7650680541992188, "learning_rate": 8.182513367719878e-06, "loss": 2.06, "step": 16998 }, { "epoch": 0.57, "grad_norm": 0.7210886478424072, "learning_rate": 8.181468187514674e-06, "loss": 2.041, "step": 16999 }, { "epoch": 0.57, "grad_norm": 0.7546193599700928, "learning_rate": 8.180423027854177e-06, "loss": 2.0603, "step": 17000 }, { "epoch": 0.57, "grad_norm": 0.7500667572021484, "learning_rate": 8.179377888750194e-06, "loss": 2.1023, "step": 17001 }, { "epoch": 0.57, "grad_norm": 0.7135975956916809, "learning_rate": 8.178332770214537e-06, "loss": 2.0534, "step": 17002 }, { "epoch": 0.57, "grad_norm": 0.7406973242759705, "learning_rate": 8.177287672259005e-06, "loss": 2.1087, "step": 17003 }, { "epoch": 0.57, "grad_norm": 0.7706504464149475, "learning_rate": 8.17624259489541e-06, "loss": 2.0332, "step": 17004 }, { "epoch": 0.57, "grad_norm": 0.7311902642250061, "learning_rate": 8.175197538135553e-06, "loss": 2.1176, "step": 17005 }, { "epoch": 0.57, "grad_norm": 0.7533340454101562, "learning_rate": 8.174152501991248e-06, "loss": 2.0477, "step": 17006 }, { "epoch": 0.57, "grad_norm": 0.7406693696975708, "learning_rate": 8.1731074864743e-06, "loss": 2.0656, "step": 17007 }, { "epoch": 0.57, "grad_norm": 0.7273715138435364, "learning_rate": 8.172062491596513e-06, "loss": 2.0113, "step": 17008 }, { "epoch": 0.57, "grad_norm": 0.7499680519104004, "learning_rate": 8.171017517369691e-06, "loss": 2.01, "step": 17009 }, { "epoch": 0.57, "grad_norm": 0.754675567150116, "learning_rate": 8.169972563805641e-06, "loss": 2.0534, "step": 17010 }, { "epoch": 0.57, "grad_norm": 0.7410624623298645, "learning_rate": 8.16892763091617e-06, "loss": 2.0626, "step": 17011 }, { "epoch": 0.57, "grad_norm": 0.7312633395195007, "learning_rate": 8.167882718713081e-06, "loss": 2.0401, "step": 17012 }, { "epoch": 0.57, "grad_norm": 0.755429744720459, "learning_rate": 8.166837827208176e-06, "loss": 2.0671, "step": 17013 }, { "epoch": 0.57, "grad_norm": 0.7829160094261169, "learning_rate": 8.165792956413265e-06, "loss": 2.053, "step": 17014 }, { "epoch": 0.57, "grad_norm": 0.7410233020782471, "learning_rate": 8.16474810634015e-06, "loss": 2.0348, "step": 17015 }, { "epoch": 0.57, "grad_norm": 0.7611442804336548, "learning_rate": 8.163703277000639e-06, "loss": 2.0405, "step": 17016 }, { "epoch": 0.57, "grad_norm": 0.7449426651000977, "learning_rate": 8.162658468406529e-06, "loss": 2.1235, "step": 17017 }, { "epoch": 0.57, "grad_norm": 0.7550774812698364, "learning_rate": 8.161613680569627e-06, "loss": 2.0518, "step": 17018 }, { "epoch": 0.57, "grad_norm": 0.7267982959747314, "learning_rate": 8.160568913501734e-06, "loss": 2.0718, "step": 17019 }, { "epoch": 0.57, "grad_norm": 0.7500813007354736, "learning_rate": 8.15952416721466e-06, "loss": 2.015, "step": 17020 }, { "epoch": 0.57, "grad_norm": 0.7406695485115051, "learning_rate": 8.1584794417202e-06, "loss": 2.0763, "step": 17021 }, { "epoch": 0.57, "grad_norm": 0.7630616426467896, "learning_rate": 8.157434737030161e-06, "loss": 2.0833, "step": 17022 }, { "epoch": 0.57, "grad_norm": 0.7512649893760681, "learning_rate": 8.156390053156345e-06, "loss": 2.0402, "step": 17023 }, { "epoch": 0.57, "grad_norm": 0.7367219924926758, "learning_rate": 8.155345390110552e-06, "loss": 2.0866, "step": 17024 }, { "epoch": 0.57, "grad_norm": 0.7510017156600952, "learning_rate": 8.154300747904589e-06, "loss": 2.0197, "step": 17025 }, { "epoch": 0.57, "grad_norm": 0.7582144737243652, "learning_rate": 8.153256126550253e-06, "loss": 2.059, "step": 17026 }, { "epoch": 0.57, "grad_norm": 0.7426238656044006, "learning_rate": 8.152211526059344e-06, "loss": 2.0481, "step": 17027 }, { "epoch": 0.57, "grad_norm": 0.7472699880599976, "learning_rate": 8.151166946443668e-06, "loss": 2.0574, "step": 17028 }, { "epoch": 0.57, "grad_norm": 0.7463499307632446, "learning_rate": 8.150122387715027e-06, "loss": 2.0878, "step": 17029 }, { "epoch": 0.57, "grad_norm": 0.7555307745933533, "learning_rate": 8.149077849885217e-06, "loss": 2.0888, "step": 17030 }, { "epoch": 0.57, "grad_norm": 0.7193235754966736, "learning_rate": 8.148033332966042e-06, "loss": 2.0247, "step": 17031 }, { "epoch": 0.57, "grad_norm": 0.75104820728302, "learning_rate": 8.146988836969299e-06, "loss": 2.0366, "step": 17032 }, { "epoch": 0.57, "grad_norm": 0.7757755517959595, "learning_rate": 8.145944361906792e-06, "loss": 2.0643, "step": 17033 }, { "epoch": 0.57, "grad_norm": 0.7373749613761902, "learning_rate": 8.144899907790319e-06, "loss": 2.0965, "step": 17034 }, { "epoch": 0.57, "grad_norm": 0.7438095211982727, "learning_rate": 8.143855474631677e-06, "loss": 2.0251, "step": 17035 }, { "epoch": 0.57, "grad_norm": 0.7282347083091736, "learning_rate": 8.142811062442669e-06, "loss": 2.0224, "step": 17036 }, { "epoch": 0.57, "grad_norm": 0.7268370389938354, "learning_rate": 8.141766671235094e-06, "loss": 2.115, "step": 17037 }, { "epoch": 0.57, "grad_norm": 0.7212538719177246, "learning_rate": 8.14072230102075e-06, "loss": 2.0119, "step": 17038 }, { "epoch": 0.57, "grad_norm": 0.7663792967796326, "learning_rate": 8.139677951811438e-06, "loss": 2.1118, "step": 17039 }, { "epoch": 0.57, "grad_norm": 0.7457291483879089, "learning_rate": 8.138633623618952e-06, "loss": 1.9713, "step": 17040 }, { "epoch": 0.57, "grad_norm": 0.7398581504821777, "learning_rate": 8.13758931645509e-06, "loss": 2.0583, "step": 17041 }, { "epoch": 0.57, "grad_norm": 0.7091923952102661, "learning_rate": 8.13654503033166e-06, "loss": 2.0689, "step": 17042 }, { "epoch": 0.57, "grad_norm": 0.7224785089492798, "learning_rate": 8.135500765260443e-06, "loss": 1.9712, "step": 17043 }, { "epoch": 0.57, "grad_norm": 0.7078338861465454, "learning_rate": 8.13445652125325e-06, "loss": 2.0249, "step": 17044 }, { "epoch": 0.57, "grad_norm": 0.7576523423194885, "learning_rate": 8.133412298321874e-06, "loss": 2.1295, "step": 17045 }, { "epoch": 0.57, "grad_norm": 0.7234470248222351, "learning_rate": 8.132368096478111e-06, "loss": 2.0369, "step": 17046 }, { "epoch": 0.57, "grad_norm": 0.7381924390792847, "learning_rate": 8.131323915733763e-06, "loss": 2.0863, "step": 17047 }, { "epoch": 0.57, "grad_norm": 0.7391882538795471, "learning_rate": 8.130279756100616e-06, "loss": 2.0291, "step": 17048 }, { "epoch": 0.57, "grad_norm": 0.7242146730422974, "learning_rate": 8.129235617590474e-06, "loss": 1.9969, "step": 17049 }, { "epoch": 0.57, "grad_norm": 0.7562114000320435, "learning_rate": 8.128191500215132e-06, "loss": 2.0169, "step": 17050 }, { "epoch": 0.57, "grad_norm": 0.7361788153648376, "learning_rate": 8.127147403986388e-06, "loss": 2.1232, "step": 17051 }, { "epoch": 0.57, "grad_norm": 0.7673287987709045, "learning_rate": 8.126103328916034e-06, "loss": 2.1629, "step": 17052 }, { "epoch": 0.57, "grad_norm": 0.7415579557418823, "learning_rate": 8.125059275015866e-06, "loss": 2.118, "step": 17053 }, { "epoch": 0.57, "grad_norm": 0.7548667192459106, "learning_rate": 8.124015242297678e-06, "loss": 2.1, "step": 17054 }, { "epoch": 0.57, "grad_norm": 0.7153352499008179, "learning_rate": 8.122971230773265e-06, "loss": 2.0528, "step": 17055 }, { "epoch": 0.57, "grad_norm": 0.7338407039642334, "learning_rate": 8.12192724045443e-06, "loss": 2.0356, "step": 17056 }, { "epoch": 0.57, "grad_norm": 0.7370111346244812, "learning_rate": 8.120883271352953e-06, "loss": 2.0492, "step": 17057 }, { "epoch": 0.57, "grad_norm": 0.7904776334762573, "learning_rate": 8.119839323480639e-06, "loss": 2.1255, "step": 17058 }, { "epoch": 0.57, "grad_norm": 0.7730766534805298, "learning_rate": 8.118795396849277e-06, "loss": 2.0929, "step": 17059 }, { "epoch": 0.57, "grad_norm": 0.7510932087898254, "learning_rate": 8.117751491470664e-06, "loss": 2.1155, "step": 17060 }, { "epoch": 0.57, "grad_norm": 0.7636374831199646, "learning_rate": 8.116707607356591e-06, "loss": 2.0957, "step": 17061 }, { "epoch": 0.57, "grad_norm": 0.7976534366607666, "learning_rate": 8.115663744518852e-06, "loss": 2.0887, "step": 17062 }, { "epoch": 0.57, "grad_norm": 0.7607437372207642, "learning_rate": 8.114619902969237e-06, "loss": 2.0364, "step": 17063 }, { "epoch": 0.57, "grad_norm": 0.7315516471862793, "learning_rate": 8.113576082719546e-06, "loss": 2.0186, "step": 17064 }, { "epoch": 0.57, "grad_norm": 0.7578868865966797, "learning_rate": 8.112532283781562e-06, "loss": 2.0514, "step": 17065 }, { "epoch": 0.57, "grad_norm": 0.7114315629005432, "learning_rate": 8.111488506167085e-06, "loss": 2.0574, "step": 17066 }, { "epoch": 0.57, "grad_norm": 0.7378893494606018, "learning_rate": 8.110444749887902e-06, "loss": 2.0792, "step": 17067 }, { "epoch": 0.57, "grad_norm": 0.718105137348175, "learning_rate": 8.109401014955808e-06, "loss": 2.0681, "step": 17068 }, { "epoch": 0.57, "grad_norm": 0.7147154211997986, "learning_rate": 8.108357301382596e-06, "loss": 2.054, "step": 17069 }, { "epoch": 0.57, "grad_norm": 0.737736165523529, "learning_rate": 8.107313609180051e-06, "loss": 2.0929, "step": 17070 }, { "epoch": 0.57, "grad_norm": 0.713158130645752, "learning_rate": 8.106269938359968e-06, "loss": 1.9967, "step": 17071 }, { "epoch": 0.57, "grad_norm": 0.7607359290122986, "learning_rate": 8.105226288934135e-06, "loss": 2.0181, "step": 17072 }, { "epoch": 0.57, "grad_norm": 0.6978567838668823, "learning_rate": 8.104182660914352e-06, "loss": 2.0368, "step": 17073 }, { "epoch": 0.57, "grad_norm": 0.7471758723258972, "learning_rate": 8.103139054312398e-06, "loss": 2.0789, "step": 17074 }, { "epoch": 0.57, "grad_norm": 0.7648957967758179, "learning_rate": 8.102095469140068e-06, "loss": 1.9749, "step": 17075 }, { "epoch": 0.57, "grad_norm": 0.8468007445335388, "learning_rate": 8.101051905409149e-06, "loss": 2.1233, "step": 17076 }, { "epoch": 0.57, "grad_norm": 0.7360994219779968, "learning_rate": 8.100008363131433e-06, "loss": 2.0906, "step": 17077 }, { "epoch": 0.57, "grad_norm": 0.7190072536468506, "learning_rate": 8.098964842318714e-06, "loss": 2.0071, "step": 17078 }, { "epoch": 0.57, "grad_norm": 0.7156478762626648, "learning_rate": 8.097921342982771e-06, "loss": 2.0615, "step": 17079 }, { "epoch": 0.57, "grad_norm": 0.762956440448761, "learning_rate": 8.096877865135399e-06, "loss": 2.0186, "step": 17080 }, { "epoch": 0.57, "grad_norm": 0.7714542150497437, "learning_rate": 8.095834408788386e-06, "loss": 2.0781, "step": 17081 }, { "epoch": 0.57, "grad_norm": 0.7219070792198181, "learning_rate": 8.094790973953521e-06, "loss": 2.1098, "step": 17082 }, { "epoch": 0.57, "grad_norm": 0.7380785942077637, "learning_rate": 8.09374756064259e-06, "loss": 2.0571, "step": 17083 }, { "epoch": 0.57, "grad_norm": 0.7403053045272827, "learning_rate": 8.092704168867379e-06, "loss": 2.1219, "step": 17084 }, { "epoch": 0.57, "grad_norm": 0.7409008145332336, "learning_rate": 8.091660798639681e-06, "loss": 2.0964, "step": 17085 }, { "epoch": 0.57, "grad_norm": 0.7817615866661072, "learning_rate": 8.090617449971286e-06, "loss": 2.0765, "step": 17086 }, { "epoch": 0.57, "grad_norm": 0.7500510811805725, "learning_rate": 8.089574122873969e-06, "loss": 2.1222, "step": 17087 }, { "epoch": 0.57, "grad_norm": 0.7383649349212646, "learning_rate": 8.088530817359526e-06, "loss": 2.0912, "step": 17088 }, { "epoch": 0.57, "grad_norm": 0.7605384588241577, "learning_rate": 8.08748753343974e-06, "loss": 2.1023, "step": 17089 }, { "epoch": 0.57, "grad_norm": 0.7343368530273438, "learning_rate": 8.0864442711264e-06, "loss": 2.0206, "step": 17090 }, { "epoch": 0.57, "grad_norm": 0.7432597279548645, "learning_rate": 8.085401030431295e-06, "loss": 2.1033, "step": 17091 }, { "epoch": 0.57, "grad_norm": 0.7041411399841309, "learning_rate": 8.084357811366202e-06, "loss": 2.0642, "step": 17092 }, { "epoch": 0.57, "grad_norm": 0.7271472215652466, "learning_rate": 8.083314613942913e-06, "loss": 2.0897, "step": 17093 }, { "epoch": 0.57, "grad_norm": 0.7213118672370911, "learning_rate": 8.082271438173214e-06, "loss": 2.0745, "step": 17094 }, { "epoch": 0.57, "grad_norm": 0.7546001076698303, "learning_rate": 8.081228284068889e-06, "loss": 2.0743, "step": 17095 }, { "epoch": 0.57, "grad_norm": 0.767673134803772, "learning_rate": 8.08018515164172e-06, "loss": 2.0704, "step": 17096 }, { "epoch": 0.57, "grad_norm": 0.7117115259170532, "learning_rate": 8.079142040903496e-06, "loss": 2.048, "step": 17097 }, { "epoch": 0.57, "grad_norm": 0.7848190665245056, "learning_rate": 8.078098951865995e-06, "loss": 2.0523, "step": 17098 }, { "epoch": 0.57, "grad_norm": 0.7393033504486084, "learning_rate": 8.077055884541009e-06, "loss": 2.0668, "step": 17099 }, { "epoch": 0.57, "grad_norm": 0.7572908401489258, "learning_rate": 8.07601283894032e-06, "loss": 2.0313, "step": 17100 }, { "epoch": 0.57, "grad_norm": 0.7229450345039368, "learning_rate": 8.074969815075709e-06, "loss": 2.051, "step": 17101 }, { "epoch": 0.57, "grad_norm": 0.7746517658233643, "learning_rate": 8.073926812958962e-06, "loss": 1.9956, "step": 17102 }, { "epoch": 0.57, "grad_norm": 0.7412286400794983, "learning_rate": 8.072883832601858e-06, "loss": 2.0055, "step": 17103 }, { "epoch": 0.57, "grad_norm": 0.7535357475280762, "learning_rate": 8.071840874016187e-06, "loss": 2.1502, "step": 17104 }, { "epoch": 0.57, "grad_norm": 0.7136909365653992, "learning_rate": 8.070797937213725e-06, "loss": 2.1318, "step": 17105 }, { "epoch": 0.57, "grad_norm": 0.7250989079475403, "learning_rate": 8.069755022206257e-06, "loss": 2.0549, "step": 17106 }, { "epoch": 0.57, "grad_norm": 0.7634677886962891, "learning_rate": 8.068712129005566e-06, "loss": 2.1405, "step": 17107 }, { "epoch": 0.57, "grad_norm": 0.7400010228157043, "learning_rate": 8.067669257623434e-06, "loss": 2.0274, "step": 17108 }, { "epoch": 0.57, "grad_norm": 0.7430810928344727, "learning_rate": 8.066626408071644e-06, "loss": 2.1448, "step": 17109 }, { "epoch": 0.57, "grad_norm": 0.7226834893226624, "learning_rate": 8.065583580361972e-06, "loss": 2.0399, "step": 17110 }, { "epoch": 0.57, "grad_norm": 0.7559519410133362, "learning_rate": 8.064540774506207e-06, "loss": 2.0093, "step": 17111 }, { "epoch": 0.57, "grad_norm": 0.7325488328933716, "learning_rate": 8.063497990516122e-06, "loss": 2.0436, "step": 17112 }, { "epoch": 0.57, "grad_norm": 0.7469612956047058, "learning_rate": 8.062455228403508e-06, "loss": 2.0521, "step": 17113 }, { "epoch": 0.57, "grad_norm": 0.7206214666366577, "learning_rate": 8.061412488180133e-06, "loss": 2.062, "step": 17114 }, { "epoch": 0.57, "grad_norm": 0.7417789101600647, "learning_rate": 8.060369769857785e-06, "loss": 1.9739, "step": 17115 }, { "epoch": 0.57, "grad_norm": 0.767518937587738, "learning_rate": 8.059327073448244e-06, "loss": 2.0277, "step": 17116 }, { "epoch": 0.57, "grad_norm": 0.7396699786186218, "learning_rate": 8.058284398963289e-06, "loss": 2.0453, "step": 17117 }, { "epoch": 0.57, "grad_norm": 0.736834704875946, "learning_rate": 8.057241746414698e-06, "loss": 2.0229, "step": 17118 }, { "epoch": 0.57, "grad_norm": 0.7253525257110596, "learning_rate": 8.05619911581425e-06, "loss": 2.0052, "step": 17119 }, { "epoch": 0.57, "grad_norm": 0.7488128542900085, "learning_rate": 8.055156507173725e-06, "loss": 2.0485, "step": 17120 }, { "epoch": 0.57, "grad_norm": 0.7512637376785278, "learning_rate": 8.054113920504902e-06, "loss": 2.0382, "step": 17121 }, { "epoch": 0.57, "grad_norm": 0.7473495602607727, "learning_rate": 8.053071355819564e-06, "loss": 1.9607, "step": 17122 }, { "epoch": 0.57, "grad_norm": 0.7282321453094482, "learning_rate": 8.05202881312948e-06, "loss": 2.0532, "step": 17123 }, { "epoch": 0.57, "grad_norm": 0.7317062020301819, "learning_rate": 8.050986292446434e-06, "loss": 2.0507, "step": 17124 }, { "epoch": 0.57, "grad_norm": 0.734809398651123, "learning_rate": 8.049943793782203e-06, "loss": 2.0529, "step": 17125 }, { "epoch": 0.57, "grad_norm": 0.7382770776748657, "learning_rate": 8.048901317148566e-06, "loss": 2.0896, "step": 17126 }, { "epoch": 0.57, "grad_norm": 0.7528815269470215, "learning_rate": 8.047858862557297e-06, "loss": 2.0448, "step": 17127 }, { "epoch": 0.57, "grad_norm": 0.7539359927177429, "learning_rate": 8.046816430020172e-06, "loss": 2.0081, "step": 17128 }, { "epoch": 0.57, "grad_norm": 0.7364723086357117, "learning_rate": 8.045774019548972e-06, "loss": 2.1004, "step": 17129 }, { "epoch": 0.57, "grad_norm": 0.7571067214012146, "learning_rate": 8.044731631155473e-06, "loss": 2.0018, "step": 17130 }, { "epoch": 0.57, "grad_norm": 0.7279937267303467, "learning_rate": 8.043689264851452e-06, "loss": 2.0455, "step": 17131 }, { "epoch": 0.57, "grad_norm": 0.7200897932052612, "learning_rate": 8.04264692064868e-06, "loss": 2.063, "step": 17132 }, { "epoch": 0.57, "grad_norm": 0.7754957675933838, "learning_rate": 8.041604598558936e-06, "loss": 2.0831, "step": 17133 }, { "epoch": 0.57, "grad_norm": 0.7384926080703735, "learning_rate": 8.040562298593993e-06, "loss": 2.0533, "step": 17134 }, { "epoch": 0.57, "grad_norm": 0.7484129667282104, "learning_rate": 8.039520020765636e-06, "loss": 2.0111, "step": 17135 }, { "epoch": 0.57, "grad_norm": 0.7823920249938965, "learning_rate": 8.038477765085626e-06, "loss": 1.9974, "step": 17136 }, { "epoch": 0.57, "grad_norm": 0.7557188272476196, "learning_rate": 8.037435531565747e-06, "loss": 2.1407, "step": 17137 }, { "epoch": 0.57, "grad_norm": 0.7303891181945801, "learning_rate": 8.03639332021777e-06, "loss": 1.9987, "step": 17138 }, { "epoch": 0.57, "grad_norm": 0.7495269179344177, "learning_rate": 8.035351131053473e-06, "loss": 2.053, "step": 17139 }, { "epoch": 0.57, "grad_norm": 0.7523943185806274, "learning_rate": 8.034308964084629e-06, "loss": 2.125, "step": 17140 }, { "epoch": 0.57, "grad_norm": 0.7601137757301331, "learning_rate": 8.033266819323008e-06, "loss": 1.9618, "step": 17141 }, { "epoch": 0.57, "grad_norm": 0.7799620032310486, "learning_rate": 8.032224696780383e-06, "loss": 2.0252, "step": 17142 }, { "epoch": 0.57, "grad_norm": 0.722940981388092, "learning_rate": 8.031182596468532e-06, "loss": 2.0193, "step": 17143 }, { "epoch": 0.57, "grad_norm": 0.780859649181366, "learning_rate": 8.03014051839923e-06, "loss": 2.0658, "step": 17144 }, { "epoch": 0.57, "grad_norm": 0.746974766254425, "learning_rate": 8.029098462584242e-06, "loss": 2.0824, "step": 17145 }, { "epoch": 0.57, "grad_norm": 0.7359817624092102, "learning_rate": 8.028056429035345e-06, "loss": 2.1424, "step": 17146 }, { "epoch": 0.57, "grad_norm": 0.7404485940933228, "learning_rate": 8.027014417764311e-06, "loss": 2.0712, "step": 17147 }, { "epoch": 0.57, "grad_norm": 0.7576310038566589, "learning_rate": 8.025972428782915e-06, "loss": 2.0632, "step": 17148 }, { "epoch": 0.57, "grad_norm": 0.750542938709259, "learning_rate": 8.024930462102923e-06, "loss": 2.0515, "step": 17149 }, { "epoch": 0.57, "grad_norm": 0.7453352212905884, "learning_rate": 8.023888517736106e-06, "loss": 2.06, "step": 17150 }, { "epoch": 0.57, "grad_norm": 0.7599689960479736, "learning_rate": 8.022846595694241e-06, "loss": 2.0369, "step": 17151 }, { "epoch": 0.57, "grad_norm": 0.7303507328033447, "learning_rate": 8.021804695989098e-06, "loss": 2.0151, "step": 17152 }, { "epoch": 0.57, "grad_norm": 0.7523336410522461, "learning_rate": 8.020762818632446e-06, "loss": 2.0253, "step": 17153 }, { "epoch": 0.57, "grad_norm": 0.7349201440811157, "learning_rate": 8.019720963636054e-06, "loss": 2.0185, "step": 17154 }, { "epoch": 0.57, "grad_norm": 0.7671202421188354, "learning_rate": 8.018679131011695e-06, "loss": 2.1099, "step": 17155 }, { "epoch": 0.57, "grad_norm": 0.7543513774871826, "learning_rate": 8.017637320771134e-06, "loss": 2.0842, "step": 17156 }, { "epoch": 0.57, "grad_norm": 0.7141208648681641, "learning_rate": 8.016595532926154e-06, "loss": 2.0703, "step": 17157 }, { "epoch": 0.57, "grad_norm": 0.7708291411399841, "learning_rate": 8.015553767488506e-06, "loss": 2.0913, "step": 17158 }, { "epoch": 0.57, "grad_norm": 0.7589324712753296, "learning_rate": 8.014512024469973e-06, "loss": 2.0336, "step": 17159 }, { "epoch": 0.57, "grad_norm": 0.7440407276153564, "learning_rate": 8.013470303882318e-06, "loss": 2.0642, "step": 17160 }, { "epoch": 0.57, "grad_norm": 0.7379841804504395, "learning_rate": 8.012428605737313e-06, "loss": 2.1036, "step": 17161 }, { "epoch": 0.57, "grad_norm": 0.717802882194519, "learning_rate": 8.011386930046726e-06, "loss": 2.0691, "step": 17162 }, { "epoch": 0.57, "grad_norm": 0.721459150314331, "learning_rate": 8.01034527682232e-06, "loss": 2.036, "step": 17163 }, { "epoch": 0.57, "grad_norm": 0.7595359683036804, "learning_rate": 8.009303646075867e-06, "loss": 2.0756, "step": 17164 }, { "epoch": 0.57, "grad_norm": 0.7532103657722473, "learning_rate": 8.008262037819137e-06, "loss": 2.1036, "step": 17165 }, { "epoch": 0.57, "grad_norm": 0.7328066825866699, "learning_rate": 8.007220452063896e-06, "loss": 2.0697, "step": 17166 }, { "epoch": 0.57, "grad_norm": 0.7460857033729553, "learning_rate": 8.00617888882191e-06, "loss": 2.141, "step": 17167 }, { "epoch": 0.57, "grad_norm": 0.7490686774253845, "learning_rate": 8.005137348104946e-06, "loss": 2.0613, "step": 17168 }, { "epoch": 0.57, "grad_norm": 0.7344531416893005, "learning_rate": 8.004095829924769e-06, "loss": 1.9214, "step": 17169 }, { "epoch": 0.57, "grad_norm": 0.724797248840332, "learning_rate": 8.003054334293147e-06, "loss": 2.099, "step": 17170 }, { "epoch": 0.57, "grad_norm": 0.7411088943481445, "learning_rate": 8.002012861221855e-06, "loss": 2.0321, "step": 17171 }, { "epoch": 0.57, "grad_norm": 0.7376947999000549, "learning_rate": 8.000971410722641e-06, "loss": 2.0815, "step": 17172 }, { "epoch": 0.57, "grad_norm": 0.7443318963050842, "learning_rate": 7.999929982807286e-06, "loss": 2.0821, "step": 17173 }, { "epoch": 0.57, "grad_norm": 0.7186601161956787, "learning_rate": 7.998888577487547e-06, "loss": 2.0369, "step": 17174 }, { "epoch": 0.57, "grad_norm": 0.746322512626648, "learning_rate": 7.997847194775198e-06, "loss": 2.0972, "step": 17175 }, { "epoch": 0.57, "grad_norm": 0.7303517460823059, "learning_rate": 7.99680583468199e-06, "loss": 2.0411, "step": 17176 }, { "epoch": 0.57, "grad_norm": 0.7721924185752869, "learning_rate": 7.995764497219701e-06, "loss": 2.0556, "step": 17177 }, { "epoch": 0.57, "grad_norm": 0.731717586517334, "learning_rate": 7.994723182400086e-06, "loss": 2.1038, "step": 17178 }, { "epoch": 0.57, "grad_norm": 0.7152645587921143, "learning_rate": 7.993681890234918e-06, "loss": 2.0472, "step": 17179 }, { "epoch": 0.57, "grad_norm": 0.7476188540458679, "learning_rate": 7.992640620735952e-06, "loss": 2.0789, "step": 17180 }, { "epoch": 0.57, "grad_norm": 0.7399317622184753, "learning_rate": 7.991599373914958e-06, "loss": 2.0509, "step": 17181 }, { "epoch": 0.57, "grad_norm": 0.7329614162445068, "learning_rate": 7.990558149783695e-06, "loss": 2.0133, "step": 17182 }, { "epoch": 0.57, "grad_norm": 0.7275972962379456, "learning_rate": 7.98951694835393e-06, "loss": 2.1893, "step": 17183 }, { "epoch": 0.57, "grad_norm": 0.7227086424827576, "learning_rate": 7.988475769637429e-06, "loss": 2.0503, "step": 17184 }, { "epoch": 0.57, "grad_norm": 0.7978876233100891, "learning_rate": 7.987434613645941e-06, "loss": 2.0, "step": 17185 }, { "epoch": 0.57, "grad_norm": 0.7346686720848083, "learning_rate": 7.986393480391242e-06, "loss": 2.0646, "step": 17186 }, { "epoch": 0.57, "grad_norm": 0.7568705081939697, "learning_rate": 7.985352369885089e-06, "loss": 2.0996, "step": 17187 }, { "epoch": 0.57, "grad_norm": 0.7378888130187988, "learning_rate": 7.984311282139247e-06, "loss": 2.03, "step": 17188 }, { "epoch": 0.57, "grad_norm": 0.7622794508934021, "learning_rate": 7.983270217165471e-06, "loss": 2.0184, "step": 17189 }, { "epoch": 0.57, "grad_norm": 0.7497325539588928, "learning_rate": 7.982229174975527e-06, "loss": 2.0778, "step": 17190 }, { "epoch": 0.57, "grad_norm": 0.7458800077438354, "learning_rate": 7.981188155581173e-06, "loss": 2.0618, "step": 17191 }, { "epoch": 0.57, "grad_norm": 0.7652097344398499, "learning_rate": 7.980147158994175e-06, "loss": 2.0157, "step": 17192 }, { "epoch": 0.57, "grad_norm": 0.7112547755241394, "learning_rate": 7.979106185226291e-06, "loss": 2.0664, "step": 17193 }, { "epoch": 0.57, "grad_norm": 0.7529163360595703, "learning_rate": 7.978065234289281e-06, "loss": 2.0998, "step": 17194 }, { "epoch": 0.57, "grad_norm": 0.7317906022071838, "learning_rate": 7.977024306194903e-06, "loss": 1.9694, "step": 17195 }, { "epoch": 0.57, "grad_norm": 0.7348636984825134, "learning_rate": 7.97598340095492e-06, "loss": 2.0807, "step": 17196 }, { "epoch": 0.57, "grad_norm": 0.7501640915870667, "learning_rate": 7.974942518581092e-06, "loss": 2.0142, "step": 17197 }, { "epoch": 0.57, "grad_norm": 0.7218626141548157, "learning_rate": 7.973901659085175e-06, "loss": 2.0152, "step": 17198 }, { "epoch": 0.57, "grad_norm": 0.7464247941970825, "learning_rate": 7.972860822478928e-06, "loss": 2.0519, "step": 17199 }, { "epoch": 0.57, "grad_norm": 0.7218948602676392, "learning_rate": 7.971820008774111e-06, "loss": 2.0467, "step": 17200 }, { "epoch": 0.57, "grad_norm": 0.7223109602928162, "learning_rate": 7.970779217982487e-06, "loss": 2.0423, "step": 17201 }, { "epoch": 0.57, "grad_norm": 0.7714511156082153, "learning_rate": 7.969738450115809e-06, "loss": 2.0376, "step": 17202 }, { "epoch": 0.57, "grad_norm": 0.7560158967971802, "learning_rate": 7.968697705185836e-06, "loss": 2.0203, "step": 17203 }, { "epoch": 0.57, "grad_norm": 0.7776143550872803, "learning_rate": 7.967656983204323e-06, "loss": 1.9891, "step": 17204 }, { "epoch": 0.57, "grad_norm": 0.7590973377227783, "learning_rate": 7.966616284183031e-06, "loss": 2.0832, "step": 17205 }, { "epoch": 0.57, "grad_norm": 0.7555524706840515, "learning_rate": 7.965575608133722e-06, "loss": 2.0148, "step": 17206 }, { "epoch": 0.57, "grad_norm": 0.7567825317382812, "learning_rate": 7.96453495506814e-06, "loss": 2.0232, "step": 17207 }, { "epoch": 0.57, "grad_norm": 0.7442529201507568, "learning_rate": 7.963494324998054e-06, "loss": 2.0822, "step": 17208 }, { "epoch": 0.57, "grad_norm": 0.7443017363548279, "learning_rate": 7.962453717935214e-06, "loss": 2.0261, "step": 17209 }, { "epoch": 0.57, "grad_norm": 0.7546848058700562, "learning_rate": 7.96141313389138e-06, "loss": 2.0079, "step": 17210 }, { "epoch": 0.57, "grad_norm": 0.7328857779502869, "learning_rate": 7.960372572878304e-06, "loss": 2.0722, "step": 17211 }, { "epoch": 0.57, "grad_norm": 0.7243262529373169, "learning_rate": 7.95933203490774e-06, "loss": 2.0246, "step": 17212 }, { "epoch": 0.57, "grad_norm": 0.7621217370033264, "learning_rate": 7.95829151999145e-06, "loss": 2.103, "step": 17213 }, { "epoch": 0.57, "grad_norm": 0.7275574207305908, "learning_rate": 7.957251028141184e-06, "loss": 2.0062, "step": 17214 }, { "epoch": 0.57, "grad_norm": 0.7500011324882507, "learning_rate": 7.956210559368702e-06, "loss": 2.0819, "step": 17215 }, { "epoch": 0.57, "grad_norm": 0.7568040490150452, "learning_rate": 7.955170113685754e-06, "loss": 2.1076, "step": 17216 }, { "epoch": 0.57, "grad_norm": 0.7476917505264282, "learning_rate": 7.954129691104095e-06, "loss": 2.0371, "step": 17217 }, { "epoch": 0.57, "grad_norm": 0.7331631183624268, "learning_rate": 7.95308929163548e-06, "loss": 2.0589, "step": 17218 }, { "epoch": 0.57, "grad_norm": 0.751981794834137, "learning_rate": 7.952048915291664e-06, "loss": 1.9777, "step": 17219 }, { "epoch": 0.57, "grad_norm": 0.7637856006622314, "learning_rate": 7.951008562084398e-06, "loss": 2.0161, "step": 17220 }, { "epoch": 0.57, "grad_norm": 0.7636890411376953, "learning_rate": 7.949968232025434e-06, "loss": 2.0182, "step": 17221 }, { "epoch": 0.57, "grad_norm": 0.7305976748466492, "learning_rate": 7.948927925126529e-06, "loss": 2.0826, "step": 17222 }, { "epoch": 0.57, "grad_norm": 0.7499392628669739, "learning_rate": 7.947887641399435e-06, "loss": 2.0448, "step": 17223 }, { "epoch": 0.57, "grad_norm": 0.7591826915740967, "learning_rate": 7.946847380855905e-06, "loss": 2.0338, "step": 17224 }, { "epoch": 0.57, "grad_norm": 0.7297161817550659, "learning_rate": 7.945807143507688e-06, "loss": 2.0229, "step": 17225 }, { "epoch": 0.57, "grad_norm": 0.7471473217010498, "learning_rate": 7.94476692936654e-06, "loss": 2.0102, "step": 17226 }, { "epoch": 0.57, "grad_norm": 0.7940963506698608, "learning_rate": 7.943726738444207e-06, "loss": 2.0328, "step": 17227 }, { "epoch": 0.57, "grad_norm": 0.7770169377326965, "learning_rate": 7.942686570752451e-06, "loss": 2.0563, "step": 17228 }, { "epoch": 0.57, "grad_norm": 0.7700167894363403, "learning_rate": 7.94164642630301e-06, "loss": 2.0926, "step": 17229 }, { "epoch": 0.57, "grad_norm": 0.7342116236686707, "learning_rate": 7.940606305107642e-06, "loss": 2.0641, "step": 17230 }, { "epoch": 0.57, "grad_norm": 0.7330238223075867, "learning_rate": 7.939566207178099e-06, "loss": 1.9786, "step": 17231 }, { "epoch": 0.57, "grad_norm": 0.7703621983528137, "learning_rate": 7.93852613252613e-06, "loss": 2.1314, "step": 17232 }, { "epoch": 0.57, "grad_norm": 0.7340764999389648, "learning_rate": 7.937486081163483e-06, "loss": 2.0845, "step": 17233 }, { "epoch": 0.57, "grad_norm": 0.7359911203384399, "learning_rate": 7.93644605310191e-06, "loss": 2.0511, "step": 17234 }, { "epoch": 0.57, "grad_norm": 0.7481018304824829, "learning_rate": 7.935406048353159e-06, "loss": 2.0518, "step": 17235 }, { "epoch": 0.57, "grad_norm": 0.7372741103172302, "learning_rate": 7.934366066928981e-06, "loss": 2.0256, "step": 17236 }, { "epoch": 0.57, "grad_norm": 0.7193230390548706, "learning_rate": 7.933326108841127e-06, "loss": 1.97, "step": 17237 }, { "epoch": 0.57, "grad_norm": 0.7305843830108643, "learning_rate": 7.932286174101343e-06, "loss": 1.9734, "step": 17238 }, { "epoch": 0.57, "grad_norm": 0.7576166391372681, "learning_rate": 7.931246262721375e-06, "loss": 2.0774, "step": 17239 }, { "epoch": 0.57, "grad_norm": 0.7921248078346252, "learning_rate": 7.930206374712977e-06, "loss": 2.0521, "step": 17240 }, { "epoch": 0.57, "grad_norm": 0.7179931402206421, "learning_rate": 7.929166510087895e-06, "loss": 2.0685, "step": 17241 }, { "epoch": 0.57, "grad_norm": 0.7550375461578369, "learning_rate": 7.928126668857873e-06, "loss": 2.0745, "step": 17242 }, { "epoch": 0.57, "grad_norm": 0.7656317949295044, "learning_rate": 7.927086851034662e-06, "loss": 2.1207, "step": 17243 }, { "epoch": 0.57, "grad_norm": 0.7728816866874695, "learning_rate": 7.92604705663001e-06, "loss": 2.0636, "step": 17244 }, { "epoch": 0.57, "grad_norm": 0.7134026288986206, "learning_rate": 7.925007285655663e-06, "loss": 2.1002, "step": 17245 }, { "epoch": 0.57, "grad_norm": 0.7521299719810486, "learning_rate": 7.923967538123369e-06, "loss": 2.117, "step": 17246 }, { "epoch": 0.57, "grad_norm": 0.741278350353241, "learning_rate": 7.922927814044872e-06, "loss": 1.9849, "step": 17247 }, { "epoch": 0.57, "grad_norm": 0.787230908870697, "learning_rate": 7.921888113431918e-06, "loss": 2.1252, "step": 17248 }, { "epoch": 0.57, "grad_norm": 0.769725501537323, "learning_rate": 7.920848436296253e-06, "loss": 2.0438, "step": 17249 }, { "epoch": 0.57, "grad_norm": 0.7424687743186951, "learning_rate": 7.919808782649631e-06, "loss": 2.0882, "step": 17250 }, { "epoch": 0.57, "grad_norm": 0.708843469619751, "learning_rate": 7.918769152503782e-06, "loss": 1.9519, "step": 17251 }, { "epoch": 0.57, "grad_norm": 0.7164061069488525, "learning_rate": 7.917729545870462e-06, "loss": 2.0212, "step": 17252 }, { "epoch": 0.57, "grad_norm": 0.7577266693115234, "learning_rate": 7.916689962761415e-06, "loss": 2.0743, "step": 17253 }, { "epoch": 0.57, "grad_norm": 0.7395599484443665, "learning_rate": 7.915650403188382e-06, "loss": 2.0556, "step": 17254 }, { "epoch": 0.57, "grad_norm": 0.7140969038009644, "learning_rate": 7.914610867163113e-06, "loss": 2.0319, "step": 17255 }, { "epoch": 0.57, "grad_norm": 0.7289227247238159, "learning_rate": 7.913571354697344e-06, "loss": 2.0617, "step": 17256 }, { "epoch": 0.57, "grad_norm": 0.7285024523735046, "learning_rate": 7.912531865802822e-06, "loss": 2.045, "step": 17257 }, { "epoch": 0.57, "grad_norm": 0.7389302849769592, "learning_rate": 7.911492400491294e-06, "loss": 2.1072, "step": 17258 }, { "epoch": 0.57, "grad_norm": 0.7558693289756775, "learning_rate": 7.910452958774503e-06, "loss": 2.0727, "step": 17259 }, { "epoch": 0.57, "grad_norm": 0.7440516948699951, "learning_rate": 7.909413540664188e-06, "loss": 2.1013, "step": 17260 }, { "epoch": 0.57, "grad_norm": 0.7608960270881653, "learning_rate": 7.908374146172094e-06, "loss": 2.1101, "step": 17261 }, { "epoch": 0.57, "grad_norm": 0.7507086396217346, "learning_rate": 7.90733477530996e-06, "loss": 2.0097, "step": 17262 }, { "epoch": 0.57, "grad_norm": 0.721200168132782, "learning_rate": 7.906295428089537e-06, "loss": 2.0022, "step": 17263 }, { "epoch": 0.57, "grad_norm": 0.7818373441696167, "learning_rate": 7.905256104522558e-06, "loss": 2.0541, "step": 17264 }, { "epoch": 0.57, "grad_norm": 0.7486129999160767, "learning_rate": 7.904216804620764e-06, "loss": 2.0303, "step": 17265 }, { "epoch": 0.57, "grad_norm": 0.722420871257782, "learning_rate": 7.903177528395905e-06, "loss": 2.0451, "step": 17266 }, { "epoch": 0.57, "grad_norm": 0.7076942324638367, "learning_rate": 7.902138275859716e-06, "loss": 2.0768, "step": 17267 }, { "epoch": 0.57, "grad_norm": 0.7330958843231201, "learning_rate": 7.90109904702394e-06, "loss": 2.0251, "step": 17268 }, { "epoch": 0.57, "grad_norm": 0.7436361312866211, "learning_rate": 7.900059841900318e-06, "loss": 2.1124, "step": 17269 }, { "epoch": 0.57, "grad_norm": 0.7239755988121033, "learning_rate": 7.899020660500588e-06, "loss": 2.0941, "step": 17270 }, { "epoch": 0.57, "grad_norm": 0.7130014300346375, "learning_rate": 7.897981502836489e-06, "loss": 2.0563, "step": 17271 }, { "epoch": 0.57, "grad_norm": 0.753129243850708, "learning_rate": 7.89694236891977e-06, "loss": 2.1575, "step": 17272 }, { "epoch": 0.57, "grad_norm": 0.7116495370864868, "learning_rate": 7.895903258762157e-06, "loss": 2.0237, "step": 17273 }, { "epoch": 0.57, "grad_norm": 0.7680467963218689, "learning_rate": 7.894864172375395e-06, "loss": 2.0488, "step": 17274 }, { "epoch": 0.57, "grad_norm": 0.7454050183296204, "learning_rate": 7.893825109771229e-06, "loss": 2.0709, "step": 17275 }, { "epoch": 0.57, "grad_norm": 0.755456805229187, "learning_rate": 7.892786070961386e-06, "loss": 2.0577, "step": 17276 }, { "epoch": 0.57, "grad_norm": 0.7581737637519836, "learning_rate": 7.89174705595762e-06, "loss": 2.0711, "step": 17277 }, { "epoch": 0.57, "grad_norm": 0.714931309223175, "learning_rate": 7.890708064771655e-06, "loss": 2.0207, "step": 17278 }, { "epoch": 0.57, "grad_norm": 0.7360647320747375, "learning_rate": 7.889669097415232e-06, "loss": 1.9334, "step": 17279 }, { "epoch": 0.57, "grad_norm": 0.7530723810195923, "learning_rate": 7.888630153900093e-06, "loss": 2.0351, "step": 17280 }, { "epoch": 0.57, "grad_norm": 0.7844183444976807, "learning_rate": 7.887591234237975e-06, "loss": 2.1454, "step": 17281 }, { "epoch": 0.57, "grad_norm": 0.741177499294281, "learning_rate": 7.886552338440612e-06, "loss": 2.0692, "step": 17282 }, { "epoch": 0.58, "grad_norm": 0.7305881381034851, "learning_rate": 7.885513466519742e-06, "loss": 2.0769, "step": 17283 }, { "epoch": 0.58, "grad_norm": 0.747803270816803, "learning_rate": 7.8844746184871e-06, "loss": 2.1061, "step": 17284 }, { "epoch": 0.58, "grad_norm": 0.7842484712600708, "learning_rate": 7.883435794354424e-06, "loss": 2.0401, "step": 17285 }, { "epoch": 0.58, "grad_norm": 0.7240668535232544, "learning_rate": 7.882396994133456e-06, "loss": 1.9817, "step": 17286 }, { "epoch": 0.58, "grad_norm": 0.7215327620506287, "learning_rate": 7.881358217835919e-06, "loss": 2.0435, "step": 17287 }, { "epoch": 0.58, "grad_norm": 0.7056887149810791, "learning_rate": 7.880319465473556e-06, "loss": 2.0218, "step": 17288 }, { "epoch": 0.58, "grad_norm": 0.7830666899681091, "learning_rate": 7.879280737058102e-06, "loss": 2.0978, "step": 17289 }, { "epoch": 0.58, "grad_norm": 0.7306040525436401, "learning_rate": 7.878242032601294e-06, "loss": 2.0696, "step": 17290 }, { "epoch": 0.58, "grad_norm": 0.7575163245201111, "learning_rate": 7.877203352114862e-06, "loss": 2.087, "step": 17291 }, { "epoch": 0.58, "grad_norm": 0.7359973192214966, "learning_rate": 7.87616469561054e-06, "loss": 2.0459, "step": 17292 }, { "epoch": 0.58, "grad_norm": 0.7582885026931763, "learning_rate": 7.875126063100066e-06, "loss": 2.0867, "step": 17293 }, { "epoch": 0.58, "grad_norm": 0.7595769166946411, "learning_rate": 7.874087454595177e-06, "loss": 2.0227, "step": 17294 }, { "epoch": 0.58, "grad_norm": 0.7331162691116333, "learning_rate": 7.873048870107597e-06, "loss": 1.9979, "step": 17295 }, { "epoch": 0.58, "grad_norm": 0.7590515613555908, "learning_rate": 7.872010309649068e-06, "loss": 2.0663, "step": 17296 }, { "epoch": 0.58, "grad_norm": 0.7198009490966797, "learning_rate": 7.870971773231316e-06, "loss": 2.0643, "step": 17297 }, { "epoch": 0.58, "grad_norm": 0.7331738471984863, "learning_rate": 7.869933260866076e-06, "loss": 2.0973, "step": 17298 }, { "epoch": 0.58, "grad_norm": 0.7302350997924805, "learning_rate": 7.868894772565089e-06, "loss": 2.0316, "step": 17299 }, { "epoch": 0.58, "grad_norm": 0.7922964096069336, "learning_rate": 7.867856308340072e-06, "loss": 2.0585, "step": 17300 }, { "epoch": 0.58, "grad_norm": 0.7391806244850159, "learning_rate": 7.866817868202768e-06, "loss": 2.0641, "step": 17301 }, { "epoch": 0.58, "grad_norm": 0.7720181345939636, "learning_rate": 7.865779452164906e-06, "loss": 2.0592, "step": 17302 }, { "epoch": 0.58, "grad_norm": 0.743485689163208, "learning_rate": 7.864741060238218e-06, "loss": 2.0459, "step": 17303 }, { "epoch": 0.58, "grad_norm": 0.7617506384849548, "learning_rate": 7.863702692434431e-06, "loss": 2.0222, "step": 17304 }, { "epoch": 0.58, "grad_norm": 0.7448143362998962, "learning_rate": 7.86266434876528e-06, "loss": 2.0709, "step": 17305 }, { "epoch": 0.58, "grad_norm": 0.7567383646965027, "learning_rate": 7.861626029242493e-06, "loss": 2.0234, "step": 17306 }, { "epoch": 0.58, "grad_norm": 0.7609814405441284, "learning_rate": 7.860587733877804e-06, "loss": 2.1263, "step": 17307 }, { "epoch": 0.58, "grad_norm": 0.7350336909294128, "learning_rate": 7.859549462682944e-06, "loss": 2.0198, "step": 17308 }, { "epoch": 0.58, "grad_norm": 0.7451945543289185, "learning_rate": 7.858511215669636e-06, "loss": 2.0743, "step": 17309 }, { "epoch": 0.58, "grad_norm": 0.759648859500885, "learning_rate": 7.857472992849614e-06, "loss": 2.0535, "step": 17310 }, { "epoch": 0.58, "grad_norm": 0.7382601499557495, "learning_rate": 7.856434794234607e-06, "loss": 1.9995, "step": 17311 }, { "epoch": 0.58, "grad_norm": 0.7237899899482727, "learning_rate": 7.855396619836344e-06, "loss": 2.1117, "step": 17312 }, { "epoch": 0.58, "grad_norm": 0.7309780120849609, "learning_rate": 7.854358469666553e-06, "loss": 2.018, "step": 17313 }, { "epoch": 0.58, "grad_norm": 0.7413632273674011, "learning_rate": 7.853320343736959e-06, "loss": 2.0949, "step": 17314 }, { "epoch": 0.58, "grad_norm": 0.7469753623008728, "learning_rate": 7.852282242059296e-06, "loss": 2.023, "step": 17315 }, { "epoch": 0.58, "grad_norm": 0.7385715246200562, "learning_rate": 7.85124416464529e-06, "loss": 2.0998, "step": 17316 }, { "epoch": 0.58, "grad_norm": 0.7525288462638855, "learning_rate": 7.85020611150667e-06, "loss": 2.0471, "step": 17317 }, { "epoch": 0.58, "grad_norm": 0.747439444065094, "learning_rate": 7.849168082655159e-06, "loss": 2.0904, "step": 17318 }, { "epoch": 0.58, "grad_norm": 0.7694146037101746, "learning_rate": 7.848130078102486e-06, "loss": 2.0681, "step": 17319 }, { "epoch": 0.58, "grad_norm": 0.7242448925971985, "learning_rate": 7.847092097860377e-06, "loss": 2.0602, "step": 17320 }, { "epoch": 0.58, "grad_norm": 0.7295181155204773, "learning_rate": 7.846054141940567e-06, "loss": 2.0468, "step": 17321 }, { "epoch": 0.58, "grad_norm": 0.7924940586090088, "learning_rate": 7.845016210354767e-06, "loss": 2.045, "step": 17322 }, { "epoch": 0.58, "grad_norm": 0.7413346171379089, "learning_rate": 7.843978303114714e-06, "loss": 2.062, "step": 17323 }, { "epoch": 0.58, "grad_norm": 0.7604600191116333, "learning_rate": 7.84294042023213e-06, "loss": 2.0935, "step": 17324 }, { "epoch": 0.58, "grad_norm": 0.7378384470939636, "learning_rate": 7.841902561718743e-06, "loss": 2.0434, "step": 17325 }, { "epoch": 0.58, "grad_norm": 0.7457320094108582, "learning_rate": 7.840864727586275e-06, "loss": 2.0784, "step": 17326 }, { "epoch": 0.58, "grad_norm": 0.7351875901222229, "learning_rate": 7.83982691784645e-06, "loss": 2.0, "step": 17327 }, { "epoch": 0.58, "grad_norm": 0.7610269784927368, "learning_rate": 7.838789132510993e-06, "loss": 2.0687, "step": 17328 }, { "epoch": 0.58, "grad_norm": 0.7807416319847107, "learning_rate": 7.837751371591634e-06, "loss": 2.1091, "step": 17329 }, { "epoch": 0.58, "grad_norm": 0.7713356614112854, "learning_rate": 7.836713635100092e-06, "loss": 2.0833, "step": 17330 }, { "epoch": 0.58, "grad_norm": 0.728196382522583, "learning_rate": 7.835675923048091e-06, "loss": 1.9959, "step": 17331 }, { "epoch": 0.58, "grad_norm": 0.7599889636039734, "learning_rate": 7.834638235447355e-06, "loss": 2.088, "step": 17332 }, { "epoch": 0.58, "grad_norm": 0.7622956037521362, "learning_rate": 7.833600572309607e-06, "loss": 2.0768, "step": 17333 }, { "epoch": 0.58, "grad_norm": 0.7294535636901855, "learning_rate": 7.832562933646572e-06, "loss": 2.0507, "step": 17334 }, { "epoch": 0.58, "grad_norm": 0.755134642124176, "learning_rate": 7.83152531946997e-06, "loss": 2.098, "step": 17335 }, { "epoch": 0.58, "grad_norm": 0.7552882432937622, "learning_rate": 7.83048772979152e-06, "loss": 2.062, "step": 17336 }, { "epoch": 0.58, "grad_norm": 0.7497100830078125, "learning_rate": 7.829450164622952e-06, "loss": 2.1044, "step": 17337 }, { "epoch": 0.58, "grad_norm": 0.7370823621749878, "learning_rate": 7.828412623975983e-06, "loss": 2.0814, "step": 17338 }, { "epoch": 0.58, "grad_norm": 0.7301714420318604, "learning_rate": 7.827375107862337e-06, "loss": 2.1188, "step": 17339 }, { "epoch": 0.58, "grad_norm": 0.7348575592041016, "learning_rate": 7.826337616293732e-06, "loss": 2.0916, "step": 17340 }, { "epoch": 0.58, "grad_norm": 0.7609323859214783, "learning_rate": 7.82530014928189e-06, "loss": 2.0968, "step": 17341 }, { "epoch": 0.58, "grad_norm": 0.7490087747573853, "learning_rate": 7.824262706838532e-06, "loss": 2.147, "step": 17342 }, { "epoch": 0.58, "grad_norm": 0.749661386013031, "learning_rate": 7.823225288975385e-06, "loss": 2.1138, "step": 17343 }, { "epoch": 0.58, "grad_norm": 0.7424139380455017, "learning_rate": 7.822187895704157e-06, "loss": 2.0773, "step": 17344 }, { "epoch": 0.58, "grad_norm": 0.7401151061058044, "learning_rate": 7.821150527036574e-06, "loss": 2.0475, "step": 17345 }, { "epoch": 0.58, "grad_norm": 0.740336000919342, "learning_rate": 7.820113182984357e-06, "loss": 2.1321, "step": 17346 }, { "epoch": 0.58, "grad_norm": 0.7437052130699158, "learning_rate": 7.819075863559222e-06, "loss": 2.0196, "step": 17347 }, { "epoch": 0.58, "grad_norm": 0.7514331936836243, "learning_rate": 7.818038568772894e-06, "loss": 2.1593, "step": 17348 }, { "epoch": 0.58, "grad_norm": 0.7630886435508728, "learning_rate": 7.817001298637084e-06, "loss": 2.0993, "step": 17349 }, { "epoch": 0.58, "grad_norm": 0.7463335394859314, "learning_rate": 7.815964053163512e-06, "loss": 2.0569, "step": 17350 }, { "epoch": 0.58, "grad_norm": 0.7699261903762817, "learning_rate": 7.814926832363902e-06, "loss": 2.0599, "step": 17351 }, { "epoch": 0.58, "grad_norm": 0.7129340171813965, "learning_rate": 7.813889636249969e-06, "loss": 2.0283, "step": 17352 }, { "epoch": 0.58, "grad_norm": 0.7399489879608154, "learning_rate": 7.812852464833428e-06, "loss": 2.079, "step": 17353 }, { "epoch": 0.58, "grad_norm": 0.7603597640991211, "learning_rate": 7.811815318125996e-06, "loss": 2.1028, "step": 17354 }, { "epoch": 0.58, "grad_norm": 0.7158148884773254, "learning_rate": 7.810778196139393e-06, "loss": 2.1289, "step": 17355 }, { "epoch": 0.58, "grad_norm": 0.7565138339996338, "learning_rate": 7.809741098885338e-06, "loss": 2.1485, "step": 17356 }, { "epoch": 0.58, "grad_norm": 0.7400334477424622, "learning_rate": 7.808704026375542e-06, "loss": 2.0758, "step": 17357 }, { "epoch": 0.58, "grad_norm": 0.734768807888031, "learning_rate": 7.807666978621721e-06, "loss": 2.0819, "step": 17358 }, { "epoch": 0.58, "grad_norm": 0.7334807515144348, "learning_rate": 7.806629955635597e-06, "loss": 2.0198, "step": 17359 }, { "epoch": 0.58, "grad_norm": 0.7478862404823303, "learning_rate": 7.80559295742888e-06, "loss": 2.1156, "step": 17360 }, { "epoch": 0.58, "grad_norm": 0.7472066283226013, "learning_rate": 7.804555984013293e-06, "loss": 2.118, "step": 17361 }, { "epoch": 0.58, "grad_norm": 0.7464023232460022, "learning_rate": 7.803519035400542e-06, "loss": 2.0776, "step": 17362 }, { "epoch": 0.58, "grad_norm": 0.7464212775230408, "learning_rate": 7.802482111602345e-06, "loss": 2.0976, "step": 17363 }, { "epoch": 0.58, "grad_norm": 0.7521456480026245, "learning_rate": 7.801445212630416e-06, "loss": 2.1053, "step": 17364 }, { "epoch": 0.58, "grad_norm": 0.7686184048652649, "learning_rate": 7.800408338496478e-06, "loss": 2.108, "step": 17365 }, { "epoch": 0.58, "grad_norm": 0.7243461608886719, "learning_rate": 7.799371489212228e-06, "loss": 2.0259, "step": 17366 }, { "epoch": 0.58, "grad_norm": 0.7253236174583435, "learning_rate": 7.798334664789395e-06, "loss": 2.0684, "step": 17367 }, { "epoch": 0.58, "grad_norm": 0.7606551051139832, "learning_rate": 7.797297865239684e-06, "loss": 2.0791, "step": 17368 }, { "epoch": 0.58, "grad_norm": 0.742860734462738, "learning_rate": 7.796261090574809e-06, "loss": 2.0669, "step": 17369 }, { "epoch": 0.58, "grad_norm": 0.7606264352798462, "learning_rate": 7.795224340806492e-06, "loss": 2.1076, "step": 17370 }, { "epoch": 0.58, "grad_norm": 0.7465896010398865, "learning_rate": 7.794187615946433e-06, "loss": 1.9987, "step": 17371 }, { "epoch": 0.58, "grad_norm": 0.7205922603607178, "learning_rate": 7.793150916006349e-06, "loss": 2.0126, "step": 17372 }, { "epoch": 0.58, "grad_norm": 0.7139093279838562, "learning_rate": 7.792114240997954e-06, "loss": 2.0355, "step": 17373 }, { "epoch": 0.58, "grad_norm": 0.7206209301948547, "learning_rate": 7.79107759093296e-06, "loss": 2.0557, "step": 17374 }, { "epoch": 0.58, "grad_norm": 0.7571203112602234, "learning_rate": 7.790040965823077e-06, "loss": 2.0908, "step": 17375 }, { "epoch": 0.58, "grad_norm": 0.7445367574691772, "learning_rate": 7.789004365680012e-06, "loss": 2.0862, "step": 17376 }, { "epoch": 0.58, "grad_norm": 0.7295871376991272, "learning_rate": 7.78796779051548e-06, "loss": 2.0564, "step": 17377 }, { "epoch": 0.58, "grad_norm": 0.717801570892334, "learning_rate": 7.786931240341194e-06, "loss": 2.0298, "step": 17378 }, { "epoch": 0.58, "grad_norm": 0.7377458214759827, "learning_rate": 7.785894715168865e-06, "loss": 2.0364, "step": 17379 }, { "epoch": 0.58, "grad_norm": 0.7790558934211731, "learning_rate": 7.784858215010194e-06, "loss": 2.1451, "step": 17380 }, { "epoch": 0.58, "grad_norm": 0.7316217422485352, "learning_rate": 7.783821739876899e-06, "loss": 2.0655, "step": 17381 }, { "epoch": 0.58, "grad_norm": 0.7462796568870544, "learning_rate": 7.782785289780688e-06, "loss": 2.0084, "step": 17382 }, { "epoch": 0.58, "grad_norm": 0.7543364763259888, "learning_rate": 7.78174886473327e-06, "loss": 2.0886, "step": 17383 }, { "epoch": 0.58, "grad_norm": 0.7059748768806458, "learning_rate": 7.780712464746352e-06, "loss": 2.0643, "step": 17384 }, { "epoch": 0.58, "grad_norm": 0.7539125084877014, "learning_rate": 7.779676089831641e-06, "loss": 2.0097, "step": 17385 }, { "epoch": 0.58, "grad_norm": 0.7451120018959045, "learning_rate": 7.778639740000851e-06, "loss": 2.0861, "step": 17386 }, { "epoch": 0.58, "grad_norm": 0.7427269220352173, "learning_rate": 7.777603415265691e-06, "loss": 2.0159, "step": 17387 }, { "epoch": 0.58, "grad_norm": 0.739164412021637, "learning_rate": 7.77656711563786e-06, "loss": 2.0692, "step": 17388 }, { "epoch": 0.58, "grad_norm": 0.7497209310531616, "learning_rate": 7.775530841129072e-06, "loss": 2.0907, "step": 17389 }, { "epoch": 0.58, "grad_norm": 0.7496910095214844, "learning_rate": 7.774494591751034e-06, "loss": 1.9738, "step": 17390 }, { "epoch": 0.58, "grad_norm": 0.7634798884391785, "learning_rate": 7.773458367515449e-06, "loss": 2.0627, "step": 17391 }, { "epoch": 0.58, "grad_norm": 0.7251195907592773, "learning_rate": 7.772422168434034e-06, "loss": 1.9762, "step": 17392 }, { "epoch": 0.58, "grad_norm": 0.7290628552436829, "learning_rate": 7.771385994518479e-06, "loss": 2.0415, "step": 17393 }, { "epoch": 0.58, "grad_norm": 0.7531698346138, "learning_rate": 7.770349845780502e-06, "loss": 2.1319, "step": 17394 }, { "epoch": 0.58, "grad_norm": 0.7175230979919434, "learning_rate": 7.769313722231807e-06, "loss": 2.0439, "step": 17395 }, { "epoch": 0.58, "grad_norm": 0.7451719641685486, "learning_rate": 7.768277623884098e-06, "loss": 2.056, "step": 17396 }, { "epoch": 0.58, "grad_norm": 0.7396416068077087, "learning_rate": 7.767241550749079e-06, "loss": 2.0634, "step": 17397 }, { "epoch": 0.58, "grad_norm": 0.7493788599967957, "learning_rate": 7.766205502838457e-06, "loss": 2.0494, "step": 17398 }, { "epoch": 0.58, "grad_norm": 0.7432458996772766, "learning_rate": 7.765169480163935e-06, "loss": 2.0414, "step": 17399 }, { "epoch": 0.58, "grad_norm": 0.7387503385543823, "learning_rate": 7.76413348273722e-06, "loss": 2.0699, "step": 17400 }, { "epoch": 0.58, "grad_norm": 0.7569003701210022, "learning_rate": 7.763097510570016e-06, "loss": 2.0448, "step": 17401 }, { "epoch": 0.58, "grad_norm": 0.7413594722747803, "learning_rate": 7.762061563674024e-06, "loss": 2.0878, "step": 17402 }, { "epoch": 0.58, "grad_norm": 0.7320802211761475, "learning_rate": 7.76102564206095e-06, "loss": 2.0157, "step": 17403 }, { "epoch": 0.58, "grad_norm": 0.7352131605148315, "learning_rate": 7.759989745742493e-06, "loss": 2.0186, "step": 17404 }, { "epoch": 0.58, "grad_norm": 0.7776873707771301, "learning_rate": 7.758953874730363e-06, "loss": 2.0641, "step": 17405 }, { "epoch": 0.58, "grad_norm": 0.7237109541893005, "learning_rate": 7.757918029036257e-06, "loss": 2.039, "step": 17406 }, { "epoch": 0.58, "grad_norm": 0.7463359236717224, "learning_rate": 7.756882208671875e-06, "loss": 2.0016, "step": 17407 }, { "epoch": 0.58, "grad_norm": 0.7474258542060852, "learning_rate": 7.755846413648928e-06, "loss": 2.0277, "step": 17408 }, { "epoch": 0.58, "grad_norm": 0.7493696212768555, "learning_rate": 7.754810643979114e-06, "loss": 2.0492, "step": 17409 }, { "epoch": 0.58, "grad_norm": 0.7247046232223511, "learning_rate": 7.753774899674131e-06, "loss": 1.9979, "step": 17410 }, { "epoch": 0.58, "grad_norm": 0.7688528299331665, "learning_rate": 7.752739180745683e-06, "loss": 2.0897, "step": 17411 }, { "epoch": 0.58, "grad_norm": 0.7707491517066956, "learning_rate": 7.751703487205471e-06, "loss": 2.1069, "step": 17412 }, { "epoch": 0.58, "grad_norm": 0.7358784675598145, "learning_rate": 7.750667819065193e-06, "loss": 2.083, "step": 17413 }, { "epoch": 0.58, "grad_norm": 0.7353017926216125, "learning_rate": 7.749632176336558e-06, "loss": 2.1439, "step": 17414 }, { "epoch": 0.58, "grad_norm": 0.7381200790405273, "learning_rate": 7.748596559031254e-06, "loss": 2.0488, "step": 17415 }, { "epoch": 0.58, "grad_norm": 0.7448513507843018, "learning_rate": 7.747560967160988e-06, "loss": 2.0676, "step": 17416 }, { "epoch": 0.58, "grad_norm": 0.7788548469543457, "learning_rate": 7.746525400737458e-06, "loss": 2.0327, "step": 17417 }, { "epoch": 0.58, "grad_norm": 0.7512484788894653, "learning_rate": 7.745489859772367e-06, "loss": 2.0892, "step": 17418 }, { "epoch": 0.58, "grad_norm": 0.733829140663147, "learning_rate": 7.744454344277406e-06, "loss": 2.0248, "step": 17419 }, { "epoch": 0.58, "grad_norm": 0.7473303079605103, "learning_rate": 7.74341885426428e-06, "loss": 2.026, "step": 17420 }, { "epoch": 0.58, "grad_norm": 0.7779171466827393, "learning_rate": 7.742383389744681e-06, "loss": 2.1039, "step": 17421 }, { "epoch": 0.58, "grad_norm": 0.7964597344398499, "learning_rate": 7.741347950730316e-06, "loss": 2.0772, "step": 17422 }, { "epoch": 0.58, "grad_norm": 0.7253778576850891, "learning_rate": 7.740312537232878e-06, "loss": 1.9879, "step": 17423 }, { "epoch": 0.58, "grad_norm": 0.7164488434791565, "learning_rate": 7.739277149264066e-06, "loss": 2.08, "step": 17424 }, { "epoch": 0.58, "grad_norm": 0.7435901761054993, "learning_rate": 7.738241786835571e-06, "loss": 2.066, "step": 17425 }, { "epoch": 0.58, "grad_norm": 0.7549481987953186, "learning_rate": 7.737206449959098e-06, "loss": 2.1115, "step": 17426 }, { "epoch": 0.58, "grad_norm": 0.7708032131195068, "learning_rate": 7.736171138646342e-06, "loss": 2.1369, "step": 17427 }, { "epoch": 0.58, "grad_norm": 0.7282251119613647, "learning_rate": 7.735135852908997e-06, "loss": 2.0476, "step": 17428 }, { "epoch": 0.58, "grad_norm": 0.7787927985191345, "learning_rate": 7.734100592758755e-06, "loss": 2.0771, "step": 17429 }, { "epoch": 0.58, "grad_norm": 0.749177873134613, "learning_rate": 7.73306535820732e-06, "loss": 2.0528, "step": 17430 }, { "epoch": 0.58, "grad_norm": 0.7604622840881348, "learning_rate": 7.732030149266382e-06, "loss": 2.0395, "step": 17431 }, { "epoch": 0.58, "grad_norm": 0.7528326511383057, "learning_rate": 7.730994965947643e-06, "loss": 2.063, "step": 17432 }, { "epoch": 0.58, "grad_norm": 0.7310172319412231, "learning_rate": 7.72995980826279e-06, "loss": 2.0344, "step": 17433 }, { "epoch": 0.58, "grad_norm": 0.7548729181289673, "learning_rate": 7.728924676223521e-06, "loss": 2.0424, "step": 17434 }, { "epoch": 0.58, "grad_norm": 0.7577201724052429, "learning_rate": 7.727889569841528e-06, "loss": 2.0111, "step": 17435 }, { "epoch": 0.58, "grad_norm": 0.7365975379943848, "learning_rate": 7.726854489128513e-06, "loss": 2.0101, "step": 17436 }, { "epoch": 0.58, "grad_norm": 0.7256361842155457, "learning_rate": 7.725819434096157e-06, "loss": 2.0285, "step": 17437 }, { "epoch": 0.58, "grad_norm": 0.7774661183357239, "learning_rate": 7.724784404756163e-06, "loss": 2.0706, "step": 17438 }, { "epoch": 0.58, "grad_norm": 0.740241289138794, "learning_rate": 7.723749401120222e-06, "loss": 2.0661, "step": 17439 }, { "epoch": 0.58, "grad_norm": 0.7365866899490356, "learning_rate": 7.722714423200027e-06, "loss": 2.06, "step": 17440 }, { "epoch": 0.58, "grad_norm": 0.7518870830535889, "learning_rate": 7.721679471007268e-06, "loss": 2.0966, "step": 17441 }, { "epoch": 0.58, "grad_norm": 0.7267422080039978, "learning_rate": 7.720644544553639e-06, "loss": 2.0597, "step": 17442 }, { "epoch": 0.58, "grad_norm": 0.73319411277771, "learning_rate": 7.719609643850832e-06, "loss": 2.08, "step": 17443 }, { "epoch": 0.58, "grad_norm": 0.722403883934021, "learning_rate": 7.718574768910538e-06, "loss": 2.0176, "step": 17444 }, { "epoch": 0.58, "grad_norm": 0.7851993441581726, "learning_rate": 7.717539919744453e-06, "loss": 2.1465, "step": 17445 }, { "epoch": 0.58, "grad_norm": 0.7367965579032898, "learning_rate": 7.716505096364262e-06, "loss": 2.0593, "step": 17446 }, { "epoch": 0.58, "grad_norm": 0.7510982155799866, "learning_rate": 7.715470298781659e-06, "loss": 2.0726, "step": 17447 }, { "epoch": 0.58, "grad_norm": 0.7541019916534424, "learning_rate": 7.714435527008332e-06, "loss": 2.0664, "step": 17448 }, { "epoch": 0.58, "grad_norm": 0.7327848076820374, "learning_rate": 7.713400781055977e-06, "loss": 2.0555, "step": 17449 }, { "epoch": 0.58, "grad_norm": 0.7458142638206482, "learning_rate": 7.712366060936277e-06, "loss": 2.1116, "step": 17450 }, { "epoch": 0.58, "grad_norm": 0.7374458909034729, "learning_rate": 7.711331366660922e-06, "loss": 2.0663, "step": 17451 }, { "epoch": 0.58, "grad_norm": 0.7517438530921936, "learning_rate": 7.710296698241605e-06, "loss": 2.0364, "step": 17452 }, { "epoch": 0.58, "grad_norm": 0.7169767022132874, "learning_rate": 7.709262055690014e-06, "loss": 2.0435, "step": 17453 }, { "epoch": 0.58, "grad_norm": 0.7351388931274414, "learning_rate": 7.708227439017841e-06, "loss": 2.0745, "step": 17454 }, { "epoch": 0.58, "grad_norm": 0.7589265704154968, "learning_rate": 7.70719284823677e-06, "loss": 2.0851, "step": 17455 }, { "epoch": 0.58, "grad_norm": 0.7409583926200867, "learning_rate": 7.706158283358488e-06, "loss": 2.0009, "step": 17456 }, { "epoch": 0.58, "grad_norm": 0.746087908744812, "learning_rate": 7.705123744394687e-06, "loss": 1.9847, "step": 17457 }, { "epoch": 0.58, "grad_norm": 0.743193507194519, "learning_rate": 7.704089231357057e-06, "loss": 2.0815, "step": 17458 }, { "epoch": 0.58, "grad_norm": 0.7362068295478821, "learning_rate": 7.703054744257275e-06, "loss": 2.0004, "step": 17459 }, { "epoch": 0.58, "grad_norm": 0.734274685382843, "learning_rate": 7.702020283107037e-06, "loss": 2.067, "step": 17460 }, { "epoch": 0.58, "grad_norm": 0.733478307723999, "learning_rate": 7.700985847918026e-06, "loss": 2.0388, "step": 17461 }, { "epoch": 0.58, "grad_norm": 0.782490074634552, "learning_rate": 7.69995143870193e-06, "loss": 2.0244, "step": 17462 }, { "epoch": 0.58, "grad_norm": 0.7735369205474854, "learning_rate": 7.698917055470438e-06, "loss": 2.1589, "step": 17463 }, { "epoch": 0.58, "grad_norm": 0.7039288878440857, "learning_rate": 7.697882698235229e-06, "loss": 2.06, "step": 17464 }, { "epoch": 0.58, "grad_norm": 0.7719813585281372, "learning_rate": 7.696848367007992e-06, "loss": 2.0303, "step": 17465 }, { "epoch": 0.58, "grad_norm": 0.7700414061546326, "learning_rate": 7.695814061800413e-06, "loss": 2.0361, "step": 17466 }, { "epoch": 0.58, "grad_norm": 0.7483057379722595, "learning_rate": 7.694779782624178e-06, "loss": 2.0922, "step": 17467 }, { "epoch": 0.58, "grad_norm": 0.7486280202865601, "learning_rate": 7.693745529490968e-06, "loss": 2.0695, "step": 17468 }, { "epoch": 0.58, "grad_norm": 0.7262049913406372, "learning_rate": 7.69271130241247e-06, "loss": 2.0236, "step": 17469 }, { "epoch": 0.58, "grad_norm": 0.7191380262374878, "learning_rate": 7.691677101400366e-06, "loss": 2.0721, "step": 17470 }, { "epoch": 0.58, "grad_norm": 0.7430191040039062, "learning_rate": 7.690642926466346e-06, "loss": 2.0189, "step": 17471 }, { "epoch": 0.58, "grad_norm": 0.7590774893760681, "learning_rate": 7.689608777622086e-06, "loss": 1.9794, "step": 17472 }, { "epoch": 0.58, "grad_norm": 0.7510725259780884, "learning_rate": 7.68857465487927e-06, "loss": 2.046, "step": 17473 }, { "epoch": 0.58, "grad_norm": 0.7602266669273376, "learning_rate": 7.687540558249583e-06, "loss": 2.0653, "step": 17474 }, { "epoch": 0.58, "grad_norm": 0.7670218348503113, "learning_rate": 7.68650648774471e-06, "loss": 2.0999, "step": 17475 }, { "epoch": 0.58, "grad_norm": 0.7479475140571594, "learning_rate": 7.685472443376331e-06, "loss": 2.0777, "step": 17476 }, { "epoch": 0.58, "grad_norm": 0.7386905550956726, "learning_rate": 7.684438425156127e-06, "loss": 2.1075, "step": 17477 }, { "epoch": 0.58, "grad_norm": 0.7453323006629944, "learning_rate": 7.683404433095779e-06, "loss": 2.0302, "step": 17478 }, { "epoch": 0.58, "grad_norm": 0.7332302331924438, "learning_rate": 7.68237046720697e-06, "loss": 2.0645, "step": 17479 }, { "epoch": 0.58, "grad_norm": 0.7530774474143982, "learning_rate": 7.681336527501388e-06, "loss": 2.102, "step": 17480 }, { "epoch": 0.58, "grad_norm": 0.745632529258728, "learning_rate": 7.680302613990699e-06, "loss": 2.1098, "step": 17481 }, { "epoch": 0.58, "grad_norm": 0.7491067051887512, "learning_rate": 7.679268726686594e-06, "loss": 1.9696, "step": 17482 }, { "epoch": 0.58, "grad_norm": 0.773668110370636, "learning_rate": 7.678234865600752e-06, "loss": 2.0798, "step": 17483 }, { "epoch": 0.58, "grad_norm": 0.7314655780792236, "learning_rate": 7.677201030744847e-06, "loss": 2.134, "step": 17484 }, { "epoch": 0.58, "grad_norm": 0.7247464060783386, "learning_rate": 7.676167222130573e-06, "loss": 2.0378, "step": 17485 }, { "epoch": 0.58, "grad_norm": 0.7148680686950684, "learning_rate": 7.675133439769592e-06, "loss": 1.9948, "step": 17486 }, { "epoch": 0.58, "grad_norm": 0.7336472272872925, "learning_rate": 7.674099683673593e-06, "loss": 2.0247, "step": 17487 }, { "epoch": 0.58, "grad_norm": 0.7533717155456543, "learning_rate": 7.673065953854251e-06, "loss": 2.0722, "step": 17488 }, { "epoch": 0.58, "grad_norm": 0.730694591999054, "learning_rate": 7.67203225032325e-06, "loss": 2.0602, "step": 17489 }, { "epoch": 0.58, "grad_norm": 0.7377015352249146, "learning_rate": 7.670998573092263e-06, "loss": 1.9994, "step": 17490 }, { "epoch": 0.58, "grad_norm": 0.7786106467247009, "learning_rate": 7.669964922172968e-06, "loss": 2.1018, "step": 17491 }, { "epoch": 0.58, "grad_norm": 0.7203280329704285, "learning_rate": 7.668931297577042e-06, "loss": 2.031, "step": 17492 }, { "epoch": 0.58, "grad_norm": 0.712897002696991, "learning_rate": 7.667897699316166e-06, "loss": 2.0726, "step": 17493 }, { "epoch": 0.58, "grad_norm": 0.7282452583312988, "learning_rate": 7.666864127402016e-06, "loss": 2.025, "step": 17494 }, { "epoch": 0.58, "grad_norm": 0.729796290397644, "learning_rate": 7.665830581846268e-06, "loss": 1.9463, "step": 17495 }, { "epoch": 0.58, "grad_norm": 0.7368864417076111, "learning_rate": 7.664797062660597e-06, "loss": 2.064, "step": 17496 }, { "epoch": 0.58, "grad_norm": 0.7650624513626099, "learning_rate": 7.66376356985668e-06, "loss": 2.0402, "step": 17497 }, { "epoch": 0.58, "grad_norm": 0.7281854748725891, "learning_rate": 7.662730103446197e-06, "loss": 1.9779, "step": 17498 }, { "epoch": 0.58, "grad_norm": 0.7584214806556702, "learning_rate": 7.661696663440815e-06, "loss": 2.0546, "step": 17499 }, { "epoch": 0.58, "grad_norm": 0.7363767623901367, "learning_rate": 7.660663249852212e-06, "loss": 2.0355, "step": 17500 }, { "epoch": 0.58, "grad_norm": 0.7221778035163879, "learning_rate": 7.659629862692067e-06, "loss": 2.0415, "step": 17501 }, { "epoch": 0.58, "grad_norm": 0.7445442080497742, "learning_rate": 7.658596501972056e-06, "loss": 2.0745, "step": 17502 }, { "epoch": 0.58, "grad_norm": 0.7380548715591431, "learning_rate": 7.657563167703845e-06, "loss": 2.0499, "step": 17503 }, { "epoch": 0.58, "grad_norm": 0.7469645738601685, "learning_rate": 7.656529859899113e-06, "loss": 2.0701, "step": 17504 }, { "epoch": 0.58, "grad_norm": 0.7615806460380554, "learning_rate": 7.655496578569533e-06, "loss": 2.1273, "step": 17505 }, { "epoch": 0.58, "grad_norm": 0.7354166507720947, "learning_rate": 7.654463323726778e-06, "loss": 2.0827, "step": 17506 }, { "epoch": 0.58, "grad_norm": 0.7513006925582886, "learning_rate": 7.653430095382528e-06, "loss": 2.0889, "step": 17507 }, { "epoch": 0.58, "grad_norm": 0.7492263913154602, "learning_rate": 7.652396893548441e-06, "loss": 2.0597, "step": 17508 }, { "epoch": 0.58, "grad_norm": 0.7354400753974915, "learning_rate": 7.651363718236202e-06, "loss": 2.0764, "step": 17509 }, { "epoch": 0.58, "grad_norm": 0.7112005949020386, "learning_rate": 7.65033056945748e-06, "loss": 2.0635, "step": 17510 }, { "epoch": 0.58, "grad_norm": 0.7373778223991394, "learning_rate": 7.649297447223947e-06, "loss": 2.1214, "step": 17511 }, { "epoch": 0.58, "grad_norm": 0.7370812296867371, "learning_rate": 7.648264351547271e-06, "loss": 2.074, "step": 17512 }, { "epoch": 0.58, "grad_norm": 0.746357798576355, "learning_rate": 7.647231282439126e-06, "loss": 2.0507, "step": 17513 }, { "epoch": 0.58, "grad_norm": 0.7210960388183594, "learning_rate": 7.646198239911184e-06, "loss": 2.0497, "step": 17514 }, { "epoch": 0.58, "grad_norm": 0.7284794449806213, "learning_rate": 7.645165223975113e-06, "loss": 1.9811, "step": 17515 }, { "epoch": 0.58, "grad_norm": 0.7269585728645325, "learning_rate": 7.64413223464259e-06, "loss": 2.0341, "step": 17516 }, { "epoch": 0.58, "grad_norm": 0.7469526529312134, "learning_rate": 7.643099271925278e-06, "loss": 2.0446, "step": 17517 }, { "epoch": 0.58, "grad_norm": 0.73759526014328, "learning_rate": 7.64206633583485e-06, "loss": 2.1581, "step": 17518 }, { "epoch": 0.58, "grad_norm": 0.7635288834571838, "learning_rate": 7.641033426382973e-06, "loss": 2.0017, "step": 17519 }, { "epoch": 0.58, "grad_norm": 0.729167640209198, "learning_rate": 7.64000054358132e-06, "loss": 2.0578, "step": 17520 }, { "epoch": 0.58, "grad_norm": 0.7536032795906067, "learning_rate": 7.638967687441556e-06, "loss": 2.0881, "step": 17521 }, { "epoch": 0.58, "grad_norm": 0.7823000550270081, "learning_rate": 7.637934857975349e-06, "loss": 2.0958, "step": 17522 }, { "epoch": 0.58, "grad_norm": 0.7444952726364136, "learning_rate": 7.636902055194371e-06, "loss": 1.9673, "step": 17523 }, { "epoch": 0.58, "grad_norm": 0.7347999215126038, "learning_rate": 7.635869279110291e-06, "loss": 1.9993, "step": 17524 }, { "epoch": 0.58, "grad_norm": 0.7580734491348267, "learning_rate": 7.634836529734775e-06, "loss": 2.0522, "step": 17525 }, { "epoch": 0.58, "grad_norm": 0.7444814443588257, "learning_rate": 7.633803807079487e-06, "loss": 2.0208, "step": 17526 }, { "epoch": 0.58, "grad_norm": 0.7627196311950684, "learning_rate": 7.632771111156098e-06, "loss": 2.0861, "step": 17527 }, { "epoch": 0.58, "grad_norm": 0.7694302201271057, "learning_rate": 7.63173844197627e-06, "loss": 2.0658, "step": 17528 }, { "epoch": 0.58, "grad_norm": 0.7179054021835327, "learning_rate": 7.63070579955168e-06, "loss": 2.0339, "step": 17529 }, { "epoch": 0.58, "grad_norm": 0.7908560633659363, "learning_rate": 7.629673183893984e-06, "loss": 2.1161, "step": 17530 }, { "epoch": 0.58, "grad_norm": 0.748127281665802, "learning_rate": 7.62864059501485e-06, "loss": 2.0924, "step": 17531 }, { "epoch": 0.58, "grad_norm": 0.7328124642372131, "learning_rate": 7.627608032925946e-06, "loss": 2.0092, "step": 17532 }, { "epoch": 0.58, "grad_norm": 0.7364363074302673, "learning_rate": 7.626575497638938e-06, "loss": 2.1092, "step": 17533 }, { "epoch": 0.58, "grad_norm": 0.7096620798110962, "learning_rate": 7.625542989165487e-06, "loss": 2.0657, "step": 17534 }, { "epoch": 0.58, "grad_norm": 0.7504627108573914, "learning_rate": 7.62451050751726e-06, "loss": 1.9509, "step": 17535 }, { "epoch": 0.58, "grad_norm": 0.751465380191803, "learning_rate": 7.6234780527059185e-06, "loss": 1.9769, "step": 17536 }, { "epoch": 0.58, "grad_norm": 0.7992161512374878, "learning_rate": 7.622445624743131e-06, "loss": 2.1251, "step": 17537 }, { "epoch": 0.58, "grad_norm": 0.7546550631523132, "learning_rate": 7.6214132236405625e-06, "loss": 2.0552, "step": 17538 }, { "epoch": 0.58, "grad_norm": 0.717498779296875, "learning_rate": 7.620380849409871e-06, "loss": 2.0507, "step": 17539 }, { "epoch": 0.58, "grad_norm": 0.7461324334144592, "learning_rate": 7.619348502062721e-06, "loss": 2.0598, "step": 17540 }, { "epoch": 0.58, "grad_norm": 0.7266407608985901, "learning_rate": 7.618316181610777e-06, "loss": 2.0841, "step": 17541 }, { "epoch": 0.58, "grad_norm": 0.7460744380950928, "learning_rate": 7.617283888065704e-06, "loss": 2.0706, "step": 17542 }, { "epoch": 0.58, "grad_norm": 0.7377756237983704, "learning_rate": 7.6162516214391595e-06, "loss": 2.0417, "step": 17543 }, { "epoch": 0.58, "grad_norm": 0.7448939085006714, "learning_rate": 7.615219381742803e-06, "loss": 2.0431, "step": 17544 }, { "epoch": 0.58, "grad_norm": 0.713191032409668, "learning_rate": 7.614187168988304e-06, "loss": 2.0027, "step": 17545 }, { "epoch": 0.58, "grad_norm": 0.7505508065223694, "learning_rate": 7.61315498318732e-06, "loss": 2.0809, "step": 17546 }, { "epoch": 0.58, "grad_norm": 0.7530224323272705, "learning_rate": 7.612122824351513e-06, "loss": 2.0783, "step": 17547 }, { "epoch": 0.58, "grad_norm": 0.7626066207885742, "learning_rate": 7.6110906924925424e-06, "loss": 2.0813, "step": 17548 }, { "epoch": 0.58, "grad_norm": 0.7268019914627075, "learning_rate": 7.610058587622068e-06, "loss": 2.0512, "step": 17549 }, { "epoch": 0.58, "grad_norm": 0.7149131298065186, "learning_rate": 7.609026509751749e-06, "loss": 2.0892, "step": 17550 }, { "epoch": 0.58, "grad_norm": 0.7528133988380432, "learning_rate": 7.6079944588932545e-06, "loss": 2.0682, "step": 17551 }, { "epoch": 0.58, "grad_norm": 0.7297781705856323, "learning_rate": 7.60696243505823e-06, "loss": 2.0764, "step": 17552 }, { "epoch": 0.58, "grad_norm": 0.749937117099762, "learning_rate": 7.605930438258343e-06, "loss": 2.0528, "step": 17553 }, { "epoch": 0.58, "grad_norm": 0.7584051489830017, "learning_rate": 7.604898468505251e-06, "loss": 2.036, "step": 17554 }, { "epoch": 0.58, "grad_norm": 0.7718164324760437, "learning_rate": 7.603866525810613e-06, "loss": 2.0591, "step": 17555 }, { "epoch": 0.58, "grad_norm": 0.734883725643158, "learning_rate": 7.602834610186088e-06, "loss": 2.0629, "step": 17556 }, { "epoch": 0.58, "grad_norm": 0.7330256700515747, "learning_rate": 7.601802721643332e-06, "loss": 2.1074, "step": 17557 }, { "epoch": 0.58, "grad_norm": 0.7398889660835266, "learning_rate": 7.600770860194e-06, "loss": 2.0365, "step": 17558 }, { "epoch": 0.58, "grad_norm": 0.7155783772468567, "learning_rate": 7.599739025849755e-06, "loss": 2.1221, "step": 17559 }, { "epoch": 0.58, "grad_norm": 0.7461991310119629, "learning_rate": 7.5987072186222545e-06, "loss": 2.0468, "step": 17560 }, { "epoch": 0.58, "grad_norm": 0.7397270202636719, "learning_rate": 7.59767543852315e-06, "loss": 2.0263, "step": 17561 }, { "epoch": 0.58, "grad_norm": 0.7453396320343018, "learning_rate": 7.5966436855641004e-06, "loss": 2.0799, "step": 17562 }, { "epoch": 0.58, "grad_norm": 0.724539577960968, "learning_rate": 7.59561195975676e-06, "loss": 2.0088, "step": 17563 }, { "epoch": 0.58, "grad_norm": 0.7381888031959534, "learning_rate": 7.594580261112793e-06, "loss": 2.036, "step": 17564 }, { "epoch": 0.58, "grad_norm": 0.7535859942436218, "learning_rate": 7.593548589643844e-06, "loss": 2.1219, "step": 17565 }, { "epoch": 0.58, "grad_norm": 0.7686878442764282, "learning_rate": 7.592516945361571e-06, "loss": 2.0897, "step": 17566 }, { "epoch": 0.58, "grad_norm": 0.7559021711349487, "learning_rate": 7.591485328277632e-06, "loss": 2.0914, "step": 17567 }, { "epoch": 0.58, "grad_norm": 0.7427282333374023, "learning_rate": 7.59045373840368e-06, "loss": 2.0088, "step": 17568 }, { "epoch": 0.58, "grad_norm": 0.7279033660888672, "learning_rate": 7.5894221757513735e-06, "loss": 2.05, "step": 17569 }, { "epoch": 0.58, "grad_norm": 0.7510033845901489, "learning_rate": 7.58839064033236e-06, "loss": 2.0655, "step": 17570 }, { "epoch": 0.58, "grad_norm": 0.7204381227493286, "learning_rate": 7.587359132158292e-06, "loss": 2.0487, "step": 17571 }, { "epoch": 0.58, "grad_norm": 0.7264611124992371, "learning_rate": 7.58632765124083e-06, "loss": 2.0611, "step": 17572 }, { "epoch": 0.58, "grad_norm": 0.7632628083229065, "learning_rate": 7.585296197591628e-06, "loss": 2.0859, "step": 17573 }, { "epoch": 0.58, "grad_norm": 0.7134138345718384, "learning_rate": 7.584264771222326e-06, "loss": 1.9269, "step": 17574 }, { "epoch": 0.58, "grad_norm": 0.726304292678833, "learning_rate": 7.583233372144589e-06, "loss": 2.0568, "step": 17575 }, { "epoch": 0.58, "grad_norm": 0.7399855256080627, "learning_rate": 7.582202000370065e-06, "loss": 2.0152, "step": 17576 }, { "epoch": 0.58, "grad_norm": 0.7417418956756592, "learning_rate": 7.581170655910402e-06, "loss": 2.0644, "step": 17577 }, { "epoch": 0.58, "grad_norm": 0.7679969072341919, "learning_rate": 7.5801393387772635e-06, "loss": 2.0816, "step": 17578 }, { "epoch": 0.58, "grad_norm": 0.7833313345909119, "learning_rate": 7.579108048982286e-06, "loss": 2.093, "step": 17579 }, { "epoch": 0.58, "grad_norm": 0.7727939486503601, "learning_rate": 7.578076786537129e-06, "loss": 1.9803, "step": 17580 }, { "epoch": 0.58, "grad_norm": 0.7487653493881226, "learning_rate": 7.57704555145344e-06, "loss": 2.1069, "step": 17581 }, { "epoch": 0.58, "grad_norm": 0.7269291877746582, "learning_rate": 7.576014343742873e-06, "loss": 2.0744, "step": 17582 }, { "epoch": 0.58, "grad_norm": 0.761785089969635, "learning_rate": 7.5749831634170734e-06, "loss": 2.0369, "step": 17583 }, { "epoch": 0.59, "grad_norm": 0.7442471385002136, "learning_rate": 7.573952010487693e-06, "loss": 2.0468, "step": 17584 }, { "epoch": 0.59, "grad_norm": 0.7325069904327393, "learning_rate": 7.572920884966379e-06, "loss": 2.0338, "step": 17585 }, { "epoch": 0.59, "grad_norm": 0.7632442712783813, "learning_rate": 7.571889786864789e-06, "loss": 2.0137, "step": 17586 }, { "epoch": 0.59, "grad_norm": 0.7422991394996643, "learning_rate": 7.570858716194558e-06, "loss": 2.0957, "step": 17587 }, { "epoch": 0.59, "grad_norm": 0.7239232063293457, "learning_rate": 7.569827672967345e-06, "loss": 2.0533, "step": 17588 }, { "epoch": 0.59, "grad_norm": 0.7236452698707581, "learning_rate": 7.5687966571947925e-06, "loss": 2.1239, "step": 17589 }, { "epoch": 0.59, "grad_norm": 0.7207648754119873, "learning_rate": 7.567765668888553e-06, "loss": 2.1178, "step": 17590 }, { "epoch": 0.59, "grad_norm": 0.7437989115715027, "learning_rate": 7.5667347080602715e-06, "loss": 2.0421, "step": 17591 }, { "epoch": 0.59, "grad_norm": 0.7432490587234497, "learning_rate": 7.565703774721595e-06, "loss": 2.0596, "step": 17592 }, { "epoch": 0.59, "grad_norm": 0.7236190438270569, "learning_rate": 7.564672868884168e-06, "loss": 2.0492, "step": 17593 }, { "epoch": 0.59, "grad_norm": 0.7365027666091919, "learning_rate": 7.5636419905596405e-06, "loss": 2.0672, "step": 17594 }, { "epoch": 0.59, "grad_norm": 0.7086513042449951, "learning_rate": 7.562611139759662e-06, "loss": 1.9998, "step": 17595 }, { "epoch": 0.59, "grad_norm": 0.7439348101615906, "learning_rate": 7.561580316495872e-06, "loss": 2.0185, "step": 17596 }, { "epoch": 0.59, "grad_norm": 0.7480746507644653, "learning_rate": 7.5605495207799165e-06, "loss": 2.056, "step": 17597 }, { "epoch": 0.59, "grad_norm": 0.7516438364982605, "learning_rate": 7.559518752623444e-06, "loss": 2.1186, "step": 17598 }, { "epoch": 0.59, "grad_norm": 0.7640146017074585, "learning_rate": 7.558488012038097e-06, "loss": 2.0522, "step": 17599 }, { "epoch": 0.59, "grad_norm": 0.7664073705673218, "learning_rate": 7.5574572990355265e-06, "loss": 1.994, "step": 17600 }, { "epoch": 0.59, "grad_norm": 0.7271788120269775, "learning_rate": 7.556426613627367e-06, "loss": 2.0278, "step": 17601 }, { "epoch": 0.59, "grad_norm": 0.7477800846099854, "learning_rate": 7.555395955825269e-06, "loss": 1.9969, "step": 17602 }, { "epoch": 0.59, "grad_norm": 0.7379085421562195, "learning_rate": 7.554365325640876e-06, "loss": 2.0085, "step": 17603 }, { "epoch": 0.59, "grad_norm": 0.7312513589859009, "learning_rate": 7.55333472308583e-06, "loss": 2.0603, "step": 17604 }, { "epoch": 0.59, "grad_norm": 0.734138548374176, "learning_rate": 7.552304148171774e-06, "loss": 1.9783, "step": 17605 }, { "epoch": 0.59, "grad_norm": 0.7805414199829102, "learning_rate": 7.55127360091035e-06, "loss": 1.9661, "step": 17606 }, { "epoch": 0.59, "grad_norm": 0.7499974370002747, "learning_rate": 7.550243081313201e-06, "loss": 2.0952, "step": 17607 }, { "epoch": 0.59, "grad_norm": 0.7417061924934387, "learning_rate": 7.5492125893919724e-06, "loss": 2.0979, "step": 17608 }, { "epoch": 0.59, "grad_norm": 0.7168869376182556, "learning_rate": 7.548182125158304e-06, "loss": 2.0711, "step": 17609 }, { "epoch": 0.59, "grad_norm": 0.7445345520973206, "learning_rate": 7.547151688623836e-06, "loss": 2.052, "step": 17610 }, { "epoch": 0.59, "grad_norm": 0.7561436891555786, "learning_rate": 7.546121279800212e-06, "loss": 2.0457, "step": 17611 }, { "epoch": 0.59, "grad_norm": 0.7429252862930298, "learning_rate": 7.5450908986990705e-06, "loss": 2.0711, "step": 17612 }, { "epoch": 0.59, "grad_norm": 0.724929928779602, "learning_rate": 7.5440605453320545e-06, "loss": 2.06, "step": 17613 }, { "epoch": 0.59, "grad_norm": 0.726159930229187, "learning_rate": 7.5430302197108005e-06, "loss": 2.024, "step": 17614 }, { "epoch": 0.59, "grad_norm": 0.7570730447769165, "learning_rate": 7.541999921846951e-06, "loss": 2.0461, "step": 17615 }, { "epoch": 0.59, "grad_norm": 0.7577831745147705, "learning_rate": 7.540969651752148e-06, "loss": 2.0674, "step": 17616 }, { "epoch": 0.59, "grad_norm": 0.7463246583938599, "learning_rate": 7.539939409438029e-06, "loss": 1.9972, "step": 17617 }, { "epoch": 0.59, "grad_norm": 0.7243469953536987, "learning_rate": 7.538909194916233e-06, "loss": 2.1171, "step": 17618 }, { "epoch": 0.59, "grad_norm": 0.748498260974884, "learning_rate": 7.537879008198397e-06, "loss": 2.0801, "step": 17619 }, { "epoch": 0.59, "grad_norm": 0.7632192969322205, "learning_rate": 7.536848849296161e-06, "loss": 2.0306, "step": 17620 }, { "epoch": 0.59, "grad_norm": 0.7671763300895691, "learning_rate": 7.535818718221161e-06, "loss": 2.068, "step": 17621 }, { "epoch": 0.59, "grad_norm": 0.7456734776496887, "learning_rate": 7.534788614985045e-06, "loss": 2.0889, "step": 17622 }, { "epoch": 0.59, "grad_norm": 0.7639561891555786, "learning_rate": 7.533758539599434e-06, "loss": 2.1013, "step": 17623 }, { "epoch": 0.59, "grad_norm": 0.7272188067436218, "learning_rate": 7.532728492075977e-06, "loss": 2.0595, "step": 17624 }, { "epoch": 0.59, "grad_norm": 0.7256278991699219, "learning_rate": 7.531698472426307e-06, "loss": 2.0916, "step": 17625 }, { "epoch": 0.59, "grad_norm": 0.7265766859054565, "learning_rate": 7.5306684806620636e-06, "loss": 2.0952, "step": 17626 }, { "epoch": 0.59, "grad_norm": 0.7260339260101318, "learning_rate": 7.529638516794878e-06, "loss": 2.115, "step": 17627 }, { "epoch": 0.59, "grad_norm": 0.7943592667579651, "learning_rate": 7.528608580836389e-06, "loss": 2.0663, "step": 17628 }, { "epoch": 0.59, "grad_norm": 0.7220326662063599, "learning_rate": 7.527578672798229e-06, "loss": 2.0706, "step": 17629 }, { "epoch": 0.59, "grad_norm": 0.7528584599494934, "learning_rate": 7.526548792692039e-06, "loss": 2.0177, "step": 17630 }, { "epoch": 0.59, "grad_norm": 0.7268966436386108, "learning_rate": 7.525518940529454e-06, "loss": 2.1183, "step": 17631 }, { "epoch": 0.59, "grad_norm": 0.785613477230072, "learning_rate": 7.5244891163221025e-06, "loss": 2.0496, "step": 17632 }, { "epoch": 0.59, "grad_norm": 0.7384819388389587, "learning_rate": 7.523459320081623e-06, "loss": 2.0891, "step": 17633 }, { "epoch": 0.59, "grad_norm": 0.7446824312210083, "learning_rate": 7.522429551819648e-06, "loss": 2.0876, "step": 17634 }, { "epoch": 0.59, "grad_norm": 0.7553513646125793, "learning_rate": 7.5213998115478134e-06, "loss": 2.1699, "step": 17635 }, { "epoch": 0.59, "grad_norm": 0.7468435168266296, "learning_rate": 7.520370099277751e-06, "loss": 2.0711, "step": 17636 }, { "epoch": 0.59, "grad_norm": 0.7943869233131409, "learning_rate": 7.51934041502109e-06, "loss": 2.0402, "step": 17637 }, { "epoch": 0.59, "grad_norm": 0.7574362754821777, "learning_rate": 7.518310758789471e-06, "loss": 2.0437, "step": 17638 }, { "epoch": 0.59, "grad_norm": 0.7430615425109863, "learning_rate": 7.517281130594521e-06, "loss": 2.0554, "step": 17639 }, { "epoch": 0.59, "grad_norm": 0.7411746382713318, "learning_rate": 7.516251530447877e-06, "loss": 2.0518, "step": 17640 }, { "epoch": 0.59, "grad_norm": 0.7515670657157898, "learning_rate": 7.515221958361165e-06, "loss": 2.0864, "step": 17641 }, { "epoch": 0.59, "grad_norm": 0.7506204843521118, "learning_rate": 7.5141924143460195e-06, "loss": 2.081, "step": 17642 }, { "epoch": 0.59, "grad_norm": 0.7756949067115784, "learning_rate": 7.513162898414068e-06, "loss": 2.1136, "step": 17643 }, { "epoch": 0.59, "grad_norm": 0.8023573756217957, "learning_rate": 7.512133410576953e-06, "loss": 2.1303, "step": 17644 }, { "epoch": 0.59, "grad_norm": 0.7889078259468079, "learning_rate": 7.511103950846289e-06, "loss": 2.0352, "step": 17645 }, { "epoch": 0.59, "grad_norm": 0.7400040626525879, "learning_rate": 7.510074519233717e-06, "loss": 2.0663, "step": 17646 }, { "epoch": 0.59, "grad_norm": 0.727421224117279, "learning_rate": 7.509045115750862e-06, "loss": 2.1047, "step": 17647 }, { "epoch": 0.59, "grad_norm": 0.7777203917503357, "learning_rate": 7.508015740409359e-06, "loss": 2.1023, "step": 17648 }, { "epoch": 0.59, "grad_norm": 0.7542639374732971, "learning_rate": 7.506986393220831e-06, "loss": 2.1508, "step": 17649 }, { "epoch": 0.59, "grad_norm": 0.7315654158592224, "learning_rate": 7.50595707419691e-06, "loss": 2.0545, "step": 17650 }, { "epoch": 0.59, "grad_norm": 0.7466400861740112, "learning_rate": 7.504927783349222e-06, "loss": 2.0387, "step": 17651 }, { "epoch": 0.59, "grad_norm": 0.7527769207954407, "learning_rate": 7.5038985206894e-06, "loss": 2.0194, "step": 17652 }, { "epoch": 0.59, "grad_norm": 0.7366842031478882, "learning_rate": 7.502869286229072e-06, "loss": 2.0384, "step": 17653 }, { "epoch": 0.59, "grad_norm": 0.7356754541397095, "learning_rate": 7.5018400799798605e-06, "loss": 2.1044, "step": 17654 }, { "epoch": 0.59, "grad_norm": 0.7419320940971375, "learning_rate": 7.500810901953396e-06, "loss": 2.099, "step": 17655 }, { "epoch": 0.59, "grad_norm": 0.7306846380233765, "learning_rate": 7.499781752161306e-06, "loss": 2.0894, "step": 17656 }, { "epoch": 0.59, "grad_norm": 0.7321548461914062, "learning_rate": 7.498752630615218e-06, "loss": 2.0756, "step": 17657 }, { "epoch": 0.59, "grad_norm": 0.7341476678848267, "learning_rate": 7.497723537326754e-06, "loss": 2.0728, "step": 17658 }, { "epoch": 0.59, "grad_norm": 0.7661813497543335, "learning_rate": 7.4966944723075416e-06, "loss": 1.9779, "step": 17659 }, { "epoch": 0.59, "grad_norm": 0.7620532512664795, "learning_rate": 7.4956654355692105e-06, "loss": 2.1432, "step": 17660 }, { "epoch": 0.59, "grad_norm": 0.7839008569717407, "learning_rate": 7.4946364271233825e-06, "loss": 2.0541, "step": 17661 }, { "epoch": 0.59, "grad_norm": 0.714858889579773, "learning_rate": 7.493607446981688e-06, "loss": 2.0674, "step": 17662 }, { "epoch": 0.59, "grad_norm": 0.7521733641624451, "learning_rate": 7.4925784951557445e-06, "loss": 1.9875, "step": 17663 }, { "epoch": 0.59, "grad_norm": 0.7793568968772888, "learning_rate": 7.4915495716571795e-06, "loss": 2.1494, "step": 17664 }, { "epoch": 0.59, "grad_norm": 0.7606765627861023, "learning_rate": 7.490520676497615e-06, "loss": 2.0473, "step": 17665 }, { "epoch": 0.59, "grad_norm": 0.7544727921485901, "learning_rate": 7.489491809688683e-06, "loss": 2.0289, "step": 17666 }, { "epoch": 0.59, "grad_norm": 0.7753239870071411, "learning_rate": 7.4884629712419965e-06, "loss": 2.1364, "step": 17667 }, { "epoch": 0.59, "grad_norm": 0.741447389125824, "learning_rate": 7.487434161169185e-06, "loss": 2.0604, "step": 17668 }, { "epoch": 0.59, "grad_norm": 0.7883172035217285, "learning_rate": 7.4864053794818696e-06, "loss": 1.9854, "step": 17669 }, { "epoch": 0.59, "grad_norm": 0.7314766645431519, "learning_rate": 7.485376626191669e-06, "loss": 2.0909, "step": 17670 }, { "epoch": 0.59, "grad_norm": 0.7710330486297607, "learning_rate": 7.484347901310218e-06, "loss": 2.1969, "step": 17671 }, { "epoch": 0.59, "grad_norm": 0.7284137010574341, "learning_rate": 7.483319204849124e-06, "loss": 2.0081, "step": 17672 }, { "epoch": 0.59, "grad_norm": 0.7630683779716492, "learning_rate": 7.482290536820016e-06, "loss": 2.0101, "step": 17673 }, { "epoch": 0.59, "grad_norm": 0.7487777471542358, "learning_rate": 7.4812618972345155e-06, "loss": 2.082, "step": 17674 }, { "epoch": 0.59, "grad_norm": 0.7572994828224182, "learning_rate": 7.4802332861042425e-06, "loss": 2.0088, "step": 17675 }, { "epoch": 0.59, "grad_norm": 0.7358850240707397, "learning_rate": 7.4792047034408156e-06, "loss": 2.1164, "step": 17676 }, { "epoch": 0.59, "grad_norm": 0.7252094149589539, "learning_rate": 7.478176149255856e-06, "loss": 2.0362, "step": 17677 }, { "epoch": 0.59, "grad_norm": 0.7432246804237366, "learning_rate": 7.477147623560983e-06, "loss": 2.0799, "step": 17678 }, { "epoch": 0.59, "grad_norm": 0.7615903615951538, "learning_rate": 7.4761191263678245e-06, "loss": 2.1197, "step": 17679 }, { "epoch": 0.59, "grad_norm": 0.7458433508872986, "learning_rate": 7.475090657687985e-06, "loss": 2.03, "step": 17680 }, { "epoch": 0.59, "grad_norm": 0.7390851974487305, "learning_rate": 7.474062217533094e-06, "loss": 2.0554, "step": 17681 }, { "epoch": 0.59, "grad_norm": 0.7398363947868347, "learning_rate": 7.473033805914769e-06, "loss": 2.0295, "step": 17682 }, { "epoch": 0.59, "grad_norm": 0.7734447717666626, "learning_rate": 7.472005422844626e-06, "loss": 2.1253, "step": 17683 }, { "epoch": 0.59, "grad_norm": 0.76604163646698, "learning_rate": 7.470977068334286e-06, "loss": 2.1087, "step": 17684 }, { "epoch": 0.59, "grad_norm": 0.7414438128471375, "learning_rate": 7.469948742395363e-06, "loss": 2.046, "step": 17685 }, { "epoch": 0.59, "grad_norm": 0.7619325518608093, "learning_rate": 7.468920445039476e-06, "loss": 2.1187, "step": 17686 }, { "epoch": 0.59, "grad_norm": 0.7636239528656006, "learning_rate": 7.4678921762782415e-06, "loss": 2.0214, "step": 17687 }, { "epoch": 0.59, "grad_norm": 0.7501306533813477, "learning_rate": 7.466863936123282e-06, "loss": 2.019, "step": 17688 }, { "epoch": 0.59, "grad_norm": 0.7532680630683899, "learning_rate": 7.465835724586205e-06, "loss": 2.0572, "step": 17689 }, { "epoch": 0.59, "grad_norm": 0.780039370059967, "learning_rate": 7.464807541678634e-06, "loss": 2.023, "step": 17690 }, { "epoch": 0.59, "grad_norm": 0.7525805234909058, "learning_rate": 7.4637793874121775e-06, "loss": 2.0268, "step": 17691 }, { "epoch": 0.59, "grad_norm": 0.7352322936058044, "learning_rate": 7.462751261798456e-06, "loss": 2.024, "step": 17692 }, { "epoch": 0.59, "grad_norm": 0.8357675075531006, "learning_rate": 7.4617231648490885e-06, "loss": 2.0668, "step": 17693 }, { "epoch": 0.59, "grad_norm": 0.7364036440849304, "learning_rate": 7.46069509657568e-06, "loss": 1.9879, "step": 17694 }, { "epoch": 0.59, "grad_norm": 0.7361488938331604, "learning_rate": 7.459667056989852e-06, "loss": 1.9991, "step": 17695 }, { "epoch": 0.59, "grad_norm": 0.733354926109314, "learning_rate": 7.458639046103216e-06, "loss": 2.0825, "step": 17696 }, { "epoch": 0.59, "grad_norm": 0.770213782787323, "learning_rate": 7.457611063927389e-06, "loss": 2.0843, "step": 17697 }, { "epoch": 0.59, "grad_norm": 0.7373711466789246, "learning_rate": 7.456583110473981e-06, "loss": 2.0511, "step": 17698 }, { "epoch": 0.59, "grad_norm": 0.741649329662323, "learning_rate": 7.455555185754606e-06, "loss": 2.0196, "step": 17699 }, { "epoch": 0.59, "grad_norm": 0.7481525540351868, "learning_rate": 7.454527289780874e-06, "loss": 2.0739, "step": 17700 }, { "epoch": 0.59, "grad_norm": 0.7365272641181946, "learning_rate": 7.453499422564404e-06, "loss": 1.9543, "step": 17701 }, { "epoch": 0.59, "grad_norm": 0.7408632040023804, "learning_rate": 7.452471584116807e-06, "loss": 2.1058, "step": 17702 }, { "epoch": 0.59, "grad_norm": 0.7279224395751953, "learning_rate": 7.451443774449692e-06, "loss": 2.0912, "step": 17703 }, { "epoch": 0.59, "grad_norm": 0.7461594343185425, "learning_rate": 7.45041599357467e-06, "loss": 2.0864, "step": 17704 }, { "epoch": 0.59, "grad_norm": 0.7708375453948975, "learning_rate": 7.449388241503355e-06, "loss": 2.031, "step": 17705 }, { "epoch": 0.59, "grad_norm": 0.7397706508636475, "learning_rate": 7.448360518247358e-06, "loss": 1.9936, "step": 17706 }, { "epoch": 0.59, "grad_norm": 0.7420316338539124, "learning_rate": 7.447332823818287e-06, "loss": 2.0358, "step": 17707 }, { "epoch": 0.59, "grad_norm": 0.7284911274909973, "learning_rate": 7.4463051582277515e-06, "loss": 2.0527, "step": 17708 }, { "epoch": 0.59, "grad_norm": 0.7508965730667114, "learning_rate": 7.4452775214873664e-06, "loss": 2.0553, "step": 17709 }, { "epoch": 0.59, "grad_norm": 0.7549227476119995, "learning_rate": 7.444249913608741e-06, "loss": 2.0681, "step": 17710 }, { "epoch": 0.59, "grad_norm": 0.7928520441055298, "learning_rate": 7.44322233460348e-06, "loss": 2.047, "step": 17711 }, { "epoch": 0.59, "grad_norm": 0.7126542925834656, "learning_rate": 7.442194784483192e-06, "loss": 2.0172, "step": 17712 }, { "epoch": 0.59, "grad_norm": 0.7781335711479187, "learning_rate": 7.4411672632594915e-06, "loss": 2.0736, "step": 17713 }, { "epoch": 0.59, "grad_norm": 0.7449811100959778, "learning_rate": 7.4401397709439795e-06, "loss": 2.0922, "step": 17714 }, { "epoch": 0.59, "grad_norm": 0.726498544216156, "learning_rate": 7.439112307548276e-06, "loss": 2.0467, "step": 17715 }, { "epoch": 0.59, "grad_norm": 0.7464563846588135, "learning_rate": 7.438084873083974e-06, "loss": 2.1294, "step": 17716 }, { "epoch": 0.59, "grad_norm": 0.7600895762443542, "learning_rate": 7.437057467562689e-06, "loss": 2.0195, "step": 17717 }, { "epoch": 0.59, "grad_norm": 0.7245799899101257, "learning_rate": 7.436030090996028e-06, "loss": 2.0661, "step": 17718 }, { "epoch": 0.59, "grad_norm": 0.7593080401420593, "learning_rate": 7.4350027433955985e-06, "loss": 2.1653, "step": 17719 }, { "epoch": 0.59, "grad_norm": 0.7599025368690491, "learning_rate": 7.4339754247730015e-06, "loss": 2.0982, "step": 17720 }, { "epoch": 0.59, "grad_norm": 0.7334645986557007, "learning_rate": 7.432948135139846e-06, "loss": 2.0341, "step": 17721 }, { "epoch": 0.59, "grad_norm": 0.7520384192466736, "learning_rate": 7.431920874507738e-06, "loss": 2.0026, "step": 17722 }, { "epoch": 0.59, "grad_norm": 0.7388368844985962, "learning_rate": 7.430893642888284e-06, "loss": 2.0531, "step": 17723 }, { "epoch": 0.59, "grad_norm": 0.7473230957984924, "learning_rate": 7.4298664402930895e-06, "loss": 2.0798, "step": 17724 }, { "epoch": 0.59, "grad_norm": 0.7643396854400635, "learning_rate": 7.428839266733756e-06, "loss": 2.1012, "step": 17725 }, { "epoch": 0.59, "grad_norm": 0.7569268941879272, "learning_rate": 7.427812122221889e-06, "loss": 2.0428, "step": 17726 }, { "epoch": 0.59, "grad_norm": 0.7806915044784546, "learning_rate": 7.426785006769094e-06, "loss": 2.0358, "step": 17727 }, { "epoch": 0.59, "grad_norm": 0.7343487739562988, "learning_rate": 7.425757920386975e-06, "loss": 2.0736, "step": 17728 }, { "epoch": 0.59, "grad_norm": 0.7507137656211853, "learning_rate": 7.424730863087134e-06, "loss": 2.0256, "step": 17729 }, { "epoch": 0.59, "grad_norm": 0.7378207445144653, "learning_rate": 7.423703834881171e-06, "loss": 2.117, "step": 17730 }, { "epoch": 0.59, "grad_norm": 0.7440178394317627, "learning_rate": 7.422676835780696e-06, "loss": 2.0599, "step": 17731 }, { "epoch": 0.59, "grad_norm": 0.7573827505111694, "learning_rate": 7.421649865797307e-06, "loss": 2.1147, "step": 17732 }, { "epoch": 0.59, "grad_norm": 0.7461088299751282, "learning_rate": 7.4206229249426065e-06, "loss": 2.0193, "step": 17733 }, { "epoch": 0.59, "grad_norm": 0.7320839762687683, "learning_rate": 7.4195960132281965e-06, "loss": 2.0453, "step": 17734 }, { "epoch": 0.59, "grad_norm": 0.7362501621246338, "learning_rate": 7.418569130665678e-06, "loss": 2.0878, "step": 17735 }, { "epoch": 0.59, "grad_norm": 0.7760770320892334, "learning_rate": 7.417542277266651e-06, "loss": 2.013, "step": 17736 }, { "epoch": 0.59, "grad_norm": 0.7016144394874573, "learning_rate": 7.416515453042723e-06, "loss": 2.0121, "step": 17737 }, { "epoch": 0.59, "grad_norm": 0.7516626715660095, "learning_rate": 7.415488658005484e-06, "loss": 2.0747, "step": 17738 }, { "epoch": 0.59, "grad_norm": 0.7502581477165222, "learning_rate": 7.414461892166542e-06, "loss": 2.0511, "step": 17739 }, { "epoch": 0.59, "grad_norm": 0.7340521216392517, "learning_rate": 7.413435155537494e-06, "loss": 2.0665, "step": 17740 }, { "epoch": 0.59, "grad_norm": 0.7667483687400818, "learning_rate": 7.4124084481299405e-06, "loss": 2.0363, "step": 17741 }, { "epoch": 0.59, "grad_norm": 0.7313727736473083, "learning_rate": 7.411381769955479e-06, "loss": 2.0601, "step": 17742 }, { "epoch": 0.59, "grad_norm": 0.7961923480033875, "learning_rate": 7.4103551210257095e-06, "loss": 2.1401, "step": 17743 }, { "epoch": 0.59, "grad_norm": 0.7360961437225342, "learning_rate": 7.409328501352228e-06, "loss": 2.0199, "step": 17744 }, { "epoch": 0.59, "grad_norm": 0.7178511023521423, "learning_rate": 7.408301910946636e-06, "loss": 2.043, "step": 17745 }, { "epoch": 0.59, "grad_norm": 0.7400296330451965, "learning_rate": 7.407275349820533e-06, "loss": 1.9741, "step": 17746 }, { "epoch": 0.59, "grad_norm": 0.7485824823379517, "learning_rate": 7.406248817985511e-06, "loss": 2.1478, "step": 17747 }, { "epoch": 0.59, "grad_norm": 0.7607347369194031, "learning_rate": 7.40522231545317e-06, "loss": 1.9981, "step": 17748 }, { "epoch": 0.59, "grad_norm": 0.7282645106315613, "learning_rate": 7.404195842235107e-06, "loss": 2.0044, "step": 17749 }, { "epoch": 0.59, "grad_norm": 0.7420344352722168, "learning_rate": 7.4031693983429195e-06, "loss": 2.0906, "step": 17750 }, { "epoch": 0.59, "grad_norm": 0.7412364482879639, "learning_rate": 7.402142983788201e-06, "loss": 2.0336, "step": 17751 }, { "epoch": 0.59, "grad_norm": 0.7402769923210144, "learning_rate": 7.401116598582545e-06, "loss": 2.0338, "step": 17752 }, { "epoch": 0.59, "grad_norm": 0.7416160106658936, "learning_rate": 7.4000902427375544e-06, "loss": 1.9889, "step": 17753 }, { "epoch": 0.59, "grad_norm": 0.7586106061935425, "learning_rate": 7.399063916264819e-06, "loss": 2.0744, "step": 17754 }, { "epoch": 0.59, "grad_norm": 0.7518903017044067, "learning_rate": 7.3980376191759376e-06, "loss": 2.1105, "step": 17755 }, { "epoch": 0.59, "grad_norm": 0.7239671349525452, "learning_rate": 7.397011351482501e-06, "loss": 2.0692, "step": 17756 }, { "epoch": 0.59, "grad_norm": 0.7311205863952637, "learning_rate": 7.395985113196105e-06, "loss": 2.0476, "step": 17757 }, { "epoch": 0.59, "grad_norm": 0.729142963886261, "learning_rate": 7.39495890432834e-06, "loss": 2.0722, "step": 17758 }, { "epoch": 0.59, "grad_norm": 0.7488257884979248, "learning_rate": 7.393932724890809e-06, "loss": 2.0275, "step": 17759 }, { "epoch": 0.59, "grad_norm": 0.763679027557373, "learning_rate": 7.392906574895091e-06, "loss": 2.009, "step": 17760 }, { "epoch": 0.59, "grad_norm": 0.7489261627197266, "learning_rate": 7.391880454352791e-06, "loss": 2.1009, "step": 17761 }, { "epoch": 0.59, "grad_norm": 0.7624302506446838, "learning_rate": 7.390854363275497e-06, "loss": 2.0365, "step": 17762 }, { "epoch": 0.59, "grad_norm": 0.7260489463806152, "learning_rate": 7.3898283016748015e-06, "loss": 1.9936, "step": 17763 }, { "epoch": 0.59, "grad_norm": 0.751052737236023, "learning_rate": 7.388802269562296e-06, "loss": 2.05, "step": 17764 }, { "epoch": 0.59, "grad_norm": 0.7342755794525146, "learning_rate": 7.387776266949571e-06, "loss": 2.0367, "step": 17765 }, { "epoch": 0.59, "grad_norm": 0.784067690372467, "learning_rate": 7.386750293848217e-06, "loss": 2.0104, "step": 17766 }, { "epoch": 0.59, "grad_norm": 0.7388331890106201, "learning_rate": 7.38572435026983e-06, "loss": 2.0407, "step": 17767 }, { "epoch": 0.59, "grad_norm": 0.7333582639694214, "learning_rate": 7.384698436225997e-06, "loss": 2.1001, "step": 17768 }, { "epoch": 0.59, "grad_norm": 0.7617678046226501, "learning_rate": 7.383672551728308e-06, "loss": 2.1298, "step": 17769 }, { "epoch": 0.59, "grad_norm": 0.7139295339584351, "learning_rate": 7.382646696788353e-06, "loss": 1.9787, "step": 17770 }, { "epoch": 0.59, "grad_norm": 0.7355831265449524, "learning_rate": 7.381620871417718e-06, "loss": 2.0699, "step": 17771 }, { "epoch": 0.59, "grad_norm": 0.7565092444419861, "learning_rate": 7.380595075628006e-06, "loss": 2.0299, "step": 17772 }, { "epoch": 0.59, "grad_norm": 0.7425851225852966, "learning_rate": 7.379569309430785e-06, "loss": 2.0613, "step": 17773 }, { "epoch": 0.59, "grad_norm": 0.7438064813613892, "learning_rate": 7.378543572837659e-06, "loss": 2.0475, "step": 17774 }, { "epoch": 0.59, "grad_norm": 0.723822832107544, "learning_rate": 7.377517865860211e-06, "loss": 2.0264, "step": 17775 }, { "epoch": 0.59, "grad_norm": 0.7587792277336121, "learning_rate": 7.376492188510029e-06, "loss": 2.1468, "step": 17776 }, { "epoch": 0.59, "grad_norm": 0.7258195877075195, "learning_rate": 7.375466540798701e-06, "loss": 2.0642, "step": 17777 }, { "epoch": 0.59, "grad_norm": 0.727118194103241, "learning_rate": 7.374440922737813e-06, "loss": 1.9957, "step": 17778 }, { "epoch": 0.59, "grad_norm": 0.7043047547340393, "learning_rate": 7.373415334338952e-06, "loss": 2.0089, "step": 17779 }, { "epoch": 0.59, "grad_norm": 0.7414527535438538, "learning_rate": 7.3723897756137065e-06, "loss": 1.9953, "step": 17780 }, { "epoch": 0.59, "grad_norm": 0.7334194183349609, "learning_rate": 7.371364246573664e-06, "loss": 2.1033, "step": 17781 }, { "epoch": 0.59, "grad_norm": 0.7746586799621582, "learning_rate": 7.3703387472304044e-06, "loss": 2.1177, "step": 17782 }, { "epoch": 0.59, "grad_norm": 0.752119243144989, "learning_rate": 7.369313277595516e-06, "loss": 2.0425, "step": 17783 }, { "epoch": 0.59, "grad_norm": 0.7389459013938904, "learning_rate": 7.368287837680587e-06, "loss": 2.1192, "step": 17784 }, { "epoch": 0.59, "grad_norm": 0.7919149994850159, "learning_rate": 7.367262427497195e-06, "loss": 2.0668, "step": 17785 }, { "epoch": 0.59, "grad_norm": 0.743135929107666, "learning_rate": 7.366237047056937e-06, "loss": 2.0587, "step": 17786 }, { "epoch": 0.59, "grad_norm": 0.7341259121894836, "learning_rate": 7.365211696371383e-06, "loss": 2.0018, "step": 17787 }, { "epoch": 0.59, "grad_norm": 0.7511879205703735, "learning_rate": 7.364186375452125e-06, "loss": 2.0657, "step": 17788 }, { "epoch": 0.59, "grad_norm": 0.7610931396484375, "learning_rate": 7.363161084310744e-06, "loss": 2.0686, "step": 17789 }, { "epoch": 0.59, "grad_norm": 0.7377996444702148, "learning_rate": 7.362135822958826e-06, "loss": 2.1125, "step": 17790 }, { "epoch": 0.59, "grad_norm": 0.7370397448539734, "learning_rate": 7.361110591407949e-06, "loss": 2.0606, "step": 17791 }, { "epoch": 0.59, "grad_norm": 0.7400810718536377, "learning_rate": 7.360085389669699e-06, "loss": 2.0475, "step": 17792 }, { "epoch": 0.59, "grad_norm": 0.7479839324951172, "learning_rate": 7.359060217755655e-06, "loss": 2.0531, "step": 17793 }, { "epoch": 0.59, "grad_norm": 0.7449530959129333, "learning_rate": 7.358035075677407e-06, "loss": 2.0178, "step": 17794 }, { "epoch": 0.59, "grad_norm": 0.7437887191772461, "learning_rate": 7.357009963446524e-06, "loss": 2.0982, "step": 17795 }, { "epoch": 0.59, "grad_norm": 0.7237464189529419, "learning_rate": 7.355984881074595e-06, "loss": 2.1313, "step": 17796 }, { "epoch": 0.59, "grad_norm": 0.7515939474105835, "learning_rate": 7.3549598285732e-06, "loss": 2.0629, "step": 17797 }, { "epoch": 0.59, "grad_norm": 0.7517289519309998, "learning_rate": 7.353934805953918e-06, "loss": 2.0639, "step": 17798 }, { "epoch": 0.59, "grad_norm": 0.7352995872497559, "learning_rate": 7.352909813228332e-06, "loss": 2.0118, "step": 17799 }, { "epoch": 0.59, "grad_norm": 0.7311394214630127, "learning_rate": 7.351884850408019e-06, "loss": 2.0324, "step": 17800 }, { "epoch": 0.59, "grad_norm": 0.7241724729537964, "learning_rate": 7.350859917504556e-06, "loss": 2.0373, "step": 17801 }, { "epoch": 0.59, "grad_norm": 0.737528383731842, "learning_rate": 7.349835014529527e-06, "loss": 2.1413, "step": 17802 }, { "epoch": 0.59, "grad_norm": 0.7196407318115234, "learning_rate": 7.3488101414945115e-06, "loss": 2.0368, "step": 17803 }, { "epoch": 0.59, "grad_norm": 0.7350615859031677, "learning_rate": 7.347785298411081e-06, "loss": 2.0961, "step": 17804 }, { "epoch": 0.59, "grad_norm": 0.7256792783737183, "learning_rate": 7.3467604852908205e-06, "loss": 2.0103, "step": 17805 }, { "epoch": 0.59, "grad_norm": 0.7421337962150574, "learning_rate": 7.345735702145303e-06, "loss": 2.0138, "step": 17806 }, { "epoch": 0.59, "grad_norm": 0.7479231953620911, "learning_rate": 7.344710948986107e-06, "loss": 2.0582, "step": 17807 }, { "epoch": 0.59, "grad_norm": 0.7323600053787231, "learning_rate": 7.343686225824818e-06, "loss": 2.072, "step": 17808 }, { "epoch": 0.59, "grad_norm": 0.7286627888679504, "learning_rate": 7.342661532672996e-06, "loss": 2.0709, "step": 17809 }, { "epoch": 0.59, "grad_norm": 0.7404723763465881, "learning_rate": 7.341636869542232e-06, "loss": 2.0659, "step": 17810 }, { "epoch": 0.59, "grad_norm": 0.7348681688308716, "learning_rate": 7.3406122364440956e-06, "loss": 2.039, "step": 17811 }, { "epoch": 0.59, "grad_norm": 0.7604536414146423, "learning_rate": 7.339587633390164e-06, "loss": 2.0932, "step": 17812 }, { "epoch": 0.59, "grad_norm": 0.7365281581878662, "learning_rate": 7.3385630603920125e-06, "loss": 2.1849, "step": 17813 }, { "epoch": 0.59, "grad_norm": 0.7569277882575989, "learning_rate": 7.337538517461213e-06, "loss": 2.1177, "step": 17814 }, { "epoch": 0.59, "grad_norm": 0.7005457878112793, "learning_rate": 7.336514004609343e-06, "loss": 2.0606, "step": 17815 }, { "epoch": 0.59, "grad_norm": 0.706281840801239, "learning_rate": 7.335489521847979e-06, "loss": 2.0905, "step": 17816 }, { "epoch": 0.59, "grad_norm": 0.7244832515716553, "learning_rate": 7.3344650691886944e-06, "loss": 2.0739, "step": 17817 }, { "epoch": 0.59, "grad_norm": 0.7549883723258972, "learning_rate": 7.33344064664306e-06, "loss": 2.1149, "step": 17818 }, { "epoch": 0.59, "grad_norm": 0.7273354530334473, "learning_rate": 7.3324162542226496e-06, "loss": 2.1087, "step": 17819 }, { "epoch": 0.59, "grad_norm": 0.7335820198059082, "learning_rate": 7.331391891939037e-06, "loss": 2.0354, "step": 17820 }, { "epoch": 0.59, "grad_norm": 0.742071270942688, "learning_rate": 7.330367559803797e-06, "loss": 2.0625, "step": 17821 }, { "epoch": 0.59, "grad_norm": 0.7222149968147278, "learning_rate": 7.3293432578284964e-06, "loss": 2.038, "step": 17822 }, { "epoch": 0.59, "grad_norm": 0.7608742117881775, "learning_rate": 7.3283189860247095e-06, "loss": 2.0672, "step": 17823 }, { "epoch": 0.59, "grad_norm": 0.749167799949646, "learning_rate": 7.327294744404012e-06, "loss": 2.0734, "step": 17824 }, { "epoch": 0.59, "grad_norm": 0.7248327732086182, "learning_rate": 7.326270532977972e-06, "loss": 2.0268, "step": 17825 }, { "epoch": 0.59, "grad_norm": 0.8058667182922363, "learning_rate": 7.325246351758162e-06, "loss": 2.011, "step": 17826 }, { "epoch": 0.59, "grad_norm": 0.7379785776138306, "learning_rate": 7.324222200756148e-06, "loss": 2.0337, "step": 17827 }, { "epoch": 0.59, "grad_norm": 0.7427505850791931, "learning_rate": 7.323198079983504e-06, "loss": 2.0518, "step": 17828 }, { "epoch": 0.59, "grad_norm": 0.7466215491294861, "learning_rate": 7.322173989451798e-06, "loss": 2.0631, "step": 17829 }, { "epoch": 0.59, "grad_norm": 0.7374353408813477, "learning_rate": 7.321149929172606e-06, "loss": 2.0333, "step": 17830 }, { "epoch": 0.59, "grad_norm": 0.7147338390350342, "learning_rate": 7.320125899157488e-06, "loss": 2.0453, "step": 17831 }, { "epoch": 0.59, "grad_norm": 0.7602620720863342, "learning_rate": 7.319101899418018e-06, "loss": 2.0495, "step": 17832 }, { "epoch": 0.59, "grad_norm": 0.756175696849823, "learning_rate": 7.318077929965763e-06, "loss": 2.0907, "step": 17833 }, { "epoch": 0.59, "grad_norm": 0.7279608249664307, "learning_rate": 7.3170539908122936e-06, "loss": 2.0774, "step": 17834 }, { "epoch": 0.59, "grad_norm": 0.758105993270874, "learning_rate": 7.316030081969174e-06, "loss": 2.0671, "step": 17835 }, { "epoch": 0.59, "grad_norm": 0.7391592264175415, "learning_rate": 7.315006203447974e-06, "loss": 2.0209, "step": 17836 }, { "epoch": 0.59, "grad_norm": 0.7743181586265564, "learning_rate": 7.313982355260259e-06, "loss": 2.0719, "step": 17837 }, { "epoch": 0.59, "grad_norm": 0.7753261923789978, "learning_rate": 7.312958537417598e-06, "loss": 1.9928, "step": 17838 }, { "epoch": 0.59, "grad_norm": 0.7553514242172241, "learning_rate": 7.311934749931559e-06, "loss": 2.1084, "step": 17839 }, { "epoch": 0.59, "grad_norm": 0.762657880783081, "learning_rate": 7.3109109928137046e-06, "loss": 2.1374, "step": 17840 }, { "epoch": 0.59, "grad_norm": 0.7669672966003418, "learning_rate": 7.309887266075601e-06, "loss": 2.0134, "step": 17841 }, { "epoch": 0.59, "grad_norm": 0.7367768883705139, "learning_rate": 7.308863569728816e-06, "loss": 2.0721, "step": 17842 }, { "epoch": 0.59, "grad_norm": 0.7719810009002686, "learning_rate": 7.307839903784913e-06, "loss": 2.0269, "step": 17843 }, { "epoch": 0.59, "grad_norm": 0.7666747570037842, "learning_rate": 7.306816268255457e-06, "loss": 2.0814, "step": 17844 }, { "epoch": 0.59, "grad_norm": 0.7137101292610168, "learning_rate": 7.305792663152009e-06, "loss": 2.0167, "step": 17845 }, { "epoch": 0.59, "grad_norm": 0.7196882367134094, "learning_rate": 7.304769088486139e-06, "loss": 2.0055, "step": 17846 }, { "epoch": 0.59, "grad_norm": 0.7837442755699158, "learning_rate": 7.303745544269408e-06, "loss": 2.0205, "step": 17847 }, { "epoch": 0.59, "grad_norm": 0.7269256114959717, "learning_rate": 7.3027220305133825e-06, "loss": 2.0144, "step": 17848 }, { "epoch": 0.59, "grad_norm": 0.7971704602241516, "learning_rate": 7.301698547229621e-06, "loss": 2.0444, "step": 17849 }, { "epoch": 0.59, "grad_norm": 0.7280442714691162, "learning_rate": 7.300675094429687e-06, "loss": 2.1118, "step": 17850 }, { "epoch": 0.59, "grad_norm": 0.7282776832580566, "learning_rate": 7.299651672125141e-06, "loss": 2.0868, "step": 17851 }, { "epoch": 0.59, "grad_norm": 0.724920928478241, "learning_rate": 7.298628280327555e-06, "loss": 2.0116, "step": 17852 }, { "epoch": 0.59, "grad_norm": 0.7390250563621521, "learning_rate": 7.297604919048477e-06, "loss": 2.0596, "step": 17853 }, { "epoch": 0.59, "grad_norm": 0.729977011680603, "learning_rate": 7.296581588299478e-06, "loss": 2.0348, "step": 17854 }, { "epoch": 0.59, "grad_norm": 0.7874935269355774, "learning_rate": 7.295558288092115e-06, "loss": 2.1174, "step": 17855 }, { "epoch": 0.59, "grad_norm": 0.7345505952835083, "learning_rate": 7.294535018437952e-06, "loss": 2.0861, "step": 17856 }, { "epoch": 0.59, "grad_norm": 0.7641611099243164, "learning_rate": 7.293511779348543e-06, "loss": 2.1065, "step": 17857 }, { "epoch": 0.59, "grad_norm": 0.7234377264976501, "learning_rate": 7.292488570835454e-06, "loss": 2.0995, "step": 17858 }, { "epoch": 0.59, "grad_norm": 0.7408335208892822, "learning_rate": 7.29146539291024e-06, "loss": 2.0377, "step": 17859 }, { "epoch": 0.59, "grad_norm": 0.7750611901283264, "learning_rate": 7.290442245584463e-06, "loss": 2.0812, "step": 17860 }, { "epoch": 0.59, "grad_norm": 0.7305443286895752, "learning_rate": 7.289419128869686e-06, "loss": 2.1033, "step": 17861 }, { "epoch": 0.59, "grad_norm": 0.7511100769042969, "learning_rate": 7.2883960427774596e-06, "loss": 2.0365, "step": 17862 }, { "epoch": 0.59, "grad_norm": 0.7300098538398743, "learning_rate": 7.287372987319345e-06, "loss": 2.0811, "step": 17863 }, { "epoch": 0.59, "grad_norm": 0.7811533212661743, "learning_rate": 7.2863499625069e-06, "loss": 2.0463, "step": 17864 }, { "epoch": 0.59, "grad_norm": 0.7321670055389404, "learning_rate": 7.285326968351689e-06, "loss": 2.0697, "step": 17865 }, { "epoch": 0.59, "grad_norm": 0.757247805595398, "learning_rate": 7.284304004865257e-06, "loss": 2.0987, "step": 17866 }, { "epoch": 0.59, "grad_norm": 0.7395954728126526, "learning_rate": 7.283281072059166e-06, "loss": 2.0657, "step": 17867 }, { "epoch": 0.59, "grad_norm": 0.7651152014732361, "learning_rate": 7.282258169944975e-06, "loss": 2.0651, "step": 17868 }, { "epoch": 0.59, "grad_norm": 0.778978168964386, "learning_rate": 7.2812352985342395e-06, "loss": 2.0186, "step": 17869 }, { "epoch": 0.59, "grad_norm": 0.7850691080093384, "learning_rate": 7.280212457838516e-06, "loss": 2.0577, "step": 17870 }, { "epoch": 0.59, "grad_norm": 0.7409400343894958, "learning_rate": 7.279189647869355e-06, "loss": 2.1197, "step": 17871 }, { "epoch": 0.59, "grad_norm": 0.7363841533660889, "learning_rate": 7.278166868638314e-06, "loss": 2.0981, "step": 17872 }, { "epoch": 0.59, "grad_norm": 0.7663241028785706, "learning_rate": 7.277144120156949e-06, "loss": 2.0634, "step": 17873 }, { "epoch": 0.59, "grad_norm": 0.7524129748344421, "learning_rate": 7.27612140243682e-06, "loss": 2.0284, "step": 17874 }, { "epoch": 0.59, "grad_norm": 0.7456262111663818, "learning_rate": 7.275098715489468e-06, "loss": 2.0129, "step": 17875 }, { "epoch": 0.59, "grad_norm": 0.7135251760482788, "learning_rate": 7.274076059326456e-06, "loss": 2.1406, "step": 17876 }, { "epoch": 0.59, "grad_norm": 0.7329150438308716, "learning_rate": 7.273053433959334e-06, "loss": 2.1283, "step": 17877 }, { "epoch": 0.59, "grad_norm": 0.7537440061569214, "learning_rate": 7.272030839399655e-06, "loss": 2.0728, "step": 17878 }, { "epoch": 0.59, "grad_norm": 0.7616793513298035, "learning_rate": 7.27100827565898e-06, "loss": 2.0959, "step": 17879 }, { "epoch": 0.59, "grad_norm": 0.7395745515823364, "learning_rate": 7.269985742748847e-06, "loss": 2.0758, "step": 17880 }, { "epoch": 0.59, "grad_norm": 0.7580609917640686, "learning_rate": 7.268963240680816e-06, "loss": 1.9875, "step": 17881 }, { "epoch": 0.59, "grad_norm": 0.7498340606689453, "learning_rate": 7.267940769466439e-06, "loss": 2.1184, "step": 17882 }, { "epoch": 0.59, "grad_norm": 0.7456172704696655, "learning_rate": 7.266918329117268e-06, "loss": 2.0155, "step": 17883 }, { "epoch": 0.6, "grad_norm": 0.721441924571991, "learning_rate": 7.26589591964485e-06, "loss": 2.0668, "step": 17884 }, { "epoch": 0.6, "grad_norm": 0.7541223764419556, "learning_rate": 7.2648735410607375e-06, "loss": 2.1293, "step": 17885 }, { "epoch": 0.6, "grad_norm": 0.733963668346405, "learning_rate": 7.2638511933764785e-06, "loss": 2.0249, "step": 17886 }, { "epoch": 0.6, "grad_norm": 0.7305247783660889, "learning_rate": 7.262828876603632e-06, "loss": 1.9565, "step": 17887 }, { "epoch": 0.6, "grad_norm": 0.7469213008880615, "learning_rate": 7.261806590753735e-06, "loss": 2.0329, "step": 17888 }, { "epoch": 0.6, "grad_norm": 0.7154189944267273, "learning_rate": 7.260784335838342e-06, "loss": 2.074, "step": 17889 }, { "epoch": 0.6, "grad_norm": 0.7596710324287415, "learning_rate": 7.259762111869004e-06, "loss": 2.0388, "step": 17890 }, { "epoch": 0.6, "grad_norm": 0.7186411619186401, "learning_rate": 7.258739918857268e-06, "loss": 2.0028, "step": 17891 }, { "epoch": 0.6, "grad_norm": 0.7785205245018005, "learning_rate": 7.257717756814684e-06, "loss": 2.1362, "step": 17892 }, { "epoch": 0.6, "grad_norm": 0.744401752948761, "learning_rate": 7.2566956257527955e-06, "loss": 2.0834, "step": 17893 }, { "epoch": 0.6, "grad_norm": 0.769404947757721, "learning_rate": 7.255673525683149e-06, "loss": 2.0605, "step": 17894 }, { "epoch": 0.6, "grad_norm": 0.7238067388534546, "learning_rate": 7.2546514566172976e-06, "loss": 2.0423, "step": 17895 }, { "epoch": 0.6, "grad_norm": 0.7312318682670593, "learning_rate": 7.253629418566788e-06, "loss": 2.0546, "step": 17896 }, { "epoch": 0.6, "grad_norm": 0.7429822683334351, "learning_rate": 7.2526074115431596e-06, "loss": 2.1178, "step": 17897 }, { "epoch": 0.6, "grad_norm": 0.760276198387146, "learning_rate": 7.251585435557964e-06, "loss": 2.0882, "step": 17898 }, { "epoch": 0.6, "grad_norm": 0.7697278261184692, "learning_rate": 7.250563490622744e-06, "loss": 2.0796, "step": 17899 }, { "epoch": 0.6, "grad_norm": 0.7592955827713013, "learning_rate": 7.2495415767490455e-06, "loss": 2.0276, "step": 17900 }, { "epoch": 0.6, "grad_norm": 0.7309766411781311, "learning_rate": 7.2485196939484206e-06, "loss": 2.0738, "step": 17901 }, { "epoch": 0.6, "grad_norm": 0.753669261932373, "learning_rate": 7.2474978422324005e-06, "loss": 2.0784, "step": 17902 }, { "epoch": 0.6, "grad_norm": 0.7397944927215576, "learning_rate": 7.24647602161254e-06, "loss": 2.0599, "step": 17903 }, { "epoch": 0.6, "grad_norm": 0.7328388094902039, "learning_rate": 7.245454232100379e-06, "loss": 2.0207, "step": 17904 }, { "epoch": 0.6, "grad_norm": 0.7648415565490723, "learning_rate": 7.244432473707463e-06, "loss": 2.087, "step": 17905 }, { "epoch": 0.6, "grad_norm": 0.72069251537323, "learning_rate": 7.243410746445333e-06, "loss": 2.0721, "step": 17906 }, { "epoch": 0.6, "grad_norm": 0.759331464767456, "learning_rate": 7.242389050325534e-06, "loss": 2.0027, "step": 17907 }, { "epoch": 0.6, "grad_norm": 0.7365291714668274, "learning_rate": 7.241367385359603e-06, "loss": 2.0525, "step": 17908 }, { "epoch": 0.6, "grad_norm": 0.7261979579925537, "learning_rate": 7.240345751559094e-06, "loss": 2.002, "step": 17909 }, { "epoch": 0.6, "grad_norm": 0.730476438999176, "learning_rate": 7.239324148935534e-06, "loss": 2.0873, "step": 17910 }, { "epoch": 0.6, "grad_norm": 0.7355251312255859, "learning_rate": 7.2383025775004755e-06, "loss": 2.0648, "step": 17911 }, { "epoch": 0.6, "grad_norm": 0.7314817905426025, "learning_rate": 7.237281037265456e-06, "loss": 2.0702, "step": 17912 }, { "epoch": 0.6, "grad_norm": 0.758685827255249, "learning_rate": 7.2362595282420145e-06, "loss": 2.0887, "step": 17913 }, { "epoch": 0.6, "grad_norm": 0.741740345954895, "learning_rate": 7.235238050441697e-06, "loss": 2.0969, "step": 17914 }, { "epoch": 0.6, "grad_norm": 0.7191538214683533, "learning_rate": 7.2342166038760365e-06, "loss": 2.0898, "step": 17915 }, { "epoch": 0.6, "grad_norm": 0.7320390343666077, "learning_rate": 7.233195188556575e-06, "loss": 2.0545, "step": 17916 }, { "epoch": 0.6, "grad_norm": 0.7380185723304749, "learning_rate": 7.232173804494853e-06, "loss": 2.0369, "step": 17917 }, { "epoch": 0.6, "grad_norm": 0.7580181956291199, "learning_rate": 7.231152451702412e-06, "loss": 2.0429, "step": 17918 }, { "epoch": 0.6, "grad_norm": 0.7267543077468872, "learning_rate": 7.230131130190786e-06, "loss": 2.0553, "step": 17919 }, { "epoch": 0.6, "grad_norm": 0.7393495440483093, "learning_rate": 7.229109839971515e-06, "loss": 2.099, "step": 17920 }, { "epoch": 0.6, "grad_norm": 0.7081518173217773, "learning_rate": 7.228088581056138e-06, "loss": 2.0878, "step": 17921 }, { "epoch": 0.6, "grad_norm": 0.7415730953216553, "learning_rate": 7.227067353456189e-06, "loss": 2.0445, "step": 17922 }, { "epoch": 0.6, "grad_norm": 0.753551721572876, "learning_rate": 7.226046157183215e-06, "loss": 2.1551, "step": 17923 }, { "epoch": 0.6, "grad_norm": 0.7541089653968811, "learning_rate": 7.225024992248738e-06, "loss": 2.1355, "step": 17924 }, { "epoch": 0.6, "grad_norm": 0.7250111103057861, "learning_rate": 7.224003858664306e-06, "loss": 2.0474, "step": 17925 }, { "epoch": 0.6, "grad_norm": 0.742510974407196, "learning_rate": 7.22298275644145e-06, "loss": 2.1101, "step": 17926 }, { "epoch": 0.6, "grad_norm": 0.7271057963371277, "learning_rate": 7.22196168559171e-06, "loss": 2.0062, "step": 17927 }, { "epoch": 0.6, "grad_norm": 0.7311848402023315, "learning_rate": 7.220940646126617e-06, "loss": 2.0787, "step": 17928 }, { "epoch": 0.6, "grad_norm": 0.7355474829673767, "learning_rate": 7.219919638057709e-06, "loss": 2.103, "step": 17929 }, { "epoch": 0.6, "grad_norm": 0.7226027846336365, "learning_rate": 7.218898661396516e-06, "loss": 2.0114, "step": 17930 }, { "epoch": 0.6, "grad_norm": 0.7263904213905334, "learning_rate": 7.217877716154578e-06, "loss": 2.0872, "step": 17931 }, { "epoch": 0.6, "grad_norm": 0.7259615063667297, "learning_rate": 7.2168568023434305e-06, "loss": 2.0047, "step": 17932 }, { "epoch": 0.6, "grad_norm": 0.7554442882537842, "learning_rate": 7.215835919974601e-06, "loss": 2.0629, "step": 17933 }, { "epoch": 0.6, "grad_norm": 0.7583115696907043, "learning_rate": 7.214815069059624e-06, "loss": 2.1538, "step": 17934 }, { "epoch": 0.6, "grad_norm": 0.7534477710723877, "learning_rate": 7.213794249610036e-06, "loss": 2.0956, "step": 17935 }, { "epoch": 0.6, "grad_norm": 0.7588562965393066, "learning_rate": 7.212773461637369e-06, "loss": 2.066, "step": 17936 }, { "epoch": 0.6, "grad_norm": 0.7516148686408997, "learning_rate": 7.211752705153152e-06, "loss": 2.0868, "step": 17937 }, { "epoch": 0.6, "grad_norm": 0.7448422908782959, "learning_rate": 7.210731980168917e-06, "loss": 2.031, "step": 17938 }, { "epoch": 0.6, "grad_norm": 0.7475968599319458, "learning_rate": 7.2097112866961995e-06, "loss": 2.021, "step": 17939 }, { "epoch": 0.6, "grad_norm": 0.7238162755966187, "learning_rate": 7.208690624746531e-06, "loss": 2.0914, "step": 17940 }, { "epoch": 0.6, "grad_norm": 0.7308852076530457, "learning_rate": 7.207669994331436e-06, "loss": 2.0549, "step": 17941 }, { "epoch": 0.6, "grad_norm": 0.7234670519828796, "learning_rate": 7.2066493954624515e-06, "loss": 2.0747, "step": 17942 }, { "epoch": 0.6, "grad_norm": 0.7665688395500183, "learning_rate": 7.205628828151105e-06, "loss": 2.0923, "step": 17943 }, { "epoch": 0.6, "grad_norm": 0.7416234612464905, "learning_rate": 7.2046082924089225e-06, "loss": 2.104, "step": 17944 }, { "epoch": 0.6, "grad_norm": 0.7242316603660583, "learning_rate": 7.203587788247444e-06, "loss": 2.0429, "step": 17945 }, { "epoch": 0.6, "grad_norm": 0.7783018946647644, "learning_rate": 7.202567315678186e-06, "loss": 2.0479, "step": 17946 }, { "epoch": 0.6, "grad_norm": 0.7264226078987122, "learning_rate": 7.201546874712685e-06, "loss": 2.0087, "step": 17947 }, { "epoch": 0.6, "grad_norm": 0.7330355644226074, "learning_rate": 7.200526465362467e-06, "loss": 2.0535, "step": 17948 }, { "epoch": 0.6, "grad_norm": 0.7609871625900269, "learning_rate": 7.1995060876390635e-06, "loss": 2.1133, "step": 17949 }, { "epoch": 0.6, "grad_norm": 0.7337692975997925, "learning_rate": 7.198485741553996e-06, "loss": 2.034, "step": 17950 }, { "epoch": 0.6, "grad_norm": 0.7522493004798889, "learning_rate": 7.197465427118795e-06, "loss": 2.0977, "step": 17951 }, { "epoch": 0.6, "grad_norm": 0.7586632966995239, "learning_rate": 7.1964451443449835e-06, "loss": 2.0329, "step": 17952 }, { "epoch": 0.6, "grad_norm": 0.7458822727203369, "learning_rate": 7.195424893244096e-06, "loss": 2.0068, "step": 17953 }, { "epoch": 0.6, "grad_norm": 0.7102757692337036, "learning_rate": 7.194404673827655e-06, "loss": 2.0581, "step": 17954 }, { "epoch": 0.6, "grad_norm": 0.770132839679718, "learning_rate": 7.193384486107185e-06, "loss": 2.0479, "step": 17955 }, { "epoch": 0.6, "grad_norm": 0.7801522016525269, "learning_rate": 7.192364330094211e-06, "loss": 2.0888, "step": 17956 }, { "epoch": 0.6, "grad_norm": 0.717063307762146, "learning_rate": 7.191344205800258e-06, "loss": 2.0847, "step": 17957 }, { "epoch": 0.6, "grad_norm": 0.7659975290298462, "learning_rate": 7.1903241132368585e-06, "loss": 2.0721, "step": 17958 }, { "epoch": 0.6, "grad_norm": 0.7504226565361023, "learning_rate": 7.189304052415527e-06, "loss": 2.1044, "step": 17959 }, { "epoch": 0.6, "grad_norm": 0.7535562515258789, "learning_rate": 7.188284023347787e-06, "loss": 2.033, "step": 17960 }, { "epoch": 0.6, "grad_norm": 0.8167473673820496, "learning_rate": 7.18726402604517e-06, "loss": 2.0448, "step": 17961 }, { "epoch": 0.6, "grad_norm": 0.7546722292900085, "learning_rate": 7.186244060519194e-06, "loss": 2.015, "step": 17962 }, { "epoch": 0.6, "grad_norm": 0.7364355325698853, "learning_rate": 7.185224126781387e-06, "loss": 2.133, "step": 17963 }, { "epoch": 0.6, "grad_norm": 0.7394760251045227, "learning_rate": 7.184204224843266e-06, "loss": 2.1231, "step": 17964 }, { "epoch": 0.6, "grad_norm": 0.7557095885276794, "learning_rate": 7.183184354716353e-06, "loss": 2.0895, "step": 17965 }, { "epoch": 0.6, "grad_norm": 0.7203226685523987, "learning_rate": 7.182164516412176e-06, "loss": 1.9864, "step": 17966 }, { "epoch": 0.6, "grad_norm": 0.7318986058235168, "learning_rate": 7.181144709942255e-06, "loss": 2.0836, "step": 17967 }, { "epoch": 0.6, "grad_norm": 0.7299092411994934, "learning_rate": 7.180124935318104e-06, "loss": 2.0121, "step": 17968 }, { "epoch": 0.6, "grad_norm": 0.71380215883255, "learning_rate": 7.17910519255125e-06, "loss": 2.0794, "step": 17969 }, { "epoch": 0.6, "grad_norm": 0.7712464332580566, "learning_rate": 7.178085481653212e-06, "loss": 2.0264, "step": 17970 }, { "epoch": 0.6, "grad_norm": 0.7579473257064819, "learning_rate": 7.177065802635514e-06, "loss": 2.1623, "step": 17971 }, { "epoch": 0.6, "grad_norm": 0.7407003045082092, "learning_rate": 7.176046155509669e-06, "loss": 2.1105, "step": 17972 }, { "epoch": 0.6, "grad_norm": 0.759030282497406, "learning_rate": 7.175026540287198e-06, "loss": 2.0729, "step": 17973 }, { "epoch": 0.6, "grad_norm": 0.7267532348632812, "learning_rate": 7.174006956979624e-06, "loss": 2.0225, "step": 17974 }, { "epoch": 0.6, "grad_norm": 0.7460594177246094, "learning_rate": 7.1729874055984615e-06, "loss": 2.0585, "step": 17975 }, { "epoch": 0.6, "grad_norm": 0.7238723635673523, "learning_rate": 7.171967886155235e-06, "loss": 2.0518, "step": 17976 }, { "epoch": 0.6, "grad_norm": 0.7584162950515747, "learning_rate": 7.170948398661455e-06, "loss": 2.1248, "step": 17977 }, { "epoch": 0.6, "grad_norm": 0.7261293530464172, "learning_rate": 7.169928943128641e-06, "loss": 2.072, "step": 17978 }, { "epoch": 0.6, "grad_norm": 0.733440101146698, "learning_rate": 7.16890951956831e-06, "loss": 1.9702, "step": 17979 }, { "epoch": 0.6, "grad_norm": 0.7274598479270935, "learning_rate": 7.167890127991986e-06, "loss": 2.0399, "step": 17980 }, { "epoch": 0.6, "grad_norm": 0.7679572105407715, "learning_rate": 7.1668707684111724e-06, "loss": 2.0206, "step": 17981 }, { "epoch": 0.6, "grad_norm": 0.7306627035140991, "learning_rate": 7.165851440837396e-06, "loss": 2.0372, "step": 17982 }, { "epoch": 0.6, "grad_norm": 0.7349066734313965, "learning_rate": 7.1648321452821675e-06, "loss": 2.0691, "step": 17983 }, { "epoch": 0.6, "grad_norm": 0.7820791006088257, "learning_rate": 7.1638128817570064e-06, "loss": 2.0598, "step": 17984 }, { "epoch": 0.6, "grad_norm": 0.7475489377975464, "learning_rate": 7.1627936502734255e-06, "loss": 2.1282, "step": 17985 }, { "epoch": 0.6, "grad_norm": 0.737223744392395, "learning_rate": 7.161774450842938e-06, "loss": 2.0838, "step": 17986 }, { "epoch": 0.6, "grad_norm": 0.7610255479812622, "learning_rate": 7.160755283477056e-06, "loss": 2.0312, "step": 17987 }, { "epoch": 0.6, "grad_norm": 0.7358512878417969, "learning_rate": 7.1597361481873e-06, "loss": 2.0384, "step": 17988 }, { "epoch": 0.6, "grad_norm": 0.7503926157951355, "learning_rate": 7.158717044985182e-06, "loss": 2.0614, "step": 17989 }, { "epoch": 0.6, "grad_norm": 0.7606817483901978, "learning_rate": 7.157697973882211e-06, "loss": 2.0713, "step": 17990 }, { "epoch": 0.6, "grad_norm": 0.7520619034767151, "learning_rate": 7.1566789348899035e-06, "loss": 2.0496, "step": 17991 }, { "epoch": 0.6, "grad_norm": 0.7657539248466492, "learning_rate": 7.155659928019772e-06, "loss": 2.0818, "step": 17992 }, { "epoch": 0.6, "grad_norm": 0.7440844178199768, "learning_rate": 7.154640953283323e-06, "loss": 2.012, "step": 17993 }, { "epoch": 0.6, "grad_norm": 0.7560644745826721, "learning_rate": 7.1536220106920806e-06, "loss": 2.0935, "step": 17994 }, { "epoch": 0.6, "grad_norm": 0.7839361429214478, "learning_rate": 7.152603100257543e-06, "loss": 2.1166, "step": 17995 }, { "epoch": 0.6, "grad_norm": 0.7554149627685547, "learning_rate": 7.151584221991228e-06, "loss": 2.1226, "step": 17996 }, { "epoch": 0.6, "grad_norm": 0.7348247766494751, "learning_rate": 7.150565375904646e-06, "loss": 2.1048, "step": 17997 }, { "epoch": 0.6, "grad_norm": 0.7266359925270081, "learning_rate": 7.149546562009307e-06, "loss": 2.0478, "step": 17998 }, { "epoch": 0.6, "grad_norm": 0.7353495955467224, "learning_rate": 7.14852778031672e-06, "loss": 2.0227, "step": 17999 }, { "epoch": 0.6, "grad_norm": 0.7610156536102295, "learning_rate": 7.147509030838393e-06, "loss": 2.0514, "step": 18000 }, { "epoch": 0.6, "grad_norm": 0.7567755579948425, "learning_rate": 7.146490313585837e-06, "loss": 1.9938, "step": 18001 }, { "epoch": 0.6, "grad_norm": 0.7315750122070312, "learning_rate": 7.145471628570567e-06, "loss": 2.0548, "step": 18002 }, { "epoch": 0.6, "grad_norm": 0.7647253274917603, "learning_rate": 7.144452975804079e-06, "loss": 2.1238, "step": 18003 }, { "epoch": 0.6, "grad_norm": 0.7441285252571106, "learning_rate": 7.14343435529789e-06, "loss": 2.0525, "step": 18004 }, { "epoch": 0.6, "grad_norm": 0.7529734373092651, "learning_rate": 7.142415767063506e-06, "loss": 2.1253, "step": 18005 }, { "epoch": 0.6, "grad_norm": 0.7385460138320923, "learning_rate": 7.141397211112433e-06, "loss": 2.0226, "step": 18006 }, { "epoch": 0.6, "grad_norm": 0.7354551553726196, "learning_rate": 7.140378687456182e-06, "loss": 2.0724, "step": 18007 }, { "epoch": 0.6, "grad_norm": 0.7536328434944153, "learning_rate": 7.139360196106254e-06, "loss": 2.0308, "step": 18008 }, { "epoch": 0.6, "grad_norm": 0.7330408692359924, "learning_rate": 7.138341737074157e-06, "loss": 2.0781, "step": 18009 }, { "epoch": 0.6, "grad_norm": 0.7354621291160583, "learning_rate": 7.137323310371398e-06, "loss": 2.0599, "step": 18010 }, { "epoch": 0.6, "grad_norm": 0.7453399896621704, "learning_rate": 7.136304916009487e-06, "loss": 2.1046, "step": 18011 }, { "epoch": 0.6, "grad_norm": 0.7583020329475403, "learning_rate": 7.135286553999921e-06, "loss": 2.0924, "step": 18012 }, { "epoch": 0.6, "grad_norm": 0.7479957342147827, "learning_rate": 7.134268224354208e-06, "loss": 2.0334, "step": 18013 }, { "epoch": 0.6, "grad_norm": 0.7251604199409485, "learning_rate": 7.1332499270838555e-06, "loss": 2.0588, "step": 18014 }, { "epoch": 0.6, "grad_norm": 0.7254505753517151, "learning_rate": 7.132231662200361e-06, "loss": 2.0312, "step": 18015 }, { "epoch": 0.6, "grad_norm": 0.7298362851142883, "learning_rate": 7.1312134297152405e-06, "loss": 2.0419, "step": 18016 }, { "epoch": 0.6, "grad_norm": 0.7568021416664124, "learning_rate": 7.130195229639982e-06, "loss": 2.0727, "step": 18017 }, { "epoch": 0.6, "grad_norm": 0.7288749814033508, "learning_rate": 7.129177061986099e-06, "loss": 2.0768, "step": 18018 }, { "epoch": 0.6, "grad_norm": 0.7445328235626221, "learning_rate": 7.128158926765089e-06, "loss": 2.1317, "step": 18019 }, { "epoch": 0.6, "grad_norm": 0.7336205840110779, "learning_rate": 7.127140823988461e-06, "loss": 2.0374, "step": 18020 }, { "epoch": 0.6, "grad_norm": 0.7346328496932983, "learning_rate": 7.126122753667709e-06, "loss": 2.0645, "step": 18021 }, { "epoch": 0.6, "grad_norm": 0.7292501926422119, "learning_rate": 7.1251047158143375e-06, "loss": 2.0145, "step": 18022 }, { "epoch": 0.6, "grad_norm": 0.7757049798965454, "learning_rate": 7.124086710439846e-06, "loss": 1.9689, "step": 18023 }, { "epoch": 0.6, "grad_norm": 0.7141915559768677, "learning_rate": 7.123068737555741e-06, "loss": 2.0621, "step": 18024 }, { "epoch": 0.6, "grad_norm": 0.7428039908409119, "learning_rate": 7.12205079717352e-06, "loss": 2.05, "step": 18025 }, { "epoch": 0.6, "grad_norm": 0.7673541903495789, "learning_rate": 7.1210328893046806e-06, "loss": 2.085, "step": 18026 }, { "epoch": 0.6, "grad_norm": 0.7300795316696167, "learning_rate": 7.120015013960724e-06, "loss": 2.0322, "step": 18027 }, { "epoch": 0.6, "grad_norm": 0.7526683807373047, "learning_rate": 7.11899717115315e-06, "loss": 2.0871, "step": 18028 }, { "epoch": 0.6, "grad_norm": 0.7445402145385742, "learning_rate": 7.117979360893461e-06, "loss": 2.0928, "step": 18029 }, { "epoch": 0.6, "grad_norm": 0.7442363500595093, "learning_rate": 7.11696158319315e-06, "loss": 2.0099, "step": 18030 }, { "epoch": 0.6, "grad_norm": 0.7453740239143372, "learning_rate": 7.115943838063714e-06, "loss": 2.0634, "step": 18031 }, { "epoch": 0.6, "grad_norm": 0.7569308280944824, "learning_rate": 7.114926125516657e-06, "loss": 2.0504, "step": 18032 }, { "epoch": 0.6, "grad_norm": 0.7178748250007629, "learning_rate": 7.113908445563476e-06, "loss": 2.0273, "step": 18033 }, { "epoch": 0.6, "grad_norm": 0.756657063961029, "learning_rate": 7.112890798215662e-06, "loss": 2.0211, "step": 18034 }, { "epoch": 0.6, "grad_norm": 0.7130996584892273, "learning_rate": 7.111873183484719e-06, "loss": 2.0833, "step": 18035 }, { "epoch": 0.6, "grad_norm": 0.7441720366477966, "learning_rate": 7.110855601382138e-06, "loss": 2.1397, "step": 18036 }, { "epoch": 0.6, "grad_norm": 0.7583101391792297, "learning_rate": 7.1098380519194155e-06, "loss": 1.9749, "step": 18037 }, { "epoch": 0.6, "grad_norm": 0.7455106377601624, "learning_rate": 7.108820535108055e-06, "loss": 2.0437, "step": 18038 }, { "epoch": 0.6, "grad_norm": 0.7399457097053528, "learning_rate": 7.10780305095954e-06, "loss": 2.0697, "step": 18039 }, { "epoch": 0.6, "grad_norm": 0.718306839466095, "learning_rate": 7.106785599485373e-06, "loss": 2.0669, "step": 18040 }, { "epoch": 0.6, "grad_norm": 0.7415761947631836, "learning_rate": 7.1057681806970454e-06, "loss": 2.0279, "step": 18041 }, { "epoch": 0.6, "grad_norm": 0.7630634903907776, "learning_rate": 7.1047507946060565e-06, "loss": 2.0171, "step": 18042 }, { "epoch": 0.6, "grad_norm": 0.7491014003753662, "learning_rate": 7.1037334412238945e-06, "loss": 2.0968, "step": 18043 }, { "epoch": 0.6, "grad_norm": 0.7615128755569458, "learning_rate": 7.1027161205620544e-06, "loss": 2.0475, "step": 18044 }, { "epoch": 0.6, "grad_norm": 0.7651875615119934, "learning_rate": 7.1016988326320265e-06, "loss": 2.1358, "step": 18045 }, { "epoch": 0.6, "grad_norm": 0.7522626519203186, "learning_rate": 7.10068157744531e-06, "loss": 2.058, "step": 18046 }, { "epoch": 0.6, "grad_norm": 0.8102654218673706, "learning_rate": 7.099664355013394e-06, "loss": 1.9938, "step": 18047 }, { "epoch": 0.6, "grad_norm": 0.7514823079109192, "learning_rate": 7.09864716534777e-06, "loss": 2.0167, "step": 18048 }, { "epoch": 0.6, "grad_norm": 0.7498155832290649, "learning_rate": 7.0976300084599305e-06, "loss": 2.0517, "step": 18049 }, { "epoch": 0.6, "grad_norm": 0.7520943880081177, "learning_rate": 7.0966128843613626e-06, "loss": 1.9845, "step": 18050 }, { "epoch": 0.6, "grad_norm": 0.7664876580238342, "learning_rate": 7.095595793063568e-06, "loss": 2.0978, "step": 18051 }, { "epoch": 0.6, "grad_norm": 0.7374511361122131, "learning_rate": 7.094578734578024e-06, "loss": 2.0769, "step": 18052 }, { "epoch": 0.6, "grad_norm": 0.7230625748634338, "learning_rate": 7.093561708916226e-06, "loss": 2.0929, "step": 18053 }, { "epoch": 0.6, "grad_norm": 0.7672091126441956, "learning_rate": 7.092544716089666e-06, "loss": 2.1082, "step": 18054 }, { "epoch": 0.6, "grad_norm": 0.7308668494224548, "learning_rate": 7.091527756109835e-06, "loss": 2.0395, "step": 18055 }, { "epoch": 0.6, "grad_norm": 0.7366390824317932, "learning_rate": 7.090510828988217e-06, "loss": 2.0047, "step": 18056 }, { "epoch": 0.6, "grad_norm": 0.7361094355583191, "learning_rate": 7.089493934736303e-06, "loss": 2.0521, "step": 18057 }, { "epoch": 0.6, "grad_norm": 0.73790442943573, "learning_rate": 7.088477073365576e-06, "loss": 2.0748, "step": 18058 }, { "epoch": 0.6, "grad_norm": 0.7527731657028198, "learning_rate": 7.087460244887532e-06, "loss": 2.0797, "step": 18059 }, { "epoch": 0.6, "grad_norm": 0.7341246604919434, "learning_rate": 7.0864434493136605e-06, "loss": 2.0252, "step": 18060 }, { "epoch": 0.6, "grad_norm": 0.7160708904266357, "learning_rate": 7.085426686655437e-06, "loss": 2.0839, "step": 18061 }, { "epoch": 0.6, "grad_norm": 0.7648302912712097, "learning_rate": 7.084409956924355e-06, "loss": 2.0859, "step": 18062 }, { "epoch": 0.6, "grad_norm": 0.7517544031143188, "learning_rate": 7.083393260131902e-06, "loss": 2.0298, "step": 18063 }, { "epoch": 0.6, "grad_norm": 0.7727470397949219, "learning_rate": 7.082376596289564e-06, "loss": 2.099, "step": 18064 }, { "epoch": 0.6, "grad_norm": 0.7306755185127258, "learning_rate": 7.081359965408825e-06, "loss": 2.0287, "step": 18065 }, { "epoch": 0.6, "grad_norm": 0.7938405871391296, "learning_rate": 7.080343367501166e-06, "loss": 2.0735, "step": 18066 }, { "epoch": 0.6, "grad_norm": 0.7469201683998108, "learning_rate": 7.07932680257808e-06, "loss": 2.0678, "step": 18067 }, { "epoch": 0.6, "grad_norm": 0.7460929751396179, "learning_rate": 7.078310270651048e-06, "loss": 2.0706, "step": 18068 }, { "epoch": 0.6, "grad_norm": 0.7478275299072266, "learning_rate": 7.077293771731557e-06, "loss": 2.0209, "step": 18069 }, { "epoch": 0.6, "grad_norm": 0.7411018013954163, "learning_rate": 7.0762773058310855e-06, "loss": 1.9688, "step": 18070 }, { "epoch": 0.6, "grad_norm": 0.7111914753913879, "learning_rate": 7.075260872961118e-06, "loss": 2.0359, "step": 18071 }, { "epoch": 0.6, "grad_norm": 0.7612175345420837, "learning_rate": 7.07424447313314e-06, "loss": 2.1008, "step": 18072 }, { "epoch": 0.6, "grad_norm": 0.7553202509880066, "learning_rate": 7.0732281063586375e-06, "loss": 2.0742, "step": 18073 }, { "epoch": 0.6, "grad_norm": 0.7372733950614929, "learning_rate": 7.072211772649083e-06, "loss": 2.0548, "step": 18074 }, { "epoch": 0.6, "grad_norm": 0.7415850162506104, "learning_rate": 7.0711954720159656e-06, "loss": 2.0382, "step": 18075 }, { "epoch": 0.6, "grad_norm": 0.7618895173072815, "learning_rate": 7.0701792044707665e-06, "loss": 2.0445, "step": 18076 }, { "epoch": 0.6, "grad_norm": 0.7362727522850037, "learning_rate": 7.069162970024963e-06, "loss": 2.0706, "step": 18077 }, { "epoch": 0.6, "grad_norm": 0.731784462928772, "learning_rate": 7.0681467686900426e-06, "loss": 2.0291, "step": 18078 }, { "epoch": 0.6, "grad_norm": 0.7363026142120361, "learning_rate": 7.067130600477481e-06, "loss": 2.0285, "step": 18079 }, { "epoch": 0.6, "grad_norm": 0.734635591506958, "learning_rate": 7.0661144653987546e-06, "loss": 1.9913, "step": 18080 }, { "epoch": 0.6, "grad_norm": 0.7456371188163757, "learning_rate": 7.06509836346535e-06, "loss": 2.0694, "step": 18081 }, { "epoch": 0.6, "grad_norm": 0.7284636497497559, "learning_rate": 7.064082294688748e-06, "loss": 2.0142, "step": 18082 }, { "epoch": 0.6, "grad_norm": 0.8021695613861084, "learning_rate": 7.06306625908042e-06, "loss": 2.0865, "step": 18083 }, { "epoch": 0.6, "grad_norm": 0.7355048060417175, "learning_rate": 7.062050256651849e-06, "loss": 2.0367, "step": 18084 }, { "epoch": 0.6, "grad_norm": 0.7409468293190002, "learning_rate": 7.061034287414512e-06, "loss": 2.0342, "step": 18085 }, { "epoch": 0.6, "grad_norm": 0.7389208078384399, "learning_rate": 7.0600183513798895e-06, "loss": 2.0909, "step": 18086 }, { "epoch": 0.6, "grad_norm": 0.7692319750785828, "learning_rate": 7.0590024485594535e-06, "loss": 2.0929, "step": 18087 }, { "epoch": 0.6, "grad_norm": 0.7708553671836853, "learning_rate": 7.057986578964683e-06, "loss": 2.0585, "step": 18088 }, { "epoch": 0.6, "grad_norm": 0.7319138646125793, "learning_rate": 7.056970742607058e-06, "loss": 2.1247, "step": 18089 }, { "epoch": 0.6, "grad_norm": 0.7338038682937622, "learning_rate": 7.055954939498053e-06, "loss": 2.0439, "step": 18090 }, { "epoch": 0.6, "grad_norm": 0.7281038165092468, "learning_rate": 7.054939169649144e-06, "loss": 2.002, "step": 18091 }, { "epoch": 0.6, "grad_norm": 0.7459545135498047, "learning_rate": 7.0539234330718065e-06, "loss": 1.9635, "step": 18092 }, { "epoch": 0.6, "grad_norm": 0.7280564308166504, "learning_rate": 7.052907729777515e-06, "loss": 1.9998, "step": 18093 }, { "epoch": 0.6, "grad_norm": 0.7475546002388, "learning_rate": 7.0518920597777426e-06, "loss": 2.0288, "step": 18094 }, { "epoch": 0.6, "grad_norm": 0.748587965965271, "learning_rate": 7.0508764230839726e-06, "loss": 2.102, "step": 18095 }, { "epoch": 0.6, "grad_norm": 0.7370194792747498, "learning_rate": 7.049860819707664e-06, "loss": 1.9967, "step": 18096 }, { "epoch": 0.6, "grad_norm": 0.874679446220398, "learning_rate": 7.048845249660303e-06, "loss": 2.0963, "step": 18097 }, { "epoch": 0.6, "grad_norm": 0.7541933655738831, "learning_rate": 7.047829712953358e-06, "loss": 2.0522, "step": 18098 }, { "epoch": 0.6, "grad_norm": 0.7603085041046143, "learning_rate": 7.046814209598304e-06, "loss": 2.0865, "step": 18099 }, { "epoch": 0.6, "grad_norm": 0.7144136428833008, "learning_rate": 7.045798739606613e-06, "loss": 2.0835, "step": 18100 }, { "epoch": 0.6, "grad_norm": 0.7531644701957703, "learning_rate": 7.044783302989755e-06, "loss": 1.9642, "step": 18101 }, { "epoch": 0.6, "grad_norm": 0.7483905553817749, "learning_rate": 7.0437678997592015e-06, "loss": 2.0714, "step": 18102 }, { "epoch": 0.6, "grad_norm": 0.725131630897522, "learning_rate": 7.042752529926428e-06, "loss": 2.0412, "step": 18103 }, { "epoch": 0.6, "grad_norm": 0.7616822719573975, "learning_rate": 7.041737193502904e-06, "loss": 2.0768, "step": 18104 }, { "epoch": 0.6, "grad_norm": 0.7543684840202332, "learning_rate": 7.0407218905001e-06, "loss": 2.0541, "step": 18105 }, { "epoch": 0.6, "grad_norm": 0.8712818026542664, "learning_rate": 7.039706620929483e-06, "loss": 2.0779, "step": 18106 }, { "epoch": 0.6, "grad_norm": 0.742544412612915, "learning_rate": 7.0386913848025274e-06, "loss": 2.0098, "step": 18107 }, { "epoch": 0.6, "grad_norm": 0.726898193359375, "learning_rate": 7.037676182130697e-06, "loss": 2.1157, "step": 18108 }, { "epoch": 0.6, "grad_norm": 0.7171651721000671, "learning_rate": 7.036661012925473e-06, "loss": 1.9814, "step": 18109 }, { "epoch": 0.6, "grad_norm": 0.7455134391784668, "learning_rate": 7.035645877198309e-06, "loss": 2.054, "step": 18110 }, { "epoch": 0.6, "grad_norm": 0.7424584031105042, "learning_rate": 7.034630774960685e-06, "loss": 2.045, "step": 18111 }, { "epoch": 0.6, "grad_norm": 0.7152026295661926, "learning_rate": 7.033615706224062e-06, "loss": 2.0315, "step": 18112 }, { "epoch": 0.6, "grad_norm": 0.7167842388153076, "learning_rate": 7.0326006709999126e-06, "loss": 2.0923, "step": 18113 }, { "epoch": 0.6, "grad_norm": 0.750282347202301, "learning_rate": 7.031585669299701e-06, "loss": 2.0203, "step": 18114 }, { "epoch": 0.6, "grad_norm": 0.739466667175293, "learning_rate": 7.030570701134893e-06, "loss": 2.0882, "step": 18115 }, { "epoch": 0.6, "grad_norm": 0.7285804748535156, "learning_rate": 7.029555766516956e-06, "loss": 2.0558, "step": 18116 }, { "epoch": 0.6, "grad_norm": 0.7640320658683777, "learning_rate": 7.028540865457364e-06, "loss": 2.1033, "step": 18117 }, { "epoch": 0.6, "grad_norm": 0.7902219295501709, "learning_rate": 7.027525997967569e-06, "loss": 2.0939, "step": 18118 }, { "epoch": 0.6, "grad_norm": 0.7541019320487976, "learning_rate": 7.026511164059046e-06, "loss": 2.0715, "step": 18119 }, { "epoch": 0.6, "grad_norm": 0.734528124332428, "learning_rate": 7.025496363743256e-06, "loss": 2.1014, "step": 18120 }, { "epoch": 0.6, "grad_norm": 0.7371823787689209, "learning_rate": 7.024481597031665e-06, "loss": 2.0798, "step": 18121 }, { "epoch": 0.6, "grad_norm": 0.7510143518447876, "learning_rate": 7.02346686393574e-06, "loss": 2.034, "step": 18122 }, { "epoch": 0.6, "grad_norm": 0.7397037148475647, "learning_rate": 7.022452164466939e-06, "loss": 2.0524, "step": 18123 }, { "epoch": 0.6, "grad_norm": 0.7526743412017822, "learning_rate": 7.021437498636727e-06, "loss": 2.0169, "step": 18124 }, { "epoch": 0.6, "grad_norm": 0.7480762600898743, "learning_rate": 7.020422866456571e-06, "loss": 2.065, "step": 18125 }, { "epoch": 0.6, "grad_norm": 0.749373733997345, "learning_rate": 7.019408267937933e-06, "loss": 2.0272, "step": 18126 }, { "epoch": 0.6, "grad_norm": 0.7428086400032043, "learning_rate": 7.01839370309227e-06, "loss": 2.018, "step": 18127 }, { "epoch": 0.6, "grad_norm": 0.7550248503684998, "learning_rate": 7.017379171931049e-06, "loss": 2.0974, "step": 18128 }, { "epoch": 0.6, "grad_norm": 0.724724292755127, "learning_rate": 7.01636467446573e-06, "loss": 2.0284, "step": 18129 }, { "epoch": 0.6, "grad_norm": 0.7309322953224182, "learning_rate": 7.015350210707772e-06, "loss": 2.0776, "step": 18130 }, { "epoch": 0.6, "grad_norm": 0.7560361623764038, "learning_rate": 7.014335780668644e-06, "loss": 2.1305, "step": 18131 }, { "epoch": 0.6, "grad_norm": 0.7583454251289368, "learning_rate": 7.013321384359795e-06, "loss": 2.0581, "step": 18132 }, { "epoch": 0.6, "grad_norm": 0.7379392385482788, "learning_rate": 7.012307021792692e-06, "loss": 2.0349, "step": 18133 }, { "epoch": 0.6, "grad_norm": 0.7524101734161377, "learning_rate": 7.011292692978795e-06, "loss": 2.0191, "step": 18134 }, { "epoch": 0.6, "grad_norm": 0.7881929874420166, "learning_rate": 7.010278397929562e-06, "loss": 2.0165, "step": 18135 }, { "epoch": 0.6, "grad_norm": 0.7490965127944946, "learning_rate": 7.009264136656451e-06, "loss": 2.1086, "step": 18136 }, { "epoch": 0.6, "grad_norm": 0.7501972913742065, "learning_rate": 7.008249909170919e-06, "loss": 2.0319, "step": 18137 }, { "epoch": 0.6, "grad_norm": 0.7473400831222534, "learning_rate": 7.007235715484426e-06, "loss": 2.0092, "step": 18138 }, { "epoch": 0.6, "grad_norm": 0.71453857421875, "learning_rate": 7.00622155560843e-06, "loss": 1.991, "step": 18139 }, { "epoch": 0.6, "grad_norm": 0.7535525560379028, "learning_rate": 7.005207429554392e-06, "loss": 2.0664, "step": 18140 }, { "epoch": 0.6, "grad_norm": 0.7530552744865417, "learning_rate": 7.004193337333762e-06, "loss": 2.0249, "step": 18141 }, { "epoch": 0.6, "grad_norm": 0.7480345368385315, "learning_rate": 7.003179278958001e-06, "loss": 2.1416, "step": 18142 }, { "epoch": 0.6, "grad_norm": 0.7439345121383667, "learning_rate": 7.002165254438564e-06, "loss": 2.0591, "step": 18143 }, { "epoch": 0.6, "grad_norm": 0.7347488403320312, "learning_rate": 7.00115126378691e-06, "loss": 2.0722, "step": 18144 }, { "epoch": 0.6, "grad_norm": 0.7574864029884338, "learning_rate": 7.0001373070144885e-06, "loss": 2.0725, "step": 18145 }, { "epoch": 0.6, "grad_norm": 0.7754042148590088, "learning_rate": 6.999123384132755e-06, "loss": 2.1173, "step": 18146 }, { "epoch": 0.6, "grad_norm": 0.7491180300712585, "learning_rate": 6.99810949515317e-06, "loss": 2.0697, "step": 18147 }, { "epoch": 0.6, "grad_norm": 0.7150956392288208, "learning_rate": 6.997095640087186e-06, "loss": 2.0872, "step": 18148 }, { "epoch": 0.6, "grad_norm": 0.7521947026252747, "learning_rate": 6.996081818946254e-06, "loss": 2.16, "step": 18149 }, { "epoch": 0.6, "grad_norm": 0.7278939485549927, "learning_rate": 6.99506803174183e-06, "loss": 2.0479, "step": 18150 }, { "epoch": 0.6, "grad_norm": 0.7365953922271729, "learning_rate": 6.994054278485363e-06, "loss": 2.1116, "step": 18151 }, { "epoch": 0.6, "grad_norm": 0.7362545132637024, "learning_rate": 6.993040559188311e-06, "loss": 2.0437, "step": 18152 }, { "epoch": 0.6, "grad_norm": 0.740341067314148, "learning_rate": 6.992026873862128e-06, "loss": 2.0602, "step": 18153 }, { "epoch": 0.6, "grad_norm": 0.7152746319770813, "learning_rate": 6.991013222518257e-06, "loss": 2.0581, "step": 18154 }, { "epoch": 0.6, "grad_norm": 0.7657521367073059, "learning_rate": 6.989999605168158e-06, "loss": 2.044, "step": 18155 }, { "epoch": 0.6, "grad_norm": 0.6964150667190552, "learning_rate": 6.988986021823278e-06, "loss": 1.9869, "step": 18156 }, { "epoch": 0.6, "grad_norm": 0.7674651145935059, "learning_rate": 6.987972472495073e-06, "loss": 2.0541, "step": 18157 }, { "epoch": 0.6, "grad_norm": 0.7327138781547546, "learning_rate": 6.986958957194987e-06, "loss": 2.0214, "step": 18158 }, { "epoch": 0.6, "grad_norm": 0.7678636908531189, "learning_rate": 6.9859454759344715e-06, "loss": 2.1532, "step": 18159 }, { "epoch": 0.6, "grad_norm": 0.7500786185264587, "learning_rate": 6.984932028724979e-06, "loss": 2.0584, "step": 18160 }, { "epoch": 0.6, "grad_norm": 0.7126113176345825, "learning_rate": 6.983918615577957e-06, "loss": 2.0771, "step": 18161 }, { "epoch": 0.6, "grad_norm": 0.7425627708435059, "learning_rate": 6.9829052365048575e-06, "loss": 2.0804, "step": 18162 }, { "epoch": 0.6, "grad_norm": 0.755148708820343, "learning_rate": 6.981891891517124e-06, "loss": 2.1279, "step": 18163 }, { "epoch": 0.6, "grad_norm": 0.7634586691856384, "learning_rate": 6.9808785806262065e-06, "loss": 2.0576, "step": 18164 }, { "epoch": 0.6, "grad_norm": 0.7546915411949158, "learning_rate": 6.979865303843554e-06, "loss": 2.1051, "step": 18165 }, { "epoch": 0.6, "grad_norm": 0.7502654790878296, "learning_rate": 6.978852061180617e-06, "loss": 2.0236, "step": 18166 }, { "epoch": 0.6, "grad_norm": 0.742770254611969, "learning_rate": 6.977838852648834e-06, "loss": 2.0981, "step": 18167 }, { "epoch": 0.6, "grad_norm": 0.7127379179000854, "learning_rate": 6.976825678259658e-06, "loss": 2.0388, "step": 18168 }, { "epoch": 0.6, "grad_norm": 0.7363996505737305, "learning_rate": 6.975812538024533e-06, "loss": 2.0451, "step": 18169 }, { "epoch": 0.6, "grad_norm": 0.7417378425598145, "learning_rate": 6.974799431954906e-06, "loss": 2.0398, "step": 18170 }, { "epoch": 0.6, "grad_norm": 0.7367112636566162, "learning_rate": 6.9737863600622244e-06, "loss": 1.9832, "step": 18171 }, { "epoch": 0.6, "grad_norm": 0.7370618581771851, "learning_rate": 6.97277332235793e-06, "loss": 1.9401, "step": 18172 }, { "epoch": 0.6, "grad_norm": 0.789521336555481, "learning_rate": 6.971760318853465e-06, "loss": 2.0558, "step": 18173 }, { "epoch": 0.6, "grad_norm": 0.7447170615196228, "learning_rate": 6.97074734956028e-06, "loss": 2.0702, "step": 18174 }, { "epoch": 0.6, "grad_norm": 0.7433381080627441, "learning_rate": 6.969734414489817e-06, "loss": 2.0556, "step": 18175 }, { "epoch": 0.6, "grad_norm": 0.7597349882125854, "learning_rate": 6.968721513653518e-06, "loss": 2.0098, "step": 18176 }, { "epoch": 0.6, "grad_norm": 0.720356822013855, "learning_rate": 6.967708647062826e-06, "loss": 1.9944, "step": 18177 }, { "epoch": 0.6, "grad_norm": 0.7344619035720825, "learning_rate": 6.966695814729185e-06, "loss": 2.0315, "step": 18178 }, { "epoch": 0.6, "grad_norm": 0.7849770188331604, "learning_rate": 6.9656830166640386e-06, "loss": 2.067, "step": 18179 }, { "epoch": 0.6, "grad_norm": 0.7438925504684448, "learning_rate": 6.964670252878826e-06, "loss": 2.0682, "step": 18180 }, { "epoch": 0.6, "grad_norm": 0.7585625648498535, "learning_rate": 6.963657523384988e-06, "loss": 2.0561, "step": 18181 }, { "epoch": 0.6, "grad_norm": 0.7291232347488403, "learning_rate": 6.9626448281939685e-06, "loss": 2.0134, "step": 18182 }, { "epoch": 0.6, "grad_norm": 0.7285677790641785, "learning_rate": 6.961632167317209e-06, "loss": 2.0304, "step": 18183 }, { "epoch": 0.6, "grad_norm": 0.723584771156311, "learning_rate": 6.960619540766149e-06, "loss": 2.0925, "step": 18184 }, { "epoch": 0.61, "grad_norm": 0.7189376950263977, "learning_rate": 6.959606948552228e-06, "loss": 2.0081, "step": 18185 }, { "epoch": 0.61, "grad_norm": 0.7332265377044678, "learning_rate": 6.958594390686884e-06, "loss": 2.1064, "step": 18186 }, { "epoch": 0.61, "grad_norm": 0.7597272396087646, "learning_rate": 6.957581867181558e-06, "loss": 2.0967, "step": 18187 }, { "epoch": 0.61, "grad_norm": 0.7286507487297058, "learning_rate": 6.956569378047695e-06, "loss": 2.0868, "step": 18188 }, { "epoch": 0.61, "grad_norm": 0.7588894963264465, "learning_rate": 6.95555692329672e-06, "loss": 2.0954, "step": 18189 }, { "epoch": 0.61, "grad_norm": 0.7559142708778381, "learning_rate": 6.954544502940081e-06, "loss": 2.0429, "step": 18190 }, { "epoch": 0.61, "grad_norm": 0.7189953923225403, "learning_rate": 6.953532116989215e-06, "loss": 2.0861, "step": 18191 }, { "epoch": 0.61, "grad_norm": 0.7446472644805908, "learning_rate": 6.952519765455557e-06, "loss": 2.1248, "step": 18192 }, { "epoch": 0.61, "grad_norm": 0.7270435690879822, "learning_rate": 6.951507448350545e-06, "loss": 2.0325, "step": 18193 }, { "epoch": 0.61, "grad_norm": 0.7205339074134827, "learning_rate": 6.9504951656856155e-06, "loss": 2.0393, "step": 18194 }, { "epoch": 0.61, "grad_norm": 0.7231996059417725, "learning_rate": 6.949482917472201e-06, "loss": 2.0385, "step": 18195 }, { "epoch": 0.61, "grad_norm": 0.7476510405540466, "learning_rate": 6.948470703721744e-06, "loss": 2.0507, "step": 18196 }, { "epoch": 0.61, "grad_norm": 0.7621219754219055, "learning_rate": 6.947458524445677e-06, "loss": 2.1001, "step": 18197 }, { "epoch": 0.61, "grad_norm": 0.7413654923439026, "learning_rate": 6.946446379655434e-06, "loss": 2.0137, "step": 18198 }, { "epoch": 0.61, "grad_norm": 0.7493494153022766, "learning_rate": 6.94543426936245e-06, "loss": 2.0946, "step": 18199 }, { "epoch": 0.61, "grad_norm": 0.765190601348877, "learning_rate": 6.944422193578158e-06, "loss": 2.032, "step": 18200 }, { "epoch": 0.61, "grad_norm": 0.7761021256446838, "learning_rate": 6.943410152313992e-06, "loss": 2.0568, "step": 18201 }, { "epoch": 0.61, "grad_norm": 0.7578926086425781, "learning_rate": 6.942398145581395e-06, "loss": 1.9944, "step": 18202 }, { "epoch": 0.61, "grad_norm": 0.7496709227561951, "learning_rate": 6.941386173391783e-06, "loss": 2.1391, "step": 18203 }, { "epoch": 0.61, "grad_norm": 0.7286618947982788, "learning_rate": 6.940374235756601e-06, "loss": 2.063, "step": 18204 }, { "epoch": 0.61, "grad_norm": 0.7208502292633057, "learning_rate": 6.939362332687276e-06, "loss": 2.1064, "step": 18205 }, { "epoch": 0.61, "grad_norm": 0.7326884269714355, "learning_rate": 6.9383504641952456e-06, "loss": 2.1525, "step": 18206 }, { "epoch": 0.61, "grad_norm": 0.7668668031692505, "learning_rate": 6.937338630291934e-06, "loss": 2.1133, "step": 18207 }, { "epoch": 0.61, "grad_norm": 0.7831897139549255, "learning_rate": 6.936326830988778e-06, "loss": 2.1251, "step": 18208 }, { "epoch": 0.61, "grad_norm": 0.7393659949302673, "learning_rate": 6.9353150662972015e-06, "loss": 2.1005, "step": 18209 }, { "epoch": 0.61, "grad_norm": 0.754098117351532, "learning_rate": 6.934303336228647e-06, "loss": 2.0567, "step": 18210 }, { "epoch": 0.61, "grad_norm": 0.7291830778121948, "learning_rate": 6.933291640794529e-06, "loss": 2.0431, "step": 18211 }, { "epoch": 0.61, "grad_norm": 0.7129613757133484, "learning_rate": 6.932279980006288e-06, "loss": 2.0157, "step": 18212 }, { "epoch": 0.61, "grad_norm": 0.7537611722946167, "learning_rate": 6.931268353875349e-06, "loss": 2.0763, "step": 18213 }, { "epoch": 0.61, "grad_norm": 0.7093154788017273, "learning_rate": 6.930256762413144e-06, "loss": 2.1204, "step": 18214 }, { "epoch": 0.61, "grad_norm": 0.785552978515625, "learning_rate": 6.929245205631099e-06, "loss": 2.0705, "step": 18215 }, { "epoch": 0.61, "grad_norm": 0.7133358120918274, "learning_rate": 6.9282336835406394e-06, "loss": 2.0595, "step": 18216 }, { "epoch": 0.61, "grad_norm": 0.7240973114967346, "learning_rate": 6.9272221961531935e-06, "loss": 2.0806, "step": 18217 }, { "epoch": 0.61, "grad_norm": 0.7669894099235535, "learning_rate": 6.9262107434801935e-06, "loss": 2.0055, "step": 18218 }, { "epoch": 0.61, "grad_norm": 0.7436234354972839, "learning_rate": 6.925199325533065e-06, "loss": 2.1167, "step": 18219 }, { "epoch": 0.61, "grad_norm": 0.744316041469574, "learning_rate": 6.9241879423232305e-06, "loss": 2.0898, "step": 18220 }, { "epoch": 0.61, "grad_norm": 0.7304786443710327, "learning_rate": 6.923176593862119e-06, "loss": 2.0662, "step": 18221 }, { "epoch": 0.61, "grad_norm": 0.7371495962142944, "learning_rate": 6.922165280161153e-06, "loss": 2.0637, "step": 18222 }, { "epoch": 0.61, "grad_norm": 0.7590890526771545, "learning_rate": 6.9211540012317595e-06, "loss": 2.0631, "step": 18223 }, { "epoch": 0.61, "grad_norm": 0.7795783877372742, "learning_rate": 6.920142757085368e-06, "loss": 2.104, "step": 18224 }, { "epoch": 0.61, "grad_norm": 0.718311071395874, "learning_rate": 6.919131547733396e-06, "loss": 2.0495, "step": 18225 }, { "epoch": 0.61, "grad_norm": 0.7413643002510071, "learning_rate": 6.91812037318727e-06, "loss": 2.0638, "step": 18226 }, { "epoch": 0.61, "grad_norm": 0.7659221291542053, "learning_rate": 6.917109233458412e-06, "loss": 2.078, "step": 18227 }, { "epoch": 0.61, "grad_norm": 0.7427729964256287, "learning_rate": 6.916098128558252e-06, "loss": 2.0465, "step": 18228 }, { "epoch": 0.61, "grad_norm": 0.7672173380851746, "learning_rate": 6.915087058498205e-06, "loss": 2.0816, "step": 18229 }, { "epoch": 0.61, "grad_norm": 0.7420541644096375, "learning_rate": 6.914076023289697e-06, "loss": 2.1033, "step": 18230 }, { "epoch": 0.61, "grad_norm": 0.7388933897018433, "learning_rate": 6.913065022944146e-06, "loss": 2.0463, "step": 18231 }, { "epoch": 0.61, "grad_norm": 0.7369776964187622, "learning_rate": 6.912054057472984e-06, "loss": 2.0026, "step": 18232 }, { "epoch": 0.61, "grad_norm": 0.7766572833061218, "learning_rate": 6.91104312688762e-06, "loss": 2.0494, "step": 18233 }, { "epoch": 0.61, "grad_norm": 0.7203308343887329, "learning_rate": 6.910032231199483e-06, "loss": 2.08, "step": 18234 }, { "epoch": 0.61, "grad_norm": 0.7288216352462769, "learning_rate": 6.909021370419991e-06, "loss": 2.0403, "step": 18235 }, { "epoch": 0.61, "grad_norm": 0.7403327226638794, "learning_rate": 6.908010544560562e-06, "loss": 2.0082, "step": 18236 }, { "epoch": 0.61, "grad_norm": 0.7094191312789917, "learning_rate": 6.906999753632621e-06, "loss": 2.0973, "step": 18237 }, { "epoch": 0.61, "grad_norm": 0.7305642366409302, "learning_rate": 6.905988997647582e-06, "loss": 2.0955, "step": 18238 }, { "epoch": 0.61, "grad_norm": 0.7384151220321655, "learning_rate": 6.904978276616865e-06, "loss": 2.0293, "step": 18239 }, { "epoch": 0.61, "grad_norm": 0.8124091029167175, "learning_rate": 6.90396759055189e-06, "loss": 2.0888, "step": 18240 }, { "epoch": 0.61, "grad_norm": 0.7468953132629395, "learning_rate": 6.902956939464078e-06, "loss": 2.0936, "step": 18241 }, { "epoch": 0.61, "grad_norm": 0.740871250629425, "learning_rate": 6.901946323364841e-06, "loss": 2.1664, "step": 18242 }, { "epoch": 0.61, "grad_norm": 0.7360413670539856, "learning_rate": 6.900935742265599e-06, "loss": 2.0426, "step": 18243 }, { "epoch": 0.61, "grad_norm": 0.7525025010108948, "learning_rate": 6.899925196177768e-06, "loss": 2.0686, "step": 18244 }, { "epoch": 0.61, "grad_norm": 0.7337868809700012, "learning_rate": 6.898914685112763e-06, "loss": 2.0599, "step": 18245 }, { "epoch": 0.61, "grad_norm": 0.7474092841148376, "learning_rate": 6.8979042090820106e-06, "loss": 2.0192, "step": 18246 }, { "epoch": 0.61, "grad_norm": 0.7477457523345947, "learning_rate": 6.896893768096911e-06, "loss": 2.0532, "step": 18247 }, { "epoch": 0.61, "grad_norm": 0.7334063053131104, "learning_rate": 6.895883362168889e-06, "loss": 2.0913, "step": 18248 }, { "epoch": 0.61, "grad_norm": 0.7706827521324158, "learning_rate": 6.894872991309358e-06, "loss": 2.0169, "step": 18249 }, { "epoch": 0.61, "grad_norm": 0.75234454870224, "learning_rate": 6.893862655529733e-06, "loss": 2.0462, "step": 18250 }, { "epoch": 0.61, "grad_norm": 0.776328980922699, "learning_rate": 6.892852354841426e-06, "loss": 2.0801, "step": 18251 }, { "epoch": 0.61, "grad_norm": 0.739913821220398, "learning_rate": 6.891842089255853e-06, "loss": 1.9989, "step": 18252 }, { "epoch": 0.61, "grad_norm": 0.73139888048172, "learning_rate": 6.890831858784424e-06, "loss": 2.042, "step": 18253 }, { "epoch": 0.61, "grad_norm": 0.7494426369667053, "learning_rate": 6.889821663438556e-06, "loss": 2.111, "step": 18254 }, { "epoch": 0.61, "grad_norm": 0.7208462953567505, "learning_rate": 6.888811503229662e-06, "loss": 2.0243, "step": 18255 }, { "epoch": 0.61, "grad_norm": 0.7479475140571594, "learning_rate": 6.8878013781691525e-06, "loss": 2.1137, "step": 18256 }, { "epoch": 0.61, "grad_norm": 0.732677161693573, "learning_rate": 6.886791288268436e-06, "loss": 2.0721, "step": 18257 }, { "epoch": 0.61, "grad_norm": 0.7236099243164062, "learning_rate": 6.885781233538927e-06, "loss": 2.0554, "step": 18258 }, { "epoch": 0.61, "grad_norm": 0.7083085179328918, "learning_rate": 6.884771213992042e-06, "loss": 2.0869, "step": 18259 }, { "epoch": 0.61, "grad_norm": 0.7463151812553406, "learning_rate": 6.8837612296391795e-06, "loss": 2.0546, "step": 18260 }, { "epoch": 0.61, "grad_norm": 0.7234214544296265, "learning_rate": 6.882751280491759e-06, "loss": 1.9444, "step": 18261 }, { "epoch": 0.61, "grad_norm": 0.7817516326904297, "learning_rate": 6.881741366561187e-06, "loss": 2.0606, "step": 18262 }, { "epoch": 0.61, "grad_norm": 0.7475141882896423, "learning_rate": 6.880731487858876e-06, "loss": 2.0575, "step": 18263 }, { "epoch": 0.61, "grad_norm": 0.7212839126586914, "learning_rate": 6.8797216443962316e-06, "loss": 2.096, "step": 18264 }, { "epoch": 0.61, "grad_norm": 0.7221172451972961, "learning_rate": 6.878711836184662e-06, "loss": 2.0835, "step": 18265 }, { "epoch": 0.61, "grad_norm": 0.7483072280883789, "learning_rate": 6.877702063235577e-06, "loss": 2.0605, "step": 18266 }, { "epoch": 0.61, "grad_norm": 0.7404163479804993, "learning_rate": 6.876692325560383e-06, "loss": 2.1038, "step": 18267 }, { "epoch": 0.61, "grad_norm": 0.7535737752914429, "learning_rate": 6.875682623170492e-06, "loss": 2.1002, "step": 18268 }, { "epoch": 0.61, "grad_norm": 0.7278376221656799, "learning_rate": 6.874672956077306e-06, "loss": 2.1425, "step": 18269 }, { "epoch": 0.61, "grad_norm": 0.7086727023124695, "learning_rate": 6.873663324292233e-06, "loss": 2.016, "step": 18270 }, { "epoch": 0.61, "grad_norm": 0.7278428077697754, "learning_rate": 6.872653727826679e-06, "loss": 2.0242, "step": 18271 }, { "epoch": 0.61, "grad_norm": 0.7262416481971741, "learning_rate": 6.871644166692054e-06, "loss": 2.074, "step": 18272 }, { "epoch": 0.61, "grad_norm": 0.7270860075950623, "learning_rate": 6.870634640899757e-06, "loss": 2.0176, "step": 18273 }, { "epoch": 0.61, "grad_norm": 0.7612005472183228, "learning_rate": 6.869625150461194e-06, "loss": 2.1016, "step": 18274 }, { "epoch": 0.61, "grad_norm": 0.740704357624054, "learning_rate": 6.868615695387772e-06, "loss": 2.008, "step": 18275 }, { "epoch": 0.61, "grad_norm": 0.7130936980247498, "learning_rate": 6.867606275690897e-06, "loss": 2.0525, "step": 18276 }, { "epoch": 0.61, "grad_norm": 0.7626101970672607, "learning_rate": 6.8665968913819715e-06, "loss": 2.0708, "step": 18277 }, { "epoch": 0.61, "grad_norm": 0.7217760682106018, "learning_rate": 6.865587542472396e-06, "loss": 2.0619, "step": 18278 }, { "epoch": 0.61, "grad_norm": 0.7392137050628662, "learning_rate": 6.864578228973575e-06, "loss": 2.0962, "step": 18279 }, { "epoch": 0.61, "grad_norm": 0.7717177867889404, "learning_rate": 6.8635689508969105e-06, "loss": 2.0839, "step": 18280 }, { "epoch": 0.61, "grad_norm": 0.7491602897644043, "learning_rate": 6.862559708253811e-06, "loss": 2.0911, "step": 18281 }, { "epoch": 0.61, "grad_norm": 0.7501385807991028, "learning_rate": 6.861550501055667e-06, "loss": 2.1131, "step": 18282 }, { "epoch": 0.61, "grad_norm": 0.7387890815734863, "learning_rate": 6.86054132931389e-06, "loss": 1.9938, "step": 18283 }, { "epoch": 0.61, "grad_norm": 0.7624941468238831, "learning_rate": 6.859532193039875e-06, "loss": 2.0668, "step": 18284 }, { "epoch": 0.61, "grad_norm": 0.7394777536392212, "learning_rate": 6.858523092245026e-06, "loss": 2.0959, "step": 18285 }, { "epoch": 0.61, "grad_norm": 0.7738788723945618, "learning_rate": 6.8575140269407434e-06, "loss": 2.0869, "step": 18286 }, { "epoch": 0.61, "grad_norm": 0.7552123665809631, "learning_rate": 6.856504997138424e-06, "loss": 2.1117, "step": 18287 }, { "epoch": 0.61, "grad_norm": 0.7498782277107239, "learning_rate": 6.855496002849467e-06, "loss": 2.0699, "step": 18288 }, { "epoch": 0.61, "grad_norm": 0.7292712330818176, "learning_rate": 6.854487044085275e-06, "loss": 1.9774, "step": 18289 }, { "epoch": 0.61, "grad_norm": 0.7111600041389465, "learning_rate": 6.853478120857247e-06, "loss": 2.0973, "step": 18290 }, { "epoch": 0.61, "grad_norm": 0.7142061591148376, "learning_rate": 6.852469233176777e-06, "loss": 2.0028, "step": 18291 }, { "epoch": 0.61, "grad_norm": 0.7392668724060059, "learning_rate": 6.851460381055266e-06, "loss": 2.1294, "step": 18292 }, { "epoch": 0.61, "grad_norm": 0.7337582111358643, "learning_rate": 6.850451564504109e-06, "loss": 2.089, "step": 18293 }, { "epoch": 0.61, "grad_norm": 0.7718833684921265, "learning_rate": 6.849442783534708e-06, "loss": 1.9778, "step": 18294 }, { "epoch": 0.61, "grad_norm": 0.7311339378356934, "learning_rate": 6.848434038158452e-06, "loss": 2.1452, "step": 18295 }, { "epoch": 0.61, "grad_norm": 0.7324917912483215, "learning_rate": 6.847425328386741e-06, "loss": 2.0249, "step": 18296 }, { "epoch": 0.61, "grad_norm": 0.7163553833961487, "learning_rate": 6.846416654230972e-06, "loss": 2.0709, "step": 18297 }, { "epoch": 0.61, "grad_norm": 0.7434574961662292, "learning_rate": 6.845408015702539e-06, "loss": 2.1334, "step": 18298 }, { "epoch": 0.61, "grad_norm": 0.7435803413391113, "learning_rate": 6.844399412812841e-06, "loss": 2.0288, "step": 18299 }, { "epoch": 0.61, "grad_norm": 0.7465127110481262, "learning_rate": 6.843390845573265e-06, "loss": 2.0745, "step": 18300 }, { "epoch": 0.61, "grad_norm": 0.7331094741821289, "learning_rate": 6.842382313995212e-06, "loss": 2.012, "step": 18301 }, { "epoch": 0.61, "grad_norm": 0.7311022281646729, "learning_rate": 6.841373818090069e-06, "loss": 2.0705, "step": 18302 }, { "epoch": 0.61, "grad_norm": 0.7777676582336426, "learning_rate": 6.840365357869241e-06, "loss": 2.0623, "step": 18303 }, { "epoch": 0.61, "grad_norm": 0.7453696727752686, "learning_rate": 6.839356933344106e-06, "loss": 2.128, "step": 18304 }, { "epoch": 0.61, "grad_norm": 0.7418368458747864, "learning_rate": 6.838348544526067e-06, "loss": 2.1084, "step": 18305 }, { "epoch": 0.61, "grad_norm": 0.75780189037323, "learning_rate": 6.837340191426513e-06, "loss": 2.1053, "step": 18306 }, { "epoch": 0.61, "grad_norm": 0.7905848622322083, "learning_rate": 6.836331874056835e-06, "loss": 2.0577, "step": 18307 }, { "epoch": 0.61, "grad_norm": 0.7332119345664978, "learning_rate": 6.835323592428429e-06, "loss": 2.0418, "step": 18308 }, { "epoch": 0.61, "grad_norm": 0.7225558757781982, "learning_rate": 6.834315346552679e-06, "loss": 2.0872, "step": 18309 }, { "epoch": 0.61, "grad_norm": 0.7501688003540039, "learning_rate": 6.833307136440977e-06, "loss": 2.077, "step": 18310 }, { "epoch": 0.61, "grad_norm": 0.7427091598510742, "learning_rate": 6.832298962104718e-06, "loss": 2.0343, "step": 18311 }, { "epoch": 0.61, "grad_norm": 0.7550827264785767, "learning_rate": 6.831290823555291e-06, "loss": 2.0622, "step": 18312 }, { "epoch": 0.61, "grad_norm": 0.7726485133171082, "learning_rate": 6.83028272080408e-06, "loss": 2.1355, "step": 18313 }, { "epoch": 0.61, "grad_norm": 0.7859175205230713, "learning_rate": 6.829274653862479e-06, "loss": 2.1358, "step": 18314 }, { "epoch": 0.61, "grad_norm": 0.7417984008789062, "learning_rate": 6.828266622741873e-06, "loss": 2.0562, "step": 18315 }, { "epoch": 0.61, "grad_norm": 0.7341346740722656, "learning_rate": 6.82725862745365e-06, "loss": 2.0908, "step": 18316 }, { "epoch": 0.61, "grad_norm": 0.7390155792236328, "learning_rate": 6.826250668009207e-06, "loss": 2.119, "step": 18317 }, { "epoch": 0.61, "grad_norm": 0.7509703040122986, "learning_rate": 6.825242744419918e-06, "loss": 2.1016, "step": 18318 }, { "epoch": 0.61, "grad_norm": 0.7445874214172363, "learning_rate": 6.824234856697176e-06, "loss": 2.0534, "step": 18319 }, { "epoch": 0.61, "grad_norm": 0.7243942618370056, "learning_rate": 6.823227004852369e-06, "loss": 2.0557, "step": 18320 }, { "epoch": 0.61, "grad_norm": 0.745593249797821, "learning_rate": 6.822219188896883e-06, "loss": 2.0694, "step": 18321 }, { "epoch": 0.61, "grad_norm": 0.8173550963401794, "learning_rate": 6.821211408842099e-06, "loss": 2.0261, "step": 18322 }, { "epoch": 0.61, "grad_norm": 0.7469823956489563, "learning_rate": 6.820203664699407e-06, "loss": 2.07, "step": 18323 }, { "epoch": 0.61, "grad_norm": 0.7456266283988953, "learning_rate": 6.819195956480189e-06, "loss": 2.0812, "step": 18324 }, { "epoch": 0.61, "grad_norm": 0.7292190194129944, "learning_rate": 6.818188284195836e-06, "loss": 1.9946, "step": 18325 }, { "epoch": 0.61, "grad_norm": 0.7258754968643188, "learning_rate": 6.81718064785772e-06, "loss": 2.0829, "step": 18326 }, { "epoch": 0.61, "grad_norm": 0.7230064272880554, "learning_rate": 6.8161730474772355e-06, "loss": 2.0257, "step": 18327 }, { "epoch": 0.61, "grad_norm": 0.7246164083480835, "learning_rate": 6.815165483065762e-06, "loss": 2.0067, "step": 18328 }, { "epoch": 0.61, "grad_norm": 0.7395140528678894, "learning_rate": 6.8141579546346814e-06, "loss": 2.0563, "step": 18329 }, { "epoch": 0.61, "grad_norm": 0.7418071627616882, "learning_rate": 6.8131504621953795e-06, "loss": 2.0798, "step": 18330 }, { "epoch": 0.61, "grad_norm": 0.7509021162986755, "learning_rate": 6.812143005759234e-06, "loss": 2.0437, "step": 18331 }, { "epoch": 0.61, "grad_norm": 0.763006329536438, "learning_rate": 6.811135585337627e-06, "loss": 2.0082, "step": 18332 }, { "epoch": 0.61, "grad_norm": 0.7587042450904846, "learning_rate": 6.810128200941942e-06, "loss": 2.037, "step": 18333 }, { "epoch": 0.61, "grad_norm": 0.7177515029907227, "learning_rate": 6.809120852583563e-06, "loss": 2.0849, "step": 18334 }, { "epoch": 0.61, "grad_norm": 0.7240169644355774, "learning_rate": 6.808113540273863e-06, "loss": 2.0446, "step": 18335 }, { "epoch": 0.61, "grad_norm": 0.7504788637161255, "learning_rate": 6.8071062640242254e-06, "loss": 2.0324, "step": 18336 }, { "epoch": 0.61, "grad_norm": 0.739625871181488, "learning_rate": 6.806099023846031e-06, "loss": 2.0158, "step": 18337 }, { "epoch": 0.61, "grad_norm": 0.7648923993110657, "learning_rate": 6.805091819750656e-06, "loss": 2.1078, "step": 18338 }, { "epoch": 0.61, "grad_norm": 0.7783934473991394, "learning_rate": 6.804084651749487e-06, "loss": 2.0255, "step": 18339 }, { "epoch": 0.61, "grad_norm": 0.7486938834190369, "learning_rate": 6.803077519853891e-06, "loss": 2.1277, "step": 18340 }, { "epoch": 0.61, "grad_norm": 0.7740841507911682, "learning_rate": 6.802070424075254e-06, "loss": 2.1301, "step": 18341 }, { "epoch": 0.61, "grad_norm": 0.7448037266731262, "learning_rate": 6.801063364424949e-06, "loss": 2.0197, "step": 18342 }, { "epoch": 0.61, "grad_norm": 0.7407925724983215, "learning_rate": 6.80005634091436e-06, "loss": 2.0038, "step": 18343 }, { "epoch": 0.61, "grad_norm": 0.7466814517974854, "learning_rate": 6.799049353554856e-06, "loss": 2.0602, "step": 18344 }, { "epoch": 0.61, "grad_norm": 0.73466557264328, "learning_rate": 6.798042402357817e-06, "loss": 2.0553, "step": 18345 }, { "epoch": 0.61, "grad_norm": 0.780166506767273, "learning_rate": 6.797035487334614e-06, "loss": 2.1165, "step": 18346 }, { "epoch": 0.61, "grad_norm": 0.7334133386611938, "learning_rate": 6.796028608496631e-06, "loss": 2.0967, "step": 18347 }, { "epoch": 0.61, "grad_norm": 0.7509230375289917, "learning_rate": 6.795021765855241e-06, "loss": 2.1019, "step": 18348 }, { "epoch": 0.61, "grad_norm": 0.7461289763450623, "learning_rate": 6.794014959421815e-06, "loss": 2.0445, "step": 18349 }, { "epoch": 0.61, "grad_norm": 0.7383481860160828, "learning_rate": 6.793008189207728e-06, "loss": 2.0878, "step": 18350 }, { "epoch": 0.61, "grad_norm": 0.7769946455955505, "learning_rate": 6.792001455224353e-06, "loss": 2.214, "step": 18351 }, { "epoch": 0.61, "grad_norm": 0.7748289108276367, "learning_rate": 6.790994757483073e-06, "loss": 2.0995, "step": 18352 }, { "epoch": 0.61, "grad_norm": 0.7307443022727966, "learning_rate": 6.789988095995248e-06, "loss": 2.0563, "step": 18353 }, { "epoch": 0.61, "grad_norm": 0.7601832747459412, "learning_rate": 6.788981470772256e-06, "loss": 2.0805, "step": 18354 }, { "epoch": 0.61, "grad_norm": 0.7346131801605225, "learning_rate": 6.78797488182547e-06, "loss": 2.0322, "step": 18355 }, { "epoch": 0.61, "grad_norm": 0.7610582113265991, "learning_rate": 6.7869683291662634e-06, "loss": 2.0448, "step": 18356 }, { "epoch": 0.61, "grad_norm": 0.7298884391784668, "learning_rate": 6.785961812806004e-06, "loss": 2.0795, "step": 18357 }, { "epoch": 0.61, "grad_norm": 0.7294670343399048, "learning_rate": 6.784955332756065e-06, "loss": 2.0507, "step": 18358 }, { "epoch": 0.61, "grad_norm": 0.7797404527664185, "learning_rate": 6.783948889027814e-06, "loss": 2.1029, "step": 18359 }, { "epoch": 0.61, "grad_norm": 0.707570493221283, "learning_rate": 6.782942481632625e-06, "loss": 1.9876, "step": 18360 }, { "epoch": 0.61, "grad_norm": 0.7228553891181946, "learning_rate": 6.7819361105818694e-06, "loss": 2.0954, "step": 18361 }, { "epoch": 0.61, "grad_norm": 0.7360501885414124, "learning_rate": 6.780929775886911e-06, "loss": 2.0595, "step": 18362 }, { "epoch": 0.61, "grad_norm": 0.7117306590080261, "learning_rate": 6.779923477559122e-06, "loss": 2.0799, "step": 18363 }, { "epoch": 0.61, "grad_norm": 0.7397478818893433, "learning_rate": 6.77891721560987e-06, "loss": 2.08, "step": 18364 }, { "epoch": 0.61, "grad_norm": 0.7367069125175476, "learning_rate": 6.777910990050525e-06, "loss": 2.0283, "step": 18365 }, { "epoch": 0.61, "grad_norm": 0.7536083459854126, "learning_rate": 6.7769048008924525e-06, "loss": 2.0385, "step": 18366 }, { "epoch": 0.61, "grad_norm": 0.7222318053245544, "learning_rate": 6.775898648147018e-06, "loss": 2.0766, "step": 18367 }, { "epoch": 0.61, "grad_norm": 0.7275909781455994, "learning_rate": 6.774892531825593e-06, "loss": 2.0737, "step": 18368 }, { "epoch": 0.61, "grad_norm": 0.7236939072608948, "learning_rate": 6.773886451939542e-06, "loss": 2.0485, "step": 18369 }, { "epoch": 0.61, "grad_norm": 0.7356298565864563, "learning_rate": 6.772880408500233e-06, "loss": 2.1176, "step": 18370 }, { "epoch": 0.61, "grad_norm": 0.7407395839691162, "learning_rate": 6.77187440151903e-06, "loss": 2.0662, "step": 18371 }, { "epoch": 0.61, "grad_norm": 0.7195042371749878, "learning_rate": 6.770868431007296e-06, "loss": 1.9862, "step": 18372 }, { "epoch": 0.61, "grad_norm": 0.716382622718811, "learning_rate": 6.769862496976396e-06, "loss": 2.0134, "step": 18373 }, { "epoch": 0.61, "grad_norm": 0.731662929058075, "learning_rate": 6.768856599437703e-06, "loss": 2.1402, "step": 18374 }, { "epoch": 0.61, "grad_norm": 0.7474837303161621, "learning_rate": 6.767850738402568e-06, "loss": 2.0542, "step": 18375 }, { "epoch": 0.61, "grad_norm": 0.7314082384109497, "learning_rate": 6.7668449138823635e-06, "loss": 2.0537, "step": 18376 }, { "epoch": 0.61, "grad_norm": 0.7353610396385193, "learning_rate": 6.76583912588845e-06, "loss": 2.043, "step": 18377 }, { "epoch": 0.61, "grad_norm": 0.7164190411567688, "learning_rate": 6.764833374432191e-06, "loss": 2.0358, "step": 18378 }, { "epoch": 0.61, "grad_norm": 0.7325714230537415, "learning_rate": 6.763827659524951e-06, "loss": 2.1277, "step": 18379 }, { "epoch": 0.61, "grad_norm": 0.7469738125801086, "learning_rate": 6.762821981178087e-06, "loss": 2.1015, "step": 18380 }, { "epoch": 0.61, "grad_norm": 0.7329541444778442, "learning_rate": 6.761816339402961e-06, "loss": 2.0307, "step": 18381 }, { "epoch": 0.61, "grad_norm": 0.7335854768753052, "learning_rate": 6.760810734210938e-06, "loss": 1.9868, "step": 18382 }, { "epoch": 0.61, "grad_norm": 0.6994116902351379, "learning_rate": 6.75980516561338e-06, "loss": 2.0982, "step": 18383 }, { "epoch": 0.61, "grad_norm": 0.7387685775756836, "learning_rate": 6.758799633621642e-06, "loss": 2.0488, "step": 18384 }, { "epoch": 0.61, "grad_norm": 0.7489231824874878, "learning_rate": 6.757794138247085e-06, "loss": 2.0081, "step": 18385 }, { "epoch": 0.61, "grad_norm": 0.7993369698524475, "learning_rate": 6.75678867950107e-06, "loss": 2.0628, "step": 18386 }, { "epoch": 0.61, "grad_norm": 0.7146592736244202, "learning_rate": 6.755783257394959e-06, "loss": 2.0484, "step": 18387 }, { "epoch": 0.61, "grad_norm": 0.7356510758399963, "learning_rate": 6.754777871940106e-06, "loss": 2.0267, "step": 18388 }, { "epoch": 0.61, "grad_norm": 0.7581258416175842, "learning_rate": 6.7537725231478665e-06, "loss": 2.0336, "step": 18389 }, { "epoch": 0.61, "grad_norm": 0.7559896111488342, "learning_rate": 6.752767211029605e-06, "loss": 2.0105, "step": 18390 }, { "epoch": 0.61, "grad_norm": 0.7393775582313538, "learning_rate": 6.751761935596678e-06, "loss": 2.0674, "step": 18391 }, { "epoch": 0.61, "grad_norm": 0.7657954096794128, "learning_rate": 6.750756696860442e-06, "loss": 1.9824, "step": 18392 }, { "epoch": 0.61, "grad_norm": 0.746306300163269, "learning_rate": 6.74975149483225e-06, "loss": 2.043, "step": 18393 }, { "epoch": 0.61, "grad_norm": 0.7265415191650391, "learning_rate": 6.748746329523461e-06, "loss": 2.0348, "step": 18394 }, { "epoch": 0.61, "grad_norm": 0.7116268873214722, "learning_rate": 6.74774120094543e-06, "loss": 2.0803, "step": 18395 }, { "epoch": 0.61, "grad_norm": 0.7705291509628296, "learning_rate": 6.746736109109518e-06, "loss": 2.0998, "step": 18396 }, { "epoch": 0.61, "grad_norm": 0.7329710125923157, "learning_rate": 6.745731054027069e-06, "loss": 2.0695, "step": 18397 }, { "epoch": 0.61, "grad_norm": 0.7447865605354309, "learning_rate": 6.744726035709445e-06, "loss": 2.0934, "step": 18398 }, { "epoch": 0.61, "grad_norm": 0.7582442760467529, "learning_rate": 6.743721054167998e-06, "loss": 2.1285, "step": 18399 }, { "epoch": 0.61, "grad_norm": 0.7348441481590271, "learning_rate": 6.742716109414083e-06, "loss": 2.1181, "step": 18400 }, { "epoch": 0.61, "grad_norm": 0.7339258193969727, "learning_rate": 6.741711201459053e-06, "loss": 1.9627, "step": 18401 }, { "epoch": 0.61, "grad_norm": 0.748984694480896, "learning_rate": 6.740706330314261e-06, "loss": 2.0739, "step": 18402 }, { "epoch": 0.61, "grad_norm": 0.7580757737159729, "learning_rate": 6.739701495991056e-06, "loss": 2.0685, "step": 18403 }, { "epoch": 0.61, "grad_norm": 0.7279966473579407, "learning_rate": 6.738696698500793e-06, "loss": 2.058, "step": 18404 }, { "epoch": 0.61, "grad_norm": 0.7286447882652283, "learning_rate": 6.737691937854826e-06, "loss": 2.0845, "step": 18405 }, { "epoch": 0.61, "grad_norm": 0.7269473671913147, "learning_rate": 6.736687214064502e-06, "loss": 2.0812, "step": 18406 }, { "epoch": 0.61, "grad_norm": 0.7312459349632263, "learning_rate": 6.735682527141174e-06, "loss": 2.0512, "step": 18407 }, { "epoch": 0.61, "grad_norm": 0.7474531531333923, "learning_rate": 6.734677877096191e-06, "loss": 2.1034, "step": 18408 }, { "epoch": 0.61, "grad_norm": 0.7328645586967468, "learning_rate": 6.733673263940905e-06, "loss": 2.0509, "step": 18409 }, { "epoch": 0.61, "grad_norm": 0.7562387585639954, "learning_rate": 6.732668687686663e-06, "loss": 2.061, "step": 18410 }, { "epoch": 0.61, "grad_norm": 0.7350242733955383, "learning_rate": 6.731664148344814e-06, "loss": 1.9607, "step": 18411 }, { "epoch": 0.61, "grad_norm": 0.7807022929191589, "learning_rate": 6.730659645926709e-06, "loss": 2.1291, "step": 18412 }, { "epoch": 0.61, "grad_norm": 0.7233297824859619, "learning_rate": 6.729655180443695e-06, "loss": 2.068, "step": 18413 }, { "epoch": 0.61, "grad_norm": 0.7439149618148804, "learning_rate": 6.728650751907124e-06, "loss": 2.0344, "step": 18414 }, { "epoch": 0.61, "grad_norm": 0.7627593874931335, "learning_rate": 6.727646360328336e-06, "loss": 1.9976, "step": 18415 }, { "epoch": 0.61, "grad_norm": 0.7293951511383057, "learning_rate": 6.726642005718682e-06, "loss": 2.0824, "step": 18416 }, { "epoch": 0.61, "grad_norm": 0.7175565958023071, "learning_rate": 6.725637688089506e-06, "loss": 2.0198, "step": 18417 }, { "epoch": 0.61, "grad_norm": 0.7496711611747742, "learning_rate": 6.724633407452164e-06, "loss": 2.0684, "step": 18418 }, { "epoch": 0.61, "grad_norm": 0.7368891835212708, "learning_rate": 6.723629163817986e-06, "loss": 2.1075, "step": 18419 }, { "epoch": 0.61, "grad_norm": 0.7421596050262451, "learning_rate": 6.722624957198328e-06, "loss": 1.9933, "step": 18420 }, { "epoch": 0.61, "grad_norm": 0.7583807110786438, "learning_rate": 6.721620787604533e-06, "loss": 2.0893, "step": 18421 }, { "epoch": 0.61, "grad_norm": 0.7468669414520264, "learning_rate": 6.720616655047944e-06, "loss": 1.9784, "step": 18422 }, { "epoch": 0.61, "grad_norm": 0.7592142224311829, "learning_rate": 6.71961255953991e-06, "loss": 1.9859, "step": 18423 }, { "epoch": 0.61, "grad_norm": 0.7409591674804688, "learning_rate": 6.718608501091768e-06, "loss": 2.082, "step": 18424 }, { "epoch": 0.61, "grad_norm": 0.7203793525695801, "learning_rate": 6.7176044797148625e-06, "loss": 2.0792, "step": 18425 }, { "epoch": 0.61, "grad_norm": 0.7320877313613892, "learning_rate": 6.716600495420539e-06, "loss": 2.0026, "step": 18426 }, { "epoch": 0.61, "grad_norm": 0.7911693453788757, "learning_rate": 6.715596548220142e-06, "loss": 2.0704, "step": 18427 }, { "epoch": 0.61, "grad_norm": 0.76523756980896, "learning_rate": 6.714592638125006e-06, "loss": 2.0687, "step": 18428 }, { "epoch": 0.61, "grad_norm": 0.7689847350120544, "learning_rate": 6.713588765146479e-06, "loss": 2.079, "step": 18429 }, { "epoch": 0.61, "grad_norm": 0.7348833680152893, "learning_rate": 6.7125849292959e-06, "loss": 2.0177, "step": 18430 }, { "epoch": 0.61, "grad_norm": 0.7341191172599792, "learning_rate": 6.711581130584608e-06, "loss": 2.0583, "step": 18431 }, { "epoch": 0.61, "grad_norm": 0.7732895612716675, "learning_rate": 6.710577369023949e-06, "loss": 2.005, "step": 18432 }, { "epoch": 0.61, "grad_norm": 0.742037296295166, "learning_rate": 6.709573644625256e-06, "loss": 2.0123, "step": 18433 }, { "epoch": 0.61, "grad_norm": 0.7439534068107605, "learning_rate": 6.708569957399872e-06, "loss": 2.062, "step": 18434 }, { "epoch": 0.61, "grad_norm": 0.7266255021095276, "learning_rate": 6.707566307359137e-06, "loss": 2.0489, "step": 18435 }, { "epoch": 0.61, "grad_norm": 0.7161381840705872, "learning_rate": 6.706562694514389e-06, "loss": 2.041, "step": 18436 }, { "epoch": 0.61, "grad_norm": 0.7617234587669373, "learning_rate": 6.705559118876963e-06, "loss": 2.0744, "step": 18437 }, { "epoch": 0.61, "grad_norm": 0.740382969379425, "learning_rate": 6.704555580458201e-06, "loss": 2.0834, "step": 18438 }, { "epoch": 0.61, "grad_norm": 0.7304786443710327, "learning_rate": 6.703552079269435e-06, "loss": 2.1013, "step": 18439 }, { "epoch": 0.61, "grad_norm": 0.768141508102417, "learning_rate": 6.702548615322013e-06, "loss": 2.054, "step": 18440 }, { "epoch": 0.61, "grad_norm": 0.7651693224906921, "learning_rate": 6.701545188627258e-06, "loss": 1.9818, "step": 18441 }, { "epoch": 0.61, "grad_norm": 0.7493178248405457, "learning_rate": 6.7005417991965135e-06, "loss": 2.0885, "step": 18442 }, { "epoch": 0.61, "grad_norm": 0.7126354575157166, "learning_rate": 6.699538447041115e-06, "loss": 2.0811, "step": 18443 }, { "epoch": 0.61, "grad_norm": 0.7364470958709717, "learning_rate": 6.698535132172394e-06, "loss": 2.1081, "step": 18444 }, { "epoch": 0.61, "grad_norm": 0.7296072840690613, "learning_rate": 6.697531854601695e-06, "loss": 2.0842, "step": 18445 }, { "epoch": 0.61, "grad_norm": 0.7903474569320679, "learning_rate": 6.696528614340343e-06, "loss": 2.0718, "step": 18446 }, { "epoch": 0.61, "grad_norm": 0.7271576523780823, "learning_rate": 6.6955254113996704e-06, "loss": 2.0491, "step": 18447 }, { "epoch": 0.61, "grad_norm": 0.777874231338501, "learning_rate": 6.694522245791017e-06, "loss": 2.1067, "step": 18448 }, { "epoch": 0.61, "grad_norm": 0.7434333562850952, "learning_rate": 6.693519117525719e-06, "loss": 2.1115, "step": 18449 }, { "epoch": 0.61, "grad_norm": 0.7488025426864624, "learning_rate": 6.6925160266150994e-06, "loss": 2.0482, "step": 18450 }, { "epoch": 0.61, "grad_norm": 0.7355394959449768, "learning_rate": 6.691512973070497e-06, "loss": 2.0656, "step": 18451 }, { "epoch": 0.61, "grad_norm": 0.7592676877975464, "learning_rate": 6.6905099569032385e-06, "loss": 2.0391, "step": 18452 }, { "epoch": 0.61, "grad_norm": 0.748758852481842, "learning_rate": 6.689506978124663e-06, "loss": 2.0435, "step": 18453 }, { "epoch": 0.61, "grad_norm": 0.7562140822410583, "learning_rate": 6.6885040367461e-06, "loss": 2.0165, "step": 18454 }, { "epoch": 0.61, "grad_norm": 0.7367588877677917, "learning_rate": 6.687501132778871e-06, "loss": 2.0504, "step": 18455 }, { "epoch": 0.61, "grad_norm": 0.7489113211631775, "learning_rate": 6.686498266234316e-06, "loss": 1.9668, "step": 18456 }, { "epoch": 0.61, "grad_norm": 0.7488969564437866, "learning_rate": 6.685495437123761e-06, "loss": 2.0431, "step": 18457 }, { "epoch": 0.61, "grad_norm": 0.7641574144363403, "learning_rate": 6.684492645458539e-06, "loss": 2.0341, "step": 18458 }, { "epoch": 0.61, "grad_norm": 0.7666091918945312, "learning_rate": 6.683489891249973e-06, "loss": 2.102, "step": 18459 }, { "epoch": 0.61, "grad_norm": 0.7313241958618164, "learning_rate": 6.682487174509393e-06, "loss": 2.0753, "step": 18460 }, { "epoch": 0.61, "grad_norm": 0.7338123321533203, "learning_rate": 6.681484495248132e-06, "loss": 2.027, "step": 18461 }, { "epoch": 0.61, "grad_norm": 0.7557225227355957, "learning_rate": 6.680481853477511e-06, "loss": 2.0674, "step": 18462 }, { "epoch": 0.61, "grad_norm": 0.7447043657302856, "learning_rate": 6.679479249208867e-06, "loss": 2.0064, "step": 18463 }, { "epoch": 0.61, "grad_norm": 0.7302720546722412, "learning_rate": 6.678476682453515e-06, "loss": 2.0392, "step": 18464 }, { "epoch": 0.61, "grad_norm": 0.7419682145118713, "learning_rate": 6.677474153222787e-06, "loss": 2.0805, "step": 18465 }, { "epoch": 0.61, "grad_norm": 0.7353573441505432, "learning_rate": 6.676471661528008e-06, "loss": 2.0408, "step": 18466 }, { "epoch": 0.61, "grad_norm": 0.7257627248764038, "learning_rate": 6.675469207380511e-06, "loss": 2.0541, "step": 18467 }, { "epoch": 0.61, "grad_norm": 0.7347812652587891, "learning_rate": 6.674466790791608e-06, "loss": 2.0675, "step": 18468 }, { "epoch": 0.61, "grad_norm": 0.7464412450790405, "learning_rate": 6.6734644117726324e-06, "loss": 2.0622, "step": 18469 }, { "epoch": 0.61, "grad_norm": 0.764533519744873, "learning_rate": 6.672462070334904e-06, "loss": 2.1465, "step": 18470 }, { "epoch": 0.61, "grad_norm": 0.7607544660568237, "learning_rate": 6.671459766489754e-06, "loss": 2.0871, "step": 18471 }, { "epoch": 0.61, "grad_norm": 0.7826836109161377, "learning_rate": 6.670457500248497e-06, "loss": 2.094, "step": 18472 }, { "epoch": 0.61, "grad_norm": 0.8203714489936829, "learning_rate": 6.66945527162246e-06, "loss": 2.0204, "step": 18473 }, { "epoch": 0.61, "grad_norm": 0.7409281730651855, "learning_rate": 6.6684530806229654e-06, "loss": 1.9983, "step": 18474 }, { "epoch": 0.61, "grad_norm": 0.7420737147331238, "learning_rate": 6.6674509272613364e-06, "loss": 2.047, "step": 18475 }, { "epoch": 0.61, "grad_norm": 0.7578684091567993, "learning_rate": 6.666448811548895e-06, "loss": 2.0877, "step": 18476 }, { "epoch": 0.61, "grad_norm": 0.7497484087944031, "learning_rate": 6.665446733496958e-06, "loss": 1.998, "step": 18477 }, { "epoch": 0.61, "grad_norm": 0.743179202079773, "learning_rate": 6.664444693116853e-06, "loss": 2.1187, "step": 18478 }, { "epoch": 0.61, "grad_norm": 0.7174262404441833, "learning_rate": 6.663442690419895e-06, "loss": 2.0759, "step": 18479 }, { "epoch": 0.61, "grad_norm": 0.74409419298172, "learning_rate": 6.662440725417409e-06, "loss": 2.0075, "step": 18480 }, { "epoch": 0.61, "grad_norm": 0.711876392364502, "learning_rate": 6.661438798120707e-06, "loss": 2.0686, "step": 18481 }, { "epoch": 0.61, "grad_norm": 0.7251047492027283, "learning_rate": 6.6604369085411146e-06, "loss": 1.9935, "step": 18482 }, { "epoch": 0.61, "grad_norm": 0.8006262183189392, "learning_rate": 6.659435056689949e-06, "loss": 1.9989, "step": 18483 }, { "epoch": 0.61, "grad_norm": 0.7704413533210754, "learning_rate": 6.658433242578528e-06, "loss": 2.0244, "step": 18484 }, { "epoch": 0.61, "grad_norm": 0.7554044723510742, "learning_rate": 6.6574314662181715e-06, "loss": 2.0108, "step": 18485 }, { "epoch": 0.62, "grad_norm": 0.7527334094047546, "learning_rate": 6.656429727620195e-06, "loss": 2.0894, "step": 18486 }, { "epoch": 0.62, "grad_norm": 0.7796379923820496, "learning_rate": 6.655428026795916e-06, "loss": 2.0312, "step": 18487 }, { "epoch": 0.62, "grad_norm": 0.7374343276023865, "learning_rate": 6.654426363756647e-06, "loss": 2.084, "step": 18488 }, { "epoch": 0.62, "grad_norm": 0.738320529460907, "learning_rate": 6.6534247385137164e-06, "loss": 2.046, "step": 18489 }, { "epoch": 0.62, "grad_norm": 0.7060432434082031, "learning_rate": 6.652423151078424e-06, "loss": 2.0325, "step": 18490 }, { "epoch": 0.62, "grad_norm": 0.8049686551094055, "learning_rate": 6.651421601462096e-06, "loss": 2.0477, "step": 18491 }, { "epoch": 0.62, "grad_norm": 0.7570608854293823, "learning_rate": 6.650420089676044e-06, "loss": 2.0288, "step": 18492 }, { "epoch": 0.62, "grad_norm": 0.7236107587814331, "learning_rate": 6.649418615731582e-06, "loss": 2.1105, "step": 18493 }, { "epoch": 0.62, "grad_norm": 0.7687537670135498, "learning_rate": 6.648417179640029e-06, "loss": 2.0354, "step": 18494 }, { "epoch": 0.62, "grad_norm": 0.702089250087738, "learning_rate": 6.64741578141269e-06, "loss": 2.069, "step": 18495 }, { "epoch": 0.62, "grad_norm": 0.7813162207603455, "learning_rate": 6.646414421060883e-06, "loss": 2.06, "step": 18496 }, { "epoch": 0.62, "grad_norm": 0.7599120140075684, "learning_rate": 6.6454130985959205e-06, "loss": 2.1108, "step": 18497 }, { "epoch": 0.62, "grad_norm": 0.7184450030326843, "learning_rate": 6.644411814029118e-06, "loss": 2.0256, "step": 18498 }, { "epoch": 0.62, "grad_norm": 0.748634934425354, "learning_rate": 6.643410567371782e-06, "loss": 2.1011, "step": 18499 }, { "epoch": 0.62, "grad_norm": 0.7581062316894531, "learning_rate": 6.642409358635227e-06, "loss": 2.0223, "step": 18500 }, { "epoch": 0.62, "grad_norm": 0.7567332983016968, "learning_rate": 6.641408187830762e-06, "loss": 2.051, "step": 18501 }, { "epoch": 0.62, "grad_norm": 0.724748432636261, "learning_rate": 6.640407054969702e-06, "loss": 2.0302, "step": 18502 }, { "epoch": 0.62, "grad_norm": 0.716160774230957, "learning_rate": 6.639405960063351e-06, "loss": 2.1197, "step": 18503 }, { "epoch": 0.62, "grad_norm": 0.7451995015144348, "learning_rate": 6.63840490312302e-06, "loss": 2.1023, "step": 18504 }, { "epoch": 0.62, "grad_norm": 0.7319174408912659, "learning_rate": 6.637403884160023e-06, "loss": 2.045, "step": 18505 }, { "epoch": 0.62, "grad_norm": 0.729198157787323, "learning_rate": 6.636402903185666e-06, "loss": 2.0904, "step": 18506 }, { "epoch": 0.62, "grad_norm": 0.723279595375061, "learning_rate": 6.6354019602112605e-06, "loss": 2.0464, "step": 18507 }, { "epoch": 0.62, "grad_norm": 0.755120038986206, "learning_rate": 6.63440105524811e-06, "loss": 2.0741, "step": 18508 }, { "epoch": 0.62, "grad_norm": 0.7346803545951843, "learning_rate": 6.633400188307523e-06, "loss": 2.0764, "step": 18509 }, { "epoch": 0.62, "grad_norm": 0.7358288168907166, "learning_rate": 6.632399359400805e-06, "loss": 2.0336, "step": 18510 }, { "epoch": 0.62, "grad_norm": 0.7369973063468933, "learning_rate": 6.631398568539273e-06, "loss": 2.0748, "step": 18511 }, { "epoch": 0.62, "grad_norm": 0.7449140548706055, "learning_rate": 6.630397815734219e-06, "loss": 2.075, "step": 18512 }, { "epoch": 0.62, "grad_norm": 0.7668919563293457, "learning_rate": 6.6293971009969574e-06, "loss": 2.089, "step": 18513 }, { "epoch": 0.62, "grad_norm": 0.740190863609314, "learning_rate": 6.6283964243387925e-06, "loss": 2.0374, "step": 18514 }, { "epoch": 0.62, "grad_norm": 0.7501717209815979, "learning_rate": 6.627395785771029e-06, "loss": 2.0612, "step": 18515 }, { "epoch": 0.62, "grad_norm": 0.7683904767036438, "learning_rate": 6.626395185304972e-06, "loss": 2.1385, "step": 18516 }, { "epoch": 0.62, "grad_norm": 0.7253684401512146, "learning_rate": 6.625394622951924e-06, "loss": 2.0448, "step": 18517 }, { "epoch": 0.62, "grad_norm": 0.7418583035469055, "learning_rate": 6.624394098723188e-06, "loss": 2.0304, "step": 18518 }, { "epoch": 0.62, "grad_norm": 0.738362193107605, "learning_rate": 6.6233936126300715e-06, "loss": 2.1297, "step": 18519 }, { "epoch": 0.62, "grad_norm": 0.7750780582427979, "learning_rate": 6.622393164683877e-06, "loss": 2.0816, "step": 18520 }, { "epoch": 0.62, "grad_norm": 0.7565974593162537, "learning_rate": 6.621392754895902e-06, "loss": 2.0151, "step": 18521 }, { "epoch": 0.62, "grad_norm": 0.7451688051223755, "learning_rate": 6.6203923832774534e-06, "loss": 2.0805, "step": 18522 }, { "epoch": 0.62, "grad_norm": 0.7435022592544556, "learning_rate": 6.61939204983983e-06, "loss": 2.087, "step": 18523 }, { "epoch": 0.62, "grad_norm": 0.7271678447723389, "learning_rate": 6.618391754594331e-06, "loss": 2.0815, "step": 18524 }, { "epoch": 0.62, "grad_norm": 0.7610463500022888, "learning_rate": 6.617391497552268e-06, "loss": 2.0309, "step": 18525 }, { "epoch": 0.62, "grad_norm": 0.7413537502288818, "learning_rate": 6.616391278724925e-06, "loss": 2.1002, "step": 18526 }, { "epoch": 0.62, "grad_norm": 0.7352351546287537, "learning_rate": 6.615391098123615e-06, "loss": 2.0511, "step": 18527 }, { "epoch": 0.62, "grad_norm": 0.7370828986167908, "learning_rate": 6.614390955759631e-06, "loss": 2.107, "step": 18528 }, { "epoch": 0.62, "grad_norm": 0.7311270833015442, "learning_rate": 6.613390851644277e-06, "loss": 2.0287, "step": 18529 }, { "epoch": 0.62, "grad_norm": 0.7523303627967834, "learning_rate": 6.6123907857888455e-06, "loss": 2.019, "step": 18530 }, { "epoch": 0.62, "grad_norm": 0.7598008513450623, "learning_rate": 6.6113907582046386e-06, "loss": 2.1078, "step": 18531 }, { "epoch": 0.62, "grad_norm": 0.7564973831176758, "learning_rate": 6.61039076890295e-06, "loss": 2.0271, "step": 18532 }, { "epoch": 0.62, "grad_norm": 0.7365027070045471, "learning_rate": 6.6093908178950875e-06, "loss": 2.0824, "step": 18533 }, { "epoch": 0.62, "grad_norm": 0.7344740629196167, "learning_rate": 6.608390905192332e-06, "loss": 2.0873, "step": 18534 }, { "epoch": 0.62, "grad_norm": 0.7407451868057251, "learning_rate": 6.607391030805992e-06, "loss": 2.0686, "step": 18535 }, { "epoch": 0.62, "grad_norm": 0.7146464586257935, "learning_rate": 6.606391194747359e-06, "loss": 2.0567, "step": 18536 }, { "epoch": 0.62, "grad_norm": 0.7395581007003784, "learning_rate": 6.605391397027728e-06, "loss": 2.046, "step": 18537 }, { "epoch": 0.62, "grad_norm": 0.7429301738739014, "learning_rate": 6.604391637658403e-06, "loss": 2.1161, "step": 18538 }, { "epoch": 0.62, "grad_norm": 0.751114010810852, "learning_rate": 6.603391916650665e-06, "loss": 2.1003, "step": 18539 }, { "epoch": 0.62, "grad_norm": 0.779699444770813, "learning_rate": 6.602392234015813e-06, "loss": 2.1002, "step": 18540 }, { "epoch": 0.62, "grad_norm": 0.7516820430755615, "learning_rate": 6.601392589765145e-06, "loss": 1.9803, "step": 18541 }, { "epoch": 0.62, "grad_norm": 0.764054536819458, "learning_rate": 6.600392983909953e-06, "loss": 1.9509, "step": 18542 }, { "epoch": 0.62, "grad_norm": 0.7106245160102844, "learning_rate": 6.5993934164615285e-06, "loss": 2.0499, "step": 18543 }, { "epoch": 0.62, "grad_norm": 0.7369493246078491, "learning_rate": 6.5983938874311615e-06, "loss": 2.0224, "step": 18544 }, { "epoch": 0.62, "grad_norm": 0.7746315002441406, "learning_rate": 6.597394396830146e-06, "loss": 2.1313, "step": 18545 }, { "epoch": 0.62, "grad_norm": 0.7222324013710022, "learning_rate": 6.596394944669777e-06, "loss": 2.0667, "step": 18546 }, { "epoch": 0.62, "grad_norm": 0.7578703165054321, "learning_rate": 6.595395530961346e-06, "loss": 2.1307, "step": 18547 }, { "epoch": 0.62, "grad_norm": 0.7602728009223938, "learning_rate": 6.594396155716136e-06, "loss": 2.0473, "step": 18548 }, { "epoch": 0.62, "grad_norm": 0.7471659183502197, "learning_rate": 6.593396818945444e-06, "loss": 2.103, "step": 18549 }, { "epoch": 0.62, "grad_norm": 0.7322835326194763, "learning_rate": 6.592397520660559e-06, "loss": 1.976, "step": 18550 }, { "epoch": 0.62, "grad_norm": 0.7507852911949158, "learning_rate": 6.591398260872772e-06, "loss": 2.0413, "step": 18551 }, { "epoch": 0.62, "grad_norm": 0.7564936280250549, "learning_rate": 6.590399039593367e-06, "loss": 2.0494, "step": 18552 }, { "epoch": 0.62, "grad_norm": 0.7578199505805969, "learning_rate": 6.589399856833634e-06, "loss": 2.0612, "step": 18553 }, { "epoch": 0.62, "grad_norm": 0.7551158666610718, "learning_rate": 6.588400712604863e-06, "loss": 2.0606, "step": 18554 }, { "epoch": 0.62, "grad_norm": 0.7667950987815857, "learning_rate": 6.587401606918346e-06, "loss": 1.9682, "step": 18555 }, { "epoch": 0.62, "grad_norm": 0.7982816696166992, "learning_rate": 6.586402539785361e-06, "loss": 2.1332, "step": 18556 }, { "epoch": 0.62, "grad_norm": 0.7543305158615112, "learning_rate": 6.585403511217201e-06, "loss": 2.0477, "step": 18557 }, { "epoch": 0.62, "grad_norm": 0.7680090069770813, "learning_rate": 6.58440452122515e-06, "loss": 2.079, "step": 18558 }, { "epoch": 0.62, "grad_norm": 0.7442122101783752, "learning_rate": 6.583405569820493e-06, "loss": 2.0377, "step": 18559 }, { "epoch": 0.62, "grad_norm": 0.7470167279243469, "learning_rate": 6.582406657014524e-06, "loss": 2.1016, "step": 18560 }, { "epoch": 0.62, "grad_norm": 0.7974061369895935, "learning_rate": 6.581407782818517e-06, "loss": 2.1421, "step": 18561 }, { "epoch": 0.62, "grad_norm": 0.7259982228279114, "learning_rate": 6.580408947243762e-06, "loss": 2.0858, "step": 18562 }, { "epoch": 0.62, "grad_norm": 0.7713364958763123, "learning_rate": 6.579410150301542e-06, "loss": 2.0368, "step": 18563 }, { "epoch": 0.62, "grad_norm": 0.7410104870796204, "learning_rate": 6.578411392003145e-06, "loss": 2.1292, "step": 18564 }, { "epoch": 0.62, "grad_norm": 0.7354673147201538, "learning_rate": 6.5774126723598485e-06, "loss": 2.0171, "step": 18565 }, { "epoch": 0.62, "grad_norm": 0.7544422745704651, "learning_rate": 6.5764139913829375e-06, "loss": 2.0134, "step": 18566 }, { "epoch": 0.62, "grad_norm": 0.7577709555625916, "learning_rate": 6.575415349083691e-06, "loss": 1.9614, "step": 18567 }, { "epoch": 0.62, "grad_norm": 0.7345360517501831, "learning_rate": 6.5744167454734e-06, "loss": 2.1248, "step": 18568 }, { "epoch": 0.62, "grad_norm": 0.7373293042182922, "learning_rate": 6.573418180563341e-06, "loss": 2.1422, "step": 18569 }, { "epoch": 0.62, "grad_norm": 0.7358099818229675, "learning_rate": 6.5724196543647945e-06, "loss": 2.0634, "step": 18570 }, { "epoch": 0.62, "grad_norm": 0.7530602812767029, "learning_rate": 6.57142116688904e-06, "loss": 2.0213, "step": 18571 }, { "epoch": 0.62, "grad_norm": 0.7092524170875549, "learning_rate": 6.570422718147362e-06, "loss": 1.9974, "step": 18572 }, { "epoch": 0.62, "grad_norm": 0.7512563467025757, "learning_rate": 6.56942430815104e-06, "loss": 2.0375, "step": 18573 }, { "epoch": 0.62, "grad_norm": 0.7542651295661926, "learning_rate": 6.568425936911349e-06, "loss": 2.079, "step": 18574 }, { "epoch": 0.62, "grad_norm": 0.7435977458953857, "learning_rate": 6.567427604439569e-06, "loss": 2.0053, "step": 18575 }, { "epoch": 0.62, "grad_norm": 0.7171084880828857, "learning_rate": 6.5664293107469816e-06, "loss": 2.0331, "step": 18576 }, { "epoch": 0.62, "grad_norm": 0.7775682210922241, "learning_rate": 6.565431055844864e-06, "loss": 2.0395, "step": 18577 }, { "epoch": 0.62, "grad_norm": 0.7534427642822266, "learning_rate": 6.564432839744494e-06, "loss": 2.0958, "step": 18578 }, { "epoch": 0.62, "grad_norm": 0.7390238046646118, "learning_rate": 6.563434662457149e-06, "loss": 2.0276, "step": 18579 }, { "epoch": 0.62, "grad_norm": 0.7491191029548645, "learning_rate": 6.562436523994105e-06, "loss": 2.0514, "step": 18580 }, { "epoch": 0.62, "grad_norm": 0.7336969971656799, "learning_rate": 6.561438424366634e-06, "loss": 2.0236, "step": 18581 }, { "epoch": 0.62, "grad_norm": 0.7265817523002625, "learning_rate": 6.560440363586025e-06, "loss": 2.0702, "step": 18582 }, { "epoch": 0.62, "grad_norm": 0.7386250495910645, "learning_rate": 6.559442341663538e-06, "loss": 2.0736, "step": 18583 }, { "epoch": 0.62, "grad_norm": 0.7195847034454346, "learning_rate": 6.558444358610457e-06, "loss": 2.0197, "step": 18584 }, { "epoch": 0.62, "grad_norm": 0.7293532490730286, "learning_rate": 6.557446414438053e-06, "loss": 2.1337, "step": 18585 }, { "epoch": 0.62, "grad_norm": 0.7479192018508911, "learning_rate": 6.556448509157607e-06, "loss": 2.0234, "step": 18586 }, { "epoch": 0.62, "grad_norm": 0.7440563440322876, "learning_rate": 6.555450642780383e-06, "loss": 2.0098, "step": 18587 }, { "epoch": 0.62, "grad_norm": 0.7220137715339661, "learning_rate": 6.55445281531766e-06, "loss": 1.9824, "step": 18588 }, { "epoch": 0.62, "grad_norm": 0.7574236392974854, "learning_rate": 6.553455026780709e-06, "loss": 2.0997, "step": 18589 }, { "epoch": 0.62, "grad_norm": 0.7628411650657654, "learning_rate": 6.552457277180804e-06, "loss": 1.9646, "step": 18590 }, { "epoch": 0.62, "grad_norm": 0.7317736148834229, "learning_rate": 6.551459566529218e-06, "loss": 2.0694, "step": 18591 }, { "epoch": 0.62, "grad_norm": 0.7322997450828552, "learning_rate": 6.550461894837219e-06, "loss": 2.0876, "step": 18592 }, { "epoch": 0.62, "grad_norm": 0.7850541472434998, "learning_rate": 6.54946426211608e-06, "loss": 2.1307, "step": 18593 }, { "epoch": 0.62, "grad_norm": 0.7619250416755676, "learning_rate": 6.548466668377072e-06, "loss": 2.0039, "step": 18594 }, { "epoch": 0.62, "grad_norm": 0.7589712738990784, "learning_rate": 6.547469113631466e-06, "loss": 2.0307, "step": 18595 }, { "epoch": 0.62, "grad_norm": 0.7439185976982117, "learning_rate": 6.546471597890529e-06, "loss": 2.0712, "step": 18596 }, { "epoch": 0.62, "grad_norm": 0.7332115769386292, "learning_rate": 6.54547412116553e-06, "loss": 2.0485, "step": 18597 }, { "epoch": 0.62, "grad_norm": 0.7299757599830627, "learning_rate": 6.544476683467742e-06, "loss": 2.1653, "step": 18598 }, { "epoch": 0.62, "grad_norm": 0.7329026460647583, "learning_rate": 6.543479284808429e-06, "loss": 1.9965, "step": 18599 }, { "epoch": 0.62, "grad_norm": 0.7692344784736633, "learning_rate": 6.542481925198867e-06, "loss": 2.1086, "step": 18600 }, { "epoch": 0.62, "grad_norm": 0.7431564331054688, "learning_rate": 6.541484604650314e-06, "loss": 2.0737, "step": 18601 }, { "epoch": 0.62, "grad_norm": 0.7647882699966431, "learning_rate": 6.540487323174041e-06, "loss": 2.1082, "step": 18602 }, { "epoch": 0.62, "grad_norm": 0.7459238171577454, "learning_rate": 6.539490080781312e-06, "loss": 2.1055, "step": 18603 }, { "epoch": 0.62, "grad_norm": 0.7486720085144043, "learning_rate": 6.538492877483405e-06, "loss": 2.1142, "step": 18604 }, { "epoch": 0.62, "grad_norm": 0.7434862852096558, "learning_rate": 6.537495713291569e-06, "loss": 2.09, "step": 18605 }, { "epoch": 0.62, "grad_norm": 0.7574500441551208, "learning_rate": 6.536498588217077e-06, "loss": 2.0853, "step": 18606 }, { "epoch": 0.62, "grad_norm": 0.7360900044441223, "learning_rate": 6.535501502271198e-06, "loss": 2.017, "step": 18607 }, { "epoch": 0.62, "grad_norm": 0.7576977610588074, "learning_rate": 6.534504455465189e-06, "loss": 2.0722, "step": 18608 }, { "epoch": 0.62, "grad_norm": 0.7284348607063293, "learning_rate": 6.533507447810322e-06, "loss": 1.9841, "step": 18609 }, { "epoch": 0.62, "grad_norm": 0.7146414518356323, "learning_rate": 6.532510479317854e-06, "loss": 2.0287, "step": 18610 }, { "epoch": 0.62, "grad_norm": 0.7438479661941528, "learning_rate": 6.531513549999048e-06, "loss": 2.0424, "step": 18611 }, { "epoch": 0.62, "grad_norm": 0.7490966320037842, "learning_rate": 6.530516659865171e-06, "loss": 2.0965, "step": 18612 }, { "epoch": 0.62, "grad_norm": 0.7352954149246216, "learning_rate": 6.529519808927487e-06, "loss": 2.1059, "step": 18613 }, { "epoch": 0.62, "grad_norm": 0.7383978366851807, "learning_rate": 6.528522997197251e-06, "loss": 2.01, "step": 18614 }, { "epoch": 0.62, "grad_norm": 0.7254314422607422, "learning_rate": 6.527526224685727e-06, "loss": 2.0492, "step": 18615 }, { "epoch": 0.62, "grad_norm": 0.7378753423690796, "learning_rate": 6.5265294914041775e-06, "loss": 2.043, "step": 18616 }, { "epoch": 0.62, "grad_norm": 0.7849466800689697, "learning_rate": 6.525532797363865e-06, "loss": 2.1109, "step": 18617 }, { "epoch": 0.62, "grad_norm": 0.7531061768531799, "learning_rate": 6.524536142576043e-06, "loss": 2.0784, "step": 18618 }, { "epoch": 0.62, "grad_norm": 0.7506963610649109, "learning_rate": 6.523539527051974e-06, "loss": 2.0142, "step": 18619 }, { "epoch": 0.62, "grad_norm": 0.7287465929985046, "learning_rate": 6.5225429508029194e-06, "loss": 2.1078, "step": 18620 }, { "epoch": 0.62, "grad_norm": 0.7326143383979797, "learning_rate": 6.521546413840137e-06, "loss": 2.0994, "step": 18621 }, { "epoch": 0.62, "grad_norm": 0.7535800337791443, "learning_rate": 6.520549916174888e-06, "loss": 1.9911, "step": 18622 }, { "epoch": 0.62, "grad_norm": 0.81272953748703, "learning_rate": 6.519553457818421e-06, "loss": 2.0249, "step": 18623 }, { "epoch": 0.62, "grad_norm": 0.7339419722557068, "learning_rate": 6.518557038782003e-06, "loss": 2.0275, "step": 18624 }, { "epoch": 0.62, "grad_norm": 0.8154991865158081, "learning_rate": 6.517560659076883e-06, "loss": 2.0438, "step": 18625 }, { "epoch": 0.62, "grad_norm": 0.7350296378135681, "learning_rate": 6.516564318714329e-06, "loss": 1.9909, "step": 18626 }, { "epoch": 0.62, "grad_norm": 0.7530209422111511, "learning_rate": 6.515568017705585e-06, "loss": 2.053, "step": 18627 }, { "epoch": 0.62, "grad_norm": 0.752149224281311, "learning_rate": 6.514571756061911e-06, "loss": 2.1196, "step": 18628 }, { "epoch": 0.62, "grad_norm": 0.7609082460403442, "learning_rate": 6.513575533794564e-06, "loss": 2.0659, "step": 18629 }, { "epoch": 0.62, "grad_norm": 0.7366597652435303, "learning_rate": 6.512579350914796e-06, "loss": 2.0386, "step": 18630 }, { "epoch": 0.62, "grad_norm": 0.7405533790588379, "learning_rate": 6.511583207433867e-06, "loss": 2.0984, "step": 18631 }, { "epoch": 0.62, "grad_norm": 0.7394817471504211, "learning_rate": 6.510587103363022e-06, "loss": 2.0218, "step": 18632 }, { "epoch": 0.62, "grad_norm": 0.7754571437835693, "learning_rate": 6.509591038713519e-06, "loss": 2.0161, "step": 18633 }, { "epoch": 0.62, "grad_norm": 0.7264158725738525, "learning_rate": 6.5085950134966114e-06, "loss": 2.0168, "step": 18634 }, { "epoch": 0.62, "grad_norm": 0.7486907839775085, "learning_rate": 6.507599027723554e-06, "loss": 2.0733, "step": 18635 }, { "epoch": 0.62, "grad_norm": 0.7322481870651245, "learning_rate": 6.506603081405593e-06, "loss": 2.0795, "step": 18636 }, { "epoch": 0.62, "grad_norm": 0.781262218952179, "learning_rate": 6.505607174553985e-06, "loss": 2.0985, "step": 18637 }, { "epoch": 0.62, "grad_norm": 0.7223289012908936, "learning_rate": 6.504611307179976e-06, "loss": 2.0166, "step": 18638 }, { "epoch": 0.62, "grad_norm": 0.7323281764984131, "learning_rate": 6.5036154792948215e-06, "loss": 2.0576, "step": 18639 }, { "epoch": 0.62, "grad_norm": 0.7427610754966736, "learning_rate": 6.502619690909775e-06, "loss": 2.1117, "step": 18640 }, { "epoch": 0.62, "grad_norm": 0.7252446413040161, "learning_rate": 6.501623942036075e-06, "loss": 2.0444, "step": 18641 }, { "epoch": 0.62, "grad_norm": 0.7693911790847778, "learning_rate": 6.50062823268498e-06, "loss": 2.0276, "step": 18642 }, { "epoch": 0.62, "grad_norm": 0.7529953122138977, "learning_rate": 6.499632562867736e-06, "loss": 2.0136, "step": 18643 }, { "epoch": 0.62, "grad_norm": 0.7839812636375427, "learning_rate": 6.498636932595594e-06, "loss": 1.9861, "step": 18644 }, { "epoch": 0.62, "grad_norm": 0.7708064317703247, "learning_rate": 6.497641341879799e-06, "loss": 2.0608, "step": 18645 }, { "epoch": 0.62, "grad_norm": 0.7637760639190674, "learning_rate": 6.496645790731595e-06, "loss": 2.0478, "step": 18646 }, { "epoch": 0.62, "grad_norm": 0.7299715280532837, "learning_rate": 6.495650279162237e-06, "loss": 2.0025, "step": 18647 }, { "epoch": 0.62, "grad_norm": 0.7631213665008545, "learning_rate": 6.494654807182974e-06, "loss": 2.0233, "step": 18648 }, { "epoch": 0.62, "grad_norm": 0.7270708084106445, "learning_rate": 6.493659374805039e-06, "loss": 2.0337, "step": 18649 }, { "epoch": 0.62, "grad_norm": 0.7305204272270203, "learning_rate": 6.492663982039687e-06, "loss": 2.0174, "step": 18650 }, { "epoch": 0.62, "grad_norm": 0.7252962589263916, "learning_rate": 6.4916686288981626e-06, "loss": 2.0308, "step": 18651 }, { "epoch": 0.62, "grad_norm": 0.7339796423912048, "learning_rate": 6.490673315391709e-06, "loss": 2.0419, "step": 18652 }, { "epoch": 0.62, "grad_norm": 0.7314265966415405, "learning_rate": 6.489678041531575e-06, "loss": 2.0869, "step": 18653 }, { "epoch": 0.62, "grad_norm": 0.7639754414558411, "learning_rate": 6.488682807328998e-06, "loss": 2.0728, "step": 18654 }, { "epoch": 0.62, "grad_norm": 0.7587751746177673, "learning_rate": 6.487687612795225e-06, "loss": 2.0064, "step": 18655 }, { "epoch": 0.62, "grad_norm": 0.7325262427330017, "learning_rate": 6.486692457941499e-06, "loss": 2.0074, "step": 18656 }, { "epoch": 0.62, "grad_norm": 0.7487002611160278, "learning_rate": 6.485697342779066e-06, "loss": 2.0582, "step": 18657 }, { "epoch": 0.62, "grad_norm": 0.7500911355018616, "learning_rate": 6.484702267319162e-06, "loss": 2.0243, "step": 18658 }, { "epoch": 0.62, "grad_norm": 0.736762285232544, "learning_rate": 6.483707231573031e-06, "loss": 2.0751, "step": 18659 }, { "epoch": 0.62, "grad_norm": 0.7285959720611572, "learning_rate": 6.482712235551913e-06, "loss": 2.0565, "step": 18660 }, { "epoch": 0.62, "grad_norm": 0.7394577264785767, "learning_rate": 6.481717279267055e-06, "loss": 2.0657, "step": 18661 }, { "epoch": 0.62, "grad_norm": 0.7303134202957153, "learning_rate": 6.480722362729692e-06, "loss": 2.0865, "step": 18662 }, { "epoch": 0.62, "grad_norm": 0.749218761920929, "learning_rate": 6.479727485951066e-06, "loss": 2.114, "step": 18663 }, { "epoch": 0.62, "grad_norm": 0.759756863117218, "learning_rate": 6.478732648942414e-06, "loss": 2.0115, "step": 18664 }, { "epoch": 0.62, "grad_norm": 0.7273774147033691, "learning_rate": 6.477737851714978e-06, "loss": 2.0354, "step": 18665 }, { "epoch": 0.62, "grad_norm": 0.7539119720458984, "learning_rate": 6.476743094279996e-06, "loss": 2.0247, "step": 18666 }, { "epoch": 0.62, "grad_norm": 0.7333539128303528, "learning_rate": 6.475748376648705e-06, "loss": 2.0584, "step": 18667 }, { "epoch": 0.62, "grad_norm": 0.7649315595626831, "learning_rate": 6.474753698832341e-06, "loss": 2.0007, "step": 18668 }, { "epoch": 0.62, "grad_norm": 0.7428023219108582, "learning_rate": 6.473759060842146e-06, "loss": 2.0572, "step": 18669 }, { "epoch": 0.62, "grad_norm": 0.7401738166809082, "learning_rate": 6.472764462689354e-06, "loss": 2.0553, "step": 18670 }, { "epoch": 0.62, "grad_norm": 0.7846989035606384, "learning_rate": 6.471769904385205e-06, "loss": 2.059, "step": 18671 }, { "epoch": 0.62, "grad_norm": 0.7468416094779968, "learning_rate": 6.470775385940928e-06, "loss": 2.0536, "step": 18672 }, { "epoch": 0.62, "grad_norm": 0.734138548374176, "learning_rate": 6.4697809073677645e-06, "loss": 2.0694, "step": 18673 }, { "epoch": 0.62, "grad_norm": 0.7326032519340515, "learning_rate": 6.468786468676944e-06, "loss": 2.0581, "step": 18674 }, { "epoch": 0.62, "grad_norm": 0.7980799674987793, "learning_rate": 6.4677920698797115e-06, "loss": 2.0984, "step": 18675 }, { "epoch": 0.62, "grad_norm": 0.7337396740913391, "learning_rate": 6.466797710987287e-06, "loss": 2.0581, "step": 18676 }, { "epoch": 0.62, "grad_norm": 0.7542121410369873, "learning_rate": 6.4658033920109145e-06, "loss": 2.0279, "step": 18677 }, { "epoch": 0.62, "grad_norm": 0.7093291878700256, "learning_rate": 6.464809112961823e-06, "loss": 2.0322, "step": 18678 }, { "epoch": 0.62, "grad_norm": 0.7377110719680786, "learning_rate": 6.4638148738512485e-06, "loss": 2.1181, "step": 18679 }, { "epoch": 0.62, "grad_norm": 0.7264137268066406, "learning_rate": 6.4628206746904195e-06, "loss": 2.0391, "step": 18680 }, { "epoch": 0.62, "grad_norm": 0.7288026213645935, "learning_rate": 6.46182651549057e-06, "loss": 1.9488, "step": 18681 }, { "epoch": 0.62, "grad_norm": 0.7632126212120056, "learning_rate": 6.460832396262929e-06, "loss": 2.0333, "step": 18682 }, { "epoch": 0.62, "grad_norm": 0.707691490650177, "learning_rate": 6.45983831701873e-06, "loss": 2.0451, "step": 18683 }, { "epoch": 0.62, "grad_norm": 0.7263169884681702, "learning_rate": 6.458844277769205e-06, "loss": 2.0973, "step": 18684 }, { "epoch": 0.62, "grad_norm": 0.8025210499763489, "learning_rate": 6.457850278525581e-06, "loss": 2.1304, "step": 18685 }, { "epoch": 0.62, "grad_norm": 0.7579771280288696, "learning_rate": 6.456856319299089e-06, "loss": 2.0455, "step": 18686 }, { "epoch": 0.62, "grad_norm": 0.7365447282791138, "learning_rate": 6.4558624001009565e-06, "loss": 2.0863, "step": 18687 }, { "epoch": 0.62, "grad_norm": 0.7418259382247925, "learning_rate": 6.4548685209424164e-06, "loss": 2.1442, "step": 18688 }, { "epoch": 0.62, "grad_norm": 0.7231990694999695, "learning_rate": 6.453874681834692e-06, "loss": 2.0924, "step": 18689 }, { "epoch": 0.62, "grad_norm": 0.7420551180839539, "learning_rate": 6.452880882789011e-06, "loss": 2.0362, "step": 18690 }, { "epoch": 0.62, "grad_norm": 0.7739832401275635, "learning_rate": 6.451887123816605e-06, "loss": 2.1134, "step": 18691 }, { "epoch": 0.62, "grad_norm": 0.7566604018211365, "learning_rate": 6.450893404928699e-06, "loss": 2.0859, "step": 18692 }, { "epoch": 0.62, "grad_norm": 0.7582191228866577, "learning_rate": 6.449899726136521e-06, "loss": 2.0834, "step": 18693 }, { "epoch": 0.62, "grad_norm": 0.7484259605407715, "learning_rate": 6.448906087451294e-06, "loss": 2.0583, "step": 18694 }, { "epoch": 0.62, "grad_norm": 0.739743173122406, "learning_rate": 6.447912488884245e-06, "loss": 2.0309, "step": 18695 }, { "epoch": 0.62, "grad_norm": 0.7430228590965271, "learning_rate": 6.446918930446596e-06, "loss": 2.0861, "step": 18696 }, { "epoch": 0.62, "grad_norm": 0.7674102187156677, "learning_rate": 6.445925412149581e-06, "loss": 2.0436, "step": 18697 }, { "epoch": 0.62, "grad_norm": 0.7373661398887634, "learning_rate": 6.444931934004412e-06, "loss": 2.0017, "step": 18698 }, { "epoch": 0.62, "grad_norm": 0.8076265454292297, "learning_rate": 6.44393849602232e-06, "loss": 2.0765, "step": 18699 }, { "epoch": 0.62, "grad_norm": 0.7347669005393982, "learning_rate": 6.442945098214528e-06, "loss": 2.0545, "step": 18700 }, { "epoch": 0.62, "grad_norm": 0.7427253723144531, "learning_rate": 6.441951740592256e-06, "loss": 2.0403, "step": 18701 }, { "epoch": 0.62, "grad_norm": 0.7446337342262268, "learning_rate": 6.440958423166732e-06, "loss": 2.017, "step": 18702 }, { "epoch": 0.62, "grad_norm": 0.7308014035224915, "learning_rate": 6.439965145949171e-06, "loss": 1.9847, "step": 18703 }, { "epoch": 0.62, "grad_norm": 0.7425352931022644, "learning_rate": 6.438971908950795e-06, "loss": 2.0106, "step": 18704 }, { "epoch": 0.62, "grad_norm": 0.7417636513710022, "learning_rate": 6.4379787121828286e-06, "loss": 2.1202, "step": 18705 }, { "epoch": 0.62, "grad_norm": 0.7295835018157959, "learning_rate": 6.436985555656495e-06, "loss": 2.0216, "step": 18706 }, { "epoch": 0.62, "grad_norm": 0.7420196533203125, "learning_rate": 6.435992439383007e-06, "loss": 2.0603, "step": 18707 }, { "epoch": 0.62, "grad_norm": 0.7313871383666992, "learning_rate": 6.434999363373589e-06, "loss": 2.0297, "step": 18708 }, { "epoch": 0.62, "grad_norm": 0.7531962990760803, "learning_rate": 6.4340063276394585e-06, "loss": 2.1336, "step": 18709 }, { "epoch": 0.62, "grad_norm": 0.7855445146560669, "learning_rate": 6.433013332191837e-06, "loss": 2.0704, "step": 18710 }, { "epoch": 0.62, "grad_norm": 0.732207715511322, "learning_rate": 6.432020377041937e-06, "loss": 2.0647, "step": 18711 }, { "epoch": 0.62, "grad_norm": 0.7460048794746399, "learning_rate": 6.431027462200979e-06, "loss": 2.035, "step": 18712 }, { "epoch": 0.62, "grad_norm": 0.7749593257904053, "learning_rate": 6.430034587680183e-06, "loss": 2.0897, "step": 18713 }, { "epoch": 0.62, "grad_norm": 0.7406306862831116, "learning_rate": 6.429041753490763e-06, "loss": 2.0204, "step": 18714 }, { "epoch": 0.62, "grad_norm": 0.7183247804641724, "learning_rate": 6.42804895964394e-06, "loss": 2.0585, "step": 18715 }, { "epoch": 0.62, "grad_norm": 0.7433620691299438, "learning_rate": 6.427056206150923e-06, "loss": 2.0462, "step": 18716 }, { "epoch": 0.62, "grad_norm": 0.7360041737556458, "learning_rate": 6.4260634930229315e-06, "loss": 2.1453, "step": 18717 }, { "epoch": 0.62, "grad_norm": 0.7492828965187073, "learning_rate": 6.425070820271178e-06, "loss": 2.0092, "step": 18718 }, { "epoch": 0.62, "grad_norm": 0.7267391681671143, "learning_rate": 6.424078187906886e-06, "loss": 2.0884, "step": 18719 }, { "epoch": 0.62, "grad_norm": 0.7435503005981445, "learning_rate": 6.423085595941256e-06, "loss": 2.0289, "step": 18720 }, { "epoch": 0.62, "grad_norm": 0.7530199289321899, "learning_rate": 6.4220930443855114e-06, "loss": 2.0705, "step": 18721 }, { "epoch": 0.62, "grad_norm": 0.7416803240776062, "learning_rate": 6.421100533250864e-06, "loss": 2.0462, "step": 18722 }, { "epoch": 0.62, "grad_norm": 0.7259017825126648, "learning_rate": 6.420108062548522e-06, "loss": 2.0659, "step": 18723 }, { "epoch": 0.62, "grad_norm": 0.743595540523529, "learning_rate": 6.419115632289706e-06, "loss": 1.9893, "step": 18724 }, { "epoch": 0.62, "grad_norm": 0.7349228262901306, "learning_rate": 6.418123242485619e-06, "loss": 2.0662, "step": 18725 }, { "epoch": 0.62, "grad_norm": 0.7312232851982117, "learning_rate": 6.417130893147476e-06, "loss": 2.0, "step": 18726 }, { "epoch": 0.62, "grad_norm": 0.744740903377533, "learning_rate": 6.416138584286489e-06, "loss": 2.074, "step": 18727 }, { "epoch": 0.62, "grad_norm": 0.7381023168563843, "learning_rate": 6.415146315913872e-06, "loss": 2.0204, "step": 18728 }, { "epoch": 0.62, "grad_norm": 0.7310817241668701, "learning_rate": 6.414154088040827e-06, "loss": 2.1035, "step": 18729 }, { "epoch": 0.62, "grad_norm": 0.730907142162323, "learning_rate": 6.413161900678568e-06, "loss": 2.0759, "step": 18730 }, { "epoch": 0.62, "grad_norm": 0.7264283299446106, "learning_rate": 6.412169753838304e-06, "loss": 2.0223, "step": 18731 }, { "epoch": 0.62, "grad_norm": 0.7308387756347656, "learning_rate": 6.411177647531246e-06, "loss": 2.0935, "step": 18732 }, { "epoch": 0.62, "grad_norm": 0.7343766093254089, "learning_rate": 6.410185581768596e-06, "loss": 2.0389, "step": 18733 }, { "epoch": 0.62, "grad_norm": 0.7109951972961426, "learning_rate": 6.409193556561566e-06, "loss": 1.964, "step": 18734 }, { "epoch": 0.62, "grad_norm": 0.7619087100028992, "learning_rate": 6.4082015719213616e-06, "loss": 2.0308, "step": 18735 }, { "epoch": 0.62, "grad_norm": 0.7385216951370239, "learning_rate": 6.407209627859194e-06, "loss": 2.045, "step": 18736 }, { "epoch": 0.62, "grad_norm": 0.7368202209472656, "learning_rate": 6.406217724386267e-06, "loss": 2.0764, "step": 18737 }, { "epoch": 0.62, "grad_norm": 0.7617353796958923, "learning_rate": 6.405225861513784e-06, "loss": 2.0652, "step": 18738 }, { "epoch": 0.62, "grad_norm": 0.7459923624992371, "learning_rate": 6.404234039252952e-06, "loss": 2.0529, "step": 18739 }, { "epoch": 0.62, "grad_norm": 0.7317358255386353, "learning_rate": 6.403242257614974e-06, "loss": 2.0328, "step": 18740 }, { "epoch": 0.62, "grad_norm": 0.7320467233657837, "learning_rate": 6.402250516611066e-06, "loss": 2.0343, "step": 18741 }, { "epoch": 0.62, "grad_norm": 0.7482047080993652, "learning_rate": 6.401258816252415e-06, "loss": 2.0868, "step": 18742 }, { "epoch": 0.62, "grad_norm": 0.7384339570999146, "learning_rate": 6.400267156550235e-06, "loss": 2.0375, "step": 18743 }, { "epoch": 0.62, "grad_norm": 0.7386241555213928, "learning_rate": 6.3992755375157266e-06, "loss": 2.0985, "step": 18744 }, { "epoch": 0.62, "grad_norm": 0.7524778246879578, "learning_rate": 6.398283959160092e-06, "loss": 2.1486, "step": 18745 }, { "epoch": 0.62, "grad_norm": 0.749251127243042, "learning_rate": 6.3972924214945386e-06, "loss": 2.0073, "step": 18746 }, { "epoch": 0.62, "grad_norm": 0.7471858859062195, "learning_rate": 6.39630092453026e-06, "loss": 2.0451, "step": 18747 }, { "epoch": 0.62, "grad_norm": 0.7485433220863342, "learning_rate": 6.395309468278463e-06, "loss": 2.1109, "step": 18748 }, { "epoch": 0.62, "grad_norm": 0.737152099609375, "learning_rate": 6.3943180527503464e-06, "loss": 1.983, "step": 18749 }, { "epoch": 0.62, "grad_norm": 0.7454278469085693, "learning_rate": 6.3933266779571146e-06, "loss": 2.108, "step": 18750 }, { "epoch": 0.62, "grad_norm": 0.7259944081306458, "learning_rate": 6.392335343909961e-06, "loss": 1.9763, "step": 18751 }, { "epoch": 0.62, "grad_norm": 0.7083936333656311, "learning_rate": 6.3913440506200896e-06, "loss": 2.0154, "step": 18752 }, { "epoch": 0.62, "grad_norm": 0.7520551085472107, "learning_rate": 6.390352798098695e-06, "loss": 2.0173, "step": 18753 }, { "epoch": 0.62, "grad_norm": 0.7846324443817139, "learning_rate": 6.3893615863569816e-06, "loss": 2.0291, "step": 18754 }, { "epoch": 0.62, "grad_norm": 0.7679775357246399, "learning_rate": 6.388370415406147e-06, "loss": 2.0628, "step": 18755 }, { "epoch": 0.62, "grad_norm": 0.7163739204406738, "learning_rate": 6.387379285257385e-06, "loss": 2.044, "step": 18756 }, { "epoch": 0.62, "grad_norm": 0.7470616698265076, "learning_rate": 6.386388195921893e-06, "loss": 2.0238, "step": 18757 }, { "epoch": 0.62, "grad_norm": 0.7423052787780762, "learning_rate": 6.385397147410873e-06, "loss": 2.0296, "step": 18758 }, { "epoch": 0.62, "grad_norm": 0.7891109585762024, "learning_rate": 6.3844061397355166e-06, "loss": 2.0465, "step": 18759 }, { "epoch": 0.62, "grad_norm": 0.7684121131896973, "learning_rate": 6.383415172907019e-06, "loss": 2.0532, "step": 18760 }, { "epoch": 0.62, "grad_norm": 0.7775060534477234, "learning_rate": 6.382424246936576e-06, "loss": 2.0173, "step": 18761 }, { "epoch": 0.62, "grad_norm": 0.7461726069450378, "learning_rate": 6.381433361835385e-06, "loss": 2.0323, "step": 18762 }, { "epoch": 0.62, "grad_norm": 0.7559880018234253, "learning_rate": 6.380442517614643e-06, "loss": 2.0817, "step": 18763 }, { "epoch": 0.62, "grad_norm": 0.7563473582267761, "learning_rate": 6.379451714285536e-06, "loss": 2.0937, "step": 18764 }, { "epoch": 0.62, "grad_norm": 0.7257256507873535, "learning_rate": 6.378460951859262e-06, "loss": 2.0808, "step": 18765 }, { "epoch": 0.62, "grad_norm": 0.7032012343406677, "learning_rate": 6.3774702303470134e-06, "loss": 2.0326, "step": 18766 }, { "epoch": 0.62, "grad_norm": 0.7205162048339844, "learning_rate": 6.376479549759983e-06, "loss": 2.0323, "step": 18767 }, { "epoch": 0.62, "grad_norm": 0.7723408937454224, "learning_rate": 6.375488910109369e-06, "loss": 2.0689, "step": 18768 }, { "epoch": 0.62, "grad_norm": 0.7330254912376404, "learning_rate": 6.37449831140635e-06, "loss": 2.0783, "step": 18769 }, { "epoch": 0.62, "grad_norm": 0.755257785320282, "learning_rate": 6.373507753662126e-06, "loss": 2.0329, "step": 18770 }, { "epoch": 0.62, "grad_norm": 0.7577824592590332, "learning_rate": 6.372517236887886e-06, "loss": 2.1095, "step": 18771 }, { "epoch": 0.62, "grad_norm": 0.7358545660972595, "learning_rate": 6.3715267610948235e-06, "loss": 2.0193, "step": 18772 }, { "epoch": 0.62, "grad_norm": 0.7600029706954956, "learning_rate": 6.370536326294122e-06, "loss": 2.052, "step": 18773 }, { "epoch": 0.62, "grad_norm": 0.7242233753204346, "learning_rate": 6.3695459324969745e-06, "loss": 2.0666, "step": 18774 }, { "epoch": 0.62, "grad_norm": 0.7165502309799194, "learning_rate": 6.368555579714568e-06, "loss": 2.012, "step": 18775 }, { "epoch": 0.62, "grad_norm": 0.7487038969993591, "learning_rate": 6.3675652679580935e-06, "loss": 2.0192, "step": 18776 }, { "epoch": 0.62, "grad_norm": 0.76726895570755, "learning_rate": 6.3665749972387405e-06, "loss": 2.0843, "step": 18777 }, { "epoch": 0.62, "grad_norm": 0.7534230351448059, "learning_rate": 6.365584767567692e-06, "loss": 2.0355, "step": 18778 }, { "epoch": 0.62, "grad_norm": 0.7393602132797241, "learning_rate": 6.3645945789561355e-06, "loss": 2.0991, "step": 18779 }, { "epoch": 0.62, "grad_norm": 0.7179310321807861, "learning_rate": 6.3636044314152605e-06, "loss": 2.0137, "step": 18780 }, { "epoch": 0.62, "grad_norm": 0.7253159284591675, "learning_rate": 6.362614324956251e-06, "loss": 2.0459, "step": 18781 }, { "epoch": 0.62, "grad_norm": 0.7539914846420288, "learning_rate": 6.361624259590293e-06, "loss": 2.1153, "step": 18782 }, { "epoch": 0.62, "grad_norm": 0.7424477934837341, "learning_rate": 6.36063423532857e-06, "loss": 2.0425, "step": 18783 }, { "epoch": 0.62, "grad_norm": 0.7200141549110413, "learning_rate": 6.35964425218227e-06, "loss": 2.0172, "step": 18784 }, { "epoch": 0.62, "grad_norm": 0.7303518652915955, "learning_rate": 6.3586543101625755e-06, "loss": 2.0619, "step": 18785 }, { "epoch": 0.63, "grad_norm": 0.7125300765037537, "learning_rate": 6.357664409280673e-06, "loss": 1.9523, "step": 18786 }, { "epoch": 0.63, "grad_norm": 0.758920431137085, "learning_rate": 6.356674549547741e-06, "loss": 2.0219, "step": 18787 }, { "epoch": 0.63, "grad_norm": 0.7676115036010742, "learning_rate": 6.355684730974965e-06, "loss": 2.1011, "step": 18788 }, { "epoch": 0.63, "grad_norm": 0.7601562738418579, "learning_rate": 6.354694953573526e-06, "loss": 2.0497, "step": 18789 }, { "epoch": 0.63, "grad_norm": 0.7763060927391052, "learning_rate": 6.3537052173546125e-06, "loss": 2.0567, "step": 18790 }, { "epoch": 0.63, "grad_norm": 0.7505967020988464, "learning_rate": 6.352715522329394e-06, "loss": 2.1275, "step": 18791 }, { "epoch": 0.63, "grad_norm": 0.7501811981201172, "learning_rate": 6.35172586850906e-06, "loss": 2.0197, "step": 18792 }, { "epoch": 0.63, "grad_norm": 0.7911760807037354, "learning_rate": 6.3507362559047895e-06, "loss": 2.1185, "step": 18793 }, { "epoch": 0.63, "grad_norm": 0.7416780591011047, "learning_rate": 6.349746684527763e-06, "loss": 2.0292, "step": 18794 }, { "epoch": 0.63, "grad_norm": 0.7451139092445374, "learning_rate": 6.348757154389158e-06, "loss": 2.0702, "step": 18795 }, { "epoch": 0.63, "grad_norm": 0.7680156826972961, "learning_rate": 6.3477676655001555e-06, "loss": 2.0738, "step": 18796 }, { "epoch": 0.63, "grad_norm": 0.7559218406677246, "learning_rate": 6.346778217871931e-06, "loss": 2.0396, "step": 18797 }, { "epoch": 0.63, "grad_norm": 0.7054615020751953, "learning_rate": 6.345788811515667e-06, "loss": 1.9979, "step": 18798 }, { "epoch": 0.63, "grad_norm": 0.7205617427825928, "learning_rate": 6.344799446442542e-06, "loss": 2.0038, "step": 18799 }, { "epoch": 0.63, "grad_norm": 0.7144215703010559, "learning_rate": 6.343810122663727e-06, "loss": 2.066, "step": 18800 }, { "epoch": 0.63, "grad_norm": 0.7509363889694214, "learning_rate": 6.342820840190404e-06, "loss": 2.0233, "step": 18801 }, { "epoch": 0.63, "grad_norm": 0.7284533381462097, "learning_rate": 6.3418315990337474e-06, "loss": 2.0219, "step": 18802 }, { "epoch": 0.63, "grad_norm": 0.7296149730682373, "learning_rate": 6.340842399204936e-06, "loss": 2.0583, "step": 18803 }, { "epoch": 0.63, "grad_norm": 0.7395448684692383, "learning_rate": 6.339853240715141e-06, "loss": 2.0851, "step": 18804 }, { "epoch": 0.63, "grad_norm": 0.7388550043106079, "learning_rate": 6.338864123575537e-06, "loss": 2.0942, "step": 18805 }, { "epoch": 0.63, "grad_norm": 0.7206251621246338, "learning_rate": 6.337875047797302e-06, "loss": 2.0826, "step": 18806 }, { "epoch": 0.63, "grad_norm": 0.7091691493988037, "learning_rate": 6.336886013391607e-06, "loss": 2.0247, "step": 18807 }, { "epoch": 0.63, "grad_norm": 0.7675430178642273, "learning_rate": 6.33589702036963e-06, "loss": 2.041, "step": 18808 }, { "epoch": 0.63, "grad_norm": 0.7665770053863525, "learning_rate": 6.334908068742541e-06, "loss": 2.0008, "step": 18809 }, { "epoch": 0.63, "grad_norm": 0.7505615949630737, "learning_rate": 6.333919158521511e-06, "loss": 2.0334, "step": 18810 }, { "epoch": 0.63, "grad_norm": 0.7185102701187134, "learning_rate": 6.332930289717711e-06, "loss": 2.0753, "step": 18811 }, { "epoch": 0.63, "grad_norm": 0.7296304106712341, "learning_rate": 6.3319414623423234e-06, "loss": 2.0587, "step": 18812 }, { "epoch": 0.63, "grad_norm": 0.7443856596946716, "learning_rate": 6.330952676406503e-06, "loss": 2.0348, "step": 18813 }, { "epoch": 0.63, "grad_norm": 0.7643793821334839, "learning_rate": 6.329963931921433e-06, "loss": 2.0405, "step": 18814 }, { "epoch": 0.63, "grad_norm": 0.8094752430915833, "learning_rate": 6.3289752288982795e-06, "loss": 2.1057, "step": 18815 }, { "epoch": 0.63, "grad_norm": 0.7543708086013794, "learning_rate": 6.327986567348212e-06, "loss": 2.0209, "step": 18816 }, { "epoch": 0.63, "grad_norm": 0.7493652105331421, "learning_rate": 6.3269979472824025e-06, "loss": 1.9915, "step": 18817 }, { "epoch": 0.63, "grad_norm": 0.7479726672172546, "learning_rate": 6.326009368712016e-06, "loss": 2.0059, "step": 18818 }, { "epoch": 0.63, "grad_norm": 0.7446410059928894, "learning_rate": 6.3250208316482185e-06, "loss": 2.0518, "step": 18819 }, { "epoch": 0.63, "grad_norm": 0.7375264763832092, "learning_rate": 6.324032336102186e-06, "loss": 2.0219, "step": 18820 }, { "epoch": 0.63, "grad_norm": 0.7324852347373962, "learning_rate": 6.323043882085084e-06, "loss": 2.0326, "step": 18821 }, { "epoch": 0.63, "grad_norm": 0.752420961856842, "learning_rate": 6.322055469608074e-06, "loss": 2.0192, "step": 18822 }, { "epoch": 0.63, "grad_norm": 0.7476141452789307, "learning_rate": 6.321067098682327e-06, "loss": 2.0403, "step": 18823 }, { "epoch": 0.63, "grad_norm": 0.773668110370636, "learning_rate": 6.320078769319007e-06, "loss": 2.0494, "step": 18824 }, { "epoch": 0.63, "grad_norm": 0.7394488453865051, "learning_rate": 6.319090481529282e-06, "loss": 2.0447, "step": 18825 }, { "epoch": 0.63, "grad_norm": 0.7454477548599243, "learning_rate": 6.318102235324315e-06, "loss": 2.038, "step": 18826 }, { "epoch": 0.63, "grad_norm": 0.7297065258026123, "learning_rate": 6.317114030715268e-06, "loss": 2.0683, "step": 18827 }, { "epoch": 0.63, "grad_norm": 0.7349576354026794, "learning_rate": 6.31612586771331e-06, "loss": 1.9774, "step": 18828 }, { "epoch": 0.63, "grad_norm": 0.7506108283996582, "learning_rate": 6.315137746329603e-06, "loss": 2.1085, "step": 18829 }, { "epoch": 0.63, "grad_norm": 0.7347702980041504, "learning_rate": 6.314149666575313e-06, "loss": 2.0122, "step": 18830 }, { "epoch": 0.63, "grad_norm": 0.743095874786377, "learning_rate": 6.3131616284615975e-06, "loss": 2.056, "step": 18831 }, { "epoch": 0.63, "grad_norm": 0.7406293749809265, "learning_rate": 6.312173631999621e-06, "loss": 2.0326, "step": 18832 }, { "epoch": 0.63, "grad_norm": 0.7895215749740601, "learning_rate": 6.311185677200544e-06, "loss": 1.9859, "step": 18833 }, { "epoch": 0.63, "grad_norm": 0.7579501867294312, "learning_rate": 6.310197764075536e-06, "loss": 2.0292, "step": 18834 }, { "epoch": 0.63, "grad_norm": 0.758316159248352, "learning_rate": 6.3092098926357435e-06, "loss": 2.0597, "step": 18835 }, { "epoch": 0.63, "grad_norm": 0.7564467191696167, "learning_rate": 6.308222062892337e-06, "loss": 2.0556, "step": 18836 }, { "epoch": 0.63, "grad_norm": 0.7399389743804932, "learning_rate": 6.307234274856476e-06, "loss": 2.0491, "step": 18837 }, { "epoch": 0.63, "grad_norm": 0.7551249861717224, "learning_rate": 6.3062465285393144e-06, "loss": 2.0523, "step": 18838 }, { "epoch": 0.63, "grad_norm": 0.7298482656478882, "learning_rate": 6.305258823952021e-06, "loss": 1.9853, "step": 18839 }, { "epoch": 0.63, "grad_norm": 0.7758609056472778, "learning_rate": 6.304271161105744e-06, "loss": 2.0415, "step": 18840 }, { "epoch": 0.63, "grad_norm": 0.7690340876579285, "learning_rate": 6.3032835400116444e-06, "loss": 2.0893, "step": 18841 }, { "epoch": 0.63, "grad_norm": 0.7569836974143982, "learning_rate": 6.302295960680882e-06, "loss": 2.1109, "step": 18842 }, { "epoch": 0.63, "grad_norm": 0.7443965673446655, "learning_rate": 6.3013084231246145e-06, "loss": 2.0124, "step": 18843 }, { "epoch": 0.63, "grad_norm": 0.7283071875572205, "learning_rate": 6.300320927353995e-06, "loss": 2.032, "step": 18844 }, { "epoch": 0.63, "grad_norm": 0.7403337955474854, "learning_rate": 6.299333473380181e-06, "loss": 2.0412, "step": 18845 }, { "epoch": 0.63, "grad_norm": 0.7507011294364929, "learning_rate": 6.298346061214328e-06, "loss": 1.9751, "step": 18846 }, { "epoch": 0.63, "grad_norm": 0.7596787214279175, "learning_rate": 6.297358690867592e-06, "loss": 2.0802, "step": 18847 }, { "epoch": 0.63, "grad_norm": 0.7611180543899536, "learning_rate": 6.296371362351131e-06, "loss": 2.0374, "step": 18848 }, { "epoch": 0.63, "grad_norm": 0.797439694404602, "learning_rate": 6.295384075676094e-06, "loss": 2.0166, "step": 18849 }, { "epoch": 0.63, "grad_norm": 0.7259639501571655, "learning_rate": 6.2943968308536355e-06, "loss": 2.075, "step": 18850 }, { "epoch": 0.63, "grad_norm": 0.7475815415382385, "learning_rate": 6.293409627894911e-06, "loss": 2.0614, "step": 18851 }, { "epoch": 0.63, "grad_norm": 0.782560408115387, "learning_rate": 6.292422466811072e-06, "loss": 2.0379, "step": 18852 }, { "epoch": 0.63, "grad_norm": 0.7449297904968262, "learning_rate": 6.291435347613271e-06, "loss": 2.086, "step": 18853 }, { "epoch": 0.63, "grad_norm": 0.7293553948402405, "learning_rate": 6.290448270312659e-06, "loss": 2.0851, "step": 18854 }, { "epoch": 0.63, "grad_norm": 0.7401352524757385, "learning_rate": 6.289461234920389e-06, "loss": 1.9794, "step": 18855 }, { "epoch": 0.63, "grad_norm": 0.7567707896232605, "learning_rate": 6.288474241447613e-06, "loss": 2.0699, "step": 18856 }, { "epoch": 0.63, "grad_norm": 0.7312875986099243, "learning_rate": 6.2874872899054785e-06, "loss": 2.0306, "step": 18857 }, { "epoch": 0.63, "grad_norm": 0.7319051027297974, "learning_rate": 6.286500380305136e-06, "loss": 2.0655, "step": 18858 }, { "epoch": 0.63, "grad_norm": 0.7353688478469849, "learning_rate": 6.285513512657737e-06, "loss": 2.0912, "step": 18859 }, { "epoch": 0.63, "grad_norm": 0.7498114705085754, "learning_rate": 6.284526686974427e-06, "loss": 2.0623, "step": 18860 }, { "epoch": 0.63, "grad_norm": 0.7116929888725281, "learning_rate": 6.2835399032663636e-06, "loss": 2.0806, "step": 18861 }, { "epoch": 0.63, "grad_norm": 0.7725409865379333, "learning_rate": 6.2825531615446824e-06, "loss": 2.0085, "step": 18862 }, { "epoch": 0.63, "grad_norm": 0.7499915361404419, "learning_rate": 6.281566461820537e-06, "loss": 2.0156, "step": 18863 }, { "epoch": 0.63, "grad_norm": 0.7374317049980164, "learning_rate": 6.280579804105076e-06, "loss": 1.9742, "step": 18864 }, { "epoch": 0.63, "grad_norm": 0.7191151976585388, "learning_rate": 6.2795931884094475e-06, "loss": 2.1051, "step": 18865 }, { "epoch": 0.63, "grad_norm": 0.746450662612915, "learning_rate": 6.278606614744791e-06, "loss": 2.0737, "step": 18866 }, { "epoch": 0.63, "grad_norm": 0.7371501326560974, "learning_rate": 6.2776200831222564e-06, "loss": 2.013, "step": 18867 }, { "epoch": 0.63, "grad_norm": 0.7405148148536682, "learning_rate": 6.2766335935529875e-06, "loss": 2.0381, "step": 18868 }, { "epoch": 0.63, "grad_norm": 0.73369300365448, "learning_rate": 6.275647146048132e-06, "loss": 2.0712, "step": 18869 }, { "epoch": 0.63, "grad_norm": 0.7255458235740662, "learning_rate": 6.2746607406188335e-06, "loss": 2.0127, "step": 18870 }, { "epoch": 0.63, "grad_norm": 0.7474572062492371, "learning_rate": 6.273674377276233e-06, "loss": 2.0622, "step": 18871 }, { "epoch": 0.63, "grad_norm": 0.7415841817855835, "learning_rate": 6.272688056031475e-06, "loss": 2.0033, "step": 18872 }, { "epoch": 0.63, "grad_norm": 0.7845476269721985, "learning_rate": 6.271701776895704e-06, "loss": 2.1478, "step": 18873 }, { "epoch": 0.63, "grad_norm": 0.7431630492210388, "learning_rate": 6.270715539880063e-06, "loss": 2.0553, "step": 18874 }, { "epoch": 0.63, "grad_norm": 0.7332531809806824, "learning_rate": 6.26972934499569e-06, "loss": 2.0915, "step": 18875 }, { "epoch": 0.63, "grad_norm": 0.7215174436569214, "learning_rate": 6.268743192253726e-06, "loss": 2.0074, "step": 18876 }, { "epoch": 0.63, "grad_norm": 0.7457708716392517, "learning_rate": 6.267757081665318e-06, "loss": 2.0066, "step": 18877 }, { "epoch": 0.63, "grad_norm": 0.7775422930717468, "learning_rate": 6.266771013241605e-06, "loss": 2.111, "step": 18878 }, { "epoch": 0.63, "grad_norm": 0.7220528721809387, "learning_rate": 6.265784986993723e-06, "loss": 2.0063, "step": 18879 }, { "epoch": 0.63, "grad_norm": 0.7515190243721008, "learning_rate": 6.264799002932813e-06, "loss": 2.059, "step": 18880 }, { "epoch": 0.63, "grad_norm": 0.7343034148216248, "learning_rate": 6.2638130610700165e-06, "loss": 2.0313, "step": 18881 }, { "epoch": 0.63, "grad_norm": 0.7566269636154175, "learning_rate": 6.262827161416467e-06, "loss": 2.0743, "step": 18882 }, { "epoch": 0.63, "grad_norm": 0.7605767250061035, "learning_rate": 6.261841303983312e-06, "loss": 2.0509, "step": 18883 }, { "epoch": 0.63, "grad_norm": 0.75034499168396, "learning_rate": 6.2608554887816775e-06, "loss": 2.0545, "step": 18884 }, { "epoch": 0.63, "grad_norm": 0.7680898308753967, "learning_rate": 6.259869715822708e-06, "loss": 2.0814, "step": 18885 }, { "epoch": 0.63, "grad_norm": 0.7437170147895813, "learning_rate": 6.258883985117539e-06, "loss": 2.1077, "step": 18886 }, { "epoch": 0.63, "grad_norm": 0.7450584769248962, "learning_rate": 6.257898296677309e-06, "loss": 2.0673, "step": 18887 }, { "epoch": 0.63, "grad_norm": 0.7381688356399536, "learning_rate": 6.256912650513147e-06, "loss": 2.0077, "step": 18888 }, { "epoch": 0.63, "grad_norm": 0.7569523453712463, "learning_rate": 6.255927046636194e-06, "loss": 2.0891, "step": 18889 }, { "epoch": 0.63, "grad_norm": 0.7439249157905579, "learning_rate": 6.254941485057579e-06, "loss": 2.1107, "step": 18890 }, { "epoch": 0.63, "grad_norm": 0.7431926131248474, "learning_rate": 6.253955965788445e-06, "loss": 2.0644, "step": 18891 }, { "epoch": 0.63, "grad_norm": 0.7719762921333313, "learning_rate": 6.252970488839923e-06, "loss": 2.0672, "step": 18892 }, { "epoch": 0.63, "grad_norm": 0.7319810390472412, "learning_rate": 6.25198505422314e-06, "loss": 2.052, "step": 18893 }, { "epoch": 0.63, "grad_norm": 0.7023764252662659, "learning_rate": 6.250999661949234e-06, "loss": 1.9886, "step": 18894 }, { "epoch": 0.63, "grad_norm": 0.7248998284339905, "learning_rate": 6.2500143120293375e-06, "loss": 2.0277, "step": 18895 }, { "epoch": 0.63, "grad_norm": 0.7359451651573181, "learning_rate": 6.249029004474583e-06, "loss": 2.0112, "step": 18896 }, { "epoch": 0.63, "grad_norm": 0.7736878991127014, "learning_rate": 6.248043739296099e-06, "loss": 2.0923, "step": 18897 }, { "epoch": 0.63, "grad_norm": 0.7660742998123169, "learning_rate": 6.247058516505016e-06, "loss": 2.1117, "step": 18898 }, { "epoch": 0.63, "grad_norm": 0.7357897162437439, "learning_rate": 6.246073336112468e-06, "loss": 2.0711, "step": 18899 }, { "epoch": 0.63, "grad_norm": 0.7587547302246094, "learning_rate": 6.245088198129583e-06, "loss": 2.0658, "step": 18900 }, { "epoch": 0.63, "grad_norm": 0.7507548332214355, "learning_rate": 6.244103102567494e-06, "loss": 2.0349, "step": 18901 }, { "epoch": 0.63, "grad_norm": 0.7386896014213562, "learning_rate": 6.243118049437324e-06, "loss": 2.0747, "step": 18902 }, { "epoch": 0.63, "grad_norm": 0.7314772009849548, "learning_rate": 6.242133038750206e-06, "loss": 2.0844, "step": 18903 }, { "epoch": 0.63, "grad_norm": 0.7198120355606079, "learning_rate": 6.241148070517263e-06, "loss": 2.1009, "step": 18904 }, { "epoch": 0.63, "grad_norm": 0.746636152267456, "learning_rate": 6.240163144749634e-06, "loss": 2.0591, "step": 18905 }, { "epoch": 0.63, "grad_norm": 0.7432420253753662, "learning_rate": 6.23917826145843e-06, "loss": 2.1291, "step": 18906 }, { "epoch": 0.63, "grad_norm": 0.7301312685012817, "learning_rate": 6.2381934206547905e-06, "loss": 2.051, "step": 18907 }, { "epoch": 0.63, "grad_norm": 0.7646254301071167, "learning_rate": 6.237208622349835e-06, "loss": 2.0685, "step": 18908 }, { "epoch": 0.63, "grad_norm": 0.7602600455284119, "learning_rate": 6.2362238665546936e-06, "loss": 2.0391, "step": 18909 }, { "epoch": 0.63, "grad_norm": 0.7583686709403992, "learning_rate": 6.235239153280488e-06, "loss": 2.0748, "step": 18910 }, { "epoch": 0.63, "grad_norm": 0.7293695211410522, "learning_rate": 6.234254482538343e-06, "loss": 2.0225, "step": 18911 }, { "epoch": 0.63, "grad_norm": 0.7662408351898193, "learning_rate": 6.2332698543393835e-06, "loss": 2.0571, "step": 18912 }, { "epoch": 0.63, "grad_norm": 0.8053572177886963, "learning_rate": 6.232285268694733e-06, "loss": 2.0432, "step": 18913 }, { "epoch": 0.63, "grad_norm": 0.7788376808166504, "learning_rate": 6.23130072561552e-06, "loss": 2.0647, "step": 18914 }, { "epoch": 0.63, "grad_norm": 0.7436146140098572, "learning_rate": 6.2303162251128605e-06, "loss": 2.0823, "step": 18915 }, { "epoch": 0.63, "grad_norm": 0.7464315295219421, "learning_rate": 6.2293317671978774e-06, "loss": 2.1677, "step": 18916 }, { "epoch": 0.63, "grad_norm": 0.7420830726623535, "learning_rate": 6.228347351881696e-06, "loss": 2.1319, "step": 18917 }, { "epoch": 0.63, "grad_norm": 0.7582462430000305, "learning_rate": 6.227362979175436e-06, "loss": 2.0607, "step": 18918 }, { "epoch": 0.63, "grad_norm": 0.7229472398757935, "learning_rate": 6.226378649090217e-06, "loss": 2.0593, "step": 18919 }, { "epoch": 0.63, "grad_norm": 0.7313787937164307, "learning_rate": 6.2253943616371584e-06, "loss": 2.0725, "step": 18920 }, { "epoch": 0.63, "grad_norm": 0.7334299087524414, "learning_rate": 6.224410116827384e-06, "loss": 2.0663, "step": 18921 }, { "epoch": 0.63, "grad_norm": 0.7316075563430786, "learning_rate": 6.223425914672012e-06, "loss": 2.0255, "step": 18922 }, { "epoch": 0.63, "grad_norm": 0.7424073219299316, "learning_rate": 6.222441755182163e-06, "loss": 2.0498, "step": 18923 }, { "epoch": 0.63, "grad_norm": 0.7573325037956238, "learning_rate": 6.221457638368951e-06, "loss": 2.0554, "step": 18924 }, { "epoch": 0.63, "grad_norm": 0.7465238571166992, "learning_rate": 6.220473564243495e-06, "loss": 1.9506, "step": 18925 }, { "epoch": 0.63, "grad_norm": 0.7241690158843994, "learning_rate": 6.2194895328169125e-06, "loss": 1.9883, "step": 18926 }, { "epoch": 0.63, "grad_norm": 0.778339684009552, "learning_rate": 6.218505544100328e-06, "loss": 2.072, "step": 18927 }, { "epoch": 0.63, "grad_norm": 0.7653154730796814, "learning_rate": 6.217521598104846e-06, "loss": 2.0309, "step": 18928 }, { "epoch": 0.63, "grad_norm": 0.7538443803787231, "learning_rate": 6.216537694841589e-06, "loss": 2.0668, "step": 18929 }, { "epoch": 0.63, "grad_norm": 0.7608966827392578, "learning_rate": 6.215553834321671e-06, "loss": 2.0407, "step": 18930 }, { "epoch": 0.63, "grad_norm": 0.7398580312728882, "learning_rate": 6.214570016556207e-06, "loss": 2.0948, "step": 18931 }, { "epoch": 0.63, "grad_norm": 0.7513548135757446, "learning_rate": 6.21358624155632e-06, "loss": 2.0335, "step": 18932 }, { "epoch": 0.63, "grad_norm": 0.7951690554618835, "learning_rate": 6.21260250933311e-06, "loss": 2.018, "step": 18933 }, { "epoch": 0.63, "grad_norm": 0.8567488789558411, "learning_rate": 6.211618819897697e-06, "loss": 2.0214, "step": 18934 }, { "epoch": 0.63, "grad_norm": 0.7366189360618591, "learning_rate": 6.210635173261196e-06, "loss": 2.0782, "step": 18935 }, { "epoch": 0.63, "grad_norm": 0.7258440256118774, "learning_rate": 6.20965156943472e-06, "loss": 2.08, "step": 18936 }, { "epoch": 0.63, "grad_norm": 0.7189493179321289, "learning_rate": 6.2086680084293775e-06, "loss": 2.0772, "step": 18937 }, { "epoch": 0.63, "grad_norm": 0.8011818528175354, "learning_rate": 6.20768449025628e-06, "loss": 2.0059, "step": 18938 }, { "epoch": 0.63, "grad_norm": 0.7659844160079956, "learning_rate": 6.20670101492654e-06, "loss": 2.0732, "step": 18939 }, { "epoch": 0.63, "grad_norm": 0.7397416830062866, "learning_rate": 6.2057175824512765e-06, "loss": 2.1713, "step": 18940 }, { "epoch": 0.63, "grad_norm": 0.75091153383255, "learning_rate": 6.204734192841586e-06, "loss": 2.0287, "step": 18941 }, { "epoch": 0.63, "grad_norm": 0.770153284072876, "learning_rate": 6.2037508461085824e-06, "loss": 2.0134, "step": 18942 }, { "epoch": 0.63, "grad_norm": 0.743701696395874, "learning_rate": 6.202767542263379e-06, "loss": 2.0143, "step": 18943 }, { "epoch": 0.63, "grad_norm": 0.771472156047821, "learning_rate": 6.201784281317082e-06, "loss": 2.0456, "step": 18944 }, { "epoch": 0.63, "grad_norm": 0.7537869811058044, "learning_rate": 6.200801063280803e-06, "loss": 2.1383, "step": 18945 }, { "epoch": 0.63, "grad_norm": 0.732533872127533, "learning_rate": 6.199817888165647e-06, "loss": 2.0567, "step": 18946 }, { "epoch": 0.63, "grad_norm": 0.7311195135116577, "learning_rate": 6.1988347559827164e-06, "loss": 2.0848, "step": 18947 }, { "epoch": 0.63, "grad_norm": 0.7823708057403564, "learning_rate": 6.197851666743127e-06, "loss": 2.0828, "step": 18948 }, { "epoch": 0.63, "grad_norm": 0.7525556087493896, "learning_rate": 6.196868620457983e-06, "loss": 2.0616, "step": 18949 }, { "epoch": 0.63, "grad_norm": 0.7519344687461853, "learning_rate": 6.195885617138384e-06, "loss": 2.0798, "step": 18950 }, { "epoch": 0.63, "grad_norm": 0.7866621613502502, "learning_rate": 6.194902656795442e-06, "loss": 2.0851, "step": 18951 }, { "epoch": 0.63, "grad_norm": 0.7650327086448669, "learning_rate": 6.19391973944026e-06, "loss": 2.0297, "step": 18952 }, { "epoch": 0.63, "grad_norm": 0.728334367275238, "learning_rate": 6.192936865083938e-06, "loss": 1.9922, "step": 18953 }, { "epoch": 0.63, "grad_norm": 0.7529034614562988, "learning_rate": 6.191954033737593e-06, "loss": 2.0684, "step": 18954 }, { "epoch": 0.63, "grad_norm": 0.7680163383483887, "learning_rate": 6.190971245412311e-06, "loss": 2.0423, "step": 18955 }, { "epoch": 0.63, "grad_norm": 0.7563573718070984, "learning_rate": 6.189988500119208e-06, "loss": 1.9834, "step": 18956 }, { "epoch": 0.63, "grad_norm": 0.7373895049095154, "learning_rate": 6.18900579786938e-06, "loss": 2.0874, "step": 18957 }, { "epoch": 0.63, "grad_norm": 0.742694616317749, "learning_rate": 6.188023138673936e-06, "loss": 2.1133, "step": 18958 }, { "epoch": 0.63, "grad_norm": 0.7468710541725159, "learning_rate": 6.1870405225439676e-06, "loss": 1.9955, "step": 18959 }, { "epoch": 0.63, "grad_norm": 0.7460325360298157, "learning_rate": 6.186057949490582e-06, "loss": 2.0938, "step": 18960 }, { "epoch": 0.63, "grad_norm": 0.7731943726539612, "learning_rate": 6.185075419524875e-06, "loss": 2.0157, "step": 18961 }, { "epoch": 0.63, "grad_norm": 0.7375780940055847, "learning_rate": 6.184092932657955e-06, "loss": 2.1012, "step": 18962 }, { "epoch": 0.63, "grad_norm": 0.757139265537262, "learning_rate": 6.183110488900916e-06, "loss": 2.0576, "step": 18963 }, { "epoch": 0.63, "grad_norm": 0.76951003074646, "learning_rate": 6.182128088264856e-06, "loss": 2.0523, "step": 18964 }, { "epoch": 0.63, "grad_norm": 0.758908748626709, "learning_rate": 6.181145730760877e-06, "loss": 2.1013, "step": 18965 }, { "epoch": 0.63, "grad_norm": 0.7553361654281616, "learning_rate": 6.180163416400075e-06, "loss": 1.9999, "step": 18966 }, { "epoch": 0.63, "grad_norm": 0.7436027526855469, "learning_rate": 6.179181145193549e-06, "loss": 2.045, "step": 18967 }, { "epoch": 0.63, "grad_norm": 0.7504845261573792, "learning_rate": 6.178198917152394e-06, "loss": 2.0902, "step": 18968 }, { "epoch": 0.63, "grad_norm": 0.7410982847213745, "learning_rate": 6.177216732287705e-06, "loss": 2.143, "step": 18969 }, { "epoch": 0.63, "grad_norm": 0.7179784774780273, "learning_rate": 6.176234590610585e-06, "loss": 2.0389, "step": 18970 }, { "epoch": 0.63, "grad_norm": 0.7723079919815063, "learning_rate": 6.175252492132127e-06, "loss": 2.1084, "step": 18971 }, { "epoch": 0.63, "grad_norm": 0.7151641845703125, "learning_rate": 6.174270436863421e-06, "loss": 2.0772, "step": 18972 }, { "epoch": 0.63, "grad_norm": 0.7439836859703064, "learning_rate": 6.173288424815567e-06, "loss": 2.0842, "step": 18973 }, { "epoch": 0.63, "grad_norm": 0.7648043632507324, "learning_rate": 6.172306455999657e-06, "loss": 2.0742, "step": 18974 }, { "epoch": 0.63, "grad_norm": 0.792762279510498, "learning_rate": 6.171324530426785e-06, "loss": 2.0964, "step": 18975 }, { "epoch": 0.63, "grad_norm": 0.762012243270874, "learning_rate": 6.170342648108049e-06, "loss": 2.1139, "step": 18976 }, { "epoch": 0.63, "grad_norm": 0.749306857585907, "learning_rate": 6.169360809054532e-06, "loss": 2.0385, "step": 18977 }, { "epoch": 0.63, "grad_norm": 0.7499094009399414, "learning_rate": 6.168379013277332e-06, "loss": 2.0402, "step": 18978 }, { "epoch": 0.63, "grad_norm": 0.7303341627120972, "learning_rate": 6.167397260787542e-06, "loss": 1.9865, "step": 18979 }, { "epoch": 0.63, "grad_norm": 0.7549657821655273, "learning_rate": 6.166415551596255e-06, "loss": 2.0686, "step": 18980 }, { "epoch": 0.63, "grad_norm": 0.7460115551948547, "learning_rate": 6.165433885714554e-06, "loss": 2.0521, "step": 18981 }, { "epoch": 0.63, "grad_norm": 0.7544904351234436, "learning_rate": 6.164452263153534e-06, "loss": 2.0351, "step": 18982 }, { "epoch": 0.63, "grad_norm": 0.7404347062110901, "learning_rate": 6.163470683924283e-06, "loss": 2.0467, "step": 18983 }, { "epoch": 0.63, "grad_norm": 0.7850483655929565, "learning_rate": 6.162489148037894e-06, "loss": 2.056, "step": 18984 }, { "epoch": 0.63, "grad_norm": 0.7432218790054321, "learning_rate": 6.161507655505456e-06, "loss": 2.0172, "step": 18985 }, { "epoch": 0.63, "grad_norm": 0.7372378706932068, "learning_rate": 6.160526206338051e-06, "loss": 2.0648, "step": 18986 }, { "epoch": 0.63, "grad_norm": 0.7359632849693298, "learning_rate": 6.159544800546772e-06, "loss": 2.0578, "step": 18987 }, { "epoch": 0.63, "grad_norm": 1.9628517627716064, "learning_rate": 6.158563438142706e-06, "loss": 2.1338, "step": 18988 }, { "epoch": 0.63, "grad_norm": 0.7353386282920837, "learning_rate": 6.15758211913694e-06, "loss": 2.1106, "step": 18989 }, { "epoch": 0.63, "grad_norm": 0.7633206844329834, "learning_rate": 6.156600843540558e-06, "loss": 2.0105, "step": 18990 }, { "epoch": 0.63, "grad_norm": 0.7370821237564087, "learning_rate": 6.155619611364645e-06, "loss": 2.0696, "step": 18991 }, { "epoch": 0.63, "grad_norm": 0.7421459555625916, "learning_rate": 6.154638422620289e-06, "loss": 2.0277, "step": 18992 }, { "epoch": 0.63, "grad_norm": 0.7444862127304077, "learning_rate": 6.153657277318577e-06, "loss": 2.1075, "step": 18993 }, { "epoch": 0.63, "grad_norm": 0.7598435878753662, "learning_rate": 6.152676175470591e-06, "loss": 2.0886, "step": 18994 }, { "epoch": 0.63, "grad_norm": 0.7477863430976868, "learning_rate": 6.151695117087413e-06, "loss": 2.0416, "step": 18995 }, { "epoch": 0.63, "grad_norm": 0.7621042728424072, "learning_rate": 6.150714102180128e-06, "loss": 2.0405, "step": 18996 }, { "epoch": 0.63, "grad_norm": 0.7530084848403931, "learning_rate": 6.1497331307598185e-06, "loss": 2.0606, "step": 18997 }, { "epoch": 0.63, "grad_norm": 0.7559533715248108, "learning_rate": 6.148752202837574e-06, "loss": 2.0701, "step": 18998 }, { "epoch": 0.63, "grad_norm": 0.7394441962242126, "learning_rate": 6.147771318424463e-06, "loss": 2.0065, "step": 18999 }, { "epoch": 0.63, "grad_norm": 0.7441007494926453, "learning_rate": 6.146790477531577e-06, "loss": 2.163, "step": 19000 }, { "epoch": 0.63, "grad_norm": 0.733950674533844, "learning_rate": 6.145809680169992e-06, "loss": 1.9913, "step": 19001 }, { "epoch": 0.63, "grad_norm": 0.7372605204582214, "learning_rate": 6.144828926350795e-06, "loss": 2.0893, "step": 19002 }, { "epoch": 0.63, "grad_norm": 0.7606984376907349, "learning_rate": 6.1438482160850564e-06, "loss": 2.001, "step": 19003 }, { "epoch": 0.63, "grad_norm": 0.7368562817573547, "learning_rate": 6.142867549383862e-06, "loss": 2.0692, "step": 19004 }, { "epoch": 0.63, "grad_norm": 0.7467755079269409, "learning_rate": 6.141886926258288e-06, "loss": 2.0911, "step": 19005 }, { "epoch": 0.63, "grad_norm": 0.7457506060600281, "learning_rate": 6.140906346719417e-06, "loss": 1.9745, "step": 19006 }, { "epoch": 0.63, "grad_norm": 0.7633259892463684, "learning_rate": 6.1399258107783255e-06, "loss": 2.0657, "step": 19007 }, { "epoch": 0.63, "grad_norm": 0.7399932146072388, "learning_rate": 6.138945318446088e-06, "loss": 2.0671, "step": 19008 }, { "epoch": 0.63, "grad_norm": 0.7649640440940857, "learning_rate": 6.137964869733784e-06, "loss": 2.0461, "step": 19009 }, { "epoch": 0.63, "grad_norm": 0.7500134110450745, "learning_rate": 6.136984464652489e-06, "loss": 2.0212, "step": 19010 }, { "epoch": 0.63, "grad_norm": 0.7268110513687134, "learning_rate": 6.136004103213282e-06, "loss": 2.0502, "step": 19011 }, { "epoch": 0.63, "grad_norm": 0.7325233221054077, "learning_rate": 6.135023785427234e-06, "loss": 2.0784, "step": 19012 }, { "epoch": 0.63, "grad_norm": 0.7395865321159363, "learning_rate": 6.13404351130542e-06, "loss": 2.0843, "step": 19013 }, { "epoch": 0.63, "grad_norm": 0.7059459686279297, "learning_rate": 6.133063280858919e-06, "loss": 1.9953, "step": 19014 }, { "epoch": 0.63, "grad_norm": 0.7485175728797913, "learning_rate": 6.132083094098802e-06, "loss": 2.0188, "step": 19015 }, { "epoch": 0.63, "grad_norm": 0.7527862787246704, "learning_rate": 6.131102951036145e-06, "loss": 2.0947, "step": 19016 }, { "epoch": 0.63, "grad_norm": 0.7434383034706116, "learning_rate": 6.130122851682019e-06, "loss": 1.9987, "step": 19017 }, { "epoch": 0.63, "grad_norm": 0.7599979043006897, "learning_rate": 6.129142796047497e-06, "loss": 2.0251, "step": 19018 }, { "epoch": 0.63, "grad_norm": 0.7668964266777039, "learning_rate": 6.128162784143649e-06, "loss": 2.0119, "step": 19019 }, { "epoch": 0.63, "grad_norm": 0.7716457843780518, "learning_rate": 6.127182815981554e-06, "loss": 2.0977, "step": 19020 }, { "epoch": 0.63, "grad_norm": 0.7432906627655029, "learning_rate": 6.126202891572273e-06, "loss": 2.0335, "step": 19021 }, { "epoch": 0.63, "grad_norm": 0.7379641532897949, "learning_rate": 6.1252230109268815e-06, "loss": 2.0739, "step": 19022 }, { "epoch": 0.63, "grad_norm": 0.7216739058494568, "learning_rate": 6.124243174056451e-06, "loss": 2.0757, "step": 19023 }, { "epoch": 0.63, "grad_norm": 0.7414604425430298, "learning_rate": 6.123263380972047e-06, "loss": 2.037, "step": 19024 }, { "epoch": 0.63, "grad_norm": 0.7387256622314453, "learning_rate": 6.122283631684749e-06, "loss": 2.0829, "step": 19025 }, { "epoch": 0.63, "grad_norm": 0.7434527277946472, "learning_rate": 6.121303926205613e-06, "loss": 2.0555, "step": 19026 }, { "epoch": 0.63, "grad_norm": 0.7379152178764343, "learning_rate": 6.12032426454571e-06, "loss": 2.0671, "step": 19027 }, { "epoch": 0.63, "grad_norm": 0.7796815037727356, "learning_rate": 6.11934464671611e-06, "loss": 2.0313, "step": 19028 }, { "epoch": 0.63, "grad_norm": 0.7453703880310059, "learning_rate": 6.118365072727884e-06, "loss": 2.0718, "step": 19029 }, { "epoch": 0.63, "grad_norm": 0.7363460659980774, "learning_rate": 6.1173855425920925e-06, "loss": 2.0882, "step": 19030 }, { "epoch": 0.63, "grad_norm": 0.7074148654937744, "learning_rate": 6.116406056319804e-06, "loss": 1.9717, "step": 19031 }, { "epoch": 0.63, "grad_norm": 0.7342532277107239, "learning_rate": 6.11542661392208e-06, "loss": 2.1006, "step": 19032 }, { "epoch": 0.63, "grad_norm": 0.7515509128570557, "learning_rate": 6.114447215409998e-06, "loss": 2.0826, "step": 19033 }, { "epoch": 0.63, "grad_norm": 0.7187480330467224, "learning_rate": 6.113467860794608e-06, "loss": 2.0339, "step": 19034 }, { "epoch": 0.63, "grad_norm": 0.7254448533058167, "learning_rate": 6.112488550086979e-06, "loss": 2.1013, "step": 19035 }, { "epoch": 0.63, "grad_norm": 0.7331321239471436, "learning_rate": 6.111509283298178e-06, "loss": 2.0714, "step": 19036 }, { "epoch": 0.63, "grad_norm": 0.7561440467834473, "learning_rate": 6.110530060439267e-06, "loss": 2.1132, "step": 19037 }, { "epoch": 0.63, "grad_norm": 0.7482367157936096, "learning_rate": 6.109550881521309e-06, "loss": 2.046, "step": 19038 }, { "epoch": 0.63, "grad_norm": 0.7554906010627747, "learning_rate": 6.108571746555364e-06, "loss": 2.0119, "step": 19039 }, { "epoch": 0.63, "grad_norm": 0.718529224395752, "learning_rate": 6.107592655552492e-06, "loss": 2.0967, "step": 19040 }, { "epoch": 0.63, "grad_norm": 0.7599681615829468, "learning_rate": 6.106613608523759e-06, "loss": 2.0571, "step": 19041 }, { "epoch": 0.63, "grad_norm": 0.7526300549507141, "learning_rate": 6.105634605480228e-06, "loss": 2.0204, "step": 19042 }, { "epoch": 0.63, "grad_norm": 0.7577962875366211, "learning_rate": 6.1046556464329495e-06, "loss": 2.1025, "step": 19043 }, { "epoch": 0.63, "grad_norm": 0.7504680156707764, "learning_rate": 6.103676731392991e-06, "loss": 2.0949, "step": 19044 }, { "epoch": 0.63, "grad_norm": 0.7198368906974792, "learning_rate": 6.102697860371407e-06, "loss": 2.0802, "step": 19045 }, { "epoch": 0.63, "grad_norm": 0.7264554500579834, "learning_rate": 6.1017190333792584e-06, "loss": 2.0167, "step": 19046 }, { "epoch": 0.63, "grad_norm": 0.7941092848777771, "learning_rate": 6.10074025042761e-06, "loss": 2.0976, "step": 19047 }, { "epoch": 0.63, "grad_norm": 0.7501634359359741, "learning_rate": 6.099761511527505e-06, "loss": 2.1237, "step": 19048 }, { "epoch": 0.63, "grad_norm": 0.7681805491447449, "learning_rate": 6.098782816690012e-06, "loss": 2.0611, "step": 19049 }, { "epoch": 0.63, "grad_norm": 0.767367422580719, "learning_rate": 6.097804165926184e-06, "loss": 1.9902, "step": 19050 }, { "epoch": 0.63, "grad_norm": 0.7810367941856384, "learning_rate": 6.09682555924708e-06, "loss": 2.1044, "step": 19051 }, { "epoch": 0.63, "grad_norm": 0.7504892945289612, "learning_rate": 6.095846996663753e-06, "loss": 2.0476, "step": 19052 }, { "epoch": 0.63, "grad_norm": 0.7267982959747314, "learning_rate": 6.094868478187256e-06, "loss": 2.0273, "step": 19053 }, { "epoch": 0.63, "grad_norm": 0.7418473362922668, "learning_rate": 6.093890003828644e-06, "loss": 2.0439, "step": 19054 }, { "epoch": 0.63, "grad_norm": 0.7511504292488098, "learning_rate": 6.092911573598981e-06, "loss": 2.0498, "step": 19055 }, { "epoch": 0.63, "grad_norm": 0.7115115523338318, "learning_rate": 6.091933187509307e-06, "loss": 1.9938, "step": 19056 }, { "epoch": 0.63, "grad_norm": 0.7520797848701477, "learning_rate": 6.0909548455706836e-06, "loss": 2.1027, "step": 19057 }, { "epoch": 0.63, "grad_norm": 0.7222529649734497, "learning_rate": 6.089976547794161e-06, "loss": 2.0253, "step": 19058 }, { "epoch": 0.63, "grad_norm": 0.7306830286979675, "learning_rate": 6.088998294190792e-06, "loss": 1.9993, "step": 19059 }, { "epoch": 0.63, "grad_norm": 0.7403666377067566, "learning_rate": 6.0880200847716305e-06, "loss": 2.0295, "step": 19060 }, { "epoch": 0.63, "grad_norm": 0.7364788055419922, "learning_rate": 6.0870419195477225e-06, "loss": 2.1113, "step": 19061 }, { "epoch": 0.63, "grad_norm": 0.7246177792549133, "learning_rate": 6.086063798530122e-06, "loss": 2.0668, "step": 19062 }, { "epoch": 0.63, "grad_norm": 0.7360320091247559, "learning_rate": 6.085085721729879e-06, "loss": 2.0429, "step": 19063 }, { "epoch": 0.63, "grad_norm": 0.730337381362915, "learning_rate": 6.0841076891580455e-06, "loss": 2.0821, "step": 19064 }, { "epoch": 0.63, "grad_norm": 0.7329220175743103, "learning_rate": 6.083129700825665e-06, "loss": 2.0015, "step": 19065 }, { "epoch": 0.63, "grad_norm": 0.7216154932975769, "learning_rate": 6.082151756743791e-06, "loss": 2.0675, "step": 19066 }, { "epoch": 0.63, "grad_norm": 0.8051868081092834, "learning_rate": 6.08117385692347e-06, "loss": 2.0045, "step": 19067 }, { "epoch": 0.63, "grad_norm": 0.7464811205863953, "learning_rate": 6.080196001375749e-06, "loss": 2.0358, "step": 19068 }, { "epoch": 0.63, "grad_norm": 0.7313085198402405, "learning_rate": 6.079218190111682e-06, "loss": 1.9768, "step": 19069 }, { "epoch": 0.63, "grad_norm": 0.7746431827545166, "learning_rate": 6.078240423142304e-06, "loss": 2.0817, "step": 19070 }, { "epoch": 0.63, "grad_norm": 0.753528892993927, "learning_rate": 6.07726270047867e-06, "loss": 2.1245, "step": 19071 }, { "epoch": 0.63, "grad_norm": 0.7479079961776733, "learning_rate": 6.076285022131822e-06, "loss": 2.0999, "step": 19072 }, { "epoch": 0.63, "grad_norm": 0.7363954186439514, "learning_rate": 6.075307388112808e-06, "loss": 1.991, "step": 19073 }, { "epoch": 0.63, "grad_norm": 0.7307406067848206, "learning_rate": 6.07432979843267e-06, "loss": 2.081, "step": 19074 }, { "epoch": 0.63, "grad_norm": 0.727405846118927, "learning_rate": 6.0733522531024525e-06, "loss": 2.0917, "step": 19075 }, { "epoch": 0.63, "grad_norm": 0.7711823582649231, "learning_rate": 6.072374752133199e-06, "loss": 2.1087, "step": 19076 }, { "epoch": 0.63, "grad_norm": 0.7533994317054749, "learning_rate": 6.071397295535954e-06, "loss": 2.0788, "step": 19077 }, { "epoch": 0.63, "grad_norm": 0.7498617768287659, "learning_rate": 6.070419883321763e-06, "loss": 2.0468, "step": 19078 }, { "epoch": 0.63, "grad_norm": 0.7496045231819153, "learning_rate": 6.069442515501665e-06, "loss": 2.051, "step": 19079 }, { "epoch": 0.63, "grad_norm": 0.7359864711761475, "learning_rate": 6.0684651920866984e-06, "loss": 2.0802, "step": 19080 }, { "epoch": 0.63, "grad_norm": 0.735066294670105, "learning_rate": 6.067487913087911e-06, "loss": 2.064, "step": 19081 }, { "epoch": 0.63, "grad_norm": 0.7362089157104492, "learning_rate": 6.06651067851634e-06, "loss": 2.0755, "step": 19082 }, { "epoch": 0.63, "grad_norm": 0.7744904160499573, "learning_rate": 6.065533488383027e-06, "loss": 2.0828, "step": 19083 }, { "epoch": 0.63, "grad_norm": 0.7433419823646545, "learning_rate": 6.064556342699006e-06, "loss": 2.0412, "step": 19084 }, { "epoch": 0.63, "grad_norm": 0.7551929950714111, "learning_rate": 6.063579241475324e-06, "loss": 2.031, "step": 19085 }, { "epoch": 0.63, "grad_norm": 0.7276280522346497, "learning_rate": 6.0626021847230185e-06, "loss": 2.0256, "step": 19086 }, { "epoch": 0.64, "grad_norm": 0.7605628371238708, "learning_rate": 6.061625172453125e-06, "loss": 2.0164, "step": 19087 }, { "epoch": 0.64, "grad_norm": 0.7388648390769958, "learning_rate": 6.060648204676683e-06, "loss": 2.0555, "step": 19088 }, { "epoch": 0.64, "grad_norm": 0.7514597177505493, "learning_rate": 6.059671281404727e-06, "loss": 2.0265, "step": 19089 }, { "epoch": 0.64, "grad_norm": 0.7534149885177612, "learning_rate": 6.058694402648293e-06, "loss": 1.9767, "step": 19090 }, { "epoch": 0.64, "grad_norm": 0.7699351906776428, "learning_rate": 6.0577175684184265e-06, "loss": 2.0116, "step": 19091 }, { "epoch": 0.64, "grad_norm": 0.7233486771583557, "learning_rate": 6.0567407787261515e-06, "loss": 2.0347, "step": 19092 }, { "epoch": 0.64, "grad_norm": 0.7396078705787659, "learning_rate": 6.05576403358251e-06, "loss": 2.1008, "step": 19093 }, { "epoch": 0.64, "grad_norm": 0.7538793683052063, "learning_rate": 6.054787332998534e-06, "loss": 2.0109, "step": 19094 }, { "epoch": 0.64, "grad_norm": 0.7183260321617126, "learning_rate": 6.053810676985261e-06, "loss": 2.076, "step": 19095 }, { "epoch": 0.64, "grad_norm": 0.7488780617713928, "learning_rate": 6.052834065553721e-06, "loss": 2.0722, "step": 19096 }, { "epoch": 0.64, "grad_norm": 0.7384499907493591, "learning_rate": 6.051857498714948e-06, "loss": 2.0224, "step": 19097 }, { "epoch": 0.64, "grad_norm": 0.7839840650558472, "learning_rate": 6.0508809764799725e-06, "loss": 2.083, "step": 19098 }, { "epoch": 0.64, "grad_norm": 0.708393394947052, "learning_rate": 6.049904498859831e-06, "loss": 2.0376, "step": 19099 }, { "epoch": 0.64, "grad_norm": 0.7500677108764648, "learning_rate": 6.048928065865557e-06, "loss": 2.0504, "step": 19100 }, { "epoch": 0.64, "grad_norm": 0.7380995750427246, "learning_rate": 6.047951677508175e-06, "loss": 2.0271, "step": 19101 }, { "epoch": 0.64, "grad_norm": 0.7628551125526428, "learning_rate": 6.046975333798719e-06, "loss": 2.0762, "step": 19102 }, { "epoch": 0.64, "grad_norm": 0.7465739846229553, "learning_rate": 6.04599903474822e-06, "loss": 2.0173, "step": 19103 }, { "epoch": 0.64, "grad_norm": 0.7347677946090698, "learning_rate": 6.045022780367709e-06, "loss": 1.9893, "step": 19104 }, { "epoch": 0.64, "grad_norm": 0.7281553149223328, "learning_rate": 6.044046570668209e-06, "loss": 2.0831, "step": 19105 }, { "epoch": 0.64, "grad_norm": 0.71649169921875, "learning_rate": 6.043070405660752e-06, "loss": 2.0719, "step": 19106 }, { "epoch": 0.64, "grad_norm": 0.7366738319396973, "learning_rate": 6.042094285356366e-06, "loss": 2.0994, "step": 19107 }, { "epoch": 0.64, "grad_norm": 0.745780348777771, "learning_rate": 6.041118209766081e-06, "loss": 2.0669, "step": 19108 }, { "epoch": 0.64, "grad_norm": 0.7423575520515442, "learning_rate": 6.040142178900925e-06, "loss": 2.1502, "step": 19109 }, { "epoch": 0.64, "grad_norm": 0.7108673453330994, "learning_rate": 6.039166192771919e-06, "loss": 2.0192, "step": 19110 }, { "epoch": 0.64, "grad_norm": 0.7495326995849609, "learning_rate": 6.038190251390093e-06, "loss": 2.1305, "step": 19111 }, { "epoch": 0.64, "grad_norm": 0.7389315366744995, "learning_rate": 6.037214354766467e-06, "loss": 1.9906, "step": 19112 }, { "epoch": 0.64, "grad_norm": 0.7322598695755005, "learning_rate": 6.036238502912079e-06, "loss": 2.0824, "step": 19113 }, { "epoch": 0.64, "grad_norm": 0.7282187342643738, "learning_rate": 6.03526269583794e-06, "loss": 2.0722, "step": 19114 }, { "epoch": 0.64, "grad_norm": 0.7238509058952332, "learning_rate": 6.03428693355508e-06, "loss": 2.0217, "step": 19115 }, { "epoch": 0.64, "grad_norm": 0.7318863868713379, "learning_rate": 6.033311216074522e-06, "loss": 2.1182, "step": 19116 }, { "epoch": 0.64, "grad_norm": 0.7685198783874512, "learning_rate": 6.032335543407293e-06, "loss": 2.1149, "step": 19117 }, { "epoch": 0.64, "grad_norm": 0.7499120235443115, "learning_rate": 6.0313599155644085e-06, "loss": 2.0433, "step": 19118 }, { "epoch": 0.64, "grad_norm": 0.7354946136474609, "learning_rate": 6.030384332556893e-06, "loss": 2.019, "step": 19119 }, { "epoch": 0.64, "grad_norm": 0.7552575469017029, "learning_rate": 6.029408794395769e-06, "loss": 2.0688, "step": 19120 }, { "epoch": 0.64, "grad_norm": 0.716498851776123, "learning_rate": 6.028433301092058e-06, "loss": 2.0253, "step": 19121 }, { "epoch": 0.64, "grad_norm": 0.7641031742095947, "learning_rate": 6.027457852656782e-06, "loss": 2.0042, "step": 19122 }, { "epoch": 0.64, "grad_norm": 0.7754260897636414, "learning_rate": 6.026482449100958e-06, "loss": 2.0328, "step": 19123 }, { "epoch": 0.64, "grad_norm": 0.7616274952888489, "learning_rate": 6.025507090435605e-06, "loss": 2.0367, "step": 19124 }, { "epoch": 0.64, "grad_norm": 0.7371190786361694, "learning_rate": 6.024531776671743e-06, "loss": 2.0127, "step": 19125 }, { "epoch": 0.64, "grad_norm": 0.732056736946106, "learning_rate": 6.023556507820396e-06, "loss": 2.0277, "step": 19126 }, { "epoch": 0.64, "grad_norm": 0.7304233908653259, "learning_rate": 6.022581283892574e-06, "loss": 2.0299, "step": 19127 }, { "epoch": 0.64, "grad_norm": 0.7453927397727966, "learning_rate": 6.021606104899296e-06, "loss": 1.9987, "step": 19128 }, { "epoch": 0.64, "grad_norm": 0.7373469471931458, "learning_rate": 6.020630970851582e-06, "loss": 2.0557, "step": 19129 }, { "epoch": 0.64, "grad_norm": 0.7404829263687134, "learning_rate": 6.019655881760448e-06, "loss": 2.0833, "step": 19130 }, { "epoch": 0.64, "grad_norm": 0.7484673261642456, "learning_rate": 6.01868083763691e-06, "loss": 2.0619, "step": 19131 }, { "epoch": 0.64, "grad_norm": 0.726915717124939, "learning_rate": 6.017705838491981e-06, "loss": 2.0662, "step": 19132 }, { "epoch": 0.64, "grad_norm": 0.7894306778907776, "learning_rate": 6.016730884336675e-06, "loss": 2.084, "step": 19133 }, { "epoch": 0.64, "grad_norm": 0.7407098412513733, "learning_rate": 6.0157559751820114e-06, "loss": 1.9999, "step": 19134 }, { "epoch": 0.64, "grad_norm": 0.724006712436676, "learning_rate": 6.014781111039008e-06, "loss": 2.0273, "step": 19135 }, { "epoch": 0.64, "grad_norm": 0.7340962886810303, "learning_rate": 6.013806291918662e-06, "loss": 2.0534, "step": 19136 }, { "epoch": 0.64, "grad_norm": 0.7371348738670349, "learning_rate": 6.012831517832002e-06, "loss": 2.0994, "step": 19137 }, { "epoch": 0.64, "grad_norm": 0.708812952041626, "learning_rate": 6.011856788790034e-06, "loss": 2.0436, "step": 19138 }, { "epoch": 0.64, "grad_norm": 0.7353003025054932, "learning_rate": 6.010882104803767e-06, "loss": 2.0646, "step": 19139 }, { "epoch": 0.64, "grad_norm": 0.7479092478752136, "learning_rate": 6.009907465884223e-06, "loss": 2.0727, "step": 19140 }, { "epoch": 0.64, "grad_norm": 0.7548202872276306, "learning_rate": 6.0089328720424e-06, "loss": 2.0404, "step": 19141 }, { "epoch": 0.64, "grad_norm": 0.7340999245643616, "learning_rate": 6.007958323289316e-06, "loss": 2.1217, "step": 19142 }, { "epoch": 0.64, "grad_norm": 0.7164263129234314, "learning_rate": 6.006983819635981e-06, "loss": 1.9961, "step": 19143 }, { "epoch": 0.64, "grad_norm": 0.7564786672592163, "learning_rate": 6.006009361093403e-06, "loss": 2.132, "step": 19144 }, { "epoch": 0.64, "grad_norm": 0.736319363117218, "learning_rate": 6.005034947672589e-06, "loss": 1.9355, "step": 19145 }, { "epoch": 0.64, "grad_norm": 0.732265830039978, "learning_rate": 6.00406057938455e-06, "loss": 2.0716, "step": 19146 }, { "epoch": 0.64, "grad_norm": 0.7521505355834961, "learning_rate": 6.003086256240289e-06, "loss": 2.1018, "step": 19147 }, { "epoch": 0.64, "grad_norm": 0.7507603168487549, "learning_rate": 6.002111978250824e-06, "loss": 2.0444, "step": 19148 }, { "epoch": 0.64, "grad_norm": 0.7531901597976685, "learning_rate": 6.001137745427148e-06, "loss": 2.0279, "step": 19149 }, { "epoch": 0.64, "grad_norm": 0.7980210185050964, "learning_rate": 6.000163557780276e-06, "loss": 2.0461, "step": 19150 }, { "epoch": 0.64, "grad_norm": 0.7654465436935425, "learning_rate": 5.999189415321214e-06, "loss": 2.0668, "step": 19151 }, { "epoch": 0.64, "grad_norm": 0.760086715221405, "learning_rate": 5.998215318060964e-06, "loss": 2.0511, "step": 19152 }, { "epoch": 0.64, "grad_norm": 0.734534502029419, "learning_rate": 5.997241266010534e-06, "loss": 2.1294, "step": 19153 }, { "epoch": 0.64, "grad_norm": 0.7488111853599548, "learning_rate": 5.996267259180923e-06, "loss": 2.039, "step": 19154 }, { "epoch": 0.64, "grad_norm": 0.7419861555099487, "learning_rate": 5.995293297583138e-06, "loss": 2.0779, "step": 19155 }, { "epoch": 0.64, "grad_norm": 0.7214480042457581, "learning_rate": 5.994319381228184e-06, "loss": 1.9984, "step": 19156 }, { "epoch": 0.64, "grad_norm": 0.7586864233016968, "learning_rate": 5.993345510127064e-06, "loss": 2.1168, "step": 19157 }, { "epoch": 0.64, "grad_norm": 0.7587993741035461, "learning_rate": 5.992371684290776e-06, "loss": 2.05, "step": 19158 }, { "epoch": 0.64, "grad_norm": 0.7237886190414429, "learning_rate": 5.991397903730325e-06, "loss": 1.9867, "step": 19159 }, { "epoch": 0.64, "grad_norm": 0.7438483238220215, "learning_rate": 5.99042416845671e-06, "loss": 2.0314, "step": 19160 }, { "epoch": 0.64, "grad_norm": 0.7289181351661682, "learning_rate": 5.989450478480932e-06, "loss": 2.0579, "step": 19161 }, { "epoch": 0.64, "grad_norm": 0.7463656663894653, "learning_rate": 5.988476833813997e-06, "loss": 2.0208, "step": 19162 }, { "epoch": 0.64, "grad_norm": 0.7410986423492432, "learning_rate": 5.987503234466893e-06, "loss": 2.0256, "step": 19163 }, { "epoch": 0.64, "grad_norm": 0.7611365914344788, "learning_rate": 5.98652968045063e-06, "loss": 2.0598, "step": 19164 }, { "epoch": 0.64, "grad_norm": 0.7491375207901001, "learning_rate": 5.985556171776203e-06, "loss": 2.037, "step": 19165 }, { "epoch": 0.64, "grad_norm": 0.7455026507377625, "learning_rate": 5.984582708454609e-06, "loss": 2.0055, "step": 19166 }, { "epoch": 0.64, "grad_norm": 0.7361711263656616, "learning_rate": 5.983609290496846e-06, "loss": 2.06, "step": 19167 }, { "epoch": 0.64, "grad_norm": 0.7529285550117493, "learning_rate": 5.98263591791391e-06, "loss": 2.0599, "step": 19168 }, { "epoch": 0.64, "grad_norm": 0.7700035572052002, "learning_rate": 5.981662590716799e-06, "loss": 2.0836, "step": 19169 }, { "epoch": 0.64, "grad_norm": 0.7223185896873474, "learning_rate": 5.98068930891651e-06, "loss": 2.0884, "step": 19170 }, { "epoch": 0.64, "grad_norm": 0.7457024455070496, "learning_rate": 5.97971607252404e-06, "loss": 2.036, "step": 19171 }, { "epoch": 0.64, "grad_norm": 0.7347481846809387, "learning_rate": 5.978742881550379e-06, "loss": 2.0752, "step": 19172 }, { "epoch": 0.64, "grad_norm": 0.7358304858207703, "learning_rate": 5.977769736006525e-06, "loss": 2.2, "step": 19173 }, { "epoch": 0.64, "grad_norm": 0.7456408739089966, "learning_rate": 5.97679663590347e-06, "loss": 2.032, "step": 19174 }, { "epoch": 0.64, "grad_norm": 0.7679852843284607, "learning_rate": 5.975823581252213e-06, "loss": 1.995, "step": 19175 }, { "epoch": 0.64, "grad_norm": 0.7573293447494507, "learning_rate": 5.974850572063739e-06, "loss": 2.0841, "step": 19176 }, { "epoch": 0.64, "grad_norm": 0.7442545294761658, "learning_rate": 5.973877608349043e-06, "loss": 2.0245, "step": 19177 }, { "epoch": 0.64, "grad_norm": 0.7365491390228271, "learning_rate": 5.972904690119119e-06, "loss": 2.0924, "step": 19178 }, { "epoch": 0.64, "grad_norm": 0.750670850276947, "learning_rate": 5.971931817384961e-06, "loss": 2.0752, "step": 19179 }, { "epoch": 0.64, "grad_norm": 0.7695629596710205, "learning_rate": 5.970958990157555e-06, "loss": 2.1139, "step": 19180 }, { "epoch": 0.64, "grad_norm": 0.7435917854309082, "learning_rate": 5.969986208447892e-06, "loss": 2.1091, "step": 19181 }, { "epoch": 0.64, "grad_norm": 0.7457911968231201, "learning_rate": 5.969013472266963e-06, "loss": 2.1064, "step": 19182 }, { "epoch": 0.64, "grad_norm": 0.7086613178253174, "learning_rate": 5.9680407816257565e-06, "loss": 2.0946, "step": 19183 }, { "epoch": 0.64, "grad_norm": 0.734380841255188, "learning_rate": 5.967068136535268e-06, "loss": 2.0304, "step": 19184 }, { "epoch": 0.64, "grad_norm": 0.7453035116195679, "learning_rate": 5.966095537006474e-06, "loss": 2.0942, "step": 19185 }, { "epoch": 0.64, "grad_norm": 0.6994192600250244, "learning_rate": 5.965122983050369e-06, "loss": 2.0625, "step": 19186 }, { "epoch": 0.64, "grad_norm": 0.7254350781440735, "learning_rate": 5.964150474677941e-06, "loss": 2.048, "step": 19187 }, { "epoch": 0.64, "grad_norm": 0.7239168286323547, "learning_rate": 5.963178011900179e-06, "loss": 2.0109, "step": 19188 }, { "epoch": 0.64, "grad_norm": 0.7233615517616272, "learning_rate": 5.962205594728062e-06, "loss": 1.9992, "step": 19189 }, { "epoch": 0.64, "grad_norm": 0.7357884645462036, "learning_rate": 5.961233223172581e-06, "loss": 2.0629, "step": 19190 }, { "epoch": 0.64, "grad_norm": 0.7586014270782471, "learning_rate": 5.960260897244718e-06, "loss": 2.0305, "step": 19191 }, { "epoch": 0.64, "grad_norm": 0.7470717430114746, "learning_rate": 5.959288616955461e-06, "loss": 2.0738, "step": 19192 }, { "epoch": 0.64, "grad_norm": 0.7382586598396301, "learning_rate": 5.9583163823157964e-06, "loss": 2.0621, "step": 19193 }, { "epoch": 0.64, "grad_norm": 0.7709409594535828, "learning_rate": 5.957344193336702e-06, "loss": 2.1101, "step": 19194 }, { "epoch": 0.64, "grad_norm": 0.7249276638031006, "learning_rate": 5.956372050029164e-06, "loss": 2.0866, "step": 19195 }, { "epoch": 0.64, "grad_norm": 0.7140607237815857, "learning_rate": 5.955399952404164e-06, "loss": 2.0681, "step": 19196 }, { "epoch": 0.64, "grad_norm": 0.7380946278572083, "learning_rate": 5.954427900472688e-06, "loss": 2.0458, "step": 19197 }, { "epoch": 0.64, "grad_norm": 0.7358243465423584, "learning_rate": 5.9534558942457125e-06, "loss": 2.0682, "step": 19198 }, { "epoch": 0.64, "grad_norm": 0.7433512806892395, "learning_rate": 5.952483933734217e-06, "loss": 2.1639, "step": 19199 }, { "epoch": 0.64, "grad_norm": 0.782249927520752, "learning_rate": 5.951512018949189e-06, "loss": 2.039, "step": 19200 }, { "epoch": 0.64, "grad_norm": 0.7400708794593811, "learning_rate": 5.950540149901609e-06, "loss": 2.124, "step": 19201 }, { "epoch": 0.64, "grad_norm": 0.7516607046127319, "learning_rate": 5.94956832660245e-06, "loss": 2.039, "step": 19202 }, { "epoch": 0.64, "grad_norm": 0.7418254017829895, "learning_rate": 5.948596549062693e-06, "loss": 2.1014, "step": 19203 }, { "epoch": 0.64, "grad_norm": 0.7593517899513245, "learning_rate": 5.94762481729332e-06, "loss": 2.087, "step": 19204 }, { "epoch": 0.64, "grad_norm": 0.7435586452484131, "learning_rate": 5.946653131305304e-06, "loss": 2.0988, "step": 19205 }, { "epoch": 0.64, "grad_norm": 0.7288510203361511, "learning_rate": 5.945681491109632e-06, "loss": 2.0964, "step": 19206 }, { "epoch": 0.64, "grad_norm": 0.7469578385353088, "learning_rate": 5.944709896717267e-06, "loss": 2.0151, "step": 19207 }, { "epoch": 0.64, "grad_norm": 0.7223272323608398, "learning_rate": 5.9437383481391965e-06, "loss": 2.0502, "step": 19208 }, { "epoch": 0.64, "grad_norm": 0.7579313516616821, "learning_rate": 5.942766845386392e-06, "loss": 2.0254, "step": 19209 }, { "epoch": 0.64, "grad_norm": 0.7483022212982178, "learning_rate": 5.9417953884698325e-06, "loss": 2.1108, "step": 19210 }, { "epoch": 0.64, "grad_norm": 0.7677231431007385, "learning_rate": 5.940823977400487e-06, "loss": 2.1057, "step": 19211 }, { "epoch": 0.64, "grad_norm": 0.8006629943847656, "learning_rate": 5.939852612189334e-06, "loss": 2.0114, "step": 19212 }, { "epoch": 0.64, "grad_norm": 0.7508304119110107, "learning_rate": 5.938881292847346e-06, "loss": 2.1146, "step": 19213 }, { "epoch": 0.64, "grad_norm": 0.7391867637634277, "learning_rate": 5.937910019385498e-06, "loss": 2.0628, "step": 19214 }, { "epoch": 0.64, "grad_norm": 0.7518301010131836, "learning_rate": 5.936938791814764e-06, "loss": 2.055, "step": 19215 }, { "epoch": 0.64, "grad_norm": 0.7381339073181152, "learning_rate": 5.935967610146113e-06, "loss": 2.0662, "step": 19216 }, { "epoch": 0.64, "grad_norm": 0.7265461087226868, "learning_rate": 5.934996474390517e-06, "loss": 2.0036, "step": 19217 }, { "epoch": 0.64, "grad_norm": 0.7341548800468445, "learning_rate": 5.93402538455895e-06, "loss": 2.033, "step": 19218 }, { "epoch": 0.64, "grad_norm": 0.7477364540100098, "learning_rate": 5.933054340662382e-06, "loss": 2.061, "step": 19219 }, { "epoch": 0.64, "grad_norm": 0.7255041599273682, "learning_rate": 5.932083342711781e-06, "loss": 2.0635, "step": 19220 }, { "epoch": 0.64, "grad_norm": 0.7433214783668518, "learning_rate": 5.931112390718116e-06, "loss": 2.0445, "step": 19221 }, { "epoch": 0.64, "grad_norm": 0.7255092263221741, "learning_rate": 5.9301414846923625e-06, "loss": 2.013, "step": 19222 }, { "epoch": 0.64, "grad_norm": 0.7595827579498291, "learning_rate": 5.929170624645482e-06, "loss": 2.0775, "step": 19223 }, { "epoch": 0.64, "grad_norm": 0.725871205329895, "learning_rate": 5.928199810588451e-06, "loss": 2.042, "step": 19224 }, { "epoch": 0.64, "grad_norm": 0.7592419385910034, "learning_rate": 5.927229042532229e-06, "loss": 2.0548, "step": 19225 }, { "epoch": 0.64, "grad_norm": 0.7516739964485168, "learning_rate": 5.926258320487784e-06, "loss": 2.0611, "step": 19226 }, { "epoch": 0.64, "grad_norm": 0.7327117919921875, "learning_rate": 5.925287644466088e-06, "loss": 2.0475, "step": 19227 }, { "epoch": 0.64, "grad_norm": 0.7661980986595154, "learning_rate": 5.924317014478108e-06, "loss": 2.0747, "step": 19228 }, { "epoch": 0.64, "grad_norm": 0.7286189794540405, "learning_rate": 5.9233464305348e-06, "loss": 2.0921, "step": 19229 }, { "epoch": 0.64, "grad_norm": 0.7389179468154907, "learning_rate": 5.922375892647136e-06, "loss": 2.0769, "step": 19230 }, { "epoch": 0.64, "grad_norm": 0.732392430305481, "learning_rate": 5.921405400826079e-06, "loss": 2.0414, "step": 19231 }, { "epoch": 0.64, "grad_norm": 0.7302137017250061, "learning_rate": 5.920434955082597e-06, "loss": 1.9575, "step": 19232 }, { "epoch": 0.64, "grad_norm": 0.7562469244003296, "learning_rate": 5.9194645554276476e-06, "loss": 2.0328, "step": 19233 }, { "epoch": 0.64, "grad_norm": 0.7462032437324524, "learning_rate": 5.918494201872194e-06, "loss": 2.0978, "step": 19234 }, { "epoch": 0.64, "grad_norm": 0.7223637104034424, "learning_rate": 5.917523894427203e-06, "loss": 2.0334, "step": 19235 }, { "epoch": 0.64, "grad_norm": 0.7506973147392273, "learning_rate": 5.9165536331036346e-06, "loss": 2.1301, "step": 19236 }, { "epoch": 0.64, "grad_norm": 0.7551196813583374, "learning_rate": 5.915583417912452e-06, "loss": 2.0424, "step": 19237 }, { "epoch": 0.64, "grad_norm": 0.720759928226471, "learning_rate": 5.914613248864611e-06, "loss": 2.0834, "step": 19238 }, { "epoch": 0.64, "grad_norm": 0.7510061860084534, "learning_rate": 5.913643125971077e-06, "loss": 2.0592, "step": 19239 }, { "epoch": 0.64, "grad_norm": 0.7208470106124878, "learning_rate": 5.9126730492428055e-06, "loss": 2.0674, "step": 19240 }, { "epoch": 0.64, "grad_norm": 0.7481751441955566, "learning_rate": 5.911703018690764e-06, "loss": 2.1479, "step": 19241 }, { "epoch": 0.64, "grad_norm": 0.7326985597610474, "learning_rate": 5.910733034325902e-06, "loss": 2.0618, "step": 19242 }, { "epoch": 0.64, "grad_norm": 0.7425806522369385, "learning_rate": 5.90976309615918e-06, "loss": 2.0645, "step": 19243 }, { "epoch": 0.64, "grad_norm": 0.7452540993690491, "learning_rate": 5.9087932042015595e-06, "loss": 2.0633, "step": 19244 }, { "epoch": 0.64, "grad_norm": 0.7579016089439392, "learning_rate": 5.907823358463997e-06, "loss": 2.1258, "step": 19245 }, { "epoch": 0.64, "grad_norm": 0.7459138631820679, "learning_rate": 5.906853558957448e-06, "loss": 2.0105, "step": 19246 }, { "epoch": 0.64, "grad_norm": 0.7671080827713013, "learning_rate": 5.905883805692867e-06, "loss": 2.0304, "step": 19247 }, { "epoch": 0.64, "grad_norm": 0.7103632688522339, "learning_rate": 5.904914098681209e-06, "loss": 2.062, "step": 19248 }, { "epoch": 0.64, "grad_norm": 0.7271375060081482, "learning_rate": 5.903944437933435e-06, "loss": 2.0686, "step": 19249 }, { "epoch": 0.64, "grad_norm": 0.7257586121559143, "learning_rate": 5.902974823460497e-06, "loss": 2.069, "step": 19250 }, { "epoch": 0.64, "grad_norm": 0.7446653842926025, "learning_rate": 5.9020052552733465e-06, "loss": 2.0674, "step": 19251 }, { "epoch": 0.64, "grad_norm": 0.7253462076187134, "learning_rate": 5.901035733382939e-06, "loss": 2.0401, "step": 19252 }, { "epoch": 0.64, "grad_norm": 0.7309347987174988, "learning_rate": 5.900066257800228e-06, "loss": 2.0605, "step": 19253 }, { "epoch": 0.64, "grad_norm": 0.7597154378890991, "learning_rate": 5.899096828536164e-06, "loss": 2.0873, "step": 19254 }, { "epoch": 0.64, "grad_norm": 0.7332039475440979, "learning_rate": 5.898127445601706e-06, "loss": 2.0919, "step": 19255 }, { "epoch": 0.64, "grad_norm": 0.7476253509521484, "learning_rate": 5.897158109007793e-06, "loss": 2.0664, "step": 19256 }, { "epoch": 0.64, "grad_norm": 0.7333676815032959, "learning_rate": 5.896188818765387e-06, "loss": 2.0098, "step": 19257 }, { "epoch": 0.64, "grad_norm": 0.7383410930633545, "learning_rate": 5.895219574885433e-06, "loss": 2.0208, "step": 19258 }, { "epoch": 0.64, "grad_norm": 0.7190619707107544, "learning_rate": 5.8942503773788875e-06, "loss": 2.0182, "step": 19259 }, { "epoch": 0.64, "grad_norm": 0.7608978748321533, "learning_rate": 5.893281226256691e-06, "loss": 2.0697, "step": 19260 }, { "epoch": 0.64, "grad_norm": 0.7587461471557617, "learning_rate": 5.892312121529795e-06, "loss": 1.9883, "step": 19261 }, { "epoch": 0.64, "grad_norm": 0.7532840967178345, "learning_rate": 5.891343063209149e-06, "loss": 2.0745, "step": 19262 }, { "epoch": 0.64, "grad_norm": 0.7378754615783691, "learning_rate": 5.890374051305707e-06, "loss": 2.1629, "step": 19263 }, { "epoch": 0.64, "grad_norm": 0.7699658274650574, "learning_rate": 5.889405085830405e-06, "loss": 2.0759, "step": 19264 }, { "epoch": 0.64, "grad_norm": 0.7339633703231812, "learning_rate": 5.888436166794198e-06, "loss": 2.0987, "step": 19265 }, { "epoch": 0.64, "grad_norm": 0.7366234660148621, "learning_rate": 5.887467294208027e-06, "loss": 2.0435, "step": 19266 }, { "epoch": 0.64, "grad_norm": 0.736527144908905, "learning_rate": 5.886498468082842e-06, "loss": 2.0938, "step": 19267 }, { "epoch": 0.64, "grad_norm": 0.7632753849029541, "learning_rate": 5.885529688429589e-06, "loss": 2.0661, "step": 19268 }, { "epoch": 0.64, "grad_norm": 0.7658427357673645, "learning_rate": 5.884560955259208e-06, "loss": 2.1159, "step": 19269 }, { "epoch": 0.64, "grad_norm": 0.7677491903305054, "learning_rate": 5.883592268582643e-06, "loss": 2.0382, "step": 19270 }, { "epoch": 0.64, "grad_norm": 0.74180668592453, "learning_rate": 5.8826236284108414e-06, "loss": 2.0791, "step": 19271 }, { "epoch": 0.64, "grad_norm": 0.7231850624084473, "learning_rate": 5.881655034754747e-06, "loss": 2.0748, "step": 19272 }, { "epoch": 0.64, "grad_norm": 0.7487387657165527, "learning_rate": 5.8806864876253e-06, "loss": 2.0966, "step": 19273 }, { "epoch": 0.64, "grad_norm": 0.7177024483680725, "learning_rate": 5.879717987033442e-06, "loss": 2.0282, "step": 19274 }, { "epoch": 0.64, "grad_norm": 0.7158411145210266, "learning_rate": 5.878749532990115e-06, "loss": 2.0027, "step": 19275 }, { "epoch": 0.64, "grad_norm": 0.7476642727851868, "learning_rate": 5.877781125506259e-06, "loss": 2.0662, "step": 19276 }, { "epoch": 0.64, "grad_norm": 0.7672646045684814, "learning_rate": 5.876812764592823e-06, "loss": 2.0712, "step": 19277 }, { "epoch": 0.64, "grad_norm": 0.722247302532196, "learning_rate": 5.875844450260733e-06, "loss": 1.9937, "step": 19278 }, { "epoch": 0.64, "grad_norm": 0.7458515167236328, "learning_rate": 5.874876182520937e-06, "loss": 2.092, "step": 19279 }, { "epoch": 0.64, "grad_norm": 0.7151179313659668, "learning_rate": 5.873907961384373e-06, "loss": 2.008, "step": 19280 }, { "epoch": 0.64, "grad_norm": 0.7594295144081116, "learning_rate": 5.87293978686198e-06, "loss": 2.0703, "step": 19281 }, { "epoch": 0.64, "grad_norm": 0.7413676381111145, "learning_rate": 5.871971658964693e-06, "loss": 2.0658, "step": 19282 }, { "epoch": 0.64, "grad_norm": 0.7421931028366089, "learning_rate": 5.8710035777034515e-06, "loss": 2.0958, "step": 19283 }, { "epoch": 0.64, "grad_norm": 0.7141311168670654, "learning_rate": 5.870035543089189e-06, "loss": 2.0586, "step": 19284 }, { "epoch": 0.64, "grad_norm": 0.737067699432373, "learning_rate": 5.869067555132846e-06, "loss": 2.0211, "step": 19285 }, { "epoch": 0.64, "grad_norm": 0.7343761920928955, "learning_rate": 5.868099613845359e-06, "loss": 2.0725, "step": 19286 }, { "epoch": 0.64, "grad_norm": 0.7383939623832703, "learning_rate": 5.867131719237661e-06, "loss": 2.0454, "step": 19287 }, { "epoch": 0.64, "grad_norm": 0.7365996241569519, "learning_rate": 5.866163871320685e-06, "loss": 2.0282, "step": 19288 }, { "epoch": 0.64, "grad_norm": 0.7391613125801086, "learning_rate": 5.865196070105368e-06, "loss": 2.0481, "step": 19289 }, { "epoch": 0.64, "grad_norm": 0.7368939518928528, "learning_rate": 5.864228315602643e-06, "loss": 2.1101, "step": 19290 }, { "epoch": 0.64, "grad_norm": 0.735118567943573, "learning_rate": 5.8632606078234424e-06, "loss": 2.062, "step": 19291 }, { "epoch": 0.64, "grad_norm": 0.7388679385185242, "learning_rate": 5.862292946778696e-06, "loss": 1.9737, "step": 19292 }, { "epoch": 0.64, "grad_norm": 0.7371713519096375, "learning_rate": 5.86132533247934e-06, "loss": 2.0368, "step": 19293 }, { "epoch": 0.64, "grad_norm": 0.7312802672386169, "learning_rate": 5.860357764936309e-06, "loss": 2.0142, "step": 19294 }, { "epoch": 0.64, "grad_norm": 0.7293230295181274, "learning_rate": 5.859390244160526e-06, "loss": 1.988, "step": 19295 }, { "epoch": 0.64, "grad_norm": 0.7205647230148315, "learning_rate": 5.858422770162926e-06, "loss": 2.043, "step": 19296 }, { "epoch": 0.64, "grad_norm": 0.7324780821800232, "learning_rate": 5.857455342954439e-06, "loss": 2.0381, "step": 19297 }, { "epoch": 0.64, "grad_norm": 0.7390618920326233, "learning_rate": 5.856487962545991e-06, "loss": 2.0216, "step": 19298 }, { "epoch": 0.64, "grad_norm": 0.7698684334754944, "learning_rate": 5.855520628948521e-06, "loss": 2.0134, "step": 19299 }, { "epoch": 0.64, "grad_norm": 0.7475852370262146, "learning_rate": 5.854553342172943e-06, "loss": 2.0586, "step": 19300 }, { "epoch": 0.64, "grad_norm": 0.7435522079467773, "learning_rate": 5.853586102230193e-06, "loss": 2.0284, "step": 19301 }, { "epoch": 0.64, "grad_norm": 0.7425633668899536, "learning_rate": 5.852618909131199e-06, "loss": 2.1087, "step": 19302 }, { "epoch": 0.64, "grad_norm": 0.7309922575950623, "learning_rate": 5.851651762886887e-06, "loss": 1.9555, "step": 19303 }, { "epoch": 0.64, "grad_norm": 0.7259184718132019, "learning_rate": 5.8506846635081805e-06, "loss": 2.0185, "step": 19304 }, { "epoch": 0.64, "grad_norm": 0.7532318234443665, "learning_rate": 5.849717611006007e-06, "loss": 2.0905, "step": 19305 }, { "epoch": 0.64, "grad_norm": 0.7281666398048401, "learning_rate": 5.848750605391289e-06, "loss": 2.0243, "step": 19306 }, { "epoch": 0.64, "grad_norm": 0.7618240118026733, "learning_rate": 5.847783646674956e-06, "loss": 2.1155, "step": 19307 }, { "epoch": 0.64, "grad_norm": 0.7278028726577759, "learning_rate": 5.846816734867933e-06, "loss": 2.0609, "step": 19308 }, { "epoch": 0.64, "grad_norm": 0.7037928700447083, "learning_rate": 5.845849869981137e-06, "loss": 2.07, "step": 19309 }, { "epoch": 0.64, "grad_norm": 0.7242680191993713, "learning_rate": 5.844883052025495e-06, "loss": 2.0062, "step": 19310 }, { "epoch": 0.64, "grad_norm": 0.7730261087417603, "learning_rate": 5.84391628101193e-06, "loss": 2.1198, "step": 19311 }, { "epoch": 0.64, "grad_norm": 0.7794146537780762, "learning_rate": 5.842949556951365e-06, "loss": 2.064, "step": 19312 }, { "epoch": 0.64, "grad_norm": 0.7712916135787964, "learning_rate": 5.841982879854716e-06, "loss": 2.0233, "step": 19313 }, { "epoch": 0.64, "grad_norm": 0.7195977568626404, "learning_rate": 5.841016249732907e-06, "loss": 1.989, "step": 19314 }, { "epoch": 0.64, "grad_norm": 0.7682862877845764, "learning_rate": 5.840049666596861e-06, "loss": 2.0746, "step": 19315 }, { "epoch": 0.64, "grad_norm": 0.724327027797699, "learning_rate": 5.839083130457495e-06, "loss": 2.0371, "step": 19316 }, { "epoch": 0.64, "grad_norm": 0.7283473610877991, "learning_rate": 5.838116641325733e-06, "loss": 2.1012, "step": 19317 }, { "epoch": 0.64, "grad_norm": 0.7155072689056396, "learning_rate": 5.837150199212484e-06, "loss": 2.0933, "step": 19318 }, { "epoch": 0.64, "grad_norm": 0.7399768829345703, "learning_rate": 5.836183804128678e-06, "loss": 2.0443, "step": 19319 }, { "epoch": 0.64, "grad_norm": 0.7203693985939026, "learning_rate": 5.835217456085223e-06, "loss": 2.1381, "step": 19320 }, { "epoch": 0.64, "grad_norm": 0.7302543520927429, "learning_rate": 5.8342511550930425e-06, "loss": 2.0388, "step": 19321 }, { "epoch": 0.64, "grad_norm": 0.7463512420654297, "learning_rate": 5.833284901163053e-06, "loss": 2.0212, "step": 19322 }, { "epoch": 0.64, "grad_norm": 0.7585991621017456, "learning_rate": 5.832318694306165e-06, "loss": 2.0599, "step": 19323 }, { "epoch": 0.64, "grad_norm": 0.7586641311645508, "learning_rate": 5.831352534533297e-06, "loss": 2.0419, "step": 19324 }, { "epoch": 0.64, "grad_norm": 0.7267483472824097, "learning_rate": 5.830386421855372e-06, "loss": 1.9803, "step": 19325 }, { "epoch": 0.64, "grad_norm": 0.7542256116867065, "learning_rate": 5.82942035628329e-06, "loss": 2.1037, "step": 19326 }, { "epoch": 0.64, "grad_norm": 0.7502795457839966, "learning_rate": 5.828454337827975e-06, "loss": 2.0784, "step": 19327 }, { "epoch": 0.64, "grad_norm": 0.7340919375419617, "learning_rate": 5.82748836650034e-06, "loss": 2.0388, "step": 19328 }, { "epoch": 0.64, "grad_norm": 0.7320643663406372, "learning_rate": 5.826522442311293e-06, "loss": 2.0036, "step": 19329 }, { "epoch": 0.64, "grad_norm": 0.7654105424880981, "learning_rate": 5.825556565271752e-06, "loss": 2.0668, "step": 19330 }, { "epoch": 0.64, "grad_norm": 0.7645840644836426, "learning_rate": 5.8245907353926235e-06, "loss": 2.0877, "step": 19331 }, { "epoch": 0.64, "grad_norm": 0.7496606707572937, "learning_rate": 5.8236249526848265e-06, "loss": 2.1354, "step": 19332 }, { "epoch": 0.64, "grad_norm": 0.7427768707275391, "learning_rate": 5.822659217159263e-06, "loss": 2.0187, "step": 19333 }, { "epoch": 0.64, "grad_norm": 0.7317584156990051, "learning_rate": 5.821693528826851e-06, "loss": 2.068, "step": 19334 }, { "epoch": 0.64, "grad_norm": 0.7440376281738281, "learning_rate": 5.8207278876984965e-06, "loss": 2.0807, "step": 19335 }, { "epoch": 0.64, "grad_norm": 0.7936128973960876, "learning_rate": 5.819762293785105e-06, "loss": 2.0555, "step": 19336 }, { "epoch": 0.64, "grad_norm": 0.752167820930481, "learning_rate": 5.81879674709759e-06, "loss": 2.0284, "step": 19337 }, { "epoch": 0.64, "grad_norm": 0.7355301976203918, "learning_rate": 5.817831247646862e-06, "loss": 2.0846, "step": 19338 }, { "epoch": 0.64, "grad_norm": 0.7559276819229126, "learning_rate": 5.8168657954438265e-06, "loss": 2.0324, "step": 19339 }, { "epoch": 0.64, "grad_norm": 0.7320514917373657, "learning_rate": 5.815900390499385e-06, "loss": 2.0623, "step": 19340 }, { "epoch": 0.64, "grad_norm": 0.7647855281829834, "learning_rate": 5.814935032824454e-06, "loss": 2.0782, "step": 19341 }, { "epoch": 0.64, "grad_norm": 0.7557506561279297, "learning_rate": 5.813969722429928e-06, "loss": 2.021, "step": 19342 }, { "epoch": 0.64, "grad_norm": 0.7308631539344788, "learning_rate": 5.8130044593267254e-06, "loss": 2.0332, "step": 19343 }, { "epoch": 0.64, "grad_norm": 0.7623540163040161, "learning_rate": 5.812039243525743e-06, "loss": 2.0967, "step": 19344 }, { "epoch": 0.64, "grad_norm": 0.7644964456558228, "learning_rate": 5.811074075037884e-06, "loss": 2.0963, "step": 19345 }, { "epoch": 0.64, "grad_norm": 0.7548443675041199, "learning_rate": 5.810108953874054e-06, "loss": 2.0031, "step": 19346 }, { "epoch": 0.64, "grad_norm": 0.7097053527832031, "learning_rate": 5.809143880045162e-06, "loss": 2.0426, "step": 19347 }, { "epoch": 0.64, "grad_norm": 0.7633403539657593, "learning_rate": 5.8081788535621054e-06, "loss": 2.0003, "step": 19348 }, { "epoch": 0.64, "grad_norm": 0.7294674515724182, "learning_rate": 5.807213874435784e-06, "loss": 2.0367, "step": 19349 }, { "epoch": 0.64, "grad_norm": 0.7612988352775574, "learning_rate": 5.806248942677108e-06, "loss": 2.0416, "step": 19350 }, { "epoch": 0.64, "grad_norm": 0.7672849893569946, "learning_rate": 5.8052840582969694e-06, "loss": 1.9726, "step": 19351 }, { "epoch": 0.64, "grad_norm": 0.7392340302467346, "learning_rate": 5.804319221306276e-06, "loss": 2.0066, "step": 19352 }, { "epoch": 0.64, "grad_norm": 0.7485770583152771, "learning_rate": 5.803354431715922e-06, "loss": 2.0193, "step": 19353 }, { "epoch": 0.64, "grad_norm": 0.7442919015884399, "learning_rate": 5.802389689536815e-06, "loss": 2.0727, "step": 19354 }, { "epoch": 0.64, "grad_norm": 0.7927093505859375, "learning_rate": 5.801424994779844e-06, "loss": 2.1193, "step": 19355 }, { "epoch": 0.64, "grad_norm": 0.7455630302429199, "learning_rate": 5.800460347455917e-06, "loss": 2.0338, "step": 19356 }, { "epoch": 0.64, "grad_norm": 0.767478346824646, "learning_rate": 5.799495747575927e-06, "loss": 2.1311, "step": 19357 }, { "epoch": 0.64, "grad_norm": 0.7590939402580261, "learning_rate": 5.79853119515077e-06, "loss": 2.1129, "step": 19358 }, { "epoch": 0.64, "grad_norm": 0.7514663338661194, "learning_rate": 5.797566690191345e-06, "loss": 2.1004, "step": 19359 }, { "epoch": 0.64, "grad_norm": 0.772892951965332, "learning_rate": 5.7966022327085525e-06, "loss": 2.0497, "step": 19360 }, { "epoch": 0.64, "grad_norm": 0.7244913578033447, "learning_rate": 5.795637822713283e-06, "loss": 2.0228, "step": 19361 }, { "epoch": 0.64, "grad_norm": 0.7378636002540588, "learning_rate": 5.794673460216431e-06, "loss": 2.0622, "step": 19362 }, { "epoch": 0.64, "grad_norm": 0.7456867694854736, "learning_rate": 5.793709145228898e-06, "loss": 2.0013, "step": 19363 }, { "epoch": 0.64, "grad_norm": 0.7523425221443176, "learning_rate": 5.7927448777615695e-06, "loss": 2.0321, "step": 19364 }, { "epoch": 0.64, "grad_norm": 0.719103217124939, "learning_rate": 5.791780657825347e-06, "loss": 2.0718, "step": 19365 }, { "epoch": 0.64, "grad_norm": 0.74888676404953, "learning_rate": 5.790816485431121e-06, "loss": 2.107, "step": 19366 }, { "epoch": 0.64, "grad_norm": 0.7395013570785522, "learning_rate": 5.789852360589778e-06, "loss": 2.0485, "step": 19367 }, { "epoch": 0.64, "grad_norm": 0.7528979778289795, "learning_rate": 5.788888283312217e-06, "loss": 2.0972, "step": 19368 }, { "epoch": 0.64, "grad_norm": 0.7309052348136902, "learning_rate": 5.7879242536093325e-06, "loss": 2.0043, "step": 19369 }, { "epoch": 0.64, "grad_norm": 0.7477414608001709, "learning_rate": 5.786960271492011e-06, "loss": 2.1012, "step": 19370 }, { "epoch": 0.64, "grad_norm": 0.7451391816139221, "learning_rate": 5.785996336971141e-06, "loss": 2.0361, "step": 19371 }, { "epoch": 0.64, "grad_norm": 0.7219563722610474, "learning_rate": 5.785032450057615e-06, "loss": 2.0396, "step": 19372 }, { "epoch": 0.64, "grad_norm": 0.7223684787750244, "learning_rate": 5.784068610762321e-06, "loss": 2.0509, "step": 19373 }, { "epoch": 0.64, "grad_norm": 0.7322028279304504, "learning_rate": 5.783104819096152e-06, "loss": 2.085, "step": 19374 }, { "epoch": 0.64, "grad_norm": 0.7592397928237915, "learning_rate": 5.782141075069989e-06, "loss": 2.0217, "step": 19375 }, { "epoch": 0.64, "grad_norm": 0.7386236190795898, "learning_rate": 5.781177378694729e-06, "loss": 2.0661, "step": 19376 }, { "epoch": 0.64, "grad_norm": 0.7334775924682617, "learning_rate": 5.78021372998125e-06, "loss": 2.0481, "step": 19377 }, { "epoch": 0.64, "grad_norm": 0.7374983429908752, "learning_rate": 5.779250128940448e-06, "loss": 2.057, "step": 19378 }, { "epoch": 0.64, "grad_norm": 0.7519813179969788, "learning_rate": 5.778286575583204e-06, "loss": 2.0135, "step": 19379 }, { "epoch": 0.64, "grad_norm": 0.7982410788536072, "learning_rate": 5.7773230699204e-06, "loss": 2.026, "step": 19380 }, { "epoch": 0.64, "grad_norm": 0.7886046767234802, "learning_rate": 5.776359611962925e-06, "loss": 2.0746, "step": 19381 }, { "epoch": 0.64, "grad_norm": 0.7747352719306946, "learning_rate": 5.775396201721668e-06, "loss": 2.107, "step": 19382 }, { "epoch": 0.64, "grad_norm": 0.7699923515319824, "learning_rate": 5.774432839207509e-06, "loss": 2.0289, "step": 19383 }, { "epoch": 0.64, "grad_norm": 0.7316173911094666, "learning_rate": 5.773469524431328e-06, "loss": 2.0382, "step": 19384 }, { "epoch": 0.64, "grad_norm": 0.7248473763465881, "learning_rate": 5.772506257404015e-06, "loss": 2.0545, "step": 19385 }, { "epoch": 0.64, "grad_norm": 0.7396063208580017, "learning_rate": 5.7715430381364445e-06, "loss": 2.0428, "step": 19386 }, { "epoch": 0.65, "grad_norm": 0.7512771487236023, "learning_rate": 5.7705798666395055e-06, "loss": 2.0178, "step": 19387 }, { "epoch": 0.65, "grad_norm": 0.7658633589744568, "learning_rate": 5.769616742924078e-06, "loss": 2.0727, "step": 19388 }, { "epoch": 0.65, "grad_norm": 0.7438501715660095, "learning_rate": 5.768653667001036e-06, "loss": 2.0275, "step": 19389 }, { "epoch": 0.65, "grad_norm": 0.7386695742607117, "learning_rate": 5.767690638881267e-06, "loss": 2.0652, "step": 19390 }, { "epoch": 0.65, "grad_norm": 0.7638639807701111, "learning_rate": 5.766727658575651e-06, "loss": 2.1377, "step": 19391 }, { "epoch": 0.65, "grad_norm": 0.7473623752593994, "learning_rate": 5.765764726095067e-06, "loss": 2.1341, "step": 19392 }, { "epoch": 0.65, "grad_norm": 0.7257064580917358, "learning_rate": 5.764801841450385e-06, "loss": 2.067, "step": 19393 }, { "epoch": 0.65, "grad_norm": 0.7486692070960999, "learning_rate": 5.763839004652495e-06, "loss": 2.1181, "step": 19394 }, { "epoch": 0.65, "grad_norm": 0.7448033690452576, "learning_rate": 5.762876215712265e-06, "loss": 2.0695, "step": 19395 }, { "epoch": 0.65, "grad_norm": 0.7675816416740417, "learning_rate": 5.761913474640582e-06, "loss": 1.9844, "step": 19396 }, { "epoch": 0.65, "grad_norm": 0.7241543531417847, "learning_rate": 5.7609507814483105e-06, "loss": 1.9962, "step": 19397 }, { "epoch": 0.65, "grad_norm": 0.7510273456573486, "learning_rate": 5.759988136146337e-06, "loss": 2.0829, "step": 19398 }, { "epoch": 0.65, "grad_norm": 0.7283874750137329, "learning_rate": 5.75902553874553e-06, "loss": 2.0442, "step": 19399 }, { "epoch": 0.65, "grad_norm": 0.7749091982841492, "learning_rate": 5.75806298925677e-06, "loss": 2.0809, "step": 19400 }, { "epoch": 0.65, "grad_norm": 0.7831537127494812, "learning_rate": 5.757100487690928e-06, "loss": 2.0922, "step": 19401 }, { "epoch": 0.65, "grad_norm": 0.7498375177383423, "learning_rate": 5.756138034058876e-06, "loss": 2.0291, "step": 19402 }, { "epoch": 0.65, "grad_norm": 0.7431955933570862, "learning_rate": 5.755175628371488e-06, "loss": 2.0428, "step": 19403 }, { "epoch": 0.65, "grad_norm": 0.7263168692588806, "learning_rate": 5.754213270639641e-06, "loss": 2.0291, "step": 19404 }, { "epoch": 0.65, "grad_norm": 0.7727211117744446, "learning_rate": 5.753250960874206e-06, "loss": 2.0624, "step": 19405 }, { "epoch": 0.65, "grad_norm": 0.7376839518547058, "learning_rate": 5.752288699086048e-06, "loss": 2.0465, "step": 19406 }, { "epoch": 0.65, "grad_norm": 0.7739841938018799, "learning_rate": 5.751326485286046e-06, "loss": 2.0302, "step": 19407 }, { "epoch": 0.65, "grad_norm": 0.7334620952606201, "learning_rate": 5.750364319485064e-06, "loss": 1.9641, "step": 19408 }, { "epoch": 0.65, "grad_norm": 0.7411165237426758, "learning_rate": 5.749402201693981e-06, "loss": 1.9964, "step": 19409 }, { "epoch": 0.65, "grad_norm": 0.7419471144676208, "learning_rate": 5.74844013192366e-06, "loss": 2.0848, "step": 19410 }, { "epoch": 0.65, "grad_norm": 0.7416826486587524, "learning_rate": 5.747478110184965e-06, "loss": 2.1211, "step": 19411 }, { "epoch": 0.65, "grad_norm": 0.796728253364563, "learning_rate": 5.746516136488772e-06, "loss": 2.0687, "step": 19412 }, { "epoch": 0.65, "grad_norm": 0.7421063184738159, "learning_rate": 5.745554210845951e-06, "loss": 2.0373, "step": 19413 }, { "epoch": 0.65, "grad_norm": 0.7619338035583496, "learning_rate": 5.744592333267365e-06, "loss": 2.0652, "step": 19414 }, { "epoch": 0.65, "grad_norm": 0.7377417683601379, "learning_rate": 5.743630503763875e-06, "loss": 2.0885, "step": 19415 }, { "epoch": 0.65, "grad_norm": 0.7505934238433838, "learning_rate": 5.7426687223463585e-06, "loss": 2.12, "step": 19416 }, { "epoch": 0.65, "grad_norm": 0.7287315130233765, "learning_rate": 5.741706989025673e-06, "loss": 2.0536, "step": 19417 }, { "epoch": 0.65, "grad_norm": 0.7966857552528381, "learning_rate": 5.740745303812688e-06, "loss": 2.1518, "step": 19418 }, { "epoch": 0.65, "grad_norm": 0.7386447787284851, "learning_rate": 5.739783666718264e-06, "loss": 2.046, "step": 19419 }, { "epoch": 0.65, "grad_norm": 0.7366262078285217, "learning_rate": 5.73882207775327e-06, "loss": 2.0123, "step": 19420 }, { "epoch": 0.65, "grad_norm": 0.7422581911087036, "learning_rate": 5.737860536928563e-06, "loss": 1.9843, "step": 19421 }, { "epoch": 0.65, "grad_norm": 0.7076981067657471, "learning_rate": 5.736899044255011e-06, "loss": 2.1299, "step": 19422 }, { "epoch": 0.65, "grad_norm": 0.7595935463905334, "learning_rate": 5.735937599743483e-06, "loss": 2.0903, "step": 19423 }, { "epoch": 0.65, "grad_norm": 0.7507017254829407, "learning_rate": 5.734976203404825e-06, "loss": 2.037, "step": 19424 }, { "epoch": 0.65, "grad_norm": 0.7223144769668579, "learning_rate": 5.734014855249905e-06, "loss": 2.0901, "step": 19425 }, { "epoch": 0.65, "grad_norm": 0.7433385848999023, "learning_rate": 5.733053555289592e-06, "loss": 2.0044, "step": 19426 }, { "epoch": 0.65, "grad_norm": 0.7472304105758667, "learning_rate": 5.732092303534736e-06, "loss": 2.0265, "step": 19427 }, { "epoch": 0.65, "grad_norm": 0.7494765520095825, "learning_rate": 5.731131099996197e-06, "loss": 2.042, "step": 19428 }, { "epoch": 0.65, "grad_norm": 0.7683257460594177, "learning_rate": 5.730169944684842e-06, "loss": 2.0605, "step": 19429 }, { "epoch": 0.65, "grad_norm": 0.7253664135932922, "learning_rate": 5.7292088376115196e-06, "loss": 2.0087, "step": 19430 }, { "epoch": 0.65, "grad_norm": 0.7241952419281006, "learning_rate": 5.728247778787093e-06, "loss": 2.0274, "step": 19431 }, { "epoch": 0.65, "grad_norm": 0.7326438426971436, "learning_rate": 5.727286768222428e-06, "loss": 2.0373, "step": 19432 }, { "epoch": 0.65, "grad_norm": 0.7287969589233398, "learning_rate": 5.726325805928364e-06, "loss": 2.027, "step": 19433 }, { "epoch": 0.65, "grad_norm": 0.7718603610992432, "learning_rate": 5.725364891915768e-06, "loss": 2.0725, "step": 19434 }, { "epoch": 0.65, "grad_norm": 0.7437602877616882, "learning_rate": 5.724404026195496e-06, "loss": 2.0406, "step": 19435 }, { "epoch": 0.65, "grad_norm": 0.7366235256195068, "learning_rate": 5.723443208778403e-06, "loss": 2.0025, "step": 19436 }, { "epoch": 0.65, "grad_norm": 0.744770348072052, "learning_rate": 5.722482439675339e-06, "loss": 2.0652, "step": 19437 }, { "epoch": 0.65, "grad_norm": 0.7605843544006348, "learning_rate": 5.721521718897161e-06, "loss": 2.0353, "step": 19438 }, { "epoch": 0.65, "grad_norm": 0.7382888197898865, "learning_rate": 5.7205610464547265e-06, "loss": 2.0605, "step": 19439 }, { "epoch": 0.65, "grad_norm": 0.7444260120391846, "learning_rate": 5.719600422358886e-06, "loss": 2.0764, "step": 19440 }, { "epoch": 0.65, "grad_norm": 0.8098291158676147, "learning_rate": 5.718639846620486e-06, "loss": 1.9586, "step": 19441 }, { "epoch": 0.65, "grad_norm": 0.7371774911880493, "learning_rate": 5.717679319250388e-06, "loss": 1.9305, "step": 19442 }, { "epoch": 0.65, "grad_norm": 0.7559946179389954, "learning_rate": 5.716718840259437e-06, "loss": 2.0201, "step": 19443 }, { "epoch": 0.65, "grad_norm": 0.739691436290741, "learning_rate": 5.7157584096584866e-06, "loss": 2.0632, "step": 19444 }, { "epoch": 0.65, "grad_norm": 0.7354042530059814, "learning_rate": 5.7147980274583935e-06, "loss": 2.0514, "step": 19445 }, { "epoch": 0.65, "grad_norm": 0.7199611663818359, "learning_rate": 5.713837693669993e-06, "loss": 2.0428, "step": 19446 }, { "epoch": 0.65, "grad_norm": 0.7737756371498108, "learning_rate": 5.712877408304143e-06, "loss": 2.015, "step": 19447 }, { "epoch": 0.65, "grad_norm": 0.7603662610054016, "learning_rate": 5.711917171371695e-06, "loss": 2.112, "step": 19448 }, { "epoch": 0.65, "grad_norm": 0.7443706393241882, "learning_rate": 5.7109569828834935e-06, "loss": 2.1144, "step": 19449 }, { "epoch": 0.65, "grad_norm": 0.7563647031784058, "learning_rate": 5.709996842850383e-06, "loss": 2.0966, "step": 19450 }, { "epoch": 0.65, "grad_norm": 0.7375903725624084, "learning_rate": 5.709036751283218e-06, "loss": 2.0463, "step": 19451 }, { "epoch": 0.65, "grad_norm": 0.7632454633712769, "learning_rate": 5.708076708192837e-06, "loss": 2.0715, "step": 19452 }, { "epoch": 0.65, "grad_norm": 0.7655351161956787, "learning_rate": 5.70711671359009e-06, "loss": 2.0306, "step": 19453 }, { "epoch": 0.65, "grad_norm": 0.7505519986152649, "learning_rate": 5.706156767485827e-06, "loss": 2.0705, "step": 19454 }, { "epoch": 0.65, "grad_norm": 0.7584763765335083, "learning_rate": 5.705196869890887e-06, "loss": 2.0479, "step": 19455 }, { "epoch": 0.65, "grad_norm": 0.761532187461853, "learning_rate": 5.704237020816112e-06, "loss": 2.0541, "step": 19456 }, { "epoch": 0.65, "grad_norm": 0.7704643607139587, "learning_rate": 5.703277220272354e-06, "loss": 2.1491, "step": 19457 }, { "epoch": 0.65, "grad_norm": 0.7389281392097473, "learning_rate": 5.7023174682704515e-06, "loss": 1.9965, "step": 19458 }, { "epoch": 0.65, "grad_norm": 0.7665868401527405, "learning_rate": 5.701357764821245e-06, "loss": 2.1359, "step": 19459 }, { "epoch": 0.65, "grad_norm": 0.7701390981674194, "learning_rate": 5.700398109935578e-06, "loss": 2.0531, "step": 19460 }, { "epoch": 0.65, "grad_norm": 0.7802620530128479, "learning_rate": 5.699438503624297e-06, "loss": 2.1126, "step": 19461 }, { "epoch": 0.65, "grad_norm": 0.7476899027824402, "learning_rate": 5.698478945898236e-06, "loss": 2.0601, "step": 19462 }, { "epoch": 0.65, "grad_norm": 0.7184543609619141, "learning_rate": 5.697519436768243e-06, "loss": 2.0641, "step": 19463 }, { "epoch": 0.65, "grad_norm": 0.7296576499938965, "learning_rate": 5.696559976245153e-06, "loss": 2.0568, "step": 19464 }, { "epoch": 0.65, "grad_norm": 0.7817097902297974, "learning_rate": 5.695600564339803e-06, "loss": 2.096, "step": 19465 }, { "epoch": 0.65, "grad_norm": 0.7401478886604309, "learning_rate": 5.694641201063036e-06, "loss": 2.0237, "step": 19466 }, { "epoch": 0.65, "grad_norm": 0.7490755319595337, "learning_rate": 5.693681886425697e-06, "loss": 2.0661, "step": 19467 }, { "epoch": 0.65, "grad_norm": 0.754508912563324, "learning_rate": 5.692722620438608e-06, "loss": 2.1601, "step": 19468 }, { "epoch": 0.65, "grad_norm": 0.7290053367614746, "learning_rate": 5.691763403112614e-06, "loss": 2.0222, "step": 19469 }, { "epoch": 0.65, "grad_norm": 0.7764450311660767, "learning_rate": 5.690804234458557e-06, "loss": 2.0059, "step": 19470 }, { "epoch": 0.65, "grad_norm": 0.7240897417068481, "learning_rate": 5.689845114487268e-06, "loss": 2.0092, "step": 19471 }, { "epoch": 0.65, "grad_norm": 0.7328385710716248, "learning_rate": 5.688886043209579e-06, "loss": 1.9539, "step": 19472 }, { "epoch": 0.65, "grad_norm": 0.782795786857605, "learning_rate": 5.687927020636332e-06, "loss": 2.1574, "step": 19473 }, { "epoch": 0.65, "grad_norm": 0.7345569729804993, "learning_rate": 5.686968046778356e-06, "loss": 1.9965, "step": 19474 }, { "epoch": 0.65, "grad_norm": 0.7304505705833435, "learning_rate": 5.6860091216464875e-06, "loss": 2.0839, "step": 19475 }, { "epoch": 0.65, "grad_norm": 0.7205492258071899, "learning_rate": 5.685050245251562e-06, "loss": 2.0329, "step": 19476 }, { "epoch": 0.65, "grad_norm": 0.7615114450454712, "learning_rate": 5.684091417604411e-06, "loss": 2.0503, "step": 19477 }, { "epoch": 0.65, "grad_norm": 0.7482945322990417, "learning_rate": 5.683132638715862e-06, "loss": 2.0483, "step": 19478 }, { "epoch": 0.65, "grad_norm": 0.7542290687561035, "learning_rate": 5.682173908596754e-06, "loss": 2.0911, "step": 19479 }, { "epoch": 0.65, "grad_norm": 0.7569753527641296, "learning_rate": 5.681215227257915e-06, "loss": 2.0426, "step": 19480 }, { "epoch": 0.65, "grad_norm": 0.7392866611480713, "learning_rate": 5.6802565947101714e-06, "loss": 2.0831, "step": 19481 }, { "epoch": 0.65, "grad_norm": 0.7456530332565308, "learning_rate": 5.679298010964357e-06, "loss": 1.9991, "step": 19482 }, { "epoch": 0.65, "grad_norm": 0.7383148074150085, "learning_rate": 5.678339476031305e-06, "loss": 2.041, "step": 19483 }, { "epoch": 0.65, "grad_norm": 0.782660722732544, "learning_rate": 5.6773809899218366e-06, "loss": 2.0085, "step": 19484 }, { "epoch": 0.65, "grad_norm": 0.7700353264808655, "learning_rate": 5.67642255264679e-06, "loss": 2.1, "step": 19485 }, { "epoch": 0.65, "grad_norm": 0.7472296953201294, "learning_rate": 5.675464164216986e-06, "loss": 1.9953, "step": 19486 }, { "epoch": 0.65, "grad_norm": 0.7187811732292175, "learning_rate": 5.674505824643251e-06, "loss": 2.0449, "step": 19487 }, { "epoch": 0.65, "grad_norm": 0.7457808256149292, "learning_rate": 5.673547533936413e-06, "loss": 2.0077, "step": 19488 }, { "epoch": 0.65, "grad_norm": 0.7338558435440063, "learning_rate": 5.6725892921073075e-06, "loss": 1.9933, "step": 19489 }, { "epoch": 0.65, "grad_norm": 0.7376455664634705, "learning_rate": 5.6716310991667455e-06, "loss": 2.0459, "step": 19490 }, { "epoch": 0.65, "grad_norm": 0.7485432028770447, "learning_rate": 5.6706729551255566e-06, "loss": 2.0059, "step": 19491 }, { "epoch": 0.65, "grad_norm": 0.7623322606086731, "learning_rate": 5.6697148599945724e-06, "loss": 2.0685, "step": 19492 }, { "epoch": 0.65, "grad_norm": 0.7522966265678406, "learning_rate": 5.668756813784608e-06, "loss": 2.0508, "step": 19493 }, { "epoch": 0.65, "grad_norm": 0.73207688331604, "learning_rate": 5.6677988165064945e-06, "loss": 2.0408, "step": 19494 }, { "epoch": 0.65, "grad_norm": 0.7407101988792419, "learning_rate": 5.666840868171051e-06, "loss": 1.9876, "step": 19495 }, { "epoch": 0.65, "grad_norm": 0.7429600358009338, "learning_rate": 5.665882968789096e-06, "loss": 2.0357, "step": 19496 }, { "epoch": 0.65, "grad_norm": 0.7351678013801575, "learning_rate": 5.664925118371456e-06, "loss": 2.067, "step": 19497 }, { "epoch": 0.65, "grad_norm": 0.7288463115692139, "learning_rate": 5.663967316928954e-06, "loss": 2.0954, "step": 19498 }, { "epoch": 0.65, "grad_norm": 0.7242512702941895, "learning_rate": 5.663009564472408e-06, "loss": 2.0665, "step": 19499 }, { "epoch": 0.65, "grad_norm": 0.7324820160865784, "learning_rate": 5.662051861012636e-06, "loss": 2.0483, "step": 19500 }, { "epoch": 0.65, "grad_norm": 0.7637337446212769, "learning_rate": 5.6610942065604625e-06, "loss": 2.062, "step": 19501 }, { "epoch": 0.65, "grad_norm": 0.7275210618972778, "learning_rate": 5.660136601126705e-06, "loss": 2.0106, "step": 19502 }, { "epoch": 0.65, "grad_norm": 0.7587413787841797, "learning_rate": 5.659179044722174e-06, "loss": 2.1523, "step": 19503 }, { "epoch": 0.65, "grad_norm": 0.7411825656890869, "learning_rate": 5.658221537357697e-06, "loss": 2.0448, "step": 19504 }, { "epoch": 0.65, "grad_norm": 0.7222442030906677, "learning_rate": 5.657264079044091e-06, "loss": 2.0365, "step": 19505 }, { "epoch": 0.65, "grad_norm": 0.7505329251289368, "learning_rate": 5.656306669792166e-06, "loss": 2.0258, "step": 19506 }, { "epoch": 0.65, "grad_norm": 0.7594444751739502, "learning_rate": 5.655349309612749e-06, "loss": 2.0931, "step": 19507 }, { "epoch": 0.65, "grad_norm": 0.7588348388671875, "learning_rate": 5.654391998516647e-06, "loss": 2.0902, "step": 19508 }, { "epoch": 0.65, "grad_norm": 0.7546616792678833, "learning_rate": 5.653434736514675e-06, "loss": 2.1068, "step": 19509 }, { "epoch": 0.65, "grad_norm": 0.7324091792106628, "learning_rate": 5.652477523617649e-06, "loss": 1.9526, "step": 19510 }, { "epoch": 0.65, "grad_norm": 0.7973360419273376, "learning_rate": 5.651520359836391e-06, "loss": 2.0423, "step": 19511 }, { "epoch": 0.65, "grad_norm": 0.7721773982048035, "learning_rate": 5.650563245181701e-06, "loss": 2.0758, "step": 19512 }, { "epoch": 0.65, "grad_norm": 0.7424566745758057, "learning_rate": 5.649606179664399e-06, "loss": 1.9872, "step": 19513 }, { "epoch": 0.65, "grad_norm": 0.7309728264808655, "learning_rate": 5.648649163295299e-06, "loss": 2.0307, "step": 19514 }, { "epoch": 0.65, "grad_norm": 0.7741451859474182, "learning_rate": 5.647692196085208e-06, "loss": 2.1028, "step": 19515 }, { "epoch": 0.65, "grad_norm": 0.7368655204772949, "learning_rate": 5.6467352780449435e-06, "loss": 2.038, "step": 19516 }, { "epoch": 0.65, "grad_norm": 0.7623676657676697, "learning_rate": 5.6457784091853115e-06, "loss": 2.0769, "step": 19517 }, { "epoch": 0.65, "grad_norm": 0.7327426671981812, "learning_rate": 5.644821589517121e-06, "loss": 2.1007, "step": 19518 }, { "epoch": 0.65, "grad_norm": 0.7521306872367859, "learning_rate": 5.6438648190511835e-06, "loss": 2.0718, "step": 19519 }, { "epoch": 0.65, "grad_norm": 0.7192142605781555, "learning_rate": 5.642908097798312e-06, "loss": 2.0908, "step": 19520 }, { "epoch": 0.65, "grad_norm": 0.7225168347358704, "learning_rate": 5.641951425769311e-06, "loss": 2.0381, "step": 19521 }, { "epoch": 0.65, "grad_norm": 0.7924708127975464, "learning_rate": 5.640994802974984e-06, "loss": 2.0796, "step": 19522 }, { "epoch": 0.65, "grad_norm": 0.7717782855033875, "learning_rate": 5.640038229426145e-06, "loss": 2.1126, "step": 19523 }, { "epoch": 0.65, "grad_norm": 0.7277173399925232, "learning_rate": 5.639081705133601e-06, "loss": 2.0176, "step": 19524 }, { "epoch": 0.65, "grad_norm": 0.7578544020652771, "learning_rate": 5.638125230108158e-06, "loss": 2.168, "step": 19525 }, { "epoch": 0.65, "grad_norm": 0.7500308156013489, "learning_rate": 5.637168804360614e-06, "loss": 2.0703, "step": 19526 }, { "epoch": 0.65, "grad_norm": 0.7537912130355835, "learning_rate": 5.636212427901785e-06, "loss": 2.0617, "step": 19527 }, { "epoch": 0.65, "grad_norm": 0.759221076965332, "learning_rate": 5.6352561007424655e-06, "loss": 2.0329, "step": 19528 }, { "epoch": 0.65, "grad_norm": 0.7061829566955566, "learning_rate": 5.63429982289347e-06, "loss": 2.1202, "step": 19529 }, { "epoch": 0.65, "grad_norm": 0.7606263160705566, "learning_rate": 5.633343594365597e-06, "loss": 2.0445, "step": 19530 }, { "epoch": 0.65, "grad_norm": 0.7495203018188477, "learning_rate": 5.632387415169643e-06, "loss": 2.0584, "step": 19531 }, { "epoch": 0.65, "grad_norm": 0.7259423732757568, "learning_rate": 5.6314312853164175e-06, "loss": 2.0214, "step": 19532 }, { "epoch": 0.65, "grad_norm": 0.7746559381484985, "learning_rate": 5.630475204816729e-06, "loss": 2.0205, "step": 19533 }, { "epoch": 0.65, "grad_norm": 0.72916179895401, "learning_rate": 5.629519173681363e-06, "loss": 2.0204, "step": 19534 }, { "epoch": 0.65, "grad_norm": 0.7190880179405212, "learning_rate": 5.628563191921128e-06, "loss": 2.043, "step": 19535 }, { "epoch": 0.65, "grad_norm": 0.7427259087562561, "learning_rate": 5.627607259546828e-06, "loss": 2.1085, "step": 19536 }, { "epoch": 0.65, "grad_norm": 0.7584714293479919, "learning_rate": 5.626651376569254e-06, "loss": 1.9946, "step": 19537 }, { "epoch": 0.65, "grad_norm": 0.7303141355514526, "learning_rate": 5.625695542999215e-06, "loss": 2.046, "step": 19538 }, { "epoch": 0.65, "grad_norm": 0.7365593314170837, "learning_rate": 5.624739758847498e-06, "loss": 2.0988, "step": 19539 }, { "epoch": 0.65, "grad_norm": 0.7867761254310608, "learning_rate": 5.623784024124913e-06, "loss": 2.0822, "step": 19540 }, { "epoch": 0.65, "grad_norm": 0.7571848034858704, "learning_rate": 5.622828338842248e-06, "loss": 2.0545, "step": 19541 }, { "epoch": 0.65, "grad_norm": 0.7367216348648071, "learning_rate": 5.621872703010305e-06, "loss": 2.0449, "step": 19542 }, { "epoch": 0.65, "grad_norm": 0.7408362030982971, "learning_rate": 5.620917116639879e-06, "loss": 2.0571, "step": 19543 }, { "epoch": 0.65, "grad_norm": 0.7886891961097717, "learning_rate": 5.619961579741762e-06, "loss": 2.036, "step": 19544 }, { "epoch": 0.65, "grad_norm": 0.7480760216712952, "learning_rate": 5.619006092326751e-06, "loss": 2.0132, "step": 19545 }, { "epoch": 0.65, "grad_norm": 0.7563057541847229, "learning_rate": 5.618050654405647e-06, "loss": 1.9871, "step": 19546 }, { "epoch": 0.65, "grad_norm": 0.7795711159706116, "learning_rate": 5.617095265989237e-06, "loss": 2.0776, "step": 19547 }, { "epoch": 0.65, "grad_norm": 0.7605361938476562, "learning_rate": 5.616139927088313e-06, "loss": 2.0296, "step": 19548 }, { "epoch": 0.65, "grad_norm": 0.7357608675956726, "learning_rate": 5.615184637713675e-06, "loss": 2.0368, "step": 19549 }, { "epoch": 0.65, "grad_norm": 0.793256938457489, "learning_rate": 5.6142293978761075e-06, "loss": 2.1577, "step": 19550 }, { "epoch": 0.65, "grad_norm": 0.7241933345794678, "learning_rate": 5.613274207586409e-06, "loss": 2.0908, "step": 19551 }, { "epoch": 0.65, "grad_norm": 0.7310154438018799, "learning_rate": 5.6123190668553686e-06, "loss": 2.0341, "step": 19552 }, { "epoch": 0.65, "grad_norm": 0.7121602296829224, "learning_rate": 5.611363975693771e-06, "loss": 2.0761, "step": 19553 }, { "epoch": 0.65, "grad_norm": 0.750005304813385, "learning_rate": 5.610408934112412e-06, "loss": 2.0377, "step": 19554 }, { "epoch": 0.65, "grad_norm": 0.7561762928962708, "learning_rate": 5.609453942122083e-06, "loss": 2.0452, "step": 19555 }, { "epoch": 0.65, "grad_norm": 0.7533517479896545, "learning_rate": 5.608498999733571e-06, "loss": 2.0212, "step": 19556 }, { "epoch": 0.65, "grad_norm": 0.7263368964195251, "learning_rate": 5.607544106957661e-06, "loss": 2.0552, "step": 19557 }, { "epoch": 0.65, "grad_norm": 0.7331065535545349, "learning_rate": 5.606589263805147e-06, "loss": 2.0108, "step": 19558 }, { "epoch": 0.65, "grad_norm": 0.7458671927452087, "learning_rate": 5.605634470286807e-06, "loss": 2.0802, "step": 19559 }, { "epoch": 0.65, "grad_norm": 0.742459237575531, "learning_rate": 5.604679726413438e-06, "loss": 2.0593, "step": 19560 }, { "epoch": 0.65, "grad_norm": 0.7391287088394165, "learning_rate": 5.603725032195818e-06, "loss": 2.0863, "step": 19561 }, { "epoch": 0.65, "grad_norm": 0.7931849956512451, "learning_rate": 5.6027703876447405e-06, "loss": 2.0486, "step": 19562 }, { "epoch": 0.65, "grad_norm": 0.7547613382339478, "learning_rate": 5.601815792770981e-06, "loss": 2.0284, "step": 19563 }, { "epoch": 0.65, "grad_norm": 0.7238773107528687, "learning_rate": 5.600861247585334e-06, "loss": 2.0713, "step": 19564 }, { "epoch": 0.65, "grad_norm": 0.7398040890693665, "learning_rate": 5.599906752098578e-06, "loss": 2.0184, "step": 19565 }, { "epoch": 0.65, "grad_norm": 0.7282812595367432, "learning_rate": 5.5989523063214936e-06, "loss": 2.0511, "step": 19566 }, { "epoch": 0.65, "grad_norm": 0.7346721887588501, "learning_rate": 5.597997910264866e-06, "loss": 2.1091, "step": 19567 }, { "epoch": 0.65, "grad_norm": 0.7421655058860779, "learning_rate": 5.597043563939483e-06, "loss": 2.1149, "step": 19568 }, { "epoch": 0.65, "grad_norm": 0.7590541243553162, "learning_rate": 5.5960892673561196e-06, "loss": 2.079, "step": 19569 }, { "epoch": 0.65, "grad_norm": 0.7407588362693787, "learning_rate": 5.595135020525557e-06, "loss": 2.0436, "step": 19570 }, { "epoch": 0.65, "grad_norm": 0.7701472640037537, "learning_rate": 5.5941808234585796e-06, "loss": 2.0795, "step": 19571 }, { "epoch": 0.65, "grad_norm": 0.729473888874054, "learning_rate": 5.593226676165962e-06, "loss": 2.009, "step": 19572 }, { "epoch": 0.65, "grad_norm": 0.7512383460998535, "learning_rate": 5.592272578658491e-06, "loss": 2.0464, "step": 19573 }, { "epoch": 0.65, "grad_norm": 0.7690520286560059, "learning_rate": 5.591318530946941e-06, "loss": 2.0164, "step": 19574 }, { "epoch": 0.65, "grad_norm": 0.7310040593147278, "learning_rate": 5.590364533042087e-06, "loss": 2.0385, "step": 19575 }, { "epoch": 0.65, "grad_norm": 0.7455087900161743, "learning_rate": 5.589410584954708e-06, "loss": 2.0599, "step": 19576 }, { "epoch": 0.65, "grad_norm": 0.7293804883956909, "learning_rate": 5.58845668669559e-06, "loss": 2.0718, "step": 19577 }, { "epoch": 0.65, "grad_norm": 0.7608941197395325, "learning_rate": 5.587502838275502e-06, "loss": 2.1001, "step": 19578 }, { "epoch": 0.65, "grad_norm": 0.7469384670257568, "learning_rate": 5.586549039705218e-06, "loss": 2.0431, "step": 19579 }, { "epoch": 0.65, "grad_norm": 0.7249419689178467, "learning_rate": 5.585595290995518e-06, "loss": 2.0125, "step": 19580 }, { "epoch": 0.65, "grad_norm": 0.78377366065979, "learning_rate": 5.584641592157174e-06, "loss": 2.0263, "step": 19581 }, { "epoch": 0.65, "grad_norm": 0.7254436016082764, "learning_rate": 5.583687943200964e-06, "loss": 2.0254, "step": 19582 }, { "epoch": 0.65, "grad_norm": 0.7522599697113037, "learning_rate": 5.582734344137655e-06, "loss": 2.0567, "step": 19583 }, { "epoch": 0.65, "grad_norm": 0.7589452862739563, "learning_rate": 5.581780794978029e-06, "loss": 2.0551, "step": 19584 }, { "epoch": 0.65, "grad_norm": 0.7531152367591858, "learning_rate": 5.580827295732852e-06, "loss": 2.0197, "step": 19585 }, { "epoch": 0.65, "grad_norm": 0.7827947735786438, "learning_rate": 5.5798738464129e-06, "loss": 2.0804, "step": 19586 }, { "epoch": 0.65, "grad_norm": 0.7234615683555603, "learning_rate": 5.578920447028943e-06, "loss": 2.0441, "step": 19587 }, { "epoch": 0.65, "grad_norm": 0.7164815068244934, "learning_rate": 5.57796709759175e-06, "loss": 2.0516, "step": 19588 }, { "epoch": 0.65, "grad_norm": 0.7341342568397522, "learning_rate": 5.577013798112091e-06, "loss": 2.0537, "step": 19589 }, { "epoch": 0.65, "grad_norm": 0.7366927266120911, "learning_rate": 5.576060548600742e-06, "loss": 2.0223, "step": 19590 }, { "epoch": 0.65, "grad_norm": 0.7171513438224792, "learning_rate": 5.5751073490684696e-06, "loss": 2.0096, "step": 19591 }, { "epoch": 0.65, "grad_norm": 0.7562494874000549, "learning_rate": 5.574154199526037e-06, "loss": 2.0849, "step": 19592 }, { "epoch": 0.65, "grad_norm": 0.759568989276886, "learning_rate": 5.573201099984219e-06, "loss": 2.1198, "step": 19593 }, { "epoch": 0.65, "grad_norm": 0.7625716924667358, "learning_rate": 5.572248050453777e-06, "loss": 2.1028, "step": 19594 }, { "epoch": 0.65, "grad_norm": 0.7648226618766785, "learning_rate": 5.571295050945487e-06, "loss": 2.0429, "step": 19595 }, { "epoch": 0.65, "grad_norm": 0.7351311445236206, "learning_rate": 5.570342101470108e-06, "loss": 2.0274, "step": 19596 }, { "epoch": 0.65, "grad_norm": 0.7135232090950012, "learning_rate": 5.569389202038405e-06, "loss": 2.0429, "step": 19597 }, { "epoch": 0.65, "grad_norm": 0.7387197613716125, "learning_rate": 5.568436352661146e-06, "loss": 2.0981, "step": 19598 }, { "epoch": 0.65, "grad_norm": 0.764707624912262, "learning_rate": 5.567483553349101e-06, "loss": 2.0425, "step": 19599 }, { "epoch": 0.65, "grad_norm": 0.7871748805046082, "learning_rate": 5.566530804113028e-06, "loss": 2.0275, "step": 19600 }, { "epoch": 0.65, "grad_norm": 0.7661146521568298, "learning_rate": 5.565578104963688e-06, "loss": 2.1114, "step": 19601 }, { "epoch": 0.65, "grad_norm": 0.750670313835144, "learning_rate": 5.564625455911852e-06, "loss": 2.1222, "step": 19602 }, { "epoch": 0.65, "grad_norm": 0.741730809211731, "learning_rate": 5.563672856968274e-06, "loss": 2.0747, "step": 19603 }, { "epoch": 0.65, "grad_norm": 0.7372225522994995, "learning_rate": 5.562720308143724e-06, "loss": 2.0286, "step": 19604 }, { "epoch": 0.65, "grad_norm": 0.727664589881897, "learning_rate": 5.561767809448956e-06, "loss": 2.0256, "step": 19605 }, { "epoch": 0.65, "grad_norm": 0.7347592115402222, "learning_rate": 5.560815360894738e-06, "loss": 2.0535, "step": 19606 }, { "epoch": 0.65, "grad_norm": 0.7356708645820618, "learning_rate": 5.559862962491822e-06, "loss": 2.0339, "step": 19607 }, { "epoch": 0.65, "grad_norm": 0.7301467061042786, "learning_rate": 5.558910614250972e-06, "loss": 2.0551, "step": 19608 }, { "epoch": 0.65, "grad_norm": 0.7556509375572205, "learning_rate": 5.557958316182956e-06, "loss": 2.0962, "step": 19609 }, { "epoch": 0.65, "grad_norm": 0.7481658458709717, "learning_rate": 5.557006068298514e-06, "loss": 2.038, "step": 19610 }, { "epoch": 0.65, "grad_norm": 0.7360104322433472, "learning_rate": 5.556053870608415e-06, "loss": 2.0775, "step": 19611 }, { "epoch": 0.65, "grad_norm": 0.7646734714508057, "learning_rate": 5.555101723123419e-06, "loss": 2.1493, "step": 19612 }, { "epoch": 0.65, "grad_norm": 0.7490301728248596, "learning_rate": 5.5541496258542774e-06, "loss": 2.0259, "step": 19613 }, { "epoch": 0.65, "grad_norm": 0.7321333885192871, "learning_rate": 5.553197578811745e-06, "loss": 2.0977, "step": 19614 }, { "epoch": 0.65, "grad_norm": 0.7561035752296448, "learning_rate": 5.5522455820065835e-06, "loss": 2.1632, "step": 19615 }, { "epoch": 0.65, "grad_norm": 0.786316454410553, "learning_rate": 5.551293635449542e-06, "loss": 2.031, "step": 19616 }, { "epoch": 0.65, "grad_norm": 0.7408419251441956, "learning_rate": 5.550341739151382e-06, "loss": 2.1028, "step": 19617 }, { "epoch": 0.65, "grad_norm": 0.7293141484260559, "learning_rate": 5.549389893122852e-06, "loss": 2.1178, "step": 19618 }, { "epoch": 0.65, "grad_norm": 0.7868127822875977, "learning_rate": 5.548438097374702e-06, "loss": 2.0776, "step": 19619 }, { "epoch": 0.65, "grad_norm": 0.7648585438728333, "learning_rate": 5.5474863519176916e-06, "loss": 2.0766, "step": 19620 }, { "epoch": 0.65, "grad_norm": 0.7292461395263672, "learning_rate": 5.546534656762573e-06, "loss": 2.0316, "step": 19621 }, { "epoch": 0.65, "grad_norm": 0.6971485018730164, "learning_rate": 5.545583011920097e-06, "loss": 1.9956, "step": 19622 }, { "epoch": 0.65, "grad_norm": 0.7470062971115112, "learning_rate": 5.544631417401009e-06, "loss": 2.0261, "step": 19623 }, { "epoch": 0.65, "grad_norm": 0.7032642960548401, "learning_rate": 5.5436798732160655e-06, "loss": 1.9848, "step": 19624 }, { "epoch": 0.65, "grad_norm": 0.7521611452102661, "learning_rate": 5.5427283793760174e-06, "loss": 2.0703, "step": 19625 }, { "epoch": 0.65, "grad_norm": 0.739387035369873, "learning_rate": 5.541776935891613e-06, "loss": 2.0466, "step": 19626 }, { "epoch": 0.65, "grad_norm": 0.7621315121650696, "learning_rate": 5.540825542773596e-06, "loss": 2.0746, "step": 19627 }, { "epoch": 0.65, "grad_norm": 0.7290324568748474, "learning_rate": 5.539874200032722e-06, "loss": 2.0583, "step": 19628 }, { "epoch": 0.65, "grad_norm": 0.7492371201515198, "learning_rate": 5.538922907679731e-06, "loss": 2.1333, "step": 19629 }, { "epoch": 0.65, "grad_norm": 0.7589752674102783, "learning_rate": 5.5379716657253755e-06, "loss": 2.048, "step": 19630 }, { "epoch": 0.65, "grad_norm": 0.7361384630203247, "learning_rate": 5.537020474180409e-06, "loss": 2.0762, "step": 19631 }, { "epoch": 0.65, "grad_norm": 0.7307048439979553, "learning_rate": 5.536069333055562e-06, "loss": 2.0141, "step": 19632 }, { "epoch": 0.65, "grad_norm": 0.7685796022415161, "learning_rate": 5.535118242361587e-06, "loss": 1.9984, "step": 19633 }, { "epoch": 0.65, "grad_norm": 0.7387921810150146, "learning_rate": 5.534167202109233e-06, "loss": 2.0646, "step": 19634 }, { "epoch": 0.65, "grad_norm": 0.7297471761703491, "learning_rate": 5.533216212309241e-06, "loss": 2.0476, "step": 19635 }, { "epoch": 0.65, "grad_norm": 0.7760244011878967, "learning_rate": 5.53226527297235e-06, "loss": 2.0629, "step": 19636 }, { "epoch": 0.65, "grad_norm": 0.7446165680885315, "learning_rate": 5.531314384109313e-06, "loss": 2.0742, "step": 19637 }, { "epoch": 0.65, "grad_norm": 0.7211996912956238, "learning_rate": 5.530363545730862e-06, "loss": 2.0412, "step": 19638 }, { "epoch": 0.65, "grad_norm": 0.7500234842300415, "learning_rate": 5.529412757847745e-06, "loss": 2.044, "step": 19639 }, { "epoch": 0.65, "grad_norm": 0.7551546096801758, "learning_rate": 5.528462020470706e-06, "loss": 2.1282, "step": 19640 }, { "epoch": 0.65, "grad_norm": 0.7420210242271423, "learning_rate": 5.527511333610482e-06, "loss": 2.0632, "step": 19641 }, { "epoch": 0.65, "grad_norm": 0.7384548187255859, "learning_rate": 5.526560697277812e-06, "loss": 2.1293, "step": 19642 }, { "epoch": 0.65, "grad_norm": 0.7413015961647034, "learning_rate": 5.525610111483439e-06, "loss": 2.0784, "step": 19643 }, { "epoch": 0.65, "grad_norm": 0.7680754065513611, "learning_rate": 5.524659576238102e-06, "loss": 2.0734, "step": 19644 }, { "epoch": 0.65, "grad_norm": 0.7408783435821533, "learning_rate": 5.523709091552535e-06, "loss": 2.0845, "step": 19645 }, { "epoch": 0.65, "grad_norm": 0.7617834806442261, "learning_rate": 5.522758657437478e-06, "loss": 2.0895, "step": 19646 }, { "epoch": 0.65, "grad_norm": 0.7709333896636963, "learning_rate": 5.521808273903675e-06, "loss": 2.1035, "step": 19647 }, { "epoch": 0.65, "grad_norm": 0.763192892074585, "learning_rate": 5.520857940961857e-06, "loss": 2.0713, "step": 19648 }, { "epoch": 0.65, "grad_norm": 0.7635023593902588, "learning_rate": 5.519907658622756e-06, "loss": 1.9942, "step": 19649 }, { "epoch": 0.65, "grad_norm": 0.7393171787261963, "learning_rate": 5.518957426897118e-06, "loss": 2.0775, "step": 19650 }, { "epoch": 0.65, "grad_norm": 0.7395033240318298, "learning_rate": 5.518007245795668e-06, "loss": 1.9617, "step": 19651 }, { "epoch": 0.65, "grad_norm": 0.7473169565200806, "learning_rate": 5.517057115329146e-06, "loss": 2.0368, "step": 19652 }, { "epoch": 0.65, "grad_norm": 0.7427636384963989, "learning_rate": 5.516107035508292e-06, "loss": 2.0577, "step": 19653 }, { "epoch": 0.65, "grad_norm": 0.7609863877296448, "learning_rate": 5.515157006343828e-06, "loss": 2.0449, "step": 19654 }, { "epoch": 0.65, "grad_norm": 0.7596525549888611, "learning_rate": 5.514207027846489e-06, "loss": 2.0976, "step": 19655 }, { "epoch": 0.65, "grad_norm": 0.7819472551345825, "learning_rate": 5.5132571000270145e-06, "loss": 2.0401, "step": 19656 }, { "epoch": 0.65, "grad_norm": 0.7262775897979736, "learning_rate": 5.512307222896132e-06, "loss": 2.0723, "step": 19657 }, { "epoch": 0.65, "grad_norm": 0.7380446791648865, "learning_rate": 5.511357396464569e-06, "loss": 2.0543, "step": 19658 }, { "epoch": 0.65, "grad_norm": 0.764438807964325, "learning_rate": 5.510407620743064e-06, "loss": 2.0464, "step": 19659 }, { "epoch": 0.65, "grad_norm": 0.7211927175521851, "learning_rate": 5.509457895742336e-06, "loss": 2.0, "step": 19660 }, { "epoch": 0.65, "grad_norm": 0.7760045528411865, "learning_rate": 5.508508221473124e-06, "loss": 2.0366, "step": 19661 }, { "epoch": 0.65, "grad_norm": 0.7364428043365479, "learning_rate": 5.507558597946156e-06, "loss": 2.0496, "step": 19662 }, { "epoch": 0.65, "grad_norm": 0.7347718477249146, "learning_rate": 5.5066090251721586e-06, "loss": 2.1065, "step": 19663 }, { "epoch": 0.65, "grad_norm": 0.7134438157081604, "learning_rate": 5.505659503161855e-06, "loss": 2.0351, "step": 19664 }, { "epoch": 0.65, "grad_norm": 0.7505150437355042, "learning_rate": 5.504710031925982e-06, "loss": 1.9995, "step": 19665 }, { "epoch": 0.65, "grad_norm": 0.8008188009262085, "learning_rate": 5.5037606114752576e-06, "loss": 2.0418, "step": 19666 }, { "epoch": 0.65, "grad_norm": 0.747008740901947, "learning_rate": 5.5028112418204095e-06, "loss": 2.1185, "step": 19667 }, { "epoch": 0.65, "grad_norm": 0.7310032844543457, "learning_rate": 5.501861922972163e-06, "loss": 2.0703, "step": 19668 }, { "epoch": 0.65, "grad_norm": 0.7666939496994019, "learning_rate": 5.500912654941248e-06, "loss": 2.0696, "step": 19669 }, { "epoch": 0.65, "grad_norm": 0.7351548671722412, "learning_rate": 5.499963437738382e-06, "loss": 2.1072, "step": 19670 }, { "epoch": 0.65, "grad_norm": 0.7615830302238464, "learning_rate": 5.4990142713742945e-06, "loss": 1.988, "step": 19671 }, { "epoch": 0.65, "grad_norm": 0.7271462678909302, "learning_rate": 5.498065155859706e-06, "loss": 2.055, "step": 19672 }, { "epoch": 0.65, "grad_norm": 0.7374181747436523, "learning_rate": 5.497116091205336e-06, "loss": 2.0506, "step": 19673 }, { "epoch": 0.65, "grad_norm": 0.7781620025634766, "learning_rate": 5.49616707742191e-06, "loss": 2.0548, "step": 19674 }, { "epoch": 0.65, "grad_norm": 0.764536440372467, "learning_rate": 5.495218114520156e-06, "loss": 1.9853, "step": 19675 }, { "epoch": 0.65, "grad_norm": 0.7336354851722717, "learning_rate": 5.49426920251078e-06, "loss": 2.1152, "step": 19676 }, { "epoch": 0.65, "grad_norm": 0.7385858297348022, "learning_rate": 5.493320341404509e-06, "loss": 1.9814, "step": 19677 }, { "epoch": 0.65, "grad_norm": 0.7404362559318542, "learning_rate": 5.4923715312120686e-06, "loss": 2.0353, "step": 19678 }, { "epoch": 0.65, "grad_norm": 0.7485159635543823, "learning_rate": 5.4914227719441726e-06, "loss": 2.0705, "step": 19679 }, { "epoch": 0.65, "grad_norm": 0.7675884366035461, "learning_rate": 5.490474063611535e-06, "loss": 2.0367, "step": 19680 }, { "epoch": 0.65, "grad_norm": 0.7122249007225037, "learning_rate": 5.4895254062248845e-06, "loss": 2.0245, "step": 19681 }, { "epoch": 0.65, "grad_norm": 0.7568567395210266, "learning_rate": 5.4885767997949265e-06, "loss": 2.0471, "step": 19682 }, { "epoch": 0.65, "grad_norm": 0.7437814474105835, "learning_rate": 5.487628244332386e-06, "loss": 2.0393, "step": 19683 }, { "epoch": 0.65, "grad_norm": 0.7349488735198975, "learning_rate": 5.48667973984798e-06, "loss": 2.0481, "step": 19684 }, { "epoch": 0.65, "grad_norm": 0.7411438822746277, "learning_rate": 5.48573128635242e-06, "loss": 2.1024, "step": 19685 }, { "epoch": 0.65, "grad_norm": 0.7478607296943665, "learning_rate": 5.48478288385642e-06, "loss": 1.9975, "step": 19686 }, { "epoch": 0.65, "grad_norm": 0.7716949582099915, "learning_rate": 5.4838345323707e-06, "loss": 2.1017, "step": 19687 }, { "epoch": 0.66, "grad_norm": 0.7322512865066528, "learning_rate": 5.4828862319059705e-06, "loss": 2.0395, "step": 19688 }, { "epoch": 0.66, "grad_norm": 0.7583751678466797, "learning_rate": 5.4819379824729424e-06, "loss": 2.054, "step": 19689 }, { "epoch": 0.66, "grad_norm": 0.7522130012512207, "learning_rate": 5.48098978408233e-06, "loss": 2.078, "step": 19690 }, { "epoch": 0.66, "grad_norm": 0.75520920753479, "learning_rate": 5.48004163674485e-06, "loss": 2.0066, "step": 19691 }, { "epoch": 0.66, "grad_norm": 0.7588621377944946, "learning_rate": 5.479093540471208e-06, "loss": 2.1121, "step": 19692 }, { "epoch": 0.66, "grad_norm": 0.7495948672294617, "learning_rate": 5.4781454952721225e-06, "loss": 2.0807, "step": 19693 }, { "epoch": 0.66, "grad_norm": 0.744258463382721, "learning_rate": 5.477197501158298e-06, "loss": 2.0421, "step": 19694 }, { "epoch": 0.66, "grad_norm": 0.7458747625350952, "learning_rate": 5.47624955814044e-06, "loss": 2.0585, "step": 19695 }, { "epoch": 0.66, "grad_norm": 0.755458652973175, "learning_rate": 5.4753016662292645e-06, "loss": 2.0567, "step": 19696 }, { "epoch": 0.66, "grad_norm": 0.7341300249099731, "learning_rate": 5.474353825435488e-06, "loss": 2.0649, "step": 19697 }, { "epoch": 0.66, "grad_norm": 0.7299632430076599, "learning_rate": 5.4734060357698004e-06, "loss": 2.0686, "step": 19698 }, { "epoch": 0.66, "grad_norm": 0.7370162606239319, "learning_rate": 5.472458297242919e-06, "loss": 2.0584, "step": 19699 }, { "epoch": 0.66, "grad_norm": 0.736213743686676, "learning_rate": 5.471510609865555e-06, "loss": 2.0918, "step": 19700 }, { "epoch": 0.66, "grad_norm": 0.7233362197875977, "learning_rate": 5.47056297364841e-06, "loss": 2.0629, "step": 19701 }, { "epoch": 0.66, "grad_norm": 0.7354491949081421, "learning_rate": 5.469615388602185e-06, "loss": 2.0457, "step": 19702 }, { "epoch": 0.66, "grad_norm": 0.7724765539169312, "learning_rate": 5.468667854737595e-06, "loss": 2.0476, "step": 19703 }, { "epoch": 0.66, "grad_norm": 0.7542290091514587, "learning_rate": 5.467720372065335e-06, "loss": 2.1405, "step": 19704 }, { "epoch": 0.66, "grad_norm": 0.7747575044631958, "learning_rate": 5.466772940596116e-06, "loss": 2.0122, "step": 19705 }, { "epoch": 0.66, "grad_norm": 0.7280795574188232, "learning_rate": 5.465825560340642e-06, "loss": 2.0257, "step": 19706 }, { "epoch": 0.66, "grad_norm": 0.783419668674469, "learning_rate": 5.464878231309614e-06, "loss": 2.1004, "step": 19707 }, { "epoch": 0.66, "grad_norm": 0.7385865449905396, "learning_rate": 5.46393095351373e-06, "loss": 2.0513, "step": 19708 }, { "epoch": 0.66, "grad_norm": 0.7379505038261414, "learning_rate": 5.462983726963695e-06, "loss": 2.0579, "step": 19709 }, { "epoch": 0.66, "grad_norm": 0.8014028668403625, "learning_rate": 5.4620365516702204e-06, "loss": 2.0854, "step": 19710 }, { "epoch": 0.66, "grad_norm": 0.7600050568580627, "learning_rate": 5.461089427643988e-06, "loss": 2.0756, "step": 19711 }, { "epoch": 0.66, "grad_norm": 0.7606709003448486, "learning_rate": 5.460142354895707e-06, "loss": 2.0589, "step": 19712 }, { "epoch": 0.66, "grad_norm": 0.7199676632881165, "learning_rate": 5.459195333436082e-06, "loss": 2.0555, "step": 19713 }, { "epoch": 0.66, "grad_norm": 0.7474765181541443, "learning_rate": 5.458248363275802e-06, "loss": 2.0495, "step": 19714 }, { "epoch": 0.66, "grad_norm": 0.7738089561462402, "learning_rate": 5.457301444425576e-06, "loss": 2.1036, "step": 19715 }, { "epoch": 0.66, "grad_norm": 0.7274369597434998, "learning_rate": 5.456354576896094e-06, "loss": 2.0065, "step": 19716 }, { "epoch": 0.66, "grad_norm": 0.7285518050193787, "learning_rate": 5.455407760698053e-06, "loss": 2.0169, "step": 19717 }, { "epoch": 0.66, "grad_norm": 0.7219896912574768, "learning_rate": 5.45446099584215e-06, "loss": 2.0489, "step": 19718 }, { "epoch": 0.66, "grad_norm": 0.7287682890892029, "learning_rate": 5.453514282339092e-06, "loss": 2.0968, "step": 19719 }, { "epoch": 0.66, "grad_norm": 0.7363569140434265, "learning_rate": 5.452567620199556e-06, "loss": 2.0978, "step": 19720 }, { "epoch": 0.66, "grad_norm": 0.7803433537483215, "learning_rate": 5.451621009434247e-06, "loss": 2.0022, "step": 19721 }, { "epoch": 0.66, "grad_norm": 0.7706819772720337, "learning_rate": 5.450674450053861e-06, "loss": 1.9931, "step": 19722 }, { "epoch": 0.66, "grad_norm": 0.7571954131126404, "learning_rate": 5.449727942069086e-06, "loss": 2.0296, "step": 19723 }, { "epoch": 0.66, "grad_norm": 0.750938892364502, "learning_rate": 5.448781485490622e-06, "loss": 2.04, "step": 19724 }, { "epoch": 0.66, "grad_norm": 0.7727022171020508, "learning_rate": 5.4478350803291536e-06, "loss": 2.0418, "step": 19725 }, { "epoch": 0.66, "grad_norm": 0.7717944979667664, "learning_rate": 5.446888726595381e-06, "loss": 2.0223, "step": 19726 }, { "epoch": 0.66, "grad_norm": 0.7570573091506958, "learning_rate": 5.445942424299986e-06, "loss": 2.0455, "step": 19727 }, { "epoch": 0.66, "grad_norm": 0.7799829244613647, "learning_rate": 5.444996173453668e-06, "loss": 2.1299, "step": 19728 }, { "epoch": 0.66, "grad_norm": 0.745093584060669, "learning_rate": 5.444049974067115e-06, "loss": 2.1388, "step": 19729 }, { "epoch": 0.66, "grad_norm": 0.7703282237052917, "learning_rate": 5.4431038261510104e-06, "loss": 1.9845, "step": 19730 }, { "epoch": 0.66, "grad_norm": 0.7471466660499573, "learning_rate": 5.442157729716049e-06, "loss": 2.0651, "step": 19731 }, { "epoch": 0.66, "grad_norm": 0.7399017214775085, "learning_rate": 5.441211684772927e-06, "loss": 2.1515, "step": 19732 }, { "epoch": 0.66, "grad_norm": 0.7519108653068542, "learning_rate": 5.4402656913323135e-06, "loss": 2.0743, "step": 19733 }, { "epoch": 0.66, "grad_norm": 0.7227649092674255, "learning_rate": 5.439319749404907e-06, "loss": 2.0384, "step": 19734 }, { "epoch": 0.66, "grad_norm": 0.7172938585281372, "learning_rate": 5.438373859001399e-06, "loss": 2.0351, "step": 19735 }, { "epoch": 0.66, "grad_norm": 0.7852622866630554, "learning_rate": 5.437428020132464e-06, "loss": 2.0453, "step": 19736 }, { "epoch": 0.66, "grad_norm": 0.7595424056053162, "learning_rate": 5.436482232808797e-06, "loss": 2.098, "step": 19737 }, { "epoch": 0.66, "grad_norm": 0.75477534532547, "learning_rate": 5.435536497041081e-06, "loss": 2.104, "step": 19738 }, { "epoch": 0.66, "grad_norm": 0.7570781707763672, "learning_rate": 5.434590812839993e-06, "loss": 2.0665, "step": 19739 }, { "epoch": 0.66, "grad_norm": 0.7319640517234802, "learning_rate": 5.433645180216223e-06, "loss": 2.1142, "step": 19740 }, { "epoch": 0.66, "grad_norm": 0.7563708424568176, "learning_rate": 5.432699599180457e-06, "loss": 2.0574, "step": 19741 }, { "epoch": 0.66, "grad_norm": 0.7478208541870117, "learning_rate": 5.431754069743374e-06, "loss": 1.994, "step": 19742 }, { "epoch": 0.66, "grad_norm": 0.7453145384788513, "learning_rate": 5.430808591915654e-06, "loss": 2.0486, "step": 19743 }, { "epoch": 0.66, "grad_norm": 0.7371820211410522, "learning_rate": 5.429863165707983e-06, "loss": 2.146, "step": 19744 }, { "epoch": 0.66, "grad_norm": 0.7339878678321838, "learning_rate": 5.4289177911310365e-06, "loss": 2.0643, "step": 19745 }, { "epoch": 0.66, "grad_norm": 0.7759557366371155, "learning_rate": 5.427972468195501e-06, "loss": 2.0611, "step": 19746 }, { "epoch": 0.66, "grad_norm": 0.7640635371208191, "learning_rate": 5.42702719691205e-06, "loss": 2.0867, "step": 19747 }, { "epoch": 0.66, "grad_norm": 0.7491858005523682, "learning_rate": 5.42608197729137e-06, "loss": 2.0078, "step": 19748 }, { "epoch": 0.66, "grad_norm": 0.7212887406349182, "learning_rate": 5.42513680934413e-06, "loss": 2.0102, "step": 19749 }, { "epoch": 0.66, "grad_norm": 0.7689663171768188, "learning_rate": 5.424191693081018e-06, "loss": 1.97, "step": 19750 }, { "epoch": 0.66, "grad_norm": 0.7429208159446716, "learning_rate": 5.423246628512706e-06, "loss": 2.111, "step": 19751 }, { "epoch": 0.66, "grad_norm": 0.7256428599357605, "learning_rate": 5.422301615649868e-06, "loss": 2.0829, "step": 19752 }, { "epoch": 0.66, "grad_norm": 0.7550253868103027, "learning_rate": 5.421356654503183e-06, "loss": 2.0416, "step": 19753 }, { "epoch": 0.66, "grad_norm": 0.7384501695632935, "learning_rate": 5.4204117450833315e-06, "loss": 2.1119, "step": 19754 }, { "epoch": 0.66, "grad_norm": 0.7323759198188782, "learning_rate": 5.419466887400985e-06, "loss": 2.0756, "step": 19755 }, { "epoch": 0.66, "grad_norm": 0.7427287697792053, "learning_rate": 5.418522081466812e-06, "loss": 1.9783, "step": 19756 }, { "epoch": 0.66, "grad_norm": 0.7797662615776062, "learning_rate": 5.417577327291496e-06, "loss": 1.9917, "step": 19757 }, { "epoch": 0.66, "grad_norm": 0.7585448622703552, "learning_rate": 5.416632624885701e-06, "loss": 2.0274, "step": 19758 }, { "epoch": 0.66, "grad_norm": 0.7598999738693237, "learning_rate": 5.41568797426011e-06, "loss": 2.1064, "step": 19759 }, { "epoch": 0.66, "grad_norm": 0.7197571396827698, "learning_rate": 5.414743375425389e-06, "loss": 2.125, "step": 19760 }, { "epoch": 0.66, "grad_norm": 0.7665246725082397, "learning_rate": 5.413798828392205e-06, "loss": 2.1403, "step": 19761 }, { "epoch": 0.66, "grad_norm": 0.7306970357894897, "learning_rate": 5.412854333171236e-06, "loss": 2.0329, "step": 19762 }, { "epoch": 0.66, "grad_norm": 0.732742190361023, "learning_rate": 5.411909889773153e-06, "loss": 2.0902, "step": 19763 }, { "epoch": 0.66, "grad_norm": 0.761755108833313, "learning_rate": 5.410965498208622e-06, "loss": 2.058, "step": 19764 }, { "epoch": 0.66, "grad_norm": 0.7586823105812073, "learning_rate": 5.4100211584883126e-06, "loss": 2.1033, "step": 19765 }, { "epoch": 0.66, "grad_norm": 0.7191476225852966, "learning_rate": 5.409076870622896e-06, "loss": 2.0321, "step": 19766 }, { "epoch": 0.66, "grad_norm": 0.7183763980865479, "learning_rate": 5.408132634623035e-06, "loss": 2.0834, "step": 19767 }, { "epoch": 0.66, "grad_norm": 0.7388396263122559, "learning_rate": 5.407188450499403e-06, "loss": 1.9888, "step": 19768 }, { "epoch": 0.66, "grad_norm": 0.73529452085495, "learning_rate": 5.406244318262662e-06, "loss": 2.0209, "step": 19769 }, { "epoch": 0.66, "grad_norm": 0.7525423765182495, "learning_rate": 5.405300237923483e-06, "loss": 2.0674, "step": 19770 }, { "epoch": 0.66, "grad_norm": 0.7598569989204407, "learning_rate": 5.404356209492527e-06, "loss": 1.9819, "step": 19771 }, { "epoch": 0.66, "grad_norm": 0.7715239524841309, "learning_rate": 5.403412232980465e-06, "loss": 2.0709, "step": 19772 }, { "epoch": 0.66, "grad_norm": 0.7499877214431763, "learning_rate": 5.402468308397957e-06, "loss": 2.0422, "step": 19773 }, { "epoch": 0.66, "grad_norm": 0.7537586688995361, "learning_rate": 5.401524435755663e-06, "loss": 2.0573, "step": 19774 }, { "epoch": 0.66, "grad_norm": 0.7416905164718628, "learning_rate": 5.400580615064252e-06, "loss": 1.9734, "step": 19775 }, { "epoch": 0.66, "grad_norm": 0.7273804545402527, "learning_rate": 5.399636846334388e-06, "loss": 2.0147, "step": 19776 }, { "epoch": 0.66, "grad_norm": 0.7350367903709412, "learning_rate": 5.398693129576733e-06, "loss": 2.0857, "step": 19777 }, { "epoch": 0.66, "grad_norm": 0.7223485112190247, "learning_rate": 5.397749464801941e-06, "loss": 2.0228, "step": 19778 }, { "epoch": 0.66, "grad_norm": 0.7379430532455444, "learning_rate": 5.396805852020683e-06, "loss": 1.9777, "step": 19779 }, { "epoch": 0.66, "grad_norm": 0.7442083954811096, "learning_rate": 5.395862291243611e-06, "loss": 1.9731, "step": 19780 }, { "epoch": 0.66, "grad_norm": 0.757374107837677, "learning_rate": 5.394918782481392e-06, "loss": 2.0251, "step": 19781 }, { "epoch": 0.66, "grad_norm": 0.751857578754425, "learning_rate": 5.393975325744682e-06, "loss": 2.0454, "step": 19782 }, { "epoch": 0.66, "grad_norm": 0.7656716108322144, "learning_rate": 5.3930319210441354e-06, "loss": 2.1023, "step": 19783 }, { "epoch": 0.66, "grad_norm": 0.762579619884491, "learning_rate": 5.392088568390415e-06, "loss": 1.9963, "step": 19784 }, { "epoch": 0.66, "grad_norm": 0.7674401998519897, "learning_rate": 5.39114526779418e-06, "loss": 2.1245, "step": 19785 }, { "epoch": 0.66, "grad_norm": 0.7667040824890137, "learning_rate": 5.390202019266084e-06, "loss": 2.0757, "step": 19786 }, { "epoch": 0.66, "grad_norm": 0.7771633863449097, "learning_rate": 5.389258822816782e-06, "loss": 2.0628, "step": 19787 }, { "epoch": 0.66, "grad_norm": 0.7289263606071472, "learning_rate": 5.3883156784569345e-06, "loss": 1.9789, "step": 19788 }, { "epoch": 0.66, "grad_norm": 0.766508936882019, "learning_rate": 5.387372586197191e-06, "loss": 2.1838, "step": 19789 }, { "epoch": 0.66, "grad_norm": 0.7497766017913818, "learning_rate": 5.386429546048211e-06, "loss": 2.0673, "step": 19790 }, { "epoch": 0.66, "grad_norm": 0.7579146027565002, "learning_rate": 5.385486558020643e-06, "loss": 2.0931, "step": 19791 }, { "epoch": 0.66, "grad_norm": 0.7560113072395325, "learning_rate": 5.384543622125148e-06, "loss": 2.1174, "step": 19792 }, { "epoch": 0.66, "grad_norm": 0.7545515894889832, "learning_rate": 5.38360073837237e-06, "loss": 2.0724, "step": 19793 }, { "epoch": 0.66, "grad_norm": 0.748836874961853, "learning_rate": 5.382657906772969e-06, "loss": 2.0201, "step": 19794 }, { "epoch": 0.66, "grad_norm": 0.7140710353851318, "learning_rate": 5.3817151273375934e-06, "loss": 2.0541, "step": 19795 }, { "epoch": 0.66, "grad_norm": 0.7162518501281738, "learning_rate": 5.3807724000768895e-06, "loss": 2.0239, "step": 19796 }, { "epoch": 0.66, "grad_norm": 0.7653456330299377, "learning_rate": 5.379829725001511e-06, "loss": 2.0596, "step": 19797 }, { "epoch": 0.66, "grad_norm": 0.7563791275024414, "learning_rate": 5.3788871021221145e-06, "loss": 2.0381, "step": 19798 }, { "epoch": 0.66, "grad_norm": 0.7218927145004272, "learning_rate": 5.377944531449341e-06, "loss": 2.0801, "step": 19799 }, { "epoch": 0.66, "grad_norm": 0.7141469717025757, "learning_rate": 5.3770020129938395e-06, "loss": 2.0641, "step": 19800 }, { "epoch": 0.66, "grad_norm": 0.7583063840866089, "learning_rate": 5.376059546766264e-06, "loss": 2.0237, "step": 19801 }, { "epoch": 0.66, "grad_norm": 0.7499353289604187, "learning_rate": 5.375117132777252e-06, "loss": 2.0781, "step": 19802 }, { "epoch": 0.66, "grad_norm": 0.730440080165863, "learning_rate": 5.37417477103746e-06, "loss": 2.0126, "step": 19803 }, { "epoch": 0.66, "grad_norm": 0.7603054642677307, "learning_rate": 5.373232461557532e-06, "loss": 2.0461, "step": 19804 }, { "epoch": 0.66, "grad_norm": 0.7560338377952576, "learning_rate": 5.3722902043481075e-06, "loss": 2.0306, "step": 19805 }, { "epoch": 0.66, "grad_norm": 0.7698346376419067, "learning_rate": 5.371347999419836e-06, "loss": 2.046, "step": 19806 }, { "epoch": 0.66, "grad_norm": 0.7237495183944702, "learning_rate": 5.370405846783366e-06, "loss": 2.0453, "step": 19807 }, { "epoch": 0.66, "grad_norm": 0.7479041814804077, "learning_rate": 5.3694637464493395e-06, "loss": 2.0339, "step": 19808 }, { "epoch": 0.66, "grad_norm": 0.7250011563301086, "learning_rate": 5.368521698428392e-06, "loss": 2.0603, "step": 19809 }, { "epoch": 0.66, "grad_norm": 0.750363290309906, "learning_rate": 5.367579702731176e-06, "loss": 2.0232, "step": 19810 }, { "epoch": 0.66, "grad_norm": 0.747576117515564, "learning_rate": 5.366637759368325e-06, "loss": 2.1004, "step": 19811 }, { "epoch": 0.66, "grad_norm": 0.7434778213500977, "learning_rate": 5.365695868350491e-06, "loss": 2.0606, "step": 19812 }, { "epoch": 0.66, "grad_norm": 0.7559706568717957, "learning_rate": 5.364754029688304e-06, "loss": 2.0826, "step": 19813 }, { "epoch": 0.66, "grad_norm": 0.7356227040290833, "learning_rate": 5.363812243392414e-06, "loss": 2.0474, "step": 19814 }, { "epoch": 0.66, "grad_norm": 0.7391427755355835, "learning_rate": 5.362870509473452e-06, "loss": 1.9577, "step": 19815 }, { "epoch": 0.66, "grad_norm": 0.7255693674087524, "learning_rate": 5.36192882794206e-06, "loss": 2.0871, "step": 19816 }, { "epoch": 0.66, "grad_norm": 0.7460416555404663, "learning_rate": 5.360987198808888e-06, "loss": 2.0773, "step": 19817 }, { "epoch": 0.66, "grad_norm": 0.7636624574661255, "learning_rate": 5.360045622084555e-06, "loss": 2.0483, "step": 19818 }, { "epoch": 0.66, "grad_norm": 0.7631672620773315, "learning_rate": 5.359104097779708e-06, "loss": 2.115, "step": 19819 }, { "epoch": 0.66, "grad_norm": 0.7428252696990967, "learning_rate": 5.358162625904985e-06, "loss": 2.0578, "step": 19820 }, { "epoch": 0.66, "grad_norm": 0.769159197807312, "learning_rate": 5.357221206471022e-06, "loss": 2.067, "step": 19821 }, { "epoch": 0.66, "grad_norm": 0.7813783288002014, "learning_rate": 5.356279839488449e-06, "loss": 2.0208, "step": 19822 }, { "epoch": 0.66, "grad_norm": 0.7492214441299438, "learning_rate": 5.355338524967908e-06, "loss": 2.0232, "step": 19823 }, { "epoch": 0.66, "grad_norm": 0.781178891658783, "learning_rate": 5.354397262920028e-06, "loss": 2.0861, "step": 19824 }, { "epoch": 0.66, "grad_norm": 0.7603986859321594, "learning_rate": 5.353456053355447e-06, "loss": 2.0151, "step": 19825 }, { "epoch": 0.66, "grad_norm": 0.7563328146934509, "learning_rate": 5.352514896284793e-06, "loss": 2.1212, "step": 19826 }, { "epoch": 0.66, "grad_norm": 0.7241660952568054, "learning_rate": 5.3515737917187075e-06, "loss": 2.0515, "step": 19827 }, { "epoch": 0.66, "grad_norm": 0.7466849088668823, "learning_rate": 5.350632739667811e-06, "loss": 2.0861, "step": 19828 }, { "epoch": 0.66, "grad_norm": 0.7603322863578796, "learning_rate": 5.349691740142746e-06, "loss": 2.0577, "step": 19829 }, { "epoch": 0.66, "grad_norm": 0.7365947365760803, "learning_rate": 5.348750793154138e-06, "loss": 2.0361, "step": 19830 }, { "epoch": 0.66, "grad_norm": 0.7431128621101379, "learning_rate": 5.347809898712614e-06, "loss": 2.0992, "step": 19831 }, { "epoch": 0.66, "grad_norm": 0.7111643552780151, "learning_rate": 5.346869056828807e-06, "loss": 1.981, "step": 19832 }, { "epoch": 0.66, "grad_norm": 0.7219464182853699, "learning_rate": 5.34592826751335e-06, "loss": 2.0646, "step": 19833 }, { "epoch": 0.66, "grad_norm": 0.7504080533981323, "learning_rate": 5.344987530776868e-06, "loss": 2.0321, "step": 19834 }, { "epoch": 0.66, "grad_norm": 0.7847098112106323, "learning_rate": 5.3440468466299866e-06, "loss": 2.0029, "step": 19835 }, { "epoch": 0.66, "grad_norm": 0.7563808560371399, "learning_rate": 5.343106215083338e-06, "loss": 2.0545, "step": 19836 }, { "epoch": 0.66, "grad_norm": 0.7318623065948486, "learning_rate": 5.342165636147542e-06, "loss": 2.1518, "step": 19837 }, { "epoch": 0.66, "grad_norm": 0.7477677464485168, "learning_rate": 5.341225109833228e-06, "loss": 2.0879, "step": 19838 }, { "epoch": 0.66, "grad_norm": 0.7810040712356567, "learning_rate": 5.340284636151032e-06, "loss": 2.0714, "step": 19839 }, { "epoch": 0.66, "grad_norm": 0.7411178350448608, "learning_rate": 5.33934421511156e-06, "loss": 2.1211, "step": 19840 }, { "epoch": 0.66, "grad_norm": 0.7598457336425781, "learning_rate": 5.338403846725446e-06, "loss": 2.0268, "step": 19841 }, { "epoch": 0.66, "grad_norm": 0.7214486598968506, "learning_rate": 5.337463531003319e-06, "loss": 2.065, "step": 19842 }, { "epoch": 0.66, "grad_norm": 0.7749118208885193, "learning_rate": 5.336523267955794e-06, "loss": 2.0255, "step": 19843 }, { "epoch": 0.66, "grad_norm": 0.7391039133071899, "learning_rate": 5.335583057593494e-06, "loss": 1.9614, "step": 19844 }, { "epoch": 0.66, "grad_norm": 0.7386989593505859, "learning_rate": 5.334642899927046e-06, "loss": 2.0631, "step": 19845 }, { "epoch": 0.66, "grad_norm": 0.7345327138900757, "learning_rate": 5.3337027949670635e-06, "loss": 2.0313, "step": 19846 }, { "epoch": 0.66, "grad_norm": 0.7425429224967957, "learning_rate": 5.332762742724173e-06, "loss": 2.0791, "step": 19847 }, { "epoch": 0.66, "grad_norm": 0.7565324306488037, "learning_rate": 5.331822743208999e-06, "loss": 2.0676, "step": 19848 }, { "epoch": 0.66, "grad_norm": 0.7313607931137085, "learning_rate": 5.330882796432155e-06, "loss": 1.996, "step": 19849 }, { "epoch": 0.66, "grad_norm": 0.7483778595924377, "learning_rate": 5.329942902404257e-06, "loss": 2.0279, "step": 19850 }, { "epoch": 0.66, "grad_norm": 0.7266696095466614, "learning_rate": 5.32900306113593e-06, "loss": 1.9857, "step": 19851 }, { "epoch": 0.66, "grad_norm": 0.7487393021583557, "learning_rate": 5.328063272637789e-06, "loss": 2.153, "step": 19852 }, { "epoch": 0.66, "grad_norm": 0.7430779337882996, "learning_rate": 5.327123536920449e-06, "loss": 2.0856, "step": 19853 }, { "epoch": 0.66, "grad_norm": 0.7344644665718079, "learning_rate": 5.3261838539945265e-06, "loss": 2.0582, "step": 19854 }, { "epoch": 0.66, "grad_norm": 0.7301772832870483, "learning_rate": 5.325244223870645e-06, "loss": 2.0258, "step": 19855 }, { "epoch": 0.66, "grad_norm": 0.7243576049804688, "learning_rate": 5.324304646559415e-06, "loss": 2.0844, "step": 19856 }, { "epoch": 0.66, "grad_norm": 0.7731500864028931, "learning_rate": 5.323365122071446e-06, "loss": 2.1195, "step": 19857 }, { "epoch": 0.66, "grad_norm": 0.7676663398742676, "learning_rate": 5.322425650417361e-06, "loss": 2.0695, "step": 19858 }, { "epoch": 0.66, "grad_norm": 0.7373532056808472, "learning_rate": 5.321486231607767e-06, "loss": 2.0614, "step": 19859 }, { "epoch": 0.66, "grad_norm": 0.7399048805236816, "learning_rate": 5.320546865653278e-06, "loss": 2.0725, "step": 19860 }, { "epoch": 0.66, "grad_norm": 0.7232118844985962, "learning_rate": 5.319607552564516e-06, "loss": 2.0605, "step": 19861 }, { "epoch": 0.66, "grad_norm": 0.7304097414016724, "learning_rate": 5.318668292352078e-06, "loss": 2.0582, "step": 19862 }, { "epoch": 0.66, "grad_norm": 0.7253690361976624, "learning_rate": 5.317729085026582e-06, "loss": 2.0318, "step": 19863 }, { "epoch": 0.66, "grad_norm": 0.7731072306632996, "learning_rate": 5.3167899305986416e-06, "loss": 2.0767, "step": 19864 }, { "epoch": 0.66, "grad_norm": 0.7335454225540161, "learning_rate": 5.315850829078864e-06, "loss": 2.0654, "step": 19865 }, { "epoch": 0.66, "grad_norm": 0.7608612179756165, "learning_rate": 5.314911780477856e-06, "loss": 2.061, "step": 19866 }, { "epoch": 0.66, "grad_norm": 0.7415502071380615, "learning_rate": 5.313972784806232e-06, "loss": 2.063, "step": 19867 }, { "epoch": 0.66, "grad_norm": 0.7321881055831909, "learning_rate": 5.3130338420745935e-06, "loss": 2.0547, "step": 19868 }, { "epoch": 0.66, "grad_norm": 0.7696564793586731, "learning_rate": 5.312094952293552e-06, "loss": 2.0859, "step": 19869 }, { "epoch": 0.66, "grad_norm": 0.7354307770729065, "learning_rate": 5.311156115473718e-06, "loss": 2.0228, "step": 19870 }, { "epoch": 0.66, "grad_norm": 0.7332853674888611, "learning_rate": 5.310217331625695e-06, "loss": 2.0092, "step": 19871 }, { "epoch": 0.66, "grad_norm": 0.8143352270126343, "learning_rate": 5.309278600760083e-06, "loss": 2.0506, "step": 19872 }, { "epoch": 0.66, "grad_norm": 0.7098326683044434, "learning_rate": 5.308339922887497e-06, "loss": 2.0738, "step": 19873 }, { "epoch": 0.66, "grad_norm": 0.7538421154022217, "learning_rate": 5.307401298018536e-06, "loss": 2.0569, "step": 19874 }, { "epoch": 0.66, "grad_norm": 0.7396963238716125, "learning_rate": 5.306462726163802e-06, "loss": 2.0579, "step": 19875 }, { "epoch": 0.66, "grad_norm": 0.7569546103477478, "learning_rate": 5.305524207333901e-06, "loss": 2.0504, "step": 19876 }, { "epoch": 0.66, "grad_norm": 0.7797518968582153, "learning_rate": 5.304585741539441e-06, "loss": 2.0642, "step": 19877 }, { "epoch": 0.66, "grad_norm": 0.726717472076416, "learning_rate": 5.30364732879102e-06, "loss": 2.0929, "step": 19878 }, { "epoch": 0.66, "grad_norm": 0.7441016435623169, "learning_rate": 5.302708969099233e-06, "loss": 2.0631, "step": 19879 }, { "epoch": 0.66, "grad_norm": 0.7279456853866577, "learning_rate": 5.301770662474692e-06, "loss": 1.9721, "step": 19880 }, { "epoch": 0.66, "grad_norm": 0.7581925988197327, "learning_rate": 5.30083240892799e-06, "loss": 2.0241, "step": 19881 }, { "epoch": 0.66, "grad_norm": 0.7430950999259949, "learning_rate": 5.299894208469727e-06, "loss": 2.0567, "step": 19882 }, { "epoch": 0.66, "grad_norm": 0.7804224491119385, "learning_rate": 5.298956061110514e-06, "loss": 2.0719, "step": 19883 }, { "epoch": 0.66, "grad_norm": 0.7645136713981628, "learning_rate": 5.298017966860934e-06, "loss": 2.0291, "step": 19884 }, { "epoch": 0.66, "grad_norm": 0.7815450429916382, "learning_rate": 5.2970799257315895e-06, "loss": 2.0743, "step": 19885 }, { "epoch": 0.66, "grad_norm": 0.7373940944671631, "learning_rate": 5.296141937733083e-06, "loss": 2.0605, "step": 19886 }, { "epoch": 0.66, "grad_norm": 0.7616168856620789, "learning_rate": 5.2952040028760096e-06, "loss": 2.0796, "step": 19887 }, { "epoch": 0.66, "grad_norm": 0.7629055976867676, "learning_rate": 5.29426612117096e-06, "loss": 2.0607, "step": 19888 }, { "epoch": 0.66, "grad_norm": 0.7281550765037537, "learning_rate": 5.293328292628538e-06, "loss": 2.0465, "step": 19889 }, { "epoch": 0.66, "grad_norm": 0.7405682802200317, "learning_rate": 5.292390517259331e-06, "loss": 2.0659, "step": 19890 }, { "epoch": 0.66, "grad_norm": 0.7313674688339233, "learning_rate": 5.291452795073936e-06, "loss": 1.9816, "step": 19891 }, { "epoch": 0.66, "grad_norm": 0.740441620349884, "learning_rate": 5.290515126082951e-06, "loss": 2.0449, "step": 19892 }, { "epoch": 0.66, "grad_norm": 0.7219458222389221, "learning_rate": 5.289577510296968e-06, "loss": 2.0435, "step": 19893 }, { "epoch": 0.66, "grad_norm": 0.7322210669517517, "learning_rate": 5.288639947726573e-06, "loss": 2.0973, "step": 19894 }, { "epoch": 0.66, "grad_norm": 0.7987276315689087, "learning_rate": 5.2877024383823676e-06, "loss": 2.0378, "step": 19895 }, { "epoch": 0.66, "grad_norm": 0.7828547358512878, "learning_rate": 5.286764982274937e-06, "loss": 2.027, "step": 19896 }, { "epoch": 0.66, "grad_norm": 0.7652302980422974, "learning_rate": 5.285827579414869e-06, "loss": 2.0783, "step": 19897 }, { "epoch": 0.66, "grad_norm": 0.746668815612793, "learning_rate": 5.2848902298127595e-06, "loss": 2.047, "step": 19898 }, { "epoch": 0.66, "grad_norm": 0.7368899583816528, "learning_rate": 5.2839529334791996e-06, "loss": 2.0039, "step": 19899 }, { "epoch": 0.66, "grad_norm": 0.7455770969390869, "learning_rate": 5.2830156904247735e-06, "loss": 2.0547, "step": 19900 }, { "epoch": 0.66, "grad_norm": 0.7459996342658997, "learning_rate": 5.282078500660074e-06, "loss": 1.976, "step": 19901 }, { "epoch": 0.66, "grad_norm": 0.7488899230957031, "learning_rate": 5.281141364195687e-06, "loss": 2.1192, "step": 19902 }, { "epoch": 0.66, "grad_norm": 0.7316449880599976, "learning_rate": 5.280204281042196e-06, "loss": 2.0865, "step": 19903 }, { "epoch": 0.66, "grad_norm": 0.764094889163971, "learning_rate": 5.279267251210191e-06, "loss": 2.0741, "step": 19904 }, { "epoch": 0.66, "grad_norm": 0.7434185743331909, "learning_rate": 5.278330274710265e-06, "loss": 2.0473, "step": 19905 }, { "epoch": 0.66, "grad_norm": 0.7129002213478088, "learning_rate": 5.277393351552989e-06, "loss": 2.0476, "step": 19906 }, { "epoch": 0.66, "grad_norm": 0.7266384363174438, "learning_rate": 5.276456481748955e-06, "loss": 2.0627, "step": 19907 }, { "epoch": 0.66, "grad_norm": 0.7599093914031982, "learning_rate": 5.275519665308751e-06, "loss": 2.1002, "step": 19908 }, { "epoch": 0.66, "grad_norm": 0.7283793091773987, "learning_rate": 5.274582902242957e-06, "loss": 2.0156, "step": 19909 }, { "epoch": 0.66, "grad_norm": 0.7429416179656982, "learning_rate": 5.273646192562154e-06, "loss": 2.0569, "step": 19910 }, { "epoch": 0.66, "grad_norm": 0.7249921560287476, "learning_rate": 5.272709536276928e-06, "loss": 2.0216, "step": 19911 }, { "epoch": 0.66, "grad_norm": 0.7380189895629883, "learning_rate": 5.2717729333978565e-06, "loss": 1.9882, "step": 19912 }, { "epoch": 0.66, "grad_norm": 0.7266087532043457, "learning_rate": 5.2708363839355224e-06, "loss": 2.1028, "step": 19913 }, { "epoch": 0.66, "grad_norm": 0.772238552570343, "learning_rate": 5.269899887900512e-06, "loss": 2.1044, "step": 19914 }, { "epoch": 0.66, "grad_norm": 0.7243350148200989, "learning_rate": 5.268963445303401e-06, "loss": 2.0439, "step": 19915 }, { "epoch": 0.66, "grad_norm": 0.7428587675094604, "learning_rate": 5.268027056154764e-06, "loss": 2.064, "step": 19916 }, { "epoch": 0.66, "grad_norm": 0.7535495162010193, "learning_rate": 5.267090720465185e-06, "loss": 2.0988, "step": 19917 }, { "epoch": 0.66, "grad_norm": 0.7133070230484009, "learning_rate": 5.266154438245247e-06, "loss": 2.0498, "step": 19918 }, { "epoch": 0.66, "grad_norm": 0.7141594290733337, "learning_rate": 5.265218209505515e-06, "loss": 2.0488, "step": 19919 }, { "epoch": 0.66, "grad_norm": 0.7474831342697144, "learning_rate": 5.264282034256573e-06, "loss": 2.0632, "step": 19920 }, { "epoch": 0.66, "grad_norm": 0.7682548761367798, "learning_rate": 5.263345912509001e-06, "loss": 2.0167, "step": 19921 }, { "epoch": 0.66, "grad_norm": 0.7651169300079346, "learning_rate": 5.262409844273366e-06, "loss": 1.9975, "step": 19922 }, { "epoch": 0.66, "grad_norm": 0.7359027862548828, "learning_rate": 5.261473829560253e-06, "loss": 2.0822, "step": 19923 }, { "epoch": 0.66, "grad_norm": 0.7360365390777588, "learning_rate": 5.260537868380232e-06, "loss": 2.0256, "step": 19924 }, { "epoch": 0.66, "grad_norm": 0.7417351007461548, "learning_rate": 5.259601960743872e-06, "loss": 2.1517, "step": 19925 }, { "epoch": 0.66, "grad_norm": 0.7542404532432556, "learning_rate": 5.258666106661752e-06, "loss": 2.1161, "step": 19926 }, { "epoch": 0.66, "grad_norm": 0.7494674921035767, "learning_rate": 5.257730306144446e-06, "loss": 2.0389, "step": 19927 }, { "epoch": 0.66, "grad_norm": 0.720329761505127, "learning_rate": 5.256794559202525e-06, "loss": 2.0281, "step": 19928 }, { "epoch": 0.66, "grad_norm": 0.7066154479980469, "learning_rate": 5.2558588658465545e-06, "loss": 2.0894, "step": 19929 }, { "epoch": 0.66, "grad_norm": 0.7589161992073059, "learning_rate": 5.2549232260871144e-06, "loss": 2.0275, "step": 19930 }, { "epoch": 0.66, "grad_norm": 0.7606261968612671, "learning_rate": 5.2539876399347675e-06, "loss": 2.0511, "step": 19931 }, { "epoch": 0.66, "grad_norm": 0.7273508310317993, "learning_rate": 5.25305210740009e-06, "loss": 2.0369, "step": 19932 }, { "epoch": 0.66, "grad_norm": 0.7534418106079102, "learning_rate": 5.252116628493644e-06, "loss": 2.0617, "step": 19933 }, { "epoch": 0.66, "grad_norm": 0.7711711525917053, "learning_rate": 5.251181203226006e-06, "loss": 2.0514, "step": 19934 }, { "epoch": 0.66, "grad_norm": 0.807056725025177, "learning_rate": 5.250245831607734e-06, "loss": 1.9888, "step": 19935 }, { "epoch": 0.66, "grad_norm": 0.7039667963981628, "learning_rate": 5.249310513649407e-06, "loss": 1.9892, "step": 19936 }, { "epoch": 0.66, "grad_norm": 0.7574709057807922, "learning_rate": 5.2483752493615856e-06, "loss": 2.0768, "step": 19937 }, { "epoch": 0.66, "grad_norm": 0.7482070326805115, "learning_rate": 5.247440038754832e-06, "loss": 2.1215, "step": 19938 }, { "epoch": 0.66, "grad_norm": 0.742131769657135, "learning_rate": 5.246504881839714e-06, "loss": 2.0348, "step": 19939 }, { "epoch": 0.66, "grad_norm": 0.7584533095359802, "learning_rate": 5.2455697786268066e-06, "loss": 2.1008, "step": 19940 }, { "epoch": 0.66, "grad_norm": 0.7259510159492493, "learning_rate": 5.244634729126658e-06, "loss": 2.0063, "step": 19941 }, { "epoch": 0.66, "grad_norm": 0.7475548982620239, "learning_rate": 5.24369973334984e-06, "loss": 2.045, "step": 19942 }, { "epoch": 0.66, "grad_norm": 0.7696491479873657, "learning_rate": 5.242764791306918e-06, "loss": 2.0993, "step": 19943 }, { "epoch": 0.66, "grad_norm": 0.7594428658485413, "learning_rate": 5.241829903008447e-06, "loss": 1.9864, "step": 19944 }, { "epoch": 0.66, "grad_norm": 0.7473368048667908, "learning_rate": 5.240895068464997e-06, "loss": 2.0561, "step": 19945 }, { "epoch": 0.66, "grad_norm": 0.7332181334495544, "learning_rate": 5.239960287687127e-06, "loss": 2.086, "step": 19946 }, { "epoch": 0.66, "grad_norm": 0.7444409132003784, "learning_rate": 5.239025560685392e-06, "loss": 2.0171, "step": 19947 }, { "epoch": 0.66, "grad_norm": 0.7400988340377808, "learning_rate": 5.238090887470354e-06, "loss": 2.0553, "step": 19948 }, { "epoch": 0.66, "grad_norm": 0.7323520183563232, "learning_rate": 5.237156268052579e-06, "loss": 2.0285, "step": 19949 }, { "epoch": 0.66, "grad_norm": 0.7472159266471863, "learning_rate": 5.236221702442622e-06, "loss": 2.0478, "step": 19950 }, { "epoch": 0.66, "grad_norm": 0.7648470401763916, "learning_rate": 5.235287190651036e-06, "loss": 1.9858, "step": 19951 }, { "epoch": 0.66, "grad_norm": 0.7110038995742798, "learning_rate": 5.234352732688387e-06, "loss": 2.08, "step": 19952 }, { "epoch": 0.66, "grad_norm": 0.7678295969963074, "learning_rate": 5.233418328565224e-06, "loss": 2.0557, "step": 19953 }, { "epoch": 0.66, "grad_norm": 0.7629528641700745, "learning_rate": 5.232483978292111e-06, "loss": 2.0565, "step": 19954 }, { "epoch": 0.66, "grad_norm": 0.7358205914497375, "learning_rate": 5.231549681879596e-06, "loss": 1.9522, "step": 19955 }, { "epoch": 0.66, "grad_norm": 0.7580413222312927, "learning_rate": 5.2306154393382424e-06, "loss": 2.0677, "step": 19956 }, { "epoch": 0.66, "grad_norm": 0.7195526957511902, "learning_rate": 5.229681250678596e-06, "loss": 2.0417, "step": 19957 }, { "epoch": 0.66, "grad_norm": 0.7724376916885376, "learning_rate": 5.228747115911219e-06, "loss": 2.0566, "step": 19958 }, { "epoch": 0.66, "grad_norm": 0.7452408671379089, "learning_rate": 5.2278130350466615e-06, "loss": 2.1133, "step": 19959 }, { "epoch": 0.66, "grad_norm": 0.729656457901001, "learning_rate": 5.226879008095472e-06, "loss": 2.0317, "step": 19960 }, { "epoch": 0.66, "grad_norm": 0.7790346145629883, "learning_rate": 5.225945035068205e-06, "loss": 2.0465, "step": 19961 }, { "epoch": 0.66, "grad_norm": 0.729098916053772, "learning_rate": 5.225011115975418e-06, "loss": 2.0353, "step": 19962 }, { "epoch": 0.66, "grad_norm": 0.7400538325309753, "learning_rate": 5.224077250827655e-06, "loss": 2.1183, "step": 19963 }, { "epoch": 0.66, "grad_norm": 0.727157473564148, "learning_rate": 5.223143439635467e-06, "loss": 2.0979, "step": 19964 }, { "epoch": 0.66, "grad_norm": 0.7277399301528931, "learning_rate": 5.222209682409407e-06, "loss": 2.0525, "step": 19965 }, { "epoch": 0.66, "grad_norm": 0.7268252968788147, "learning_rate": 5.221275979160019e-06, "loss": 2.0624, "step": 19966 }, { "epoch": 0.66, "grad_norm": 0.7479997873306274, "learning_rate": 5.220342329897859e-06, "loss": 2.0253, "step": 19967 }, { "epoch": 0.66, "grad_norm": 0.7392446994781494, "learning_rate": 5.219408734633467e-06, "loss": 2.0931, "step": 19968 }, { "epoch": 0.66, "grad_norm": 0.7270455360412598, "learning_rate": 5.218475193377392e-06, "loss": 2.0447, "step": 19969 }, { "epoch": 0.66, "grad_norm": 0.7498703598976135, "learning_rate": 5.217541706140182e-06, "loss": 2.1104, "step": 19970 }, { "epoch": 0.66, "grad_norm": 0.754414975643158, "learning_rate": 5.2166082729323864e-06, "loss": 2.1149, "step": 19971 }, { "epoch": 0.66, "grad_norm": 0.7405804395675659, "learning_rate": 5.215674893764548e-06, "loss": 2.0885, "step": 19972 }, { "epoch": 0.66, "grad_norm": 0.7434942722320557, "learning_rate": 5.214741568647205e-06, "loss": 2.0513, "step": 19973 }, { "epoch": 0.66, "grad_norm": 0.7541471719741821, "learning_rate": 5.213808297590915e-06, "loss": 2.0816, "step": 19974 }, { "epoch": 0.66, "grad_norm": 0.7442706823348999, "learning_rate": 5.212875080606205e-06, "loss": 2.049, "step": 19975 }, { "epoch": 0.66, "grad_norm": 0.765104353427887, "learning_rate": 5.211941917703633e-06, "loss": 2.0681, "step": 19976 }, { "epoch": 0.66, "grad_norm": 0.7398830056190491, "learning_rate": 5.211008808893732e-06, "loss": 2.0597, "step": 19977 }, { "epoch": 0.66, "grad_norm": 0.7380279302597046, "learning_rate": 5.2100757541870505e-06, "loss": 1.9759, "step": 19978 }, { "epoch": 0.66, "grad_norm": 0.7522118091583252, "learning_rate": 5.209142753594122e-06, "loss": 2.0663, "step": 19979 }, { "epoch": 0.66, "grad_norm": 0.7491288185119629, "learning_rate": 5.208209807125495e-06, "loss": 2.1054, "step": 19980 }, { "epoch": 0.66, "grad_norm": 0.7285361886024475, "learning_rate": 5.207276914791704e-06, "loss": 2.0537, "step": 19981 }, { "epoch": 0.66, "grad_norm": 0.7258206009864807, "learning_rate": 5.206344076603287e-06, "loss": 2.1286, "step": 19982 }, { "epoch": 0.66, "grad_norm": 0.7306070327758789, "learning_rate": 5.205411292570784e-06, "loss": 1.9826, "step": 19983 }, { "epoch": 0.66, "grad_norm": 0.7251618504524231, "learning_rate": 5.20447856270474e-06, "loss": 2.0984, "step": 19984 }, { "epoch": 0.66, "grad_norm": 0.7967289686203003, "learning_rate": 5.203545887015685e-06, "loss": 2.0448, "step": 19985 }, { "epoch": 0.66, "grad_norm": 0.7521054744720459, "learning_rate": 5.202613265514155e-06, "loss": 2.0123, "step": 19986 }, { "epoch": 0.66, "grad_norm": 0.7265276908874512, "learning_rate": 5.201680698210692e-06, "loss": 1.9526, "step": 19987 }, { "epoch": 0.67, "grad_norm": 0.7440691590309143, "learning_rate": 5.200748185115825e-06, "loss": 2.0316, "step": 19988 }, { "epoch": 0.67, "grad_norm": 0.7906495332717896, "learning_rate": 5.199815726240096e-06, "loss": 2.1201, "step": 19989 }, { "epoch": 0.67, "grad_norm": 0.7587226629257202, "learning_rate": 5.198883321594035e-06, "loss": 2.109, "step": 19990 }, { "epoch": 0.67, "grad_norm": 0.7623456716537476, "learning_rate": 5.197950971188174e-06, "loss": 2.0895, "step": 19991 }, { "epoch": 0.67, "grad_norm": 0.7347291707992554, "learning_rate": 5.1970186750330475e-06, "loss": 2.0545, "step": 19992 }, { "epoch": 0.67, "grad_norm": 0.7406683564186096, "learning_rate": 5.196086433139193e-06, "loss": 2.0509, "step": 19993 }, { "epoch": 0.67, "grad_norm": 0.7540918588638306, "learning_rate": 5.19515424551714e-06, "loss": 2.0061, "step": 19994 }, { "epoch": 0.67, "grad_norm": 0.7136572599411011, "learning_rate": 5.194222112177413e-06, "loss": 2.0557, "step": 19995 }, { "epoch": 0.67, "grad_norm": 0.7437838315963745, "learning_rate": 5.193290033130553e-06, "loss": 2.1386, "step": 19996 }, { "epoch": 0.67, "grad_norm": 0.7490946650505066, "learning_rate": 5.19235800838708e-06, "loss": 2.0568, "step": 19997 }, { "epoch": 0.67, "grad_norm": 0.7455244064331055, "learning_rate": 5.191426037957535e-06, "loss": 2.0906, "step": 19998 }, { "epoch": 0.67, "grad_norm": 0.7630649209022522, "learning_rate": 5.190494121852434e-06, "loss": 1.9964, "step": 19999 }, { "epoch": 0.67, "grad_norm": 0.7571759223937988, "learning_rate": 5.189562260082317e-06, "loss": 2.0671, "step": 20000 }, { "epoch": 0.67, "grad_norm": 0.7247136831283569, "learning_rate": 5.188630452657701e-06, "loss": 2.0538, "step": 20001 }, { "epoch": 0.67, "grad_norm": 0.7452728152275085, "learning_rate": 5.187698699589126e-06, "loss": 2.0477, "step": 20002 }, { "epoch": 0.67, "grad_norm": 0.7373649477958679, "learning_rate": 5.1867670008871075e-06, "loss": 2.1015, "step": 20003 }, { "epoch": 0.67, "grad_norm": 0.7305713891983032, "learning_rate": 5.185835356562171e-06, "loss": 2.0371, "step": 20004 }, { "epoch": 0.67, "grad_norm": 0.7433360815048218, "learning_rate": 5.184903766624846e-06, "loss": 2.1105, "step": 20005 }, { "epoch": 0.67, "grad_norm": 0.749610960483551, "learning_rate": 5.18397223108566e-06, "loss": 2.0265, "step": 20006 }, { "epoch": 0.67, "grad_norm": 0.7169085144996643, "learning_rate": 5.183040749955133e-06, "loss": 2.0323, "step": 20007 }, { "epoch": 0.67, "grad_norm": 0.7832186222076416, "learning_rate": 5.1821093232437845e-06, "loss": 2.0054, "step": 20008 }, { "epoch": 0.67, "grad_norm": 0.7493696808815002, "learning_rate": 5.181177950962146e-06, "loss": 2.0821, "step": 20009 }, { "epoch": 0.67, "grad_norm": 0.7384002208709717, "learning_rate": 5.180246633120731e-06, "loss": 2.0385, "step": 20010 }, { "epoch": 0.67, "grad_norm": 0.7419658303260803, "learning_rate": 5.179315369730069e-06, "loss": 2.0029, "step": 20011 }, { "epoch": 0.67, "grad_norm": 0.7409466505050659, "learning_rate": 5.178384160800676e-06, "loss": 2.0558, "step": 20012 }, { "epoch": 0.67, "grad_norm": 0.7367585897445679, "learning_rate": 5.1774530063430695e-06, "loss": 2.0284, "step": 20013 }, { "epoch": 0.67, "grad_norm": 0.7381303310394287, "learning_rate": 5.176521906367773e-06, "loss": 2.0617, "step": 20014 }, { "epoch": 0.67, "grad_norm": 0.7545026540756226, "learning_rate": 5.175590860885308e-06, "loss": 2.0453, "step": 20015 }, { "epoch": 0.67, "grad_norm": 0.7902075052261353, "learning_rate": 5.174659869906191e-06, "loss": 2.0302, "step": 20016 }, { "epoch": 0.67, "grad_norm": 0.7370849847793579, "learning_rate": 5.173728933440933e-06, "loss": 2.0626, "step": 20017 }, { "epoch": 0.67, "grad_norm": 0.7438147068023682, "learning_rate": 5.17279805150006e-06, "loss": 2.0966, "step": 20018 }, { "epoch": 0.67, "grad_norm": 0.7443079352378845, "learning_rate": 5.1718672240940885e-06, "loss": 2.0697, "step": 20019 }, { "epoch": 0.67, "grad_norm": 0.7590956091880798, "learning_rate": 5.1709364512335305e-06, "loss": 1.9949, "step": 20020 }, { "epoch": 0.67, "grad_norm": 0.7073783874511719, "learning_rate": 5.1700057329289e-06, "loss": 2.0951, "step": 20021 }, { "epoch": 0.67, "grad_norm": 0.7462553381919861, "learning_rate": 5.1690750691907165e-06, "loss": 2.0961, "step": 20022 }, { "epoch": 0.67, "grad_norm": 0.7517601847648621, "learning_rate": 5.168144460029488e-06, "loss": 1.9983, "step": 20023 }, { "epoch": 0.67, "grad_norm": 0.7401293516159058, "learning_rate": 5.167213905455737e-06, "loss": 2.0703, "step": 20024 }, { "epoch": 0.67, "grad_norm": 0.7259426116943359, "learning_rate": 5.166283405479969e-06, "loss": 2.0753, "step": 20025 }, { "epoch": 0.67, "grad_norm": 0.7252581715583801, "learning_rate": 5.165352960112695e-06, "loss": 2.0019, "step": 20026 }, { "epoch": 0.67, "grad_norm": 0.7144137620925903, "learning_rate": 5.164422569364431e-06, "loss": 2.0136, "step": 20027 }, { "epoch": 0.67, "grad_norm": 0.7215256094932556, "learning_rate": 5.163492233245689e-06, "loss": 2.0144, "step": 20028 }, { "epoch": 0.67, "grad_norm": 0.724189281463623, "learning_rate": 5.162561951766979e-06, "loss": 2.0375, "step": 20029 }, { "epoch": 0.67, "grad_norm": 0.7326545715332031, "learning_rate": 5.161631724938805e-06, "loss": 2.0505, "step": 20030 }, { "epoch": 0.67, "grad_norm": 0.7610042691230774, "learning_rate": 5.160701552771683e-06, "loss": 2.154, "step": 20031 }, { "epoch": 0.67, "grad_norm": 0.7465367913246155, "learning_rate": 5.159771435276115e-06, "loss": 2.1116, "step": 20032 }, { "epoch": 0.67, "grad_norm": 0.7436209321022034, "learning_rate": 5.158841372462617e-06, "loss": 2.0161, "step": 20033 }, { "epoch": 0.67, "grad_norm": 0.7725054025650024, "learning_rate": 5.1579113643416875e-06, "loss": 2.1128, "step": 20034 }, { "epoch": 0.67, "grad_norm": 0.7418740391731262, "learning_rate": 5.156981410923843e-06, "loss": 2.09, "step": 20035 }, { "epoch": 0.67, "grad_norm": 0.7483685612678528, "learning_rate": 5.156051512219579e-06, "loss": 2.0591, "step": 20036 }, { "epoch": 0.67, "grad_norm": 0.7394378185272217, "learning_rate": 5.15512166823941e-06, "loss": 2.0313, "step": 20037 }, { "epoch": 0.67, "grad_norm": 0.7457744479179382, "learning_rate": 5.154191878993837e-06, "loss": 2.0701, "step": 20038 }, { "epoch": 0.67, "grad_norm": 0.7545459866523743, "learning_rate": 5.153262144493361e-06, "loss": 2.0649, "step": 20039 }, { "epoch": 0.67, "grad_norm": 0.71275794506073, "learning_rate": 5.152332464748488e-06, "loss": 2.0785, "step": 20040 }, { "epoch": 0.67, "grad_norm": 0.7768306732177734, "learning_rate": 5.151402839769728e-06, "loss": 2.112, "step": 20041 }, { "epoch": 0.67, "grad_norm": 0.7237671613693237, "learning_rate": 5.150473269567575e-06, "loss": 2.0299, "step": 20042 }, { "epoch": 0.67, "grad_norm": 0.7397258877754211, "learning_rate": 5.149543754152529e-06, "loss": 1.9948, "step": 20043 }, { "epoch": 0.67, "grad_norm": 0.7323939800262451, "learning_rate": 5.148614293535099e-06, "loss": 2.0347, "step": 20044 }, { "epoch": 0.67, "grad_norm": 0.7212158441543579, "learning_rate": 5.147684887725779e-06, "loss": 2.0848, "step": 20045 }, { "epoch": 0.67, "grad_norm": 0.7401658892631531, "learning_rate": 5.1467555367350705e-06, "loss": 2.0377, "step": 20046 }, { "epoch": 0.67, "grad_norm": 0.729089081287384, "learning_rate": 5.145826240573481e-06, "loss": 1.9853, "step": 20047 }, { "epoch": 0.67, "grad_norm": 0.7421712279319763, "learning_rate": 5.144896999251494e-06, "loss": 2.0624, "step": 20048 }, { "epoch": 0.67, "grad_norm": 0.7291492819786072, "learning_rate": 5.143967812779616e-06, "loss": 2.0371, "step": 20049 }, { "epoch": 0.67, "grad_norm": 0.7577301263809204, "learning_rate": 5.1430386811683475e-06, "loss": 2.0273, "step": 20050 }, { "epoch": 0.67, "grad_norm": 0.7708485126495361, "learning_rate": 5.142109604428182e-06, "loss": 2.0588, "step": 20051 }, { "epoch": 0.67, "grad_norm": 0.7486909031867981, "learning_rate": 5.14118058256961e-06, "loss": 2.0067, "step": 20052 }, { "epoch": 0.67, "grad_norm": 0.7512775659561157, "learning_rate": 5.1402516156031375e-06, "loss": 2.0467, "step": 20053 }, { "epoch": 0.67, "grad_norm": 0.7575007081031799, "learning_rate": 5.139322703539249e-06, "loss": 2.0675, "step": 20054 }, { "epoch": 0.67, "grad_norm": 0.738827645778656, "learning_rate": 5.138393846388449e-06, "loss": 2.1307, "step": 20055 }, { "epoch": 0.67, "grad_norm": 0.7490928769111633, "learning_rate": 5.13746504416122e-06, "loss": 2.0303, "step": 20056 }, { "epoch": 0.67, "grad_norm": 0.7330386638641357, "learning_rate": 5.136536296868068e-06, "loss": 2.056, "step": 20057 }, { "epoch": 0.67, "grad_norm": 0.7562073469161987, "learning_rate": 5.135607604519474e-06, "loss": 2.1006, "step": 20058 }, { "epoch": 0.67, "grad_norm": 0.7223935723304749, "learning_rate": 5.134678967125937e-06, "loss": 2.1147, "step": 20059 }, { "epoch": 0.67, "grad_norm": 0.7269719243049622, "learning_rate": 5.133750384697946e-06, "loss": 2.0126, "step": 20060 }, { "epoch": 0.67, "grad_norm": 0.7357509136199951, "learning_rate": 5.132821857245989e-06, "loss": 2.0654, "step": 20061 }, { "epoch": 0.67, "grad_norm": 0.7558983564376831, "learning_rate": 5.1318933847805575e-06, "loss": 2.0992, "step": 20062 }, { "epoch": 0.67, "grad_norm": 0.7379820346832275, "learning_rate": 5.1309649673121445e-06, "loss": 2.0907, "step": 20063 }, { "epoch": 0.67, "grad_norm": 0.7279856204986572, "learning_rate": 5.130036604851236e-06, "loss": 2.0556, "step": 20064 }, { "epoch": 0.67, "grad_norm": 0.7204431295394897, "learning_rate": 5.129108297408316e-06, "loss": 2.0407, "step": 20065 }, { "epoch": 0.67, "grad_norm": 0.7701833248138428, "learning_rate": 5.12818004499388e-06, "loss": 2.0316, "step": 20066 }, { "epoch": 0.67, "grad_norm": 0.732609748840332, "learning_rate": 5.127251847618407e-06, "loss": 2.0728, "step": 20067 }, { "epoch": 0.67, "grad_norm": 0.7291454672813416, "learning_rate": 5.126323705292388e-06, "loss": 2.0765, "step": 20068 }, { "epoch": 0.67, "grad_norm": 0.7350364923477173, "learning_rate": 5.125395618026313e-06, "loss": 1.9922, "step": 20069 }, { "epoch": 0.67, "grad_norm": 0.7442386150360107, "learning_rate": 5.124467585830655e-06, "loss": 2.053, "step": 20070 }, { "epoch": 0.67, "grad_norm": 0.7704840898513794, "learning_rate": 5.123539608715904e-06, "loss": 2.1284, "step": 20071 }, { "epoch": 0.67, "grad_norm": 0.7391213774681091, "learning_rate": 5.122611686692549e-06, "loss": 2.038, "step": 20072 }, { "epoch": 0.67, "grad_norm": 0.7477456331253052, "learning_rate": 5.12168381977107e-06, "loss": 2.1045, "step": 20073 }, { "epoch": 0.67, "grad_norm": 0.7386415004730225, "learning_rate": 5.120756007961943e-06, "loss": 1.9846, "step": 20074 }, { "epoch": 0.67, "grad_norm": 0.749864399433136, "learning_rate": 5.119828251275659e-06, "loss": 2.0561, "step": 20075 }, { "epoch": 0.67, "grad_norm": 0.7605765461921692, "learning_rate": 5.1189005497226915e-06, "loss": 2.0556, "step": 20076 }, { "epoch": 0.67, "grad_norm": 0.7270441055297852, "learning_rate": 5.117972903313526e-06, "loss": 2.0782, "step": 20077 }, { "epoch": 0.67, "grad_norm": 0.7454894781112671, "learning_rate": 5.117045312058644e-06, "loss": 2.0336, "step": 20078 }, { "epoch": 0.67, "grad_norm": 0.7591621279716492, "learning_rate": 5.1161177759685235e-06, "loss": 2.0597, "step": 20079 }, { "epoch": 0.67, "grad_norm": 0.7574657797813416, "learning_rate": 5.115190295053637e-06, "loss": 2.0287, "step": 20080 }, { "epoch": 0.67, "grad_norm": 0.7541659474372864, "learning_rate": 5.114262869324472e-06, "loss": 2.1193, "step": 20081 }, { "epoch": 0.67, "grad_norm": 0.7173806428909302, "learning_rate": 5.113335498791503e-06, "loss": 2.0313, "step": 20082 }, { "epoch": 0.67, "grad_norm": 0.7680360674858093, "learning_rate": 5.112408183465201e-06, "loss": 2.0505, "step": 20083 }, { "epoch": 0.67, "grad_norm": 0.7402453422546387, "learning_rate": 5.111480923356046e-06, "loss": 2.0421, "step": 20084 }, { "epoch": 0.67, "grad_norm": 0.7516781687736511, "learning_rate": 5.110553718474519e-06, "loss": 2.0124, "step": 20085 }, { "epoch": 0.67, "grad_norm": 0.7488648891448975, "learning_rate": 5.109626568831092e-06, "loss": 2.0546, "step": 20086 }, { "epoch": 0.67, "grad_norm": 0.7771928906440735, "learning_rate": 5.108699474436232e-06, "loss": 2.018, "step": 20087 }, { "epoch": 0.67, "grad_norm": 0.757161021232605, "learning_rate": 5.1077724353004245e-06, "loss": 2.0654, "step": 20088 }, { "epoch": 0.67, "grad_norm": 0.7520354986190796, "learning_rate": 5.106845451434131e-06, "loss": 2.0345, "step": 20089 }, { "epoch": 0.67, "grad_norm": 0.727942168712616, "learning_rate": 5.1059185228478314e-06, "loss": 1.9976, "step": 20090 }, { "epoch": 0.67, "grad_norm": 0.7235636115074158, "learning_rate": 5.104991649552004e-06, "loss": 2.0792, "step": 20091 }, { "epoch": 0.67, "grad_norm": 0.7473171353340149, "learning_rate": 5.104064831557103e-06, "loss": 2.0928, "step": 20092 }, { "epoch": 0.67, "grad_norm": 0.7357332110404968, "learning_rate": 5.10313806887361e-06, "loss": 2.1112, "step": 20093 }, { "epoch": 0.67, "grad_norm": 0.7877339124679565, "learning_rate": 5.102211361511995e-06, "loss": 2.0575, "step": 20094 }, { "epoch": 0.67, "grad_norm": 0.7484314441680908, "learning_rate": 5.1012847094827276e-06, "loss": 2.0577, "step": 20095 }, { "epoch": 0.67, "grad_norm": 0.7079607844352722, "learning_rate": 5.100358112796271e-06, "loss": 2.0286, "step": 20096 }, { "epoch": 0.67, "grad_norm": 0.7349743247032166, "learning_rate": 5.099431571463099e-06, "loss": 1.995, "step": 20097 }, { "epoch": 0.67, "grad_norm": 0.7466428875923157, "learning_rate": 5.098505085493675e-06, "loss": 2.0056, "step": 20098 }, { "epoch": 0.67, "grad_norm": 0.742247462272644, "learning_rate": 5.0975786548984665e-06, "loss": 2.0835, "step": 20099 }, { "epoch": 0.67, "grad_norm": 0.7676612138748169, "learning_rate": 5.096652279687946e-06, "loss": 2.1192, "step": 20100 }, { "epoch": 0.67, "grad_norm": 0.7876275181770325, "learning_rate": 5.0957259598725735e-06, "loss": 2.0656, "step": 20101 }, { "epoch": 0.67, "grad_norm": 0.7228032350540161, "learning_rate": 5.094799695462812e-06, "loss": 2.0314, "step": 20102 }, { "epoch": 0.67, "grad_norm": 0.7323240637779236, "learning_rate": 5.0938734864691286e-06, "loss": 1.9901, "step": 20103 }, { "epoch": 0.67, "grad_norm": 0.7284857630729675, "learning_rate": 5.092947332901995e-06, "loss": 2.0941, "step": 20104 }, { "epoch": 0.67, "grad_norm": 0.7204777002334595, "learning_rate": 5.092021234771859e-06, "loss": 2.0256, "step": 20105 }, { "epoch": 0.67, "grad_norm": 0.7562199234962463, "learning_rate": 5.091095192089191e-06, "loss": 2.012, "step": 20106 }, { "epoch": 0.67, "grad_norm": 0.7451204061508179, "learning_rate": 5.090169204864454e-06, "loss": 2.0756, "step": 20107 }, { "epoch": 0.67, "grad_norm": 0.7404773831367493, "learning_rate": 5.089243273108108e-06, "loss": 2.0261, "step": 20108 }, { "epoch": 0.67, "grad_norm": 0.7379840016365051, "learning_rate": 5.088317396830616e-06, "loss": 2.0318, "step": 20109 }, { "epoch": 0.67, "grad_norm": 0.7416504621505737, "learning_rate": 5.087391576042434e-06, "loss": 2.1036, "step": 20110 }, { "epoch": 0.67, "grad_norm": 0.7413413524627686, "learning_rate": 5.08646581075402e-06, "loss": 2.1051, "step": 20111 }, { "epoch": 0.67, "grad_norm": 0.7426283359527588, "learning_rate": 5.085540100975835e-06, "loss": 2.061, "step": 20112 }, { "epoch": 0.67, "grad_norm": 0.734928548336029, "learning_rate": 5.084614446718346e-06, "loss": 2.0862, "step": 20113 }, { "epoch": 0.67, "grad_norm": 0.7508774995803833, "learning_rate": 5.083688847991996e-06, "loss": 2.0271, "step": 20114 }, { "epoch": 0.67, "grad_norm": 0.7515003085136414, "learning_rate": 5.082763304807246e-06, "loss": 2.0122, "step": 20115 }, { "epoch": 0.67, "grad_norm": 0.7840917110443115, "learning_rate": 5.0818378171745596e-06, "loss": 2.0817, "step": 20116 }, { "epoch": 0.67, "grad_norm": 0.7541688084602356, "learning_rate": 5.080912385104386e-06, "loss": 2.1253, "step": 20117 }, { "epoch": 0.67, "grad_norm": 0.7512958645820618, "learning_rate": 5.0799870086071786e-06, "loss": 1.9948, "step": 20118 }, { "epoch": 0.67, "grad_norm": 0.7273443341255188, "learning_rate": 5.079061687693394e-06, "loss": 2.0042, "step": 20119 }, { "epoch": 0.67, "grad_norm": 0.7307820916175842, "learning_rate": 5.078136422373492e-06, "loss": 1.9866, "step": 20120 }, { "epoch": 0.67, "grad_norm": 0.7286121845245361, "learning_rate": 5.077211212657914e-06, "loss": 2.058, "step": 20121 }, { "epoch": 0.67, "grad_norm": 0.7404541969299316, "learning_rate": 5.076286058557122e-06, "loss": 2.091, "step": 20122 }, { "epoch": 0.67, "grad_norm": 0.7277871966362, "learning_rate": 5.075360960081568e-06, "loss": 2.0351, "step": 20123 }, { "epoch": 0.67, "grad_norm": 0.7821372747421265, "learning_rate": 5.074435917241694e-06, "loss": 2.0563, "step": 20124 }, { "epoch": 0.67, "grad_norm": 0.7270851135253906, "learning_rate": 5.073510930047956e-06, "loss": 1.9878, "step": 20125 }, { "epoch": 0.67, "grad_norm": 0.7430328130722046, "learning_rate": 5.072585998510813e-06, "loss": 2.0072, "step": 20126 }, { "epoch": 0.67, "grad_norm": 0.7706193327903748, "learning_rate": 5.071661122640696e-06, "loss": 2.1189, "step": 20127 }, { "epoch": 0.67, "grad_norm": 0.7251985669136047, "learning_rate": 5.0707363024480645e-06, "loss": 2.0503, "step": 20128 }, { "epoch": 0.67, "grad_norm": 0.7563086748123169, "learning_rate": 5.06981153794337e-06, "loss": 2.018, "step": 20129 }, { "epoch": 0.67, "grad_norm": 0.7297871112823486, "learning_rate": 5.068886829137051e-06, "loss": 2.0584, "step": 20130 }, { "epoch": 0.67, "grad_norm": 0.7636356353759766, "learning_rate": 5.067962176039563e-06, "loss": 2.0574, "step": 20131 }, { "epoch": 0.67, "grad_norm": 0.7723695635795593, "learning_rate": 5.067037578661347e-06, "loss": 2.0425, "step": 20132 }, { "epoch": 0.67, "grad_norm": 0.7517685890197754, "learning_rate": 5.0661130370128455e-06, "loss": 2.0296, "step": 20133 }, { "epoch": 0.67, "grad_norm": 0.7514216303825378, "learning_rate": 5.065188551104508e-06, "loss": 1.978, "step": 20134 }, { "epoch": 0.67, "grad_norm": 0.7456119656562805, "learning_rate": 5.0642641209467815e-06, "loss": 1.9984, "step": 20135 }, { "epoch": 0.67, "grad_norm": 0.7448129653930664, "learning_rate": 5.063339746550107e-06, "loss": 2.053, "step": 20136 }, { "epoch": 0.67, "grad_norm": 0.7284356951713562, "learning_rate": 5.062415427924921e-06, "loss": 2.0523, "step": 20137 }, { "epoch": 0.67, "grad_norm": 0.7735322713851929, "learning_rate": 5.0614911650816775e-06, "loss": 2.0856, "step": 20138 }, { "epoch": 0.67, "grad_norm": 0.7671406269073486, "learning_rate": 5.060566958030809e-06, "loss": 2.1047, "step": 20139 }, { "epoch": 0.67, "grad_norm": 0.7591806054115295, "learning_rate": 5.059642806782763e-06, "loss": 2.1821, "step": 20140 }, { "epoch": 0.67, "grad_norm": 0.7437096834182739, "learning_rate": 5.058718711347974e-06, "loss": 1.9782, "step": 20141 }, { "epoch": 0.67, "grad_norm": 0.7560997605323792, "learning_rate": 5.057794671736889e-06, "loss": 2.0899, "step": 20142 }, { "epoch": 0.67, "grad_norm": 0.7178904414176941, "learning_rate": 5.05687068795994e-06, "loss": 2.0771, "step": 20143 }, { "epoch": 0.67, "grad_norm": 0.7227692008018494, "learning_rate": 5.055946760027572e-06, "loss": 2.077, "step": 20144 }, { "epoch": 0.67, "grad_norm": 0.7364628911018372, "learning_rate": 5.055022887950221e-06, "loss": 1.9643, "step": 20145 }, { "epoch": 0.67, "grad_norm": 0.7421463131904602, "learning_rate": 5.054099071738319e-06, "loss": 1.9642, "step": 20146 }, { "epoch": 0.67, "grad_norm": 0.7379451990127563, "learning_rate": 5.053175311402305e-06, "loss": 2.0321, "step": 20147 }, { "epoch": 0.67, "grad_norm": 0.7867157459259033, "learning_rate": 5.052251606952627e-06, "loss": 2.0037, "step": 20148 }, { "epoch": 0.67, "grad_norm": 0.7711898684501648, "learning_rate": 5.051327958399703e-06, "loss": 2.1314, "step": 20149 }, { "epoch": 0.67, "grad_norm": 0.7432572841644287, "learning_rate": 5.050404365753976e-06, "loss": 2.0794, "step": 20150 }, { "epoch": 0.67, "grad_norm": 0.7395318746566772, "learning_rate": 5.049480829025883e-06, "loss": 2.0745, "step": 20151 }, { "epoch": 0.67, "grad_norm": 0.7119020223617554, "learning_rate": 5.04855734822585e-06, "loss": 2.0361, "step": 20152 }, { "epoch": 0.67, "grad_norm": 0.7582393288612366, "learning_rate": 5.047633923364319e-06, "loss": 2.0267, "step": 20153 }, { "epoch": 0.67, "grad_norm": 0.7178239822387695, "learning_rate": 5.046710554451717e-06, "loss": 2.0546, "step": 20154 }, { "epoch": 0.67, "grad_norm": 0.7576134204864502, "learning_rate": 5.045787241498472e-06, "loss": 2.0723, "step": 20155 }, { "epoch": 0.67, "grad_norm": 0.7419856786727905, "learning_rate": 5.044863984515019e-06, "loss": 2.0765, "step": 20156 }, { "epoch": 0.67, "grad_norm": 0.7691748738288879, "learning_rate": 5.043940783511794e-06, "loss": 2.1191, "step": 20157 }, { "epoch": 0.67, "grad_norm": 0.7436739802360535, "learning_rate": 5.043017638499221e-06, "loss": 2.0157, "step": 20158 }, { "epoch": 0.67, "grad_norm": 0.7361814379692078, "learning_rate": 5.042094549487725e-06, "loss": 2.0315, "step": 20159 }, { "epoch": 0.67, "grad_norm": 0.7527881860733032, "learning_rate": 5.041171516487744e-06, "loss": 2.0589, "step": 20160 }, { "epoch": 0.67, "grad_norm": 0.7332875728607178, "learning_rate": 5.040248539509696e-06, "loss": 2.0984, "step": 20161 }, { "epoch": 0.67, "grad_norm": 0.7163532972335815, "learning_rate": 5.039325618564019e-06, "loss": 2.012, "step": 20162 }, { "epoch": 0.67, "grad_norm": 0.7222986221313477, "learning_rate": 5.038402753661129e-06, "loss": 2.0764, "step": 20163 }, { "epoch": 0.67, "grad_norm": 0.736585795879364, "learning_rate": 5.03747994481146e-06, "loss": 1.9682, "step": 20164 }, { "epoch": 0.67, "grad_norm": 0.7504633665084839, "learning_rate": 5.03655719202543e-06, "loss": 2.0639, "step": 20165 }, { "epoch": 0.67, "grad_norm": 0.7210029363632202, "learning_rate": 5.035634495313474e-06, "loss": 2.0371, "step": 20166 }, { "epoch": 0.67, "grad_norm": 0.7701663970947266, "learning_rate": 5.03471185468601e-06, "loss": 2.0545, "step": 20167 }, { "epoch": 0.67, "grad_norm": 0.7473902702331543, "learning_rate": 5.0337892701534555e-06, "loss": 2.0895, "step": 20168 }, { "epoch": 0.67, "grad_norm": 0.7309924960136414, "learning_rate": 5.032866741726241e-06, "loss": 2.0314, "step": 20169 }, { "epoch": 0.67, "grad_norm": 0.7625515460968018, "learning_rate": 5.03194426941479e-06, "loss": 2.0218, "step": 20170 }, { "epoch": 0.67, "grad_norm": 0.7600356340408325, "learning_rate": 5.0310218532295215e-06, "loss": 2.1146, "step": 20171 }, { "epoch": 0.67, "grad_norm": 0.7412950992584229, "learning_rate": 5.030099493180853e-06, "loss": 2.0257, "step": 20172 }, { "epoch": 0.67, "grad_norm": 0.7458981275558472, "learning_rate": 5.029177189279211e-06, "loss": 2.0065, "step": 20173 }, { "epoch": 0.67, "grad_norm": 0.7469793558120728, "learning_rate": 5.028254941535007e-06, "loss": 1.9491, "step": 20174 }, { "epoch": 0.67, "grad_norm": 0.733208417892456, "learning_rate": 5.02733274995867e-06, "loss": 2.0558, "step": 20175 }, { "epoch": 0.67, "grad_norm": 0.7655627727508545, "learning_rate": 5.026410614560613e-06, "loss": 2.0462, "step": 20176 }, { "epoch": 0.67, "grad_norm": 0.7639448642730713, "learning_rate": 5.02548853535125e-06, "loss": 2.0447, "step": 20177 }, { "epoch": 0.67, "grad_norm": 0.7658535242080688, "learning_rate": 5.0245665123410025e-06, "loss": 2.1014, "step": 20178 }, { "epoch": 0.67, "grad_norm": 0.7848217487335205, "learning_rate": 5.023644545540289e-06, "loss": 2.0871, "step": 20179 }, { "epoch": 0.67, "grad_norm": 0.8134219646453857, "learning_rate": 5.022722634959525e-06, "loss": 2.0102, "step": 20180 }, { "epoch": 0.67, "grad_norm": 0.7372454404830933, "learning_rate": 5.02180078060912e-06, "loss": 2.0412, "step": 20181 }, { "epoch": 0.67, "grad_norm": 0.7544186115264893, "learning_rate": 5.020878982499495e-06, "loss": 2.0488, "step": 20182 }, { "epoch": 0.67, "grad_norm": 0.7728284001350403, "learning_rate": 5.0199572406410575e-06, "loss": 2.1243, "step": 20183 }, { "epoch": 0.67, "grad_norm": 0.7464938759803772, "learning_rate": 5.0190355550442296e-06, "loss": 2.0784, "step": 20184 }, { "epoch": 0.67, "grad_norm": 0.7765725255012512, "learning_rate": 5.018113925719412e-06, "loss": 2.058, "step": 20185 }, { "epoch": 0.67, "grad_norm": 0.7639203667640686, "learning_rate": 5.01719235267703e-06, "loss": 2.1222, "step": 20186 }, { "epoch": 0.67, "grad_norm": 0.7611469626426697, "learning_rate": 5.016270835927485e-06, "loss": 2.0738, "step": 20187 }, { "epoch": 0.67, "grad_norm": 0.7171225547790527, "learning_rate": 5.015349375481194e-06, "loss": 2.0887, "step": 20188 }, { "epoch": 0.67, "grad_norm": 0.7405815720558167, "learning_rate": 5.014427971348565e-06, "loss": 2.0376, "step": 20189 }, { "epoch": 0.67, "grad_norm": 0.7422084808349609, "learning_rate": 5.013506623540003e-06, "loss": 2.081, "step": 20190 }, { "epoch": 0.67, "grad_norm": 0.7416921257972717, "learning_rate": 5.01258533206592e-06, "loss": 2.0061, "step": 20191 }, { "epoch": 0.67, "grad_norm": 0.7340272665023804, "learning_rate": 5.01166409693673e-06, "loss": 2.0381, "step": 20192 }, { "epoch": 0.67, "grad_norm": 0.7215927839279175, "learning_rate": 5.010742918162834e-06, "loss": 1.9879, "step": 20193 }, { "epoch": 0.67, "grad_norm": 0.7443199157714844, "learning_rate": 5.009821795754639e-06, "loss": 2.0834, "step": 20194 }, { "epoch": 0.67, "grad_norm": 0.7572246193885803, "learning_rate": 5.008900729722555e-06, "loss": 2.0319, "step": 20195 }, { "epoch": 0.67, "grad_norm": 0.7422019839286804, "learning_rate": 5.007979720076982e-06, "loss": 2.1361, "step": 20196 }, { "epoch": 0.67, "grad_norm": 0.7569596767425537, "learning_rate": 5.007058766828332e-06, "loss": 2.0468, "step": 20197 }, { "epoch": 0.67, "grad_norm": 0.7486196160316467, "learning_rate": 5.006137869987006e-06, "loss": 2.0119, "step": 20198 }, { "epoch": 0.67, "grad_norm": 0.7292360067367554, "learning_rate": 5.0052170295634054e-06, "loss": 2.0542, "step": 20199 }, { "epoch": 0.67, "grad_norm": 0.7497430443763733, "learning_rate": 5.004296245567934e-06, "loss": 2.0722, "step": 20200 }, { "epoch": 0.67, "grad_norm": 0.7626875042915344, "learning_rate": 5.003375518011e-06, "loss": 2.1399, "step": 20201 }, { "epoch": 0.67, "grad_norm": 0.7521160244941711, "learning_rate": 5.002454846903001e-06, "loss": 2.008, "step": 20202 }, { "epoch": 0.67, "grad_norm": 0.724474310874939, "learning_rate": 5.001534232254335e-06, "loss": 2.029, "step": 20203 }, { "epoch": 0.67, "grad_norm": 0.7609326243400574, "learning_rate": 5.000613674075405e-06, "loss": 2.119, "step": 20204 }, { "epoch": 0.67, "grad_norm": 0.7410104870796204, "learning_rate": 4.999693172376616e-06, "loss": 2.0567, "step": 20205 }, { "epoch": 0.67, "grad_norm": 0.7534316182136536, "learning_rate": 4.998772727168363e-06, "loss": 2.0729, "step": 20206 }, { "epoch": 0.67, "grad_norm": 0.7654723525047302, "learning_rate": 4.9978523384610415e-06, "loss": 2.0195, "step": 20207 }, { "epoch": 0.67, "grad_norm": 0.7872684001922607, "learning_rate": 4.996932006265056e-06, "loss": 1.9924, "step": 20208 }, { "epoch": 0.67, "grad_norm": 0.7648528218269348, "learning_rate": 4.996011730590796e-06, "loss": 2.1342, "step": 20209 }, { "epoch": 0.67, "grad_norm": 0.7380174994468689, "learning_rate": 4.995091511448668e-06, "loss": 2.1132, "step": 20210 }, { "epoch": 0.67, "grad_norm": 0.7399367094039917, "learning_rate": 4.994171348849063e-06, "loss": 2.0181, "step": 20211 }, { "epoch": 0.67, "grad_norm": 0.7644140720367432, "learning_rate": 4.9932512428023715e-06, "loss": 2.0172, "step": 20212 }, { "epoch": 0.67, "grad_norm": 0.728918731212616, "learning_rate": 4.992331193318995e-06, "loss": 2.022, "step": 20213 }, { "epoch": 0.67, "grad_norm": 0.774364173412323, "learning_rate": 4.991411200409327e-06, "loss": 2.0694, "step": 20214 }, { "epoch": 0.67, "grad_norm": 0.7457428574562073, "learning_rate": 4.990491264083762e-06, "loss": 2.0405, "step": 20215 }, { "epoch": 0.67, "grad_norm": 0.7411237955093384, "learning_rate": 4.989571384352686e-06, "loss": 2.0213, "step": 20216 }, { "epoch": 0.67, "grad_norm": 0.7275320291519165, "learning_rate": 4.988651561226501e-06, "loss": 2.038, "step": 20217 }, { "epoch": 0.67, "grad_norm": 0.743889331817627, "learning_rate": 4.987731794715589e-06, "loss": 2.0915, "step": 20218 }, { "epoch": 0.67, "grad_norm": 0.7613849639892578, "learning_rate": 4.986812084830349e-06, "loss": 2.0229, "step": 20219 }, { "epoch": 0.67, "grad_norm": 0.7522518038749695, "learning_rate": 4.9858924315811656e-06, "loss": 2.05, "step": 20220 }, { "epoch": 0.67, "grad_norm": 0.7419420480728149, "learning_rate": 4.984972834978434e-06, "loss": 2.0583, "step": 20221 }, { "epoch": 0.67, "grad_norm": 0.7336491346359253, "learning_rate": 4.984053295032536e-06, "loss": 2.0061, "step": 20222 }, { "epoch": 0.67, "grad_norm": 0.7542902827262878, "learning_rate": 4.98313381175387e-06, "loss": 2.0837, "step": 20223 }, { "epoch": 0.67, "grad_norm": 0.7150443196296692, "learning_rate": 4.982214385152816e-06, "loss": 2.0285, "step": 20224 }, { "epoch": 0.67, "grad_norm": 0.7371305227279663, "learning_rate": 4.98129501523976e-06, "loss": 2.0779, "step": 20225 }, { "epoch": 0.67, "grad_norm": 0.7909289002418518, "learning_rate": 4.980375702025091e-06, "loss": 1.9799, "step": 20226 }, { "epoch": 0.67, "grad_norm": 0.7459080815315247, "learning_rate": 4.9794564455192005e-06, "loss": 2.084, "step": 20227 }, { "epoch": 0.67, "grad_norm": 0.7883503437042236, "learning_rate": 4.978537245732468e-06, "loss": 2.0325, "step": 20228 }, { "epoch": 0.67, "grad_norm": 0.7346156239509583, "learning_rate": 4.977618102675276e-06, "loss": 2.0613, "step": 20229 }, { "epoch": 0.67, "grad_norm": 0.7766497135162354, "learning_rate": 4.9766990163580145e-06, "loss": 2.0469, "step": 20230 }, { "epoch": 0.67, "grad_norm": 0.7682079672813416, "learning_rate": 4.975779986791058e-06, "loss": 2.0531, "step": 20231 }, { "epoch": 0.67, "grad_norm": 0.7674205899238586, "learning_rate": 4.974861013984801e-06, "loss": 2.0955, "step": 20232 }, { "epoch": 0.67, "grad_norm": 0.7623017430305481, "learning_rate": 4.973942097949619e-06, "loss": 2.0122, "step": 20233 }, { "epoch": 0.67, "grad_norm": 0.7504947185516357, "learning_rate": 4.973023238695889e-06, "loss": 2.0464, "step": 20234 }, { "epoch": 0.67, "grad_norm": 0.7465676069259644, "learning_rate": 4.972104436233997e-06, "loss": 1.9995, "step": 20235 }, { "epoch": 0.67, "grad_norm": 0.7662003636360168, "learning_rate": 4.971185690574325e-06, "loss": 2.0476, "step": 20236 }, { "epoch": 0.67, "grad_norm": 0.7539246678352356, "learning_rate": 4.97026700172725e-06, "loss": 2.0907, "step": 20237 }, { "epoch": 0.67, "grad_norm": 0.7450074553489685, "learning_rate": 4.969348369703149e-06, "loss": 2.1369, "step": 20238 }, { "epoch": 0.67, "grad_norm": 0.7725008130073547, "learning_rate": 4.968429794512404e-06, "loss": 1.9899, "step": 20239 }, { "epoch": 0.67, "grad_norm": 0.7478427290916443, "learning_rate": 4.967511276165387e-06, "loss": 1.9892, "step": 20240 }, { "epoch": 0.67, "grad_norm": 0.7403272390365601, "learning_rate": 4.966592814672481e-06, "loss": 2.0286, "step": 20241 }, { "epoch": 0.67, "grad_norm": 0.7236624360084534, "learning_rate": 4.965674410044057e-06, "loss": 2.0061, "step": 20242 }, { "epoch": 0.67, "grad_norm": 0.758434534072876, "learning_rate": 4.964756062290496e-06, "loss": 2.0962, "step": 20243 }, { "epoch": 0.67, "grad_norm": 0.7605840563774109, "learning_rate": 4.963837771422168e-06, "loss": 2.0276, "step": 20244 }, { "epoch": 0.67, "grad_norm": 0.7595072984695435, "learning_rate": 4.962919537449451e-06, "loss": 2.0905, "step": 20245 }, { "epoch": 0.67, "grad_norm": 0.7529708743095398, "learning_rate": 4.962001360382717e-06, "loss": 2.0964, "step": 20246 }, { "epoch": 0.67, "grad_norm": 0.7270084619522095, "learning_rate": 4.961083240232337e-06, "loss": 2.0108, "step": 20247 }, { "epoch": 0.67, "grad_norm": 0.7639528512954712, "learning_rate": 4.960165177008685e-06, "loss": 2.0072, "step": 20248 }, { "epoch": 0.67, "grad_norm": 0.7482243180274963, "learning_rate": 4.959247170722137e-06, "loss": 2.0831, "step": 20249 }, { "epoch": 0.67, "grad_norm": 0.7259333729743958, "learning_rate": 4.95832922138306e-06, "loss": 2.0056, "step": 20250 }, { "epoch": 0.67, "grad_norm": 0.744625449180603, "learning_rate": 4.957411329001821e-06, "loss": 2.0256, "step": 20251 }, { "epoch": 0.67, "grad_norm": 0.7366506457328796, "learning_rate": 4.956493493588798e-06, "loss": 2.1093, "step": 20252 }, { "epoch": 0.67, "grad_norm": 0.7946878671646118, "learning_rate": 4.95557571515435e-06, "loss": 2.0458, "step": 20253 }, { "epoch": 0.67, "grad_norm": 0.7475458383560181, "learning_rate": 4.954657993708854e-06, "loss": 2.117, "step": 20254 }, { "epoch": 0.67, "grad_norm": 0.7527669668197632, "learning_rate": 4.953740329262681e-06, "loss": 2.0329, "step": 20255 }, { "epoch": 0.67, "grad_norm": 0.7667949795722961, "learning_rate": 4.952822721826185e-06, "loss": 2.0219, "step": 20256 }, { "epoch": 0.67, "grad_norm": 0.7709560990333557, "learning_rate": 4.95190517140974e-06, "loss": 2.0301, "step": 20257 }, { "epoch": 0.67, "grad_norm": 0.7398699522018433, "learning_rate": 4.950987678023715e-06, "loss": 2.0061, "step": 20258 }, { "epoch": 0.67, "grad_norm": 0.74430912733078, "learning_rate": 4.950070241678473e-06, "loss": 2.0727, "step": 20259 }, { "epoch": 0.67, "grad_norm": 0.7148361802101135, "learning_rate": 4.9491528623843745e-06, "loss": 2.0758, "step": 20260 }, { "epoch": 0.67, "grad_norm": 0.7388547658920288, "learning_rate": 4.94823554015179e-06, "loss": 2.0403, "step": 20261 }, { "epoch": 0.67, "grad_norm": 0.7750593423843384, "learning_rate": 4.947318274991075e-06, "loss": 2.0008, "step": 20262 }, { "epoch": 0.67, "grad_norm": 0.7450194954872131, "learning_rate": 4.946401066912603e-06, "loss": 2.0381, "step": 20263 }, { "epoch": 0.67, "grad_norm": 0.7713590860366821, "learning_rate": 4.945483915926724e-06, "loss": 2.1119, "step": 20264 }, { "epoch": 0.67, "grad_norm": 0.7457413077354431, "learning_rate": 4.944566822043811e-06, "loss": 2.0469, "step": 20265 }, { "epoch": 0.67, "grad_norm": 0.7562053203582764, "learning_rate": 4.943649785274215e-06, "loss": 2.0163, "step": 20266 }, { "epoch": 0.67, "grad_norm": 0.7206932902336121, "learning_rate": 4.942732805628304e-06, "loss": 2.0275, "step": 20267 }, { "epoch": 0.67, "grad_norm": 0.7527848482131958, "learning_rate": 4.941815883116434e-06, "loss": 2.0329, "step": 20268 }, { "epoch": 0.67, "grad_norm": 0.7741339802742004, "learning_rate": 4.940899017748959e-06, "loss": 2.1259, "step": 20269 }, { "epoch": 0.67, "grad_norm": 0.7469882965087891, "learning_rate": 4.939982209536244e-06, "loss": 2.0885, "step": 20270 }, { "epoch": 0.67, "grad_norm": 0.7610406279563904, "learning_rate": 4.939065458488646e-06, "loss": 2.0591, "step": 20271 }, { "epoch": 0.67, "grad_norm": 0.7616428732872009, "learning_rate": 4.938148764616523e-06, "loss": 2.009, "step": 20272 }, { "epoch": 0.67, "grad_norm": 0.755438506603241, "learning_rate": 4.937232127930223e-06, "loss": 2.1145, "step": 20273 }, { "epoch": 0.67, "grad_norm": 0.7465696930885315, "learning_rate": 4.936315548440111e-06, "loss": 2.0181, "step": 20274 }, { "epoch": 0.67, "grad_norm": 0.7882199287414551, "learning_rate": 4.935399026156536e-06, "loss": 2.0389, "step": 20275 }, { "epoch": 0.67, "grad_norm": 0.7349933981895447, "learning_rate": 4.934482561089854e-06, "loss": 2.0716, "step": 20276 }, { "epoch": 0.67, "grad_norm": 0.766400158405304, "learning_rate": 4.933566153250426e-06, "loss": 2.0069, "step": 20277 }, { "epoch": 0.67, "grad_norm": 0.7231996655464172, "learning_rate": 4.932649802648593e-06, "loss": 2.1048, "step": 20278 }, { "epoch": 0.67, "grad_norm": 0.7518306374549866, "learning_rate": 4.931733509294711e-06, "loss": 2.0339, "step": 20279 }, { "epoch": 0.67, "grad_norm": 0.7173805236816406, "learning_rate": 4.930817273199138e-06, "loss": 1.9898, "step": 20280 }, { "epoch": 0.67, "grad_norm": 0.7579454183578491, "learning_rate": 4.929901094372219e-06, "loss": 2.106, "step": 20281 }, { "epoch": 0.67, "grad_norm": 0.7537594437599182, "learning_rate": 4.928984972824304e-06, "loss": 2.009, "step": 20282 }, { "epoch": 0.67, "grad_norm": 0.7648007869720459, "learning_rate": 4.928068908565748e-06, "loss": 1.9961, "step": 20283 }, { "epoch": 0.67, "grad_norm": 0.736355185508728, "learning_rate": 4.927152901606894e-06, "loss": 2.0935, "step": 20284 }, { "epoch": 0.67, "grad_norm": 0.7260897159576416, "learning_rate": 4.926236951958094e-06, "loss": 1.9959, "step": 20285 }, { "epoch": 0.67, "grad_norm": 0.7549333572387695, "learning_rate": 4.925321059629697e-06, "loss": 2.0424, "step": 20286 }, { "epoch": 0.67, "grad_norm": 0.7273990511894226, "learning_rate": 4.924405224632051e-06, "loss": 2.0846, "step": 20287 }, { "epoch": 0.67, "grad_norm": 0.7282859683036804, "learning_rate": 4.923489446975494e-06, "loss": 2.0618, "step": 20288 }, { "epoch": 0.68, "grad_norm": 0.7400189638137817, "learning_rate": 4.922573726670383e-06, "loss": 2.063, "step": 20289 }, { "epoch": 0.68, "grad_norm": 0.7513425350189209, "learning_rate": 4.921658063727059e-06, "loss": 2.1098, "step": 20290 }, { "epoch": 0.68, "grad_norm": 0.7649042010307312, "learning_rate": 4.9207424581558615e-06, "loss": 2.0703, "step": 20291 }, { "epoch": 0.68, "grad_norm": 0.7169502973556519, "learning_rate": 4.919826909967139e-06, "loss": 2.0656, "step": 20292 }, { "epoch": 0.68, "grad_norm": 0.7470426559448242, "learning_rate": 4.918911419171239e-06, "loss": 2.0896, "step": 20293 }, { "epoch": 0.68, "grad_norm": 0.7448184490203857, "learning_rate": 4.9179959857785e-06, "loss": 2.08, "step": 20294 }, { "epoch": 0.68, "grad_norm": 0.7486196160316467, "learning_rate": 4.91708060979926e-06, "loss": 2.0852, "step": 20295 }, { "epoch": 0.68, "grad_norm": 0.7555215358734131, "learning_rate": 4.9161652912438684e-06, "loss": 1.9551, "step": 20296 }, { "epoch": 0.68, "grad_norm": 0.7569241523742676, "learning_rate": 4.915250030122657e-06, "loss": 2.0467, "step": 20297 }, { "epoch": 0.68, "grad_norm": 0.7789692878723145, "learning_rate": 4.914334826445973e-06, "loss": 2.0798, "step": 20298 }, { "epoch": 0.68, "grad_norm": 0.7335914373397827, "learning_rate": 4.9134196802241584e-06, "loss": 2.1291, "step": 20299 }, { "epoch": 0.68, "grad_norm": 0.7551798224449158, "learning_rate": 4.912504591467542e-06, "loss": 2.0688, "step": 20300 }, { "epoch": 0.68, "grad_norm": 0.7534513473510742, "learning_rate": 4.911589560186466e-06, "loss": 2.0535, "step": 20301 }, { "epoch": 0.68, "grad_norm": 0.753381073474884, "learning_rate": 4.910674586391273e-06, "loss": 2.0995, "step": 20302 }, { "epoch": 0.68, "grad_norm": 0.7559484243392944, "learning_rate": 4.909759670092296e-06, "loss": 2.0351, "step": 20303 }, { "epoch": 0.68, "grad_norm": 0.7543895244598389, "learning_rate": 4.908844811299868e-06, "loss": 1.9691, "step": 20304 }, { "epoch": 0.68, "grad_norm": 0.7272427678108215, "learning_rate": 4.907930010024326e-06, "loss": 2.0241, "step": 20305 }, { "epoch": 0.68, "grad_norm": 0.7709632515907288, "learning_rate": 4.9070152662760115e-06, "loss": 2.0672, "step": 20306 }, { "epoch": 0.68, "grad_norm": 0.7403322458267212, "learning_rate": 4.90610058006525e-06, "loss": 2.0416, "step": 20307 }, { "epoch": 0.68, "grad_norm": 0.7713525295257568, "learning_rate": 4.905185951402382e-06, "loss": 2.0875, "step": 20308 }, { "epoch": 0.68, "grad_norm": 0.735372006893158, "learning_rate": 4.904271380297737e-06, "loss": 2.0153, "step": 20309 }, { "epoch": 0.68, "grad_norm": 0.7406299114227295, "learning_rate": 4.903356866761645e-06, "loss": 2.0424, "step": 20310 }, { "epoch": 0.68, "grad_norm": 0.771120548248291, "learning_rate": 4.902442410804439e-06, "loss": 2.0309, "step": 20311 }, { "epoch": 0.68, "grad_norm": 0.7251144051551819, "learning_rate": 4.901528012436459e-06, "loss": 2.0446, "step": 20312 }, { "epoch": 0.68, "grad_norm": 0.7609586715698242, "learning_rate": 4.9006136716680204e-06, "loss": 2.101, "step": 20313 }, { "epoch": 0.68, "grad_norm": 0.7532699108123779, "learning_rate": 4.8996993885094604e-06, "loss": 2.083, "step": 20314 }, { "epoch": 0.68, "grad_norm": 0.7276277542114258, "learning_rate": 4.89878516297111e-06, "loss": 2.07, "step": 20315 }, { "epoch": 0.68, "grad_norm": 0.7588284015655518, "learning_rate": 4.897870995063293e-06, "loss": 2.0662, "step": 20316 }, { "epoch": 0.68, "grad_norm": 0.7322894334793091, "learning_rate": 4.896956884796342e-06, "loss": 2.0547, "step": 20317 }, { "epoch": 0.68, "grad_norm": 0.7784355878829956, "learning_rate": 4.896042832180582e-06, "loss": 2.0884, "step": 20318 }, { "epoch": 0.68, "grad_norm": 0.7522271871566772, "learning_rate": 4.895128837226335e-06, "loss": 2.0993, "step": 20319 }, { "epoch": 0.68, "grad_norm": 0.7424781322479248, "learning_rate": 4.89421489994393e-06, "loss": 2.1079, "step": 20320 }, { "epoch": 0.68, "grad_norm": 0.724582314491272, "learning_rate": 4.893301020343697e-06, "loss": 2.0196, "step": 20321 }, { "epoch": 0.68, "grad_norm": 0.7345448732376099, "learning_rate": 4.892387198435957e-06, "loss": 2.095, "step": 20322 }, { "epoch": 0.68, "grad_norm": 0.7577356696128845, "learning_rate": 4.891473434231029e-06, "loss": 2.0118, "step": 20323 }, { "epoch": 0.68, "grad_norm": 0.7410258650779724, "learning_rate": 4.890559727739243e-06, "loss": 1.9982, "step": 20324 }, { "epoch": 0.68, "grad_norm": 0.7438172698020935, "learning_rate": 4.88964607897092e-06, "loss": 1.9892, "step": 20325 }, { "epoch": 0.68, "grad_norm": 0.7261031270027161, "learning_rate": 4.888732487936376e-06, "loss": 2.0531, "step": 20326 }, { "epoch": 0.68, "grad_norm": 0.722767174243927, "learning_rate": 4.887818954645938e-06, "loss": 2.1851, "step": 20327 }, { "epoch": 0.68, "grad_norm": 0.7393938899040222, "learning_rate": 4.886905479109928e-06, "loss": 2.0448, "step": 20328 }, { "epoch": 0.68, "grad_norm": 0.7540817260742188, "learning_rate": 4.885992061338659e-06, "loss": 2.0334, "step": 20329 }, { "epoch": 0.68, "grad_norm": 0.761029839515686, "learning_rate": 4.885078701342459e-06, "loss": 2.0829, "step": 20330 }, { "epoch": 0.68, "grad_norm": 0.7174654006958008, "learning_rate": 4.884165399131643e-06, "loss": 2.053, "step": 20331 }, { "epoch": 0.68, "grad_norm": 0.7766607403755188, "learning_rate": 4.883252154716525e-06, "loss": 2.145, "step": 20332 }, { "epoch": 0.68, "grad_norm": 0.7217127084732056, "learning_rate": 4.882338968107423e-06, "loss": 2.0547, "step": 20333 }, { "epoch": 0.68, "grad_norm": 0.7582725286483765, "learning_rate": 4.881425839314665e-06, "loss": 2.033, "step": 20334 }, { "epoch": 0.68, "grad_norm": 0.7554873824119568, "learning_rate": 4.8805127683485505e-06, "loss": 2.0552, "step": 20335 }, { "epoch": 0.68, "grad_norm": 0.7513056993484497, "learning_rate": 4.879599755219403e-06, "loss": 2.0338, "step": 20336 }, { "epoch": 0.68, "grad_norm": 0.7349498867988586, "learning_rate": 4.87868679993754e-06, "loss": 2.0714, "step": 20337 }, { "epoch": 0.68, "grad_norm": 0.7764919996261597, "learning_rate": 4.877773902513268e-06, "loss": 2.0858, "step": 20338 }, { "epoch": 0.68, "grad_norm": 0.7439562678337097, "learning_rate": 4.876861062956908e-06, "loss": 2.0247, "step": 20339 }, { "epoch": 0.68, "grad_norm": 0.7294931411743164, "learning_rate": 4.87594828127877e-06, "loss": 1.9829, "step": 20340 }, { "epoch": 0.68, "grad_norm": 0.7425527572631836, "learning_rate": 4.8750355574891616e-06, "loss": 2.1112, "step": 20341 }, { "epoch": 0.68, "grad_norm": 0.7498957514762878, "learning_rate": 4.874122891598397e-06, "loss": 2.1181, "step": 20342 }, { "epoch": 0.68, "grad_norm": 0.7695623636245728, "learning_rate": 4.873210283616793e-06, "loss": 2.0127, "step": 20343 }, { "epoch": 0.68, "grad_norm": 0.7829837203025818, "learning_rate": 4.872297733554654e-06, "loss": 1.9818, "step": 20344 }, { "epoch": 0.68, "grad_norm": 0.7342893481254578, "learning_rate": 4.871385241422286e-06, "loss": 1.9928, "step": 20345 }, { "epoch": 0.68, "grad_norm": 0.7282729744911194, "learning_rate": 4.870472807230005e-06, "loss": 2.0053, "step": 20346 }, { "epoch": 0.68, "grad_norm": 0.7584148049354553, "learning_rate": 4.869560430988119e-06, "loss": 2.1086, "step": 20347 }, { "epoch": 0.68, "grad_norm": 0.7623714208602905, "learning_rate": 4.8686481127069255e-06, "loss": 2.0209, "step": 20348 }, { "epoch": 0.68, "grad_norm": 0.7448371648788452, "learning_rate": 4.86773585239674e-06, "loss": 2.0369, "step": 20349 }, { "epoch": 0.68, "grad_norm": 0.7644042372703552, "learning_rate": 4.86682365006787e-06, "loss": 2.054, "step": 20350 }, { "epoch": 0.68, "grad_norm": 0.7371789813041687, "learning_rate": 4.865911505730615e-06, "loss": 1.9901, "step": 20351 }, { "epoch": 0.68, "grad_norm": 0.7283911108970642, "learning_rate": 4.864999419395285e-06, "loss": 2.0822, "step": 20352 }, { "epoch": 0.68, "grad_norm": 0.748014509677887, "learning_rate": 4.864087391072184e-06, "loss": 2.0598, "step": 20353 }, { "epoch": 0.68, "grad_norm": 0.759214460849762, "learning_rate": 4.863175420771609e-06, "loss": 2.0407, "step": 20354 }, { "epoch": 0.68, "grad_norm": 0.7503107190132141, "learning_rate": 4.86226350850387e-06, "loss": 2.0753, "step": 20355 }, { "epoch": 0.68, "grad_norm": 0.7295525074005127, "learning_rate": 4.861351654279272e-06, "loss": 2.0302, "step": 20356 }, { "epoch": 0.68, "grad_norm": 0.7519281506538391, "learning_rate": 4.860439858108104e-06, "loss": 2.0355, "step": 20357 }, { "epoch": 0.68, "grad_norm": 0.7402242422103882, "learning_rate": 4.859528120000675e-06, "loss": 2.1186, "step": 20358 }, { "epoch": 0.68, "grad_norm": 0.7495577931404114, "learning_rate": 4.8586164399672875e-06, "loss": 2.0934, "step": 20359 }, { "epoch": 0.68, "grad_norm": 0.7431715726852417, "learning_rate": 4.857704818018235e-06, "loss": 2.0549, "step": 20360 }, { "epoch": 0.68, "grad_norm": 0.7669147849082947, "learning_rate": 4.856793254163824e-06, "loss": 2.0875, "step": 20361 }, { "epoch": 0.68, "grad_norm": 0.7521370649337769, "learning_rate": 4.85588174841435e-06, "loss": 2.0338, "step": 20362 }, { "epoch": 0.68, "grad_norm": 0.7257447838783264, "learning_rate": 4.854970300780103e-06, "loss": 2.1013, "step": 20363 }, { "epoch": 0.68, "grad_norm": 0.7225533723831177, "learning_rate": 4.854058911271387e-06, "loss": 1.9696, "step": 20364 }, { "epoch": 0.68, "grad_norm": 0.7443277835845947, "learning_rate": 4.853147579898502e-06, "loss": 2.0066, "step": 20365 }, { "epoch": 0.68, "grad_norm": 0.7521112561225891, "learning_rate": 4.852236306671738e-06, "loss": 2.1167, "step": 20366 }, { "epoch": 0.68, "grad_norm": 0.7702720761299133, "learning_rate": 4.851325091601388e-06, "loss": 2.0378, "step": 20367 }, { "epoch": 0.68, "grad_norm": 0.7443459630012512, "learning_rate": 4.850413934697755e-06, "loss": 1.99, "step": 20368 }, { "epoch": 0.68, "grad_norm": 0.7265153527259827, "learning_rate": 4.8495028359711226e-06, "loss": 2.0596, "step": 20369 }, { "epoch": 0.68, "grad_norm": 0.7205891609191895, "learning_rate": 4.848591795431792e-06, "loss": 1.9984, "step": 20370 }, { "epoch": 0.68, "grad_norm": 0.7610408663749695, "learning_rate": 4.847680813090049e-06, "loss": 2.0627, "step": 20371 }, { "epoch": 0.68, "grad_norm": 0.7298734784126282, "learning_rate": 4.846769888956192e-06, "loss": 2.0032, "step": 20372 }, { "epoch": 0.68, "grad_norm": 0.7168726325035095, "learning_rate": 4.845859023040506e-06, "loss": 2.0882, "step": 20373 }, { "epoch": 0.68, "grad_norm": 0.7291662096977234, "learning_rate": 4.8449482153532865e-06, "loss": 2.0484, "step": 20374 }, { "epoch": 0.68, "grad_norm": 0.7627370953559875, "learning_rate": 4.844037465904821e-06, "loss": 2.0183, "step": 20375 }, { "epoch": 0.68, "grad_norm": 0.7389827966690063, "learning_rate": 4.843126774705396e-06, "loss": 2.0617, "step": 20376 }, { "epoch": 0.68, "grad_norm": 0.742956817150116, "learning_rate": 4.842216141765301e-06, "loss": 2.0683, "step": 20377 }, { "epoch": 0.68, "grad_norm": 0.750640332698822, "learning_rate": 4.841305567094834e-06, "loss": 2.0704, "step": 20378 }, { "epoch": 0.68, "grad_norm": 0.7446610331535339, "learning_rate": 4.840395050704266e-06, "loss": 2.0673, "step": 20379 }, { "epoch": 0.68, "grad_norm": 0.7661430835723877, "learning_rate": 4.8394845926038905e-06, "loss": 2.0234, "step": 20380 }, { "epoch": 0.68, "grad_norm": 0.7378416657447815, "learning_rate": 4.838574192803996e-06, "loss": 2.0595, "step": 20381 }, { "epoch": 0.68, "grad_norm": 0.7276251912117004, "learning_rate": 4.837663851314863e-06, "loss": 2.047, "step": 20382 }, { "epoch": 0.68, "grad_norm": 0.7150817513465881, "learning_rate": 4.8367535681467825e-06, "loss": 2.0456, "step": 20383 }, { "epoch": 0.68, "grad_norm": 0.7495589256286621, "learning_rate": 4.835843343310034e-06, "loss": 2.0572, "step": 20384 }, { "epoch": 0.68, "grad_norm": 0.7423121333122253, "learning_rate": 4.834933176814897e-06, "loss": 1.9679, "step": 20385 }, { "epoch": 0.68, "grad_norm": 0.7680132985115051, "learning_rate": 4.834023068671658e-06, "loss": 2.1354, "step": 20386 }, { "epoch": 0.68, "grad_norm": 0.7638832330703735, "learning_rate": 4.8331130188906026e-06, "loss": 2.0261, "step": 20387 }, { "epoch": 0.68, "grad_norm": 0.742345929145813, "learning_rate": 4.832203027482008e-06, "loss": 2.052, "step": 20388 }, { "epoch": 0.68, "grad_norm": 0.7553034424781799, "learning_rate": 4.8312930944561505e-06, "loss": 2.0596, "step": 20389 }, { "epoch": 0.68, "grad_norm": 0.7282969951629639, "learning_rate": 4.830383219823319e-06, "loss": 2.0171, "step": 20390 }, { "epoch": 0.68, "grad_norm": 0.7771686911582947, "learning_rate": 4.829473403593785e-06, "loss": 1.9674, "step": 20391 }, { "epoch": 0.68, "grad_norm": 0.7629665732383728, "learning_rate": 4.828563645777834e-06, "loss": 2.0723, "step": 20392 }, { "epoch": 0.68, "grad_norm": 0.7780708074569702, "learning_rate": 4.827653946385735e-06, "loss": 2.1162, "step": 20393 }, { "epoch": 0.68, "grad_norm": 0.8334059119224548, "learning_rate": 4.826744305427775e-06, "loss": 2.0355, "step": 20394 }, { "epoch": 0.68, "grad_norm": 0.7552018165588379, "learning_rate": 4.825834722914222e-06, "loss": 2.065, "step": 20395 }, { "epoch": 0.68, "grad_norm": 0.7361000776290894, "learning_rate": 4.8249251988553604e-06, "loss": 2.0541, "step": 20396 }, { "epoch": 0.68, "grad_norm": 0.7520321607589722, "learning_rate": 4.824015733261461e-06, "loss": 2.1027, "step": 20397 }, { "epoch": 0.68, "grad_norm": 0.739631175994873, "learning_rate": 4.823106326142794e-06, "loss": 2.0566, "step": 20398 }, { "epoch": 0.68, "grad_norm": 0.7471319437026978, "learning_rate": 4.822196977509637e-06, "loss": 2.0837, "step": 20399 }, { "epoch": 0.68, "grad_norm": 0.7342908382415771, "learning_rate": 4.82128768737227e-06, "loss": 1.9992, "step": 20400 }, { "epoch": 0.68, "grad_norm": 0.7498869895935059, "learning_rate": 4.820378455740958e-06, "loss": 2.042, "step": 20401 }, { "epoch": 0.68, "grad_norm": 0.7480858564376831, "learning_rate": 4.819469282625971e-06, "loss": 1.9531, "step": 20402 }, { "epoch": 0.68, "grad_norm": 0.7311519384384155, "learning_rate": 4.818560168037589e-06, "loss": 2.07, "step": 20403 }, { "epoch": 0.68, "grad_norm": 0.7690201997756958, "learning_rate": 4.817651111986073e-06, "loss": 2.1156, "step": 20404 }, { "epoch": 0.68, "grad_norm": 0.7537568211555481, "learning_rate": 4.816742114481702e-06, "loss": 2.0254, "step": 20405 }, { "epoch": 0.68, "grad_norm": 0.7524348497390747, "learning_rate": 4.815833175534736e-06, "loss": 2.0702, "step": 20406 }, { "epoch": 0.68, "grad_norm": 0.7387667298316956, "learning_rate": 4.814924295155453e-06, "loss": 2.0797, "step": 20407 }, { "epoch": 0.68, "grad_norm": 0.7292746305465698, "learning_rate": 4.814015473354112e-06, "loss": 2.0995, "step": 20408 }, { "epoch": 0.68, "grad_norm": 0.7267752289772034, "learning_rate": 4.81310671014099e-06, "loss": 2.0697, "step": 20409 }, { "epoch": 0.68, "grad_norm": 0.723987877368927, "learning_rate": 4.812198005526348e-06, "loss": 1.9733, "step": 20410 }, { "epoch": 0.68, "grad_norm": 0.7221235036849976, "learning_rate": 4.811289359520448e-06, "loss": 2.0677, "step": 20411 }, { "epoch": 0.68, "grad_norm": 0.7846252918243408, "learning_rate": 4.810380772133561e-06, "loss": 2.0519, "step": 20412 }, { "epoch": 0.68, "grad_norm": 0.7510960698127747, "learning_rate": 4.8094722433759535e-06, "loss": 2.057, "step": 20413 }, { "epoch": 0.68, "grad_norm": 0.7808570861816406, "learning_rate": 4.8085637732578874e-06, "loss": 1.9589, "step": 20414 }, { "epoch": 0.68, "grad_norm": 0.7436812520027161, "learning_rate": 4.80765536178962e-06, "loss": 2.0733, "step": 20415 }, { "epoch": 0.68, "grad_norm": 0.7322107553482056, "learning_rate": 4.806747008981425e-06, "loss": 2.0411, "step": 20416 }, { "epoch": 0.68, "grad_norm": 0.7608382105827332, "learning_rate": 4.8058387148435544e-06, "loss": 2.0774, "step": 20417 }, { "epoch": 0.68, "grad_norm": 0.7409543991088867, "learning_rate": 4.804930479386278e-06, "loss": 2.0456, "step": 20418 }, { "epoch": 0.68, "grad_norm": 0.7738876342773438, "learning_rate": 4.804022302619852e-06, "loss": 2.0681, "step": 20419 }, { "epoch": 0.68, "grad_norm": 0.7454972863197327, "learning_rate": 4.8031141845545326e-06, "loss": 2.0065, "step": 20420 }, { "epoch": 0.68, "grad_norm": 0.7192872762680054, "learning_rate": 4.802206125200585e-06, "loss": 2.0718, "step": 20421 }, { "epoch": 0.68, "grad_norm": 0.7515000700950623, "learning_rate": 4.801298124568269e-06, "loss": 2.0854, "step": 20422 }, { "epoch": 0.68, "grad_norm": 0.7728748917579651, "learning_rate": 4.80039018266784e-06, "loss": 2.0832, "step": 20423 }, { "epoch": 0.68, "grad_norm": 0.7670896053314209, "learning_rate": 4.799482299509551e-06, "loss": 2.0419, "step": 20424 }, { "epoch": 0.68, "grad_norm": 0.7562118172645569, "learning_rate": 4.798574475103669e-06, "loss": 2.0281, "step": 20425 }, { "epoch": 0.68, "grad_norm": 0.7317204475402832, "learning_rate": 4.797666709460439e-06, "loss": 2.0709, "step": 20426 }, { "epoch": 0.68, "grad_norm": 0.758247971534729, "learning_rate": 4.796759002590126e-06, "loss": 2.0628, "step": 20427 }, { "epoch": 0.68, "grad_norm": 0.723442554473877, "learning_rate": 4.795851354502976e-06, "loss": 2.0266, "step": 20428 }, { "epoch": 0.68, "grad_norm": 0.7600600123405457, "learning_rate": 4.794943765209251e-06, "loss": 2.0312, "step": 20429 }, { "epoch": 0.68, "grad_norm": 0.7232041358947754, "learning_rate": 4.794036234719198e-06, "loss": 2.0026, "step": 20430 }, { "epoch": 0.68, "grad_norm": 0.7174778580665588, "learning_rate": 4.793128763043077e-06, "loss": 2.0214, "step": 20431 }, { "epoch": 0.68, "grad_norm": 0.724126398563385, "learning_rate": 4.792221350191134e-06, "loss": 2.0104, "step": 20432 }, { "epoch": 0.68, "grad_norm": 0.7235143184661865, "learning_rate": 4.791313996173619e-06, "loss": 2.0621, "step": 20433 }, { "epoch": 0.68, "grad_norm": 0.7409482002258301, "learning_rate": 4.790406701000786e-06, "loss": 2.0634, "step": 20434 }, { "epoch": 0.68, "grad_norm": 0.759099543094635, "learning_rate": 4.789499464682889e-06, "loss": 2.0579, "step": 20435 }, { "epoch": 0.68, "grad_norm": 0.7290852665901184, "learning_rate": 4.7885922872301734e-06, "loss": 2.0414, "step": 20436 }, { "epoch": 0.68, "grad_norm": 0.7448917031288147, "learning_rate": 4.787685168652883e-06, "loss": 2.0277, "step": 20437 }, { "epoch": 0.68, "grad_norm": 0.7272475361824036, "learning_rate": 4.786778108961277e-06, "loss": 2.0372, "step": 20438 }, { "epoch": 0.68, "grad_norm": 0.7505134344100952, "learning_rate": 4.785871108165591e-06, "loss": 2.0067, "step": 20439 }, { "epoch": 0.68, "grad_norm": 0.7514858841896057, "learning_rate": 4.784964166276082e-06, "loss": 2.1668, "step": 20440 }, { "epoch": 0.68, "grad_norm": 0.7385721206665039, "learning_rate": 4.784057283302991e-06, "loss": 2.0606, "step": 20441 }, { "epoch": 0.68, "grad_norm": 0.7280479073524475, "learning_rate": 4.7831504592565605e-06, "loss": 2.0884, "step": 20442 }, { "epoch": 0.68, "grad_norm": 0.7752751111984253, "learning_rate": 4.782243694147038e-06, "loss": 2.1108, "step": 20443 }, { "epoch": 0.68, "grad_norm": 0.7416374087333679, "learning_rate": 4.781336987984672e-06, "loss": 2.0289, "step": 20444 }, { "epoch": 0.68, "grad_norm": 0.7601982951164246, "learning_rate": 4.780430340779705e-06, "loss": 1.9475, "step": 20445 }, { "epoch": 0.68, "grad_norm": 0.7930278182029724, "learning_rate": 4.779523752542371e-06, "loss": 2.1037, "step": 20446 }, { "epoch": 0.68, "grad_norm": 0.7413278818130493, "learning_rate": 4.778617223282922e-06, "loss": 2.0806, "step": 20447 }, { "epoch": 0.68, "grad_norm": 0.7162112593650818, "learning_rate": 4.777710753011592e-06, "loss": 2.1349, "step": 20448 }, { "epoch": 0.68, "grad_norm": 0.750184178352356, "learning_rate": 4.776804341738629e-06, "loss": 2.0645, "step": 20449 }, { "epoch": 0.68, "grad_norm": 0.7359859943389893, "learning_rate": 4.775897989474266e-06, "loss": 2.0391, "step": 20450 }, { "epoch": 0.68, "grad_norm": 0.7356467247009277, "learning_rate": 4.774991696228749e-06, "loss": 1.9737, "step": 20451 }, { "epoch": 0.68, "grad_norm": 0.7214052081108093, "learning_rate": 4.774085462012311e-06, "loss": 2.0512, "step": 20452 }, { "epoch": 0.68, "grad_norm": 0.7331635355949402, "learning_rate": 4.773179286835196e-06, "loss": 2.0202, "step": 20453 }, { "epoch": 0.68, "grad_norm": 0.7434374094009399, "learning_rate": 4.7722731707076375e-06, "loss": 2.0918, "step": 20454 }, { "epoch": 0.68, "grad_norm": 0.7501091361045837, "learning_rate": 4.77136711363987e-06, "loss": 2.0839, "step": 20455 }, { "epoch": 0.68, "grad_norm": 0.764359176158905, "learning_rate": 4.770461115642133e-06, "loss": 2.1057, "step": 20456 }, { "epoch": 0.68, "grad_norm": 0.7278562784194946, "learning_rate": 4.769555176724664e-06, "loss": 2.017, "step": 20457 }, { "epoch": 0.68, "grad_norm": 0.7361772656440735, "learning_rate": 4.768649296897696e-06, "loss": 2.0697, "step": 20458 }, { "epoch": 0.68, "grad_norm": 0.7353608012199402, "learning_rate": 4.767743476171459e-06, "loss": 2.0007, "step": 20459 }, { "epoch": 0.68, "grad_norm": 0.737897515296936, "learning_rate": 4.766837714556193e-06, "loss": 2.0216, "step": 20460 }, { "epoch": 0.68, "grad_norm": 0.7516582608222961, "learning_rate": 4.765932012062124e-06, "loss": 2.0516, "step": 20461 }, { "epoch": 0.68, "grad_norm": 0.7193740606307983, "learning_rate": 4.765026368699488e-06, "loss": 2.0644, "step": 20462 }, { "epoch": 0.68, "grad_norm": 0.7444588541984558, "learning_rate": 4.7641207844785235e-06, "loss": 2.0242, "step": 20463 }, { "epoch": 0.68, "grad_norm": 0.7530665993690491, "learning_rate": 4.763215259409445e-06, "loss": 2.0757, "step": 20464 }, { "epoch": 0.68, "grad_norm": 0.751469075679779, "learning_rate": 4.762309793502493e-06, "loss": 1.9776, "step": 20465 }, { "epoch": 0.68, "grad_norm": 0.7186638116836548, "learning_rate": 4.761404386767898e-06, "loss": 2.0801, "step": 20466 }, { "epoch": 0.68, "grad_norm": 0.7297989130020142, "learning_rate": 4.760499039215887e-06, "loss": 2.0574, "step": 20467 }, { "epoch": 0.68, "grad_norm": 0.7413662075996399, "learning_rate": 4.759593750856684e-06, "loss": 2.0585, "step": 20468 }, { "epoch": 0.68, "grad_norm": 0.7596720457077026, "learning_rate": 4.758688521700522e-06, "loss": 2.0655, "step": 20469 }, { "epoch": 0.68, "grad_norm": 0.7487229108810425, "learning_rate": 4.7577833517576225e-06, "loss": 2.0336, "step": 20470 }, { "epoch": 0.68, "grad_norm": 0.7519277930259705, "learning_rate": 4.756878241038218e-06, "loss": 2.0469, "step": 20471 }, { "epoch": 0.68, "grad_norm": 0.7618056535720825, "learning_rate": 4.755973189552526e-06, "loss": 2.0758, "step": 20472 }, { "epoch": 0.68, "grad_norm": 0.7563413381576538, "learning_rate": 4.755068197310779e-06, "loss": 1.9951, "step": 20473 }, { "epoch": 0.68, "grad_norm": 0.7489362955093384, "learning_rate": 4.754163264323195e-06, "loss": 2.0276, "step": 20474 }, { "epoch": 0.68, "grad_norm": 0.762759804725647, "learning_rate": 4.753258390600004e-06, "loss": 2.0855, "step": 20475 }, { "epoch": 0.68, "grad_norm": 0.7637871503829956, "learning_rate": 4.752353576151425e-06, "loss": 1.9752, "step": 20476 }, { "epoch": 0.68, "grad_norm": 0.7573428153991699, "learning_rate": 4.7514488209876756e-06, "loss": 2.0404, "step": 20477 }, { "epoch": 0.68, "grad_norm": 0.7467048764228821, "learning_rate": 4.750544125118981e-06, "loss": 2.0644, "step": 20478 }, { "epoch": 0.68, "grad_norm": 0.7243214845657349, "learning_rate": 4.749639488555567e-06, "loss": 2.0781, "step": 20479 }, { "epoch": 0.68, "grad_norm": 0.714649498462677, "learning_rate": 4.7487349113076475e-06, "loss": 2.0241, "step": 20480 }, { "epoch": 0.68, "grad_norm": 0.7398096919059753, "learning_rate": 4.747830393385441e-06, "loss": 2.0858, "step": 20481 }, { "epoch": 0.68, "grad_norm": 0.7399649620056152, "learning_rate": 4.746925934799173e-06, "loss": 2.0609, "step": 20482 }, { "epoch": 0.68, "grad_norm": 0.7404807209968567, "learning_rate": 4.746021535559053e-06, "loss": 2.0778, "step": 20483 }, { "epoch": 0.68, "grad_norm": 0.7340667843818665, "learning_rate": 4.745117195675301e-06, "loss": 2.0773, "step": 20484 }, { "epoch": 0.68, "grad_norm": 0.7648415565490723, "learning_rate": 4.744212915158144e-06, "loss": 2.0674, "step": 20485 }, { "epoch": 0.68, "grad_norm": 0.7472794651985168, "learning_rate": 4.743308694017782e-06, "loss": 2.0145, "step": 20486 }, { "epoch": 0.68, "grad_norm": 0.7433467507362366, "learning_rate": 4.742404532264437e-06, "loss": 2.0904, "step": 20487 }, { "epoch": 0.68, "grad_norm": 0.7263256907463074, "learning_rate": 4.741500429908328e-06, "loss": 2.1474, "step": 20488 }, { "epoch": 0.68, "grad_norm": 0.7645857334136963, "learning_rate": 4.740596386959666e-06, "loss": 2.115, "step": 20489 }, { "epoch": 0.68, "grad_norm": 0.730527937412262, "learning_rate": 4.73969240342866e-06, "loss": 1.9591, "step": 20490 }, { "epoch": 0.68, "grad_norm": 0.7711911797523499, "learning_rate": 4.7387884793255305e-06, "loss": 2.0992, "step": 20491 }, { "epoch": 0.68, "grad_norm": 0.7540721893310547, "learning_rate": 4.737884614660481e-06, "loss": 2.033, "step": 20492 }, { "epoch": 0.68, "grad_norm": 0.750450611114502, "learning_rate": 4.7369808094437265e-06, "loss": 2.0707, "step": 20493 }, { "epoch": 0.68, "grad_norm": 0.7450700402259827, "learning_rate": 4.736077063685482e-06, "loss": 2.1053, "step": 20494 }, { "epoch": 0.68, "grad_norm": 0.7187801003456116, "learning_rate": 4.735173377395955e-06, "loss": 2.0051, "step": 20495 }, { "epoch": 0.68, "grad_norm": 0.7522356510162354, "learning_rate": 4.734269750585351e-06, "loss": 2.0737, "step": 20496 }, { "epoch": 0.68, "grad_norm": 0.7331739664077759, "learning_rate": 4.733366183263879e-06, "loss": 2.129, "step": 20497 }, { "epoch": 0.68, "grad_norm": 0.7554284334182739, "learning_rate": 4.7324626754417576e-06, "loss": 2.0646, "step": 20498 }, { "epoch": 0.68, "grad_norm": 0.7666828632354736, "learning_rate": 4.731559227129179e-06, "loss": 2.0682, "step": 20499 }, { "epoch": 0.68, "grad_norm": 0.7296462059020996, "learning_rate": 4.730655838336356e-06, "loss": 2.0792, "step": 20500 }, { "epoch": 0.68, "grad_norm": 0.7414780855178833, "learning_rate": 4.7297525090735e-06, "loss": 1.9611, "step": 20501 }, { "epoch": 0.68, "grad_norm": 0.7634021639823914, "learning_rate": 4.7288492393508105e-06, "loss": 2.0707, "step": 20502 }, { "epoch": 0.68, "grad_norm": 0.712399423122406, "learning_rate": 4.727946029178489e-06, "loss": 2.0153, "step": 20503 }, { "epoch": 0.68, "grad_norm": 0.7428499460220337, "learning_rate": 4.727042878566748e-06, "loss": 2.0316, "step": 20504 }, { "epoch": 0.68, "grad_norm": 0.772386908531189, "learning_rate": 4.726139787525782e-06, "loss": 2.0018, "step": 20505 }, { "epoch": 0.68, "grad_norm": 0.7363370060920715, "learning_rate": 4.725236756065798e-06, "loss": 2.0829, "step": 20506 }, { "epoch": 0.68, "grad_norm": 0.7690001130104065, "learning_rate": 4.724333784197002e-06, "loss": 2.0778, "step": 20507 }, { "epoch": 0.68, "grad_norm": 0.7524162530899048, "learning_rate": 4.723430871929591e-06, "loss": 2.074, "step": 20508 }, { "epoch": 0.68, "grad_norm": 0.7658815383911133, "learning_rate": 4.722528019273762e-06, "loss": 2.1016, "step": 20509 }, { "epoch": 0.68, "grad_norm": 0.7552663087844849, "learning_rate": 4.721625226239721e-06, "loss": 2.0229, "step": 20510 }, { "epoch": 0.68, "grad_norm": 0.7261641025543213, "learning_rate": 4.720722492837666e-06, "loss": 2.0887, "step": 20511 }, { "epoch": 0.68, "grad_norm": 0.7457427382469177, "learning_rate": 4.719819819077791e-06, "loss": 2.0321, "step": 20512 }, { "epoch": 0.68, "grad_norm": 0.7392370700836182, "learning_rate": 4.718917204970296e-06, "loss": 1.9621, "step": 20513 }, { "epoch": 0.68, "grad_norm": 0.7675729393959045, "learning_rate": 4.718014650525384e-06, "loss": 2.0704, "step": 20514 }, { "epoch": 0.68, "grad_norm": 0.7457990050315857, "learning_rate": 4.717112155753243e-06, "loss": 2.0906, "step": 20515 }, { "epoch": 0.68, "grad_norm": 0.7620640397071838, "learning_rate": 4.716209720664076e-06, "loss": 2.0682, "step": 20516 }, { "epoch": 0.68, "grad_norm": 0.751504123210907, "learning_rate": 4.715307345268075e-06, "loss": 2.031, "step": 20517 }, { "epoch": 0.68, "grad_norm": 0.7397680282592773, "learning_rate": 4.7144050295754315e-06, "loss": 2.0697, "step": 20518 }, { "epoch": 0.68, "grad_norm": 0.7322844862937927, "learning_rate": 4.713502773596342e-06, "loss": 2.054, "step": 20519 }, { "epoch": 0.68, "grad_norm": 0.744929850101471, "learning_rate": 4.712600577341008e-06, "loss": 2.0255, "step": 20520 }, { "epoch": 0.68, "grad_norm": 0.7625748515129089, "learning_rate": 4.711698440819606e-06, "loss": 2.0862, "step": 20521 }, { "epoch": 0.68, "grad_norm": 0.7595180869102478, "learning_rate": 4.7107963640423345e-06, "loss": 2.0425, "step": 20522 }, { "epoch": 0.68, "grad_norm": 0.7510827779769897, "learning_rate": 4.7098943470193915e-06, "loss": 2.0521, "step": 20523 }, { "epoch": 0.68, "grad_norm": 0.7614597082138062, "learning_rate": 4.70899238976096e-06, "loss": 1.9971, "step": 20524 }, { "epoch": 0.68, "grad_norm": 0.725976288318634, "learning_rate": 4.708090492277229e-06, "loss": 2.0315, "step": 20525 }, { "epoch": 0.68, "grad_norm": 0.7496153116226196, "learning_rate": 4.707188654578395e-06, "loss": 2.0728, "step": 20526 }, { "epoch": 0.68, "grad_norm": 0.7535601854324341, "learning_rate": 4.706286876674636e-06, "loss": 2.0798, "step": 20527 }, { "epoch": 0.68, "grad_norm": 0.7332228422164917, "learning_rate": 4.705385158576146e-06, "loss": 2.0031, "step": 20528 }, { "epoch": 0.68, "grad_norm": 0.754009485244751, "learning_rate": 4.704483500293117e-06, "loss": 2.0683, "step": 20529 }, { "epoch": 0.68, "grad_norm": 0.7289254665374756, "learning_rate": 4.703581901835729e-06, "loss": 2.0641, "step": 20530 }, { "epoch": 0.68, "grad_norm": 0.7583450078964233, "learning_rate": 4.702680363214164e-06, "loss": 2.0636, "step": 20531 }, { "epoch": 0.68, "grad_norm": 0.723491907119751, "learning_rate": 4.701778884438616e-06, "loss": 2.1046, "step": 20532 }, { "epoch": 0.68, "grad_norm": 0.7596973180770874, "learning_rate": 4.700877465519264e-06, "loss": 2.0808, "step": 20533 }, { "epoch": 0.68, "grad_norm": 0.71733158826828, "learning_rate": 4.699976106466291e-06, "loss": 2.0931, "step": 20534 }, { "epoch": 0.68, "grad_norm": 0.7321738600730896, "learning_rate": 4.69907480728988e-06, "loss": 2.0512, "step": 20535 }, { "epoch": 0.68, "grad_norm": 0.733470618724823, "learning_rate": 4.69817356800022e-06, "loss": 2.0778, "step": 20536 }, { "epoch": 0.68, "grad_norm": 0.7388619780540466, "learning_rate": 4.6972723886074845e-06, "loss": 2.0839, "step": 20537 }, { "epoch": 0.68, "grad_norm": 0.7670189142227173, "learning_rate": 4.696371269121862e-06, "loss": 2.1255, "step": 20538 }, { "epoch": 0.68, "grad_norm": 0.7171437740325928, "learning_rate": 4.6954702095535276e-06, "loss": 2.1051, "step": 20539 }, { "epoch": 0.68, "grad_norm": 0.7075817584991455, "learning_rate": 4.694569209912658e-06, "loss": 2.1092, "step": 20540 }, { "epoch": 0.68, "grad_norm": 0.7409306764602661, "learning_rate": 4.693668270209437e-06, "loss": 2.0543, "step": 20541 }, { "epoch": 0.68, "grad_norm": 0.782078206539154, "learning_rate": 4.692767390454049e-06, "loss": 2.0682, "step": 20542 }, { "epoch": 0.68, "grad_norm": 0.7323274612426758, "learning_rate": 4.691866570656658e-06, "loss": 2.0801, "step": 20543 }, { "epoch": 0.68, "grad_norm": 0.7530894875526428, "learning_rate": 4.690965810827447e-06, "loss": 2.0167, "step": 20544 }, { "epoch": 0.68, "grad_norm": 0.7447795271873474, "learning_rate": 4.690065110976596e-06, "loss": 1.9928, "step": 20545 }, { "epoch": 0.68, "grad_norm": 0.7484596967697144, "learning_rate": 4.689164471114274e-06, "loss": 2.0922, "step": 20546 }, { "epoch": 0.68, "grad_norm": 0.7381933331489563, "learning_rate": 4.688263891250664e-06, "loss": 1.9893, "step": 20547 }, { "epoch": 0.68, "grad_norm": 0.7429854273796082, "learning_rate": 4.687363371395934e-06, "loss": 2.0649, "step": 20548 }, { "epoch": 0.68, "grad_norm": 0.7724708318710327, "learning_rate": 4.686462911560257e-06, "loss": 2.0202, "step": 20549 }, { "epoch": 0.68, "grad_norm": 0.7481154203414917, "learning_rate": 4.685562511753807e-06, "loss": 2.0692, "step": 20550 }, { "epoch": 0.68, "grad_norm": 0.7638412117958069, "learning_rate": 4.6846621719867615e-06, "loss": 2.0186, "step": 20551 }, { "epoch": 0.68, "grad_norm": 0.7455574870109558, "learning_rate": 4.683761892269287e-06, "loss": 2.0613, "step": 20552 }, { "epoch": 0.68, "grad_norm": 0.733113706111908, "learning_rate": 4.682861672611553e-06, "loss": 2.0565, "step": 20553 }, { "epoch": 0.68, "grad_norm": 0.7891930341720581, "learning_rate": 4.681961513023734e-06, "loss": 2.0146, "step": 20554 }, { "epoch": 0.68, "grad_norm": 0.7603203058242798, "learning_rate": 4.681061413515997e-06, "loss": 2.06, "step": 20555 }, { "epoch": 0.68, "grad_norm": 0.7696518301963806, "learning_rate": 4.680161374098508e-06, "loss": 2.0463, "step": 20556 }, { "epoch": 0.68, "grad_norm": 0.7396694421768188, "learning_rate": 4.679261394781437e-06, "loss": 2.098, "step": 20557 }, { "epoch": 0.68, "grad_norm": 0.7369046807289124, "learning_rate": 4.678361475574956e-06, "loss": 1.9948, "step": 20558 }, { "epoch": 0.68, "grad_norm": 0.752503514289856, "learning_rate": 4.677461616489226e-06, "loss": 2.0432, "step": 20559 }, { "epoch": 0.68, "grad_norm": 0.7310574054718018, "learning_rate": 4.676561817534419e-06, "loss": 2.0153, "step": 20560 }, { "epoch": 0.68, "grad_norm": 0.7470492720603943, "learning_rate": 4.675662078720695e-06, "loss": 2.0997, "step": 20561 }, { "epoch": 0.68, "grad_norm": 0.7730416059494019, "learning_rate": 4.674762400058218e-06, "loss": 2.0878, "step": 20562 }, { "epoch": 0.68, "grad_norm": 0.7962812781333923, "learning_rate": 4.673862781557154e-06, "loss": 2.1179, "step": 20563 }, { "epoch": 0.68, "grad_norm": 0.7277517318725586, "learning_rate": 4.672963223227676e-06, "loss": 2.0518, "step": 20564 }, { "epoch": 0.68, "grad_norm": 0.7481294870376587, "learning_rate": 4.672063725079929e-06, "loss": 2.0493, "step": 20565 }, { "epoch": 0.68, "grad_norm": 0.7629480361938477, "learning_rate": 4.671164287124083e-06, "loss": 2.0815, "step": 20566 }, { "epoch": 0.68, "grad_norm": 0.7178028225898743, "learning_rate": 4.670264909370304e-06, "loss": 2.0346, "step": 20567 }, { "epoch": 0.68, "grad_norm": 0.7206147909164429, "learning_rate": 4.669365591828744e-06, "loss": 1.9999, "step": 20568 }, { "epoch": 0.68, "grad_norm": 0.7332802414894104, "learning_rate": 4.668466334509573e-06, "loss": 1.981, "step": 20569 }, { "epoch": 0.68, "grad_norm": 0.7353415489196777, "learning_rate": 4.6675671374229436e-06, "loss": 2.1159, "step": 20570 }, { "epoch": 0.68, "grad_norm": 0.7349573373794556, "learning_rate": 4.666668000579011e-06, "loss": 2.0362, "step": 20571 }, { "epoch": 0.68, "grad_norm": 0.7618395090103149, "learning_rate": 4.665768923987939e-06, "loss": 2.0612, "step": 20572 }, { "epoch": 0.68, "grad_norm": 0.8040956854820251, "learning_rate": 4.664869907659887e-06, "loss": 2.0582, "step": 20573 }, { "epoch": 0.68, "grad_norm": 0.7944275140762329, "learning_rate": 4.663970951605008e-06, "loss": 2.0161, "step": 20574 }, { "epoch": 0.68, "grad_norm": 0.7462409138679504, "learning_rate": 4.663072055833454e-06, "loss": 2.0926, "step": 20575 }, { "epoch": 0.68, "grad_norm": 0.7482702732086182, "learning_rate": 4.662173220355389e-06, "loss": 2.0514, "step": 20576 }, { "epoch": 0.68, "grad_norm": 0.7561256289482117, "learning_rate": 4.661274445180958e-06, "loss": 2.0495, "step": 20577 }, { "epoch": 0.68, "grad_norm": 0.7476239204406738, "learning_rate": 4.6603757303203234e-06, "loss": 2.0683, "step": 20578 }, { "epoch": 0.68, "grad_norm": 0.771104633808136, "learning_rate": 4.659477075783631e-06, "loss": 2.0545, "step": 20579 }, { "epoch": 0.68, "grad_norm": 0.7423458695411682, "learning_rate": 4.65857848158104e-06, "loss": 2.0359, "step": 20580 }, { "epoch": 0.68, "grad_norm": 0.7229087352752686, "learning_rate": 4.657679947722695e-06, "loss": 2.076, "step": 20581 }, { "epoch": 0.68, "grad_norm": 0.7289699912071228, "learning_rate": 4.656781474218756e-06, "loss": 2.0132, "step": 20582 }, { "epoch": 0.68, "grad_norm": 0.7427715063095093, "learning_rate": 4.655883061079367e-06, "loss": 2.0275, "step": 20583 }, { "epoch": 0.68, "grad_norm": 0.7307316660881042, "learning_rate": 4.654984708314676e-06, "loss": 2.022, "step": 20584 }, { "epoch": 0.68, "grad_norm": 0.7169957160949707, "learning_rate": 4.654086415934835e-06, "loss": 2.001, "step": 20585 }, { "epoch": 0.68, "grad_norm": 0.7443612813949585, "learning_rate": 4.653188183950001e-06, "loss": 2.0472, "step": 20586 }, { "epoch": 0.68, "grad_norm": 0.7593010663986206, "learning_rate": 4.652290012370305e-06, "loss": 2.0624, "step": 20587 }, { "epoch": 0.68, "grad_norm": 0.7444010972976685, "learning_rate": 4.651391901205903e-06, "loss": 2.1033, "step": 20588 }, { "epoch": 0.68, "grad_norm": 0.764243483543396, "learning_rate": 4.650493850466944e-06, "loss": 2.0856, "step": 20589 }, { "epoch": 0.69, "grad_norm": 0.754604697227478, "learning_rate": 4.649595860163567e-06, "loss": 2.0115, "step": 20590 }, { "epoch": 0.69, "grad_norm": 0.740834653377533, "learning_rate": 4.6486979303059245e-06, "loss": 2.1185, "step": 20591 }, { "epoch": 0.69, "grad_norm": 0.7251995801925659, "learning_rate": 4.647800060904155e-06, "loss": 2.0435, "step": 20592 }, { "epoch": 0.69, "grad_norm": 0.7572815418243408, "learning_rate": 4.646902251968402e-06, "loss": 2.0285, "step": 20593 }, { "epoch": 0.69, "grad_norm": 0.7676687836647034, "learning_rate": 4.6460045035088085e-06, "loss": 2.0819, "step": 20594 }, { "epoch": 0.69, "grad_norm": 0.7294437289237976, "learning_rate": 4.645106815535523e-06, "loss": 2.0622, "step": 20595 }, { "epoch": 0.69, "grad_norm": 0.7461565732955933, "learning_rate": 4.644209188058683e-06, "loss": 2.0477, "step": 20596 }, { "epoch": 0.69, "grad_norm": 0.7593443393707275, "learning_rate": 4.643311621088423e-06, "loss": 2.0988, "step": 20597 }, { "epoch": 0.69, "grad_norm": 0.7722288966178894, "learning_rate": 4.642414114634891e-06, "loss": 2.0459, "step": 20598 }, { "epoch": 0.69, "grad_norm": 0.7255918383598328, "learning_rate": 4.6415166687082265e-06, "loss": 2.0974, "step": 20599 }, { "epoch": 0.69, "grad_norm": 0.7778660655021667, "learning_rate": 4.640619283318568e-06, "loss": 2.0836, "step": 20600 }, { "epoch": 0.69, "grad_norm": 0.7646399140357971, "learning_rate": 4.639721958476047e-06, "loss": 2.0945, "step": 20601 }, { "epoch": 0.69, "grad_norm": 0.7390388250350952, "learning_rate": 4.63882469419081e-06, "loss": 2.1229, "step": 20602 }, { "epoch": 0.69, "grad_norm": 0.741004467010498, "learning_rate": 4.637927490472986e-06, "loss": 2.0102, "step": 20603 }, { "epoch": 0.69, "grad_norm": 0.7691118121147156, "learning_rate": 4.637030347332719e-06, "loss": 2.069, "step": 20604 }, { "epoch": 0.69, "grad_norm": 0.7195420265197754, "learning_rate": 4.636133264780139e-06, "loss": 2.0681, "step": 20605 }, { "epoch": 0.69, "grad_norm": 0.7475249171257019, "learning_rate": 4.635236242825379e-06, "loss": 1.9803, "step": 20606 }, { "epoch": 0.69, "grad_norm": 0.7600256204605103, "learning_rate": 4.634339281478575e-06, "loss": 2.1122, "step": 20607 }, { "epoch": 0.69, "grad_norm": 0.7564387321472168, "learning_rate": 4.633442380749865e-06, "loss": 2.0599, "step": 20608 }, { "epoch": 0.69, "grad_norm": 0.7315038442611694, "learning_rate": 4.632545540649379e-06, "loss": 1.9785, "step": 20609 }, { "epoch": 0.69, "grad_norm": 0.7411050796508789, "learning_rate": 4.6316487611872426e-06, "loss": 2.0009, "step": 20610 }, { "epoch": 0.69, "grad_norm": 0.7475032210350037, "learning_rate": 4.6307520423735975e-06, "loss": 1.9819, "step": 20611 }, { "epoch": 0.69, "grad_norm": 0.7271531224250793, "learning_rate": 4.6298553842185644e-06, "loss": 2.0071, "step": 20612 }, { "epoch": 0.69, "grad_norm": 0.7877135872840881, "learning_rate": 4.628958786732283e-06, "loss": 2.0297, "step": 20613 }, { "epoch": 0.69, "grad_norm": 0.7407258152961731, "learning_rate": 4.628062249924873e-06, "loss": 2.0624, "step": 20614 }, { "epoch": 0.69, "grad_norm": 0.7576996684074402, "learning_rate": 4.62716577380647e-06, "loss": 2.0162, "step": 20615 }, { "epoch": 0.69, "grad_norm": 0.764274001121521, "learning_rate": 4.6262693583871975e-06, "loss": 2.0156, "step": 20616 }, { "epoch": 0.69, "grad_norm": 0.7336465120315552, "learning_rate": 4.625373003677187e-06, "loss": 2.0306, "step": 20617 }, { "epoch": 0.69, "grad_norm": 0.7349523901939392, "learning_rate": 4.624476709686563e-06, "loss": 2.0718, "step": 20618 }, { "epoch": 0.69, "grad_norm": 0.7525498270988464, "learning_rate": 4.623580476425447e-06, "loss": 2.0632, "step": 20619 }, { "epoch": 0.69, "grad_norm": 0.7286232709884644, "learning_rate": 4.622684303903967e-06, "loss": 2.0319, "step": 20620 }, { "epoch": 0.69, "grad_norm": 0.7352551221847534, "learning_rate": 4.621788192132253e-06, "loss": 1.9962, "step": 20621 }, { "epoch": 0.69, "grad_norm": 0.723606288433075, "learning_rate": 4.620892141120423e-06, "loss": 2.1099, "step": 20622 }, { "epoch": 0.69, "grad_norm": 0.7471742033958435, "learning_rate": 4.619996150878598e-06, "loss": 2.0824, "step": 20623 }, { "epoch": 0.69, "grad_norm": 0.7192665934562683, "learning_rate": 4.619100221416908e-06, "loss": 2.0594, "step": 20624 }, { "epoch": 0.69, "grad_norm": 0.7166378498077393, "learning_rate": 4.618204352745466e-06, "loss": 1.9926, "step": 20625 }, { "epoch": 0.69, "grad_norm": 0.7362362742424011, "learning_rate": 4.6173085448744e-06, "loss": 1.9973, "step": 20626 }, { "epoch": 0.69, "grad_norm": 0.7270293235778809, "learning_rate": 4.616412797813829e-06, "loss": 2.0718, "step": 20627 }, { "epoch": 0.69, "grad_norm": 0.746406614780426, "learning_rate": 4.615517111573867e-06, "loss": 2.0615, "step": 20628 }, { "epoch": 0.69, "grad_norm": 0.7357895374298096, "learning_rate": 4.614621486164636e-06, "loss": 1.9852, "step": 20629 }, { "epoch": 0.69, "grad_norm": 0.7593170404434204, "learning_rate": 4.61372592159626e-06, "loss": 2.0919, "step": 20630 }, { "epoch": 0.69, "grad_norm": 0.7290507555007935, "learning_rate": 4.6128304178788516e-06, "loss": 2.057, "step": 20631 }, { "epoch": 0.69, "grad_norm": 0.7337861061096191, "learning_rate": 4.611934975022524e-06, "loss": 2.0018, "step": 20632 }, { "epoch": 0.69, "grad_norm": 0.7265309691429138, "learning_rate": 4.611039593037402e-06, "loss": 2.1178, "step": 20633 }, { "epoch": 0.69, "grad_norm": 0.7529125809669495, "learning_rate": 4.610144271933592e-06, "loss": 2.0105, "step": 20634 }, { "epoch": 0.69, "grad_norm": 0.7269579172134399, "learning_rate": 4.609249011721216e-06, "loss": 2.047, "step": 20635 }, { "epoch": 0.69, "grad_norm": 0.7493817806243896, "learning_rate": 4.608353812410384e-06, "loss": 2.0359, "step": 20636 }, { "epoch": 0.69, "grad_norm": 0.783760666847229, "learning_rate": 4.607458674011212e-06, "loss": 2.0546, "step": 20637 }, { "epoch": 0.69, "grad_norm": 0.7337034940719604, "learning_rate": 4.606563596533809e-06, "loss": 1.9935, "step": 20638 }, { "epoch": 0.69, "grad_norm": 0.7485724091529846, "learning_rate": 4.605668579988294e-06, "loss": 2.1013, "step": 20639 }, { "epoch": 0.69, "grad_norm": 0.7395462393760681, "learning_rate": 4.604773624384773e-06, "loss": 2.008, "step": 20640 }, { "epoch": 0.69, "grad_norm": 0.7468209266662598, "learning_rate": 4.603878729733355e-06, "loss": 2.0063, "step": 20641 }, { "epoch": 0.69, "grad_norm": 0.743109405040741, "learning_rate": 4.602983896044152e-06, "loss": 2.0565, "step": 20642 }, { "epoch": 0.69, "grad_norm": 0.7304777503013611, "learning_rate": 4.6020891233272766e-06, "loss": 2.089, "step": 20643 }, { "epoch": 0.69, "grad_norm": 0.7182857990264893, "learning_rate": 4.601194411592836e-06, "loss": 1.9974, "step": 20644 }, { "epoch": 0.69, "grad_norm": 0.7245355248451233, "learning_rate": 4.600299760850933e-06, "loss": 2.1012, "step": 20645 }, { "epoch": 0.69, "grad_norm": 0.7487111687660217, "learning_rate": 4.599405171111683e-06, "loss": 2.031, "step": 20646 }, { "epoch": 0.69, "grad_norm": 0.7455403804779053, "learning_rate": 4.598510642385184e-06, "loss": 2.0316, "step": 20647 }, { "epoch": 0.69, "grad_norm": 0.7576206922531128, "learning_rate": 4.597616174681551e-06, "loss": 2.1144, "step": 20648 }, { "epoch": 0.69, "grad_norm": 0.7099148035049438, "learning_rate": 4.596721768010883e-06, "loss": 2.0355, "step": 20649 }, { "epoch": 0.69, "grad_norm": 0.7514818906784058, "learning_rate": 4.595827422383282e-06, "loss": 2.0929, "step": 20650 }, { "epoch": 0.69, "grad_norm": 0.7305217981338501, "learning_rate": 4.594933137808857e-06, "loss": 2.0168, "step": 20651 }, { "epoch": 0.69, "grad_norm": 0.7320857644081116, "learning_rate": 4.5940389142977125e-06, "loss": 2.0652, "step": 20652 }, { "epoch": 0.69, "grad_norm": 0.7518050670623779, "learning_rate": 4.593144751859948e-06, "loss": 2.1426, "step": 20653 }, { "epoch": 0.69, "grad_norm": 0.7550265192985535, "learning_rate": 4.592250650505662e-06, "loss": 2.0469, "step": 20654 }, { "epoch": 0.69, "grad_norm": 0.749140739440918, "learning_rate": 4.5913566102449625e-06, "loss": 2.0679, "step": 20655 }, { "epoch": 0.69, "grad_norm": 0.740583598613739, "learning_rate": 4.5904626310879415e-06, "loss": 2.1177, "step": 20656 }, { "epoch": 0.69, "grad_norm": 0.7535557150840759, "learning_rate": 4.5895687130447085e-06, "loss": 2.1063, "step": 20657 }, { "epoch": 0.69, "grad_norm": 0.7353165745735168, "learning_rate": 4.588674856125353e-06, "loss": 2.049, "step": 20658 }, { "epoch": 0.69, "grad_norm": 0.7539626359939575, "learning_rate": 4.587781060339982e-06, "loss": 2.0784, "step": 20659 }, { "epoch": 0.69, "grad_norm": 0.7740181088447571, "learning_rate": 4.586887325698684e-06, "loss": 2.1294, "step": 20660 }, { "epoch": 0.69, "grad_norm": 0.7257598042488098, "learning_rate": 4.585993652211565e-06, "loss": 2.0579, "step": 20661 }, { "epoch": 0.69, "grad_norm": 0.757351815700531, "learning_rate": 4.585100039888718e-06, "loss": 2.0183, "step": 20662 }, { "epoch": 0.69, "grad_norm": 0.7439040541648865, "learning_rate": 4.584206488740231e-06, "loss": 1.983, "step": 20663 }, { "epoch": 0.69, "grad_norm": 0.7441537380218506, "learning_rate": 4.583312998776207e-06, "loss": 2.0595, "step": 20664 }, { "epoch": 0.69, "grad_norm": 0.7405146956443787, "learning_rate": 4.582419570006742e-06, "loss": 2.0092, "step": 20665 }, { "epoch": 0.69, "grad_norm": 0.7386098504066467, "learning_rate": 4.581526202441925e-06, "loss": 2.0385, "step": 20666 }, { "epoch": 0.69, "grad_norm": 0.7512227892875671, "learning_rate": 4.580632896091845e-06, "loss": 2.0219, "step": 20667 }, { "epoch": 0.69, "grad_norm": 0.738365888595581, "learning_rate": 4.5797396509666035e-06, "loss": 2.0976, "step": 20668 }, { "epoch": 0.69, "grad_norm": 0.728626012802124, "learning_rate": 4.578846467076283e-06, "loss": 2.0452, "step": 20669 }, { "epoch": 0.69, "grad_norm": 0.7077351808547974, "learning_rate": 4.577953344430981e-06, "loss": 1.9964, "step": 20670 }, { "epoch": 0.69, "grad_norm": 0.7818958759307861, "learning_rate": 4.577060283040785e-06, "loss": 2.0141, "step": 20671 }, { "epoch": 0.69, "grad_norm": 0.7469938397407532, "learning_rate": 4.57616728291578e-06, "loss": 2.0579, "step": 20672 }, { "epoch": 0.69, "grad_norm": 0.7606632113456726, "learning_rate": 4.5752743440660586e-06, "loss": 2.0733, "step": 20673 }, { "epoch": 0.69, "grad_norm": 0.7833924293518066, "learning_rate": 4.574381466501711e-06, "loss": 2.0533, "step": 20674 }, { "epoch": 0.69, "grad_norm": 0.7717723250389099, "learning_rate": 4.5734886502328236e-06, "loss": 2.0557, "step": 20675 }, { "epoch": 0.69, "grad_norm": 0.7466371059417725, "learning_rate": 4.572595895269476e-06, "loss": 2.0967, "step": 20676 }, { "epoch": 0.69, "grad_norm": 0.746803343296051, "learning_rate": 4.5717032016217635e-06, "loss": 1.9864, "step": 20677 }, { "epoch": 0.69, "grad_norm": 0.7636306881904602, "learning_rate": 4.5708105692997625e-06, "loss": 2.0232, "step": 20678 }, { "epoch": 0.69, "grad_norm": 0.7452732920646667, "learning_rate": 4.569917998313567e-06, "loss": 2.0539, "step": 20679 }, { "epoch": 0.69, "grad_norm": 0.736293375492096, "learning_rate": 4.569025488673251e-06, "loss": 2.051, "step": 20680 }, { "epoch": 0.69, "grad_norm": 0.7503386735916138, "learning_rate": 4.568133040388906e-06, "loss": 1.9707, "step": 20681 }, { "epoch": 0.69, "grad_norm": 0.7489938735961914, "learning_rate": 4.567240653470607e-06, "loss": 1.9514, "step": 20682 }, { "epoch": 0.69, "grad_norm": 0.7505583763122559, "learning_rate": 4.566348327928439e-06, "loss": 2.0382, "step": 20683 }, { "epoch": 0.69, "grad_norm": 0.7644677758216858, "learning_rate": 4.565456063772491e-06, "loss": 2.0597, "step": 20684 }, { "epoch": 0.69, "grad_norm": 0.7377408742904663, "learning_rate": 4.564563861012827e-06, "loss": 2.0417, "step": 20685 }, { "epoch": 0.69, "grad_norm": 0.778724730014801, "learning_rate": 4.563671719659536e-06, "loss": 2.0493, "step": 20686 }, { "epoch": 0.69, "grad_norm": 0.7527204751968384, "learning_rate": 4.5627796397227e-06, "loss": 2.0696, "step": 20687 }, { "epoch": 0.69, "grad_norm": 0.7249613404273987, "learning_rate": 4.561887621212392e-06, "loss": 2.0366, "step": 20688 }, { "epoch": 0.69, "grad_norm": 0.7465932965278625, "learning_rate": 4.560995664138687e-06, "loss": 2.0244, "step": 20689 }, { "epoch": 0.69, "grad_norm": 0.7409705519676208, "learning_rate": 4.560103768511669e-06, "loss": 2.0766, "step": 20690 }, { "epoch": 0.69, "grad_norm": 0.7407171726226807, "learning_rate": 4.5592119343414074e-06, "loss": 2.0528, "step": 20691 }, { "epoch": 0.69, "grad_norm": 0.7700774073600769, "learning_rate": 4.55832016163798e-06, "loss": 2.0649, "step": 20692 }, { "epoch": 0.69, "grad_norm": 0.816064178943634, "learning_rate": 4.557428450411471e-06, "loss": 2.0646, "step": 20693 }, { "epoch": 0.69, "grad_norm": 0.746732771396637, "learning_rate": 4.556536800671938e-06, "loss": 2.0337, "step": 20694 }, { "epoch": 0.69, "grad_norm": 0.7390273213386536, "learning_rate": 4.555645212429461e-06, "loss": 2.0344, "step": 20695 }, { "epoch": 0.69, "grad_norm": 0.7555868625640869, "learning_rate": 4.554753685694118e-06, "loss": 2.0636, "step": 20696 }, { "epoch": 0.69, "grad_norm": 0.7718966603279114, "learning_rate": 4.553862220475976e-06, "loss": 2.0908, "step": 20697 }, { "epoch": 0.69, "grad_norm": 0.8078123927116394, "learning_rate": 4.552970816785105e-06, "loss": 2.0931, "step": 20698 }, { "epoch": 0.69, "grad_norm": 0.7131874561309814, "learning_rate": 4.552079474631576e-06, "loss": 2.1001, "step": 20699 }, { "epoch": 0.69, "grad_norm": 0.7474454045295715, "learning_rate": 4.551188194025464e-06, "loss": 2.0712, "step": 20700 }, { "epoch": 0.69, "grad_norm": 0.7461369037628174, "learning_rate": 4.5502969749768345e-06, "loss": 2.0215, "step": 20701 }, { "epoch": 0.69, "grad_norm": 0.7163628339767456, "learning_rate": 4.549405817495753e-06, "loss": 2.0817, "step": 20702 }, { "epoch": 0.69, "grad_norm": 0.7464097142219543, "learning_rate": 4.548514721592293e-06, "loss": 2.1172, "step": 20703 }, { "epoch": 0.69, "grad_norm": 0.7577479481697083, "learning_rate": 4.547623687276516e-06, "loss": 2.0232, "step": 20704 }, { "epoch": 0.69, "grad_norm": 0.7378754019737244, "learning_rate": 4.54673271455849e-06, "loss": 2.0562, "step": 20705 }, { "epoch": 0.69, "grad_norm": 0.743084192276001, "learning_rate": 4.5458418034482895e-06, "loss": 2.0392, "step": 20706 }, { "epoch": 0.69, "grad_norm": 0.744170606136322, "learning_rate": 4.544950953955966e-06, "loss": 2.0033, "step": 20707 }, { "epoch": 0.69, "grad_norm": 0.7399832010269165, "learning_rate": 4.544060166091589e-06, "loss": 1.9628, "step": 20708 }, { "epoch": 0.69, "grad_norm": 0.7041359543800354, "learning_rate": 4.5431694398652244e-06, "loss": 2.0099, "step": 20709 }, { "epoch": 0.69, "grad_norm": 0.7466070652008057, "learning_rate": 4.542278775286936e-06, "loss": 2.0641, "step": 20710 }, { "epoch": 0.69, "grad_norm": 0.7509164810180664, "learning_rate": 4.541388172366779e-06, "loss": 2.046, "step": 20711 }, { "epoch": 0.69, "grad_norm": 0.787859320640564, "learning_rate": 4.540497631114822e-06, "loss": 1.9834, "step": 20712 }, { "epoch": 0.69, "grad_norm": 0.7788780927658081, "learning_rate": 4.539607151541121e-06, "loss": 2.0329, "step": 20713 }, { "epoch": 0.69, "grad_norm": 0.7453721761703491, "learning_rate": 4.538716733655737e-06, "loss": 2.061, "step": 20714 }, { "epoch": 0.69, "grad_norm": 0.73618084192276, "learning_rate": 4.537826377468735e-06, "loss": 2.0339, "step": 20715 }, { "epoch": 0.69, "grad_norm": 0.7471417188644409, "learning_rate": 4.53693608299017e-06, "loss": 2.0238, "step": 20716 }, { "epoch": 0.69, "grad_norm": 0.7380427122116089, "learning_rate": 4.536045850230095e-06, "loss": 2.0307, "step": 20717 }, { "epoch": 0.69, "grad_norm": 0.7479676604270935, "learning_rate": 4.535155679198575e-06, "loss": 2.0112, "step": 20718 }, { "epoch": 0.69, "grad_norm": 0.7329914569854736, "learning_rate": 4.5342655699056644e-06, "loss": 2.0499, "step": 20719 }, { "epoch": 0.69, "grad_norm": 0.733182966709137, "learning_rate": 4.533375522361415e-06, "loss": 2.0208, "step": 20720 }, { "epoch": 0.69, "grad_norm": 0.7644082307815552, "learning_rate": 4.5324855365758836e-06, "loss": 1.9936, "step": 20721 }, { "epoch": 0.69, "grad_norm": 0.7468414306640625, "learning_rate": 4.531595612559131e-06, "loss": 1.9706, "step": 20722 }, { "epoch": 0.69, "grad_norm": 0.7537177205085754, "learning_rate": 4.530705750321203e-06, "loss": 2.0651, "step": 20723 }, { "epoch": 0.69, "grad_norm": 0.7326295971870422, "learning_rate": 4.52981594987216e-06, "loss": 2.0244, "step": 20724 }, { "epoch": 0.69, "grad_norm": 0.7241556644439697, "learning_rate": 4.528926211222049e-06, "loss": 2.0724, "step": 20725 }, { "epoch": 0.69, "grad_norm": 0.7399282455444336, "learning_rate": 4.52803653438092e-06, "loss": 2.0541, "step": 20726 }, { "epoch": 0.69, "grad_norm": 0.7799809575080872, "learning_rate": 4.527146919358828e-06, "loss": 2.0187, "step": 20727 }, { "epoch": 0.69, "grad_norm": 0.7700902223587036, "learning_rate": 4.52625736616583e-06, "loss": 2.157, "step": 20728 }, { "epoch": 0.69, "grad_norm": 0.7256603837013245, "learning_rate": 4.525367874811961e-06, "loss": 2.0672, "step": 20729 }, { "epoch": 0.69, "grad_norm": 0.7290663719177246, "learning_rate": 4.5244784453072766e-06, "loss": 2.0412, "step": 20730 }, { "epoch": 0.69, "grad_norm": 0.7704488039016724, "learning_rate": 4.523589077661831e-06, "loss": 2.0808, "step": 20731 }, { "epoch": 0.69, "grad_norm": 0.7384892106056213, "learning_rate": 4.5226997718856645e-06, "loss": 1.999, "step": 20732 }, { "epoch": 0.69, "grad_norm": 0.7603267431259155, "learning_rate": 4.521810527988824e-06, "loss": 2.0581, "step": 20733 }, { "epoch": 0.69, "grad_norm": 0.7297340631484985, "learning_rate": 4.5209213459813605e-06, "loss": 2.0398, "step": 20734 }, { "epoch": 0.69, "grad_norm": 0.7596359252929688, "learning_rate": 4.520032225873313e-06, "loss": 2.0853, "step": 20735 }, { "epoch": 0.69, "grad_norm": 0.7111049294471741, "learning_rate": 4.5191431676747296e-06, "loss": 1.9701, "step": 20736 }, { "epoch": 0.69, "grad_norm": 0.7719289660453796, "learning_rate": 4.518254171395659e-06, "loss": 1.9509, "step": 20737 }, { "epoch": 0.69, "grad_norm": 0.7342584729194641, "learning_rate": 4.51736523704614e-06, "loss": 2.0419, "step": 20738 }, { "epoch": 0.69, "grad_norm": 0.7334843277931213, "learning_rate": 4.516476364636211e-06, "loss": 1.9935, "step": 20739 }, { "epoch": 0.69, "grad_norm": 0.7519233226776123, "learning_rate": 4.515587554175922e-06, "loss": 2.0898, "step": 20740 }, { "epoch": 0.69, "grad_norm": 0.7534491419792175, "learning_rate": 4.514698805675311e-06, "loss": 2.0472, "step": 20741 }, { "epoch": 0.69, "grad_norm": 0.7541595101356506, "learning_rate": 4.513810119144415e-06, "loss": 2.0152, "step": 20742 }, { "epoch": 0.69, "grad_norm": 0.7310873866081238, "learning_rate": 4.512921494593276e-06, "loss": 2.062, "step": 20743 }, { "epoch": 0.69, "grad_norm": 0.7659986615180969, "learning_rate": 4.512032932031939e-06, "loss": 1.9964, "step": 20744 }, { "epoch": 0.69, "grad_norm": 0.743319571018219, "learning_rate": 4.5111444314704334e-06, "loss": 2.053, "step": 20745 }, { "epoch": 0.69, "grad_norm": 0.7624233365058899, "learning_rate": 4.510255992918805e-06, "loss": 2.0479, "step": 20746 }, { "epoch": 0.69, "grad_norm": 0.7290558815002441, "learning_rate": 4.509367616387087e-06, "loss": 2.0114, "step": 20747 }, { "epoch": 0.69, "grad_norm": 0.7348864674568176, "learning_rate": 4.508479301885312e-06, "loss": 2.0528, "step": 20748 }, { "epoch": 0.69, "grad_norm": 0.7290246486663818, "learning_rate": 4.50759104942352e-06, "loss": 2.0978, "step": 20749 }, { "epoch": 0.69, "grad_norm": 0.7566142082214355, "learning_rate": 4.5067028590117525e-06, "loss": 1.9224, "step": 20750 }, { "epoch": 0.69, "grad_norm": 0.7833240628242493, "learning_rate": 4.50581473066003e-06, "loss": 2.0714, "step": 20751 }, { "epoch": 0.69, "grad_norm": 0.7549216151237488, "learning_rate": 4.504926664378392e-06, "loss": 2.029, "step": 20752 }, { "epoch": 0.69, "grad_norm": 0.7524871230125427, "learning_rate": 4.504038660176876e-06, "loss": 2.0943, "step": 20753 }, { "epoch": 0.69, "grad_norm": 0.7237919569015503, "learning_rate": 4.503150718065507e-06, "loss": 2.0462, "step": 20754 }, { "epoch": 0.69, "grad_norm": 0.7428598403930664, "learning_rate": 4.502262838054322e-06, "loss": 2.1008, "step": 20755 }, { "epoch": 0.69, "grad_norm": 0.7651428580284119, "learning_rate": 4.501375020153351e-06, "loss": 2.0361, "step": 20756 }, { "epoch": 0.69, "grad_norm": 0.7211928963661194, "learning_rate": 4.500487264372618e-06, "loss": 2.0436, "step": 20757 }, { "epoch": 0.69, "grad_norm": 0.7370629906654358, "learning_rate": 4.4995995707221574e-06, "loss": 1.9601, "step": 20758 }, { "epoch": 0.69, "grad_norm": 0.7417383790016174, "learning_rate": 4.4987119392120005e-06, "loss": 2.0799, "step": 20759 }, { "epoch": 0.69, "grad_norm": 0.7371569871902466, "learning_rate": 4.497824369852173e-06, "loss": 2.0137, "step": 20760 }, { "epoch": 0.69, "grad_norm": 0.7550243139266968, "learning_rate": 4.496936862652697e-06, "loss": 2.1249, "step": 20761 }, { "epoch": 0.69, "grad_norm": 0.7530125379562378, "learning_rate": 4.496049417623606e-06, "loss": 1.9857, "step": 20762 }, { "epoch": 0.69, "grad_norm": 0.7377340793609619, "learning_rate": 4.495162034774923e-06, "loss": 2.0132, "step": 20763 }, { "epoch": 0.69, "grad_norm": 0.735969603061676, "learning_rate": 4.494274714116671e-06, "loss": 1.982, "step": 20764 }, { "epoch": 0.69, "grad_norm": 0.7294069528579712, "learning_rate": 4.4933874556588755e-06, "loss": 2.0431, "step": 20765 }, { "epoch": 0.69, "grad_norm": 0.7481253743171692, "learning_rate": 4.492500259411565e-06, "loss": 2.0991, "step": 20766 }, { "epoch": 0.69, "grad_norm": 0.7536741495132446, "learning_rate": 4.491613125384756e-06, "loss": 1.971, "step": 20767 }, { "epoch": 0.69, "grad_norm": 0.7384999394416809, "learning_rate": 4.4907260535884766e-06, "loss": 2.0145, "step": 20768 }, { "epoch": 0.69, "grad_norm": 0.7133184671401978, "learning_rate": 4.489839044032746e-06, "loss": 2.0348, "step": 20769 }, { "epoch": 0.69, "grad_norm": 0.7517434358596802, "learning_rate": 4.4889520967275806e-06, "loss": 1.9865, "step": 20770 }, { "epoch": 0.69, "grad_norm": 0.7638693451881409, "learning_rate": 4.4880652116830046e-06, "loss": 2.0911, "step": 20771 }, { "epoch": 0.69, "grad_norm": 0.7233844995498657, "learning_rate": 4.487178388909045e-06, "loss": 2.031, "step": 20772 }, { "epoch": 0.69, "grad_norm": 0.7490777969360352, "learning_rate": 4.486291628415705e-06, "loss": 2.0302, "step": 20773 }, { "epoch": 0.69, "grad_norm": 0.777400553226471, "learning_rate": 4.485404930213012e-06, "loss": 2.0361, "step": 20774 }, { "epoch": 0.69, "grad_norm": 0.759142279624939, "learning_rate": 4.484518294310985e-06, "loss": 2.0815, "step": 20775 }, { "epoch": 0.69, "grad_norm": 0.7801027894020081, "learning_rate": 4.483631720719635e-06, "loss": 2.1633, "step": 20776 }, { "epoch": 0.69, "grad_norm": 0.7248302698135376, "learning_rate": 4.482745209448985e-06, "loss": 2.0487, "step": 20777 }, { "epoch": 0.69, "grad_norm": 0.7488502860069275, "learning_rate": 4.481858760509046e-06, "loss": 2.0261, "step": 20778 }, { "epoch": 0.69, "grad_norm": 0.7612342834472656, "learning_rate": 4.480972373909827e-06, "loss": 2.1106, "step": 20779 }, { "epoch": 0.69, "grad_norm": 0.7591012716293335, "learning_rate": 4.480086049661351e-06, "loss": 2.0186, "step": 20780 }, { "epoch": 0.69, "grad_norm": 0.759454607963562, "learning_rate": 4.4791997877736295e-06, "loss": 2.0683, "step": 20781 }, { "epoch": 0.69, "grad_norm": 0.7688586115837097, "learning_rate": 4.478313588256673e-06, "loss": 2.1423, "step": 20782 }, { "epoch": 0.69, "grad_norm": 0.7735649347305298, "learning_rate": 4.477427451120491e-06, "loss": 2.0813, "step": 20783 }, { "epoch": 0.69, "grad_norm": 0.7577570676803589, "learning_rate": 4.476541376375096e-06, "loss": 2.0337, "step": 20784 }, { "epoch": 0.69, "grad_norm": 0.7704837322235107, "learning_rate": 4.475655364030503e-06, "loss": 2.0298, "step": 20785 }, { "epoch": 0.69, "grad_norm": 0.7391753792762756, "learning_rate": 4.47476941409672e-06, "loss": 2.0523, "step": 20786 }, { "epoch": 0.69, "grad_norm": 0.7368494272232056, "learning_rate": 4.473883526583749e-06, "loss": 1.9872, "step": 20787 }, { "epoch": 0.69, "grad_norm": 0.7604776620864868, "learning_rate": 4.472997701501607e-06, "loss": 2.0781, "step": 20788 }, { "epoch": 0.69, "grad_norm": 0.7496863603591919, "learning_rate": 4.472111938860294e-06, "loss": 2.0919, "step": 20789 }, { "epoch": 0.69, "grad_norm": 0.7324661612510681, "learning_rate": 4.4712262386698245e-06, "loss": 2.0437, "step": 20790 }, { "epoch": 0.69, "grad_norm": 0.7379403710365295, "learning_rate": 4.470340600940202e-06, "loss": 1.989, "step": 20791 }, { "epoch": 0.69, "grad_norm": 0.7646297812461853, "learning_rate": 4.469455025681425e-06, "loss": 1.9787, "step": 20792 }, { "epoch": 0.69, "grad_norm": 0.7677327394485474, "learning_rate": 4.468569512903506e-06, "loss": 2.0304, "step": 20793 }, { "epoch": 0.69, "grad_norm": 0.742173433303833, "learning_rate": 4.4676840626164515e-06, "loss": 2.0636, "step": 20794 }, { "epoch": 0.69, "grad_norm": 0.756023108959198, "learning_rate": 4.466798674830255e-06, "loss": 2.0003, "step": 20795 }, { "epoch": 0.69, "grad_norm": 0.755954921245575, "learning_rate": 4.465913349554923e-06, "loss": 2.086, "step": 20796 }, { "epoch": 0.69, "grad_norm": 0.7673295736312866, "learning_rate": 4.465028086800464e-06, "loss": 2.0671, "step": 20797 }, { "epoch": 0.69, "grad_norm": 0.7505176663398743, "learning_rate": 4.4641428865768685e-06, "loss": 2.0125, "step": 20798 }, { "epoch": 0.69, "grad_norm": 0.7369430661201477, "learning_rate": 4.463257748894147e-06, "loss": 2.0274, "step": 20799 }, { "epoch": 0.69, "grad_norm": 0.7497841119766235, "learning_rate": 4.462372673762291e-06, "loss": 2.033, "step": 20800 }, { "epoch": 0.69, "grad_norm": 0.7442204356193542, "learning_rate": 4.461487661191307e-06, "loss": 1.992, "step": 20801 }, { "epoch": 0.69, "grad_norm": 0.7401643991470337, "learning_rate": 4.460602711191185e-06, "loss": 2.0354, "step": 20802 }, { "epoch": 0.69, "grad_norm": 0.7482219338417053, "learning_rate": 4.459717823771932e-06, "loss": 2.0555, "step": 20803 }, { "epoch": 0.69, "grad_norm": 0.7713789343833923, "learning_rate": 4.458832998943539e-06, "loss": 2.0459, "step": 20804 }, { "epoch": 0.69, "grad_norm": 0.7458503842353821, "learning_rate": 4.457948236716e-06, "loss": 2.0243, "step": 20805 }, { "epoch": 0.69, "grad_norm": 0.8652258515357971, "learning_rate": 4.457063537099314e-06, "loss": 2.1076, "step": 20806 }, { "epoch": 0.69, "grad_norm": 0.7668341994285583, "learning_rate": 4.456178900103479e-06, "loss": 2.1124, "step": 20807 }, { "epoch": 0.69, "grad_norm": 0.7421086430549622, "learning_rate": 4.4552943257384865e-06, "loss": 2.0411, "step": 20808 }, { "epoch": 0.69, "grad_norm": 0.7413544058799744, "learning_rate": 4.4544098140143245e-06, "loss": 2.0997, "step": 20809 }, { "epoch": 0.69, "grad_norm": 0.7332929372787476, "learning_rate": 4.453525364940995e-06, "loss": 2.0415, "step": 20810 }, { "epoch": 0.69, "grad_norm": 0.7543869614601135, "learning_rate": 4.4526409785284805e-06, "loss": 2.0877, "step": 20811 }, { "epoch": 0.69, "grad_norm": 0.7388126254081726, "learning_rate": 4.451756654786782e-06, "loss": 2.0428, "step": 20812 }, { "epoch": 0.69, "grad_norm": 0.737076461315155, "learning_rate": 4.4508723937258845e-06, "loss": 2.0285, "step": 20813 }, { "epoch": 0.69, "grad_norm": 0.7427147626876831, "learning_rate": 4.449988195355775e-06, "loss": 2.06, "step": 20814 }, { "epoch": 0.69, "grad_norm": 0.8073004484176636, "learning_rate": 4.449104059686446e-06, "loss": 2.0009, "step": 20815 }, { "epoch": 0.69, "grad_norm": 0.7563554644584656, "learning_rate": 4.448219986727892e-06, "loss": 2.083, "step": 20816 }, { "epoch": 0.69, "grad_norm": 0.7553269267082214, "learning_rate": 4.447335976490092e-06, "loss": 2.0941, "step": 20817 }, { "epoch": 0.69, "grad_norm": 0.7905998229980469, "learning_rate": 4.446452028983034e-06, "loss": 2.1161, "step": 20818 }, { "epoch": 0.69, "grad_norm": 0.7428863644599915, "learning_rate": 4.44556814421671e-06, "loss": 1.9842, "step": 20819 }, { "epoch": 0.69, "grad_norm": 0.764682412147522, "learning_rate": 4.444684322201097e-06, "loss": 2.0279, "step": 20820 }, { "epoch": 0.69, "grad_norm": 0.7824804186820984, "learning_rate": 4.44380056294619e-06, "loss": 2.0965, "step": 20821 }, { "epoch": 0.69, "grad_norm": 0.7619942426681519, "learning_rate": 4.4429168664619636e-06, "loss": 2.0963, "step": 20822 }, { "epoch": 0.69, "grad_norm": 0.8125794529914856, "learning_rate": 4.442033232758409e-06, "loss": 2.0527, "step": 20823 }, { "epoch": 0.69, "grad_norm": 0.7460378408432007, "learning_rate": 4.441149661845502e-06, "loss": 2.0014, "step": 20824 }, { "epoch": 0.69, "grad_norm": 0.7442352175712585, "learning_rate": 4.440266153733232e-06, "loss": 2.03, "step": 20825 }, { "epoch": 0.69, "grad_norm": 0.786788284778595, "learning_rate": 4.4393827084315774e-06, "loss": 2.0477, "step": 20826 }, { "epoch": 0.69, "grad_norm": 0.7161422967910767, "learning_rate": 4.438499325950514e-06, "loss": 2.0111, "step": 20827 }, { "epoch": 0.69, "grad_norm": 0.7410697340965271, "learning_rate": 4.437616006300025e-06, "loss": 2.0749, "step": 20828 }, { "epoch": 0.69, "grad_norm": 0.719542384147644, "learning_rate": 4.436732749490096e-06, "loss": 2.0226, "step": 20829 }, { "epoch": 0.69, "grad_norm": 0.7266137599945068, "learning_rate": 4.435849555530698e-06, "loss": 1.9702, "step": 20830 }, { "epoch": 0.69, "grad_norm": 0.7762044668197632, "learning_rate": 4.434966424431809e-06, "loss": 2.1038, "step": 20831 }, { "epoch": 0.69, "grad_norm": 0.7996153831481934, "learning_rate": 4.4340833562034105e-06, "loss": 2.0618, "step": 20832 }, { "epoch": 0.69, "grad_norm": 0.7810563445091248, "learning_rate": 4.433200350855472e-06, "loss": 2.0361, "step": 20833 }, { "epoch": 0.69, "grad_norm": 0.7641668915748596, "learning_rate": 4.432317408397978e-06, "loss": 2.0255, "step": 20834 }, { "epoch": 0.69, "grad_norm": 0.783947765827179, "learning_rate": 4.431434528840899e-06, "loss": 2.0733, "step": 20835 }, { "epoch": 0.69, "grad_norm": 0.743037223815918, "learning_rate": 4.430551712194206e-06, "loss": 2.0015, "step": 20836 }, { "epoch": 0.69, "grad_norm": 0.7233148217201233, "learning_rate": 4.429668958467875e-06, "loss": 1.9739, "step": 20837 }, { "epoch": 0.69, "grad_norm": 0.7716609835624695, "learning_rate": 4.428786267671883e-06, "loss": 2.058, "step": 20838 }, { "epoch": 0.69, "grad_norm": 0.7279636859893799, "learning_rate": 4.427903639816199e-06, "loss": 1.9954, "step": 20839 }, { "epoch": 0.69, "grad_norm": 0.7316717505455017, "learning_rate": 4.427021074910791e-06, "loss": 2.1015, "step": 20840 }, { "epoch": 0.69, "grad_norm": 0.7835211753845215, "learning_rate": 4.426138572965636e-06, "loss": 2.1206, "step": 20841 }, { "epoch": 0.69, "grad_norm": 0.7458429336547852, "learning_rate": 4.425256133990697e-06, "loss": 1.9581, "step": 20842 }, { "epoch": 0.69, "grad_norm": 0.7534080147743225, "learning_rate": 4.4243737579959514e-06, "loss": 2.0013, "step": 20843 }, { "epoch": 0.69, "grad_norm": 0.7578807473182678, "learning_rate": 4.423491444991359e-06, "loss": 2.0306, "step": 20844 }, { "epoch": 0.69, "grad_norm": 0.7501322031021118, "learning_rate": 4.422609194986896e-06, "loss": 2.0157, "step": 20845 }, { "epoch": 0.69, "grad_norm": 0.7568397521972656, "learning_rate": 4.421727007992521e-06, "loss": 2.0421, "step": 20846 }, { "epoch": 0.69, "grad_norm": 0.7529815435409546, "learning_rate": 4.42084488401821e-06, "loss": 2.0531, "step": 20847 }, { "epoch": 0.69, "grad_norm": 0.7425066232681274, "learning_rate": 4.419962823073924e-06, "loss": 2.0758, "step": 20848 }, { "epoch": 0.69, "grad_norm": 0.730161726474762, "learning_rate": 4.419080825169623e-06, "loss": 2.0353, "step": 20849 }, { "epoch": 0.69, "grad_norm": 0.7719929814338684, "learning_rate": 4.418198890315277e-06, "loss": 2.0996, "step": 20850 }, { "epoch": 0.69, "grad_norm": 0.7241494059562683, "learning_rate": 4.417317018520852e-06, "loss": 2.0753, "step": 20851 }, { "epoch": 0.69, "grad_norm": 0.7702763080596924, "learning_rate": 4.416435209796308e-06, "loss": 2.0619, "step": 20852 }, { "epoch": 0.69, "grad_norm": 0.7326291799545288, "learning_rate": 4.415553464151603e-06, "loss": 2.0125, "step": 20853 }, { "epoch": 0.69, "grad_norm": 0.7589186429977417, "learning_rate": 4.414671781596705e-06, "loss": 2.025, "step": 20854 }, { "epoch": 0.69, "grad_norm": 0.8006117939949036, "learning_rate": 4.413790162141569e-06, "loss": 2.0227, "step": 20855 }, { "epoch": 0.69, "grad_norm": 0.7472789883613586, "learning_rate": 4.412908605796161e-06, "loss": 2.0299, "step": 20856 }, { "epoch": 0.69, "grad_norm": 0.7466250061988831, "learning_rate": 4.412027112570438e-06, "loss": 2.0088, "step": 20857 }, { "epoch": 0.69, "grad_norm": 0.7326253652572632, "learning_rate": 4.411145682474354e-06, "loss": 2.0088, "step": 20858 }, { "epoch": 0.69, "grad_norm": 0.7280779480934143, "learning_rate": 4.410264315517869e-06, "loss": 2.0634, "step": 20859 }, { "epoch": 0.69, "grad_norm": 0.743938684463501, "learning_rate": 4.409383011710948e-06, "loss": 2.0153, "step": 20860 }, { "epoch": 0.69, "grad_norm": 0.739898145198822, "learning_rate": 4.40850177106354e-06, "loss": 2.0658, "step": 20861 }, { "epoch": 0.69, "grad_norm": 0.7107728123664856, "learning_rate": 4.407620593585598e-06, "loss": 2.0208, "step": 20862 }, { "epoch": 0.69, "grad_norm": 0.7273580431938171, "learning_rate": 4.406739479287085e-06, "loss": 2.0246, "step": 20863 }, { "epoch": 0.69, "grad_norm": 0.7113926410675049, "learning_rate": 4.405858428177949e-06, "loss": 2.0506, "step": 20864 }, { "epoch": 0.69, "grad_norm": 0.789524495601654, "learning_rate": 4.404977440268149e-06, "loss": 2.0891, "step": 20865 }, { "epoch": 0.69, "grad_norm": 0.7280378937721252, "learning_rate": 4.404096515567631e-06, "loss": 1.9893, "step": 20866 }, { "epoch": 0.69, "grad_norm": 0.7493146061897278, "learning_rate": 4.403215654086353e-06, "loss": 2.079, "step": 20867 }, { "epoch": 0.69, "grad_norm": 0.7611211538314819, "learning_rate": 4.402334855834264e-06, "loss": 2.1065, "step": 20868 }, { "epoch": 0.69, "grad_norm": 0.7188528776168823, "learning_rate": 4.401454120821317e-06, "loss": 2.0156, "step": 20869 }, { "epoch": 0.69, "grad_norm": 0.7213507890701294, "learning_rate": 4.400573449057461e-06, "loss": 1.993, "step": 20870 }, { "epoch": 0.69, "grad_norm": 0.7683514356613159, "learning_rate": 4.39969284055264e-06, "loss": 2.0045, "step": 20871 }, { "epoch": 0.69, "grad_norm": 0.7380040287971497, "learning_rate": 4.398812295316808e-06, "loss": 2.0441, "step": 20872 }, { "epoch": 0.69, "grad_norm": 0.7355031967163086, "learning_rate": 4.397931813359916e-06, "loss": 2.0502, "step": 20873 }, { "epoch": 0.69, "grad_norm": 0.7478317022323608, "learning_rate": 4.397051394691906e-06, "loss": 2.0626, "step": 20874 }, { "epoch": 0.69, "grad_norm": 0.7597023844718933, "learning_rate": 4.396171039322723e-06, "loss": 2.0386, "step": 20875 }, { "epoch": 0.69, "grad_norm": 0.7457597851753235, "learning_rate": 4.395290747262318e-06, "loss": 2.0561, "step": 20876 }, { "epoch": 0.69, "grad_norm": 0.7545474171638489, "learning_rate": 4.3944105185206305e-06, "loss": 2.1344, "step": 20877 }, { "epoch": 0.69, "grad_norm": 0.7520078420639038, "learning_rate": 4.393530353107612e-06, "loss": 2.0438, "step": 20878 }, { "epoch": 0.69, "grad_norm": 0.763316810131073, "learning_rate": 4.3926502510332015e-06, "loss": 2.1304, "step": 20879 }, { "epoch": 0.69, "grad_norm": 0.7145616412162781, "learning_rate": 4.391770212307339e-06, "loss": 2.0254, "step": 20880 }, { "epoch": 0.69, "grad_norm": 0.7690443396568298, "learning_rate": 4.390890236939969e-06, "loss": 2.1471, "step": 20881 }, { "epoch": 0.69, "grad_norm": 0.7353788614273071, "learning_rate": 4.390010324941038e-06, "loss": 1.9803, "step": 20882 }, { "epoch": 0.69, "grad_norm": 0.7601040005683899, "learning_rate": 4.389130476320483e-06, "loss": 2.1161, "step": 20883 }, { "epoch": 0.69, "grad_norm": 0.7856127619743347, "learning_rate": 4.388250691088238e-06, "loss": 2.0307, "step": 20884 }, { "epoch": 0.69, "grad_norm": 0.7432969212532043, "learning_rate": 4.38737096925425e-06, "loss": 2.0661, "step": 20885 }, { "epoch": 0.69, "grad_norm": 0.7459391355514526, "learning_rate": 4.386491310828458e-06, "loss": 2.0061, "step": 20886 }, { "epoch": 0.69, "grad_norm": 0.7476521730422974, "learning_rate": 4.385611715820798e-06, "loss": 2.1387, "step": 20887 }, { "epoch": 0.69, "grad_norm": 0.7181851863861084, "learning_rate": 4.384732184241202e-06, "loss": 2.0542, "step": 20888 }, { "epoch": 0.69, "grad_norm": 0.7343102097511292, "learning_rate": 4.383852716099616e-06, "loss": 2.0714, "step": 20889 }, { "epoch": 0.7, "grad_norm": 0.7368954420089722, "learning_rate": 4.382973311405966e-06, "loss": 2.0334, "step": 20890 }, { "epoch": 0.7, "grad_norm": 0.7403631210327148, "learning_rate": 4.382093970170192e-06, "loss": 2.0019, "step": 20891 }, { "epoch": 0.7, "grad_norm": 0.7525413036346436, "learning_rate": 4.381214692402235e-06, "loss": 2.0172, "step": 20892 }, { "epoch": 0.7, "grad_norm": 0.7506888508796692, "learning_rate": 4.380335478112014e-06, "loss": 2.0014, "step": 20893 }, { "epoch": 0.7, "grad_norm": 0.7323147654533386, "learning_rate": 4.379456327309469e-06, "loss": 2.1111, "step": 20894 }, { "epoch": 0.7, "grad_norm": 0.7436090707778931, "learning_rate": 4.378577240004537e-06, "loss": 2.0652, "step": 20895 }, { "epoch": 0.7, "grad_norm": 0.7276225686073303, "learning_rate": 4.3776982162071435e-06, "loss": 1.9943, "step": 20896 }, { "epoch": 0.7, "grad_norm": 0.7257574200630188, "learning_rate": 4.376819255927218e-06, "loss": 2.0492, "step": 20897 }, { "epoch": 0.7, "grad_norm": 0.7536762356758118, "learning_rate": 4.375940359174697e-06, "loss": 2.0704, "step": 20898 }, { "epoch": 0.7, "grad_norm": 0.737755537033081, "learning_rate": 4.375061525959501e-06, "loss": 2.0504, "step": 20899 }, { "epoch": 0.7, "grad_norm": 0.7262691855430603, "learning_rate": 4.374182756291564e-06, "loss": 2.0501, "step": 20900 }, { "epoch": 0.7, "grad_norm": 0.7723524570465088, "learning_rate": 4.373304050180816e-06, "loss": 1.97, "step": 20901 }, { "epoch": 0.7, "grad_norm": 0.7597225308418274, "learning_rate": 4.372425407637183e-06, "loss": 2.0688, "step": 20902 }, { "epoch": 0.7, "grad_norm": 0.7424806356430054, "learning_rate": 4.3715468286705854e-06, "loss": 1.9986, "step": 20903 }, { "epoch": 0.7, "grad_norm": 0.7479363679885864, "learning_rate": 4.370668313290957e-06, "loss": 2.0455, "step": 20904 }, { "epoch": 0.7, "grad_norm": 0.7332838177680969, "learning_rate": 4.369789861508218e-06, "loss": 2.1273, "step": 20905 }, { "epoch": 0.7, "grad_norm": 0.7419009208679199, "learning_rate": 4.36891147333229e-06, "loss": 2.045, "step": 20906 }, { "epoch": 0.7, "grad_norm": 0.7403627038002014, "learning_rate": 4.3680331487731e-06, "loss": 2.0272, "step": 20907 }, { "epoch": 0.7, "grad_norm": 0.7130307555198669, "learning_rate": 4.367154887840574e-06, "loss": 2.0137, "step": 20908 }, { "epoch": 0.7, "grad_norm": 0.7361346483230591, "learning_rate": 4.366276690544633e-06, "loss": 2.0525, "step": 20909 }, { "epoch": 0.7, "grad_norm": 0.7482061386108398, "learning_rate": 4.365398556895191e-06, "loss": 2.0731, "step": 20910 }, { "epoch": 0.7, "grad_norm": 0.7448564171791077, "learning_rate": 4.364520486902178e-06, "loss": 2.0639, "step": 20911 }, { "epoch": 0.7, "grad_norm": 0.7333919405937195, "learning_rate": 4.3636424805755055e-06, "loss": 1.8909, "step": 20912 }, { "epoch": 0.7, "grad_norm": 0.7612888216972351, "learning_rate": 4.362764537925097e-06, "loss": 2.0235, "step": 20913 }, { "epoch": 0.7, "grad_norm": 0.762174129486084, "learning_rate": 4.361886658960879e-06, "loss": 1.967, "step": 20914 }, { "epoch": 0.7, "grad_norm": 0.7602876424789429, "learning_rate": 4.361008843692755e-06, "loss": 2.0764, "step": 20915 }, { "epoch": 0.7, "grad_norm": 0.7404674887657166, "learning_rate": 4.360131092130646e-06, "loss": 2.0103, "step": 20916 }, { "epoch": 0.7, "grad_norm": 0.7679657340049744, "learning_rate": 4.359253404284476e-06, "loss": 2.1127, "step": 20917 }, { "epoch": 0.7, "grad_norm": 0.7320108413696289, "learning_rate": 4.3583757801641544e-06, "loss": 2.0087, "step": 20918 }, { "epoch": 0.7, "grad_norm": 0.7394277453422546, "learning_rate": 4.357498219779594e-06, "loss": 2.048, "step": 20919 }, { "epoch": 0.7, "grad_norm": 0.7317453622817993, "learning_rate": 4.356620723140714e-06, "loss": 2.0135, "step": 20920 }, { "epoch": 0.7, "grad_norm": 0.7696622014045715, "learning_rate": 4.355743290257424e-06, "loss": 2.0668, "step": 20921 }, { "epoch": 0.7, "grad_norm": 0.7574049830436707, "learning_rate": 4.354865921139637e-06, "loss": 2.0867, "step": 20922 }, { "epoch": 0.7, "grad_norm": 0.7377979159355164, "learning_rate": 4.35398861579727e-06, "loss": 2.001, "step": 20923 }, { "epoch": 0.7, "grad_norm": 0.735192596912384, "learning_rate": 4.353111374240232e-06, "loss": 2.0848, "step": 20924 }, { "epoch": 0.7, "grad_norm": 0.7342488765716553, "learning_rate": 4.3522341964784275e-06, "loss": 1.9757, "step": 20925 }, { "epoch": 0.7, "grad_norm": 0.7903199195861816, "learning_rate": 4.351357082521775e-06, "loss": 2.0419, "step": 20926 }, { "epoch": 0.7, "grad_norm": 0.7413288354873657, "learning_rate": 4.350480032380181e-06, "loss": 1.9733, "step": 20927 }, { "epoch": 0.7, "grad_norm": 0.7485882043838501, "learning_rate": 4.349603046063547e-06, "loss": 2.0269, "step": 20928 }, { "epoch": 0.7, "grad_norm": 0.7932947278022766, "learning_rate": 4.3487261235817875e-06, "loss": 2.1098, "step": 20929 }, { "epoch": 0.7, "grad_norm": 0.7317259311676025, "learning_rate": 4.347849264944812e-06, "loss": 2.0328, "step": 20930 }, { "epoch": 0.7, "grad_norm": 0.7468205690383911, "learning_rate": 4.34697247016252e-06, "loss": 2.0634, "step": 20931 }, { "epoch": 0.7, "grad_norm": 0.7577621340751648, "learning_rate": 4.346095739244822e-06, "loss": 2.0523, "step": 20932 }, { "epoch": 0.7, "grad_norm": 0.7528536319732666, "learning_rate": 4.345219072201622e-06, "loss": 2.1111, "step": 20933 }, { "epoch": 0.7, "grad_norm": 0.7423205375671387, "learning_rate": 4.344342469042819e-06, "loss": 2.0947, "step": 20934 }, { "epoch": 0.7, "grad_norm": 0.7344153523445129, "learning_rate": 4.34346592977832e-06, "loss": 2.0887, "step": 20935 }, { "epoch": 0.7, "grad_norm": 0.7375187873840332, "learning_rate": 4.342589454418036e-06, "loss": 2.0906, "step": 20936 }, { "epoch": 0.7, "grad_norm": 0.7269762754440308, "learning_rate": 4.3417130429718525e-06, "loss": 2.013, "step": 20937 }, { "epoch": 0.7, "grad_norm": 0.725002110004425, "learning_rate": 4.340836695449679e-06, "loss": 1.9872, "step": 20938 }, { "epoch": 0.7, "grad_norm": 0.7511231303215027, "learning_rate": 4.339960411861419e-06, "loss": 2.0393, "step": 20939 }, { "epoch": 0.7, "grad_norm": 0.7751635313034058, "learning_rate": 4.33908419221697e-06, "loss": 2.0242, "step": 20940 }, { "epoch": 0.7, "grad_norm": 0.7222945690155029, "learning_rate": 4.338208036526227e-06, "loss": 2.013, "step": 20941 }, { "epoch": 0.7, "grad_norm": 0.7517219185829163, "learning_rate": 4.337331944799095e-06, "loss": 2.0914, "step": 20942 }, { "epoch": 0.7, "grad_norm": 0.7752137184143066, "learning_rate": 4.336455917045464e-06, "loss": 2.0267, "step": 20943 }, { "epoch": 0.7, "grad_norm": 0.7595149278640747, "learning_rate": 4.335579953275235e-06, "loss": 2.0407, "step": 20944 }, { "epoch": 0.7, "grad_norm": 0.7288411259651184, "learning_rate": 4.334704053498307e-06, "loss": 2.0686, "step": 20945 }, { "epoch": 0.7, "grad_norm": 0.7827559113502502, "learning_rate": 4.333828217724572e-06, "loss": 2.0261, "step": 20946 }, { "epoch": 0.7, "grad_norm": 0.7418181300163269, "learning_rate": 4.3329524459639235e-06, "loss": 2.0447, "step": 20947 }, { "epoch": 0.7, "grad_norm": 0.7492141127586365, "learning_rate": 4.33207673822626e-06, "loss": 2.0065, "step": 20948 }, { "epoch": 0.7, "grad_norm": 0.7948200702667236, "learning_rate": 4.331201094521471e-06, "loss": 2.016, "step": 20949 }, { "epoch": 0.7, "grad_norm": 0.7476620674133301, "learning_rate": 4.330325514859447e-06, "loss": 2.0004, "step": 20950 }, { "epoch": 0.7, "grad_norm": 0.7451117634773254, "learning_rate": 4.329449999250082e-06, "loss": 2.0602, "step": 20951 }, { "epoch": 0.7, "grad_norm": 0.7352684140205383, "learning_rate": 4.328574547703272e-06, "loss": 2.0919, "step": 20952 }, { "epoch": 0.7, "grad_norm": 0.738615870475769, "learning_rate": 4.3276991602288975e-06, "loss": 1.9731, "step": 20953 }, { "epoch": 0.7, "grad_norm": 0.7358562350273132, "learning_rate": 4.326823836836859e-06, "loss": 2.0772, "step": 20954 }, { "epoch": 0.7, "grad_norm": 0.7248356342315674, "learning_rate": 4.325948577537039e-06, "loss": 2.0315, "step": 20955 }, { "epoch": 0.7, "grad_norm": 0.7414695620536804, "learning_rate": 4.3250733823393245e-06, "loss": 1.9947, "step": 20956 }, { "epoch": 0.7, "grad_norm": 0.7431443929672241, "learning_rate": 4.324198251253604e-06, "loss": 2.1041, "step": 20957 }, { "epoch": 0.7, "grad_norm": 0.7553936839103699, "learning_rate": 4.3233231842897725e-06, "loss": 2.0654, "step": 20958 }, { "epoch": 0.7, "grad_norm": 0.7770218849182129, "learning_rate": 4.3224481814577015e-06, "loss": 2.0002, "step": 20959 }, { "epoch": 0.7, "grad_norm": 0.755456805229187, "learning_rate": 4.321573242767284e-06, "loss": 2.0834, "step": 20960 }, { "epoch": 0.7, "grad_norm": 0.755619466304779, "learning_rate": 4.3206983682284075e-06, "loss": 2.0519, "step": 20961 }, { "epoch": 0.7, "grad_norm": 0.7442072033882141, "learning_rate": 4.319823557850948e-06, "loss": 2.082, "step": 20962 }, { "epoch": 0.7, "grad_norm": 0.7534950375556946, "learning_rate": 4.3189488116447975e-06, "loss": 2.083, "step": 20963 }, { "epoch": 0.7, "grad_norm": 0.7583304047584534, "learning_rate": 4.318074129619835e-06, "loss": 2.0234, "step": 20964 }, { "epoch": 0.7, "grad_norm": 0.7556930184364319, "learning_rate": 4.317199511785935e-06, "loss": 2.0207, "step": 20965 }, { "epoch": 0.7, "grad_norm": 0.7183598875999451, "learning_rate": 4.316324958152987e-06, "loss": 2.0972, "step": 20966 }, { "epoch": 0.7, "grad_norm": 0.734172523021698, "learning_rate": 4.315450468730871e-06, "loss": 2.1143, "step": 20967 }, { "epoch": 0.7, "grad_norm": 0.71507328748703, "learning_rate": 4.314576043529464e-06, "loss": 2.035, "step": 20968 }, { "epoch": 0.7, "grad_norm": 0.7492097020149231, "learning_rate": 4.313701682558641e-06, "loss": 2.0405, "step": 20969 }, { "epoch": 0.7, "grad_norm": 0.7467382550239563, "learning_rate": 4.312827385828287e-06, "loss": 2.1079, "step": 20970 }, { "epoch": 0.7, "grad_norm": 0.7231432199478149, "learning_rate": 4.311953153348278e-06, "loss": 2.0342, "step": 20971 }, { "epoch": 0.7, "grad_norm": 0.7084182500839233, "learning_rate": 4.311078985128484e-06, "loss": 2.0111, "step": 20972 }, { "epoch": 0.7, "grad_norm": 0.7634757161140442, "learning_rate": 4.310204881178787e-06, "loss": 2.0548, "step": 20973 }, { "epoch": 0.7, "grad_norm": 0.7556639313697815, "learning_rate": 4.3093308415090625e-06, "loss": 2.0337, "step": 20974 }, { "epoch": 0.7, "grad_norm": 0.7499269843101501, "learning_rate": 4.3084568661291805e-06, "loss": 1.9524, "step": 20975 }, { "epoch": 0.7, "grad_norm": 0.7183787226676941, "learning_rate": 4.30758295504902e-06, "loss": 2.1186, "step": 20976 }, { "epoch": 0.7, "grad_norm": 0.7539067268371582, "learning_rate": 4.306709108278452e-06, "loss": 1.9414, "step": 20977 }, { "epoch": 0.7, "grad_norm": 0.7588797807693481, "learning_rate": 4.305835325827344e-06, "loss": 2.1252, "step": 20978 }, { "epoch": 0.7, "grad_norm": 0.7294199466705322, "learning_rate": 4.304961607705571e-06, "loss": 2.0471, "step": 20979 }, { "epoch": 0.7, "grad_norm": 0.7335314750671387, "learning_rate": 4.304087953923012e-06, "loss": 2.0491, "step": 20980 }, { "epoch": 0.7, "grad_norm": 0.7508732080459595, "learning_rate": 4.303214364489522e-06, "loss": 2.0798, "step": 20981 }, { "epoch": 0.7, "grad_norm": 0.7473549842834473, "learning_rate": 4.302340839414977e-06, "loss": 2.0493, "step": 20982 }, { "epoch": 0.7, "grad_norm": 0.7533383965492249, "learning_rate": 4.30146737870925e-06, "loss": 2.0773, "step": 20983 }, { "epoch": 0.7, "grad_norm": 0.7534587979316711, "learning_rate": 4.300593982382201e-06, "loss": 2.0773, "step": 20984 }, { "epoch": 0.7, "grad_norm": 0.7360287308692932, "learning_rate": 4.299720650443705e-06, "loss": 2.0046, "step": 20985 }, { "epoch": 0.7, "grad_norm": 0.7410458922386169, "learning_rate": 4.298847382903624e-06, "loss": 2.0817, "step": 20986 }, { "epoch": 0.7, "grad_norm": 0.758012056350708, "learning_rate": 4.29797417977182e-06, "loss": 2.0247, "step": 20987 }, { "epoch": 0.7, "grad_norm": 0.7606417536735535, "learning_rate": 4.297101041058163e-06, "loss": 2.0312, "step": 20988 }, { "epoch": 0.7, "grad_norm": 0.774811863899231, "learning_rate": 4.296227966772519e-06, "loss": 2.0086, "step": 20989 }, { "epoch": 0.7, "grad_norm": 0.7648612856864929, "learning_rate": 4.295354956924749e-06, "loss": 2.1601, "step": 20990 }, { "epoch": 0.7, "grad_norm": 0.7314696311950684, "learning_rate": 4.294482011524712e-06, "loss": 2.0637, "step": 20991 }, { "epoch": 0.7, "grad_norm": 0.7665406465530396, "learning_rate": 4.293609130582274e-06, "loss": 2.0349, "step": 20992 }, { "epoch": 0.7, "grad_norm": 0.7729514837265015, "learning_rate": 4.2927363141072985e-06, "loss": 2.1032, "step": 20993 }, { "epoch": 0.7, "grad_norm": 0.792939305305481, "learning_rate": 4.2918635621096434e-06, "loss": 1.9774, "step": 20994 }, { "epoch": 0.7, "grad_norm": 0.7295057773590088, "learning_rate": 4.290990874599165e-06, "loss": 2.1058, "step": 20995 }, { "epoch": 0.7, "grad_norm": 0.7357131242752075, "learning_rate": 4.29011825158573e-06, "loss": 2.0721, "step": 20996 }, { "epoch": 0.7, "grad_norm": 0.7390365600585938, "learning_rate": 4.289245693079188e-06, "loss": 2.0238, "step": 20997 }, { "epoch": 0.7, "grad_norm": 0.730293869972229, "learning_rate": 4.288373199089406e-06, "loss": 2.0366, "step": 20998 }, { "epoch": 0.7, "grad_norm": 0.7476906180381775, "learning_rate": 4.287500769626236e-06, "loss": 2.0088, "step": 20999 }, { "epoch": 0.7, "grad_norm": 0.7538316249847412, "learning_rate": 4.28662840469953e-06, "loss": 2.0543, "step": 21000 }, { "epoch": 0.7, "grad_norm": 0.7453371286392212, "learning_rate": 4.285756104319149e-06, "loss": 1.9689, "step": 21001 }, { "epoch": 0.7, "grad_norm": 0.7979702949523926, "learning_rate": 4.28488386849495e-06, "loss": 2.0149, "step": 21002 }, { "epoch": 0.7, "grad_norm": 0.7417769432067871, "learning_rate": 4.2840116972367825e-06, "loss": 2.0298, "step": 21003 }, { "epoch": 0.7, "grad_norm": 0.7227742075920105, "learning_rate": 4.2831395905544995e-06, "loss": 2.0126, "step": 21004 }, { "epoch": 0.7, "grad_norm": 0.7313569188117981, "learning_rate": 4.282267548457957e-06, "loss": 2.1004, "step": 21005 }, { "epoch": 0.7, "grad_norm": 0.7342365384101868, "learning_rate": 4.281395570957002e-06, "loss": 2.0503, "step": 21006 }, { "epoch": 0.7, "grad_norm": 0.7255306243896484, "learning_rate": 4.280523658061492e-06, "loss": 2.0229, "step": 21007 }, { "epoch": 0.7, "grad_norm": 0.7710427641868591, "learning_rate": 4.279651809781269e-06, "loss": 2.041, "step": 21008 }, { "epoch": 0.7, "grad_norm": 0.7418169975280762, "learning_rate": 4.2787800261261924e-06, "loss": 2.0617, "step": 21009 }, { "epoch": 0.7, "grad_norm": 0.747134804725647, "learning_rate": 4.277908307106101e-06, "loss": 2.0209, "step": 21010 }, { "epoch": 0.7, "grad_norm": 0.7242542505264282, "learning_rate": 4.277036652730854e-06, "loss": 1.9805, "step": 21011 }, { "epoch": 0.7, "grad_norm": 0.7391896843910217, "learning_rate": 4.276165063010291e-06, "loss": 2.0791, "step": 21012 }, { "epoch": 0.7, "grad_norm": 0.7502656579017639, "learning_rate": 4.275293537954257e-06, "loss": 1.9633, "step": 21013 }, { "epoch": 0.7, "grad_norm": 0.7289907336235046, "learning_rate": 4.274422077572602e-06, "loss": 2.0852, "step": 21014 }, { "epoch": 0.7, "grad_norm": 0.7822039127349854, "learning_rate": 4.273550681875175e-06, "loss": 2.0263, "step": 21015 }, { "epoch": 0.7, "grad_norm": 0.7572738528251648, "learning_rate": 4.272679350871816e-06, "loss": 2.0733, "step": 21016 }, { "epoch": 0.7, "grad_norm": 0.7437106966972351, "learning_rate": 4.271808084572365e-06, "loss": 1.9805, "step": 21017 }, { "epoch": 0.7, "grad_norm": 0.7589899897575378, "learning_rate": 4.270936882986674e-06, "loss": 2.083, "step": 21018 }, { "epoch": 0.7, "grad_norm": 0.7456165552139282, "learning_rate": 4.2700657461245766e-06, "loss": 2.1259, "step": 21019 }, { "epoch": 0.7, "grad_norm": 0.7609027624130249, "learning_rate": 4.269194673995921e-06, "loss": 2.091, "step": 21020 }, { "epoch": 0.7, "grad_norm": 0.7182235717773438, "learning_rate": 4.268323666610547e-06, "loss": 2.0344, "step": 21021 }, { "epoch": 0.7, "grad_norm": 0.7459161877632141, "learning_rate": 4.267452723978288e-06, "loss": 2.0636, "step": 21022 }, { "epoch": 0.7, "grad_norm": 0.7360116839408875, "learning_rate": 4.266581846108989e-06, "loss": 2.1076, "step": 21023 }, { "epoch": 0.7, "grad_norm": 0.7476702332496643, "learning_rate": 4.265711033012491e-06, "loss": 2.0299, "step": 21024 }, { "epoch": 0.7, "grad_norm": 0.7542983889579773, "learning_rate": 4.2648402846986305e-06, "loss": 1.9617, "step": 21025 }, { "epoch": 0.7, "grad_norm": 0.7456952929496765, "learning_rate": 4.2639696011772394e-06, "loss": 2.0613, "step": 21026 }, { "epoch": 0.7, "grad_norm": 0.7220632433891296, "learning_rate": 4.263098982458162e-06, "loss": 2.0514, "step": 21027 }, { "epoch": 0.7, "grad_norm": 0.8018488883972168, "learning_rate": 4.262228428551225e-06, "loss": 2.1148, "step": 21028 }, { "epoch": 0.7, "grad_norm": 0.736148476600647, "learning_rate": 4.2613579394662726e-06, "loss": 2.0039, "step": 21029 }, { "epoch": 0.7, "grad_norm": 0.7318221926689148, "learning_rate": 4.260487515213133e-06, "loss": 2.0497, "step": 21030 }, { "epoch": 0.7, "grad_norm": 0.7555590271949768, "learning_rate": 4.259617155801644e-06, "loss": 1.9626, "step": 21031 }, { "epoch": 0.7, "grad_norm": 0.715420126914978, "learning_rate": 4.258746861241633e-06, "loss": 2.0491, "step": 21032 }, { "epoch": 0.7, "grad_norm": 0.7699520587921143, "learning_rate": 4.257876631542939e-06, "loss": 2.0804, "step": 21033 }, { "epoch": 0.7, "grad_norm": 0.7378990650177002, "learning_rate": 4.25700646671539e-06, "loss": 2.055, "step": 21034 }, { "epoch": 0.7, "grad_norm": 0.746109664440155, "learning_rate": 4.256136366768812e-06, "loss": 2.0452, "step": 21035 }, { "epoch": 0.7, "grad_norm": 0.7395492792129517, "learning_rate": 4.255266331713038e-06, "loss": 2.0725, "step": 21036 }, { "epoch": 0.7, "grad_norm": 0.7483282685279846, "learning_rate": 4.2543963615579035e-06, "loss": 2.066, "step": 21037 }, { "epoch": 0.7, "grad_norm": 0.7508336305618286, "learning_rate": 4.2535264563132305e-06, "loss": 1.9821, "step": 21038 }, { "epoch": 0.7, "grad_norm": 0.7591149210929871, "learning_rate": 4.252656615988845e-06, "loss": 2.0181, "step": 21039 }, { "epoch": 0.7, "grad_norm": 0.7492714524269104, "learning_rate": 4.251786840594581e-06, "loss": 2.1037, "step": 21040 }, { "epoch": 0.7, "grad_norm": 0.7390636205673218, "learning_rate": 4.250917130140256e-06, "loss": 2.0334, "step": 21041 }, { "epoch": 0.7, "grad_norm": 0.752388596534729, "learning_rate": 4.250047484635703e-06, "loss": 2.0624, "step": 21042 }, { "epoch": 0.7, "grad_norm": 0.7288717031478882, "learning_rate": 4.2491779040907446e-06, "loss": 2.0611, "step": 21043 }, { "epoch": 0.7, "grad_norm": 0.7594519853591919, "learning_rate": 4.2483083885152e-06, "loss": 2.0125, "step": 21044 }, { "epoch": 0.7, "grad_norm": 0.7400230765342712, "learning_rate": 4.247438937918897e-06, "loss": 2.0037, "step": 21045 }, { "epoch": 0.7, "grad_norm": 0.742692768573761, "learning_rate": 4.2465695523116605e-06, "loss": 2.0332, "step": 21046 }, { "epoch": 0.7, "grad_norm": 0.7476955056190491, "learning_rate": 4.245700231703309e-06, "loss": 1.9941, "step": 21047 }, { "epoch": 0.7, "grad_norm": 0.7290368676185608, "learning_rate": 4.244830976103661e-06, "loss": 2.0993, "step": 21048 }, { "epoch": 0.7, "grad_norm": 0.7544967532157898, "learning_rate": 4.243961785522543e-06, "loss": 2.0124, "step": 21049 }, { "epoch": 0.7, "grad_norm": 0.7738626599311829, "learning_rate": 4.243092659969769e-06, "loss": 2.0769, "step": 21050 }, { "epoch": 0.7, "grad_norm": 0.7474047541618347, "learning_rate": 4.242223599455163e-06, "loss": 2.0246, "step": 21051 }, { "epoch": 0.7, "grad_norm": 0.7523102760314941, "learning_rate": 4.241354603988537e-06, "loss": 2.0479, "step": 21052 }, { "epoch": 0.7, "grad_norm": 0.7259365916252136, "learning_rate": 4.240485673579714e-06, "loss": 2.0748, "step": 21053 }, { "epoch": 0.7, "grad_norm": 0.7227917313575745, "learning_rate": 4.239616808238506e-06, "loss": 2.1259, "step": 21054 }, { "epoch": 0.7, "grad_norm": 0.7475734353065491, "learning_rate": 4.238748007974734e-06, "loss": 2.0844, "step": 21055 }, { "epoch": 0.7, "grad_norm": 0.7138159871101379, "learning_rate": 4.237879272798212e-06, "loss": 1.9265, "step": 21056 }, { "epoch": 0.7, "grad_norm": 0.774865448474884, "learning_rate": 4.237010602718749e-06, "loss": 2.0619, "step": 21057 }, { "epoch": 0.7, "grad_norm": 0.7187455892562866, "learning_rate": 4.236141997746163e-06, "loss": 2.0425, "step": 21058 }, { "epoch": 0.7, "grad_norm": 0.7787527441978455, "learning_rate": 4.23527345789027e-06, "loss": 2.0832, "step": 21059 }, { "epoch": 0.7, "grad_norm": 0.7430287599563599, "learning_rate": 4.234404983160879e-06, "loss": 2.0817, "step": 21060 }, { "epoch": 0.7, "grad_norm": 0.747592031955719, "learning_rate": 4.233536573567798e-06, "loss": 2.037, "step": 21061 }, { "epoch": 0.7, "grad_norm": 0.7205986380577087, "learning_rate": 4.232668229120845e-06, "loss": 1.9729, "step": 21062 }, { "epoch": 0.7, "grad_norm": 0.7626847624778748, "learning_rate": 4.2317999498298225e-06, "loss": 2.0343, "step": 21063 }, { "epoch": 0.7, "grad_norm": 0.7446271181106567, "learning_rate": 4.230931735704548e-06, "loss": 2.0758, "step": 21064 }, { "epoch": 0.7, "grad_norm": 0.738436222076416, "learning_rate": 4.230063586754824e-06, "loss": 1.9513, "step": 21065 }, { "epoch": 0.7, "grad_norm": 0.7439196705818176, "learning_rate": 4.229195502990459e-06, "loss": 2.0742, "step": 21066 }, { "epoch": 0.7, "grad_norm": 0.7966446876525879, "learning_rate": 4.228327484421258e-06, "loss": 2.033, "step": 21067 }, { "epoch": 0.7, "grad_norm": 0.7459100484848022, "learning_rate": 4.227459531057036e-06, "loss": 2.0206, "step": 21068 }, { "epoch": 0.7, "grad_norm": 0.7852141857147217, "learning_rate": 4.2265916429075936e-06, "loss": 2.1247, "step": 21069 }, { "epoch": 0.7, "grad_norm": 0.7393693327903748, "learning_rate": 4.225723819982732e-06, "loss": 2.1023, "step": 21070 }, { "epoch": 0.7, "grad_norm": 0.7960148453712463, "learning_rate": 4.224856062292261e-06, "loss": 2.0011, "step": 21071 }, { "epoch": 0.7, "grad_norm": 0.7540220022201538, "learning_rate": 4.2239883698459786e-06, "loss": 2.0546, "step": 21072 }, { "epoch": 0.7, "grad_norm": 0.7263895869255066, "learning_rate": 4.223120742653694e-06, "loss": 2.0958, "step": 21073 }, { "epoch": 0.7, "grad_norm": 0.7181264758110046, "learning_rate": 4.222253180725202e-06, "loss": 2.0466, "step": 21074 }, { "epoch": 0.7, "grad_norm": 0.7425655722618103, "learning_rate": 4.2213856840703115e-06, "loss": 2.0145, "step": 21075 }, { "epoch": 0.7, "grad_norm": 0.7140820622444153, "learning_rate": 4.220518252698814e-06, "loss": 2.0511, "step": 21076 }, { "epoch": 0.7, "grad_norm": 0.7371968626976013, "learning_rate": 4.2196508866205155e-06, "loss": 2.0699, "step": 21077 }, { "epoch": 0.7, "grad_norm": 0.7523742914199829, "learning_rate": 4.2187835858452205e-06, "loss": 2.0723, "step": 21078 }, { "epoch": 0.7, "grad_norm": 0.7941635847091675, "learning_rate": 4.217916350382713e-06, "loss": 2.0678, "step": 21079 }, { "epoch": 0.7, "grad_norm": 0.7553759813308716, "learning_rate": 4.217049180242798e-06, "loss": 2.083, "step": 21080 }, { "epoch": 0.7, "grad_norm": 0.7196733951568604, "learning_rate": 4.2161820754352765e-06, "loss": 2.1521, "step": 21081 }, { "epoch": 0.7, "grad_norm": 0.7599328756332397, "learning_rate": 4.2153150359699405e-06, "loss": 2.0044, "step": 21082 }, { "epoch": 0.7, "grad_norm": 0.7524970769882202, "learning_rate": 4.2144480618565794e-06, "loss": 2.0398, "step": 21083 }, { "epoch": 0.7, "grad_norm": 0.766802966594696, "learning_rate": 4.2135811531049985e-06, "loss": 2.0346, "step": 21084 }, { "epoch": 0.7, "grad_norm": 0.7712546586990356, "learning_rate": 4.212714309724984e-06, "loss": 2.0482, "step": 21085 }, { "epoch": 0.7, "grad_norm": 0.7100131511688232, "learning_rate": 4.211847531726333e-06, "loss": 2.0459, "step": 21086 }, { "epoch": 0.7, "grad_norm": 0.7697110176086426, "learning_rate": 4.210980819118837e-06, "loss": 2.0677, "step": 21087 }, { "epoch": 0.7, "grad_norm": 0.7413848638534546, "learning_rate": 4.210114171912284e-06, "loss": 2.047, "step": 21088 }, { "epoch": 0.7, "grad_norm": 0.757116973400116, "learning_rate": 4.209247590116467e-06, "loss": 2.0313, "step": 21089 }, { "epoch": 0.7, "grad_norm": 0.741335928440094, "learning_rate": 4.208381073741182e-06, "loss": 2.0835, "step": 21090 }, { "epoch": 0.7, "grad_norm": 0.7616726756095886, "learning_rate": 4.2075146227962125e-06, "loss": 2.0021, "step": 21091 }, { "epoch": 0.7, "grad_norm": 0.7213118076324463, "learning_rate": 4.2066482372913455e-06, "loss": 1.9977, "step": 21092 }, { "epoch": 0.7, "grad_norm": 0.723595917224884, "learning_rate": 4.2057819172363705e-06, "loss": 2.0951, "step": 21093 }, { "epoch": 0.7, "grad_norm": 0.7553505897521973, "learning_rate": 4.204915662641079e-06, "loss": 2.0968, "step": 21094 }, { "epoch": 0.7, "grad_norm": 0.7894681692123413, "learning_rate": 4.2040494735152545e-06, "loss": 2.0131, "step": 21095 }, { "epoch": 0.7, "grad_norm": 0.7436649799346924, "learning_rate": 4.203183349868678e-06, "loss": 2.0492, "step": 21096 }, { "epoch": 0.7, "grad_norm": 0.7550320625305176, "learning_rate": 4.202317291711143e-06, "loss": 1.998, "step": 21097 }, { "epoch": 0.7, "grad_norm": 0.7144345641136169, "learning_rate": 4.201451299052426e-06, "loss": 1.9696, "step": 21098 }, { "epoch": 0.7, "grad_norm": 0.7527051568031311, "learning_rate": 4.200585371902313e-06, "loss": 2.009, "step": 21099 }, { "epoch": 0.7, "grad_norm": 0.7350705862045288, "learning_rate": 4.199719510270597e-06, "loss": 2.0169, "step": 21100 }, { "epoch": 0.7, "grad_norm": 0.7215577960014343, "learning_rate": 4.198853714167042e-06, "loss": 2.0594, "step": 21101 }, { "epoch": 0.7, "grad_norm": 0.7307222485542297, "learning_rate": 4.197987983601438e-06, "loss": 2.0066, "step": 21102 }, { "epoch": 0.7, "grad_norm": 0.7510185241699219, "learning_rate": 4.197122318583568e-06, "loss": 2.0858, "step": 21103 }, { "epoch": 0.7, "grad_norm": 0.7271878123283386, "learning_rate": 4.196256719123212e-06, "loss": 2.0209, "step": 21104 }, { "epoch": 0.7, "grad_norm": 0.7476165294647217, "learning_rate": 4.19539118523014e-06, "loss": 2.0544, "step": 21105 }, { "epoch": 0.7, "grad_norm": 0.7673901319503784, "learning_rate": 4.194525716914142e-06, "loss": 2.0345, "step": 21106 }, { "epoch": 0.7, "grad_norm": 0.7536939978599548, "learning_rate": 4.193660314184985e-06, "loss": 1.9984, "step": 21107 }, { "epoch": 0.7, "grad_norm": 0.7503591179847717, "learning_rate": 4.1927949770524515e-06, "loss": 2.0912, "step": 21108 }, { "epoch": 0.7, "grad_norm": 0.7571287155151367, "learning_rate": 4.191929705526321e-06, "loss": 2.0141, "step": 21109 }, { "epoch": 0.7, "grad_norm": 0.7579281330108643, "learning_rate": 4.191064499616364e-06, "loss": 2.0657, "step": 21110 }, { "epoch": 0.7, "grad_norm": 0.7343004941940308, "learning_rate": 4.190199359332353e-06, "loss": 2.0356, "step": 21111 }, { "epoch": 0.7, "grad_norm": 0.7436162233352661, "learning_rate": 4.189334284684068e-06, "loss": 2.0623, "step": 21112 }, { "epoch": 0.7, "grad_norm": 0.7513119578361511, "learning_rate": 4.18846927568128e-06, "loss": 2.0481, "step": 21113 }, { "epoch": 0.7, "grad_norm": 0.7383400797843933, "learning_rate": 4.187604332333754e-06, "loss": 2.0409, "step": 21114 }, { "epoch": 0.7, "grad_norm": 0.7594037055969238, "learning_rate": 4.18673945465127e-06, "loss": 1.9551, "step": 21115 }, { "epoch": 0.7, "grad_norm": 0.7496464848518372, "learning_rate": 4.1858746426436e-06, "loss": 2.0357, "step": 21116 }, { "epoch": 0.7, "grad_norm": 0.7450153827667236, "learning_rate": 4.185009896320511e-06, "loss": 2.0899, "step": 21117 }, { "epoch": 0.7, "grad_norm": 0.7222242951393127, "learning_rate": 4.184145215691768e-06, "loss": 2.0336, "step": 21118 }, { "epoch": 0.7, "grad_norm": 0.7727676630020142, "learning_rate": 4.183280600767148e-06, "loss": 1.9743, "step": 21119 }, { "epoch": 0.7, "grad_norm": 0.8211701512336731, "learning_rate": 4.1824160515564116e-06, "loss": 2.0295, "step": 21120 }, { "epoch": 0.7, "grad_norm": 0.7615621089935303, "learning_rate": 4.181551568069328e-06, "loss": 2.1144, "step": 21121 }, { "epoch": 0.7, "grad_norm": 0.7511119842529297, "learning_rate": 4.180687150315673e-06, "loss": 2.0675, "step": 21122 }, { "epoch": 0.7, "grad_norm": 0.7202280759811401, "learning_rate": 4.179822798305198e-06, "loss": 2.0432, "step": 21123 }, { "epoch": 0.7, "grad_norm": 0.7367507815361023, "learning_rate": 4.1789585120476714e-06, "loss": 2.1228, "step": 21124 }, { "epoch": 0.7, "grad_norm": 0.7337611317634583, "learning_rate": 4.178094291552866e-06, "loss": 2.0564, "step": 21125 }, { "epoch": 0.7, "grad_norm": 0.760400116443634, "learning_rate": 4.177230136830538e-06, "loss": 2.0925, "step": 21126 }, { "epoch": 0.7, "grad_norm": 0.7509064674377441, "learning_rate": 4.176366047890448e-06, "loss": 2.1394, "step": 21127 }, { "epoch": 0.7, "grad_norm": 0.7551696300506592, "learning_rate": 4.175502024742365e-06, "loss": 2.0501, "step": 21128 }, { "epoch": 0.7, "grad_norm": 0.7799085974693298, "learning_rate": 4.174638067396044e-06, "loss": 2.065, "step": 21129 }, { "epoch": 0.7, "grad_norm": 0.7300541400909424, "learning_rate": 4.173774175861247e-06, "loss": 2.0639, "step": 21130 }, { "epoch": 0.7, "grad_norm": 0.7414801716804504, "learning_rate": 4.172910350147739e-06, "loss": 2.0452, "step": 21131 }, { "epoch": 0.7, "grad_norm": 0.7696745991706848, "learning_rate": 4.172046590265275e-06, "loss": 2.094, "step": 21132 }, { "epoch": 0.7, "grad_norm": 0.7716477513313293, "learning_rate": 4.171182896223609e-06, "loss": 2.1346, "step": 21133 }, { "epoch": 0.7, "grad_norm": 0.7372884154319763, "learning_rate": 4.170319268032506e-06, "loss": 2.0353, "step": 21134 }, { "epoch": 0.7, "grad_norm": 0.7364732027053833, "learning_rate": 4.16945570570172e-06, "loss": 2.0391, "step": 21135 }, { "epoch": 0.7, "grad_norm": 0.7534252405166626, "learning_rate": 4.168592209241002e-06, "loss": 2.082, "step": 21136 }, { "epoch": 0.7, "grad_norm": 0.7084001898765564, "learning_rate": 4.167728778660113e-06, "loss": 1.9833, "step": 21137 }, { "epoch": 0.7, "grad_norm": 0.7450693249702454, "learning_rate": 4.166865413968809e-06, "loss": 2.0622, "step": 21138 }, { "epoch": 0.7, "grad_norm": 0.7291602492332458, "learning_rate": 4.166002115176837e-06, "loss": 2.0135, "step": 21139 }, { "epoch": 0.7, "grad_norm": 0.7351317405700684, "learning_rate": 4.165138882293959e-06, "loss": 2.0216, "step": 21140 }, { "epoch": 0.7, "grad_norm": 0.7490224242210388, "learning_rate": 4.16427571532992e-06, "loss": 2.0443, "step": 21141 }, { "epoch": 0.7, "grad_norm": 0.7626574635505676, "learning_rate": 4.163412614294473e-06, "loss": 2.0468, "step": 21142 }, { "epoch": 0.7, "grad_norm": 0.7457489371299744, "learning_rate": 4.162549579197368e-06, "loss": 1.9997, "step": 21143 }, { "epoch": 0.7, "grad_norm": 0.7390961647033691, "learning_rate": 4.1616866100483646e-06, "loss": 2.088, "step": 21144 }, { "epoch": 0.7, "grad_norm": 0.7708448171615601, "learning_rate": 4.160823706857197e-06, "loss": 2.1157, "step": 21145 }, { "epoch": 0.7, "grad_norm": 0.7340668439865112, "learning_rate": 4.1599608696336215e-06, "loss": 2.0811, "step": 21146 }, { "epoch": 0.7, "grad_norm": 0.7487319111824036, "learning_rate": 4.159098098387388e-06, "loss": 2.0759, "step": 21147 }, { "epoch": 0.7, "grad_norm": 0.7525543570518494, "learning_rate": 4.158235393128242e-06, "loss": 2.1056, "step": 21148 }, { "epoch": 0.7, "grad_norm": 0.7721343040466309, "learning_rate": 4.157372753865925e-06, "loss": 2.0569, "step": 21149 }, { "epoch": 0.7, "grad_norm": 0.711355984210968, "learning_rate": 4.156510180610191e-06, "loss": 2.0776, "step": 21150 }, { "epoch": 0.7, "grad_norm": 0.7315240502357483, "learning_rate": 4.155647673370775e-06, "loss": 2.0584, "step": 21151 }, { "epoch": 0.7, "grad_norm": 0.748462975025177, "learning_rate": 4.154785232157428e-06, "loss": 2.0153, "step": 21152 }, { "epoch": 0.7, "grad_norm": 0.7599499821662903, "learning_rate": 4.153922856979894e-06, "loss": 2.0382, "step": 21153 }, { "epoch": 0.7, "grad_norm": 0.7571646571159363, "learning_rate": 4.153060547847915e-06, "loss": 1.9976, "step": 21154 }, { "epoch": 0.7, "grad_norm": 0.7218501567840576, "learning_rate": 4.152198304771226e-06, "loss": 2.0599, "step": 21155 }, { "epoch": 0.7, "grad_norm": 0.7356863021850586, "learning_rate": 4.1513361277595775e-06, "loss": 2.0344, "step": 21156 }, { "epoch": 0.7, "grad_norm": 0.78043133020401, "learning_rate": 4.150474016822706e-06, "loss": 2.1459, "step": 21157 }, { "epoch": 0.7, "grad_norm": 0.7683544754981995, "learning_rate": 4.149611971970348e-06, "loss": 2.0762, "step": 21158 }, { "epoch": 0.7, "grad_norm": 0.8066489100456238, "learning_rate": 4.148749993212245e-06, "loss": 2.0565, "step": 21159 }, { "epoch": 0.7, "grad_norm": 0.7338630557060242, "learning_rate": 4.147888080558139e-06, "loss": 2.012, "step": 21160 }, { "epoch": 0.7, "grad_norm": 0.7711341381072998, "learning_rate": 4.147026234017759e-06, "loss": 2.0805, "step": 21161 }, { "epoch": 0.7, "grad_norm": 0.7382785081863403, "learning_rate": 4.146164453600851e-06, "loss": 2.1114, "step": 21162 }, { "epoch": 0.7, "grad_norm": 0.7462503910064697, "learning_rate": 4.145302739317147e-06, "loss": 2.0668, "step": 21163 }, { "epoch": 0.7, "grad_norm": 0.7692015171051025, "learning_rate": 4.1444410911763766e-06, "loss": 2.0866, "step": 21164 }, { "epoch": 0.7, "grad_norm": 0.7501469254493713, "learning_rate": 4.14357950918828e-06, "loss": 2.1324, "step": 21165 }, { "epoch": 0.7, "grad_norm": 0.7722364664077759, "learning_rate": 4.142717993362596e-06, "loss": 2.0301, "step": 21166 }, { "epoch": 0.7, "grad_norm": 0.7971662282943726, "learning_rate": 4.141856543709045e-06, "loss": 2.0892, "step": 21167 }, { "epoch": 0.7, "grad_norm": 0.749788224697113, "learning_rate": 4.140995160237366e-06, "loss": 2.0432, "step": 21168 }, { "epoch": 0.7, "grad_norm": 0.7396644949913025, "learning_rate": 4.140133842957292e-06, "loss": 2.0979, "step": 21169 }, { "epoch": 0.7, "grad_norm": 0.7401080131530762, "learning_rate": 4.139272591878553e-06, "loss": 2.1058, "step": 21170 }, { "epoch": 0.7, "grad_norm": 0.7734871506690979, "learning_rate": 4.138411407010874e-06, "loss": 2.0313, "step": 21171 }, { "epoch": 0.7, "grad_norm": 0.7337951064109802, "learning_rate": 4.1375502883639905e-06, "loss": 2.0812, "step": 21172 }, { "epoch": 0.7, "grad_norm": 0.7274264693260193, "learning_rate": 4.1366892359476255e-06, "loss": 2.0601, "step": 21173 }, { "epoch": 0.7, "grad_norm": 0.7199066877365112, "learning_rate": 4.135828249771509e-06, "loss": 2.0544, "step": 21174 }, { "epoch": 0.7, "grad_norm": 0.7285482883453369, "learning_rate": 4.134967329845371e-06, "loss": 1.9976, "step": 21175 }, { "epoch": 0.7, "grad_norm": 0.7522498965263367, "learning_rate": 4.134106476178935e-06, "loss": 2.1344, "step": 21176 }, { "epoch": 0.7, "grad_norm": 0.7349876761436462, "learning_rate": 4.133245688781923e-06, "loss": 2.0672, "step": 21177 }, { "epoch": 0.7, "grad_norm": 0.7467001676559448, "learning_rate": 4.132384967664063e-06, "loss": 2.0416, "step": 21178 }, { "epoch": 0.7, "grad_norm": 0.7529169321060181, "learning_rate": 4.131524312835086e-06, "loss": 2.0785, "step": 21179 }, { "epoch": 0.7, "grad_norm": 0.7465900182723999, "learning_rate": 4.130663724304701e-06, "loss": 2.0765, "step": 21180 }, { "epoch": 0.7, "grad_norm": 0.7788456678390503, "learning_rate": 4.129803202082638e-06, "loss": 1.976, "step": 21181 }, { "epoch": 0.7, "grad_norm": 0.7323268055915833, "learning_rate": 4.12894274617862e-06, "loss": 2.0643, "step": 21182 }, { "epoch": 0.7, "grad_norm": 0.759446382522583, "learning_rate": 4.128082356602364e-06, "loss": 2.0903, "step": 21183 }, { "epoch": 0.7, "grad_norm": 0.740777313709259, "learning_rate": 4.127222033363596e-06, "loss": 2.1029, "step": 21184 }, { "epoch": 0.7, "grad_norm": 0.7321988344192505, "learning_rate": 4.1263617764720305e-06, "loss": 2.109, "step": 21185 }, { "epoch": 0.7, "grad_norm": 0.7759171724319458, "learning_rate": 4.125501585937385e-06, "loss": 2.0956, "step": 21186 }, { "epoch": 0.7, "grad_norm": 0.7270573973655701, "learning_rate": 4.1246414617693785e-06, "loss": 2.0221, "step": 21187 }, { "epoch": 0.7, "grad_norm": 0.7446803450584412, "learning_rate": 4.123781403977737e-06, "loss": 2.0967, "step": 21188 }, { "epoch": 0.7, "grad_norm": 0.7315859198570251, "learning_rate": 4.122921412572163e-06, "loss": 1.9801, "step": 21189 }, { "epoch": 0.7, "grad_norm": 0.7851159572601318, "learning_rate": 4.122061487562378e-06, "loss": 2.1115, "step": 21190 }, { "epoch": 0.71, "grad_norm": 0.7572748064994812, "learning_rate": 4.121201628958101e-06, "loss": 2.042, "step": 21191 }, { "epoch": 0.71, "grad_norm": 0.8017840385437012, "learning_rate": 4.12034183676904e-06, "loss": 2.0625, "step": 21192 }, { "epoch": 0.71, "grad_norm": 0.7449313402175903, "learning_rate": 4.119482111004913e-06, "loss": 2.056, "step": 21193 }, { "epoch": 0.71, "grad_norm": 0.7714320421218872, "learning_rate": 4.118622451675428e-06, "loss": 2.0491, "step": 21194 }, { "epoch": 0.71, "grad_norm": 0.7843227386474609, "learning_rate": 4.117762858790304e-06, "loss": 2.0139, "step": 21195 }, { "epoch": 0.71, "grad_norm": 0.7438114285469055, "learning_rate": 4.116903332359243e-06, "loss": 2.078, "step": 21196 }, { "epoch": 0.71, "grad_norm": 0.7246606945991516, "learning_rate": 4.116043872391966e-06, "loss": 2.0367, "step": 21197 }, { "epoch": 0.71, "grad_norm": 0.7526752352714539, "learning_rate": 4.115184478898176e-06, "loss": 2.0745, "step": 21198 }, { "epoch": 0.71, "grad_norm": 0.7319991588592529, "learning_rate": 4.114325151887578e-06, "loss": 2.0645, "step": 21199 }, { "epoch": 0.71, "grad_norm": 0.7308666110038757, "learning_rate": 4.113465891369886e-06, "loss": 2.0629, "step": 21200 }, { "epoch": 0.71, "grad_norm": 0.742752194404602, "learning_rate": 4.112606697354814e-06, "loss": 2.045, "step": 21201 }, { "epoch": 0.71, "grad_norm": 0.724912703037262, "learning_rate": 4.111747569852053e-06, "loss": 2.0269, "step": 21202 }, { "epoch": 0.71, "grad_norm": 0.7503074407577515, "learning_rate": 4.110888508871319e-06, "loss": 2.1369, "step": 21203 }, { "epoch": 0.71, "grad_norm": 0.7225751876831055, "learning_rate": 4.110029514422318e-06, "loss": 2.0305, "step": 21204 }, { "epoch": 0.71, "grad_norm": 0.7492079734802246, "learning_rate": 4.109170586514747e-06, "loss": 2.0346, "step": 21205 }, { "epoch": 0.71, "grad_norm": 0.7500308752059937, "learning_rate": 4.108311725158319e-06, "loss": 1.9794, "step": 21206 }, { "epoch": 0.71, "grad_norm": 0.8019648790359497, "learning_rate": 4.107452930362732e-06, "loss": 1.995, "step": 21207 }, { "epoch": 0.71, "grad_norm": 0.7460503578186035, "learning_rate": 4.106594202137685e-06, "loss": 2.0129, "step": 21208 }, { "epoch": 0.71, "grad_norm": 0.7221418619155884, "learning_rate": 4.105735540492883e-06, "loss": 2.0176, "step": 21209 }, { "epoch": 0.71, "grad_norm": 0.7298194169998169, "learning_rate": 4.10487694543803e-06, "loss": 2.1055, "step": 21210 }, { "epoch": 0.71, "grad_norm": 0.7203308343887329, "learning_rate": 4.1040184169828215e-06, "loss": 2.0188, "step": 21211 }, { "epoch": 0.71, "grad_norm": 0.7507845163345337, "learning_rate": 4.103159955136955e-06, "loss": 2.0439, "step": 21212 }, { "epoch": 0.71, "grad_norm": 0.7286096811294556, "learning_rate": 4.102301559910134e-06, "loss": 2.066, "step": 21213 }, { "epoch": 0.71, "grad_norm": 0.7619630098342896, "learning_rate": 4.101443231312051e-06, "loss": 2.061, "step": 21214 }, { "epoch": 0.71, "grad_norm": 0.7443332076072693, "learning_rate": 4.100584969352409e-06, "loss": 2.0133, "step": 21215 }, { "epoch": 0.71, "grad_norm": 0.7614389061927795, "learning_rate": 4.099726774040896e-06, "loss": 2.0917, "step": 21216 }, { "epoch": 0.71, "grad_norm": 0.7440372705459595, "learning_rate": 4.098868645387217e-06, "loss": 2.0098, "step": 21217 }, { "epoch": 0.71, "grad_norm": 0.7518798112869263, "learning_rate": 4.098010583401058e-06, "loss": 2.0827, "step": 21218 }, { "epoch": 0.71, "grad_norm": 0.7577544450759888, "learning_rate": 4.097152588092119e-06, "loss": 2.0792, "step": 21219 }, { "epoch": 0.71, "grad_norm": 0.7271602749824524, "learning_rate": 4.096294659470092e-06, "loss": 2.0068, "step": 21220 }, { "epoch": 0.71, "grad_norm": 0.7386311292648315, "learning_rate": 4.095436797544663e-06, "loss": 2.046, "step": 21221 }, { "epoch": 0.71, "grad_norm": 0.7639943361282349, "learning_rate": 4.094579002325528e-06, "loss": 2.0461, "step": 21222 }, { "epoch": 0.71, "grad_norm": 0.7812091708183289, "learning_rate": 4.093721273822384e-06, "loss": 2.0419, "step": 21223 }, { "epoch": 0.71, "grad_norm": 0.7445766925811768, "learning_rate": 4.092863612044915e-06, "loss": 2.0324, "step": 21224 }, { "epoch": 0.71, "grad_norm": 0.7536240816116333, "learning_rate": 4.092006017002807e-06, "loss": 2.1078, "step": 21225 }, { "epoch": 0.71, "grad_norm": 0.7600485682487488, "learning_rate": 4.091148488705757e-06, "loss": 2.0239, "step": 21226 }, { "epoch": 0.71, "grad_norm": 0.736775279045105, "learning_rate": 4.0902910271634445e-06, "loss": 1.9806, "step": 21227 }, { "epoch": 0.71, "grad_norm": 0.7672955989837646, "learning_rate": 4.0894336323855645e-06, "loss": 2.0263, "step": 21228 }, { "epoch": 0.71, "grad_norm": 0.7606447339057922, "learning_rate": 4.088576304381798e-06, "loss": 2.0031, "step": 21229 }, { "epoch": 0.71, "grad_norm": 0.7457943558692932, "learning_rate": 4.0877190431618295e-06, "loss": 2.0025, "step": 21230 }, { "epoch": 0.71, "grad_norm": 0.7377336621284485, "learning_rate": 4.086861848735346e-06, "loss": 2.1082, "step": 21231 }, { "epoch": 0.71, "grad_norm": 0.7360185384750366, "learning_rate": 4.086004721112035e-06, "loss": 2.0282, "step": 21232 }, { "epoch": 0.71, "grad_norm": 0.756420373916626, "learning_rate": 4.085147660301578e-06, "loss": 2.0075, "step": 21233 }, { "epoch": 0.71, "grad_norm": 0.7417958378791809, "learning_rate": 4.084290666313653e-06, "loss": 2.0223, "step": 21234 }, { "epoch": 0.71, "grad_norm": 0.7650126814842224, "learning_rate": 4.083433739157947e-06, "loss": 2.1059, "step": 21235 }, { "epoch": 0.71, "grad_norm": 0.7502461075782776, "learning_rate": 4.082576878844137e-06, "loss": 2.0638, "step": 21236 }, { "epoch": 0.71, "grad_norm": 0.7931260466575623, "learning_rate": 4.081720085381909e-06, "loss": 2.1172, "step": 21237 }, { "epoch": 0.71, "grad_norm": 0.781365156173706, "learning_rate": 4.0808633587809335e-06, "loss": 2.1195, "step": 21238 }, { "epoch": 0.71, "grad_norm": 0.7687897682189941, "learning_rate": 4.0800066990509005e-06, "loss": 2.0745, "step": 21239 }, { "epoch": 0.71, "grad_norm": 0.7438607215881348, "learning_rate": 4.079150106201477e-06, "loss": 1.9598, "step": 21240 }, { "epoch": 0.71, "grad_norm": 0.7564340829849243, "learning_rate": 4.078293580242351e-06, "loss": 2.0705, "step": 21241 }, { "epoch": 0.71, "grad_norm": 0.7360088229179382, "learning_rate": 4.077437121183192e-06, "loss": 2.0594, "step": 21242 }, { "epoch": 0.71, "grad_norm": 0.7288684248924255, "learning_rate": 4.0765807290336754e-06, "loss": 1.9965, "step": 21243 }, { "epoch": 0.71, "grad_norm": 0.7138208746910095, "learning_rate": 4.075724403803477e-06, "loss": 2.0128, "step": 21244 }, { "epoch": 0.71, "grad_norm": 0.7765095829963684, "learning_rate": 4.074868145502277e-06, "loss": 2.0956, "step": 21245 }, { "epoch": 0.71, "grad_norm": 0.7369911074638367, "learning_rate": 4.074011954139744e-06, "loss": 2.086, "step": 21246 }, { "epoch": 0.71, "grad_norm": 0.7661831974983215, "learning_rate": 4.073155829725547e-06, "loss": 2.0805, "step": 21247 }, { "epoch": 0.71, "grad_norm": 0.7349428534507751, "learning_rate": 4.072299772269366e-06, "loss": 2.0712, "step": 21248 }, { "epoch": 0.71, "grad_norm": 0.7489886283874512, "learning_rate": 4.0714437817808636e-06, "loss": 1.9968, "step": 21249 }, { "epoch": 0.71, "grad_norm": 0.744309663772583, "learning_rate": 4.070587858269719e-06, "loss": 2.0418, "step": 21250 }, { "epoch": 0.71, "grad_norm": 0.7691968083381653, "learning_rate": 4.069732001745599e-06, "loss": 1.9849, "step": 21251 }, { "epoch": 0.71, "grad_norm": 0.7161206007003784, "learning_rate": 4.068876212218166e-06, "loss": 2.0442, "step": 21252 }, { "epoch": 0.71, "grad_norm": 0.7284473180770874, "learning_rate": 4.0680204896970945e-06, "loss": 2.057, "step": 21253 }, { "epoch": 0.71, "grad_norm": 0.7282404899597168, "learning_rate": 4.0671648341920545e-06, "loss": 2.0709, "step": 21254 }, { "epoch": 0.71, "grad_norm": 0.7428138256072998, "learning_rate": 4.066309245712709e-06, "loss": 2.0123, "step": 21255 }, { "epoch": 0.71, "grad_norm": 0.7475786209106445, "learning_rate": 4.065453724268721e-06, "loss": 2.1857, "step": 21256 }, { "epoch": 0.71, "grad_norm": 0.7161439061164856, "learning_rate": 4.064598269869762e-06, "loss": 2.0459, "step": 21257 }, { "epoch": 0.71, "grad_norm": 0.7482534050941467, "learning_rate": 4.06374288252549e-06, "loss": 2.0383, "step": 21258 }, { "epoch": 0.71, "grad_norm": 0.7543566823005676, "learning_rate": 4.062887562245574e-06, "loss": 2.0576, "step": 21259 }, { "epoch": 0.71, "grad_norm": 0.7574505805969238, "learning_rate": 4.062032309039673e-06, "loss": 2.0545, "step": 21260 }, { "epoch": 0.71, "grad_norm": 0.7527257204055786, "learning_rate": 4.061177122917454e-06, "loss": 2.0634, "step": 21261 }, { "epoch": 0.71, "grad_norm": 0.7766549587249756, "learning_rate": 4.0603220038885725e-06, "loss": 2.0742, "step": 21262 }, { "epoch": 0.71, "grad_norm": 0.7360628247261047, "learning_rate": 4.059466951962695e-06, "loss": 2.0318, "step": 21263 }, { "epoch": 0.71, "grad_norm": 0.7445189356803894, "learning_rate": 4.058611967149479e-06, "loss": 1.974, "step": 21264 }, { "epoch": 0.71, "grad_norm": 0.7440265417098999, "learning_rate": 4.0577570494585784e-06, "loss": 2.0367, "step": 21265 }, { "epoch": 0.71, "grad_norm": 0.7752862572669983, "learning_rate": 4.056902198899656e-06, "loss": 2.03, "step": 21266 }, { "epoch": 0.71, "grad_norm": 0.7330976724624634, "learning_rate": 4.056047415482374e-06, "loss": 2.0709, "step": 21267 }, { "epoch": 0.71, "grad_norm": 0.7066649198532104, "learning_rate": 4.055192699216385e-06, "loss": 2.0486, "step": 21268 }, { "epoch": 0.71, "grad_norm": 0.7284162640571594, "learning_rate": 4.054338050111341e-06, "loss": 2.0204, "step": 21269 }, { "epoch": 0.71, "grad_norm": 0.7552085518836975, "learning_rate": 4.0534834681769045e-06, "loss": 2.097, "step": 21270 }, { "epoch": 0.71, "grad_norm": 0.7341477274894714, "learning_rate": 4.052628953422722e-06, "loss": 2.0481, "step": 21271 }, { "epoch": 0.71, "grad_norm": 0.757088840007782, "learning_rate": 4.051774505858458e-06, "loss": 1.9901, "step": 21272 }, { "epoch": 0.71, "grad_norm": 0.7234058380126953, "learning_rate": 4.050920125493758e-06, "loss": 1.9827, "step": 21273 }, { "epoch": 0.71, "grad_norm": 0.7285860776901245, "learning_rate": 4.050065812338273e-06, "loss": 2.0084, "step": 21274 }, { "epoch": 0.71, "grad_norm": 0.7341120839118958, "learning_rate": 4.049211566401657e-06, "loss": 2.0302, "step": 21275 }, { "epoch": 0.71, "grad_norm": 0.7501924633979797, "learning_rate": 4.048357387693566e-06, "loss": 2.0675, "step": 21276 }, { "epoch": 0.71, "grad_norm": 0.7225576639175415, "learning_rate": 4.047503276223644e-06, "loss": 1.9988, "step": 21277 }, { "epoch": 0.71, "grad_norm": 0.71672523021698, "learning_rate": 4.0466492320015384e-06, "loss": 2.0426, "step": 21278 }, { "epoch": 0.71, "grad_norm": 0.7027603983879089, "learning_rate": 4.045795255036901e-06, "loss": 2.0373, "step": 21279 }, { "epoch": 0.71, "grad_norm": 0.7571180462837219, "learning_rate": 4.044941345339383e-06, "loss": 2.0997, "step": 21280 }, { "epoch": 0.71, "grad_norm": 0.7571749091148376, "learning_rate": 4.0440875029186264e-06, "loss": 2.0474, "step": 21281 }, { "epoch": 0.71, "grad_norm": 0.7222186326980591, "learning_rate": 4.043233727784276e-06, "loss": 1.9903, "step": 21282 }, { "epoch": 0.71, "grad_norm": 0.757353663444519, "learning_rate": 4.042380019945984e-06, "loss": 2.0782, "step": 21283 }, { "epoch": 0.71, "grad_norm": 0.7647561430931091, "learning_rate": 4.041526379413386e-06, "loss": 2.0216, "step": 21284 }, { "epoch": 0.71, "grad_norm": 0.7630417346954346, "learning_rate": 4.040672806196132e-06, "loss": 2.0544, "step": 21285 }, { "epoch": 0.71, "grad_norm": 0.7682597637176514, "learning_rate": 4.039819300303871e-06, "loss": 2.0262, "step": 21286 }, { "epoch": 0.71, "grad_norm": 0.7500863671302795, "learning_rate": 4.038965861746231e-06, "loss": 2.1353, "step": 21287 }, { "epoch": 0.71, "grad_norm": 0.7579953670501709, "learning_rate": 4.038112490532863e-06, "loss": 2.1042, "step": 21288 }, { "epoch": 0.71, "grad_norm": 0.7646166086196899, "learning_rate": 4.0372591866734075e-06, "loss": 2.0496, "step": 21289 }, { "epoch": 0.71, "grad_norm": 0.7469083666801453, "learning_rate": 4.036405950177504e-06, "loss": 2.0748, "step": 21290 }, { "epoch": 0.71, "grad_norm": 0.7607119083404541, "learning_rate": 4.035552781054788e-06, "loss": 2.0531, "step": 21291 }, { "epoch": 0.71, "grad_norm": 0.7517334222793579, "learning_rate": 4.034699679314904e-06, "loss": 2.0797, "step": 21292 }, { "epoch": 0.71, "grad_norm": 0.7112447023391724, "learning_rate": 4.033846644967484e-06, "loss": 1.998, "step": 21293 }, { "epoch": 0.71, "grad_norm": 0.7686113119125366, "learning_rate": 4.032993678022171e-06, "loss": 2.1176, "step": 21294 }, { "epoch": 0.71, "grad_norm": 0.7528063058853149, "learning_rate": 4.032140778488596e-06, "loss": 1.9898, "step": 21295 }, { "epoch": 0.71, "grad_norm": 0.7219647169113159, "learning_rate": 4.0312879463764e-06, "loss": 2.0548, "step": 21296 }, { "epoch": 0.71, "grad_norm": 0.7587942481040955, "learning_rate": 4.03043518169521e-06, "loss": 2.0857, "step": 21297 }, { "epoch": 0.71, "grad_norm": 0.7651197910308838, "learning_rate": 4.029582484454669e-06, "loss": 2.0726, "step": 21298 }, { "epoch": 0.71, "grad_norm": 0.7514709234237671, "learning_rate": 4.028729854664407e-06, "loss": 2.1182, "step": 21299 }, { "epoch": 0.71, "grad_norm": 0.758703351020813, "learning_rate": 4.027877292334051e-06, "loss": 2.0518, "step": 21300 }, { "epoch": 0.71, "grad_norm": 0.7077468037605286, "learning_rate": 4.027024797473239e-06, "loss": 2.0566, "step": 21301 }, { "epoch": 0.71, "grad_norm": 0.7395803928375244, "learning_rate": 4.026172370091602e-06, "loss": 2.0151, "step": 21302 }, { "epoch": 0.71, "grad_norm": 0.7239998579025269, "learning_rate": 4.02532001019877e-06, "loss": 2.0472, "step": 21303 }, { "epoch": 0.71, "grad_norm": 0.7565023899078369, "learning_rate": 4.024467717804367e-06, "loss": 2.0653, "step": 21304 }, { "epoch": 0.71, "grad_norm": 0.7333878874778748, "learning_rate": 4.0236154929180285e-06, "loss": 2.1081, "step": 21305 }, { "epoch": 0.71, "grad_norm": 0.7882394790649414, "learning_rate": 4.022763335549377e-06, "loss": 1.9773, "step": 21306 }, { "epoch": 0.71, "grad_norm": 0.7473047375679016, "learning_rate": 4.021911245708041e-06, "loss": 1.991, "step": 21307 }, { "epoch": 0.71, "grad_norm": 0.7568400502204895, "learning_rate": 4.0210592234036564e-06, "loss": 2.0985, "step": 21308 }, { "epoch": 0.71, "grad_norm": 0.7377325892448425, "learning_rate": 4.0202072686458336e-06, "loss": 2.035, "step": 21309 }, { "epoch": 0.71, "grad_norm": 0.7409537434577942, "learning_rate": 4.019355381444204e-06, "loss": 2.0519, "step": 21310 }, { "epoch": 0.71, "grad_norm": 0.7115760445594788, "learning_rate": 4.018503561808397e-06, "loss": 2.0268, "step": 21311 }, { "epoch": 0.71, "grad_norm": 0.7857978343963623, "learning_rate": 4.01765180974803e-06, "loss": 2.0624, "step": 21312 }, { "epoch": 0.71, "grad_norm": 0.7179853320121765, "learning_rate": 4.016800125272724e-06, "loss": 2.0864, "step": 21313 }, { "epoch": 0.71, "grad_norm": 0.7399454116821289, "learning_rate": 4.015948508392107e-06, "loss": 2.0455, "step": 21314 }, { "epoch": 0.71, "grad_norm": 0.7557646632194519, "learning_rate": 4.015096959115794e-06, "loss": 2.0243, "step": 21315 }, { "epoch": 0.71, "grad_norm": 0.7594959735870361, "learning_rate": 4.014245477453407e-06, "loss": 2.0423, "step": 21316 }, { "epoch": 0.71, "grad_norm": 0.7429527044296265, "learning_rate": 4.013394063414571e-06, "loss": 2.0482, "step": 21317 }, { "epoch": 0.71, "grad_norm": 0.7559183239936829, "learning_rate": 4.012542717008899e-06, "loss": 2.0604, "step": 21318 }, { "epoch": 0.71, "grad_norm": 0.7324590086936951, "learning_rate": 4.0116914382460086e-06, "loss": 2.0687, "step": 21319 }, { "epoch": 0.71, "grad_norm": 0.7298872470855713, "learning_rate": 4.01084022713552e-06, "loss": 2.0176, "step": 21320 }, { "epoch": 0.71, "grad_norm": 0.7775012254714966, "learning_rate": 4.009989083687051e-06, "loss": 2.0576, "step": 21321 }, { "epoch": 0.71, "grad_norm": 0.7438179850578308, "learning_rate": 4.00913800791021e-06, "loss": 2.0232, "step": 21322 }, { "epoch": 0.71, "grad_norm": 0.7394139766693115, "learning_rate": 4.008286999814617e-06, "loss": 2.0316, "step": 21323 }, { "epoch": 0.71, "grad_norm": 0.7422469854354858, "learning_rate": 4.007436059409891e-06, "loss": 2.0237, "step": 21324 }, { "epoch": 0.71, "grad_norm": 0.7643668055534363, "learning_rate": 4.006585186705638e-06, "loss": 2.0695, "step": 21325 }, { "epoch": 0.71, "grad_norm": 0.7612189650535583, "learning_rate": 4.00573438171147e-06, "loss": 2.0943, "step": 21326 }, { "epoch": 0.71, "grad_norm": 0.7669534087181091, "learning_rate": 4.004883644437006e-06, "loss": 2.0904, "step": 21327 }, { "epoch": 0.71, "grad_norm": 0.7432018518447876, "learning_rate": 4.004032974891851e-06, "loss": 2.1258, "step": 21328 }, { "epoch": 0.71, "grad_norm": 0.7501332759857178, "learning_rate": 4.003182373085616e-06, "loss": 2.0563, "step": 21329 }, { "epoch": 0.71, "grad_norm": 0.7527912855148315, "learning_rate": 4.002331839027919e-06, "loss": 2.0166, "step": 21330 }, { "epoch": 0.71, "grad_norm": 0.7284863591194153, "learning_rate": 4.0014813727283555e-06, "loss": 2.0797, "step": 21331 }, { "epoch": 0.71, "grad_norm": 0.7435654997825623, "learning_rate": 4.000630974196539e-06, "loss": 2.0377, "step": 21332 }, { "epoch": 0.71, "grad_norm": 0.7310600280761719, "learning_rate": 3.999780643442081e-06, "loss": 2.0069, "step": 21333 }, { "epoch": 0.71, "grad_norm": 0.7430515289306641, "learning_rate": 3.998930380474587e-06, "loss": 2.1448, "step": 21334 }, { "epoch": 0.71, "grad_norm": 0.7232074737548828, "learning_rate": 3.998080185303656e-06, "loss": 2.063, "step": 21335 }, { "epoch": 0.71, "grad_norm": 0.7502022385597229, "learning_rate": 3.9972300579389e-06, "loss": 1.9953, "step": 21336 }, { "epoch": 0.71, "grad_norm": 0.7370855212211609, "learning_rate": 3.996379998389919e-06, "loss": 2.0406, "step": 21337 }, { "epoch": 0.71, "grad_norm": 0.7352990508079529, "learning_rate": 3.9955300066663175e-06, "loss": 2.0829, "step": 21338 }, { "epoch": 0.71, "grad_norm": 0.7266630530357361, "learning_rate": 3.994680082777702e-06, "loss": 2.0996, "step": 21339 }, { "epoch": 0.71, "grad_norm": 0.7441461682319641, "learning_rate": 3.993830226733673e-06, "loss": 2.0362, "step": 21340 }, { "epoch": 0.71, "grad_norm": 0.794948399066925, "learning_rate": 3.992980438543824e-06, "loss": 2.108, "step": 21341 }, { "epoch": 0.71, "grad_norm": 0.7389959096908569, "learning_rate": 3.992130718217767e-06, "loss": 2.1068, "step": 21342 }, { "epoch": 0.71, "grad_norm": 0.7634884119033813, "learning_rate": 3.991281065765096e-06, "loss": 2.0365, "step": 21343 }, { "epoch": 0.71, "grad_norm": 0.7259693145751953, "learning_rate": 3.990431481195407e-06, "loss": 2.0282, "step": 21344 }, { "epoch": 0.71, "grad_norm": 0.7387796640396118, "learning_rate": 3.9895819645182996e-06, "loss": 2.0115, "step": 21345 }, { "epoch": 0.71, "grad_norm": 0.7690161466598511, "learning_rate": 3.988732515743377e-06, "loss": 2.1236, "step": 21346 }, { "epoch": 0.71, "grad_norm": 0.7430232763290405, "learning_rate": 3.987883134880233e-06, "loss": 2.0377, "step": 21347 }, { "epoch": 0.71, "grad_norm": 0.764133632183075, "learning_rate": 3.9870338219384565e-06, "loss": 2.063, "step": 21348 }, { "epoch": 0.71, "grad_norm": 0.738273024559021, "learning_rate": 3.986184576927652e-06, "loss": 1.9718, "step": 21349 }, { "epoch": 0.71, "grad_norm": 0.7478065490722656, "learning_rate": 3.9853353998574065e-06, "loss": 2.073, "step": 21350 }, { "epoch": 0.71, "grad_norm": 0.7851174473762512, "learning_rate": 3.984486290737316e-06, "loss": 2.0409, "step": 21351 }, { "epoch": 0.71, "grad_norm": 0.7587388753890991, "learning_rate": 3.983637249576983e-06, "loss": 2.0833, "step": 21352 }, { "epoch": 0.71, "grad_norm": 0.77997887134552, "learning_rate": 3.982788276385981e-06, "loss": 2.1537, "step": 21353 }, { "epoch": 0.71, "grad_norm": 0.7260881662368774, "learning_rate": 3.981939371173912e-06, "loss": 1.9942, "step": 21354 }, { "epoch": 0.71, "grad_norm": 0.7690961360931396, "learning_rate": 3.981090533950367e-06, "loss": 2.0405, "step": 21355 }, { "epoch": 0.71, "grad_norm": 0.7225907444953918, "learning_rate": 3.980241764724935e-06, "loss": 2.0736, "step": 21356 }, { "epoch": 0.71, "grad_norm": 0.7503808736801147, "learning_rate": 3.979393063507199e-06, "loss": 1.9669, "step": 21357 }, { "epoch": 0.71, "grad_norm": 0.764228343963623, "learning_rate": 3.978544430306757e-06, "loss": 2.1097, "step": 21358 }, { "epoch": 0.71, "grad_norm": 0.722518265247345, "learning_rate": 3.977695865133186e-06, "loss": 1.985, "step": 21359 }, { "epoch": 0.71, "grad_norm": 0.7803450226783752, "learning_rate": 3.97684736799608e-06, "loss": 2.1004, "step": 21360 }, { "epoch": 0.71, "grad_norm": 0.7315242886543274, "learning_rate": 3.975998938905023e-06, "loss": 2.0165, "step": 21361 }, { "epoch": 0.71, "grad_norm": 0.7466260194778442, "learning_rate": 3.975150577869602e-06, "loss": 2.1572, "step": 21362 }, { "epoch": 0.71, "grad_norm": 0.7696135640144348, "learning_rate": 3.974302284899394e-06, "loss": 2.0188, "step": 21363 }, { "epoch": 0.71, "grad_norm": 0.7399890422821045, "learning_rate": 3.973454060003992e-06, "loss": 2.0231, "step": 21364 }, { "epoch": 0.71, "grad_norm": 0.7707297801971436, "learning_rate": 3.972605903192973e-06, "loss": 2.039, "step": 21365 }, { "epoch": 0.71, "grad_norm": 0.7262179255485535, "learning_rate": 3.971757814475916e-06, "loss": 2.0411, "step": 21366 }, { "epoch": 0.71, "grad_norm": 0.7617310881614685, "learning_rate": 3.970909793862407e-06, "loss": 2.0262, "step": 21367 }, { "epoch": 0.71, "grad_norm": 0.7320592999458313, "learning_rate": 3.970061841362031e-06, "loss": 1.9711, "step": 21368 }, { "epoch": 0.71, "grad_norm": 0.7193421721458435, "learning_rate": 3.969213956984357e-06, "loss": 2.0601, "step": 21369 }, { "epoch": 0.71, "grad_norm": 0.7337999939918518, "learning_rate": 3.968366140738973e-06, "loss": 2.0577, "step": 21370 }, { "epoch": 0.71, "grad_norm": 0.7412766218185425, "learning_rate": 3.967518392635455e-06, "loss": 2.1195, "step": 21371 }, { "epoch": 0.71, "grad_norm": 0.7772192358970642, "learning_rate": 3.966670712683373e-06, "loss": 2.0744, "step": 21372 }, { "epoch": 0.71, "grad_norm": 0.7592119574546814, "learning_rate": 3.965823100892311e-06, "loss": 2.0563, "step": 21373 }, { "epoch": 0.71, "grad_norm": 0.7117629647254944, "learning_rate": 3.96497555727185e-06, "loss": 2.0334, "step": 21374 }, { "epoch": 0.71, "grad_norm": 0.7413805723190308, "learning_rate": 3.96412808183155e-06, "loss": 2.0347, "step": 21375 }, { "epoch": 0.71, "grad_norm": 0.7522855997085571, "learning_rate": 3.963280674580995e-06, "loss": 2.0302, "step": 21376 }, { "epoch": 0.71, "grad_norm": 0.7498225569725037, "learning_rate": 3.96243333552976e-06, "loss": 2.0803, "step": 21377 }, { "epoch": 0.71, "grad_norm": 0.7160243988037109, "learning_rate": 3.961586064687415e-06, "loss": 2.0033, "step": 21378 }, { "epoch": 0.71, "grad_norm": 0.7567553520202637, "learning_rate": 3.960738862063528e-06, "loss": 2.0542, "step": 21379 }, { "epoch": 0.71, "grad_norm": 0.7421330809593201, "learning_rate": 3.959891727667674e-06, "loss": 1.995, "step": 21380 }, { "epoch": 0.71, "grad_norm": 0.7460967898368835, "learning_rate": 3.959044661509428e-06, "loss": 2.0632, "step": 21381 }, { "epoch": 0.71, "grad_norm": 0.7928849458694458, "learning_rate": 3.95819766359835e-06, "loss": 2.0483, "step": 21382 }, { "epoch": 0.71, "grad_norm": 0.7430436611175537, "learning_rate": 3.9573507339440186e-06, "loss": 2.052, "step": 21383 }, { "epoch": 0.71, "grad_norm": 0.7506824135780334, "learning_rate": 3.9565038725559965e-06, "loss": 1.9686, "step": 21384 }, { "epoch": 0.71, "grad_norm": 0.7379739880561829, "learning_rate": 3.955657079443849e-06, "loss": 2.0193, "step": 21385 }, { "epoch": 0.71, "grad_norm": 0.73024982213974, "learning_rate": 3.954810354617145e-06, "loss": 2.0286, "step": 21386 }, { "epoch": 0.71, "grad_norm": 0.7755810022354126, "learning_rate": 3.953963698085458e-06, "loss": 1.9977, "step": 21387 }, { "epoch": 0.71, "grad_norm": 0.7560776472091675, "learning_rate": 3.953117109858339e-06, "loss": 2.079, "step": 21388 }, { "epoch": 0.71, "grad_norm": 0.7478843927383423, "learning_rate": 3.952270589945358e-06, "loss": 2.0169, "step": 21389 }, { "epoch": 0.71, "grad_norm": 0.7652735114097595, "learning_rate": 3.951424138356083e-06, "loss": 2.0577, "step": 21390 }, { "epoch": 0.71, "grad_norm": 0.7692378759384155, "learning_rate": 3.950577755100072e-06, "loss": 2.0864, "step": 21391 }, { "epoch": 0.71, "grad_norm": 0.7544057965278625, "learning_rate": 3.94973144018689e-06, "loss": 2.046, "step": 21392 }, { "epoch": 0.71, "grad_norm": 0.7314255237579346, "learning_rate": 3.948885193626097e-06, "loss": 2.0764, "step": 21393 }, { "epoch": 0.71, "grad_norm": 0.7477734684944153, "learning_rate": 3.948039015427248e-06, "loss": 2.0307, "step": 21394 }, { "epoch": 0.71, "grad_norm": 0.7233955264091492, "learning_rate": 3.9471929055999095e-06, "loss": 2.004, "step": 21395 }, { "epoch": 0.71, "grad_norm": 0.7487828135490417, "learning_rate": 3.94634686415364e-06, "loss": 1.9984, "step": 21396 }, { "epoch": 0.71, "grad_norm": 0.7148840427398682, "learning_rate": 3.945500891097996e-06, "loss": 1.9625, "step": 21397 }, { "epoch": 0.71, "grad_norm": 0.7414119243621826, "learning_rate": 3.944654986442532e-06, "loss": 2.0716, "step": 21398 }, { "epoch": 0.71, "grad_norm": 0.7241600155830383, "learning_rate": 3.9438091501968104e-06, "loss": 2.0401, "step": 21399 }, { "epoch": 0.71, "grad_norm": 0.7317625284194946, "learning_rate": 3.942963382370381e-06, "loss": 2.1135, "step": 21400 }, { "epoch": 0.71, "grad_norm": 0.7348754405975342, "learning_rate": 3.942117682972803e-06, "loss": 1.9299, "step": 21401 }, { "epoch": 0.71, "grad_norm": 0.7352507710456848, "learning_rate": 3.941272052013627e-06, "loss": 2.077, "step": 21402 }, { "epoch": 0.71, "grad_norm": 0.7569603323936462, "learning_rate": 3.940426489502413e-06, "loss": 2.0066, "step": 21403 }, { "epoch": 0.71, "grad_norm": 0.749212384223938, "learning_rate": 3.939580995448704e-06, "loss": 2.1216, "step": 21404 }, { "epoch": 0.71, "grad_norm": 0.7462937831878662, "learning_rate": 3.938735569862061e-06, "loss": 2.0365, "step": 21405 }, { "epoch": 0.71, "grad_norm": 0.7680366635322571, "learning_rate": 3.937890212752033e-06, "loss": 1.9848, "step": 21406 }, { "epoch": 0.71, "grad_norm": 0.7387123107910156, "learning_rate": 3.9370449241281625e-06, "loss": 2.049, "step": 21407 }, { "epoch": 0.71, "grad_norm": 0.7487072944641113, "learning_rate": 3.936199704000006e-06, "loss": 2.0654, "step": 21408 }, { "epoch": 0.71, "grad_norm": 0.7406418919563293, "learning_rate": 3.935354552377119e-06, "loss": 2.038, "step": 21409 }, { "epoch": 0.71, "grad_norm": 0.7581602334976196, "learning_rate": 3.934509469269035e-06, "loss": 2.1079, "step": 21410 }, { "epoch": 0.71, "grad_norm": 0.7259083390235901, "learning_rate": 3.933664454685308e-06, "loss": 2.0271, "step": 21411 }, { "epoch": 0.71, "grad_norm": 0.736352264881134, "learning_rate": 3.932819508635489e-06, "loss": 2.0548, "step": 21412 }, { "epoch": 0.71, "grad_norm": 0.7491690516471863, "learning_rate": 3.931974631129116e-06, "loss": 2.1485, "step": 21413 }, { "epoch": 0.71, "grad_norm": 0.72933429479599, "learning_rate": 3.931129822175741e-06, "loss": 2.0654, "step": 21414 }, { "epoch": 0.71, "grad_norm": 0.7801526784896851, "learning_rate": 3.930285081784904e-06, "loss": 2.0771, "step": 21415 }, { "epoch": 0.71, "grad_norm": 0.7803142070770264, "learning_rate": 3.929440409966146e-06, "loss": 1.963, "step": 21416 }, { "epoch": 0.71, "grad_norm": 0.7689708471298218, "learning_rate": 3.928595806729011e-06, "loss": 2.0732, "step": 21417 }, { "epoch": 0.71, "grad_norm": 0.7630940675735474, "learning_rate": 3.927751272083047e-06, "loss": 2.0239, "step": 21418 }, { "epoch": 0.71, "grad_norm": 0.7382839918136597, "learning_rate": 3.92690680603779e-06, "loss": 2.0361, "step": 21419 }, { "epoch": 0.71, "grad_norm": 0.7478662133216858, "learning_rate": 3.926062408602778e-06, "loss": 1.9768, "step": 21420 }, { "epoch": 0.71, "grad_norm": 0.7571648955345154, "learning_rate": 3.925218079787556e-06, "loss": 2.0739, "step": 21421 }, { "epoch": 0.71, "grad_norm": 0.7343557476997375, "learning_rate": 3.924373819601657e-06, "loss": 2.0649, "step": 21422 }, { "epoch": 0.71, "grad_norm": 0.6995106339454651, "learning_rate": 3.923529628054625e-06, "loss": 2.0624, "step": 21423 }, { "epoch": 0.71, "grad_norm": 0.7152701020240784, "learning_rate": 3.922685505155991e-06, "loss": 2.0368, "step": 21424 }, { "epoch": 0.71, "grad_norm": 0.7540472745895386, "learning_rate": 3.921841450915298e-06, "loss": 2.0329, "step": 21425 }, { "epoch": 0.71, "grad_norm": 0.7402940988540649, "learning_rate": 3.920997465342075e-06, "loss": 2.0738, "step": 21426 }, { "epoch": 0.71, "grad_norm": 0.7235376834869385, "learning_rate": 3.920153548445862e-06, "loss": 2.0378, "step": 21427 }, { "epoch": 0.71, "grad_norm": 0.7677651047706604, "learning_rate": 3.9193097002361925e-06, "loss": 2.0632, "step": 21428 }, { "epoch": 0.71, "grad_norm": 0.7298663258552551, "learning_rate": 3.9184659207225935e-06, "loss": 1.9868, "step": 21429 }, { "epoch": 0.71, "grad_norm": 0.7538634538650513, "learning_rate": 3.917622209914604e-06, "loss": 2.1251, "step": 21430 }, { "epoch": 0.71, "grad_norm": 0.7540154457092285, "learning_rate": 3.916778567821756e-06, "loss": 2.0574, "step": 21431 }, { "epoch": 0.71, "grad_norm": 0.7502318024635315, "learning_rate": 3.915934994453581e-06, "loss": 2.0824, "step": 21432 }, { "epoch": 0.71, "grad_norm": 0.7425729036331177, "learning_rate": 3.915091489819601e-06, "loss": 2.035, "step": 21433 }, { "epoch": 0.71, "grad_norm": 0.7414492964744568, "learning_rate": 3.9142480539293555e-06, "loss": 2.1069, "step": 21434 }, { "epoch": 0.71, "grad_norm": 0.7266777753829956, "learning_rate": 3.913404686792366e-06, "loss": 2.0128, "step": 21435 }, { "epoch": 0.71, "grad_norm": 0.7208375334739685, "learning_rate": 3.9125613884181655e-06, "loss": 1.9742, "step": 21436 }, { "epoch": 0.71, "grad_norm": 0.7459979057312012, "learning_rate": 3.91171815881628e-06, "loss": 2.0638, "step": 21437 }, { "epoch": 0.71, "grad_norm": 0.7630561590194702, "learning_rate": 3.910874997996231e-06, "loss": 2.0663, "step": 21438 }, { "epoch": 0.71, "grad_norm": 0.7476745843887329, "learning_rate": 3.910031905967547e-06, "loss": 2.1321, "step": 21439 }, { "epoch": 0.71, "grad_norm": 0.7570052146911621, "learning_rate": 3.909188882739757e-06, "loss": 2.0722, "step": 21440 }, { "epoch": 0.71, "grad_norm": 0.7259035706520081, "learning_rate": 3.9083459283223825e-06, "loss": 1.9792, "step": 21441 }, { "epoch": 0.71, "grad_norm": 0.7302448749542236, "learning_rate": 3.907503042724942e-06, "loss": 2.0439, "step": 21442 }, { "epoch": 0.71, "grad_norm": 0.7748907208442688, "learning_rate": 3.9066602259569645e-06, "loss": 2.0541, "step": 21443 }, { "epoch": 0.71, "grad_norm": 0.7658467888832092, "learning_rate": 3.905817478027965e-06, "loss": 2.0543, "step": 21444 }, { "epoch": 0.71, "grad_norm": 0.761789858341217, "learning_rate": 3.904974798947472e-06, "loss": 1.9304, "step": 21445 }, { "epoch": 0.71, "grad_norm": 0.7721625566482544, "learning_rate": 3.904132188724997e-06, "loss": 2.0236, "step": 21446 }, { "epoch": 0.71, "grad_norm": 0.7606375217437744, "learning_rate": 3.9032896473700685e-06, "loss": 2.0615, "step": 21447 }, { "epoch": 0.71, "grad_norm": 0.750980794429779, "learning_rate": 3.902447174892198e-06, "loss": 2.0628, "step": 21448 }, { "epoch": 0.71, "grad_norm": 0.7425456643104553, "learning_rate": 3.901604771300907e-06, "loss": 2.0527, "step": 21449 }, { "epoch": 0.71, "grad_norm": 0.7835389971733093, "learning_rate": 3.900762436605714e-06, "loss": 2.0646, "step": 21450 }, { "epoch": 0.71, "grad_norm": 0.7637593150138855, "learning_rate": 3.899920170816127e-06, "loss": 2.1123, "step": 21451 }, { "epoch": 0.71, "grad_norm": 0.7873336672782898, "learning_rate": 3.899077973941667e-06, "loss": 2.0898, "step": 21452 }, { "epoch": 0.71, "grad_norm": 0.7414779663085938, "learning_rate": 3.898235845991853e-06, "loss": 2.0251, "step": 21453 }, { "epoch": 0.71, "grad_norm": 0.7563363909721375, "learning_rate": 3.897393786976195e-06, "loss": 2.0321, "step": 21454 }, { "epoch": 0.71, "grad_norm": 0.7331929802894592, "learning_rate": 3.896551796904201e-06, "loss": 2.0463, "step": 21455 }, { "epoch": 0.71, "grad_norm": 0.7873733639717102, "learning_rate": 3.89570987578539e-06, "loss": 2.0163, "step": 21456 }, { "epoch": 0.71, "grad_norm": 0.7550294399261475, "learning_rate": 3.8948680236292704e-06, "loss": 1.9914, "step": 21457 }, { "epoch": 0.71, "grad_norm": 0.7415096759796143, "learning_rate": 3.894026240445357e-06, "loss": 2.0486, "step": 21458 }, { "epoch": 0.71, "grad_norm": 0.733745813369751, "learning_rate": 3.893184526243155e-06, "loss": 2.0219, "step": 21459 }, { "epoch": 0.71, "grad_norm": 0.7427349090576172, "learning_rate": 3.892342881032173e-06, "loss": 2.0665, "step": 21460 }, { "epoch": 0.71, "grad_norm": 0.7349501848220825, "learning_rate": 3.8915013048219205e-06, "loss": 2.0718, "step": 21461 }, { "epoch": 0.71, "grad_norm": 0.7173113226890564, "learning_rate": 3.8906597976219115e-06, "loss": 2.0557, "step": 21462 }, { "epoch": 0.71, "grad_norm": 0.7213647961616516, "learning_rate": 3.889818359441647e-06, "loss": 2.0597, "step": 21463 }, { "epoch": 0.71, "grad_norm": 0.7322161197662354, "learning_rate": 3.888976990290629e-06, "loss": 2.0755, "step": 21464 }, { "epoch": 0.71, "grad_norm": 0.7613168358802795, "learning_rate": 3.888135690178373e-06, "loss": 2.0138, "step": 21465 }, { "epoch": 0.71, "grad_norm": 0.7436955571174622, "learning_rate": 3.8872944591143735e-06, "loss": 2.1143, "step": 21466 }, { "epoch": 0.71, "grad_norm": 0.7717354893684387, "learning_rate": 3.886453297108143e-06, "loss": 2.076, "step": 21467 }, { "epoch": 0.71, "grad_norm": 0.7147963643074036, "learning_rate": 3.8856122041691765e-06, "loss": 2.0178, "step": 21468 }, { "epoch": 0.71, "grad_norm": 0.7277140617370605, "learning_rate": 3.884771180306983e-06, "loss": 1.9674, "step": 21469 }, { "epoch": 0.71, "grad_norm": 0.7208665609359741, "learning_rate": 3.8839302255310575e-06, "loss": 1.9855, "step": 21470 }, { "epoch": 0.71, "grad_norm": 0.740422785282135, "learning_rate": 3.883089339850907e-06, "loss": 2.0048, "step": 21471 }, { "epoch": 0.71, "grad_norm": 0.7569411993026733, "learning_rate": 3.88224852327603e-06, "loss": 2.043, "step": 21472 }, { "epoch": 0.71, "grad_norm": 0.7522729635238647, "learning_rate": 3.881407775815919e-06, "loss": 2.0141, "step": 21473 }, { "epoch": 0.71, "grad_norm": 0.7667873501777649, "learning_rate": 3.880567097480077e-06, "loss": 2.0684, "step": 21474 }, { "epoch": 0.71, "grad_norm": 0.7424026727676392, "learning_rate": 3.879726488278005e-06, "loss": 2.0381, "step": 21475 }, { "epoch": 0.71, "grad_norm": 0.7515433430671692, "learning_rate": 3.878885948219197e-06, "loss": 2.0956, "step": 21476 }, { "epoch": 0.71, "grad_norm": 0.7306869626045227, "learning_rate": 3.878045477313145e-06, "loss": 1.9961, "step": 21477 }, { "epoch": 0.71, "grad_norm": 0.7354339957237244, "learning_rate": 3.87720507556935e-06, "loss": 2.0522, "step": 21478 }, { "epoch": 0.71, "grad_norm": 0.7475075125694275, "learning_rate": 3.876364742997301e-06, "loss": 2.0572, "step": 21479 }, { "epoch": 0.71, "grad_norm": 0.7628931403160095, "learning_rate": 3.875524479606496e-06, "loss": 2.1192, "step": 21480 }, { "epoch": 0.71, "grad_norm": 0.772562563419342, "learning_rate": 3.874684285406425e-06, "loss": 2.0421, "step": 21481 }, { "epoch": 0.71, "grad_norm": 0.8290002942085266, "learning_rate": 3.873844160406584e-06, "loss": 2.1171, "step": 21482 }, { "epoch": 0.71, "grad_norm": 0.732464611530304, "learning_rate": 3.873004104616457e-06, "loss": 2.0024, "step": 21483 }, { "epoch": 0.71, "grad_norm": 0.7848442792892456, "learning_rate": 3.872164118045543e-06, "loss": 2.0191, "step": 21484 }, { "epoch": 0.71, "grad_norm": 0.7439508438110352, "learning_rate": 3.871324200703327e-06, "loss": 2.0364, "step": 21485 }, { "epoch": 0.71, "grad_norm": 0.7405891418457031, "learning_rate": 3.870484352599295e-06, "loss": 2.0341, "step": 21486 }, { "epoch": 0.71, "grad_norm": 0.7574360370635986, "learning_rate": 3.8696445737429385e-06, "loss": 2.0537, "step": 21487 }, { "epoch": 0.71, "grad_norm": 0.7521209120750427, "learning_rate": 3.868804864143749e-06, "loss": 2.0518, "step": 21488 }, { "epoch": 0.71, "grad_norm": 0.743155300617218, "learning_rate": 3.86796522381121e-06, "loss": 2.1213, "step": 21489 }, { "epoch": 0.71, "grad_norm": 0.737542986869812, "learning_rate": 3.8671256527548005e-06, "loss": 2.0603, "step": 21490 }, { "epoch": 0.72, "grad_norm": 0.7969669103622437, "learning_rate": 3.866286150984016e-06, "loss": 2.1625, "step": 21491 }, { "epoch": 0.72, "grad_norm": 0.7197364568710327, "learning_rate": 3.865446718508331e-06, "loss": 2.049, "step": 21492 }, { "epoch": 0.72, "grad_norm": 0.777123749256134, "learning_rate": 3.8646073553372385e-06, "loss": 2.0096, "step": 21493 }, { "epoch": 0.72, "grad_norm": 0.7500991821289062, "learning_rate": 3.863768061480216e-06, "loss": 2.032, "step": 21494 }, { "epoch": 0.72, "grad_norm": 0.751045823097229, "learning_rate": 3.862928836946742e-06, "loss": 2.0831, "step": 21495 }, { "epoch": 0.72, "grad_norm": 0.7382387518882751, "learning_rate": 3.862089681746301e-06, "loss": 2.0403, "step": 21496 }, { "epoch": 0.72, "grad_norm": 0.7508519291877747, "learning_rate": 3.8612505958883786e-06, "loss": 2.0819, "step": 21497 }, { "epoch": 0.72, "grad_norm": 0.7182164192199707, "learning_rate": 3.860411579382448e-06, "loss": 2.0569, "step": 21498 }, { "epoch": 0.72, "grad_norm": 0.7209972143173218, "learning_rate": 3.859572632237987e-06, "loss": 1.9511, "step": 21499 }, { "epoch": 0.72, "grad_norm": 0.7400722503662109, "learning_rate": 3.8587337544644776e-06, "loss": 2.0848, "step": 21500 }, { "epoch": 0.72, "grad_norm": 0.722638726234436, "learning_rate": 3.857894946071393e-06, "loss": 2.026, "step": 21501 }, { "epoch": 0.72, "grad_norm": 0.7230244874954224, "learning_rate": 3.857056207068215e-06, "loss": 2.0225, "step": 21502 }, { "epoch": 0.72, "grad_norm": 0.748950719833374, "learning_rate": 3.856217537464412e-06, "loss": 2.1133, "step": 21503 }, { "epoch": 0.72, "grad_norm": 0.7379436492919922, "learning_rate": 3.855378937269465e-06, "loss": 2.1023, "step": 21504 }, { "epoch": 0.72, "grad_norm": 0.7290952801704407, "learning_rate": 3.854540406492844e-06, "loss": 2.0199, "step": 21505 }, { "epoch": 0.72, "grad_norm": 0.7335649728775024, "learning_rate": 3.853701945144026e-06, "loss": 2.0268, "step": 21506 }, { "epoch": 0.72, "grad_norm": 0.7217668890953064, "learning_rate": 3.852863553232482e-06, "loss": 2.0092, "step": 21507 }, { "epoch": 0.72, "grad_norm": 0.7387538552284241, "learning_rate": 3.8520252307676795e-06, "loss": 2.0459, "step": 21508 }, { "epoch": 0.72, "grad_norm": 0.7419674396514893, "learning_rate": 3.8511869777590925e-06, "loss": 2.1174, "step": 21509 }, { "epoch": 0.72, "grad_norm": 0.7644091844558716, "learning_rate": 3.850348794216196e-06, "loss": 2.0466, "step": 21510 }, { "epoch": 0.72, "grad_norm": 0.7405504584312439, "learning_rate": 3.8495106801484535e-06, "loss": 1.9902, "step": 21511 }, { "epoch": 0.72, "grad_norm": 0.7505638003349304, "learning_rate": 3.848672635565333e-06, "loss": 2.08, "step": 21512 }, { "epoch": 0.72, "grad_norm": 0.7229627370834351, "learning_rate": 3.847834660476306e-06, "loss": 2.0612, "step": 21513 }, { "epoch": 0.72, "grad_norm": 0.727674663066864, "learning_rate": 3.8469967548908335e-06, "loss": 2.0653, "step": 21514 }, { "epoch": 0.72, "grad_norm": 0.7076156139373779, "learning_rate": 3.846158918818387e-06, "loss": 2.1248, "step": 21515 }, { "epoch": 0.72, "grad_norm": 0.7592533230781555, "learning_rate": 3.845321152268437e-06, "loss": 1.9897, "step": 21516 }, { "epoch": 0.72, "grad_norm": 0.7777933478355408, "learning_rate": 3.8444834552504355e-06, "loss": 2.0548, "step": 21517 }, { "epoch": 0.72, "grad_norm": 0.7544406056404114, "learning_rate": 3.843645827773851e-06, "loss": 2.027, "step": 21518 }, { "epoch": 0.72, "grad_norm": 0.7289154529571533, "learning_rate": 3.842808269848153e-06, "loss": 2.0405, "step": 21519 }, { "epoch": 0.72, "grad_norm": 0.7296667695045471, "learning_rate": 3.8419707814827965e-06, "loss": 2.011, "step": 21520 }, { "epoch": 0.72, "grad_norm": 0.7448946237564087, "learning_rate": 3.841133362687244e-06, "loss": 2.0964, "step": 21521 }, { "epoch": 0.72, "grad_norm": 0.8013725280761719, "learning_rate": 3.84029601347096e-06, "loss": 2.0698, "step": 21522 }, { "epoch": 0.72, "grad_norm": 0.7270270586013794, "learning_rate": 3.839458733843398e-06, "loss": 1.9686, "step": 21523 }, { "epoch": 0.72, "grad_norm": 0.7436785697937012, "learning_rate": 3.8386215238140235e-06, "loss": 1.9855, "step": 21524 }, { "epoch": 0.72, "grad_norm": 0.7553300261497498, "learning_rate": 3.837784383392289e-06, "loss": 2.0065, "step": 21525 }, { "epoch": 0.72, "grad_norm": 0.7277274131774902, "learning_rate": 3.83694731258766e-06, "loss": 2.0231, "step": 21526 }, { "epoch": 0.72, "grad_norm": 0.7313604950904846, "learning_rate": 3.836110311409583e-06, "loss": 2.0776, "step": 21527 }, { "epoch": 0.72, "grad_norm": 0.7905172109603882, "learning_rate": 3.835273379867525e-06, "loss": 2.089, "step": 21528 }, { "epoch": 0.72, "grad_norm": 0.7373872399330139, "learning_rate": 3.834436517970933e-06, "loss": 2.0455, "step": 21529 }, { "epoch": 0.72, "grad_norm": 0.7588306069374084, "learning_rate": 3.833599725729261e-06, "loss": 2.0692, "step": 21530 }, { "epoch": 0.72, "grad_norm": 0.7282304167747498, "learning_rate": 3.832763003151967e-06, "loss": 2.0105, "step": 21531 }, { "epoch": 0.72, "grad_norm": 0.7318737506866455, "learning_rate": 3.831926350248504e-06, "loss": 2.0757, "step": 21532 }, { "epoch": 0.72, "grad_norm": 0.7589004635810852, "learning_rate": 3.831089767028323e-06, "loss": 2.108, "step": 21533 }, { "epoch": 0.72, "grad_norm": 0.7397930026054382, "learning_rate": 3.830253253500871e-06, "loss": 2.0537, "step": 21534 }, { "epoch": 0.72, "grad_norm": 0.7481244802474976, "learning_rate": 3.829416809675606e-06, "loss": 2.0636, "step": 21535 }, { "epoch": 0.72, "grad_norm": 0.7941077947616577, "learning_rate": 3.828580435561969e-06, "loss": 2.1034, "step": 21536 }, { "epoch": 0.72, "grad_norm": 0.7500936388969421, "learning_rate": 3.827744131169413e-06, "loss": 2.0739, "step": 21537 }, { "epoch": 0.72, "grad_norm": 0.7513919472694397, "learning_rate": 3.826907896507394e-06, "loss": 2.0437, "step": 21538 }, { "epoch": 0.72, "grad_norm": 0.7727073431015015, "learning_rate": 3.826071731585346e-06, "loss": 2.0676, "step": 21539 }, { "epoch": 0.72, "grad_norm": 0.7821904420852661, "learning_rate": 3.82523563641272e-06, "loss": 2.0193, "step": 21540 }, { "epoch": 0.72, "grad_norm": 0.7399191856384277, "learning_rate": 3.824399610998966e-06, "loss": 2.0892, "step": 21541 }, { "epoch": 0.72, "grad_norm": 0.7797570824623108, "learning_rate": 3.823563655353528e-06, "loss": 2.0599, "step": 21542 }, { "epoch": 0.72, "grad_norm": 0.7480449080467224, "learning_rate": 3.822727769485843e-06, "loss": 1.9836, "step": 21543 }, { "epoch": 0.72, "grad_norm": 0.7507665753364563, "learning_rate": 3.821891953405363e-06, "loss": 2.0347, "step": 21544 }, { "epoch": 0.72, "grad_norm": 0.7362467646598816, "learning_rate": 3.8210562071215244e-06, "loss": 2.0395, "step": 21545 }, { "epoch": 0.72, "grad_norm": 0.740428626537323, "learning_rate": 3.820220530643771e-06, "loss": 2.0462, "step": 21546 }, { "epoch": 0.72, "grad_norm": 0.7387788891792297, "learning_rate": 3.8193849239815476e-06, "loss": 2.0533, "step": 21547 }, { "epoch": 0.72, "grad_norm": 0.7580827474594116, "learning_rate": 3.818549387144292e-06, "loss": 1.9864, "step": 21548 }, { "epoch": 0.72, "grad_norm": 0.7236201167106628, "learning_rate": 3.817713920141438e-06, "loss": 2.0719, "step": 21549 }, { "epoch": 0.72, "grad_norm": 0.7252009510993958, "learning_rate": 3.816878522982433e-06, "loss": 2.0532, "step": 21550 }, { "epoch": 0.72, "grad_norm": 0.7367619276046753, "learning_rate": 3.81604319567671e-06, "loss": 2.1164, "step": 21551 }, { "epoch": 0.72, "grad_norm": 0.7204755544662476, "learning_rate": 3.815207938233705e-06, "loss": 2.0013, "step": 21552 }, { "epoch": 0.72, "grad_norm": 0.7876721620559692, "learning_rate": 3.8143727506628557e-06, "loss": 2.0542, "step": 21553 }, { "epoch": 0.72, "grad_norm": 0.731182873249054, "learning_rate": 3.8135376329736006e-06, "loss": 2.0512, "step": 21554 }, { "epoch": 0.72, "grad_norm": 0.734283983707428, "learning_rate": 3.8127025851753717e-06, "loss": 2.043, "step": 21555 }, { "epoch": 0.72, "grad_norm": 0.7458754181861877, "learning_rate": 3.8118676072775996e-06, "loss": 2.0813, "step": 21556 }, { "epoch": 0.72, "grad_norm": 0.7408250570297241, "learning_rate": 3.8110326992897252e-06, "loss": 2.1003, "step": 21557 }, { "epoch": 0.72, "grad_norm": 0.7290232181549072, "learning_rate": 3.810197861221171e-06, "loss": 2.0626, "step": 21558 }, { "epoch": 0.72, "grad_norm": 0.7285833954811096, "learning_rate": 3.809363093081375e-06, "loss": 2.0462, "step": 21559 }, { "epoch": 0.72, "grad_norm": 0.7419911026954651, "learning_rate": 3.8085283948797737e-06, "loss": 2.0444, "step": 21560 }, { "epoch": 0.72, "grad_norm": 0.7306240200996399, "learning_rate": 3.807693766625782e-06, "loss": 1.984, "step": 21561 }, { "epoch": 0.72, "grad_norm": 0.7614285349845886, "learning_rate": 3.806859208328838e-06, "loss": 2.0794, "step": 21562 }, { "epoch": 0.72, "grad_norm": 0.756132960319519, "learning_rate": 3.8060247199983724e-06, "loss": 2.0412, "step": 21563 }, { "epoch": 0.72, "grad_norm": 0.736219048500061, "learning_rate": 3.8051903016438097e-06, "loss": 2.0665, "step": 21564 }, { "epoch": 0.72, "grad_norm": 0.7423990964889526, "learning_rate": 3.8043559532745722e-06, "loss": 2.0392, "step": 21565 }, { "epoch": 0.72, "grad_norm": 0.7708452939987183, "learning_rate": 3.8035216749000946e-06, "loss": 2.0705, "step": 21566 }, { "epoch": 0.72, "grad_norm": 0.7682214975357056, "learning_rate": 3.8026874665297942e-06, "loss": 2.0665, "step": 21567 }, { "epoch": 0.72, "grad_norm": 0.717788815498352, "learning_rate": 3.801853328173097e-06, "loss": 2.0148, "step": 21568 }, { "epoch": 0.72, "grad_norm": 0.7442294359207153, "learning_rate": 3.8010192598394336e-06, "loss": 2.0644, "step": 21569 }, { "epoch": 0.72, "grad_norm": 0.7120874524116516, "learning_rate": 3.800185261538222e-06, "loss": 2.0158, "step": 21570 }, { "epoch": 0.72, "grad_norm": 0.7640377879142761, "learning_rate": 3.7993513332788788e-06, "loss": 2.0566, "step": 21571 }, { "epoch": 0.72, "grad_norm": 0.7456615567207336, "learning_rate": 3.798517475070832e-06, "loss": 1.9765, "step": 21572 }, { "epoch": 0.72, "grad_norm": 0.751395583152771, "learning_rate": 3.797683686923507e-06, "loss": 2.0656, "step": 21573 }, { "epoch": 0.72, "grad_norm": 0.7519612908363342, "learning_rate": 3.796849968846309e-06, "loss": 2.078, "step": 21574 }, { "epoch": 0.72, "grad_norm": 0.7474293112754822, "learning_rate": 3.7960163208486644e-06, "loss": 2.078, "step": 21575 }, { "epoch": 0.72, "grad_norm": 0.7299622893333435, "learning_rate": 3.7951827429399956e-06, "loss": 2.1233, "step": 21576 }, { "epoch": 0.72, "grad_norm": 0.7540601491928101, "learning_rate": 3.794349235129712e-06, "loss": 2.0729, "step": 21577 }, { "epoch": 0.72, "grad_norm": 0.7533949017524719, "learning_rate": 3.7935157974272373e-06, "loss": 2.0649, "step": 21578 }, { "epoch": 0.72, "grad_norm": 0.7274425625801086, "learning_rate": 3.7926824298419853e-06, "loss": 2.0529, "step": 21579 }, { "epoch": 0.72, "grad_norm": 0.7268219590187073, "learning_rate": 3.791849132383364e-06, "loss": 2.0991, "step": 21580 }, { "epoch": 0.72, "grad_norm": 0.7536106705665588, "learning_rate": 3.791015905060793e-06, "loss": 2.0744, "step": 21581 }, { "epoch": 0.72, "grad_norm": 0.7600683569908142, "learning_rate": 3.7901827478836895e-06, "loss": 2.0488, "step": 21582 }, { "epoch": 0.72, "grad_norm": 0.7468461990356445, "learning_rate": 3.789349660861462e-06, "loss": 2.0704, "step": 21583 }, { "epoch": 0.72, "grad_norm": 0.7356901168823242, "learning_rate": 3.7885166440035195e-06, "loss": 1.9909, "step": 21584 }, { "epoch": 0.72, "grad_norm": 0.728850781917572, "learning_rate": 3.787683697319278e-06, "loss": 2.0298, "step": 21585 }, { "epoch": 0.72, "grad_norm": 0.7194045186042786, "learning_rate": 3.7868508208181453e-06, "loss": 2.102, "step": 21586 }, { "epoch": 0.72, "grad_norm": 0.7471626996994019, "learning_rate": 3.786018014509528e-06, "loss": 2.0547, "step": 21587 }, { "epoch": 0.72, "grad_norm": 0.7512971758842468, "learning_rate": 3.7851852784028374e-06, "loss": 2.0013, "step": 21588 }, { "epoch": 0.72, "grad_norm": 0.7494862079620361, "learning_rate": 3.7843526125074847e-06, "loss": 2.0233, "step": 21589 }, { "epoch": 0.72, "grad_norm": 0.7349495887756348, "learning_rate": 3.783520016832869e-06, "loss": 2.0624, "step": 21590 }, { "epoch": 0.72, "grad_norm": 0.736751139163971, "learning_rate": 3.782687491388406e-06, "loss": 2.0592, "step": 21591 }, { "epoch": 0.72, "grad_norm": 0.7692188620567322, "learning_rate": 3.781855036183495e-06, "loss": 2.0273, "step": 21592 }, { "epoch": 0.72, "grad_norm": 0.7243779301643372, "learning_rate": 3.7810226512275385e-06, "loss": 1.9866, "step": 21593 }, { "epoch": 0.72, "grad_norm": 0.7490507364273071, "learning_rate": 3.780190336529943e-06, "loss": 2.0309, "step": 21594 }, { "epoch": 0.72, "grad_norm": 0.7662305235862732, "learning_rate": 3.7793580921001195e-06, "loss": 2.0796, "step": 21595 }, { "epoch": 0.72, "grad_norm": 0.7392173409461975, "learning_rate": 3.7785259179474544e-06, "loss": 2.0492, "step": 21596 }, { "epoch": 0.72, "grad_norm": 0.7431060671806335, "learning_rate": 3.777693814081358e-06, "loss": 2.0363, "step": 21597 }, { "epoch": 0.72, "grad_norm": 0.7477800250053406, "learning_rate": 3.776861780511234e-06, "loss": 2.0598, "step": 21598 }, { "epoch": 0.72, "grad_norm": 0.7250113487243652, "learning_rate": 3.7760298172464747e-06, "loss": 2.0449, "step": 21599 }, { "epoch": 0.72, "grad_norm": 0.7409761548042297, "learning_rate": 3.7751979242964878e-06, "loss": 2.0817, "step": 21600 }, { "epoch": 0.72, "grad_norm": 0.7400169968605042, "learning_rate": 3.7743661016706646e-06, "loss": 2.0246, "step": 21601 }, { "epoch": 0.72, "grad_norm": 0.7603542804718018, "learning_rate": 3.7735343493784026e-06, "loss": 2.1047, "step": 21602 }, { "epoch": 0.72, "grad_norm": 0.7335270643234253, "learning_rate": 3.7727026674291e-06, "loss": 2.0021, "step": 21603 }, { "epoch": 0.72, "grad_norm": 0.7503976225852966, "learning_rate": 3.771871055832156e-06, "loss": 2.0831, "step": 21604 }, { "epoch": 0.72, "grad_norm": 0.7597103118896484, "learning_rate": 3.771039514596964e-06, "loss": 2.0277, "step": 21605 }, { "epoch": 0.72, "grad_norm": 0.7354955077171326, "learning_rate": 3.7702080437329126e-06, "loss": 2.0297, "step": 21606 }, { "epoch": 0.72, "grad_norm": 0.7315601110458374, "learning_rate": 3.769376643249403e-06, "loss": 2.0882, "step": 21607 }, { "epoch": 0.72, "grad_norm": 0.7215171456336975, "learning_rate": 3.7685453131558214e-06, "loss": 2.1018, "step": 21608 }, { "epoch": 0.72, "grad_norm": 0.7600667476654053, "learning_rate": 3.7677140534615665e-06, "loss": 2.0475, "step": 21609 }, { "epoch": 0.72, "grad_norm": 0.7460535764694214, "learning_rate": 3.7668828641760223e-06, "loss": 2.0307, "step": 21610 }, { "epoch": 0.72, "grad_norm": 0.7252292633056641, "learning_rate": 3.7660517453085855e-06, "loss": 1.9611, "step": 21611 }, { "epoch": 0.72, "grad_norm": 0.7567553520202637, "learning_rate": 3.76522069686864e-06, "loss": 2.0753, "step": 21612 }, { "epoch": 0.72, "grad_norm": 0.7616589665412903, "learning_rate": 3.7643897188655797e-06, "loss": 2.0521, "step": 21613 }, { "epoch": 0.72, "grad_norm": 0.7324743866920471, "learning_rate": 3.7635588113087906e-06, "loss": 2.0433, "step": 21614 }, { "epoch": 0.72, "grad_norm": 0.732434093952179, "learning_rate": 3.762727974207655e-06, "loss": 2.0907, "step": 21615 }, { "epoch": 0.72, "grad_norm": 1.3238574266433716, "learning_rate": 3.7618972075715643e-06, "loss": 2.0421, "step": 21616 }, { "epoch": 0.72, "grad_norm": 0.7584875226020813, "learning_rate": 3.761066511409909e-06, "loss": 2.0903, "step": 21617 }, { "epoch": 0.72, "grad_norm": 0.7575371861457825, "learning_rate": 3.760235885732062e-06, "loss": 2.063, "step": 21618 }, { "epoch": 0.72, "grad_norm": 0.7505260705947876, "learning_rate": 3.759405330547412e-06, "loss": 2.0031, "step": 21619 }, { "epoch": 0.72, "grad_norm": 0.7564098834991455, "learning_rate": 3.758574845865347e-06, "loss": 2.0545, "step": 21620 }, { "epoch": 0.72, "grad_norm": 0.7262295484542847, "learning_rate": 3.757744431695243e-06, "loss": 2.0283, "step": 21621 }, { "epoch": 0.72, "grad_norm": 0.774411678314209, "learning_rate": 3.756914088046487e-06, "loss": 2.0562, "step": 21622 }, { "epoch": 0.72, "grad_norm": 0.7485014200210571, "learning_rate": 3.7560838149284564e-06, "loss": 2.0398, "step": 21623 }, { "epoch": 0.72, "grad_norm": 0.7545369863510132, "learning_rate": 3.755253612350528e-06, "loss": 1.9667, "step": 21624 }, { "epoch": 0.72, "grad_norm": 0.7644257545471191, "learning_rate": 3.7544234803220848e-06, "loss": 1.9995, "step": 21625 }, { "epoch": 0.72, "grad_norm": 0.7792132496833801, "learning_rate": 3.753593418852509e-06, "loss": 1.9856, "step": 21626 }, { "epoch": 0.72, "grad_norm": 0.7371617555618286, "learning_rate": 3.752763427951174e-06, "loss": 2.0818, "step": 21627 }, { "epoch": 0.72, "grad_norm": 0.7424009442329407, "learning_rate": 3.751933507627452e-06, "loss": 2.0714, "step": 21628 }, { "epoch": 0.72, "grad_norm": 0.7716426849365234, "learning_rate": 3.7511036578907267e-06, "loss": 2.0761, "step": 21629 }, { "epoch": 0.72, "grad_norm": 0.7692919969558716, "learning_rate": 3.7502738787503677e-06, "loss": 2.1072, "step": 21630 }, { "epoch": 0.72, "grad_norm": 0.741477906703949, "learning_rate": 3.7494441702157545e-06, "loss": 2.0018, "step": 21631 }, { "epoch": 0.72, "grad_norm": 0.7506579756736755, "learning_rate": 3.7486145322962555e-06, "loss": 2.0598, "step": 21632 }, { "epoch": 0.72, "grad_norm": 0.7790831923484802, "learning_rate": 3.747784965001249e-06, "loss": 2.0586, "step": 21633 }, { "epoch": 0.72, "grad_norm": 0.8017479181289673, "learning_rate": 3.7469554683400987e-06, "loss": 2.0095, "step": 21634 }, { "epoch": 0.72, "grad_norm": 0.7882770299911499, "learning_rate": 3.7461260423221857e-06, "loss": 2.1719, "step": 21635 }, { "epoch": 0.72, "grad_norm": 0.7295850515365601, "learning_rate": 3.745296686956876e-06, "loss": 2.0146, "step": 21636 }, { "epoch": 0.72, "grad_norm": 0.7515215873718262, "learning_rate": 3.7444674022535343e-06, "loss": 2.0802, "step": 21637 }, { "epoch": 0.72, "grad_norm": 0.720660388469696, "learning_rate": 3.7436381882215343e-06, "loss": 2.0381, "step": 21638 }, { "epoch": 0.72, "grad_norm": 0.7594308853149414, "learning_rate": 3.742809044870247e-06, "loss": 2.0652, "step": 21639 }, { "epoch": 0.72, "grad_norm": 0.7497535347938538, "learning_rate": 3.7419799722090356e-06, "loss": 2.0745, "step": 21640 }, { "epoch": 0.72, "grad_norm": 0.7305979132652283, "learning_rate": 3.741150970247264e-06, "loss": 2.0846, "step": 21641 }, { "epoch": 0.72, "grad_norm": 0.7293744087219238, "learning_rate": 3.740322038994304e-06, "loss": 2.011, "step": 21642 }, { "epoch": 0.72, "grad_norm": 0.7398267984390259, "learning_rate": 3.7394931784595132e-06, "loss": 2.0731, "step": 21643 }, { "epoch": 0.72, "grad_norm": 0.7478500008583069, "learning_rate": 3.7386643886522635e-06, "loss": 1.961, "step": 21644 }, { "epoch": 0.72, "grad_norm": 0.7407857179641724, "learning_rate": 3.737835669581913e-06, "loss": 2.0444, "step": 21645 }, { "epoch": 0.72, "grad_norm": 0.7605364322662354, "learning_rate": 3.7370070212578223e-06, "loss": 2.0855, "step": 21646 }, { "epoch": 0.72, "grad_norm": 0.7235673666000366, "learning_rate": 3.7361784436893554e-06, "loss": 1.9357, "step": 21647 }, { "epoch": 0.72, "grad_norm": 0.7532749176025391, "learning_rate": 3.7353499368858782e-06, "loss": 1.9676, "step": 21648 }, { "epoch": 0.72, "grad_norm": 0.7507176399230957, "learning_rate": 3.7345215008567447e-06, "loss": 2.0896, "step": 21649 }, { "epoch": 0.72, "grad_norm": 0.7307401895523071, "learning_rate": 3.7336931356113127e-06, "loss": 2.0523, "step": 21650 }, { "epoch": 0.72, "grad_norm": 0.773980438709259, "learning_rate": 3.7328648411589463e-06, "loss": 2.1132, "step": 21651 }, { "epoch": 0.72, "grad_norm": 0.7169507741928101, "learning_rate": 3.7320366175089962e-06, "loss": 1.9981, "step": 21652 }, { "epoch": 0.72, "grad_norm": 0.7570340633392334, "learning_rate": 3.731208464670827e-06, "loss": 2.1106, "step": 21653 }, { "epoch": 0.72, "grad_norm": 0.7264828681945801, "learning_rate": 3.7303803826537867e-06, "loss": 2.086, "step": 21654 }, { "epoch": 0.72, "grad_norm": 0.7211494445800781, "learning_rate": 3.729552371467239e-06, "loss": 2.0341, "step": 21655 }, { "epoch": 0.72, "grad_norm": 0.7361804842948914, "learning_rate": 3.7287244311205296e-06, "loss": 2.0821, "step": 21656 }, { "epoch": 0.72, "grad_norm": 0.7795487642288208, "learning_rate": 3.727896561623019e-06, "loss": 2.123, "step": 21657 }, { "epoch": 0.72, "grad_norm": 0.7395868897438049, "learning_rate": 3.7270687629840586e-06, "loss": 1.9913, "step": 21658 }, { "epoch": 0.72, "grad_norm": 0.7265045642852783, "learning_rate": 3.726241035212995e-06, "loss": 2.0234, "step": 21659 }, { "epoch": 0.72, "grad_norm": 0.7518748044967651, "learning_rate": 3.7254133783191827e-06, "loss": 2.0581, "step": 21660 }, { "epoch": 0.72, "grad_norm": 0.7395328879356384, "learning_rate": 3.7245857923119775e-06, "loss": 2.0314, "step": 21661 }, { "epoch": 0.72, "grad_norm": 0.7616763114929199, "learning_rate": 3.723758277200723e-06, "loss": 2.0566, "step": 21662 }, { "epoch": 0.72, "grad_norm": 0.7270215749740601, "learning_rate": 3.7229308329947665e-06, "loss": 2.0851, "step": 21663 }, { "epoch": 0.72, "grad_norm": 0.7578681707382202, "learning_rate": 3.7221034597034624e-06, "loss": 2.0584, "step": 21664 }, { "epoch": 0.72, "grad_norm": 0.7401824593544006, "learning_rate": 3.72127615733615e-06, "loss": 2.015, "step": 21665 }, { "epoch": 0.72, "grad_norm": 0.7593291401863098, "learning_rate": 3.720448925902185e-06, "loss": 2.0208, "step": 21666 }, { "epoch": 0.72, "grad_norm": 0.7248796224594116, "learning_rate": 3.719621765410906e-06, "loss": 2.053, "step": 21667 }, { "epoch": 0.72, "grad_norm": 0.7829385995864868, "learning_rate": 3.7187946758716563e-06, "loss": 2.0669, "step": 21668 }, { "epoch": 0.72, "grad_norm": 0.7575364708900452, "learning_rate": 3.7179676572937838e-06, "loss": 2.1045, "step": 21669 }, { "epoch": 0.72, "grad_norm": 0.7770982384681702, "learning_rate": 3.717140709686635e-06, "loss": 2.094, "step": 21670 }, { "epoch": 0.72, "grad_norm": 0.7504127025604248, "learning_rate": 3.7163138330595473e-06, "loss": 2.0694, "step": 21671 }, { "epoch": 0.72, "grad_norm": 0.7380024194717407, "learning_rate": 3.71548702742186e-06, "loss": 1.9964, "step": 21672 }, { "epoch": 0.72, "grad_norm": 0.7207601070404053, "learning_rate": 3.7146602927829178e-06, "loss": 2.0068, "step": 21673 }, { "epoch": 0.72, "grad_norm": 0.7460630536079407, "learning_rate": 3.713833629152064e-06, "loss": 2.0684, "step": 21674 }, { "epoch": 0.72, "grad_norm": 0.7658509612083435, "learning_rate": 3.713007036538633e-06, "loss": 2.0315, "step": 21675 }, { "epoch": 0.72, "grad_norm": 0.7567182183265686, "learning_rate": 3.7121805149519596e-06, "loss": 2.025, "step": 21676 }, { "epoch": 0.72, "grad_norm": 0.7388661503791809, "learning_rate": 3.711354064401391e-06, "loss": 2.0786, "step": 21677 }, { "epoch": 0.72, "grad_norm": 0.7182818055152893, "learning_rate": 3.7105276848962535e-06, "loss": 2.0318, "step": 21678 }, { "epoch": 0.72, "grad_norm": 0.7276864051818848, "learning_rate": 3.7097013764458935e-06, "loss": 2.0892, "step": 21679 }, { "epoch": 0.72, "grad_norm": 0.7617847919464111, "learning_rate": 3.708875139059639e-06, "loss": 2.051, "step": 21680 }, { "epoch": 0.72, "grad_norm": 0.7668141722679138, "learning_rate": 3.708048972746824e-06, "loss": 2.0327, "step": 21681 }, { "epoch": 0.72, "grad_norm": 0.7541234493255615, "learning_rate": 3.707222877516784e-06, "loss": 1.9961, "step": 21682 }, { "epoch": 0.72, "grad_norm": 0.7529972195625305, "learning_rate": 3.706396853378855e-06, "loss": 2.0738, "step": 21683 }, { "epoch": 0.72, "grad_norm": 0.7632958889007568, "learning_rate": 3.705570900342367e-06, "loss": 1.9683, "step": 21684 }, { "epoch": 0.72, "grad_norm": 0.7876235246658325, "learning_rate": 3.7047450184166457e-06, "loss": 2.0481, "step": 21685 }, { "epoch": 0.72, "grad_norm": 0.7209914922714233, "learning_rate": 3.70391920761103e-06, "loss": 2.0077, "step": 21686 }, { "epoch": 0.72, "grad_norm": 0.7664456367492676, "learning_rate": 3.703093467934841e-06, "loss": 2.0344, "step": 21687 }, { "epoch": 0.72, "grad_norm": 0.7390437722206116, "learning_rate": 3.702267799397414e-06, "loss": 2.0167, "step": 21688 }, { "epoch": 0.72, "grad_norm": 0.7355640530586243, "learning_rate": 3.7014422020080733e-06, "loss": 1.9747, "step": 21689 }, { "epoch": 0.72, "grad_norm": 0.7546474933624268, "learning_rate": 3.7006166757761496e-06, "loss": 2.1081, "step": 21690 }, { "epoch": 0.72, "grad_norm": 0.7680472135543823, "learning_rate": 3.6997912207109644e-06, "loss": 2.1282, "step": 21691 }, { "epoch": 0.72, "grad_norm": 0.735420286655426, "learning_rate": 3.6989658368218484e-06, "loss": 2.0238, "step": 21692 }, { "epoch": 0.72, "grad_norm": 0.7936490774154663, "learning_rate": 3.6981405241181232e-06, "loss": 2.0866, "step": 21693 }, { "epoch": 0.72, "grad_norm": 0.7604075074195862, "learning_rate": 3.6973152826091106e-06, "loss": 2.0981, "step": 21694 }, { "epoch": 0.72, "grad_norm": 0.7325469255447388, "learning_rate": 3.696490112304135e-06, "loss": 2.0413, "step": 21695 }, { "epoch": 0.72, "grad_norm": 0.7955654859542847, "learning_rate": 3.695665013212525e-06, "loss": 2.1248, "step": 21696 }, { "epoch": 0.72, "grad_norm": 0.7450180053710938, "learning_rate": 3.694839985343596e-06, "loss": 2.0153, "step": 21697 }, { "epoch": 0.72, "grad_norm": 0.742081344127655, "learning_rate": 3.6940150287066656e-06, "loss": 2.0598, "step": 21698 }, { "epoch": 0.72, "grad_norm": 0.7770227789878845, "learning_rate": 3.6931901433110627e-06, "loss": 2.1071, "step": 21699 }, { "epoch": 0.72, "grad_norm": 0.7610399723052979, "learning_rate": 3.6923653291660967e-06, "loss": 2.103, "step": 21700 }, { "epoch": 0.72, "grad_norm": 0.734785795211792, "learning_rate": 3.691540586281095e-06, "loss": 2.0988, "step": 21701 }, { "epoch": 0.72, "grad_norm": 0.7467179298400879, "learning_rate": 3.6907159146653694e-06, "loss": 2.0876, "step": 21702 }, { "epoch": 0.72, "grad_norm": 0.7484380602836609, "learning_rate": 3.6898913143282355e-06, "loss": 2.061, "step": 21703 }, { "epoch": 0.72, "grad_norm": 0.7441505789756775, "learning_rate": 3.6890667852790106e-06, "loss": 2.0405, "step": 21704 }, { "epoch": 0.72, "grad_norm": 0.7350597977638245, "learning_rate": 3.688242327527014e-06, "loss": 2.0346, "step": 21705 }, { "epoch": 0.72, "grad_norm": 0.7352396845817566, "learning_rate": 3.687417941081557e-06, "loss": 2.0013, "step": 21706 }, { "epoch": 0.72, "grad_norm": 0.7347570657730103, "learning_rate": 3.68659362595195e-06, "loss": 2.0555, "step": 21707 }, { "epoch": 0.72, "grad_norm": 0.7444417476654053, "learning_rate": 3.6857693821475104e-06, "loss": 2.11, "step": 21708 }, { "epoch": 0.72, "grad_norm": 0.7247772216796875, "learning_rate": 3.684945209677544e-06, "loss": 2.0183, "step": 21709 }, { "epoch": 0.72, "grad_norm": 0.7479152083396912, "learning_rate": 3.6841211085513705e-06, "loss": 2.077, "step": 21710 }, { "epoch": 0.72, "grad_norm": 0.7393128275871277, "learning_rate": 3.683297078778291e-06, "loss": 2.0611, "step": 21711 }, { "epoch": 0.72, "grad_norm": 0.7576247453689575, "learning_rate": 3.6824731203676223e-06, "loss": 2.0206, "step": 21712 }, { "epoch": 0.72, "grad_norm": 0.7845340967178345, "learning_rate": 3.681649233328667e-06, "loss": 2.0926, "step": 21713 }, { "epoch": 0.72, "grad_norm": 0.7492756247520447, "learning_rate": 3.6808254176707393e-06, "loss": 2.0818, "step": 21714 }, { "epoch": 0.72, "grad_norm": 0.743295431137085, "learning_rate": 3.680001673403142e-06, "loss": 2.0029, "step": 21715 }, { "epoch": 0.72, "grad_norm": 0.7412112951278687, "learning_rate": 3.6791780005351784e-06, "loss": 2.0743, "step": 21716 }, { "epoch": 0.72, "grad_norm": 0.7301713228225708, "learning_rate": 3.6783543990761585e-06, "loss": 2.021, "step": 21717 }, { "epoch": 0.72, "grad_norm": 0.7687027454376221, "learning_rate": 3.677530869035388e-06, "loss": 2.0413, "step": 21718 }, { "epoch": 0.72, "grad_norm": 0.7344357371330261, "learning_rate": 3.676707410422169e-06, "loss": 2.0372, "step": 21719 }, { "epoch": 0.72, "grad_norm": 0.7597854137420654, "learning_rate": 3.6758840232458005e-06, "loss": 2.1309, "step": 21720 }, { "epoch": 0.72, "grad_norm": 0.7847921848297119, "learning_rate": 3.6750607075155907e-06, "loss": 2.0378, "step": 21721 }, { "epoch": 0.72, "grad_norm": 0.7579687833786011, "learning_rate": 3.674237463240835e-06, "loss": 2.0243, "step": 21722 }, { "epoch": 0.72, "grad_norm": 0.7426260113716125, "learning_rate": 3.673414290430838e-06, "loss": 2.0559, "step": 21723 }, { "epoch": 0.72, "grad_norm": 0.721921980381012, "learning_rate": 3.6725911890949053e-06, "loss": 2.0581, "step": 21724 }, { "epoch": 0.72, "grad_norm": 0.7688610553741455, "learning_rate": 3.6717681592423217e-06, "loss": 2.115, "step": 21725 }, { "epoch": 0.72, "grad_norm": 0.7677981853485107, "learning_rate": 3.670945200882393e-06, "loss": 2.0566, "step": 21726 }, { "epoch": 0.72, "grad_norm": 0.6997689604759216, "learning_rate": 3.670122314024419e-06, "loss": 1.9801, "step": 21727 }, { "epoch": 0.72, "grad_norm": 0.727685272693634, "learning_rate": 3.6692994986776944e-06, "loss": 1.9876, "step": 21728 }, { "epoch": 0.72, "grad_norm": 0.7522323727607727, "learning_rate": 3.66847675485151e-06, "loss": 1.9586, "step": 21729 }, { "epoch": 0.72, "grad_norm": 0.7319626808166504, "learning_rate": 3.6676540825551676e-06, "loss": 1.9768, "step": 21730 }, { "epoch": 0.72, "grad_norm": 0.728995680809021, "learning_rate": 3.6668314817979554e-06, "loss": 2.0806, "step": 21731 }, { "epoch": 0.72, "grad_norm": 0.7291548848152161, "learning_rate": 3.666008952589173e-06, "loss": 1.9802, "step": 21732 }, { "epoch": 0.72, "grad_norm": 0.7720770239830017, "learning_rate": 3.6651864949381068e-06, "loss": 1.9775, "step": 21733 }, { "epoch": 0.72, "grad_norm": 0.774888813495636, "learning_rate": 3.6643641088540526e-06, "loss": 2.1124, "step": 21734 }, { "epoch": 0.72, "grad_norm": 0.7967333197593689, "learning_rate": 3.663541794346297e-06, "loss": 2.1005, "step": 21735 }, { "epoch": 0.72, "grad_norm": 0.7408243417739868, "learning_rate": 3.6627195514241365e-06, "loss": 2.0343, "step": 21736 }, { "epoch": 0.72, "grad_norm": 0.7290113568305969, "learning_rate": 3.661897380096856e-06, "loss": 2.0116, "step": 21737 }, { "epoch": 0.72, "grad_norm": 0.819386899471283, "learning_rate": 3.6610752803737415e-06, "loss": 2.0024, "step": 21738 }, { "epoch": 0.72, "grad_norm": 0.7865351438522339, "learning_rate": 3.660253252264083e-06, "loss": 2.0126, "step": 21739 }, { "epoch": 0.72, "grad_norm": 0.7646365165710449, "learning_rate": 3.6594312957771716e-06, "loss": 2.0702, "step": 21740 }, { "epoch": 0.72, "grad_norm": 0.7418750524520874, "learning_rate": 3.6586094109222893e-06, "loss": 2.1008, "step": 21741 }, { "epoch": 0.72, "grad_norm": 0.7796114087104797, "learning_rate": 3.6577875977087186e-06, "loss": 2.0482, "step": 21742 }, { "epoch": 0.72, "grad_norm": 0.7574689984321594, "learning_rate": 3.6569658561457487e-06, "loss": 2.0993, "step": 21743 }, { "epoch": 0.72, "grad_norm": 0.7521612048149109, "learning_rate": 3.6561441862426593e-06, "loss": 2.0342, "step": 21744 }, { "epoch": 0.72, "grad_norm": 0.7477236390113831, "learning_rate": 3.655322588008734e-06, "loss": 2.0391, "step": 21745 }, { "epoch": 0.72, "grad_norm": 0.738766074180603, "learning_rate": 3.654501061453263e-06, "loss": 2.0567, "step": 21746 }, { "epoch": 0.72, "grad_norm": 0.762089192867279, "learning_rate": 3.653679606585513e-06, "loss": 2.0237, "step": 21747 }, { "epoch": 0.72, "grad_norm": 0.7420791983604431, "learning_rate": 3.6528582234147725e-06, "loss": 2.006, "step": 21748 }, { "epoch": 0.72, "grad_norm": 0.7567542791366577, "learning_rate": 3.6520369119503228e-06, "loss": 2.048, "step": 21749 }, { "epoch": 0.72, "grad_norm": 0.7451452612876892, "learning_rate": 3.6512156722014392e-06, "loss": 2.0402, "step": 21750 }, { "epoch": 0.72, "grad_norm": 0.7255205512046814, "learning_rate": 3.650394504177397e-06, "loss": 2.0051, "step": 21751 }, { "epoch": 0.72, "grad_norm": 0.7352153062820435, "learning_rate": 3.64957340788748e-06, "loss": 2.0116, "step": 21752 }, { "epoch": 0.72, "grad_norm": 0.7253125905990601, "learning_rate": 3.6487523833409577e-06, "loss": 2.0115, "step": 21753 }, { "epoch": 0.72, "grad_norm": 0.7490274310112, "learning_rate": 3.6479314305471093e-06, "loss": 2.0185, "step": 21754 }, { "epoch": 0.72, "grad_norm": 0.7254589796066284, "learning_rate": 3.6471105495152114e-06, "loss": 1.9686, "step": 21755 }, { "epoch": 0.72, "grad_norm": 0.7806925177574158, "learning_rate": 3.646289740254536e-06, "loss": 2.011, "step": 21756 }, { "epoch": 0.72, "grad_norm": 0.7477225661277771, "learning_rate": 3.645469002774352e-06, "loss": 2.0856, "step": 21757 }, { "epoch": 0.72, "grad_norm": 0.7251269817352295, "learning_rate": 3.6446483370839347e-06, "loss": 2.0422, "step": 21758 }, { "epoch": 0.72, "grad_norm": 0.7578094601631165, "learning_rate": 3.643827743192563e-06, "loss": 2.0922, "step": 21759 }, { "epoch": 0.72, "grad_norm": 0.7645106315612793, "learning_rate": 3.6430072211094938e-06, "loss": 2.0263, "step": 21760 }, { "epoch": 0.72, "grad_norm": 0.745076060295105, "learning_rate": 3.642186770844003e-06, "loss": 2.0871, "step": 21761 }, { "epoch": 0.72, "grad_norm": 0.7257620096206665, "learning_rate": 3.6413663924053633e-06, "loss": 2.0185, "step": 21762 }, { "epoch": 0.72, "grad_norm": 0.7563227415084839, "learning_rate": 3.6405460858028398e-06, "loss": 2.0097, "step": 21763 }, { "epoch": 0.72, "grad_norm": 0.739338755607605, "learning_rate": 3.6397258510456957e-06, "loss": 2.0765, "step": 21764 }, { "epoch": 0.72, "grad_norm": 0.7878612279891968, "learning_rate": 3.6389056881432048e-06, "loss": 2.1452, "step": 21765 }, { "epoch": 0.72, "grad_norm": 0.7562803626060486, "learning_rate": 3.638085597104627e-06, "loss": 2.0287, "step": 21766 }, { "epoch": 0.72, "grad_norm": 0.7337028980255127, "learning_rate": 3.63726557793923e-06, "loss": 2.0919, "step": 21767 }, { "epoch": 0.72, "grad_norm": 0.7751567363739014, "learning_rate": 3.6364456306562834e-06, "loss": 2.1517, "step": 21768 }, { "epoch": 0.72, "grad_norm": 0.766032338142395, "learning_rate": 3.6356257552650378e-06, "loss": 2.1445, "step": 21769 }, { "epoch": 0.72, "grad_norm": 0.7341062426567078, "learning_rate": 3.6348059517747624e-06, "loss": 2.0642, "step": 21770 }, { "epoch": 0.72, "grad_norm": 0.7230467796325684, "learning_rate": 3.633986220194723e-06, "loss": 2.0457, "step": 21771 }, { "epoch": 0.72, "grad_norm": 0.7464435696601868, "learning_rate": 3.633166560534177e-06, "loss": 2.0358, "step": 21772 }, { "epoch": 0.72, "grad_norm": 0.8250384330749512, "learning_rate": 3.6323469728023796e-06, "loss": 1.977, "step": 21773 }, { "epoch": 0.72, "grad_norm": 0.733219563961029, "learning_rate": 3.6315274570085947e-06, "loss": 2.0779, "step": 21774 }, { "epoch": 0.72, "grad_norm": 0.7792534828186035, "learning_rate": 3.630708013162083e-06, "loss": 2.0423, "step": 21775 }, { "epoch": 0.72, "grad_norm": 0.7413418292999268, "learning_rate": 3.629888641272097e-06, "loss": 2.0345, "step": 21776 }, { "epoch": 0.72, "grad_norm": 0.7510575652122498, "learning_rate": 3.6290693413478982e-06, "loss": 2.0098, "step": 21777 }, { "epoch": 0.72, "grad_norm": 0.738841712474823, "learning_rate": 3.62825011339874e-06, "loss": 2.0803, "step": 21778 }, { "epoch": 0.72, "grad_norm": 0.7418696880340576, "learning_rate": 3.6274309574338763e-06, "loss": 1.9769, "step": 21779 }, { "epoch": 0.72, "grad_norm": 0.7196971774101257, "learning_rate": 3.626611873462561e-06, "loss": 2.0892, "step": 21780 }, { "epoch": 0.72, "grad_norm": 0.7523905634880066, "learning_rate": 3.6257928614940573e-06, "loss": 2.0395, "step": 21781 }, { "epoch": 0.72, "grad_norm": 0.7083031535148621, "learning_rate": 3.6249739215376035e-06, "loss": 2.0097, "step": 21782 }, { "epoch": 0.72, "grad_norm": 0.7924720644950867, "learning_rate": 3.6241550536024584e-06, "loss": 2.0619, "step": 21783 }, { "epoch": 0.72, "grad_norm": 0.7433786392211914, "learning_rate": 3.6233362576978758e-06, "loss": 2.1054, "step": 21784 }, { "epoch": 0.72, "grad_norm": 0.7283850312232971, "learning_rate": 3.6225175338330997e-06, "loss": 2.046, "step": 21785 }, { "epoch": 0.72, "grad_norm": 0.7239839434623718, "learning_rate": 3.621698882017386e-06, "loss": 2.077, "step": 21786 }, { "epoch": 0.72, "grad_norm": 0.7538217306137085, "learning_rate": 3.6208803022599805e-06, "loss": 1.9973, "step": 21787 }, { "epoch": 0.72, "grad_norm": 0.7495763301849365, "learning_rate": 3.6200617945701277e-06, "loss": 2.1001, "step": 21788 }, { "epoch": 0.72, "grad_norm": 0.7470661997795105, "learning_rate": 3.6192433589570773e-06, "loss": 2.0095, "step": 21789 }, { "epoch": 0.72, "grad_norm": 0.7537137866020203, "learning_rate": 3.61842499543008e-06, "loss": 2.0895, "step": 21790 }, { "epoch": 0.72, "grad_norm": 0.7776414155960083, "learning_rate": 3.6176067039983763e-06, "loss": 2.0601, "step": 21791 }, { "epoch": 0.73, "grad_norm": 0.702070415019989, "learning_rate": 3.616788484671209e-06, "loss": 2.043, "step": 21792 }, { "epoch": 0.73, "grad_norm": 0.7186402678489685, "learning_rate": 3.615970337457828e-06, "loss": 2.0689, "step": 21793 }, { "epoch": 0.73, "grad_norm": 0.7302215695381165, "learning_rate": 3.6151522623674717e-06, "loss": 2.0362, "step": 21794 }, { "epoch": 0.73, "grad_norm": 0.7680872082710266, "learning_rate": 3.614334259409381e-06, "loss": 2.0204, "step": 21795 }, { "epoch": 0.73, "grad_norm": 0.7586742639541626, "learning_rate": 3.6135163285927987e-06, "loss": 2.0576, "step": 21796 }, { "epoch": 0.73, "grad_norm": 0.762871265411377, "learning_rate": 3.6126984699269696e-06, "loss": 2.077, "step": 21797 }, { "epoch": 0.73, "grad_norm": 0.7418943047523499, "learning_rate": 3.611880683421126e-06, "loss": 2.1079, "step": 21798 }, { "epoch": 0.73, "grad_norm": 0.7461593151092529, "learning_rate": 3.6110629690845147e-06, "loss": 2.0769, "step": 21799 }, { "epoch": 0.73, "grad_norm": 0.7487608790397644, "learning_rate": 3.6102453269263695e-06, "loss": 2.0353, "step": 21800 }, { "epoch": 0.73, "grad_norm": 0.7583315968513489, "learning_rate": 3.6094277569559245e-06, "loss": 2.0208, "step": 21801 }, { "epoch": 0.73, "grad_norm": 0.7332434058189392, "learning_rate": 3.60861025918242e-06, "loss": 2.0188, "step": 21802 }, { "epoch": 0.73, "grad_norm": 0.7511343359947205, "learning_rate": 3.607792833615097e-06, "loss": 2.0375, "step": 21803 }, { "epoch": 0.73, "grad_norm": 0.7405607104301453, "learning_rate": 3.6069754802631773e-06, "loss": 1.9913, "step": 21804 }, { "epoch": 0.73, "grad_norm": 0.778723955154419, "learning_rate": 3.606158199135903e-06, "loss": 2.1151, "step": 21805 }, { "epoch": 0.73, "grad_norm": 0.7166258096694946, "learning_rate": 3.6053409902425096e-06, "loss": 2.0422, "step": 21806 }, { "epoch": 0.73, "grad_norm": 0.7846384644508362, "learning_rate": 3.604523853592222e-06, "loss": 2.0132, "step": 21807 }, { "epoch": 0.73, "grad_norm": 0.7842736840248108, "learning_rate": 3.603706789194279e-06, "loss": 2.0872, "step": 21808 }, { "epoch": 0.73, "grad_norm": 0.7671645283699036, "learning_rate": 3.602889797057909e-06, "loss": 2.04, "step": 21809 }, { "epoch": 0.73, "grad_norm": 0.7349531650543213, "learning_rate": 3.602072877192336e-06, "loss": 2.0487, "step": 21810 }, { "epoch": 0.73, "grad_norm": 0.7678916454315186, "learning_rate": 3.6012560296067955e-06, "loss": 2.0043, "step": 21811 }, { "epoch": 0.73, "grad_norm": 0.7576454281806946, "learning_rate": 3.6004392543105182e-06, "loss": 2.0563, "step": 21812 }, { "epoch": 0.73, "grad_norm": 0.7245506644248962, "learning_rate": 3.599622551312726e-06, "loss": 2.0552, "step": 21813 }, { "epoch": 0.73, "grad_norm": 0.7298718690872192, "learning_rate": 3.5988059206226455e-06, "loss": 2.0687, "step": 21814 }, { "epoch": 0.73, "grad_norm": 0.7548322081565857, "learning_rate": 3.5979893622495065e-06, "loss": 1.9764, "step": 21815 }, { "epoch": 0.73, "grad_norm": 0.7611265778541565, "learning_rate": 3.5971728762025314e-06, "loss": 2.0329, "step": 21816 }, { "epoch": 0.73, "grad_norm": 0.757684588432312, "learning_rate": 3.596356462490943e-06, "loss": 2.0344, "step": 21817 }, { "epoch": 0.73, "grad_norm": 0.7353269457817078, "learning_rate": 3.595540121123965e-06, "loss": 2.0764, "step": 21818 }, { "epoch": 0.73, "grad_norm": 0.7585800290107727, "learning_rate": 3.594723852110825e-06, "loss": 2.0098, "step": 21819 }, { "epoch": 0.73, "grad_norm": 0.7482415437698364, "learning_rate": 3.5939076554607376e-06, "loss": 2.0728, "step": 21820 }, { "epoch": 0.73, "grad_norm": 0.7166197896003723, "learning_rate": 3.5930915311829306e-06, "loss": 2.0238, "step": 21821 }, { "epoch": 0.73, "grad_norm": 0.7556038498878479, "learning_rate": 3.592275479286621e-06, "loss": 2.0252, "step": 21822 }, { "epoch": 0.73, "grad_norm": 0.7464967370033264, "learning_rate": 3.591459499781025e-06, "loss": 1.9925, "step": 21823 }, { "epoch": 0.73, "grad_norm": 0.8110756278038025, "learning_rate": 3.5906435926753624e-06, "loss": 2.0584, "step": 21824 }, { "epoch": 0.73, "grad_norm": 0.7550192475318909, "learning_rate": 3.5898277579788598e-06, "loss": 2.1613, "step": 21825 }, { "epoch": 0.73, "grad_norm": 0.7350652813911438, "learning_rate": 3.5890119957007184e-06, "loss": 2.0342, "step": 21826 }, { "epoch": 0.73, "grad_norm": 0.7668139934539795, "learning_rate": 3.5881963058501647e-06, "loss": 2.0434, "step": 21827 }, { "epoch": 0.73, "grad_norm": 0.7406257390975952, "learning_rate": 3.5873806884364125e-06, "loss": 2.1098, "step": 21828 }, { "epoch": 0.73, "grad_norm": 0.7229348421096802, "learning_rate": 3.586565143468673e-06, "loss": 2.0341, "step": 21829 }, { "epoch": 0.73, "grad_norm": 0.7453036904335022, "learning_rate": 3.5857496709561645e-06, "loss": 1.9912, "step": 21830 }, { "epoch": 0.73, "grad_norm": 0.7564787864685059, "learning_rate": 3.5849342709080983e-06, "loss": 2.0629, "step": 21831 }, { "epoch": 0.73, "grad_norm": 0.742656409740448, "learning_rate": 3.584118943333681e-06, "loss": 2.0754, "step": 21832 }, { "epoch": 0.73, "grad_norm": 0.7487382888793945, "learning_rate": 3.583303688242127e-06, "loss": 2.1315, "step": 21833 }, { "epoch": 0.73, "grad_norm": 0.7510069608688354, "learning_rate": 3.5824885056426516e-06, "loss": 1.9479, "step": 21834 }, { "epoch": 0.73, "grad_norm": 0.7455934882164001, "learning_rate": 3.5816733955444606e-06, "loss": 2.0564, "step": 21835 }, { "epoch": 0.73, "grad_norm": 0.7172040343284607, "learning_rate": 3.580858357956758e-06, "loss": 2.005, "step": 21836 }, { "epoch": 0.73, "grad_norm": 0.746868371963501, "learning_rate": 3.580043392888759e-06, "loss": 2.038, "step": 21837 }, { "epoch": 0.73, "grad_norm": 0.7526938319206238, "learning_rate": 3.579228500349664e-06, "loss": 2.1281, "step": 21838 }, { "epoch": 0.73, "grad_norm": 0.7642523646354675, "learning_rate": 3.5784136803486858e-06, "loss": 2.0933, "step": 21839 }, { "epoch": 0.73, "grad_norm": 0.7245362401008606, "learning_rate": 3.5775989328950235e-06, "loss": 2.0462, "step": 21840 }, { "epoch": 0.73, "grad_norm": 0.7671468257904053, "learning_rate": 3.576784257997887e-06, "loss": 2.0192, "step": 21841 }, { "epoch": 0.73, "grad_norm": 0.762110710144043, "learning_rate": 3.5759696556664746e-06, "loss": 2.0306, "step": 21842 }, { "epoch": 0.73, "grad_norm": 0.7455496788024902, "learning_rate": 3.575155125909996e-06, "loss": 2.0563, "step": 21843 }, { "epoch": 0.73, "grad_norm": 0.7331894040107727, "learning_rate": 3.574340668737649e-06, "loss": 2.1, "step": 21844 }, { "epoch": 0.73, "grad_norm": 0.7222306132316589, "learning_rate": 3.5735262841586317e-06, "loss": 1.9978, "step": 21845 }, { "epoch": 0.73, "grad_norm": 0.7299706935882568, "learning_rate": 3.572711972182149e-06, "loss": 2.0393, "step": 21846 }, { "epoch": 0.73, "grad_norm": 0.7208203673362732, "learning_rate": 3.5718977328174053e-06, "loss": 2.0771, "step": 21847 }, { "epoch": 0.73, "grad_norm": 0.7589237093925476, "learning_rate": 3.571083566073589e-06, "loss": 2.0445, "step": 21848 }, { "epoch": 0.73, "grad_norm": 0.7421824336051941, "learning_rate": 3.5702694719599008e-06, "loss": 2.0255, "step": 21849 }, { "epoch": 0.73, "grad_norm": 0.7436151504516602, "learning_rate": 3.5694554504855437e-06, "loss": 2.0529, "step": 21850 }, { "epoch": 0.73, "grad_norm": 0.749893069267273, "learning_rate": 3.5686415016597075e-06, "loss": 2.1168, "step": 21851 }, { "epoch": 0.73, "grad_norm": 0.7367182970046997, "learning_rate": 3.5678276254915935e-06, "loss": 2.0283, "step": 21852 }, { "epoch": 0.73, "grad_norm": 0.7289462685585022, "learning_rate": 3.5670138219903937e-06, "loss": 2.0183, "step": 21853 }, { "epoch": 0.73, "grad_norm": 0.7591384649276733, "learning_rate": 3.5662000911652983e-06, "loss": 2.0896, "step": 21854 }, { "epoch": 0.73, "grad_norm": 0.725933313369751, "learning_rate": 3.565386433025503e-06, "loss": 2.0386, "step": 21855 }, { "epoch": 0.73, "grad_norm": 0.7601847052574158, "learning_rate": 3.5645728475802044e-06, "loss": 2.0523, "step": 21856 }, { "epoch": 0.73, "grad_norm": 0.7530854940414429, "learning_rate": 3.5637593348385903e-06, "loss": 2.0201, "step": 21857 }, { "epoch": 0.73, "grad_norm": 0.7309998273849487, "learning_rate": 3.5629458948098483e-06, "loss": 2.0455, "step": 21858 }, { "epoch": 0.73, "grad_norm": 0.7353950142860413, "learning_rate": 3.5621325275031703e-06, "loss": 1.9886, "step": 21859 }, { "epoch": 0.73, "grad_norm": 0.7321322560310364, "learning_rate": 3.56131923292775e-06, "loss": 1.9738, "step": 21860 }, { "epoch": 0.73, "grad_norm": 0.7233182787895203, "learning_rate": 3.5605060110927712e-06, "loss": 1.9906, "step": 21861 }, { "epoch": 0.73, "grad_norm": 0.7458654642105103, "learning_rate": 3.5596928620074176e-06, "loss": 2.0013, "step": 21862 }, { "epoch": 0.73, "grad_norm": 0.774597704410553, "learning_rate": 3.5588797856808842e-06, "loss": 2.0156, "step": 21863 }, { "epoch": 0.73, "grad_norm": 0.7273744344711304, "learning_rate": 3.558066782122348e-06, "loss": 2.0802, "step": 21864 }, { "epoch": 0.73, "grad_norm": 0.7335337400436401, "learning_rate": 3.5572538513410026e-06, "loss": 2.0302, "step": 21865 }, { "epoch": 0.73, "grad_norm": 0.7475301027297974, "learning_rate": 3.5564409933460264e-06, "loss": 1.9859, "step": 21866 }, { "epoch": 0.73, "grad_norm": 0.7385428547859192, "learning_rate": 3.5556282081466e-06, "loss": 2.0664, "step": 21867 }, { "epoch": 0.73, "grad_norm": 0.7578171491622925, "learning_rate": 3.5548154957519097e-06, "loss": 1.9534, "step": 21868 }, { "epoch": 0.73, "grad_norm": 0.7541801333427429, "learning_rate": 3.554002856171139e-06, "loss": 2.0334, "step": 21869 }, { "epoch": 0.73, "grad_norm": 0.7522043585777283, "learning_rate": 3.5531902894134672e-06, "loss": 2.0193, "step": 21870 }, { "epoch": 0.73, "grad_norm": 0.7453630566596985, "learning_rate": 3.5523777954880702e-06, "loss": 1.9853, "step": 21871 }, { "epoch": 0.73, "grad_norm": 0.7678565382957458, "learning_rate": 3.5515653744041334e-06, "loss": 2.0773, "step": 21872 }, { "epoch": 0.73, "grad_norm": 0.7546950578689575, "learning_rate": 3.5507530261708288e-06, "loss": 2.0708, "step": 21873 }, { "epoch": 0.73, "grad_norm": 0.7173621654510498, "learning_rate": 3.5499407507973395e-06, "loss": 2.0942, "step": 21874 }, { "epoch": 0.73, "grad_norm": 0.7275766730308533, "learning_rate": 3.549128548292836e-06, "loss": 2.0826, "step": 21875 }, { "epoch": 0.73, "grad_norm": 0.7295228838920593, "learning_rate": 3.548316418666502e-06, "loss": 1.9741, "step": 21876 }, { "epoch": 0.73, "grad_norm": 0.7192540168762207, "learning_rate": 3.547504361927504e-06, "loss": 2.068, "step": 21877 }, { "epoch": 0.73, "grad_norm": 0.7474537491798401, "learning_rate": 3.546692378085024e-06, "loss": 2.0252, "step": 21878 }, { "epoch": 0.73, "grad_norm": 0.7443355321884155, "learning_rate": 3.5458804671482305e-06, "loss": 2.0357, "step": 21879 }, { "epoch": 0.73, "grad_norm": 0.7322970628738403, "learning_rate": 3.545068629126295e-06, "loss": 1.998, "step": 21880 }, { "epoch": 0.73, "grad_norm": 0.7619025111198425, "learning_rate": 3.5442568640283903e-06, "loss": 2.026, "step": 21881 }, { "epoch": 0.73, "grad_norm": 0.7609520554542542, "learning_rate": 3.543445171863691e-06, "loss": 2.1369, "step": 21882 }, { "epoch": 0.73, "grad_norm": 0.7881824970245361, "learning_rate": 3.542633552641365e-06, "loss": 2.0666, "step": 21883 }, { "epoch": 0.73, "grad_norm": 0.7858774662017822, "learning_rate": 3.5418220063705766e-06, "loss": 2.0175, "step": 21884 }, { "epoch": 0.73, "grad_norm": 0.752856433391571, "learning_rate": 3.5410105330605028e-06, "loss": 2.0413, "step": 21885 }, { "epoch": 0.73, "grad_norm": 0.7580515146255493, "learning_rate": 3.5401991327203022e-06, "loss": 2.0462, "step": 21886 }, { "epoch": 0.73, "grad_norm": 0.7340289354324341, "learning_rate": 3.53938780535915e-06, "loss": 2.0327, "step": 21887 }, { "epoch": 0.73, "grad_norm": 0.7451505661010742, "learning_rate": 3.538576550986208e-06, "loss": 2.0283, "step": 21888 }, { "epoch": 0.73, "grad_norm": 0.7284174561500549, "learning_rate": 3.5377653696106386e-06, "loss": 2.0077, "step": 21889 }, { "epoch": 0.73, "grad_norm": 0.7714676260948181, "learning_rate": 3.5369542612416087e-06, "loss": 2.0711, "step": 21890 }, { "epoch": 0.73, "grad_norm": 0.7632626295089722, "learning_rate": 3.536143225888284e-06, "loss": 2.1135, "step": 21891 }, { "epoch": 0.73, "grad_norm": 0.744427502155304, "learning_rate": 3.5353322635598253e-06, "loss": 2.0757, "step": 21892 }, { "epoch": 0.73, "grad_norm": 0.7260180711746216, "learning_rate": 3.5345213742653915e-06, "loss": 2.001, "step": 21893 }, { "epoch": 0.73, "grad_norm": 0.7563790678977966, "learning_rate": 3.5337105580141485e-06, "loss": 1.9958, "step": 21894 }, { "epoch": 0.73, "grad_norm": 0.7438982725143433, "learning_rate": 3.5328998148152515e-06, "loss": 2.0606, "step": 21895 }, { "epoch": 0.73, "grad_norm": 0.7843042016029358, "learning_rate": 3.532089144677865e-06, "loss": 1.9785, "step": 21896 }, { "epoch": 0.73, "grad_norm": 0.7628198862075806, "learning_rate": 3.531278547611142e-06, "loss": 2.0307, "step": 21897 }, { "epoch": 0.73, "grad_norm": 0.7578327655792236, "learning_rate": 3.530468023624246e-06, "loss": 2.0409, "step": 21898 }, { "epoch": 0.73, "grad_norm": 0.7420735955238342, "learning_rate": 3.529657572726327e-06, "loss": 2.0664, "step": 21899 }, { "epoch": 0.73, "grad_norm": 0.732926070690155, "learning_rate": 3.528847194926549e-06, "loss": 2.0459, "step": 21900 }, { "epoch": 0.73, "grad_norm": 0.7543691992759705, "learning_rate": 3.5280368902340624e-06, "loss": 2.0719, "step": 21901 }, { "epoch": 0.73, "grad_norm": 0.7463003396987915, "learning_rate": 3.527226658658018e-06, "loss": 2.0809, "step": 21902 }, { "epoch": 0.73, "grad_norm": 0.77821284532547, "learning_rate": 3.5264165002075747e-06, "loss": 2.149, "step": 21903 }, { "epoch": 0.73, "grad_norm": 0.7767094373703003, "learning_rate": 3.5256064148918867e-06, "loss": 2.0808, "step": 21904 }, { "epoch": 0.73, "grad_norm": 0.7331055998802185, "learning_rate": 3.524796402720102e-06, "loss": 2.0232, "step": 21905 }, { "epoch": 0.73, "grad_norm": 0.7976982593536377, "learning_rate": 3.523986463701371e-06, "loss": 2.0523, "step": 21906 }, { "epoch": 0.73, "grad_norm": 0.7459834814071655, "learning_rate": 3.5231765978448486e-06, "loss": 2.1459, "step": 21907 }, { "epoch": 0.73, "grad_norm": 0.7619378566741943, "learning_rate": 3.5223668051596773e-06, "loss": 2.087, "step": 21908 }, { "epoch": 0.73, "grad_norm": 0.7582420110702515, "learning_rate": 3.521557085655013e-06, "loss": 2.0633, "step": 21909 }, { "epoch": 0.73, "grad_norm": 0.767217755317688, "learning_rate": 3.5207474393399997e-06, "loss": 2.036, "step": 21910 }, { "epoch": 0.73, "grad_norm": 0.7344055771827698, "learning_rate": 3.5199378662237826e-06, "loss": 2.0794, "step": 21911 }, { "epoch": 0.73, "grad_norm": 0.7665054798126221, "learning_rate": 3.5191283663155084e-06, "loss": 2.0009, "step": 21912 }, { "epoch": 0.73, "grad_norm": 0.7368614077568054, "learning_rate": 3.5183189396243277e-06, "loss": 1.9912, "step": 21913 }, { "epoch": 0.73, "grad_norm": 0.775973379611969, "learning_rate": 3.517509586159381e-06, "loss": 2.0248, "step": 21914 }, { "epoch": 0.73, "grad_norm": 0.7357608079910278, "learning_rate": 3.5167003059298087e-06, "loss": 2.0447, "step": 21915 }, { "epoch": 0.73, "grad_norm": 0.7351614832878113, "learning_rate": 3.515891098944759e-06, "loss": 2.0594, "step": 21916 }, { "epoch": 0.73, "grad_norm": 0.7268781065940857, "learning_rate": 3.5150819652133694e-06, "loss": 1.9979, "step": 21917 }, { "epoch": 0.73, "grad_norm": 0.7279900908470154, "learning_rate": 3.5142729047447867e-06, "loss": 2.0264, "step": 21918 }, { "epoch": 0.73, "grad_norm": 0.7398910522460938, "learning_rate": 3.513463917548143e-06, "loss": 2.0127, "step": 21919 }, { "epoch": 0.73, "grad_norm": 0.7790997624397278, "learning_rate": 3.512655003632588e-06, "loss": 2.092, "step": 21920 }, { "epoch": 0.73, "grad_norm": 0.7216687202453613, "learning_rate": 3.5118461630072496e-06, "loss": 2.0028, "step": 21921 }, { "epoch": 0.73, "grad_norm": 0.7760214805603027, "learning_rate": 3.5110373956812747e-06, "loss": 2.0046, "step": 21922 }, { "epoch": 0.73, "grad_norm": 0.7429458498954773, "learning_rate": 3.510228701663797e-06, "loss": 2.0889, "step": 21923 }, { "epoch": 0.73, "grad_norm": 0.7619438171386719, "learning_rate": 3.509420080963948e-06, "loss": 2.0615, "step": 21924 }, { "epoch": 0.73, "grad_norm": 0.7492358088493347, "learning_rate": 3.5086115335908677e-06, "loss": 2.0331, "step": 21925 }, { "epoch": 0.73, "grad_norm": 0.7462440729141235, "learning_rate": 3.5078030595536926e-06, "loss": 2.07, "step": 21926 }, { "epoch": 0.73, "grad_norm": 0.7404877543449402, "learning_rate": 3.506994658861553e-06, "loss": 1.9926, "step": 21927 }, { "epoch": 0.73, "grad_norm": 0.7329072952270508, "learning_rate": 3.506186331523581e-06, "loss": 2.0217, "step": 21928 }, { "epoch": 0.73, "grad_norm": 0.7485182881355286, "learning_rate": 3.505378077548912e-06, "loss": 2.0591, "step": 21929 }, { "epoch": 0.73, "grad_norm": 0.7510843873023987, "learning_rate": 3.5045698969466736e-06, "loss": 2.0667, "step": 21930 }, { "epoch": 0.73, "grad_norm": 0.7725932598114014, "learning_rate": 3.5037617897259958e-06, "loss": 2.1167, "step": 21931 }, { "epoch": 0.73, "grad_norm": 0.7507808804512024, "learning_rate": 3.502953755896018e-06, "loss": 2.0499, "step": 21932 }, { "epoch": 0.73, "grad_norm": 0.7361055612564087, "learning_rate": 3.5021457954658542e-06, "loss": 2.067, "step": 21933 }, { "epoch": 0.73, "grad_norm": 0.7400034666061401, "learning_rate": 3.5013379084446386e-06, "loss": 2.0014, "step": 21934 }, { "epoch": 0.73, "grad_norm": 0.7240464091300964, "learning_rate": 3.500530094841502e-06, "loss": 2.0101, "step": 21935 }, { "epoch": 0.73, "grad_norm": 0.7522979378700256, "learning_rate": 3.4997223546655677e-06, "loss": 2.0193, "step": 21936 }, { "epoch": 0.73, "grad_norm": 0.7448248863220215, "learning_rate": 3.4989146879259583e-06, "loss": 2.0252, "step": 21937 }, { "epoch": 0.73, "grad_norm": 0.7635264992713928, "learning_rate": 3.498107094631803e-06, "loss": 2.063, "step": 21938 }, { "epoch": 0.73, "grad_norm": 0.7579444646835327, "learning_rate": 3.49729957479222e-06, "loss": 1.991, "step": 21939 }, { "epoch": 0.73, "grad_norm": 0.7302706241607666, "learning_rate": 3.496492128416339e-06, "loss": 2.0346, "step": 21940 }, { "epoch": 0.73, "grad_norm": 0.7431196570396423, "learning_rate": 3.4956847555132746e-06, "loss": 2.0298, "step": 21941 }, { "epoch": 0.73, "grad_norm": 0.73491370677948, "learning_rate": 3.494877456092156e-06, "loss": 2.0286, "step": 21942 }, { "epoch": 0.73, "grad_norm": 0.7373284697532654, "learning_rate": 3.4940702301620954e-06, "loss": 2.0038, "step": 21943 }, { "epoch": 0.73, "grad_norm": 0.7755711078643799, "learning_rate": 3.493263077732221e-06, "loss": 2.0241, "step": 21944 }, { "epoch": 0.73, "grad_norm": 0.73579341173172, "learning_rate": 3.492455998811646e-06, "loss": 1.9627, "step": 21945 }, { "epoch": 0.73, "grad_norm": 0.7552460432052612, "learning_rate": 3.4916489934094865e-06, "loss": 2.0172, "step": 21946 }, { "epoch": 0.73, "grad_norm": 0.7431113719940186, "learning_rate": 3.4908420615348616e-06, "loss": 2.0608, "step": 21947 }, { "epoch": 0.73, "grad_norm": 0.7221951484680176, "learning_rate": 3.490035203196893e-06, "loss": 2.1493, "step": 21948 }, { "epoch": 0.73, "grad_norm": 0.7242643237113953, "learning_rate": 3.489228418404691e-06, "loss": 2.0515, "step": 21949 }, { "epoch": 0.73, "grad_norm": 0.7540082931518555, "learning_rate": 3.4884217071673665e-06, "loss": 2.1009, "step": 21950 }, { "epoch": 0.73, "grad_norm": 0.7547493577003479, "learning_rate": 3.4876150694940415e-06, "loss": 2.0664, "step": 21951 }, { "epoch": 0.73, "grad_norm": 0.7211744785308838, "learning_rate": 3.4868085053938217e-06, "loss": 2.0395, "step": 21952 }, { "epoch": 0.73, "grad_norm": 0.7982618808746338, "learning_rate": 3.486002014875821e-06, "loss": 2.0549, "step": 21953 }, { "epoch": 0.73, "grad_norm": 0.71919184923172, "learning_rate": 3.4851955979491603e-06, "loss": 2.0511, "step": 21954 }, { "epoch": 0.73, "grad_norm": 0.7254493832588196, "learning_rate": 3.484389254622934e-06, "loss": 2.048, "step": 21955 }, { "epoch": 0.73, "grad_norm": 0.7291633486747742, "learning_rate": 3.4835829849062597e-06, "loss": 2.0512, "step": 21956 }, { "epoch": 0.73, "grad_norm": 0.7729559540748596, "learning_rate": 3.4827767888082486e-06, "loss": 2.0667, "step": 21957 }, { "epoch": 0.73, "grad_norm": 0.7443308234214783, "learning_rate": 3.481970666338007e-06, "loss": 2.0844, "step": 21958 }, { "epoch": 0.73, "grad_norm": 0.7337605953216553, "learning_rate": 3.4811646175046365e-06, "loss": 2.0029, "step": 21959 }, { "epoch": 0.73, "grad_norm": 0.7373915314674377, "learning_rate": 3.4803586423172484e-06, "loss": 1.9915, "step": 21960 }, { "epoch": 0.73, "grad_norm": 0.7648467421531677, "learning_rate": 3.4795527407849507e-06, "loss": 2.0408, "step": 21961 }, { "epoch": 0.73, "grad_norm": 0.7355338931083679, "learning_rate": 3.4787469129168405e-06, "loss": 2.0661, "step": 21962 }, { "epoch": 0.73, "grad_norm": 0.7178565859794617, "learning_rate": 3.47794115872203e-06, "loss": 1.9664, "step": 21963 }, { "epoch": 0.73, "grad_norm": 0.7369266152381897, "learning_rate": 3.4771354782096177e-06, "loss": 2.0466, "step": 21964 }, { "epoch": 0.73, "grad_norm": 0.774416446685791, "learning_rate": 3.476329871388704e-06, "loss": 2.1482, "step": 21965 }, { "epoch": 0.73, "grad_norm": 0.7408351898193359, "learning_rate": 3.47552433826839e-06, "loss": 2.0624, "step": 21966 }, { "epoch": 0.73, "grad_norm": 0.7591580748558044, "learning_rate": 3.474718878857787e-06, "loss": 2.0317, "step": 21967 }, { "epoch": 0.73, "grad_norm": 0.766352653503418, "learning_rate": 3.473913493165978e-06, "loss": 2.0737, "step": 21968 }, { "epoch": 0.73, "grad_norm": 0.7312650084495544, "learning_rate": 3.4731081812020696e-06, "loss": 2.0552, "step": 21969 }, { "epoch": 0.73, "grad_norm": 0.7558214664459229, "learning_rate": 3.472302942975164e-06, "loss": 2.0662, "step": 21970 }, { "epoch": 0.73, "grad_norm": 0.7799909710884094, "learning_rate": 3.4714977784943537e-06, "loss": 2.0832, "step": 21971 }, { "epoch": 0.73, "grad_norm": 0.7597474455833435, "learning_rate": 3.470692687768732e-06, "loss": 2.0439, "step": 21972 }, { "epoch": 0.73, "grad_norm": 0.7348108291625977, "learning_rate": 3.4698876708074015e-06, "loss": 2.0214, "step": 21973 }, { "epoch": 0.73, "grad_norm": 0.7593065500259399, "learning_rate": 3.4690827276194493e-06, "loss": 2.1209, "step": 21974 }, { "epoch": 0.73, "grad_norm": 0.7610917091369629, "learning_rate": 3.468277858213973e-06, "loss": 2.0743, "step": 21975 }, { "epoch": 0.73, "grad_norm": 0.7422091960906982, "learning_rate": 3.4674730626000684e-06, "loss": 2.08, "step": 21976 }, { "epoch": 0.73, "grad_norm": 0.7613323330879211, "learning_rate": 3.466668340786825e-06, "loss": 2.036, "step": 21977 }, { "epoch": 0.73, "grad_norm": 0.747014045715332, "learning_rate": 3.465863692783331e-06, "loss": 2.0034, "step": 21978 }, { "epoch": 0.73, "grad_norm": 0.7534889578819275, "learning_rate": 3.4650591185986827e-06, "loss": 2.0461, "step": 21979 }, { "epoch": 0.73, "grad_norm": 0.7346971035003662, "learning_rate": 3.4642546182419668e-06, "loss": 2.0002, "step": 21980 }, { "epoch": 0.73, "grad_norm": 0.7238320112228394, "learning_rate": 3.4634501917222686e-06, "loss": 2.1091, "step": 21981 }, { "epoch": 0.73, "grad_norm": 0.7262044548988342, "learning_rate": 3.462645839048678e-06, "loss": 2.1038, "step": 21982 }, { "epoch": 0.73, "grad_norm": 0.7538089752197266, "learning_rate": 3.461841560230288e-06, "loss": 2.0835, "step": 21983 }, { "epoch": 0.73, "grad_norm": 0.733538806438446, "learning_rate": 3.4610373552761777e-06, "loss": 2.0417, "step": 21984 }, { "epoch": 0.73, "grad_norm": 0.7391936182975769, "learning_rate": 3.4602332241954373e-06, "loss": 2.0643, "step": 21985 }, { "epoch": 0.73, "grad_norm": 0.726144015789032, "learning_rate": 3.459429166997149e-06, "loss": 2.0357, "step": 21986 }, { "epoch": 0.73, "grad_norm": 0.7673753499984741, "learning_rate": 3.458625183690394e-06, "loss": 2.0809, "step": 21987 }, { "epoch": 0.73, "grad_norm": 0.7668999433517456, "learning_rate": 3.457821274284259e-06, "loss": 2.0537, "step": 21988 }, { "epoch": 0.73, "grad_norm": 0.7318655252456665, "learning_rate": 3.457017438787831e-06, "loss": 2.0259, "step": 21989 }, { "epoch": 0.73, "grad_norm": 0.7278853058815002, "learning_rate": 3.4562136772101785e-06, "loss": 2.0071, "step": 21990 }, { "epoch": 0.73, "grad_norm": 0.7428708076477051, "learning_rate": 3.4554099895603886e-06, "loss": 2.115, "step": 21991 }, { "epoch": 0.73, "grad_norm": 0.7199832201004028, "learning_rate": 3.4546063758475444e-06, "loss": 2.0089, "step": 21992 }, { "epoch": 0.73, "grad_norm": 0.7396317720413208, "learning_rate": 3.453802836080722e-06, "loss": 2.0289, "step": 21993 }, { "epoch": 0.73, "grad_norm": 0.7196389436721802, "learning_rate": 3.4529993702689955e-06, "loss": 1.9811, "step": 21994 }, { "epoch": 0.73, "grad_norm": 0.7384416460990906, "learning_rate": 3.4521959784214486e-06, "loss": 2.0278, "step": 21995 }, { "epoch": 0.73, "grad_norm": 0.7443186640739441, "learning_rate": 3.4513926605471504e-06, "loss": 2.049, "step": 21996 }, { "epoch": 0.73, "grad_norm": 0.756470263004303, "learning_rate": 3.45058941665518e-06, "loss": 2.0893, "step": 21997 }, { "epoch": 0.73, "grad_norm": 0.7462362051010132, "learning_rate": 3.449786246754615e-06, "loss": 2.0415, "step": 21998 }, { "epoch": 0.73, "grad_norm": 0.7609695792198181, "learning_rate": 3.4489831508545267e-06, "loss": 2.0423, "step": 21999 }, { "epoch": 0.73, "grad_norm": 0.7346771359443665, "learning_rate": 3.448180128963984e-06, "loss": 2.0077, "step": 22000 }, { "epoch": 0.73, "grad_norm": 0.7259600758552551, "learning_rate": 3.4473771810920665e-06, "loss": 2.0343, "step": 22001 }, { "epoch": 0.73, "grad_norm": 0.7570362091064453, "learning_rate": 3.446574307247841e-06, "loss": 2.1285, "step": 22002 }, { "epoch": 0.73, "grad_norm": 0.7363032698631287, "learning_rate": 3.4457715074403743e-06, "loss": 2.0667, "step": 22003 }, { "epoch": 0.73, "grad_norm": 0.7343173623085022, "learning_rate": 3.4449687816787404e-06, "loss": 2.0379, "step": 22004 }, { "epoch": 0.73, "grad_norm": 0.7564393281936646, "learning_rate": 3.444166129972011e-06, "loss": 2.0995, "step": 22005 }, { "epoch": 0.73, "grad_norm": 0.7725486755371094, "learning_rate": 3.4433635523292475e-06, "loss": 2.1478, "step": 22006 }, { "epoch": 0.73, "grad_norm": 0.7252218723297119, "learning_rate": 3.442561048759523e-06, "loss": 2.0024, "step": 22007 }, { "epoch": 0.73, "grad_norm": 0.7216829657554626, "learning_rate": 3.4417586192719e-06, "loss": 2.087, "step": 22008 }, { "epoch": 0.73, "grad_norm": 0.7366796135902405, "learning_rate": 3.4409562638754425e-06, "loss": 2.1119, "step": 22009 }, { "epoch": 0.73, "grad_norm": 0.7319130897521973, "learning_rate": 3.4401539825792162e-06, "loss": 2.0032, "step": 22010 }, { "epoch": 0.73, "grad_norm": 0.7575193643569946, "learning_rate": 3.4393517753922933e-06, "loss": 2.0323, "step": 22011 }, { "epoch": 0.73, "grad_norm": 0.7761048674583435, "learning_rate": 3.438549642323722e-06, "loss": 1.9898, "step": 22012 }, { "epoch": 0.73, "grad_norm": 0.7262493968009949, "learning_rate": 3.4377475833825714e-06, "loss": 2.1171, "step": 22013 }, { "epoch": 0.73, "grad_norm": 0.7469989061355591, "learning_rate": 3.4369455985779065e-06, "loss": 1.9799, "step": 22014 }, { "epoch": 0.73, "grad_norm": 0.760104775428772, "learning_rate": 3.4361436879187802e-06, "loss": 2.097, "step": 22015 }, { "epoch": 0.73, "grad_norm": 0.7543305158615112, "learning_rate": 3.435341851414259e-06, "loss": 2.0287, "step": 22016 }, { "epoch": 0.73, "grad_norm": 0.758040726184845, "learning_rate": 3.4345400890733983e-06, "loss": 2.0435, "step": 22017 }, { "epoch": 0.73, "grad_norm": 0.7447519898414612, "learning_rate": 3.433738400905253e-06, "loss": 2.0694, "step": 22018 }, { "epoch": 0.73, "grad_norm": 0.7724086046218872, "learning_rate": 3.432936786918882e-06, "loss": 2.0457, "step": 22019 }, { "epoch": 0.73, "grad_norm": 0.7522633075714111, "learning_rate": 3.4321352471233473e-06, "loss": 2.086, "step": 22020 }, { "epoch": 0.73, "grad_norm": 0.7203301191329956, "learning_rate": 3.431333781527699e-06, "loss": 2.0167, "step": 22021 }, { "epoch": 0.73, "grad_norm": 0.7195686101913452, "learning_rate": 3.430532390140988e-06, "loss": 2.0129, "step": 22022 }, { "epoch": 0.73, "grad_norm": 0.7443708181381226, "learning_rate": 3.4297310729722757e-06, "loss": 2.0167, "step": 22023 }, { "epoch": 0.73, "grad_norm": 0.8004683256149292, "learning_rate": 3.4289298300306117e-06, "loss": 2.0389, "step": 22024 }, { "epoch": 0.73, "grad_norm": 0.7190501093864441, "learning_rate": 3.428128661325043e-06, "loss": 1.9871, "step": 22025 }, { "epoch": 0.73, "grad_norm": 0.7454371452331543, "learning_rate": 3.4273275668646254e-06, "loss": 2.0603, "step": 22026 }, { "epoch": 0.73, "grad_norm": 0.7811813354492188, "learning_rate": 3.426526546658413e-06, "loss": 2.0188, "step": 22027 }, { "epoch": 0.73, "grad_norm": 0.7571337819099426, "learning_rate": 3.4257256007154483e-06, "loss": 2.0649, "step": 22028 }, { "epoch": 0.73, "grad_norm": 0.7402219772338867, "learning_rate": 3.424924729044785e-06, "loss": 2.0586, "step": 22029 }, { "epoch": 0.73, "grad_norm": 0.7332258820533752, "learning_rate": 3.4241239316554697e-06, "loss": 1.9875, "step": 22030 }, { "epoch": 0.73, "grad_norm": 0.7862280607223511, "learning_rate": 3.423323208556545e-06, "loss": 2.0967, "step": 22031 }, { "epoch": 0.73, "grad_norm": 0.7423326969146729, "learning_rate": 3.422522559757059e-06, "loss": 2.0195, "step": 22032 }, { "epoch": 0.73, "grad_norm": 0.7573521733283997, "learning_rate": 3.4217219852660664e-06, "loss": 2.0624, "step": 22033 }, { "epoch": 0.73, "grad_norm": 0.7342960238456726, "learning_rate": 3.4209214850925964e-06, "loss": 2.0446, "step": 22034 }, { "epoch": 0.73, "grad_norm": 0.7732722759246826, "learning_rate": 3.4201210592457e-06, "loss": 2.0778, "step": 22035 }, { "epoch": 0.73, "grad_norm": 0.7818910479545593, "learning_rate": 3.4193207077344227e-06, "loss": 2.114, "step": 22036 }, { "epoch": 0.73, "grad_norm": 0.7405540347099304, "learning_rate": 3.4185204305678e-06, "loss": 2.0542, "step": 22037 }, { "epoch": 0.73, "grad_norm": 0.7460864186286926, "learning_rate": 3.4177202277548805e-06, "loss": 2.0424, "step": 22038 }, { "epoch": 0.73, "grad_norm": 0.7105057835578918, "learning_rate": 3.416920099304699e-06, "loss": 2.0497, "step": 22039 }, { "epoch": 0.73, "grad_norm": 0.7740036249160767, "learning_rate": 3.4161200452262933e-06, "loss": 2.0917, "step": 22040 }, { "epoch": 0.73, "grad_norm": 0.730941891670227, "learning_rate": 3.4153200655287057e-06, "loss": 2.0325, "step": 22041 }, { "epoch": 0.73, "grad_norm": 0.7270932197570801, "learning_rate": 3.4145201602209756e-06, "loss": 2.1337, "step": 22042 }, { "epoch": 0.73, "grad_norm": 0.7469239234924316, "learning_rate": 3.4137203293121367e-06, "loss": 2.0478, "step": 22043 }, { "epoch": 0.73, "grad_norm": 0.7913739681243896, "learning_rate": 3.4129205728112234e-06, "loss": 2.1019, "step": 22044 }, { "epoch": 0.73, "grad_norm": 0.7695972919464111, "learning_rate": 3.4121208907272753e-06, "loss": 1.9497, "step": 22045 }, { "epoch": 0.73, "grad_norm": 0.7432686686515808, "learning_rate": 3.411321283069322e-06, "loss": 2.0009, "step": 22046 }, { "epoch": 0.73, "grad_norm": 0.7346075773239136, "learning_rate": 3.4105217498464026e-06, "loss": 2.0189, "step": 22047 }, { "epoch": 0.73, "grad_norm": 0.7304443717002869, "learning_rate": 3.409722291067543e-06, "loss": 2.0142, "step": 22048 }, { "epoch": 0.73, "grad_norm": 0.7526780366897583, "learning_rate": 3.4089229067417827e-06, "loss": 2.0536, "step": 22049 }, { "epoch": 0.73, "grad_norm": 0.7358272671699524, "learning_rate": 3.4081235968781445e-06, "loss": 2.0846, "step": 22050 }, { "epoch": 0.73, "grad_norm": 0.7588794827461243, "learning_rate": 3.4073243614856664e-06, "loss": 2.06, "step": 22051 }, { "epoch": 0.73, "grad_norm": 0.7572571635246277, "learning_rate": 3.406525200573374e-06, "loss": 2.1183, "step": 22052 }, { "epoch": 0.73, "grad_norm": 0.7737666368484497, "learning_rate": 3.405726114150292e-06, "loss": 2.1042, "step": 22053 }, { "epoch": 0.73, "grad_norm": 0.7248309254646301, "learning_rate": 3.4049271022254527e-06, "loss": 2.0377, "step": 22054 }, { "epoch": 0.73, "grad_norm": 0.7304350137710571, "learning_rate": 3.404128164807887e-06, "loss": 2.0643, "step": 22055 }, { "epoch": 0.73, "grad_norm": 0.743652880191803, "learning_rate": 3.4033293019066107e-06, "loss": 2.1009, "step": 22056 }, { "epoch": 0.73, "grad_norm": 0.7278335094451904, "learning_rate": 3.402530513530653e-06, "loss": 2.1079, "step": 22057 }, { "epoch": 0.73, "grad_norm": 0.7626019716262817, "learning_rate": 3.401731799689043e-06, "loss": 1.9947, "step": 22058 }, { "epoch": 0.73, "grad_norm": 0.7729936242103577, "learning_rate": 3.400933160390796e-06, "loss": 2.0053, "step": 22059 }, { "epoch": 0.73, "grad_norm": 0.7294853925704956, "learning_rate": 3.400134595644943e-06, "loss": 1.9573, "step": 22060 }, { "epoch": 0.73, "grad_norm": 0.7472320199012756, "learning_rate": 3.399336105460501e-06, "loss": 2.0906, "step": 22061 }, { "epoch": 0.73, "grad_norm": 0.7488674521446228, "learning_rate": 3.3985376898464874e-06, "loss": 2.0511, "step": 22062 }, { "epoch": 0.73, "grad_norm": 0.7861932516098022, "learning_rate": 3.397739348811927e-06, "loss": 2.0847, "step": 22063 }, { "epoch": 0.73, "grad_norm": 0.7188287973403931, "learning_rate": 3.396941082365841e-06, "loss": 2.0418, "step": 22064 }, { "epoch": 0.73, "grad_norm": 0.7608891725540161, "learning_rate": 3.3961428905172457e-06, "loss": 2.0509, "step": 22065 }, { "epoch": 0.73, "grad_norm": 0.7647976875305176, "learning_rate": 3.395344773275153e-06, "loss": 2.1184, "step": 22066 }, { "epoch": 0.73, "grad_norm": 0.7377094626426697, "learning_rate": 3.3945467306485856e-06, "loss": 2.13, "step": 22067 }, { "epoch": 0.73, "grad_norm": 0.7402511835098267, "learning_rate": 3.3937487626465604e-06, "loss": 2.0744, "step": 22068 }, { "epoch": 0.73, "grad_norm": 0.7236462831497192, "learning_rate": 3.392950869278091e-06, "loss": 1.9357, "step": 22069 }, { "epoch": 0.73, "grad_norm": 0.7358348369598389, "learning_rate": 3.392153050552186e-06, "loss": 2.0796, "step": 22070 }, { "epoch": 0.73, "grad_norm": 0.7291209697723389, "learning_rate": 3.391355306477868e-06, "loss": 2.0627, "step": 22071 }, { "epoch": 0.73, "grad_norm": 0.7506392002105713, "learning_rate": 3.39055763706414e-06, "loss": 2.0242, "step": 22072 }, { "epoch": 0.73, "grad_norm": 0.7334917783737183, "learning_rate": 3.389760042320023e-06, "loss": 2.0527, "step": 22073 }, { "epoch": 0.73, "grad_norm": 0.7368965148925781, "learning_rate": 3.388962522254522e-06, "loss": 2.0821, "step": 22074 }, { "epoch": 0.73, "grad_norm": 0.760286808013916, "learning_rate": 3.388165076876645e-06, "loss": 2.0518, "step": 22075 }, { "epoch": 0.73, "grad_norm": 0.7505927681922913, "learning_rate": 3.3873677061954045e-06, "loss": 2.0896, "step": 22076 }, { "epoch": 0.73, "grad_norm": 0.7552119493484497, "learning_rate": 3.3865704102198117e-06, "loss": 2.1024, "step": 22077 }, { "epoch": 0.73, "grad_norm": 0.7553349733352661, "learning_rate": 3.3857731889588697e-06, "loss": 2.0573, "step": 22078 }, { "epoch": 0.73, "grad_norm": 0.7699952125549316, "learning_rate": 3.3849760424215826e-06, "loss": 2.1266, "step": 22079 }, { "epoch": 0.73, "grad_norm": 0.7664619088172913, "learning_rate": 3.384178970616964e-06, "loss": 2.0955, "step": 22080 }, { "epoch": 0.73, "grad_norm": 0.7309025526046753, "learning_rate": 3.3833819735540098e-06, "loss": 2.0317, "step": 22081 }, { "epoch": 0.73, "grad_norm": 0.730108380317688, "learning_rate": 3.3825850512417315e-06, "loss": 2.0233, "step": 22082 }, { "epoch": 0.73, "grad_norm": 0.7597236633300781, "learning_rate": 3.3817882036891257e-06, "loss": 2.014, "step": 22083 }, { "epoch": 0.73, "grad_norm": 0.724894642829895, "learning_rate": 3.3809914309052016e-06, "loss": 2.0393, "step": 22084 }, { "epoch": 0.73, "grad_norm": 0.7193269729614258, "learning_rate": 3.3801947328989537e-06, "loss": 2.0555, "step": 22085 }, { "epoch": 0.73, "grad_norm": 0.7674509286880493, "learning_rate": 3.3793981096793903e-06, "loss": 2.1059, "step": 22086 }, { "epoch": 0.73, "grad_norm": 0.7193686962127686, "learning_rate": 3.378601561255507e-06, "loss": 2.0112, "step": 22087 }, { "epoch": 0.73, "grad_norm": 0.7191973328590393, "learning_rate": 3.3778050876362988e-06, "loss": 2.0506, "step": 22088 }, { "epoch": 0.73, "grad_norm": 0.7486215233802795, "learning_rate": 3.3770086888307676e-06, "loss": 2.0939, "step": 22089 }, { "epoch": 0.73, "grad_norm": 0.7348816394805908, "learning_rate": 3.3762123648479138e-06, "loss": 2.0476, "step": 22090 }, { "epoch": 0.73, "grad_norm": 0.7693881988525391, "learning_rate": 3.375416115696731e-06, "loss": 2.047, "step": 22091 }, { "epoch": 0.74, "grad_norm": 0.7356235980987549, "learning_rate": 3.3746199413862124e-06, "loss": 2.0496, "step": 22092 }, { "epoch": 0.74, "grad_norm": 0.7413351535797119, "learning_rate": 3.3738238419253566e-06, "loss": 2.0542, "step": 22093 }, { "epoch": 0.74, "grad_norm": 0.7346134781837463, "learning_rate": 3.3730278173231534e-06, "loss": 2.079, "step": 22094 }, { "epoch": 0.74, "grad_norm": 0.7542963624000549, "learning_rate": 3.3722318675886012e-06, "loss": 1.993, "step": 22095 }, { "epoch": 0.74, "grad_norm": 0.759371280670166, "learning_rate": 3.3714359927306893e-06, "loss": 2.0388, "step": 22096 }, { "epoch": 0.74, "grad_norm": 0.7669425010681152, "learning_rate": 3.3706401927584054e-06, "loss": 2.0618, "step": 22097 }, { "epoch": 0.74, "grad_norm": 0.7534034252166748, "learning_rate": 3.369844467680743e-06, "loss": 2.0072, "step": 22098 }, { "epoch": 0.74, "grad_norm": 0.761694073677063, "learning_rate": 3.369048817506696e-06, "loss": 2.0218, "step": 22099 }, { "epoch": 0.74, "grad_norm": 0.7431424260139465, "learning_rate": 3.3682532422452487e-06, "loss": 2.0345, "step": 22100 }, { "epoch": 0.74, "grad_norm": 0.7485072016716003, "learning_rate": 3.3674577419053866e-06, "loss": 2.0312, "step": 22101 }, { "epoch": 0.74, "grad_norm": 0.7714682817459106, "learning_rate": 3.3666623164961034e-06, "loss": 2.0837, "step": 22102 }, { "epoch": 0.74, "grad_norm": 0.7677016854286194, "learning_rate": 3.3658669660263788e-06, "loss": 2.0361, "step": 22103 }, { "epoch": 0.74, "grad_norm": 0.7685104608535767, "learning_rate": 3.3650716905052037e-06, "loss": 2.0704, "step": 22104 }, { "epoch": 0.74, "grad_norm": 0.7618870139122009, "learning_rate": 3.3642764899415583e-06, "loss": 2.0156, "step": 22105 }, { "epoch": 0.74, "grad_norm": 0.7502275109291077, "learning_rate": 3.3634813643444297e-06, "loss": 2.0695, "step": 22106 }, { "epoch": 0.74, "grad_norm": 0.7285387516021729, "learning_rate": 3.362686313722797e-06, "loss": 2.0676, "step": 22107 }, { "epoch": 0.74, "grad_norm": 0.7358669638633728, "learning_rate": 3.361891338085648e-06, "loss": 2.0639, "step": 22108 }, { "epoch": 0.74, "grad_norm": 0.771622359752655, "learning_rate": 3.3610964374419598e-06, "loss": 2.0419, "step": 22109 }, { "epoch": 0.74, "grad_norm": 0.7350928783416748, "learning_rate": 3.3603016118007103e-06, "loss": 2.0216, "step": 22110 }, { "epoch": 0.74, "grad_norm": 0.7385023236274719, "learning_rate": 3.3595068611708813e-06, "loss": 1.9719, "step": 22111 }, { "epoch": 0.74, "grad_norm": 0.7869828939437866, "learning_rate": 3.358712185561457e-06, "loss": 1.9662, "step": 22112 }, { "epoch": 0.74, "grad_norm": 0.7233776450157166, "learning_rate": 3.3579175849814083e-06, "loss": 1.9626, "step": 22113 }, { "epoch": 0.74, "grad_norm": 0.7473151683807373, "learning_rate": 3.357123059439712e-06, "loss": 2.0663, "step": 22114 }, { "epoch": 0.74, "grad_norm": 0.8240140676498413, "learning_rate": 3.3563286089453497e-06, "loss": 2.1127, "step": 22115 }, { "epoch": 0.74, "grad_norm": 0.7196578979492188, "learning_rate": 3.355534233507289e-06, "loss": 2.0309, "step": 22116 }, { "epoch": 0.74, "grad_norm": 0.7662948369979858, "learning_rate": 3.354739933134512e-06, "loss": 2.0613, "step": 22117 }, { "epoch": 0.74, "grad_norm": 0.7353666424751282, "learning_rate": 3.3539457078359894e-06, "loss": 2.0454, "step": 22118 }, { "epoch": 0.74, "grad_norm": 0.7424390912055969, "learning_rate": 3.3531515576206887e-06, "loss": 2.1119, "step": 22119 }, { "epoch": 0.74, "grad_norm": 0.7820848226547241, "learning_rate": 3.352357482497587e-06, "loss": 1.9961, "step": 22120 }, { "epoch": 0.74, "grad_norm": 0.7346158027648926, "learning_rate": 3.3515634824756582e-06, "loss": 2.0595, "step": 22121 }, { "epoch": 0.74, "grad_norm": 0.7797508239746094, "learning_rate": 3.3507695575638687e-06, "loss": 2.0752, "step": 22122 }, { "epoch": 0.74, "grad_norm": 0.7496840357780457, "learning_rate": 3.3499757077711835e-06, "loss": 2.0759, "step": 22123 }, { "epoch": 0.74, "grad_norm": 0.748189389705658, "learning_rate": 3.34918193310658e-06, "loss": 2.0848, "step": 22124 }, { "epoch": 0.74, "grad_norm": 0.708651065826416, "learning_rate": 3.3483882335790173e-06, "loss": 1.9672, "step": 22125 }, { "epoch": 0.74, "grad_norm": 0.7485222816467285, "learning_rate": 3.34759460919747e-06, "loss": 2.0343, "step": 22126 }, { "epoch": 0.74, "grad_norm": 0.7451032400131226, "learning_rate": 3.3468010599708967e-06, "loss": 2.0122, "step": 22127 }, { "epoch": 0.74, "grad_norm": 0.7255945801734924, "learning_rate": 3.3460075859082685e-06, "loss": 2.0376, "step": 22128 }, { "epoch": 0.74, "grad_norm": 0.7336903810501099, "learning_rate": 3.345214187018545e-06, "loss": 2.0792, "step": 22129 }, { "epoch": 0.74, "grad_norm": 0.7269100546836853, "learning_rate": 3.3444208633106935e-06, "loss": 2.0729, "step": 22130 }, { "epoch": 0.74, "grad_norm": 0.7421281933784485, "learning_rate": 3.3436276147936754e-06, "loss": 2.0751, "step": 22131 }, { "epoch": 0.74, "grad_norm": 0.7496660351753235, "learning_rate": 3.342834441476448e-06, "loss": 2.0739, "step": 22132 }, { "epoch": 0.74, "grad_norm": 0.7094911932945251, "learning_rate": 3.3420413433679745e-06, "loss": 2.0072, "step": 22133 }, { "epoch": 0.74, "grad_norm": 0.7650269865989685, "learning_rate": 3.3412483204772207e-06, "loss": 2.0376, "step": 22134 }, { "epoch": 0.74, "grad_norm": 0.7688501477241516, "learning_rate": 3.3404553728131405e-06, "loss": 2.0839, "step": 22135 }, { "epoch": 0.74, "grad_norm": 0.7776604294776917, "learning_rate": 3.3396625003846892e-06, "loss": 2.0521, "step": 22136 }, { "epoch": 0.74, "grad_norm": 0.7357088923454285, "learning_rate": 3.338869703200831e-06, "loss": 1.9996, "step": 22137 }, { "epoch": 0.74, "grad_norm": 0.7088037133216858, "learning_rate": 3.338076981270516e-06, "loss": 2.0215, "step": 22138 }, { "epoch": 0.74, "grad_norm": 0.777632474899292, "learning_rate": 3.337284334602705e-06, "loss": 2.0564, "step": 22139 }, { "epoch": 0.74, "grad_norm": 0.7721948623657227, "learning_rate": 3.336491763206352e-06, "loss": 2.0004, "step": 22140 }, { "epoch": 0.74, "grad_norm": 0.7479133605957031, "learning_rate": 3.3356992670904065e-06, "loss": 2.0084, "step": 22141 }, { "epoch": 0.74, "grad_norm": 0.7704938650131226, "learning_rate": 3.3349068462638254e-06, "loss": 2.0771, "step": 22142 }, { "epoch": 0.74, "grad_norm": 0.7240856885910034, "learning_rate": 3.3341145007355635e-06, "loss": 2.1308, "step": 22143 }, { "epoch": 0.74, "grad_norm": 0.7621692419052124, "learning_rate": 3.3333222305145694e-06, "loss": 2.0509, "step": 22144 }, { "epoch": 0.74, "grad_norm": 0.7420933842658997, "learning_rate": 3.33253003560979e-06, "loss": 2.1305, "step": 22145 }, { "epoch": 0.74, "grad_norm": 0.7493348121643066, "learning_rate": 3.3317379160301833e-06, "loss": 2.0796, "step": 22146 }, { "epoch": 0.74, "grad_norm": 0.7451976537704468, "learning_rate": 3.3309458717846886e-06, "loss": 2.0579, "step": 22147 }, { "epoch": 0.74, "grad_norm": 0.7184981107711792, "learning_rate": 3.3301539028822638e-06, "loss": 2.0696, "step": 22148 }, { "epoch": 0.74, "grad_norm": 0.7519020438194275, "learning_rate": 3.3293620093318467e-06, "loss": 1.9881, "step": 22149 }, { "epoch": 0.74, "grad_norm": 0.7504491209983826, "learning_rate": 3.3285701911423928e-06, "loss": 2.0609, "step": 22150 }, { "epoch": 0.74, "grad_norm": 0.7549226880073547, "learning_rate": 3.3277784483228393e-06, "loss": 2.0688, "step": 22151 }, { "epoch": 0.74, "grad_norm": 0.7448413968086243, "learning_rate": 3.3269867808821344e-06, "loss": 2.0528, "step": 22152 }, { "epoch": 0.74, "grad_norm": 0.759600043296814, "learning_rate": 3.326195188829228e-06, "loss": 2.0374, "step": 22153 }, { "epoch": 0.74, "grad_norm": 0.7269254326820374, "learning_rate": 3.325403672173051e-06, "loss": 2.1103, "step": 22154 }, { "epoch": 0.74, "grad_norm": 0.7295447587966919, "learning_rate": 3.3246122309225527e-06, "loss": 2.0136, "step": 22155 }, { "epoch": 0.74, "grad_norm": 0.7458457946777344, "learning_rate": 3.3238208650866756e-06, "loss": 2.0511, "step": 22156 }, { "epoch": 0.74, "grad_norm": 0.7688998579978943, "learning_rate": 3.323029574674357e-06, "loss": 2.0965, "step": 22157 }, { "epoch": 0.74, "grad_norm": 0.7221521139144897, "learning_rate": 3.3222383596945353e-06, "loss": 2.028, "step": 22158 }, { "epoch": 0.74, "grad_norm": 0.7359201908111572, "learning_rate": 3.3214472201561533e-06, "loss": 2.0831, "step": 22159 }, { "epoch": 0.74, "grad_norm": 0.7515683174133301, "learning_rate": 3.320656156068144e-06, "loss": 2.0486, "step": 22160 }, { "epoch": 0.74, "grad_norm": 0.7465779781341553, "learning_rate": 3.3198651674394468e-06, "loss": 2.089, "step": 22161 }, { "epoch": 0.74, "grad_norm": 0.7769495844841003, "learning_rate": 3.3190742542790043e-06, "loss": 2.0593, "step": 22162 }, { "epoch": 0.74, "grad_norm": 0.7404967546463013, "learning_rate": 3.318283416595739e-06, "loss": 2.0186, "step": 22163 }, { "epoch": 0.74, "grad_norm": 0.7394290566444397, "learning_rate": 3.317492654398592e-06, "loss": 2.0208, "step": 22164 }, { "epoch": 0.74, "grad_norm": 0.772930920124054, "learning_rate": 3.3167019676964995e-06, "loss": 2.063, "step": 22165 }, { "epoch": 0.74, "grad_norm": 0.7465413212776184, "learning_rate": 3.3159113564983912e-06, "loss": 2.0419, "step": 22166 }, { "epoch": 0.74, "grad_norm": 0.7373952865600586, "learning_rate": 3.315120820813197e-06, "loss": 2.0535, "step": 22167 }, { "epoch": 0.74, "grad_norm": 0.7644451856613159, "learning_rate": 3.314330360649849e-06, "loss": 2.0716, "step": 22168 }, { "epoch": 0.74, "grad_norm": 0.7352064251899719, "learning_rate": 3.3135399760172827e-06, "loss": 1.9979, "step": 22169 }, { "epoch": 0.74, "grad_norm": 0.7662498354911804, "learning_rate": 3.3127496669244217e-06, "loss": 2.0671, "step": 22170 }, { "epoch": 0.74, "grad_norm": 0.7551062107086182, "learning_rate": 3.311959433380194e-06, "loss": 2.0838, "step": 22171 }, { "epoch": 0.74, "grad_norm": 0.7728142142295837, "learning_rate": 3.311169275393531e-06, "loss": 2.031, "step": 22172 }, { "epoch": 0.74, "grad_norm": 0.7507092952728271, "learning_rate": 3.3103791929733552e-06, "loss": 2.067, "step": 22173 }, { "epoch": 0.74, "grad_norm": 0.7657943964004517, "learning_rate": 3.3095891861285944e-06, "loss": 2.0793, "step": 22174 }, { "epoch": 0.74, "grad_norm": 0.7218884825706482, "learning_rate": 3.30879925486818e-06, "loss": 2.0274, "step": 22175 }, { "epoch": 0.74, "grad_norm": 0.7566397786140442, "learning_rate": 3.3080093992010245e-06, "loss": 1.9932, "step": 22176 }, { "epoch": 0.74, "grad_norm": 0.7601485252380371, "learning_rate": 3.307219619136057e-06, "loss": 2.0604, "step": 22177 }, { "epoch": 0.74, "grad_norm": 0.745028018951416, "learning_rate": 3.306429914682202e-06, "loss": 2.0406, "step": 22178 }, { "epoch": 0.74, "grad_norm": 0.7598259449005127, "learning_rate": 3.3056402858483807e-06, "loss": 2.0332, "step": 22179 }, { "epoch": 0.74, "grad_norm": 0.7550256848335266, "learning_rate": 3.3048507326435074e-06, "loss": 2.1142, "step": 22180 }, { "epoch": 0.74, "grad_norm": 0.7382687330245972, "learning_rate": 3.3040612550765104e-06, "loss": 2.1274, "step": 22181 }, { "epoch": 0.74, "grad_norm": 0.7453802227973938, "learning_rate": 3.303271853156302e-06, "loss": 2.0716, "step": 22182 }, { "epoch": 0.74, "grad_norm": 0.7379159331321716, "learning_rate": 3.3024825268918046e-06, "loss": 2.1063, "step": 22183 }, { "epoch": 0.74, "grad_norm": 0.7373242378234863, "learning_rate": 3.301693276291936e-06, "loss": 2.079, "step": 22184 }, { "epoch": 0.74, "grad_norm": 0.7235468029975891, "learning_rate": 3.300904101365613e-06, "loss": 2.1325, "step": 22185 }, { "epoch": 0.74, "grad_norm": 0.7384099960327148, "learning_rate": 3.3001150021217444e-06, "loss": 2.0643, "step": 22186 }, { "epoch": 0.74, "grad_norm": 0.7517136335372925, "learning_rate": 3.2993259785692543e-06, "loss": 2.1035, "step": 22187 }, { "epoch": 0.74, "grad_norm": 0.7891362905502319, "learning_rate": 3.2985370307170516e-06, "loss": 2.0248, "step": 22188 }, { "epoch": 0.74, "grad_norm": 0.7310405373573303, "learning_rate": 3.297748158574048e-06, "loss": 2.0595, "step": 22189 }, { "epoch": 0.74, "grad_norm": 0.7453989386558533, "learning_rate": 3.2969593621491567e-06, "loss": 2.0469, "step": 22190 }, { "epoch": 0.74, "grad_norm": 0.7469889521598816, "learning_rate": 3.296170641451294e-06, "loss": 2.0592, "step": 22191 }, { "epoch": 0.74, "grad_norm": 0.7396084666252136, "learning_rate": 3.295381996489363e-06, "loss": 2.0884, "step": 22192 }, { "epoch": 0.74, "grad_norm": 0.7534698247909546, "learning_rate": 3.29459342727228e-06, "loss": 1.9702, "step": 22193 }, { "epoch": 0.74, "grad_norm": 0.7330057621002197, "learning_rate": 3.2938049338089505e-06, "loss": 1.9607, "step": 22194 }, { "epoch": 0.74, "grad_norm": 0.7441755533218384, "learning_rate": 3.2930165161082794e-06, "loss": 1.9973, "step": 22195 }, { "epoch": 0.74, "grad_norm": 0.7269352078437805, "learning_rate": 3.2922281741791783e-06, "loss": 2.0468, "step": 22196 }, { "epoch": 0.74, "grad_norm": 0.731460690498352, "learning_rate": 3.291439908030557e-06, "loss": 2.0519, "step": 22197 }, { "epoch": 0.74, "grad_norm": 0.7212933301925659, "learning_rate": 3.2906517176713102e-06, "loss": 2.0337, "step": 22198 }, { "epoch": 0.74, "grad_norm": 0.7965194582939148, "learning_rate": 3.289863603110349e-06, "loss": 2.0741, "step": 22199 }, { "epoch": 0.74, "grad_norm": 0.7331159114837646, "learning_rate": 3.2890755643565787e-06, "loss": 1.9767, "step": 22200 }, { "epoch": 0.74, "grad_norm": 0.759445071220398, "learning_rate": 3.2882876014189003e-06, "loss": 2.1019, "step": 22201 }, { "epoch": 0.74, "grad_norm": 0.7961204051971436, "learning_rate": 3.2874997143062103e-06, "loss": 2.0002, "step": 22202 }, { "epoch": 0.74, "grad_norm": 0.7729451060295105, "learning_rate": 3.2867119030274196e-06, "loss": 2.0946, "step": 22203 }, { "epoch": 0.74, "grad_norm": 0.7333393692970276, "learning_rate": 3.28592416759142e-06, "loss": 2.0655, "step": 22204 }, { "epoch": 0.74, "grad_norm": 0.723981499671936, "learning_rate": 3.285136508007113e-06, "loss": 1.9794, "step": 22205 }, { "epoch": 0.74, "grad_norm": 0.7416284680366516, "learning_rate": 3.2843489242834025e-06, "loss": 2.098, "step": 22206 }, { "epoch": 0.74, "grad_norm": 0.7751772403717041, "learning_rate": 3.2835614164291827e-06, "loss": 2.0049, "step": 22207 }, { "epoch": 0.74, "grad_norm": 0.7494246959686279, "learning_rate": 3.282773984453346e-06, "loss": 2.093, "step": 22208 }, { "epoch": 0.74, "grad_norm": 0.7638769745826721, "learning_rate": 3.281986628364795e-06, "loss": 2.0801, "step": 22209 }, { "epoch": 0.74, "grad_norm": 0.7460142374038696, "learning_rate": 3.2811993481724227e-06, "loss": 2.0276, "step": 22210 }, { "epoch": 0.74, "grad_norm": 0.7632189393043518, "learning_rate": 3.280412143885119e-06, "loss": 2.0371, "step": 22211 }, { "epoch": 0.74, "grad_norm": 0.7309544086456299, "learning_rate": 3.2796250155117804e-06, "loss": 1.9982, "step": 22212 }, { "epoch": 0.74, "grad_norm": 0.7215017080307007, "learning_rate": 3.278837963061303e-06, "loss": 1.9906, "step": 22213 }, { "epoch": 0.74, "grad_norm": 0.7621486783027649, "learning_rate": 3.2780509865425736e-06, "loss": 2.0824, "step": 22214 }, { "epoch": 0.74, "grad_norm": 0.7434404492378235, "learning_rate": 3.2772640859644868e-06, "loss": 1.9568, "step": 22215 }, { "epoch": 0.74, "grad_norm": 0.7554197907447815, "learning_rate": 3.2764772613359306e-06, "loss": 2.1024, "step": 22216 }, { "epoch": 0.74, "grad_norm": 0.7211920022964478, "learning_rate": 3.27569051266579e-06, "loss": 2.0127, "step": 22217 }, { "epoch": 0.74, "grad_norm": 0.7470629215240479, "learning_rate": 3.2749038399629585e-06, "loss": 2.0952, "step": 22218 }, { "epoch": 0.74, "grad_norm": 0.7381715178489685, "learning_rate": 3.274117243236328e-06, "loss": 2.0628, "step": 22219 }, { "epoch": 0.74, "grad_norm": 0.7558199167251587, "learning_rate": 3.2733307224947718e-06, "loss": 2.0834, "step": 22220 }, { "epoch": 0.74, "grad_norm": 0.7380803823471069, "learning_rate": 3.272544277747184e-06, "loss": 2.0128, "step": 22221 }, { "epoch": 0.74, "grad_norm": 0.7458100318908691, "learning_rate": 3.2717579090024507e-06, "loss": 2.0446, "step": 22222 }, { "epoch": 0.74, "grad_norm": 0.7276365756988525, "learning_rate": 3.2709716162694506e-06, "loss": 2.0027, "step": 22223 }, { "epoch": 0.74, "grad_norm": 0.7607114911079407, "learning_rate": 3.270185399557073e-06, "loss": 2.059, "step": 22224 }, { "epoch": 0.74, "grad_norm": 0.7345208525657654, "learning_rate": 3.2693992588741965e-06, "loss": 2.0235, "step": 22225 }, { "epoch": 0.74, "grad_norm": 0.7323665618896484, "learning_rate": 3.2686131942296994e-06, "loss": 2.0501, "step": 22226 }, { "epoch": 0.74, "grad_norm": 0.7412180304527283, "learning_rate": 3.2678272056324657e-06, "loss": 2.022, "step": 22227 }, { "epoch": 0.74, "grad_norm": 0.7286436557769775, "learning_rate": 3.2670412930913775e-06, "loss": 2.0727, "step": 22228 }, { "epoch": 0.74, "grad_norm": 0.7286287546157837, "learning_rate": 3.2662554566153116e-06, "loss": 2.0524, "step": 22229 }, { "epoch": 0.74, "grad_norm": 0.7574065327644348, "learning_rate": 3.2654696962131415e-06, "loss": 2.0954, "step": 22230 }, { "epoch": 0.74, "grad_norm": 0.7412490248680115, "learning_rate": 3.2646840118937506e-06, "loss": 1.9514, "step": 22231 }, { "epoch": 0.74, "grad_norm": 0.7628432512283325, "learning_rate": 3.2638984036660136e-06, "loss": 2.0335, "step": 22232 }, { "epoch": 0.74, "grad_norm": 0.7409769296646118, "learning_rate": 3.2631128715388006e-06, "loss": 2.0117, "step": 22233 }, { "epoch": 0.74, "grad_norm": 0.74897301197052, "learning_rate": 3.2623274155209906e-06, "loss": 2.0008, "step": 22234 }, { "epoch": 0.74, "grad_norm": 0.7371807098388672, "learning_rate": 3.26154203562146e-06, "loss": 2.0689, "step": 22235 }, { "epoch": 0.74, "grad_norm": 0.7458344101905823, "learning_rate": 3.260756731849075e-06, "loss": 2.0605, "step": 22236 }, { "epoch": 0.74, "grad_norm": 0.7580293416976929, "learning_rate": 3.2599715042127146e-06, "loss": 2.1262, "step": 22237 }, { "epoch": 0.74, "grad_norm": 0.737846851348877, "learning_rate": 3.2591863527212453e-06, "loss": 2.0482, "step": 22238 }, { "epoch": 0.74, "grad_norm": 0.7206730842590332, "learning_rate": 3.258401277383535e-06, "loss": 2.0171, "step": 22239 }, { "epoch": 0.74, "grad_norm": 0.7570645809173584, "learning_rate": 3.2576162782084564e-06, "loss": 2.078, "step": 22240 }, { "epoch": 0.74, "grad_norm": 0.7354682683944702, "learning_rate": 3.2568313552048835e-06, "loss": 2.0138, "step": 22241 }, { "epoch": 0.74, "grad_norm": 0.7740846276283264, "learning_rate": 3.2560465083816726e-06, "loss": 1.9665, "step": 22242 }, { "epoch": 0.74, "grad_norm": 0.7227646112442017, "learning_rate": 3.255261737747696e-06, "loss": 2.0277, "step": 22243 }, { "epoch": 0.74, "grad_norm": 0.7426685690879822, "learning_rate": 3.254477043311821e-06, "loss": 2.0321, "step": 22244 }, { "epoch": 0.74, "grad_norm": 0.7486542463302612, "learning_rate": 3.2536924250829095e-06, "loss": 2.0574, "step": 22245 }, { "epoch": 0.74, "grad_norm": 0.7805135846138, "learning_rate": 3.2529078830698304e-06, "loss": 2.0107, "step": 22246 }, { "epoch": 0.74, "grad_norm": 0.7241868376731873, "learning_rate": 3.252123417281443e-06, "loss": 2.0181, "step": 22247 }, { "epoch": 0.74, "grad_norm": 0.746249794960022, "learning_rate": 3.2513390277266076e-06, "loss": 2.052, "step": 22248 }, { "epoch": 0.74, "grad_norm": 0.7432492971420288, "learning_rate": 3.250554714414189e-06, "loss": 2.0512, "step": 22249 }, { "epoch": 0.74, "grad_norm": 0.7555325031280518, "learning_rate": 3.2497704773530515e-06, "loss": 2.0181, "step": 22250 }, { "epoch": 0.74, "grad_norm": 0.7270098924636841, "learning_rate": 3.248986316552051e-06, "loss": 2.0537, "step": 22251 }, { "epoch": 0.74, "grad_norm": 0.7685802578926086, "learning_rate": 3.248202232020042e-06, "loss": 1.9618, "step": 22252 }, { "epoch": 0.74, "grad_norm": 0.7535423636436462, "learning_rate": 3.247418223765888e-06, "loss": 2.0282, "step": 22253 }, { "epoch": 0.74, "grad_norm": 0.7459569573402405, "learning_rate": 3.2466342917984496e-06, "loss": 2.0116, "step": 22254 }, { "epoch": 0.74, "grad_norm": 0.7370901703834534, "learning_rate": 3.2458504361265775e-06, "loss": 2.0651, "step": 22255 }, { "epoch": 0.74, "grad_norm": 0.7668207287788391, "learning_rate": 3.2450666567591273e-06, "loss": 2.0489, "step": 22256 }, { "epoch": 0.74, "grad_norm": 0.7648358941078186, "learning_rate": 3.244282953704957e-06, "loss": 2.051, "step": 22257 }, { "epoch": 0.74, "grad_norm": 0.7852567434310913, "learning_rate": 3.2434993269729163e-06, "loss": 2.0722, "step": 22258 }, { "epoch": 0.74, "grad_norm": 0.7253546714782715, "learning_rate": 3.2427157765718632e-06, "loss": 2.026, "step": 22259 }, { "epoch": 0.74, "grad_norm": 0.7392908930778503, "learning_rate": 3.2419323025106477e-06, "loss": 2.0437, "step": 22260 }, { "epoch": 0.74, "grad_norm": 0.727792501449585, "learning_rate": 3.241148904798117e-06, "loss": 2.0109, "step": 22261 }, { "epoch": 0.74, "grad_norm": 0.7448568344116211, "learning_rate": 3.2403655834431246e-06, "loss": 2.0892, "step": 22262 }, { "epoch": 0.74, "grad_norm": 0.7522231936454773, "learning_rate": 3.2395823384545267e-06, "loss": 2.0611, "step": 22263 }, { "epoch": 0.74, "grad_norm": 0.7411322593688965, "learning_rate": 3.2387991698411593e-06, "loss": 2.0874, "step": 22264 }, { "epoch": 0.74, "grad_norm": 0.7100245952606201, "learning_rate": 3.238016077611876e-06, "loss": 2.0557, "step": 22265 }, { "epoch": 0.74, "grad_norm": 0.7298486828804016, "learning_rate": 3.2372330617755286e-06, "loss": 2.0877, "step": 22266 }, { "epoch": 0.74, "grad_norm": 0.7426810264587402, "learning_rate": 3.236450122340955e-06, "loss": 2.0131, "step": 22267 }, { "epoch": 0.74, "grad_norm": 0.7703245282173157, "learning_rate": 3.235667259317007e-06, "loss": 2.0676, "step": 22268 }, { "epoch": 0.74, "grad_norm": 0.7220068573951721, "learning_rate": 3.234884472712523e-06, "loss": 2.0786, "step": 22269 }, { "epoch": 0.74, "grad_norm": 0.759346604347229, "learning_rate": 3.2341017625363526e-06, "loss": 2.0496, "step": 22270 }, { "epoch": 0.74, "grad_norm": 0.7599336504936218, "learning_rate": 3.233319128797332e-06, "loss": 2.1154, "step": 22271 }, { "epoch": 0.74, "grad_norm": 0.7436297535896301, "learning_rate": 3.2325365715043088e-06, "loss": 2.0137, "step": 22272 }, { "epoch": 0.74, "grad_norm": 0.7367570996284485, "learning_rate": 3.2317540906661226e-06, "loss": 1.9497, "step": 22273 }, { "epoch": 0.74, "grad_norm": 0.7532768845558167, "learning_rate": 3.2309716862916072e-06, "loss": 2.0986, "step": 22274 }, { "epoch": 0.74, "grad_norm": 0.7432680726051331, "learning_rate": 3.230189358389608e-06, "loss": 2.0491, "step": 22275 }, { "epoch": 0.74, "grad_norm": 0.734361469745636, "learning_rate": 3.2294071069689647e-06, "loss": 1.979, "step": 22276 }, { "epoch": 0.74, "grad_norm": 0.7767292261123657, "learning_rate": 3.228624932038512e-06, "loss": 2.0702, "step": 22277 }, { "epoch": 0.74, "grad_norm": 0.7359764575958252, "learning_rate": 3.2278428336070834e-06, "loss": 1.9546, "step": 22278 }, { "epoch": 0.74, "grad_norm": 0.7637745141983032, "learning_rate": 3.227060811683521e-06, "loss": 2.012, "step": 22279 }, { "epoch": 0.74, "grad_norm": 0.7211446166038513, "learning_rate": 3.226278866276652e-06, "loss": 2.0404, "step": 22280 }, { "epoch": 0.74, "grad_norm": 0.7533299326896667, "learning_rate": 3.2254969973953186e-06, "loss": 2.014, "step": 22281 }, { "epoch": 0.74, "grad_norm": 0.7401911616325378, "learning_rate": 3.2247152050483497e-06, "loss": 2.0569, "step": 22282 }, { "epoch": 0.74, "grad_norm": 0.7467284202575684, "learning_rate": 3.2239334892445753e-06, "loss": 2.0399, "step": 22283 }, { "epoch": 0.74, "grad_norm": 0.7396549582481384, "learning_rate": 3.223151849992828e-06, "loss": 2.0169, "step": 22284 }, { "epoch": 0.74, "grad_norm": 0.7866047024726868, "learning_rate": 3.2223702873019424e-06, "loss": 2.1309, "step": 22285 }, { "epoch": 0.74, "grad_norm": 0.7141895294189453, "learning_rate": 3.221588801180746e-06, "loss": 2.0492, "step": 22286 }, { "epoch": 0.74, "grad_norm": 0.7516275644302368, "learning_rate": 3.2208073916380635e-06, "loss": 2.0349, "step": 22287 }, { "epoch": 0.74, "grad_norm": 0.761690080165863, "learning_rate": 3.2200260586827293e-06, "loss": 2.054, "step": 22288 }, { "epoch": 0.74, "grad_norm": 0.7326943874359131, "learning_rate": 3.2192448023235646e-06, "loss": 2.0477, "step": 22289 }, { "epoch": 0.74, "grad_norm": 0.7479966878890991, "learning_rate": 3.218463622569401e-06, "loss": 2.053, "step": 22290 }, { "epoch": 0.74, "grad_norm": 0.7634629011154175, "learning_rate": 3.2176825194290573e-06, "loss": 2.0996, "step": 22291 }, { "epoch": 0.74, "grad_norm": 0.740103006362915, "learning_rate": 3.216901492911365e-06, "loss": 2.0115, "step": 22292 }, { "epoch": 0.74, "grad_norm": 0.7479941844940186, "learning_rate": 3.216120543025141e-06, "loss": 2.055, "step": 22293 }, { "epoch": 0.74, "grad_norm": 0.7670915126800537, "learning_rate": 3.215339669779215e-06, "loss": 2.0369, "step": 22294 }, { "epoch": 0.74, "grad_norm": 0.7458199858665466, "learning_rate": 3.214558873182405e-06, "loss": 2.0515, "step": 22295 }, { "epoch": 0.74, "grad_norm": 0.7210725545883179, "learning_rate": 3.21377815324353e-06, "loss": 2.0645, "step": 22296 }, { "epoch": 0.74, "grad_norm": 0.7615597248077393, "learning_rate": 3.2129975099714106e-06, "loss": 2.0551, "step": 22297 }, { "epoch": 0.74, "grad_norm": 0.7421480417251587, "learning_rate": 3.212216943374872e-06, "loss": 2.0794, "step": 22298 }, { "epoch": 0.74, "grad_norm": 0.7504313588142395, "learning_rate": 3.2114364534627284e-06, "loss": 2.0574, "step": 22299 }, { "epoch": 0.74, "grad_norm": 0.759382426738739, "learning_rate": 3.2106560402437937e-06, "loss": 2.0659, "step": 22300 }, { "epoch": 0.74, "grad_norm": 0.7769963145256042, "learning_rate": 3.209875703726891e-06, "loss": 2.0633, "step": 22301 }, { "epoch": 0.74, "grad_norm": 0.7394306063652039, "learning_rate": 3.20909544392083e-06, "loss": 2.0498, "step": 22302 }, { "epoch": 0.74, "grad_norm": 0.7504224181175232, "learning_rate": 3.2083152608344326e-06, "loss": 2.0534, "step": 22303 }, { "epoch": 0.74, "grad_norm": 0.7295312285423279, "learning_rate": 3.2075351544765086e-06, "loss": 2.0296, "step": 22304 }, { "epoch": 0.74, "grad_norm": 0.7356663942337036, "learning_rate": 3.2067551248558694e-06, "loss": 2.0413, "step": 22305 }, { "epoch": 0.74, "grad_norm": 0.7510823607444763, "learning_rate": 3.205975171981328e-06, "loss": 2.0576, "step": 22306 }, { "epoch": 0.74, "grad_norm": 0.7515228986740112, "learning_rate": 3.2051952958617017e-06, "loss": 2.0773, "step": 22307 }, { "epoch": 0.74, "grad_norm": 0.7723273038864136, "learning_rate": 3.2044154965057973e-06, "loss": 2.1316, "step": 22308 }, { "epoch": 0.74, "grad_norm": 0.7313694357872009, "learning_rate": 3.20363577392242e-06, "loss": 2.0284, "step": 22309 }, { "epoch": 0.74, "grad_norm": 0.7397423982620239, "learning_rate": 3.202856128120386e-06, "loss": 1.9872, "step": 22310 }, { "epoch": 0.74, "grad_norm": 0.763303279876709, "learning_rate": 3.202076559108497e-06, "loss": 2.0278, "step": 22311 }, { "epoch": 0.74, "grad_norm": 0.767754316329956, "learning_rate": 3.2012970668955657e-06, "loss": 2.0908, "step": 22312 }, { "epoch": 0.74, "grad_norm": 0.7294447422027588, "learning_rate": 3.2005176514903926e-06, "loss": 2.0416, "step": 22313 }, { "epoch": 0.74, "grad_norm": 0.7457343935966492, "learning_rate": 3.199738312901789e-06, "loss": 2.0025, "step": 22314 }, { "epoch": 0.74, "grad_norm": 0.7523823380470276, "learning_rate": 3.1989590511385547e-06, "loss": 2.0742, "step": 22315 }, { "epoch": 0.74, "grad_norm": 0.7587494254112244, "learning_rate": 3.1981798662094977e-06, "loss": 2.0767, "step": 22316 }, { "epoch": 0.74, "grad_norm": 0.7422943711280823, "learning_rate": 3.197400758123418e-06, "loss": 2.0907, "step": 22317 }, { "epoch": 0.74, "grad_norm": 0.7486933469772339, "learning_rate": 3.1966217268891155e-06, "loss": 1.9683, "step": 22318 }, { "epoch": 0.74, "grad_norm": 0.7673466801643372, "learning_rate": 3.195842772515393e-06, "loss": 2.0903, "step": 22319 }, { "epoch": 0.74, "grad_norm": 0.7478808164596558, "learning_rate": 3.1950638950110535e-06, "loss": 2.009, "step": 22320 }, { "epoch": 0.74, "grad_norm": 0.7606772184371948, "learning_rate": 3.1942850943848956e-06, "loss": 2.0965, "step": 22321 }, { "epoch": 0.74, "grad_norm": 0.7608497142791748, "learning_rate": 3.1935063706457127e-06, "loss": 2.0435, "step": 22322 }, { "epoch": 0.74, "grad_norm": 0.7370107173919678, "learning_rate": 3.192727723802308e-06, "loss": 2.109, "step": 22323 }, { "epoch": 0.74, "grad_norm": 0.7349649667739868, "learning_rate": 3.191949153863474e-06, "loss": 2.061, "step": 22324 }, { "epoch": 0.74, "grad_norm": 0.7287658452987671, "learning_rate": 3.191170660838011e-06, "loss": 2.0706, "step": 22325 }, { "epoch": 0.74, "grad_norm": 0.8100554943084717, "learning_rate": 3.1903922447347115e-06, "loss": 2.066, "step": 22326 }, { "epoch": 0.74, "grad_norm": 0.7326253056526184, "learning_rate": 3.1896139055623666e-06, "loss": 2.0652, "step": 22327 }, { "epoch": 0.74, "grad_norm": 0.7203582525253296, "learning_rate": 3.188835643329773e-06, "loss": 2.1105, "step": 22328 }, { "epoch": 0.74, "grad_norm": 0.7398223876953125, "learning_rate": 3.1880574580457246e-06, "loss": 2.025, "step": 22329 }, { "epoch": 0.74, "grad_norm": 0.7392032146453857, "learning_rate": 3.1872793497190114e-06, "loss": 2.012, "step": 22330 }, { "epoch": 0.74, "grad_norm": 0.7362843155860901, "learning_rate": 3.1865013183584205e-06, "loss": 2.0364, "step": 22331 }, { "epoch": 0.74, "grad_norm": 0.735448956489563, "learning_rate": 3.185723363972748e-06, "loss": 2.0283, "step": 22332 }, { "epoch": 0.74, "grad_norm": 0.7439790368080139, "learning_rate": 3.1849454865707764e-06, "loss": 2.0001, "step": 22333 }, { "epoch": 0.74, "grad_norm": 0.7423273921012878, "learning_rate": 3.184167686161299e-06, "loss": 2.0564, "step": 22334 }, { "epoch": 0.74, "grad_norm": 0.736746609210968, "learning_rate": 3.1833899627530975e-06, "loss": 2.0823, "step": 22335 }, { "epoch": 0.74, "grad_norm": 0.7634266018867493, "learning_rate": 3.182612316354965e-06, "loss": 2.1429, "step": 22336 }, { "epoch": 0.74, "grad_norm": 0.7439766526222229, "learning_rate": 3.1818347469756793e-06, "loss": 2.0068, "step": 22337 }, { "epoch": 0.74, "grad_norm": 0.762452244758606, "learning_rate": 3.181057254624029e-06, "loss": 2.0071, "step": 22338 }, { "epoch": 0.74, "grad_norm": 0.7409733533859253, "learning_rate": 3.180279839308804e-06, "loss": 2.0562, "step": 22339 }, { "epoch": 0.74, "grad_norm": 0.764900267124176, "learning_rate": 3.179502501038775e-06, "loss": 2.017, "step": 22340 }, { "epoch": 0.74, "grad_norm": 0.7477806806564331, "learning_rate": 3.1787252398227285e-06, "loss": 2.0808, "step": 22341 }, { "epoch": 0.74, "grad_norm": 0.7834635376930237, "learning_rate": 3.177948055669451e-06, "loss": 2.0203, "step": 22342 }, { "epoch": 0.74, "grad_norm": 0.7584335803985596, "learning_rate": 3.1771709485877167e-06, "loss": 2.0792, "step": 22343 }, { "epoch": 0.74, "grad_norm": 0.7173949480056763, "learning_rate": 3.1763939185863047e-06, "loss": 2.0907, "step": 22344 }, { "epoch": 0.74, "grad_norm": 0.7423549890518188, "learning_rate": 3.175616965673998e-06, "loss": 2.0694, "step": 22345 }, { "epoch": 0.74, "grad_norm": 0.723930835723877, "learning_rate": 3.1748400898595666e-06, "loss": 2.0132, "step": 22346 }, { "epoch": 0.74, "grad_norm": 0.7401352524757385, "learning_rate": 3.1740632911517965e-06, "loss": 2.0004, "step": 22347 }, { "epoch": 0.74, "grad_norm": 0.7421391606330872, "learning_rate": 3.1732865695594594e-06, "loss": 2.1311, "step": 22348 }, { "epoch": 0.74, "grad_norm": 0.7346087098121643, "learning_rate": 3.172509925091326e-06, "loss": 2.0794, "step": 22349 }, { "epoch": 0.74, "grad_norm": 0.7473320364952087, "learning_rate": 3.1717333577561737e-06, "loss": 2.0308, "step": 22350 }, { "epoch": 0.74, "grad_norm": 0.766850471496582, "learning_rate": 3.17095686756278e-06, "loss": 2.0868, "step": 22351 }, { "epoch": 0.74, "grad_norm": 0.7663282155990601, "learning_rate": 3.1701804545199133e-06, "loss": 2.0651, "step": 22352 }, { "epoch": 0.74, "grad_norm": 0.7200099229812622, "learning_rate": 3.1694041186363424e-06, "loss": 2.0572, "step": 22353 }, { "epoch": 0.74, "grad_norm": 0.7191290259361267, "learning_rate": 3.1686278599208396e-06, "loss": 2.0109, "step": 22354 }, { "epoch": 0.74, "grad_norm": 0.7261087894439697, "learning_rate": 3.1678516783821788e-06, "loss": 1.9942, "step": 22355 }, { "epoch": 0.74, "grad_norm": 0.7159486413002014, "learning_rate": 3.167075574029127e-06, "loss": 2.0028, "step": 22356 }, { "epoch": 0.74, "grad_norm": 0.718169629573822, "learning_rate": 3.166299546870447e-06, "loss": 2.0402, "step": 22357 }, { "epoch": 0.74, "grad_norm": 0.7482108473777771, "learning_rate": 3.165523596914912e-06, "loss": 1.9922, "step": 22358 }, { "epoch": 0.74, "grad_norm": 0.7600855827331543, "learning_rate": 3.1647477241712843e-06, "loss": 2.0443, "step": 22359 }, { "epoch": 0.74, "grad_norm": 0.7388033270835876, "learning_rate": 3.1639719286483304e-06, "loss": 2.056, "step": 22360 }, { "epoch": 0.74, "grad_norm": 0.7413361072540283, "learning_rate": 3.1631962103548217e-06, "loss": 2.0414, "step": 22361 }, { "epoch": 0.74, "grad_norm": 0.7584394812583923, "learning_rate": 3.162420569299509e-06, "loss": 2.1137, "step": 22362 }, { "epoch": 0.74, "grad_norm": 0.7635613679885864, "learning_rate": 3.161645005491162e-06, "loss": 1.9723, "step": 22363 }, { "epoch": 0.74, "grad_norm": 0.7402101159095764, "learning_rate": 3.1608695189385454e-06, "loss": 2.0245, "step": 22364 }, { "epoch": 0.74, "grad_norm": 0.7490797638893127, "learning_rate": 3.1600941096504156e-06, "loss": 2.0702, "step": 22365 }, { "epoch": 0.74, "grad_norm": 0.7378631234169006, "learning_rate": 3.1593187776355316e-06, "loss": 1.9592, "step": 22366 }, { "epoch": 0.74, "grad_norm": 0.7659499049186707, "learning_rate": 3.1585435229026585e-06, "loss": 2.1671, "step": 22367 }, { "epoch": 0.74, "grad_norm": 0.7646713256835938, "learning_rate": 3.157768345460547e-06, "loss": 2.0457, "step": 22368 }, { "epoch": 0.74, "grad_norm": 0.7605593800544739, "learning_rate": 3.1569932453179596e-06, "loss": 2.0877, "step": 22369 }, { "epoch": 0.74, "grad_norm": 0.7417692542076111, "learning_rate": 3.1562182224836556e-06, "loss": 2.0764, "step": 22370 }, { "epoch": 0.74, "grad_norm": 0.7433812022209167, "learning_rate": 3.155443276966387e-06, "loss": 2.0779, "step": 22371 }, { "epoch": 0.74, "grad_norm": 0.7464502453804016, "learning_rate": 3.1546684087749045e-06, "loss": 2.0876, "step": 22372 }, { "epoch": 0.74, "grad_norm": 0.7534196376800537, "learning_rate": 3.153893617917971e-06, "loss": 2.0519, "step": 22373 }, { "epoch": 0.74, "grad_norm": 0.7375979423522949, "learning_rate": 3.1531189044043353e-06, "loss": 2.0947, "step": 22374 }, { "epoch": 0.74, "grad_norm": 0.7461097836494446, "learning_rate": 3.1523442682427465e-06, "loss": 2.0766, "step": 22375 }, { "epoch": 0.74, "grad_norm": 0.7347460389137268, "learning_rate": 3.1515697094419582e-06, "loss": 1.9848, "step": 22376 }, { "epoch": 0.74, "grad_norm": 0.7717547416687012, "learning_rate": 3.1507952280107247e-06, "loss": 2.0369, "step": 22377 }, { "epoch": 0.74, "grad_norm": 0.7372307777404785, "learning_rate": 3.1500208239577933e-06, "loss": 1.958, "step": 22378 }, { "epoch": 0.74, "grad_norm": 0.7491931915283203, "learning_rate": 3.149246497291909e-06, "loss": 2.1068, "step": 22379 }, { "epoch": 0.74, "grad_norm": 0.7384610176086426, "learning_rate": 3.1484722480218265e-06, "loss": 2.0887, "step": 22380 }, { "epoch": 0.74, "grad_norm": 0.75984787940979, "learning_rate": 3.147698076156285e-06, "loss": 1.9736, "step": 22381 }, { "epoch": 0.74, "grad_norm": 0.7757591009140015, "learning_rate": 3.1469239817040355e-06, "loss": 2.0025, "step": 22382 }, { "epoch": 0.74, "grad_norm": 0.7683458924293518, "learning_rate": 3.1461499646738293e-06, "loss": 2.0649, "step": 22383 }, { "epoch": 0.74, "grad_norm": 0.7802779674530029, "learning_rate": 3.145376025074397e-06, "loss": 2.0285, "step": 22384 }, { "epoch": 0.74, "grad_norm": 0.7464694380760193, "learning_rate": 3.1446021629144885e-06, "loss": 2.0042, "step": 22385 }, { "epoch": 0.74, "grad_norm": 0.7300896048545837, "learning_rate": 3.143828378202851e-06, "loss": 2.0035, "step": 22386 }, { "epoch": 0.74, "grad_norm": 0.7477436661720276, "learning_rate": 3.143054670948222e-06, "loss": 2.0836, "step": 22387 }, { "epoch": 0.74, "grad_norm": 0.7348775267601013, "learning_rate": 3.1422810411593406e-06, "loss": 2.039, "step": 22388 }, { "epoch": 0.74, "grad_norm": 0.7333837747573853, "learning_rate": 3.1415074888449513e-06, "loss": 2.0413, "step": 22389 }, { "epoch": 0.74, "grad_norm": 0.7494324445724487, "learning_rate": 3.1407340140137878e-06, "loss": 2.0033, "step": 22390 }, { "epoch": 0.74, "grad_norm": 0.7390850186347961, "learning_rate": 3.139960616674592e-06, "loss": 2.1067, "step": 22391 }, { "epoch": 0.74, "grad_norm": 0.7529094219207764, "learning_rate": 3.1391872968361037e-06, "loss": 2.0947, "step": 22392 }, { "epoch": 0.75, "grad_norm": 0.7675386667251587, "learning_rate": 3.138414054507056e-06, "loss": 2.0478, "step": 22393 }, { "epoch": 0.75, "grad_norm": 0.7869687676429749, "learning_rate": 3.1376408896961817e-06, "loss": 2.0321, "step": 22394 }, { "epoch": 0.75, "grad_norm": 0.7473276257514954, "learning_rate": 3.1368678024122233e-06, "loss": 2.0585, "step": 22395 }, { "epoch": 0.75, "grad_norm": 0.7380658388137817, "learning_rate": 3.1360947926639096e-06, "loss": 2.0939, "step": 22396 }, { "epoch": 0.75, "grad_norm": 0.7371789813041687, "learning_rate": 3.135321860459971e-06, "loss": 2.0922, "step": 22397 }, { "epoch": 0.75, "grad_norm": 0.7323634624481201, "learning_rate": 3.134549005809143e-06, "loss": 2.0656, "step": 22398 }, { "epoch": 0.75, "grad_norm": 0.7332828640937805, "learning_rate": 3.1337762287201602e-06, "loss": 2.0457, "step": 22399 }, { "epoch": 0.75, "grad_norm": 0.7701794505119324, "learning_rate": 3.1330035292017458e-06, "loss": 2.0316, "step": 22400 }, { "epoch": 0.75, "grad_norm": 0.7202067971229553, "learning_rate": 3.132230907262637e-06, "loss": 1.9735, "step": 22401 }, { "epoch": 0.75, "grad_norm": 0.7672884464263916, "learning_rate": 3.131458362911558e-06, "loss": 2.1032, "step": 22402 }, { "epoch": 0.75, "grad_norm": 0.7564374208450317, "learning_rate": 3.130685896157234e-06, "loss": 2.1135, "step": 22403 }, { "epoch": 0.75, "grad_norm": 0.7424620985984802, "learning_rate": 3.1299135070083952e-06, "loss": 2.0543, "step": 22404 }, { "epoch": 0.75, "grad_norm": 0.7950997948646545, "learning_rate": 3.129141195473773e-06, "loss": 2.1282, "step": 22405 }, { "epoch": 0.75, "grad_norm": 0.759041428565979, "learning_rate": 3.1283689615620804e-06, "loss": 2.105, "step": 22406 }, { "epoch": 0.75, "grad_norm": 0.7729628086090088, "learning_rate": 3.1275968052820494e-06, "loss": 2.0031, "step": 22407 }, { "epoch": 0.75, "grad_norm": 0.7445035576820374, "learning_rate": 3.1268247266424046e-06, "loss": 2.0507, "step": 22408 }, { "epoch": 0.75, "grad_norm": 0.7606754899024963, "learning_rate": 3.126052725651866e-06, "loss": 2.0229, "step": 22409 }, { "epoch": 0.75, "grad_norm": 0.7545680999755859, "learning_rate": 3.125280802319152e-06, "loss": 1.932, "step": 22410 }, { "epoch": 0.75, "grad_norm": 0.739841878414154, "learning_rate": 3.1245089566529885e-06, "loss": 2.0291, "step": 22411 }, { "epoch": 0.75, "grad_norm": 0.762795090675354, "learning_rate": 3.1237371886620914e-06, "loss": 1.9947, "step": 22412 }, { "epoch": 0.75, "grad_norm": 0.7483312487602234, "learning_rate": 3.1229654983551817e-06, "loss": 2.0598, "step": 22413 }, { "epoch": 0.75, "grad_norm": 0.7436697483062744, "learning_rate": 3.1221938857409807e-06, "loss": 2.1297, "step": 22414 }, { "epoch": 0.75, "grad_norm": 0.7580074667930603, "learning_rate": 3.1214223508282016e-06, "loss": 1.9988, "step": 22415 }, { "epoch": 0.75, "grad_norm": 0.7382920980453491, "learning_rate": 3.1206508936255585e-06, "loss": 2.0091, "step": 22416 }, { "epoch": 0.75, "grad_norm": 0.7696748375892639, "learning_rate": 3.119879514141774e-06, "loss": 2.135, "step": 22417 }, { "epoch": 0.75, "grad_norm": 0.7328803539276123, "learning_rate": 3.1191082123855576e-06, "loss": 2.0553, "step": 22418 }, { "epoch": 0.75, "grad_norm": 0.76280677318573, "learning_rate": 3.118336988365621e-06, "loss": 2.0323, "step": 22419 }, { "epoch": 0.75, "grad_norm": 0.7603625059127808, "learning_rate": 3.117565842090681e-06, "loss": 1.9989, "step": 22420 }, { "epoch": 0.75, "grad_norm": 0.7449672818183899, "learning_rate": 3.1167947735694513e-06, "loss": 2.0427, "step": 22421 }, { "epoch": 0.75, "grad_norm": 0.7192001342773438, "learning_rate": 3.1160237828106363e-06, "loss": 2.0108, "step": 22422 }, { "epoch": 0.75, "grad_norm": 0.7489215135574341, "learning_rate": 3.1152528698229544e-06, "loss": 2.1198, "step": 22423 }, { "epoch": 0.75, "grad_norm": 0.7555010318756104, "learning_rate": 3.1144820346151105e-06, "loss": 2.073, "step": 22424 }, { "epoch": 0.75, "grad_norm": 0.7394611835479736, "learning_rate": 3.11371127719581e-06, "loss": 2.0132, "step": 22425 }, { "epoch": 0.75, "grad_norm": 0.7460958957672119, "learning_rate": 3.1129405975737637e-06, "loss": 2.0845, "step": 22426 }, { "epoch": 0.75, "grad_norm": 0.7331417202949524, "learning_rate": 3.1121699957576847e-06, "loss": 1.9843, "step": 22427 }, { "epoch": 0.75, "grad_norm": 0.7267646789550781, "learning_rate": 3.1113994717562656e-06, "loss": 2.0407, "step": 22428 }, { "epoch": 0.75, "grad_norm": 0.7256742119789124, "learning_rate": 3.110629025578219e-06, "loss": 2.0785, "step": 22429 }, { "epoch": 0.75, "grad_norm": 0.7490482330322266, "learning_rate": 3.109858657232251e-06, "loss": 2.0295, "step": 22430 }, { "epoch": 0.75, "grad_norm": 0.7470577359199524, "learning_rate": 3.109088366727058e-06, "loss": 2.1204, "step": 22431 }, { "epoch": 0.75, "grad_norm": 0.7536673545837402, "learning_rate": 3.10831815407135e-06, "loss": 2.0713, "step": 22432 }, { "epoch": 0.75, "grad_norm": 0.7368770241737366, "learning_rate": 3.107548019273824e-06, "loss": 2.0351, "step": 22433 }, { "epoch": 0.75, "grad_norm": 0.7687072157859802, "learning_rate": 3.1067779623431783e-06, "loss": 2.0223, "step": 22434 }, { "epoch": 0.75, "grad_norm": 0.7330232858657837, "learning_rate": 3.1060079832881164e-06, "loss": 2.015, "step": 22435 }, { "epoch": 0.75, "grad_norm": 0.7784209251403809, "learning_rate": 3.105238082117338e-06, "loss": 2.0829, "step": 22436 }, { "epoch": 0.75, "grad_norm": 0.7776232361793518, "learning_rate": 3.104468258839539e-06, "loss": 2.0883, "step": 22437 }, { "epoch": 0.75, "grad_norm": 0.7391170859336853, "learning_rate": 3.1036985134634135e-06, "loss": 1.9991, "step": 22438 }, { "epoch": 0.75, "grad_norm": 0.740599513053894, "learning_rate": 3.1029288459976637e-06, "loss": 2.0355, "step": 22439 }, { "epoch": 0.75, "grad_norm": 0.7746738791465759, "learning_rate": 3.1021592564509817e-06, "loss": 2.0293, "step": 22440 }, { "epoch": 0.75, "grad_norm": 0.7476466298103333, "learning_rate": 3.1013897448320584e-06, "loss": 2.0047, "step": 22441 }, { "epoch": 0.75, "grad_norm": 0.7456259727478027, "learning_rate": 3.100620311149591e-06, "loss": 2.0776, "step": 22442 }, { "epoch": 0.75, "grad_norm": 0.7681435346603394, "learning_rate": 3.0998509554122757e-06, "loss": 2.0771, "step": 22443 }, { "epoch": 0.75, "grad_norm": 0.7408738136291504, "learning_rate": 3.099081677628797e-06, "loss": 2.0476, "step": 22444 }, { "epoch": 0.75, "grad_norm": 0.7477917075157166, "learning_rate": 3.098312477807852e-06, "loss": 2.0272, "step": 22445 }, { "epoch": 0.75, "grad_norm": 0.7432190179824829, "learning_rate": 3.097543355958128e-06, "loss": 1.9824, "step": 22446 }, { "epoch": 0.75, "grad_norm": 0.7547247409820557, "learning_rate": 3.096774312088311e-06, "loss": 2.0171, "step": 22447 }, { "epoch": 0.75, "grad_norm": 0.7076192498207092, "learning_rate": 3.0960053462070917e-06, "loss": 1.9521, "step": 22448 }, { "epoch": 0.75, "grad_norm": 0.7628404498100281, "learning_rate": 3.095236458323164e-06, "loss": 1.9991, "step": 22449 }, { "epoch": 0.75, "grad_norm": 0.7696869373321533, "learning_rate": 3.094467648445202e-06, "loss": 1.9448, "step": 22450 }, { "epoch": 0.75, "grad_norm": 0.7575535178184509, "learning_rate": 3.0936989165818977e-06, "loss": 2.0473, "step": 22451 }, { "epoch": 0.75, "grad_norm": 0.7805883884429932, "learning_rate": 3.092930262741939e-06, "loss": 2.0498, "step": 22452 }, { "epoch": 0.75, "grad_norm": 0.7141225934028625, "learning_rate": 3.092161686934002e-06, "loss": 1.9841, "step": 22453 }, { "epoch": 0.75, "grad_norm": 0.7509320974349976, "learning_rate": 3.091393189166778e-06, "loss": 2.0246, "step": 22454 }, { "epoch": 0.75, "grad_norm": 0.735600471496582, "learning_rate": 3.0906247694489423e-06, "loss": 2.0283, "step": 22455 }, { "epoch": 0.75, "grad_norm": 0.7517402768135071, "learning_rate": 3.089856427789181e-06, "loss": 2.0227, "step": 22456 }, { "epoch": 0.75, "grad_norm": 0.7559683322906494, "learning_rate": 3.089088164196169e-06, "loss": 1.9579, "step": 22457 }, { "epoch": 0.75, "grad_norm": 0.733515202999115, "learning_rate": 3.088319978678591e-06, "loss": 2.0517, "step": 22458 }, { "epoch": 0.75, "grad_norm": 0.7629181146621704, "learning_rate": 3.087551871245125e-06, "loss": 2.0626, "step": 22459 }, { "epoch": 0.75, "grad_norm": 0.7500926852226257, "learning_rate": 3.0867838419044427e-06, "loss": 2.0773, "step": 22460 }, { "epoch": 0.75, "grad_norm": 0.7385639548301697, "learning_rate": 3.086015890665225e-06, "loss": 2.0367, "step": 22461 }, { "epoch": 0.75, "grad_norm": 0.756473958492279, "learning_rate": 3.085248017536151e-06, "loss": 2.1166, "step": 22462 }, { "epoch": 0.75, "grad_norm": 0.7077974677085876, "learning_rate": 3.0844802225258917e-06, "loss": 1.9807, "step": 22463 }, { "epoch": 0.75, "grad_norm": 0.7534484267234802, "learning_rate": 3.0837125056431205e-06, "loss": 2.155, "step": 22464 }, { "epoch": 0.75, "grad_norm": 0.7526543140411377, "learning_rate": 3.0829448668965133e-06, "loss": 2.0189, "step": 22465 }, { "epoch": 0.75, "grad_norm": 0.7618220448493958, "learning_rate": 3.082177306294739e-06, "loss": 2.0208, "step": 22466 }, { "epoch": 0.75, "grad_norm": 0.7505958676338196, "learning_rate": 3.081409823846475e-06, "loss": 2.0581, "step": 22467 }, { "epoch": 0.75, "grad_norm": 0.742641031742096, "learning_rate": 3.0806424195603877e-06, "loss": 1.9615, "step": 22468 }, { "epoch": 0.75, "grad_norm": 0.7457727789878845, "learning_rate": 3.079875093445144e-06, "loss": 2.0515, "step": 22469 }, { "epoch": 0.75, "grad_norm": 0.7481655478477478, "learning_rate": 3.079107845509416e-06, "loss": 1.996, "step": 22470 }, { "epoch": 0.75, "grad_norm": 0.7417362332344055, "learning_rate": 3.078340675761874e-06, "loss": 2.0609, "step": 22471 }, { "epoch": 0.75, "grad_norm": 0.7424882650375366, "learning_rate": 3.077573584211183e-06, "loss": 2.0626, "step": 22472 }, { "epoch": 0.75, "grad_norm": 0.723423182964325, "learning_rate": 3.0768065708660055e-06, "loss": 2.0448, "step": 22473 }, { "epoch": 0.75, "grad_norm": 0.7270727753639221, "learning_rate": 3.0760396357350143e-06, "loss": 2.0418, "step": 22474 }, { "epoch": 0.75, "grad_norm": 0.7297767400741577, "learning_rate": 3.0752727788268644e-06, "loss": 2.0208, "step": 22475 }, { "epoch": 0.75, "grad_norm": 0.7541229128837585, "learning_rate": 3.074506000150228e-06, "loss": 2.0091, "step": 22476 }, { "epoch": 0.75, "grad_norm": 0.7391154766082764, "learning_rate": 3.0737392997137615e-06, "loss": 1.9514, "step": 22477 }, { "epoch": 0.75, "grad_norm": 0.7623884081840515, "learning_rate": 3.0729726775261328e-06, "loss": 2.0428, "step": 22478 }, { "epoch": 0.75, "grad_norm": 0.7564516067504883, "learning_rate": 3.0722061335959954e-06, "loss": 2.0603, "step": 22479 }, { "epoch": 0.75, "grad_norm": 0.7422676086425781, "learning_rate": 3.0714396679320157e-06, "loss": 2.0681, "step": 22480 }, { "epoch": 0.75, "grad_norm": 0.7417407035827637, "learning_rate": 3.070673280542851e-06, "loss": 2.0441, "step": 22481 }, { "epoch": 0.75, "grad_norm": 0.7274176478385925, "learning_rate": 3.069906971437154e-06, "loss": 2.0568, "step": 22482 }, { "epoch": 0.75, "grad_norm": 0.7387732267379761, "learning_rate": 3.0691407406235873e-06, "loss": 2.0744, "step": 22483 }, { "epoch": 0.75, "grad_norm": 0.8083133101463318, "learning_rate": 3.068374588110811e-06, "loss": 2.0969, "step": 22484 }, { "epoch": 0.75, "grad_norm": 0.7489886283874512, "learning_rate": 3.067608513907475e-06, "loss": 1.9952, "step": 22485 }, { "epoch": 0.75, "grad_norm": 0.7621607184410095, "learning_rate": 3.066842518022233e-06, "loss": 2.0949, "step": 22486 }, { "epoch": 0.75, "grad_norm": 0.7479332089424133, "learning_rate": 3.0660766004637433e-06, "loss": 2.0923, "step": 22487 }, { "epoch": 0.75, "grad_norm": 0.78489089012146, "learning_rate": 3.065310761240653e-06, "loss": 2.0751, "step": 22488 }, { "epoch": 0.75, "grad_norm": 0.7534067034721375, "learning_rate": 3.064545000361622e-06, "loss": 2.0328, "step": 22489 }, { "epoch": 0.75, "grad_norm": 0.7381646037101746, "learning_rate": 3.063779317835296e-06, "loss": 2.0255, "step": 22490 }, { "epoch": 0.75, "grad_norm": 0.7548058032989502, "learning_rate": 3.063013713670323e-06, "loss": 2.0465, "step": 22491 }, { "epoch": 0.75, "grad_norm": 0.7456128001213074, "learning_rate": 3.062248187875356e-06, "loss": 2.0274, "step": 22492 }, { "epoch": 0.75, "grad_norm": 0.731165885925293, "learning_rate": 3.0614827404590464e-06, "loss": 2.0358, "step": 22493 }, { "epoch": 0.75, "grad_norm": 0.7430233955383301, "learning_rate": 3.0607173714300376e-06, "loss": 1.9972, "step": 22494 }, { "epoch": 0.75, "grad_norm": 0.7444012761116028, "learning_rate": 3.059952080796975e-06, "loss": 2.0932, "step": 22495 }, { "epoch": 0.75, "grad_norm": 0.7301807403564453, "learning_rate": 3.0591868685685087e-06, "loss": 2.067, "step": 22496 }, { "epoch": 0.75, "grad_norm": 0.7216402292251587, "learning_rate": 3.0584217347532796e-06, "loss": 2.0301, "step": 22497 }, { "epoch": 0.75, "grad_norm": 0.8821868300437927, "learning_rate": 3.057656679359936e-06, "loss": 2.0783, "step": 22498 }, { "epoch": 0.75, "grad_norm": 0.7370589971542358, "learning_rate": 3.056891702397116e-06, "loss": 2.0777, "step": 22499 }, { "epoch": 0.75, "grad_norm": 0.7623509168624878, "learning_rate": 3.056126803873466e-06, "loss": 2.0059, "step": 22500 }, { "epoch": 0.75, "grad_norm": 0.7227151393890381, "learning_rate": 3.0553619837976245e-06, "loss": 2.0553, "step": 22501 }, { "epoch": 0.75, "grad_norm": 0.7908801436424255, "learning_rate": 3.0545972421782355e-06, "loss": 2.0905, "step": 22502 }, { "epoch": 0.75, "grad_norm": 0.7511583566665649, "learning_rate": 3.0538325790239363e-06, "loss": 2.0511, "step": 22503 }, { "epoch": 0.75, "grad_norm": 0.7420516014099121, "learning_rate": 3.053067994343364e-06, "loss": 2.0553, "step": 22504 }, { "epoch": 0.75, "grad_norm": 0.7602129578590393, "learning_rate": 3.0523034881451564e-06, "loss": 2.0643, "step": 22505 }, { "epoch": 0.75, "grad_norm": 0.7503677606582642, "learning_rate": 3.051539060437957e-06, "loss": 2.0896, "step": 22506 }, { "epoch": 0.75, "grad_norm": 0.7405288815498352, "learning_rate": 3.0507747112303963e-06, "loss": 2.1004, "step": 22507 }, { "epoch": 0.75, "grad_norm": 0.7383706569671631, "learning_rate": 3.0500104405311072e-06, "loss": 2.069, "step": 22508 }, { "epoch": 0.75, "grad_norm": 0.7434817552566528, "learning_rate": 3.0492462483487294e-06, "loss": 2.132, "step": 22509 }, { "epoch": 0.75, "grad_norm": 0.7413058876991272, "learning_rate": 3.0484821346918924e-06, "loss": 2.065, "step": 22510 }, { "epoch": 0.75, "grad_norm": 0.7673773765563965, "learning_rate": 3.0477180995692326e-06, "loss": 2.0245, "step": 22511 }, { "epoch": 0.75, "grad_norm": 0.7183076739311218, "learning_rate": 3.046954142989379e-06, "loss": 2.016, "step": 22512 }, { "epoch": 0.75, "grad_norm": 0.798474907875061, "learning_rate": 3.0461902649609597e-06, "loss": 2.1371, "step": 22513 }, { "epoch": 0.75, "grad_norm": 0.7593542337417603, "learning_rate": 3.0454264654926067e-06, "loss": 2.0847, "step": 22514 }, { "epoch": 0.75, "grad_norm": 0.753358006477356, "learning_rate": 3.0446627445929546e-06, "loss": 2.1216, "step": 22515 }, { "epoch": 0.75, "grad_norm": 0.7411670088768005, "learning_rate": 3.0438991022706254e-06, "loss": 2.0664, "step": 22516 }, { "epoch": 0.75, "grad_norm": 0.7905920147895813, "learning_rate": 3.043135538534244e-06, "loss": 2.0418, "step": 22517 }, { "epoch": 0.75, "grad_norm": 0.7665175199508667, "learning_rate": 3.042372053392444e-06, "loss": 2.1109, "step": 22518 }, { "epoch": 0.75, "grad_norm": 0.7305836081504822, "learning_rate": 3.041608646853844e-06, "loss": 2.0781, "step": 22519 }, { "epoch": 0.75, "grad_norm": 0.7481439709663391, "learning_rate": 3.0408453189270738e-06, "loss": 2.0594, "step": 22520 }, { "epoch": 0.75, "grad_norm": 0.765350878238678, "learning_rate": 3.0400820696207523e-06, "loss": 2.0634, "step": 22521 }, { "epoch": 0.75, "grad_norm": 0.7211467027664185, "learning_rate": 3.0393188989435075e-06, "loss": 2.0848, "step": 22522 }, { "epoch": 0.75, "grad_norm": 0.7525205016136169, "learning_rate": 3.0385558069039557e-06, "loss": 2.0501, "step": 22523 }, { "epoch": 0.75, "grad_norm": 0.7510121464729309, "learning_rate": 3.037792793510723e-06, "loss": 2.1441, "step": 22524 }, { "epoch": 0.75, "grad_norm": 0.7528380751609802, "learning_rate": 3.037029858772428e-06, "loss": 2.0059, "step": 22525 }, { "epoch": 0.75, "grad_norm": 0.7278952598571777, "learning_rate": 3.036267002697685e-06, "loss": 2.0418, "step": 22526 }, { "epoch": 0.75, "grad_norm": 0.7589005827903748, "learning_rate": 3.035504225295116e-06, "loss": 2.052, "step": 22527 }, { "epoch": 0.75, "grad_norm": 0.7450535297393799, "learning_rate": 3.0347415265733426e-06, "loss": 2.0835, "step": 22528 }, { "epoch": 0.75, "grad_norm": 0.7593364119529724, "learning_rate": 3.0339789065409775e-06, "loss": 2.0944, "step": 22529 }, { "epoch": 0.75, "grad_norm": 0.7622039914131165, "learning_rate": 3.033216365206633e-06, "loss": 2.1078, "step": 22530 }, { "epoch": 0.75, "grad_norm": 0.7340630888938904, "learning_rate": 3.032453902578929e-06, "loss": 2.1063, "step": 22531 }, { "epoch": 0.75, "grad_norm": 0.8260413408279419, "learning_rate": 3.0316915186664752e-06, "loss": 2.1573, "step": 22532 }, { "epoch": 0.75, "grad_norm": 0.7661426663398743, "learning_rate": 3.03092921347789e-06, "loss": 2.1063, "step": 22533 }, { "epoch": 0.75, "grad_norm": 0.7391083836555481, "learning_rate": 3.030166987021782e-06, "loss": 2.1514, "step": 22534 }, { "epoch": 0.75, "grad_norm": 0.7492218613624573, "learning_rate": 3.02940483930676e-06, "loss": 2.0696, "step": 22535 }, { "epoch": 0.75, "grad_norm": 0.7586084604263306, "learning_rate": 3.028642770341437e-06, "loss": 2.0374, "step": 22536 }, { "epoch": 0.75, "grad_norm": 0.7670184373855591, "learning_rate": 3.0278807801344246e-06, "loss": 2.0551, "step": 22537 }, { "epoch": 0.75, "grad_norm": 0.7137128114700317, "learning_rate": 3.02711886869433e-06, "loss": 2.013, "step": 22538 }, { "epoch": 0.75, "grad_norm": 0.77633136510849, "learning_rate": 3.0263570360297566e-06, "loss": 2.1386, "step": 22539 }, { "epoch": 0.75, "grad_norm": 0.7546154856681824, "learning_rate": 3.0255952821493174e-06, "loss": 2.0846, "step": 22540 }, { "epoch": 0.75, "grad_norm": 0.7523732781410217, "learning_rate": 3.0248336070616126e-06, "loss": 2.013, "step": 22541 }, { "epoch": 0.75, "grad_norm": 0.7736267447471619, "learning_rate": 3.024072010775252e-06, "loss": 2.0702, "step": 22542 }, { "epoch": 0.75, "grad_norm": 0.7314648628234863, "learning_rate": 3.0233104932988355e-06, "loss": 2.0997, "step": 22543 }, { "epoch": 0.75, "grad_norm": 0.7273348569869995, "learning_rate": 3.0225490546409707e-06, "loss": 2.0447, "step": 22544 }, { "epoch": 0.75, "grad_norm": 0.7652947306632996, "learning_rate": 3.0217876948102544e-06, "loss": 2.0159, "step": 22545 }, { "epoch": 0.75, "grad_norm": 0.7797354459762573, "learning_rate": 3.021026413815291e-06, "loss": 1.9941, "step": 22546 }, { "epoch": 0.75, "grad_norm": 0.7620541453361511, "learning_rate": 3.020265211664688e-06, "loss": 2.0552, "step": 22547 }, { "epoch": 0.75, "grad_norm": 0.7470033168792725, "learning_rate": 3.0195040883670313e-06, "loss": 2.0362, "step": 22548 }, { "epoch": 0.75, "grad_norm": 0.7467162609100342, "learning_rate": 3.018743043930926e-06, "loss": 2.0204, "step": 22549 }, { "epoch": 0.75, "grad_norm": 0.723630964756012, "learning_rate": 3.017982078364975e-06, "loss": 2.0278, "step": 22550 }, { "epoch": 0.75, "grad_norm": 0.7552885413169861, "learning_rate": 3.0172211916777695e-06, "loss": 2.068, "step": 22551 }, { "epoch": 0.75, "grad_norm": 0.7650761604309082, "learning_rate": 3.0164603838779037e-06, "loss": 2.0298, "step": 22552 }, { "epoch": 0.75, "grad_norm": 0.745740532875061, "learning_rate": 3.015699654973979e-06, "loss": 1.9943, "step": 22553 }, { "epoch": 0.75, "grad_norm": 0.757552981376648, "learning_rate": 3.014939004974583e-06, "loss": 1.9918, "step": 22554 }, { "epoch": 0.75, "grad_norm": 0.7809370756149292, "learning_rate": 3.0141784338883164e-06, "loss": 2.0787, "step": 22555 }, { "epoch": 0.75, "grad_norm": 0.7489234209060669, "learning_rate": 3.013417941723763e-06, "loss": 2.0334, "step": 22556 }, { "epoch": 0.75, "grad_norm": 0.7322822213172913, "learning_rate": 3.0126575284895233e-06, "loss": 2.1285, "step": 22557 }, { "epoch": 0.75, "grad_norm": 0.7187131643295288, "learning_rate": 3.011897194194181e-06, "loss": 2.0741, "step": 22558 }, { "epoch": 0.75, "grad_norm": 0.7302840352058411, "learning_rate": 3.011136938846332e-06, "loss": 1.9698, "step": 22559 }, { "epoch": 0.75, "grad_norm": 0.743945837020874, "learning_rate": 3.010376762454561e-06, "loss": 2.059, "step": 22560 }, { "epoch": 0.75, "grad_norm": 0.7236581444740295, "learning_rate": 3.009616665027455e-06, "loss": 2.0178, "step": 22561 }, { "epoch": 0.75, "grad_norm": 0.7602822184562683, "learning_rate": 3.0088566465736024e-06, "loss": 2.0093, "step": 22562 }, { "epoch": 0.75, "grad_norm": 0.7992000579833984, "learning_rate": 3.008096707101593e-06, "loss": 2.015, "step": 22563 }, { "epoch": 0.75, "grad_norm": 0.7656684517860413, "learning_rate": 3.0073368466200104e-06, "loss": 2.0772, "step": 22564 }, { "epoch": 0.75, "grad_norm": 0.7782395482063293, "learning_rate": 3.0065770651374348e-06, "loss": 2.0081, "step": 22565 }, { "epoch": 0.75, "grad_norm": 0.7718179225921631, "learning_rate": 3.0058173626624553e-06, "loss": 2.0887, "step": 22566 }, { "epoch": 0.75, "grad_norm": 0.7243524193763733, "learning_rate": 3.0050577392036495e-06, "loss": 2.0428, "step": 22567 }, { "epoch": 0.75, "grad_norm": 0.7722897529602051, "learning_rate": 3.0042981947696016e-06, "loss": 2.0346, "step": 22568 }, { "epoch": 0.75, "grad_norm": 0.7703007459640503, "learning_rate": 3.0035387293689e-06, "loss": 2.0887, "step": 22569 }, { "epoch": 0.75, "grad_norm": 0.7337794899940491, "learning_rate": 3.0027793430101106e-06, "loss": 2.0703, "step": 22570 }, { "epoch": 0.75, "grad_norm": 0.7501999139785767, "learning_rate": 3.002020035701819e-06, "loss": 2.083, "step": 22571 }, { "epoch": 0.75, "grad_norm": 0.7481458187103271, "learning_rate": 3.001260807452607e-06, "loss": 2.0369, "step": 22572 }, { "epoch": 0.75, "grad_norm": 0.7787818908691406, "learning_rate": 3.00050165827105e-06, "loss": 2.1443, "step": 22573 }, { "epoch": 0.75, "grad_norm": 0.7434309124946594, "learning_rate": 2.999742588165719e-06, "loss": 2.0878, "step": 22574 }, { "epoch": 0.75, "grad_norm": 0.7259165048599243, "learning_rate": 2.9989835971451976e-06, "loss": 2.0844, "step": 22575 }, { "epoch": 0.75, "grad_norm": 0.7410356402397156, "learning_rate": 2.9982246852180517e-06, "loss": 1.986, "step": 22576 }, { "epoch": 0.75, "grad_norm": 0.767663836479187, "learning_rate": 2.9974658523928614e-06, "loss": 2.0108, "step": 22577 }, { "epoch": 0.75, "grad_norm": 0.7757781744003296, "learning_rate": 2.996707098678201e-06, "loss": 2.0834, "step": 22578 }, { "epoch": 0.75, "grad_norm": 0.7286555767059326, "learning_rate": 2.9959484240826385e-06, "loss": 2.0313, "step": 22579 }, { "epoch": 0.75, "grad_norm": 0.7353644371032715, "learning_rate": 2.995189828614744e-06, "loss": 2.0918, "step": 22580 }, { "epoch": 0.75, "grad_norm": 0.7684770822525024, "learning_rate": 2.9944313122830913e-06, "loss": 2.0391, "step": 22581 }, { "epoch": 0.75, "grad_norm": 0.7084128856658936, "learning_rate": 2.9936728750962494e-06, "loss": 1.9643, "step": 22582 }, { "epoch": 0.75, "grad_norm": 0.7928056716918945, "learning_rate": 2.9929145170627815e-06, "loss": 1.991, "step": 22583 }, { "epoch": 0.75, "grad_norm": 0.7325501441955566, "learning_rate": 2.9921562381912594e-06, "loss": 2.0113, "step": 22584 }, { "epoch": 0.75, "grad_norm": 0.7555614113807678, "learning_rate": 2.991398038490252e-06, "loss": 2.0772, "step": 22585 }, { "epoch": 0.75, "grad_norm": 0.7730307579040527, "learning_rate": 2.990639917968321e-06, "loss": 2.0958, "step": 22586 }, { "epoch": 0.75, "grad_norm": 0.7246148586273193, "learning_rate": 2.98988187663403e-06, "loss": 2.0374, "step": 22587 }, { "epoch": 0.75, "grad_norm": 0.749282956123352, "learning_rate": 2.9891239144959484e-06, "loss": 2.006, "step": 22588 }, { "epoch": 0.75, "grad_norm": 0.7372558116912842, "learning_rate": 2.988366031562633e-06, "loss": 2.081, "step": 22589 }, { "epoch": 0.75, "grad_norm": 0.7339449524879456, "learning_rate": 2.987608227842649e-06, "loss": 2.0246, "step": 22590 }, { "epoch": 0.75, "grad_norm": 0.75889652967453, "learning_rate": 2.986850503344564e-06, "loss": 2.0553, "step": 22591 }, { "epoch": 0.75, "grad_norm": 0.8152104616165161, "learning_rate": 2.9860928580769256e-06, "loss": 2.0459, "step": 22592 }, { "epoch": 0.75, "grad_norm": 0.7334176301956177, "learning_rate": 2.985335292048298e-06, "loss": 2.0337, "step": 22593 }, { "epoch": 0.75, "grad_norm": 0.738373339176178, "learning_rate": 2.9845778052672457e-06, "loss": 2.06, "step": 22594 }, { "epoch": 0.75, "grad_norm": 0.712801992893219, "learning_rate": 2.983820397742323e-06, "loss": 1.9967, "step": 22595 }, { "epoch": 0.75, "grad_norm": 0.7481465339660645, "learning_rate": 2.9830630694820804e-06, "loss": 2.0303, "step": 22596 }, { "epoch": 0.75, "grad_norm": 0.732541024684906, "learning_rate": 2.9823058204950837e-06, "loss": 2.0454, "step": 22597 }, { "epoch": 0.75, "grad_norm": 0.7262259721755981, "learning_rate": 2.9815486507898784e-06, "loss": 2.0279, "step": 22598 }, { "epoch": 0.75, "grad_norm": 0.7480810284614563, "learning_rate": 2.980791560375025e-06, "loss": 2.0611, "step": 22599 }, { "epoch": 0.75, "grad_norm": 0.7543347477912903, "learning_rate": 2.9800345492590766e-06, "loss": 2.0451, "step": 22600 }, { "epoch": 0.75, "grad_norm": 0.7453309297561646, "learning_rate": 2.9792776174505843e-06, "loss": 2.0599, "step": 22601 }, { "epoch": 0.75, "grad_norm": 0.725368320941925, "learning_rate": 2.978520764958096e-06, "loss": 2.0309, "step": 22602 }, { "epoch": 0.75, "grad_norm": 0.7317690253257751, "learning_rate": 2.977763991790168e-06, "loss": 2.0219, "step": 22603 }, { "epoch": 0.75, "grad_norm": 0.7536773085594177, "learning_rate": 2.9770072979553466e-06, "loss": 2.0471, "step": 22604 }, { "epoch": 0.75, "grad_norm": 0.7761757969856262, "learning_rate": 2.9762506834621773e-06, "loss": 2.0583, "step": 22605 }, { "epoch": 0.75, "grad_norm": 0.7515122890472412, "learning_rate": 2.9754941483192125e-06, "loss": 2.0571, "step": 22606 }, { "epoch": 0.75, "grad_norm": 0.7875292301177979, "learning_rate": 2.9747376925350013e-06, "loss": 2.0007, "step": 22607 }, { "epoch": 0.75, "grad_norm": 0.7623459696769714, "learning_rate": 2.973981316118083e-06, "loss": 2.1231, "step": 22608 }, { "epoch": 0.75, "grad_norm": 0.7492282390594482, "learning_rate": 2.9732250190770084e-06, "loss": 2.0191, "step": 22609 }, { "epoch": 0.75, "grad_norm": 0.7528647184371948, "learning_rate": 2.9724688014203208e-06, "loss": 2.0674, "step": 22610 }, { "epoch": 0.75, "grad_norm": 0.7651874423027039, "learning_rate": 2.9717126631565585e-06, "loss": 2.0821, "step": 22611 }, { "epoch": 0.75, "grad_norm": 0.7327250242233276, "learning_rate": 2.9709566042942674e-06, "loss": 1.9541, "step": 22612 }, { "epoch": 0.75, "grad_norm": 0.7590217590332031, "learning_rate": 2.9702006248419957e-06, "loss": 2.0074, "step": 22613 }, { "epoch": 0.75, "grad_norm": 0.7499263882637024, "learning_rate": 2.96944472480827e-06, "loss": 2.035, "step": 22614 }, { "epoch": 0.75, "grad_norm": 0.7688724994659424, "learning_rate": 2.9686889042016396e-06, "loss": 2.1279, "step": 22615 }, { "epoch": 0.75, "grad_norm": 0.745100200176239, "learning_rate": 2.967933163030643e-06, "loss": 2.0947, "step": 22616 }, { "epoch": 0.75, "grad_norm": 0.7396244406700134, "learning_rate": 2.967177501303816e-06, "loss": 2.0593, "step": 22617 }, { "epoch": 0.75, "grad_norm": 0.724320650100708, "learning_rate": 2.966421919029694e-06, "loss": 2.0458, "step": 22618 }, { "epoch": 0.75, "grad_norm": 0.7484925389289856, "learning_rate": 2.965666416216818e-06, "loss": 2.1333, "step": 22619 }, { "epoch": 0.75, "grad_norm": 0.7382520437240601, "learning_rate": 2.9649109928737164e-06, "loss": 2.021, "step": 22620 }, { "epoch": 0.75, "grad_norm": 0.8054434061050415, "learning_rate": 2.964155649008927e-06, "loss": 2.0493, "step": 22621 }, { "epoch": 0.75, "grad_norm": 0.7202202081680298, "learning_rate": 2.9634003846309887e-06, "loss": 2.0696, "step": 22622 }, { "epoch": 0.75, "grad_norm": 0.7623435854911804, "learning_rate": 2.9626451997484273e-06, "loss": 2.0569, "step": 22623 }, { "epoch": 0.75, "grad_norm": 0.7965266704559326, "learning_rate": 2.9618900943697737e-06, "loss": 2.0239, "step": 22624 }, { "epoch": 0.75, "grad_norm": 0.7476407885551453, "learning_rate": 2.9611350685035645e-06, "loss": 2.0716, "step": 22625 }, { "epoch": 0.75, "grad_norm": 0.7439313530921936, "learning_rate": 2.960380122158325e-06, "loss": 2.0574, "step": 22626 }, { "epoch": 0.75, "grad_norm": 0.7394826412200928, "learning_rate": 2.959625255342583e-06, "loss": 2.0414, "step": 22627 }, { "epoch": 0.75, "grad_norm": 0.7328760623931885, "learning_rate": 2.9588704680648694e-06, "loss": 2.0539, "step": 22628 }, { "epoch": 0.75, "grad_norm": 0.7159720659255981, "learning_rate": 2.958115760333713e-06, "loss": 2.039, "step": 22629 }, { "epoch": 0.75, "grad_norm": 0.7536383867263794, "learning_rate": 2.9573611321576344e-06, "loss": 2.1139, "step": 22630 }, { "epoch": 0.75, "grad_norm": 0.7587533593177795, "learning_rate": 2.956606583545166e-06, "loss": 2.0357, "step": 22631 }, { "epoch": 0.75, "grad_norm": 0.7350855469703674, "learning_rate": 2.955852114504829e-06, "loss": 1.9967, "step": 22632 }, { "epoch": 0.75, "grad_norm": 0.7340081930160522, "learning_rate": 2.955097725045143e-06, "loss": 1.9534, "step": 22633 }, { "epoch": 0.75, "grad_norm": 0.7744357585906982, "learning_rate": 2.954343415174633e-06, "loss": 2.0428, "step": 22634 }, { "epoch": 0.75, "grad_norm": 0.7952952980995178, "learning_rate": 2.9535891849018293e-06, "loss": 2.0606, "step": 22635 }, { "epoch": 0.75, "grad_norm": 0.7766268253326416, "learning_rate": 2.95283503423524e-06, "loss": 2.0336, "step": 22636 }, { "epoch": 0.75, "grad_norm": 0.7420289516448975, "learning_rate": 2.952080963183389e-06, "loss": 2.0197, "step": 22637 }, { "epoch": 0.75, "grad_norm": 0.738042414188385, "learning_rate": 2.951326971754801e-06, "loss": 1.9974, "step": 22638 }, { "epoch": 0.75, "grad_norm": 0.7221134305000305, "learning_rate": 2.9505730599579883e-06, "loss": 2.0452, "step": 22639 }, { "epoch": 0.75, "grad_norm": 0.7329491972923279, "learning_rate": 2.949819227801468e-06, "loss": 2.0511, "step": 22640 }, { "epoch": 0.75, "grad_norm": 0.7287574410438538, "learning_rate": 2.949065475293761e-06, "loss": 2.0395, "step": 22641 }, { "epoch": 0.75, "grad_norm": 0.7589728236198425, "learning_rate": 2.9483118024433777e-06, "loss": 2.0886, "step": 22642 }, { "epoch": 0.75, "grad_norm": 0.7625011801719666, "learning_rate": 2.947558209258834e-06, "loss": 2.1048, "step": 22643 }, { "epoch": 0.75, "grad_norm": 0.7657559514045715, "learning_rate": 2.946804695748647e-06, "loss": 2.0633, "step": 22644 }, { "epoch": 0.75, "grad_norm": 0.7427349090576172, "learning_rate": 2.946051261921329e-06, "loss": 2.0211, "step": 22645 }, { "epoch": 0.75, "grad_norm": 0.7636844515800476, "learning_rate": 2.9452979077853847e-06, "loss": 2.0159, "step": 22646 }, { "epoch": 0.75, "grad_norm": 0.7512263059616089, "learning_rate": 2.944544633349332e-06, "loss": 2.1151, "step": 22647 }, { "epoch": 0.75, "grad_norm": 0.7367919683456421, "learning_rate": 2.943791438621684e-06, "loss": 2.0392, "step": 22648 }, { "epoch": 0.75, "grad_norm": 0.7454794049263, "learning_rate": 2.9430383236109393e-06, "loss": 2.124, "step": 22649 }, { "epoch": 0.75, "grad_norm": 0.7674774527549744, "learning_rate": 2.9422852883256115e-06, "loss": 2.0191, "step": 22650 }, { "epoch": 0.75, "grad_norm": 0.7572967410087585, "learning_rate": 2.941532332774212e-06, "loss": 1.9516, "step": 22651 }, { "epoch": 0.75, "grad_norm": 0.7606080174446106, "learning_rate": 2.9407794569652392e-06, "loss": 2.0299, "step": 22652 }, { "epoch": 0.75, "grad_norm": 0.7374034523963928, "learning_rate": 2.940026660907207e-06, "loss": 2.013, "step": 22653 }, { "epoch": 0.75, "grad_norm": 0.7524685859680176, "learning_rate": 2.939273944608616e-06, "loss": 2.1021, "step": 22654 }, { "epoch": 0.75, "grad_norm": 0.734710693359375, "learning_rate": 2.9385213080779674e-06, "loss": 2.0589, "step": 22655 }, { "epoch": 0.75, "grad_norm": 0.7480674386024475, "learning_rate": 2.9377687513237664e-06, "loss": 2.0454, "step": 22656 }, { "epoch": 0.75, "grad_norm": 0.7109075784683228, "learning_rate": 2.937016274354517e-06, "loss": 1.9885, "step": 22657 }, { "epoch": 0.75, "grad_norm": 0.7300166487693787, "learning_rate": 2.93626387717872e-06, "loss": 2.049, "step": 22658 }, { "epoch": 0.75, "grad_norm": 0.7387818098068237, "learning_rate": 2.93551155980487e-06, "loss": 2.0298, "step": 22659 }, { "epoch": 0.75, "grad_norm": 0.744696319103241, "learning_rate": 2.9347593222414737e-06, "loss": 2.0132, "step": 22660 }, { "epoch": 0.75, "grad_norm": 0.8759850263595581, "learning_rate": 2.9340071644970223e-06, "loss": 2.154, "step": 22661 }, { "epoch": 0.75, "grad_norm": 0.7840388417243958, "learning_rate": 2.93325508658002e-06, "loss": 2.1163, "step": 22662 }, { "epoch": 0.75, "grad_norm": 0.7456493377685547, "learning_rate": 2.932503088498958e-06, "loss": 2.0361, "step": 22663 }, { "epoch": 0.75, "grad_norm": 0.7483821511268616, "learning_rate": 2.931751170262337e-06, "loss": 2.0904, "step": 22664 }, { "epoch": 0.75, "grad_norm": 0.7588597536087036, "learning_rate": 2.9309993318786457e-06, "loss": 2.0446, "step": 22665 }, { "epoch": 0.75, "grad_norm": 0.7661793231964111, "learning_rate": 2.9302475733563828e-06, "loss": 2.0761, "step": 22666 }, { "epoch": 0.75, "grad_norm": 0.7173787355422974, "learning_rate": 2.929495894704041e-06, "loss": 2.0049, "step": 22667 }, { "epoch": 0.75, "grad_norm": 0.7571566104888916, "learning_rate": 2.928744295930108e-06, "loss": 2.0139, "step": 22668 }, { "epoch": 0.75, "grad_norm": 0.7443260550498962, "learning_rate": 2.9279927770430773e-06, "loss": 2.0707, "step": 22669 }, { "epoch": 0.75, "grad_norm": 0.7448168992996216, "learning_rate": 2.9272413380514453e-06, "loss": 2.0291, "step": 22670 }, { "epoch": 0.75, "grad_norm": 0.7479420304298401, "learning_rate": 2.9264899789636903e-06, "loss": 2.0637, "step": 22671 }, { "epoch": 0.75, "grad_norm": 0.7363066673278809, "learning_rate": 2.9257386997883053e-06, "loss": 2.035, "step": 22672 }, { "epoch": 0.75, "grad_norm": 0.7268658876419067, "learning_rate": 2.9249875005337823e-06, "loss": 2.0371, "step": 22673 }, { "epoch": 0.75, "grad_norm": 0.7813587784767151, "learning_rate": 2.9242363812086005e-06, "loss": 2.0315, "step": 22674 }, { "epoch": 0.75, "grad_norm": 0.7182080149650574, "learning_rate": 2.9234853418212528e-06, "loss": 2.0558, "step": 22675 }, { "epoch": 0.75, "grad_norm": 0.745822548866272, "learning_rate": 2.92273438238022e-06, "loss": 2.0523, "step": 22676 }, { "epoch": 0.75, "grad_norm": 0.7519271373748779, "learning_rate": 2.9219835028939838e-06, "loss": 2.0402, "step": 22677 }, { "epoch": 0.75, "grad_norm": 0.7843594551086426, "learning_rate": 2.92123270337103e-06, "loss": 2.0928, "step": 22678 }, { "epoch": 0.75, "grad_norm": 0.7534686923027039, "learning_rate": 2.920481983819843e-06, "loss": 2.0085, "step": 22679 }, { "epoch": 0.75, "grad_norm": 0.7463078498840332, "learning_rate": 2.919731344248902e-06, "loss": 1.9969, "step": 22680 }, { "epoch": 0.75, "grad_norm": 0.7494160532951355, "learning_rate": 2.9189807846666828e-06, "loss": 2.0336, "step": 22681 }, { "epoch": 0.75, "grad_norm": 0.7696128487586975, "learning_rate": 2.918230305081673e-06, "loss": 2.042, "step": 22682 }, { "epoch": 0.75, "grad_norm": 0.7276766896247864, "learning_rate": 2.917479905502343e-06, "loss": 2.0475, "step": 22683 }, { "epoch": 0.75, "grad_norm": 0.7440370321273804, "learning_rate": 2.916729585937178e-06, "loss": 2.0803, "step": 22684 }, { "epoch": 0.75, "grad_norm": 0.7477647662162781, "learning_rate": 2.9159793463946474e-06, "loss": 2.0497, "step": 22685 }, { "epoch": 0.75, "grad_norm": 0.7365114688873291, "learning_rate": 2.9152291868832337e-06, "loss": 2.0072, "step": 22686 }, { "epoch": 0.75, "grad_norm": 0.7570539712905884, "learning_rate": 2.9144791074114064e-06, "loss": 2.0359, "step": 22687 }, { "epoch": 0.75, "grad_norm": 0.7246546149253845, "learning_rate": 2.9137291079876438e-06, "loss": 1.9985, "step": 22688 }, { "epoch": 0.75, "grad_norm": 0.7559397220611572, "learning_rate": 2.912979188620417e-06, "loss": 2.0726, "step": 22689 }, { "epoch": 0.75, "grad_norm": 0.7361375093460083, "learning_rate": 2.912229349318194e-06, "loss": 2.0288, "step": 22690 }, { "epoch": 0.75, "grad_norm": 0.7233638167381287, "learning_rate": 2.911479590089451e-06, "loss": 1.9971, "step": 22691 }, { "epoch": 0.75, "grad_norm": 0.7768986821174622, "learning_rate": 2.9107299109426602e-06, "loss": 2.0048, "step": 22692 }, { "epoch": 0.75, "grad_norm": 0.7424880862236023, "learning_rate": 2.9099803118862878e-06, "loss": 2.1086, "step": 22693 }, { "epoch": 0.76, "grad_norm": 0.7472460865974426, "learning_rate": 2.9092307929288e-06, "loss": 2.0763, "step": 22694 }, { "epoch": 0.76, "grad_norm": 0.7406028509140015, "learning_rate": 2.9084813540786704e-06, "loss": 2.0163, "step": 22695 }, { "epoch": 0.76, "grad_norm": 0.7557093501091003, "learning_rate": 2.9077319953443594e-06, "loss": 2.042, "step": 22696 }, { "epoch": 0.76, "grad_norm": 0.7623248100280762, "learning_rate": 2.906982716734338e-06, "loss": 2.0075, "step": 22697 }, { "epoch": 0.76, "grad_norm": 0.7446292042732239, "learning_rate": 2.90623351825707e-06, "loss": 2.0699, "step": 22698 }, { "epoch": 0.76, "grad_norm": 0.7672387957572937, "learning_rate": 2.9054843999210147e-06, "loss": 2.0372, "step": 22699 }, { "epoch": 0.76, "grad_norm": 0.7352509498596191, "learning_rate": 2.9047353617346386e-06, "loss": 2.0838, "step": 22700 }, { "epoch": 0.76, "grad_norm": 0.7627996206283569, "learning_rate": 2.903986403706407e-06, "loss": 2.1219, "step": 22701 }, { "epoch": 0.76, "grad_norm": 0.7455876469612122, "learning_rate": 2.90323752584478e-06, "loss": 2.1121, "step": 22702 }, { "epoch": 0.76, "grad_norm": 0.7555532455444336, "learning_rate": 2.9024887281582113e-06, "loss": 2.0483, "step": 22703 }, { "epoch": 0.76, "grad_norm": 0.7447641491889954, "learning_rate": 2.9017400106551696e-06, "loss": 2.0089, "step": 22704 }, { "epoch": 0.76, "grad_norm": 0.7668010592460632, "learning_rate": 2.9009913733441052e-06, "loss": 2.0461, "step": 22705 }, { "epoch": 0.76, "grad_norm": 0.7513505220413208, "learning_rate": 2.900242816233484e-06, "loss": 2.0561, "step": 22706 }, { "epoch": 0.76, "grad_norm": 0.763373076915741, "learning_rate": 2.8994943393317555e-06, "loss": 2.078, "step": 22707 }, { "epoch": 0.76, "grad_norm": 0.7274625897407532, "learning_rate": 2.898745942647381e-06, "loss": 1.9682, "step": 22708 }, { "epoch": 0.76, "grad_norm": 0.7455772757530212, "learning_rate": 2.8979976261888097e-06, "loss": 2.1197, "step": 22709 }, { "epoch": 0.76, "grad_norm": 0.7653241753578186, "learning_rate": 2.8972493899645036e-06, "loss": 2.047, "step": 22710 }, { "epoch": 0.76, "grad_norm": 0.7589138746261597, "learning_rate": 2.89650123398291e-06, "loss": 2.0545, "step": 22711 }, { "epoch": 0.76, "grad_norm": 0.7377748489379883, "learning_rate": 2.89575315825248e-06, "loss": 2.0305, "step": 22712 }, { "epoch": 0.76, "grad_norm": 0.7405987977981567, "learning_rate": 2.8950051627816667e-06, "loss": 2.1129, "step": 22713 }, { "epoch": 0.76, "grad_norm": 0.7699908018112183, "learning_rate": 2.8942572475789254e-06, "loss": 1.9922, "step": 22714 }, { "epoch": 0.76, "grad_norm": 0.7605773210525513, "learning_rate": 2.8935094126526996e-06, "loss": 2.032, "step": 22715 }, { "epoch": 0.76, "grad_norm": 0.7453346848487854, "learning_rate": 2.892761658011438e-06, "loss": 2.048, "step": 22716 }, { "epoch": 0.76, "grad_norm": 0.7545034885406494, "learning_rate": 2.892013983663593e-06, "loss": 2.1008, "step": 22717 }, { "epoch": 0.76, "grad_norm": 0.7768744826316833, "learning_rate": 2.891266389617604e-06, "loss": 2.1043, "step": 22718 }, { "epoch": 0.76, "grad_norm": 0.73736572265625, "learning_rate": 2.8905188758819257e-06, "loss": 2.0451, "step": 22719 }, { "epoch": 0.76, "grad_norm": 0.7751873731613159, "learning_rate": 2.889771442464997e-06, "loss": 2.0798, "step": 22720 }, { "epoch": 0.76, "grad_norm": 0.7433980703353882, "learning_rate": 2.889024089375262e-06, "loss": 2.0152, "step": 22721 }, { "epoch": 0.76, "grad_norm": 0.7612611055374146, "learning_rate": 2.888276816621165e-06, "loss": 2.0679, "step": 22722 }, { "epoch": 0.76, "grad_norm": 0.7978125214576721, "learning_rate": 2.8875296242111507e-06, "loss": 2.0162, "step": 22723 }, { "epoch": 0.76, "grad_norm": 0.7777748107910156, "learning_rate": 2.8867825121536595e-06, "loss": 2.0158, "step": 22724 }, { "epoch": 0.76, "grad_norm": 0.7413191199302673, "learning_rate": 2.886035480457128e-06, "loss": 2.094, "step": 22725 }, { "epoch": 0.76, "grad_norm": 0.7417134642601013, "learning_rate": 2.8852885291300004e-06, "loss": 2.0235, "step": 22726 }, { "epoch": 0.76, "grad_norm": 0.7270734906196594, "learning_rate": 2.8845416581807105e-06, "loss": 2.0058, "step": 22727 }, { "epoch": 0.76, "grad_norm": 0.7813875675201416, "learning_rate": 2.8837948676177017e-06, "loss": 2.0277, "step": 22728 }, { "epoch": 0.76, "grad_norm": 0.7309724688529968, "learning_rate": 2.8830481574494063e-06, "loss": 2.049, "step": 22729 }, { "epoch": 0.76, "grad_norm": 0.7163568735122681, "learning_rate": 2.8823015276842638e-06, "loss": 2.0145, "step": 22730 }, { "epoch": 0.76, "grad_norm": 0.7306113243103027, "learning_rate": 2.881554978330704e-06, "loss": 2.0344, "step": 22731 }, { "epoch": 0.76, "grad_norm": 0.7483114004135132, "learning_rate": 2.880808509397168e-06, "loss": 2.0482, "step": 22732 }, { "epoch": 0.76, "grad_norm": 0.7488372325897217, "learning_rate": 2.8800621208920853e-06, "loss": 1.9394, "step": 22733 }, { "epoch": 0.76, "grad_norm": 0.759667694568634, "learning_rate": 2.8793158128238843e-06, "loss": 2.0223, "step": 22734 }, { "epoch": 0.76, "grad_norm": 0.7745639681816101, "learning_rate": 2.878569585201001e-06, "loss": 2.0839, "step": 22735 }, { "epoch": 0.76, "grad_norm": 0.7300155758857727, "learning_rate": 2.877823438031867e-06, "loss": 2.044, "step": 22736 }, { "epoch": 0.76, "grad_norm": 0.7173658609390259, "learning_rate": 2.87707737132491e-06, "loss": 2.0258, "step": 22737 }, { "epoch": 0.76, "grad_norm": 0.7419295310974121, "learning_rate": 2.8763313850885566e-06, "loss": 2.0495, "step": 22738 }, { "epoch": 0.76, "grad_norm": 0.7540974617004395, "learning_rate": 2.8755854793312377e-06, "loss": 2.0764, "step": 22739 }, { "epoch": 0.76, "grad_norm": 0.7596832513809204, "learning_rate": 2.8748396540613765e-06, "loss": 2.0454, "step": 22740 }, { "epoch": 0.76, "grad_norm": 0.7529978156089783, "learning_rate": 2.874093909287404e-06, "loss": 2.0882, "step": 22741 }, { "epoch": 0.76, "grad_norm": 0.7518634796142578, "learning_rate": 2.873348245017743e-06, "loss": 2.1326, "step": 22742 }, { "epoch": 0.76, "grad_norm": 0.7591113448143005, "learning_rate": 2.8726026612608125e-06, "loss": 2.0348, "step": 22743 }, { "epoch": 0.76, "grad_norm": 0.7428948283195496, "learning_rate": 2.871857158025041e-06, "loss": 2.0727, "step": 22744 }, { "epoch": 0.76, "grad_norm": 0.739607036113739, "learning_rate": 2.8711117353188535e-06, "loss": 2.0395, "step": 22745 }, { "epoch": 0.76, "grad_norm": 0.7560466527938843, "learning_rate": 2.8703663931506664e-06, "loss": 2.07, "step": 22746 }, { "epoch": 0.76, "grad_norm": 0.7224982380867004, "learning_rate": 2.869621131528899e-06, "loss": 1.9763, "step": 22747 }, { "epoch": 0.76, "grad_norm": 0.7437267899513245, "learning_rate": 2.868875950461972e-06, "loss": 2.0349, "step": 22748 }, { "epoch": 0.76, "grad_norm": 0.7242689728736877, "learning_rate": 2.8681308499583103e-06, "loss": 1.9429, "step": 22749 }, { "epoch": 0.76, "grad_norm": 0.7306990027427673, "learning_rate": 2.8673858300263257e-06, "loss": 2.1277, "step": 22750 }, { "epoch": 0.76, "grad_norm": 0.7411149740219116, "learning_rate": 2.8666408906744327e-06, "loss": 2.0081, "step": 22751 }, { "epoch": 0.76, "grad_norm": 0.7521913051605225, "learning_rate": 2.865896031911054e-06, "loss": 2.0848, "step": 22752 }, { "epoch": 0.76, "grad_norm": 0.7480065822601318, "learning_rate": 2.865151253744597e-06, "loss": 2.068, "step": 22753 }, { "epoch": 0.76, "grad_norm": 0.7338721752166748, "learning_rate": 2.86440655618348e-06, "loss": 2.0267, "step": 22754 }, { "epoch": 0.76, "grad_norm": 0.7653493285179138, "learning_rate": 2.863661939236122e-06, "loss": 2.0541, "step": 22755 }, { "epoch": 0.76, "grad_norm": 0.7390453219413757, "learning_rate": 2.862917402910923e-06, "loss": 2.0226, "step": 22756 }, { "epoch": 0.76, "grad_norm": 0.7454491853713989, "learning_rate": 2.8621729472163006e-06, "loss": 2.0429, "step": 22757 }, { "epoch": 0.76, "grad_norm": 0.7457079291343689, "learning_rate": 2.8614285721606683e-06, "loss": 2.0612, "step": 22758 }, { "epoch": 0.76, "grad_norm": 0.7485204935073853, "learning_rate": 2.8606842777524325e-06, "loss": 2.0216, "step": 22759 }, { "epoch": 0.76, "grad_norm": 0.7779741287231445, "learning_rate": 2.8599400639999975e-06, "loss": 2.0378, "step": 22760 }, { "epoch": 0.76, "grad_norm": 0.7353267073631287, "learning_rate": 2.859195930911779e-06, "loss": 2.0459, "step": 22761 }, { "epoch": 0.76, "grad_norm": 0.7637642621994019, "learning_rate": 2.8584518784961766e-06, "loss": 2.0017, "step": 22762 }, { "epoch": 0.76, "grad_norm": 0.7146471738815308, "learning_rate": 2.857707906761603e-06, "loss": 1.9969, "step": 22763 }, { "epoch": 0.76, "grad_norm": 0.7348971366882324, "learning_rate": 2.856964015716457e-06, "loss": 2.0433, "step": 22764 }, { "epoch": 0.76, "grad_norm": 0.7220140099525452, "learning_rate": 2.8562202053691477e-06, "loss": 2.089, "step": 22765 }, { "epoch": 0.76, "grad_norm": 0.7416256666183472, "learning_rate": 2.855476475728073e-06, "loss": 2.1392, "step": 22766 }, { "epoch": 0.76, "grad_norm": 0.7576702237129211, "learning_rate": 2.8547328268016407e-06, "loss": 2.0613, "step": 22767 }, { "epoch": 0.76, "grad_norm": 0.7445651292800903, "learning_rate": 2.85398925859825e-06, "loss": 1.9908, "step": 22768 }, { "epoch": 0.76, "grad_norm": 0.7502511143684387, "learning_rate": 2.853245771126296e-06, "loss": 2.0786, "step": 22769 }, { "epoch": 0.76, "grad_norm": 0.7497684359550476, "learning_rate": 2.8525023643941853e-06, "loss": 2.0972, "step": 22770 }, { "epoch": 0.76, "grad_norm": 0.7444024085998535, "learning_rate": 2.8517590384103157e-06, "loss": 2.0563, "step": 22771 }, { "epoch": 0.76, "grad_norm": 0.7406321167945862, "learning_rate": 2.8510157931830827e-06, "loss": 2.0867, "step": 22772 }, { "epoch": 0.76, "grad_norm": 0.7493699789047241, "learning_rate": 2.8502726287208817e-06, "loss": 2.092, "step": 22773 }, { "epoch": 0.76, "grad_norm": 0.7597299218177795, "learning_rate": 2.8495295450321126e-06, "loss": 1.9773, "step": 22774 }, { "epoch": 0.76, "grad_norm": 0.7407028675079346, "learning_rate": 2.848786542125166e-06, "loss": 2.0848, "step": 22775 }, { "epoch": 0.76, "grad_norm": 0.7526385188102722, "learning_rate": 2.8480436200084372e-06, "loss": 2.0283, "step": 22776 }, { "epoch": 0.76, "grad_norm": 0.7731091976165771, "learning_rate": 2.8473007786903262e-06, "loss": 1.9896, "step": 22777 }, { "epoch": 0.76, "grad_norm": 0.7436456680297852, "learning_rate": 2.846558018179214e-06, "loss": 1.994, "step": 22778 }, { "epoch": 0.76, "grad_norm": 0.7733966112136841, "learning_rate": 2.8458153384834964e-06, "loss": 1.9903, "step": 22779 }, { "epoch": 0.76, "grad_norm": 0.7830696105957031, "learning_rate": 2.8450727396115662e-06, "loss": 2.0559, "step": 22780 }, { "epoch": 0.76, "grad_norm": 0.759630024433136, "learning_rate": 2.8443302215718127e-06, "loss": 2.0441, "step": 22781 }, { "epoch": 0.76, "grad_norm": 0.7294036149978638, "learning_rate": 2.843587784372619e-06, "loss": 2.0204, "step": 22782 }, { "epoch": 0.76, "grad_norm": 0.7080414891242981, "learning_rate": 2.842845428022379e-06, "loss": 2.0727, "step": 22783 }, { "epoch": 0.76, "grad_norm": 0.7561578154563904, "learning_rate": 2.842103152529475e-06, "loss": 2.0254, "step": 22784 }, { "epoch": 0.76, "grad_norm": 0.7158710956573486, "learning_rate": 2.841360957902294e-06, "loss": 2.0194, "step": 22785 }, { "epoch": 0.76, "grad_norm": 0.7433933019638062, "learning_rate": 2.8406188441492245e-06, "loss": 2.1361, "step": 22786 }, { "epoch": 0.76, "grad_norm": 0.7620903849601746, "learning_rate": 2.8398768112786467e-06, "loss": 2.0968, "step": 22787 }, { "epoch": 0.76, "grad_norm": 0.7642061114311218, "learning_rate": 2.839134859298942e-06, "loss": 2.0538, "step": 22788 }, { "epoch": 0.76, "grad_norm": 0.7542993426322937, "learning_rate": 2.838392988218499e-06, "loss": 2.0882, "step": 22789 }, { "epoch": 0.76, "grad_norm": 0.7718907594680786, "learning_rate": 2.8376511980456946e-06, "loss": 2.0869, "step": 22790 }, { "epoch": 0.76, "grad_norm": 0.7224864363670349, "learning_rate": 2.836909488788905e-06, "loss": 2.0489, "step": 22791 }, { "epoch": 0.76, "grad_norm": 0.7465981841087341, "learning_rate": 2.8361678604565155e-06, "loss": 2.0313, "step": 22792 }, { "epoch": 0.76, "grad_norm": 0.7449187636375427, "learning_rate": 2.835426313056905e-06, "loss": 2.0311, "step": 22793 }, { "epoch": 0.76, "grad_norm": 0.769643247127533, "learning_rate": 2.83468484659845e-06, "loss": 2.03, "step": 22794 }, { "epoch": 0.76, "grad_norm": 0.7353590130805969, "learning_rate": 2.8339434610895234e-06, "loss": 2.0634, "step": 22795 }, { "epoch": 0.76, "grad_norm": 0.753582775592804, "learning_rate": 2.833202156538506e-06, "loss": 1.9786, "step": 22796 }, { "epoch": 0.76, "grad_norm": 0.7406953573226929, "learning_rate": 2.8324609329537677e-06, "loss": 2.031, "step": 22797 }, { "epoch": 0.76, "grad_norm": 0.7696153521537781, "learning_rate": 2.8317197903436857e-06, "loss": 2.0016, "step": 22798 }, { "epoch": 0.76, "grad_norm": 0.7360467910766602, "learning_rate": 2.8309787287166377e-06, "loss": 1.9836, "step": 22799 }, { "epoch": 0.76, "grad_norm": 0.7296015024185181, "learning_rate": 2.8302377480809863e-06, "loss": 1.9921, "step": 22800 }, { "epoch": 0.76, "grad_norm": 0.7530319094657898, "learning_rate": 2.8294968484451046e-06, "loss": 1.9961, "step": 22801 }, { "epoch": 0.76, "grad_norm": 0.7379713654518127, "learning_rate": 2.82875602981737e-06, "loss": 2.0353, "step": 22802 }, { "epoch": 0.76, "grad_norm": 0.7686151266098022, "learning_rate": 2.8280152922061465e-06, "loss": 2.0444, "step": 22803 }, { "epoch": 0.76, "grad_norm": 0.7530451416969299, "learning_rate": 2.8272746356198e-06, "loss": 2.0136, "step": 22804 }, { "epoch": 0.76, "grad_norm": 0.7173929214477539, "learning_rate": 2.8265340600667037e-06, "loss": 2.0888, "step": 22805 }, { "epoch": 0.76, "grad_norm": 0.7532140016555786, "learning_rate": 2.825793565555218e-06, "loss": 2.0485, "step": 22806 }, { "epoch": 0.76, "grad_norm": 0.737980842590332, "learning_rate": 2.825053152093713e-06, "loss": 2.0323, "step": 22807 }, { "epoch": 0.76, "grad_norm": 0.7561329007148743, "learning_rate": 2.8243128196905557e-06, "loss": 2.0326, "step": 22808 }, { "epoch": 0.76, "grad_norm": 0.7359281182289124, "learning_rate": 2.823572568354106e-06, "loss": 2.0457, "step": 22809 }, { "epoch": 0.76, "grad_norm": 0.7557693123817444, "learning_rate": 2.8228323980927254e-06, "loss": 2.009, "step": 22810 }, { "epoch": 0.76, "grad_norm": 0.7427211999893188, "learning_rate": 2.8220923089147813e-06, "loss": 2.0814, "step": 22811 }, { "epoch": 0.76, "grad_norm": 0.7207542061805725, "learning_rate": 2.8213523008286303e-06, "loss": 2.0245, "step": 22812 }, { "epoch": 0.76, "grad_norm": 0.7453858256340027, "learning_rate": 2.820612373842632e-06, "loss": 2.0509, "step": 22813 }, { "epoch": 0.76, "grad_norm": 0.7511812448501587, "learning_rate": 2.8198725279651473e-06, "loss": 2.0711, "step": 22814 }, { "epoch": 0.76, "grad_norm": 0.7107770442962646, "learning_rate": 2.8191327632045383e-06, "loss": 1.9964, "step": 22815 }, { "epoch": 0.76, "grad_norm": 0.7525960803031921, "learning_rate": 2.8183930795691583e-06, "loss": 2.0405, "step": 22816 }, { "epoch": 0.76, "grad_norm": 0.7364457249641418, "learning_rate": 2.8176534770673614e-06, "loss": 2.0832, "step": 22817 }, { "epoch": 0.76, "grad_norm": 0.753216028213501, "learning_rate": 2.8169139557075097e-06, "loss": 2.1008, "step": 22818 }, { "epoch": 0.76, "grad_norm": 0.7421953082084656, "learning_rate": 2.8161745154979514e-06, "loss": 1.99, "step": 22819 }, { "epoch": 0.76, "grad_norm": 0.7326236963272095, "learning_rate": 2.8154351564470426e-06, "loss": 2.0345, "step": 22820 }, { "epoch": 0.76, "grad_norm": 0.7462367415428162, "learning_rate": 2.8146958785631427e-06, "loss": 2.0173, "step": 22821 }, { "epoch": 0.76, "grad_norm": 0.7705219984054565, "learning_rate": 2.8139566818545927e-06, "loss": 2.0778, "step": 22822 }, { "epoch": 0.76, "grad_norm": 0.7700719237327576, "learning_rate": 2.813217566329749e-06, "loss": 2.0618, "step": 22823 }, { "epoch": 0.76, "grad_norm": 0.7637203931808472, "learning_rate": 2.812478531996964e-06, "loss": 2.0402, "step": 22824 }, { "epoch": 0.76, "grad_norm": 0.7371863126754761, "learning_rate": 2.811739578864583e-06, "loss": 2.0052, "step": 22825 }, { "epoch": 0.76, "grad_norm": 0.736935555934906, "learning_rate": 2.8110007069409537e-06, "loss": 2.0662, "step": 22826 }, { "epoch": 0.76, "grad_norm": 0.7244799137115479, "learning_rate": 2.810261916234428e-06, "loss": 2.0455, "step": 22827 }, { "epoch": 0.76, "grad_norm": 0.7546132206916809, "learning_rate": 2.809523206753346e-06, "loss": 2.0784, "step": 22828 }, { "epoch": 0.76, "grad_norm": 0.7331262826919556, "learning_rate": 2.8087845785060576e-06, "loss": 2.0079, "step": 22829 }, { "epoch": 0.76, "grad_norm": 0.745187520980835, "learning_rate": 2.8080460315009093e-06, "loss": 2.0422, "step": 22830 }, { "epoch": 0.76, "grad_norm": 0.7327925562858582, "learning_rate": 2.8073075657462424e-06, "loss": 2.1107, "step": 22831 }, { "epoch": 0.76, "grad_norm": 0.7126834392547607, "learning_rate": 2.806569181250396e-06, "loss": 1.9922, "step": 22832 }, { "epoch": 0.76, "grad_norm": 0.7263754606246948, "learning_rate": 2.805830878021715e-06, "loss": 2.0121, "step": 22833 }, { "epoch": 0.76, "grad_norm": 0.7632060647010803, "learning_rate": 2.8050926560685467e-06, "loss": 2.073, "step": 22834 }, { "epoch": 0.76, "grad_norm": 0.7432557344436646, "learning_rate": 2.804354515399219e-06, "loss": 2.0291, "step": 22835 }, { "epoch": 0.76, "grad_norm": 0.7295231223106384, "learning_rate": 2.8036164560220768e-06, "loss": 2.0007, "step": 22836 }, { "epoch": 0.76, "grad_norm": 0.763951301574707, "learning_rate": 2.802878477945462e-06, "loss": 2.0797, "step": 22837 }, { "epoch": 0.76, "grad_norm": 0.7526400685310364, "learning_rate": 2.8021405811777045e-06, "loss": 2.071, "step": 22838 }, { "epoch": 0.76, "grad_norm": 0.7495793700218201, "learning_rate": 2.801402765727147e-06, "loss": 2.0452, "step": 22839 }, { "epoch": 0.76, "grad_norm": 0.7792326211929321, "learning_rate": 2.800665031602123e-06, "loss": 2.0447, "step": 22840 }, { "epoch": 0.76, "grad_norm": 0.7496023774147034, "learning_rate": 2.799927378810964e-06, "loss": 2.0561, "step": 22841 }, { "epoch": 0.76, "grad_norm": 0.7682244777679443, "learning_rate": 2.799189807362004e-06, "loss": 2.0746, "step": 22842 }, { "epoch": 0.76, "grad_norm": 0.7639046907424927, "learning_rate": 2.7984523172635845e-06, "loss": 2.0271, "step": 22843 }, { "epoch": 0.76, "grad_norm": 0.7418408989906311, "learning_rate": 2.797714908524024e-06, "loss": 2.0707, "step": 22844 }, { "epoch": 0.76, "grad_norm": 0.7404930591583252, "learning_rate": 2.79697758115166e-06, "loss": 2.0948, "step": 22845 }, { "epoch": 0.76, "grad_norm": 0.7557381391525269, "learning_rate": 2.796240335154824e-06, "loss": 2.1182, "step": 22846 }, { "epoch": 0.76, "grad_norm": 0.7494269013404846, "learning_rate": 2.795503170541843e-06, "loss": 2.0497, "step": 22847 }, { "epoch": 0.76, "grad_norm": 0.7013692855834961, "learning_rate": 2.7947660873210427e-06, "loss": 2.1248, "step": 22848 }, { "epoch": 0.76, "grad_norm": 0.7491157650947571, "learning_rate": 2.7940290855007525e-06, "loss": 1.993, "step": 22849 }, { "epoch": 0.76, "grad_norm": 0.7449735403060913, "learning_rate": 2.793292165089301e-06, "loss": 2.0824, "step": 22850 }, { "epoch": 0.76, "grad_norm": 0.7609108090400696, "learning_rate": 2.792555326095008e-06, "loss": 2.1298, "step": 22851 }, { "epoch": 0.76, "grad_norm": 0.7150327563285828, "learning_rate": 2.7918185685262045e-06, "loss": 2.0627, "step": 22852 }, { "epoch": 0.76, "grad_norm": 0.7578579783439636, "learning_rate": 2.7910818923912096e-06, "loss": 2.1449, "step": 22853 }, { "epoch": 0.76, "grad_norm": 0.7456454634666443, "learning_rate": 2.7903452976983436e-06, "loss": 2.0342, "step": 22854 }, { "epoch": 0.76, "grad_norm": 0.7480971813201904, "learning_rate": 2.7896087844559316e-06, "loss": 2.0237, "step": 22855 }, { "epoch": 0.76, "grad_norm": 0.7543898224830627, "learning_rate": 2.7888723526723004e-06, "loss": 2.0252, "step": 22856 }, { "epoch": 0.76, "grad_norm": 0.7621117830276489, "learning_rate": 2.7881360023557568e-06, "loss": 2.1324, "step": 22857 }, { "epoch": 0.76, "grad_norm": 0.755854070186615, "learning_rate": 2.7873997335146254e-06, "loss": 2.0158, "step": 22858 }, { "epoch": 0.76, "grad_norm": 0.7405219674110413, "learning_rate": 2.786663546157229e-06, "loss": 2.0967, "step": 22859 }, { "epoch": 0.76, "grad_norm": 0.7010526061058044, "learning_rate": 2.785927440291877e-06, "loss": 2.0497, "step": 22860 }, { "epoch": 0.76, "grad_norm": 0.7419354319572449, "learning_rate": 2.785191415926891e-06, "loss": 2.0273, "step": 22861 }, { "epoch": 0.76, "grad_norm": 0.7444115877151489, "learning_rate": 2.7844554730705853e-06, "loss": 2.0248, "step": 22862 }, { "epoch": 0.76, "grad_norm": 0.7346345782279968, "learning_rate": 2.783719611731269e-06, "loss": 2.1008, "step": 22863 }, { "epoch": 0.76, "grad_norm": 0.7400256395339966, "learning_rate": 2.78298383191726e-06, "loss": 1.9928, "step": 22864 }, { "epoch": 0.76, "grad_norm": 0.7487415075302124, "learning_rate": 2.7822481336368733e-06, "loss": 2.0493, "step": 22865 }, { "epoch": 0.76, "grad_norm": 0.7278691530227661, "learning_rate": 2.781512516898417e-06, "loss": 2.0095, "step": 22866 }, { "epoch": 0.76, "grad_norm": 0.7287070751190186, "learning_rate": 2.780776981710198e-06, "loss": 2.0571, "step": 22867 }, { "epoch": 0.76, "grad_norm": 0.7447271347045898, "learning_rate": 2.7800415280805337e-06, "loss": 2.0326, "step": 22868 }, { "epoch": 0.76, "grad_norm": 0.7377855181694031, "learning_rate": 2.7793061560177247e-06, "loss": 2.0406, "step": 22869 }, { "epoch": 0.76, "grad_norm": 0.759272038936615, "learning_rate": 2.778570865530088e-06, "loss": 2.0656, "step": 22870 }, { "epoch": 0.76, "grad_norm": 0.791037917137146, "learning_rate": 2.7778356566259214e-06, "loss": 2.0494, "step": 22871 }, { "epoch": 0.76, "grad_norm": 0.7595379948616028, "learning_rate": 2.777100529313538e-06, "loss": 2.0402, "step": 22872 }, { "epoch": 0.76, "grad_norm": 0.7502461075782776, "learning_rate": 2.7763654836012367e-06, "loss": 2.0139, "step": 22873 }, { "epoch": 0.76, "grad_norm": 0.7427219152450562, "learning_rate": 2.7756305194973278e-06, "loss": 2.0631, "step": 22874 }, { "epoch": 0.76, "grad_norm": 0.7593291997909546, "learning_rate": 2.774895637010111e-06, "loss": 1.9916, "step": 22875 }, { "epoch": 0.76, "grad_norm": 0.7484283447265625, "learning_rate": 2.7741608361478855e-06, "loss": 1.9805, "step": 22876 }, { "epoch": 0.76, "grad_norm": 0.7353966236114502, "learning_rate": 2.773426116918957e-06, "loss": 2.0106, "step": 22877 }, { "epoch": 0.76, "grad_norm": 0.7234967947006226, "learning_rate": 2.772691479331632e-06, "loss": 2.0825, "step": 22878 }, { "epoch": 0.76, "grad_norm": 0.7462483644485474, "learning_rate": 2.7719569233941956e-06, "loss": 2.102, "step": 22879 }, { "epoch": 0.76, "grad_norm": 0.7308499813079834, "learning_rate": 2.771222449114954e-06, "loss": 2.094, "step": 22880 }, { "epoch": 0.76, "grad_norm": 0.7385032773017883, "learning_rate": 2.7704880565022074e-06, "loss": 1.9976, "step": 22881 }, { "epoch": 0.76, "grad_norm": 0.7400795817375183, "learning_rate": 2.7697537455642476e-06, "loss": 1.9792, "step": 22882 }, { "epoch": 0.76, "grad_norm": 0.7241218090057373, "learning_rate": 2.769019516309376e-06, "loss": 2.0732, "step": 22883 }, { "epoch": 0.76, "grad_norm": 0.7485787868499756, "learning_rate": 2.7682853687458833e-06, "loss": 2.0907, "step": 22884 }, { "epoch": 0.76, "grad_norm": 0.7482932806015015, "learning_rate": 2.7675513028820613e-06, "loss": 2.0659, "step": 22885 }, { "epoch": 0.76, "grad_norm": 0.7356153130531311, "learning_rate": 2.766817318726206e-06, "loss": 2.0177, "step": 22886 }, { "epoch": 0.76, "grad_norm": 0.7160064578056335, "learning_rate": 2.7660834162866136e-06, "loss": 2.0052, "step": 22887 }, { "epoch": 0.76, "grad_norm": 0.7750246524810791, "learning_rate": 2.7653495955715702e-06, "loss": 2.1174, "step": 22888 }, { "epoch": 0.76, "grad_norm": 0.7555484771728516, "learning_rate": 2.7646158565893644e-06, "loss": 2.0059, "step": 22889 }, { "epoch": 0.76, "grad_norm": 0.7672293186187744, "learning_rate": 2.7638821993482913e-06, "loss": 2.0548, "step": 22890 }, { "epoch": 0.76, "grad_norm": 0.7527955770492554, "learning_rate": 2.763148623856633e-06, "loss": 2.0988, "step": 22891 }, { "epoch": 0.76, "grad_norm": 0.750593364238739, "learning_rate": 2.7624151301226843e-06, "loss": 2.0521, "step": 22892 }, { "epoch": 0.76, "grad_norm": 0.7746703028678894, "learning_rate": 2.761681718154724e-06, "loss": 2.0838, "step": 22893 }, { "epoch": 0.76, "grad_norm": 0.7591778635978699, "learning_rate": 2.7609483879610444e-06, "loss": 2.1118, "step": 22894 }, { "epoch": 0.76, "grad_norm": 0.7384560108184814, "learning_rate": 2.7602151395499254e-06, "loss": 2.0234, "step": 22895 }, { "epoch": 0.76, "grad_norm": 0.7612176537513733, "learning_rate": 2.7594819729296553e-06, "loss": 2.0296, "step": 22896 }, { "epoch": 0.76, "grad_norm": 0.7423012852668762, "learning_rate": 2.7587488881085145e-06, "loss": 2.0502, "step": 22897 }, { "epoch": 0.76, "grad_norm": 0.7309958338737488, "learning_rate": 2.7580158850947813e-06, "loss": 2.0811, "step": 22898 }, { "epoch": 0.76, "grad_norm": 0.7536032199859619, "learning_rate": 2.7572829638967415e-06, "loss": 2.0724, "step": 22899 }, { "epoch": 0.76, "grad_norm": 0.7247188687324524, "learning_rate": 2.756550124522677e-06, "loss": 2.0118, "step": 22900 }, { "epoch": 0.76, "grad_norm": 0.7585209608078003, "learning_rate": 2.755817366980863e-06, "loss": 2.0362, "step": 22901 }, { "epoch": 0.76, "grad_norm": 0.7693536281585693, "learning_rate": 2.755084691279577e-06, "loss": 2.0474, "step": 22902 }, { "epoch": 0.76, "grad_norm": 0.7552255988121033, "learning_rate": 2.754352097427101e-06, "loss": 2.1067, "step": 22903 }, { "epoch": 0.76, "grad_norm": 0.7555841207504272, "learning_rate": 2.7536195854317047e-06, "loss": 1.9756, "step": 22904 }, { "epoch": 0.76, "grad_norm": 0.7392663359642029, "learning_rate": 2.752887155301672e-06, "loss": 2.0547, "step": 22905 }, { "epoch": 0.76, "grad_norm": 0.7157472968101501, "learning_rate": 2.752154807045272e-06, "loss": 2.0232, "step": 22906 }, { "epoch": 0.76, "grad_norm": 0.7753708958625793, "learning_rate": 2.7514225406707773e-06, "loss": 2.0479, "step": 22907 }, { "epoch": 0.76, "grad_norm": 0.7481390833854675, "learning_rate": 2.7506903561864615e-06, "loss": 2.0746, "step": 22908 }, { "epoch": 0.76, "grad_norm": 0.7574496865272522, "learning_rate": 2.749958253600601e-06, "loss": 2.0248, "step": 22909 }, { "epoch": 0.76, "grad_norm": 0.7433311343193054, "learning_rate": 2.7492262329214636e-06, "loss": 2.0389, "step": 22910 }, { "epoch": 0.76, "grad_norm": 0.7587152719497681, "learning_rate": 2.7484942941573155e-06, "loss": 2.0246, "step": 22911 }, { "epoch": 0.76, "grad_norm": 0.7461514472961426, "learning_rate": 2.7477624373164326e-06, "loss": 2.0783, "step": 22912 }, { "epoch": 0.76, "grad_norm": 0.7583099007606506, "learning_rate": 2.7470306624070753e-06, "loss": 2.0177, "step": 22913 }, { "epoch": 0.76, "grad_norm": 0.7543014287948608, "learning_rate": 2.7462989694375186e-06, "loss": 2.1068, "step": 22914 }, { "epoch": 0.76, "grad_norm": 0.7607203722000122, "learning_rate": 2.7455673584160223e-06, "loss": 2.0475, "step": 22915 }, { "epoch": 0.76, "grad_norm": 0.7467170357704163, "learning_rate": 2.744835829350857e-06, "loss": 2.0961, "step": 22916 }, { "epoch": 0.76, "grad_norm": 0.7372157573699951, "learning_rate": 2.7441043822502823e-06, "loss": 2.0357, "step": 22917 }, { "epoch": 0.76, "grad_norm": 0.7519665956497192, "learning_rate": 2.743373017122566e-06, "loss": 2.0472, "step": 22918 }, { "epoch": 0.76, "grad_norm": 0.7432259917259216, "learning_rate": 2.742641733975969e-06, "loss": 2.0728, "step": 22919 }, { "epoch": 0.76, "grad_norm": 0.7239487171173096, "learning_rate": 2.741910532818749e-06, "loss": 2.0473, "step": 22920 }, { "epoch": 0.76, "grad_norm": 0.7432849407196045, "learning_rate": 2.7411794136591706e-06, "loss": 2.0659, "step": 22921 }, { "epoch": 0.76, "grad_norm": 0.7440228462219238, "learning_rate": 2.7404483765054955e-06, "loss": 2.068, "step": 22922 }, { "epoch": 0.76, "grad_norm": 0.7427909970283508, "learning_rate": 2.7397174213659815e-06, "loss": 2.0012, "step": 22923 }, { "epoch": 0.76, "grad_norm": 0.7215114235877991, "learning_rate": 2.738986548248881e-06, "loss": 2.0395, "step": 22924 }, { "epoch": 0.76, "grad_norm": 0.7525121569633484, "learning_rate": 2.7382557571624592e-06, "loss": 2.0202, "step": 22925 }, { "epoch": 0.76, "grad_norm": 0.7128960490226746, "learning_rate": 2.737525048114964e-06, "loss": 2.0649, "step": 22926 }, { "epoch": 0.76, "grad_norm": 0.7379663586616516, "learning_rate": 2.7367944211146567e-06, "loss": 2.0634, "step": 22927 }, { "epoch": 0.76, "grad_norm": 0.7514234185218811, "learning_rate": 2.736063876169791e-06, "loss": 2.0317, "step": 22928 }, { "epoch": 0.76, "grad_norm": 0.7321406602859497, "learning_rate": 2.7353334132886157e-06, "loss": 1.9656, "step": 22929 }, { "epoch": 0.76, "grad_norm": 0.756450891494751, "learning_rate": 2.7346030324793847e-06, "loss": 2.0149, "step": 22930 }, { "epoch": 0.76, "grad_norm": 0.7578282952308655, "learning_rate": 2.7338727337503546e-06, "loss": 2.0036, "step": 22931 }, { "epoch": 0.76, "grad_norm": 0.7585911154747009, "learning_rate": 2.7331425171097713e-06, "loss": 2.075, "step": 22932 }, { "epoch": 0.76, "grad_norm": 0.7233803868293762, "learning_rate": 2.732412382565882e-06, "loss": 2.0164, "step": 22933 }, { "epoch": 0.76, "grad_norm": 0.7444892525672913, "learning_rate": 2.731682330126939e-06, "loss": 2.0724, "step": 22934 }, { "epoch": 0.76, "grad_norm": 0.7486906051635742, "learning_rate": 2.7309523598011922e-06, "loss": 2.0469, "step": 22935 }, { "epoch": 0.76, "grad_norm": 0.7834153771400452, "learning_rate": 2.7302224715968863e-06, "loss": 2.0159, "step": 22936 }, { "epoch": 0.76, "grad_norm": 0.7678384184837341, "learning_rate": 2.729492665522262e-06, "loss": 2.0496, "step": 22937 }, { "epoch": 0.76, "grad_norm": 0.7662795782089233, "learning_rate": 2.728762941585573e-06, "loss": 2.0434, "step": 22938 }, { "epoch": 0.76, "grad_norm": 0.717410147190094, "learning_rate": 2.7280332997950554e-06, "loss": 2.1004, "step": 22939 }, { "epoch": 0.76, "grad_norm": 0.7340196371078491, "learning_rate": 2.7273037401589586e-06, "loss": 2.0519, "step": 22940 }, { "epoch": 0.76, "grad_norm": 0.7601124048233032, "learning_rate": 2.726574262685522e-06, "loss": 2.064, "step": 22941 }, { "epoch": 0.76, "grad_norm": 0.7916771769523621, "learning_rate": 2.725844867382983e-06, "loss": 2.1286, "step": 22942 }, { "epoch": 0.76, "grad_norm": 0.7689327001571655, "learning_rate": 2.7251155542595862e-06, "loss": 2.0128, "step": 22943 }, { "epoch": 0.76, "grad_norm": 0.7314073443412781, "learning_rate": 2.7243863233235735e-06, "loss": 2.0524, "step": 22944 }, { "epoch": 0.76, "grad_norm": 0.7353020310401917, "learning_rate": 2.7236571745831806e-06, "loss": 2.0648, "step": 22945 }, { "epoch": 0.76, "grad_norm": 0.742962658405304, "learning_rate": 2.7229281080466407e-06, "loss": 2.1079, "step": 22946 }, { "epoch": 0.76, "grad_norm": 0.7524154186248779, "learning_rate": 2.722199123722198e-06, "loss": 2.1294, "step": 22947 }, { "epoch": 0.76, "grad_norm": 0.7065379023551941, "learning_rate": 2.721470221618081e-06, "loss": 2.0317, "step": 22948 }, { "epoch": 0.76, "grad_norm": 0.7527520060539246, "learning_rate": 2.7207414017425305e-06, "loss": 2.0733, "step": 22949 }, { "epoch": 0.76, "grad_norm": 0.7462311387062073, "learning_rate": 2.720012664103775e-06, "loss": 2.0514, "step": 22950 }, { "epoch": 0.76, "grad_norm": 0.7339749336242676, "learning_rate": 2.7192840087100537e-06, "loss": 2.0711, "step": 22951 }, { "epoch": 0.76, "grad_norm": 0.7383087873458862, "learning_rate": 2.718555435569591e-06, "loss": 2.0168, "step": 22952 }, { "epoch": 0.76, "grad_norm": 0.7327321171760559, "learning_rate": 2.7178269446906236e-06, "loss": 2.0595, "step": 22953 }, { "epoch": 0.76, "grad_norm": 0.7754136323928833, "learning_rate": 2.717098536081381e-06, "loss": 2.0204, "step": 22954 }, { "epoch": 0.76, "grad_norm": 0.7481711506843567, "learning_rate": 2.7163702097500877e-06, "loss": 2.0973, "step": 22955 }, { "epoch": 0.76, "grad_norm": 0.7621837854385376, "learning_rate": 2.715641965704975e-06, "loss": 1.9994, "step": 22956 }, { "epoch": 0.76, "grad_norm": 0.748285174369812, "learning_rate": 2.7149138039542735e-06, "loss": 2.0393, "step": 22957 }, { "epoch": 0.76, "grad_norm": 0.744909405708313, "learning_rate": 2.714185724506205e-06, "loss": 2.0609, "step": 22958 }, { "epoch": 0.76, "grad_norm": 0.7442793250083923, "learning_rate": 2.7134577273689955e-06, "loss": 2.0951, "step": 22959 }, { "epoch": 0.76, "grad_norm": 0.7711385488510132, "learning_rate": 2.7127298125508717e-06, "loss": 2.0293, "step": 22960 }, { "epoch": 0.76, "grad_norm": 0.7569025754928589, "learning_rate": 2.712001980060053e-06, "loss": 2.0161, "step": 22961 }, { "epoch": 0.76, "grad_norm": 0.7248103618621826, "learning_rate": 2.7112742299047678e-06, "loss": 2.0235, "step": 22962 }, { "epoch": 0.76, "grad_norm": 0.7388492822647095, "learning_rate": 2.7105465620932357e-06, "loss": 2.0284, "step": 22963 }, { "epoch": 0.76, "grad_norm": 0.7385640740394592, "learning_rate": 2.7098189766336726e-06, "loss": 2.077, "step": 22964 }, { "epoch": 0.76, "grad_norm": 0.7434827089309692, "learning_rate": 2.709091473534302e-06, "loss": 1.9644, "step": 22965 }, { "epoch": 0.76, "grad_norm": 0.7498022317886353, "learning_rate": 2.708364052803346e-06, "loss": 2.1104, "step": 22966 }, { "epoch": 0.76, "grad_norm": 0.7398502230644226, "learning_rate": 2.70763671444902e-06, "loss": 2.0626, "step": 22967 }, { "epoch": 0.76, "grad_norm": 0.750668466091156, "learning_rate": 2.7069094584795376e-06, "loss": 2.055, "step": 22968 }, { "epoch": 0.76, "grad_norm": 0.7551640272140503, "learning_rate": 2.7061822849031215e-06, "loss": 2.0665, "step": 22969 }, { "epoch": 0.76, "grad_norm": 0.7414166331291199, "learning_rate": 2.7054551937279793e-06, "loss": 2.121, "step": 22970 }, { "epoch": 0.76, "grad_norm": 0.7332898378372192, "learning_rate": 2.704728184962333e-06, "loss": 2.0766, "step": 22971 }, { "epoch": 0.76, "grad_norm": 0.7825677990913391, "learning_rate": 2.7040012586143894e-06, "loss": 2.0362, "step": 22972 }, { "epoch": 0.76, "grad_norm": 0.7340701818466187, "learning_rate": 2.703274414692366e-06, "loss": 1.9866, "step": 22973 }, { "epoch": 0.76, "grad_norm": 0.7423158884048462, "learning_rate": 2.702547653204469e-06, "loss": 2.0693, "step": 22974 }, { "epoch": 0.76, "grad_norm": 0.7552281618118286, "learning_rate": 2.7018209741589163e-06, "loss": 2.0675, "step": 22975 }, { "epoch": 0.76, "grad_norm": 0.7242433428764343, "learning_rate": 2.701094377563912e-06, "loss": 2.0629, "step": 22976 }, { "epoch": 0.76, "grad_norm": 0.742133378982544, "learning_rate": 2.700367863427662e-06, "loss": 2.0333, "step": 22977 }, { "epoch": 0.76, "grad_norm": 0.7377882599830627, "learning_rate": 2.6996414317583787e-06, "loss": 1.9656, "step": 22978 }, { "epoch": 0.76, "grad_norm": 0.7440063953399658, "learning_rate": 2.6989150825642717e-06, "loss": 2.0339, "step": 22979 }, { "epoch": 0.76, "grad_norm": 0.7440783381462097, "learning_rate": 2.698188815853542e-06, "loss": 2.1122, "step": 22980 }, { "epoch": 0.76, "grad_norm": 0.7194262742996216, "learning_rate": 2.6974626316343935e-06, "loss": 2.0558, "step": 22981 }, { "epoch": 0.76, "grad_norm": 0.75040203332901, "learning_rate": 2.696736529915036e-06, "loss": 2.1216, "step": 22982 }, { "epoch": 0.76, "grad_norm": 0.7275949120521545, "learning_rate": 2.696010510703665e-06, "loss": 2.0357, "step": 22983 }, { "epoch": 0.76, "grad_norm": 0.746261715888977, "learning_rate": 2.6952845740084877e-06, "loss": 2.0502, "step": 22984 }, { "epoch": 0.76, "grad_norm": 0.7711354494094849, "learning_rate": 2.6945587198377087e-06, "loss": 2.0533, "step": 22985 }, { "epoch": 0.76, "grad_norm": 0.7380674481391907, "learning_rate": 2.6938329481995195e-06, "loss": 1.9954, "step": 22986 }, { "epoch": 0.76, "grad_norm": 0.723931074142456, "learning_rate": 2.6931072591021237e-06, "loss": 2.0349, "step": 22987 }, { "epoch": 0.76, "grad_norm": 0.7307219505310059, "learning_rate": 2.6923816525537217e-06, "loss": 2.0, "step": 22988 }, { "epoch": 0.76, "grad_norm": 0.7910880446434021, "learning_rate": 2.6916561285625096e-06, "loss": 2.0405, "step": 22989 }, { "epoch": 0.76, "grad_norm": 0.7574506402015686, "learning_rate": 2.6909306871366814e-06, "loss": 2.018, "step": 22990 }, { "epoch": 0.76, "grad_norm": 0.7464982867240906, "learning_rate": 2.6902053282844366e-06, "loss": 2.045, "step": 22991 }, { "epoch": 0.76, "grad_norm": 0.7294722199440002, "learning_rate": 2.6894800520139653e-06, "loss": 1.964, "step": 22992 }, { "epoch": 0.76, "grad_norm": 0.7535343766212463, "learning_rate": 2.6887548583334666e-06, "loss": 2.0889, "step": 22993 }, { "epoch": 0.77, "grad_norm": 0.7569416165351868, "learning_rate": 2.6880297472511287e-06, "loss": 1.9707, "step": 22994 }, { "epoch": 0.77, "grad_norm": 0.7502040863037109, "learning_rate": 2.687304718775148e-06, "loss": 2.0631, "step": 22995 }, { "epoch": 0.77, "grad_norm": 0.7440193295478821, "learning_rate": 2.68657977291371e-06, "loss": 2.0486, "step": 22996 }, { "epoch": 0.77, "grad_norm": 0.7516134977340698, "learning_rate": 2.685854909675011e-06, "loss": 1.9759, "step": 22997 }, { "epoch": 0.77, "grad_norm": 0.7502976655960083, "learning_rate": 2.685130129067236e-06, "loss": 1.9725, "step": 22998 }, { "epoch": 0.77, "grad_norm": 0.7361271977424622, "learning_rate": 2.6844054310985713e-06, "loss": 2.0414, "step": 22999 }, { "epoch": 0.77, "grad_norm": 0.7320762276649475, "learning_rate": 2.6836808157772055e-06, "loss": 2.0866, "step": 23000 }, { "epoch": 0.77, "grad_norm": 0.7951169013977051, "learning_rate": 2.682956283111331e-06, "loss": 2.0989, "step": 23001 }, { "epoch": 0.77, "grad_norm": 0.7393758296966553, "learning_rate": 2.6822318331091267e-06, "loss": 1.9684, "step": 23002 }, { "epoch": 0.77, "grad_norm": 0.7511451244354248, "learning_rate": 2.6815074657787764e-06, "loss": 2.0832, "step": 23003 }, { "epoch": 0.77, "grad_norm": 0.7790981531143188, "learning_rate": 2.680783181128468e-06, "loss": 2.0408, "step": 23004 }, { "epoch": 0.77, "grad_norm": 0.7416275143623352, "learning_rate": 2.680058979166379e-06, "loss": 2.0911, "step": 23005 }, { "epoch": 0.77, "grad_norm": 0.7401715517044067, "learning_rate": 2.679334859900694e-06, "loss": 1.9541, "step": 23006 }, { "epoch": 0.77, "grad_norm": 0.7430713176727295, "learning_rate": 2.6786108233395993e-06, "loss": 2.0818, "step": 23007 }, { "epoch": 0.77, "grad_norm": 0.7540525794029236, "learning_rate": 2.677886869491263e-06, "loss": 1.9796, "step": 23008 }, { "epoch": 0.77, "grad_norm": 0.741267204284668, "learning_rate": 2.677162998363869e-06, "loss": 2.0354, "step": 23009 }, { "epoch": 0.77, "grad_norm": 0.7692165374755859, "learning_rate": 2.6764392099656e-06, "loss": 2.0401, "step": 23010 }, { "epoch": 0.77, "grad_norm": 0.7353464961051941, "learning_rate": 2.6757155043046278e-06, "loss": 2.0227, "step": 23011 }, { "epoch": 0.77, "grad_norm": 0.7519355416297913, "learning_rate": 2.6749918813891264e-06, "loss": 2.073, "step": 23012 }, { "epoch": 0.77, "grad_norm": 0.7517755627632141, "learning_rate": 2.6742683412272774e-06, "loss": 2.0869, "step": 23013 }, { "epoch": 0.77, "grad_norm": 0.7612603306770325, "learning_rate": 2.673544883827248e-06, "loss": 2.0571, "step": 23014 }, { "epoch": 0.77, "grad_norm": 0.7214151620864868, "learning_rate": 2.6728215091972143e-06, "loss": 2.0909, "step": 23015 }, { "epoch": 0.77, "grad_norm": 0.7723817825317383, "learning_rate": 2.6720982173453523e-06, "loss": 2.1102, "step": 23016 }, { "epoch": 0.77, "grad_norm": 0.7285270094871521, "learning_rate": 2.671375008279831e-06, "loss": 2.0992, "step": 23017 }, { "epoch": 0.77, "grad_norm": 0.7769633531570435, "learning_rate": 2.6706518820088158e-06, "loss": 2.0876, "step": 23018 }, { "epoch": 0.77, "grad_norm": 0.7566593289375305, "learning_rate": 2.6699288385404844e-06, "loss": 2.0711, "step": 23019 }, { "epoch": 0.77, "grad_norm": 0.7480419874191284, "learning_rate": 2.669205877883e-06, "loss": 2.0122, "step": 23020 }, { "epoch": 0.77, "grad_norm": 0.7765623927116394, "learning_rate": 2.668483000044528e-06, "loss": 2.1112, "step": 23021 }, { "epoch": 0.77, "grad_norm": 0.7176385521888733, "learning_rate": 2.6677602050332398e-06, "loss": 1.9653, "step": 23022 }, { "epoch": 0.77, "grad_norm": 0.7475897073745728, "learning_rate": 2.6670374928573016e-06, "loss": 2.0737, "step": 23023 }, { "epoch": 0.77, "grad_norm": 0.7404859066009521, "learning_rate": 2.666314863524877e-06, "loss": 2.087, "step": 23024 }, { "epoch": 0.77, "grad_norm": 0.7478281259536743, "learning_rate": 2.6655923170441257e-06, "loss": 2.0862, "step": 23025 }, { "epoch": 0.77, "grad_norm": 0.7813414335250854, "learning_rate": 2.6648698534232165e-06, "loss": 2.0224, "step": 23026 }, { "epoch": 0.77, "grad_norm": 0.7516582608222961, "learning_rate": 2.6641474726703066e-06, "loss": 2.0702, "step": 23027 }, { "epoch": 0.77, "grad_norm": 0.7650073170661926, "learning_rate": 2.663425174793559e-06, "loss": 2.0845, "step": 23028 }, { "epoch": 0.77, "grad_norm": 0.7453831434249878, "learning_rate": 2.66270295980114e-06, "loss": 1.9919, "step": 23029 }, { "epoch": 0.77, "grad_norm": 0.740872859954834, "learning_rate": 2.6619808277011973e-06, "loss": 1.9525, "step": 23030 }, { "epoch": 0.77, "grad_norm": 0.7351100444793701, "learning_rate": 2.661258778501895e-06, "loss": 2.0855, "step": 23031 }, { "epoch": 0.77, "grad_norm": 0.7596026659011841, "learning_rate": 2.6605368122113926e-06, "loss": 2.0735, "step": 23032 }, { "epoch": 0.77, "grad_norm": 0.7400634288787842, "learning_rate": 2.6598149288378438e-06, "loss": 2.0669, "step": 23033 }, { "epoch": 0.77, "grad_norm": 0.7812380790710449, "learning_rate": 2.6590931283894015e-06, "loss": 1.9935, "step": 23034 }, { "epoch": 0.77, "grad_norm": 0.7489427328109741, "learning_rate": 2.658371410874222e-06, "loss": 2.0787, "step": 23035 }, { "epoch": 0.77, "grad_norm": 0.7496021389961243, "learning_rate": 2.6576497763004637e-06, "loss": 2.0195, "step": 23036 }, { "epoch": 0.77, "grad_norm": 0.7212013006210327, "learning_rate": 2.6569282246762718e-06, "loss": 2.0358, "step": 23037 }, { "epoch": 0.77, "grad_norm": 0.7791311144828796, "learning_rate": 2.656206756009805e-06, "loss": 2.0089, "step": 23038 }, { "epoch": 0.77, "grad_norm": 0.7476695775985718, "learning_rate": 2.6554853703092097e-06, "loss": 2.0907, "step": 23039 }, { "epoch": 0.77, "grad_norm": 0.7322556972503662, "learning_rate": 2.6547640675826335e-06, "loss": 2.0, "step": 23040 }, { "epoch": 0.77, "grad_norm": 0.7558596134185791, "learning_rate": 2.654042847838227e-06, "loss": 2.0803, "step": 23041 }, { "epoch": 0.77, "grad_norm": 0.7448135018348694, "learning_rate": 2.653321711084147e-06, "loss": 2.0489, "step": 23042 }, { "epoch": 0.77, "grad_norm": 0.7571619153022766, "learning_rate": 2.6526006573285268e-06, "loss": 2.1088, "step": 23043 }, { "epoch": 0.77, "grad_norm": 0.7577088475227356, "learning_rate": 2.6518796865795173e-06, "loss": 2.1233, "step": 23044 }, { "epoch": 0.77, "grad_norm": 0.7685409188270569, "learning_rate": 2.651158798845268e-06, "loss": 2.091, "step": 23045 }, { "epoch": 0.77, "grad_norm": 0.7400873899459839, "learning_rate": 2.6504379941339164e-06, "loss": 2.1133, "step": 23046 }, { "epoch": 0.77, "grad_norm": 0.7754441499710083, "learning_rate": 2.6497172724536126e-06, "loss": 2.0508, "step": 23047 }, { "epoch": 0.77, "grad_norm": 0.7478275299072266, "learning_rate": 2.648996633812495e-06, "loss": 2.0911, "step": 23048 }, { "epoch": 0.77, "grad_norm": 0.7425262928009033, "learning_rate": 2.6482760782187034e-06, "loss": 2.1021, "step": 23049 }, { "epoch": 0.77, "grad_norm": 0.8017736077308655, "learning_rate": 2.6475556056803784e-06, "loss": 2.1514, "step": 23050 }, { "epoch": 0.77, "grad_norm": 0.7769775986671448, "learning_rate": 2.6468352162056656e-06, "loss": 2.0786, "step": 23051 }, { "epoch": 0.77, "grad_norm": 0.7384634613990784, "learning_rate": 2.6461149098026985e-06, "loss": 2.0232, "step": 23052 }, { "epoch": 0.77, "grad_norm": 0.7649946212768555, "learning_rate": 2.645394686479613e-06, "loss": 2.0206, "step": 23053 }, { "epoch": 0.77, "grad_norm": 0.7619380950927734, "learning_rate": 2.64467454624455e-06, "loss": 1.9989, "step": 23054 }, { "epoch": 0.77, "grad_norm": 0.748152494430542, "learning_rate": 2.6439544891056445e-06, "loss": 1.9973, "step": 23055 }, { "epoch": 0.77, "grad_norm": 0.7547283172607422, "learning_rate": 2.6432345150710257e-06, "loss": 2.0618, "step": 23056 }, { "epoch": 0.77, "grad_norm": 0.7643991708755493, "learning_rate": 2.6425146241488332e-06, "loss": 2.0876, "step": 23057 }, { "epoch": 0.77, "grad_norm": 0.7142226099967957, "learning_rate": 2.641794816347202e-06, "loss": 1.9976, "step": 23058 }, { "epoch": 0.77, "grad_norm": 0.7705869674682617, "learning_rate": 2.6410750916742556e-06, "loss": 2.0575, "step": 23059 }, { "epoch": 0.77, "grad_norm": 0.7607940435409546, "learning_rate": 2.6403554501381347e-06, "loss": 2.0959, "step": 23060 }, { "epoch": 0.77, "grad_norm": 0.7554407119750977, "learning_rate": 2.6396358917469644e-06, "loss": 1.969, "step": 23061 }, { "epoch": 0.77, "grad_norm": 0.7196580767631531, "learning_rate": 2.638916416508871e-06, "loss": 2.0257, "step": 23062 }, { "epoch": 0.77, "grad_norm": 0.7893468141555786, "learning_rate": 2.6381970244319853e-06, "loss": 2.0831, "step": 23063 }, { "epoch": 0.77, "grad_norm": 0.7612413763999939, "learning_rate": 2.6374777155244425e-06, "loss": 2.056, "step": 23064 }, { "epoch": 0.77, "grad_norm": 0.7462210655212402, "learning_rate": 2.6367584897943543e-06, "loss": 2.0002, "step": 23065 }, { "epoch": 0.77, "grad_norm": 0.7644414305686951, "learning_rate": 2.6360393472498548e-06, "loss": 2.0238, "step": 23066 }, { "epoch": 0.77, "grad_norm": 0.728872537612915, "learning_rate": 2.635320287899069e-06, "loss": 1.9148, "step": 23067 }, { "epoch": 0.77, "grad_norm": 0.7525331377983093, "learning_rate": 2.634601311750116e-06, "loss": 2.0431, "step": 23068 }, { "epoch": 0.77, "grad_norm": 0.7628155946731567, "learning_rate": 2.6338824188111233e-06, "loss": 2.0404, "step": 23069 }, { "epoch": 0.77, "grad_norm": 0.7331541776657104, "learning_rate": 2.6331636090902103e-06, "loss": 2.0301, "step": 23070 }, { "epoch": 0.77, "grad_norm": 0.7288308143615723, "learning_rate": 2.632444882595494e-06, "loss": 2.0739, "step": 23071 }, { "epoch": 0.77, "grad_norm": 0.7466153502464294, "learning_rate": 2.6317262393350982e-06, "loss": 2.0188, "step": 23072 }, { "epoch": 0.77, "grad_norm": 0.7413694262504578, "learning_rate": 2.6310076793171447e-06, "loss": 2.0118, "step": 23073 }, { "epoch": 0.77, "grad_norm": 0.7112367153167725, "learning_rate": 2.6302892025497473e-06, "loss": 2.0452, "step": 23074 }, { "epoch": 0.77, "grad_norm": 0.7467717528343201, "learning_rate": 2.6295708090410198e-06, "loss": 1.9391, "step": 23075 }, { "epoch": 0.77, "grad_norm": 0.7682757377624512, "learning_rate": 2.6288524987990847e-06, "loss": 2.0164, "step": 23076 }, { "epoch": 0.77, "grad_norm": 0.7590654492378235, "learning_rate": 2.6281342718320525e-06, "loss": 2.1162, "step": 23077 }, { "epoch": 0.77, "grad_norm": 0.7253022789955139, "learning_rate": 2.6274161281480403e-06, "loss": 2.0863, "step": 23078 }, { "epoch": 0.77, "grad_norm": 0.7303053736686707, "learning_rate": 2.626698067755158e-06, "loss": 2.0302, "step": 23079 }, { "epoch": 0.77, "grad_norm": 0.7731384634971619, "learning_rate": 2.625980090661523e-06, "loss": 2.0783, "step": 23080 }, { "epoch": 0.77, "grad_norm": 0.7907983660697937, "learning_rate": 2.625262196875239e-06, "loss": 2.0571, "step": 23081 }, { "epoch": 0.77, "grad_norm": 0.7206768989562988, "learning_rate": 2.624544386404425e-06, "loss": 2.0121, "step": 23082 }, { "epoch": 0.77, "grad_norm": 0.7531947493553162, "learning_rate": 2.623826659257186e-06, "loss": 2.0543, "step": 23083 }, { "epoch": 0.77, "grad_norm": 0.7287341952323914, "learning_rate": 2.623109015441627e-06, "loss": 2.0724, "step": 23084 }, { "epoch": 0.77, "grad_norm": 0.7168127298355103, "learning_rate": 2.622391454965859e-06, "loss": 1.9957, "step": 23085 }, { "epoch": 0.77, "grad_norm": 0.7427310347557068, "learning_rate": 2.6216739778379953e-06, "loss": 2.1157, "step": 23086 }, { "epoch": 0.77, "grad_norm": 0.7418673038482666, "learning_rate": 2.620956584066128e-06, "loss": 2.0584, "step": 23087 }, { "epoch": 0.77, "grad_norm": 0.74381023645401, "learning_rate": 2.6202392736583695e-06, "loss": 2.0962, "step": 23088 }, { "epoch": 0.77, "grad_norm": 0.7382112741470337, "learning_rate": 2.6195220466228244e-06, "loss": 2.0634, "step": 23089 }, { "epoch": 0.77, "grad_norm": 0.7506797909736633, "learning_rate": 2.618804902967592e-06, "loss": 2.0018, "step": 23090 }, { "epoch": 0.77, "grad_norm": 0.7490646839141846, "learning_rate": 2.6180878427007793e-06, "loss": 2.0783, "step": 23091 }, { "epoch": 0.77, "grad_norm": 0.7715083360671997, "learning_rate": 2.617370865830483e-06, "loss": 2.0635, "step": 23092 }, { "epoch": 0.77, "grad_norm": 0.852545440196991, "learning_rate": 2.616653972364801e-06, "loss": 2.0281, "step": 23093 }, { "epoch": 0.77, "grad_norm": 0.7549353837966919, "learning_rate": 2.6159371623118357e-06, "loss": 2.0399, "step": 23094 }, { "epoch": 0.77, "grad_norm": 0.7757030129432678, "learning_rate": 2.6152204356796885e-06, "loss": 2.0463, "step": 23095 }, { "epoch": 0.77, "grad_norm": 0.7799726128578186, "learning_rate": 2.6145037924764517e-06, "loss": 2.0482, "step": 23096 }, { "epoch": 0.77, "grad_norm": 0.7488247156143188, "learning_rate": 2.6137872327102207e-06, "loss": 2.0135, "step": 23097 }, { "epoch": 0.77, "grad_norm": 0.7174603939056396, "learning_rate": 2.6130707563890954e-06, "loss": 2.0425, "step": 23098 }, { "epoch": 0.77, "grad_norm": 0.7451650500297546, "learning_rate": 2.6123543635211645e-06, "loss": 2.0429, "step": 23099 }, { "epoch": 0.77, "grad_norm": 0.7573601007461548, "learning_rate": 2.611638054114528e-06, "loss": 2.0069, "step": 23100 }, { "epoch": 0.77, "grad_norm": 0.776407778263092, "learning_rate": 2.6109218281772707e-06, "loss": 2.0618, "step": 23101 }, { "epoch": 0.77, "grad_norm": 0.7536882758140564, "learning_rate": 2.6102056857174917e-06, "loss": 2.0988, "step": 23102 }, { "epoch": 0.77, "grad_norm": 0.7317177057266235, "learning_rate": 2.6094896267432744e-06, "loss": 1.9915, "step": 23103 }, { "epoch": 0.77, "grad_norm": 0.7501627206802368, "learning_rate": 2.608773651262716e-06, "loss": 2.0656, "step": 23104 }, { "epoch": 0.77, "grad_norm": 0.7642002701759338, "learning_rate": 2.6080577592839007e-06, "loss": 2.0248, "step": 23105 }, { "epoch": 0.77, "grad_norm": 0.7600533366203308, "learning_rate": 2.6073419508149147e-06, "loss": 2.0362, "step": 23106 }, { "epoch": 0.77, "grad_norm": 0.7495854496955872, "learning_rate": 2.606626225863845e-06, "loss": 2.0507, "step": 23107 }, { "epoch": 0.77, "grad_norm": 0.7516706585884094, "learning_rate": 2.605910584438783e-06, "loss": 2.0418, "step": 23108 }, { "epoch": 0.77, "grad_norm": 0.7292174696922302, "learning_rate": 2.605195026547811e-06, "loss": 2.0389, "step": 23109 }, { "epoch": 0.77, "grad_norm": 0.7485889196395874, "learning_rate": 2.6044795521990076e-06, "loss": 2.1145, "step": 23110 }, { "epoch": 0.77, "grad_norm": 0.7385496497154236, "learning_rate": 2.603764161400464e-06, "loss": 2.0826, "step": 23111 }, { "epoch": 0.77, "grad_norm": 0.74369877576828, "learning_rate": 2.603048854160254e-06, "loss": 2.077, "step": 23112 }, { "epoch": 0.77, "grad_norm": 0.7472503781318665, "learning_rate": 2.6023336304864666e-06, "loss": 2.0773, "step": 23113 }, { "epoch": 0.77, "grad_norm": 0.7360715866088867, "learning_rate": 2.601618490387179e-06, "loss": 1.9985, "step": 23114 }, { "epoch": 0.77, "grad_norm": 0.7419888377189636, "learning_rate": 2.6009034338704666e-06, "loss": 2.0924, "step": 23115 }, { "epoch": 0.77, "grad_norm": 0.7567590475082397, "learning_rate": 2.6001884609444093e-06, "loss": 2.1087, "step": 23116 }, { "epoch": 0.77, "grad_norm": 0.7183083295822144, "learning_rate": 2.5994735716170904e-06, "loss": 2.0426, "step": 23117 }, { "epoch": 0.77, "grad_norm": 0.7556402087211609, "learning_rate": 2.5987587658965817e-06, "loss": 1.9819, "step": 23118 }, { "epoch": 0.77, "grad_norm": 0.754566490650177, "learning_rate": 2.598044043790957e-06, "loss": 2.0659, "step": 23119 }, { "epoch": 0.77, "grad_norm": 0.7535070180892944, "learning_rate": 2.597329405308294e-06, "loss": 2.0231, "step": 23120 }, { "epoch": 0.77, "grad_norm": 0.7175498008728027, "learning_rate": 2.5966148504566635e-06, "loss": 2.0242, "step": 23121 }, { "epoch": 0.77, "grad_norm": 0.7413702011108398, "learning_rate": 2.5959003792441418e-06, "loss": 2.0853, "step": 23122 }, { "epoch": 0.77, "grad_norm": 0.7277328968048096, "learning_rate": 2.5951859916787947e-06, "loss": 2.0229, "step": 23123 }, { "epoch": 0.77, "grad_norm": 0.7225123643875122, "learning_rate": 2.5944716877687004e-06, "loss": 2.0333, "step": 23124 }, { "epoch": 0.77, "grad_norm": 0.7711116671562195, "learning_rate": 2.5937574675219222e-06, "loss": 1.944, "step": 23125 }, { "epoch": 0.77, "grad_norm": 0.7642003297805786, "learning_rate": 2.593043330946534e-06, "loss": 2.0519, "step": 23126 }, { "epoch": 0.77, "grad_norm": 0.7218152284622192, "learning_rate": 2.5923292780506016e-06, "loss": 2.1185, "step": 23127 }, { "epoch": 0.77, "grad_norm": 0.7598666548728943, "learning_rate": 2.591615308842189e-06, "loss": 2.0577, "step": 23128 }, { "epoch": 0.77, "grad_norm": 0.7734375, "learning_rate": 2.590901423329365e-06, "loss": 2.0318, "step": 23129 }, { "epoch": 0.77, "grad_norm": 0.7248088717460632, "learning_rate": 2.590187621520197e-06, "loss": 1.9938, "step": 23130 }, { "epoch": 0.77, "grad_norm": 0.737025797367096, "learning_rate": 2.5894739034227468e-06, "loss": 2.0121, "step": 23131 }, { "epoch": 0.77, "grad_norm": 0.7295243740081787, "learning_rate": 2.588760269045075e-06, "loss": 2.0869, "step": 23132 }, { "epoch": 0.77, "grad_norm": 0.7744941115379333, "learning_rate": 2.5880467183952483e-06, "loss": 2.0768, "step": 23133 }, { "epoch": 0.77, "grad_norm": 0.767135739326477, "learning_rate": 2.587333251481324e-06, "loss": 2.0954, "step": 23134 }, { "epoch": 0.77, "grad_norm": 0.7459158301353455, "learning_rate": 2.5866198683113664e-06, "loss": 2.0325, "step": 23135 }, { "epoch": 0.77, "grad_norm": 0.718892514705658, "learning_rate": 2.5859065688934302e-06, "loss": 2.0098, "step": 23136 }, { "epoch": 0.77, "grad_norm": 0.7337863445281982, "learning_rate": 2.58519335323558e-06, "loss": 2.0431, "step": 23137 }, { "epoch": 0.77, "grad_norm": 0.741270899772644, "learning_rate": 2.5844802213458666e-06, "loss": 2.1027, "step": 23138 }, { "epoch": 0.77, "grad_norm": 0.750564694404602, "learning_rate": 2.583767173232352e-06, "loss": 2.0825, "step": 23139 }, { "epoch": 0.77, "grad_norm": 0.7308881282806396, "learning_rate": 2.5830542089030906e-06, "loss": 2.0268, "step": 23140 }, { "epoch": 0.77, "grad_norm": 0.7434291839599609, "learning_rate": 2.5823413283661323e-06, "loss": 2.0363, "step": 23141 }, { "epoch": 0.77, "grad_norm": 0.727092444896698, "learning_rate": 2.581628531629534e-06, "loss": 2.0588, "step": 23142 }, { "epoch": 0.77, "grad_norm": 0.7616970539093018, "learning_rate": 2.5809158187013527e-06, "loss": 1.9752, "step": 23143 }, { "epoch": 0.77, "grad_norm": 0.7606179118156433, "learning_rate": 2.580203189589636e-06, "loss": 2.1009, "step": 23144 }, { "epoch": 0.77, "grad_norm": 0.7640833258628845, "learning_rate": 2.5794906443024335e-06, "loss": 2.0505, "step": 23145 }, { "epoch": 0.77, "grad_norm": 0.7441621422767639, "learning_rate": 2.5787781828477987e-06, "loss": 2.0578, "step": 23146 }, { "epoch": 0.77, "grad_norm": 0.7598081231117249, "learning_rate": 2.578065805233776e-06, "loss": 2.057, "step": 23147 }, { "epoch": 0.77, "grad_norm": 0.7681798934936523, "learning_rate": 2.577353511468419e-06, "loss": 2.0535, "step": 23148 }, { "epoch": 0.77, "grad_norm": 0.7360518574714661, "learning_rate": 2.5766413015597726e-06, "loss": 2.1291, "step": 23149 }, { "epoch": 0.77, "grad_norm": 0.7576141953468323, "learning_rate": 2.575929175515879e-06, "loss": 2.0669, "step": 23150 }, { "epoch": 0.77, "grad_norm": 0.7417619824409485, "learning_rate": 2.575217133344786e-06, "loss": 2.0052, "step": 23151 }, { "epoch": 0.77, "grad_norm": 0.7638119459152222, "learning_rate": 2.574505175054541e-06, "loss": 2.014, "step": 23152 }, { "epoch": 0.77, "grad_norm": 0.762856125831604, "learning_rate": 2.5737933006531866e-06, "loss": 2.0625, "step": 23153 }, { "epoch": 0.77, "grad_norm": 0.7657785415649414, "learning_rate": 2.5730815101487593e-06, "loss": 2.0657, "step": 23154 }, { "epoch": 0.77, "grad_norm": 0.7676966190338135, "learning_rate": 2.572369803549307e-06, "loss": 2.0354, "step": 23155 }, { "epoch": 0.77, "grad_norm": 0.7719388604164124, "learning_rate": 2.571658180862865e-06, "loss": 2.0395, "step": 23156 }, { "epoch": 0.77, "grad_norm": 0.7784674763679504, "learning_rate": 2.5709466420974793e-06, "loss": 2.0898, "step": 23157 }, { "epoch": 0.77, "grad_norm": 0.7600753903388977, "learning_rate": 2.5702351872611807e-06, "loss": 2.0087, "step": 23158 }, { "epoch": 0.77, "grad_norm": 0.7516697645187378, "learning_rate": 2.569523816362014e-06, "loss": 2.0531, "step": 23159 }, { "epoch": 0.77, "grad_norm": 0.7239949107170105, "learning_rate": 2.568812529408009e-06, "loss": 2.0799, "step": 23160 }, { "epoch": 0.77, "grad_norm": 0.7942061424255371, "learning_rate": 2.5681013264072085e-06, "loss": 2.0776, "step": 23161 }, { "epoch": 0.77, "grad_norm": 0.7459508776664734, "learning_rate": 2.567390207367644e-06, "loss": 2.0292, "step": 23162 }, { "epoch": 0.77, "grad_norm": 0.7878516912460327, "learning_rate": 2.566679172297345e-06, "loss": 2.0882, "step": 23163 }, { "epoch": 0.77, "grad_norm": 0.7609124779701233, "learning_rate": 2.565968221204349e-06, "loss": 2.1078, "step": 23164 }, { "epoch": 0.77, "grad_norm": 0.7543255686759949, "learning_rate": 2.5652573540966896e-06, "loss": 1.9891, "step": 23165 }, { "epoch": 0.77, "grad_norm": 0.7049428820610046, "learning_rate": 2.5645465709823968e-06, "loss": 2.0339, "step": 23166 }, { "epoch": 0.77, "grad_norm": 0.7621498703956604, "learning_rate": 2.5638358718694955e-06, "loss": 2.1192, "step": 23167 }, { "epoch": 0.77, "grad_norm": 0.7518184185028076, "learning_rate": 2.5631252567660212e-06, "loss": 2.07, "step": 23168 }, { "epoch": 0.77, "grad_norm": 0.7573378682136536, "learning_rate": 2.562414725679997e-06, "loss": 2.0919, "step": 23169 }, { "epoch": 0.77, "grad_norm": 0.7513402104377747, "learning_rate": 2.5617042786194547e-06, "loss": 1.9786, "step": 23170 }, { "epoch": 0.77, "grad_norm": 0.7442273497581482, "learning_rate": 2.560993915592418e-06, "loss": 1.9995, "step": 23171 }, { "epoch": 0.77, "grad_norm": 0.7423152327537537, "learning_rate": 2.5602836366069095e-06, "loss": 2.0797, "step": 23172 }, { "epoch": 0.77, "grad_norm": 0.7709124684333801, "learning_rate": 2.5595734416709574e-06, "loss": 2.0684, "step": 23173 }, { "epoch": 0.77, "grad_norm": 0.7575253844261169, "learning_rate": 2.558863330792586e-06, "loss": 2.0416, "step": 23174 }, { "epoch": 0.77, "grad_norm": 0.720293402671814, "learning_rate": 2.5581533039798156e-06, "loss": 1.9962, "step": 23175 }, { "epoch": 0.77, "grad_norm": 0.7539215683937073, "learning_rate": 2.5574433612406657e-06, "loss": 2.0716, "step": 23176 }, { "epoch": 0.77, "grad_norm": 0.7254588603973389, "learning_rate": 2.556733502583161e-06, "loss": 2.0819, "step": 23177 }, { "epoch": 0.77, "grad_norm": 0.748740553855896, "learning_rate": 2.5560237280153167e-06, "loss": 2.044, "step": 23178 }, { "epoch": 0.77, "grad_norm": 0.7665413618087769, "learning_rate": 2.5553140375451567e-06, "loss": 2.0638, "step": 23179 }, { "epoch": 0.77, "grad_norm": 0.7500876784324646, "learning_rate": 2.5546044311806926e-06, "loss": 2.0746, "step": 23180 }, { "epoch": 0.77, "grad_norm": 0.7474533319473267, "learning_rate": 2.553894908929947e-06, "loss": 1.9895, "step": 23181 }, { "epoch": 0.77, "grad_norm": 0.742586076259613, "learning_rate": 2.5531854708009298e-06, "loss": 2.0376, "step": 23182 }, { "epoch": 0.77, "grad_norm": 0.7254591584205627, "learning_rate": 2.552476116801662e-06, "loss": 2.0167, "step": 23183 }, { "epoch": 0.77, "grad_norm": 0.7116720080375671, "learning_rate": 2.5517668469401546e-06, "loss": 2.0238, "step": 23184 }, { "epoch": 0.77, "grad_norm": 0.776931881904602, "learning_rate": 2.5510576612244164e-06, "loss": 2.0222, "step": 23185 }, { "epoch": 0.77, "grad_norm": 0.7377454042434692, "learning_rate": 2.5503485596624645e-06, "loss": 2.0903, "step": 23186 }, { "epoch": 0.77, "grad_norm": 0.7658206820487976, "learning_rate": 2.549639542262311e-06, "loss": 2.0556, "step": 23187 }, { "epoch": 0.77, "grad_norm": 0.7470245361328125, "learning_rate": 2.548930609031963e-06, "loss": 2.0382, "step": 23188 }, { "epoch": 0.77, "grad_norm": 0.7469624876976013, "learning_rate": 2.548221759979429e-06, "loss": 2.0723, "step": 23189 }, { "epoch": 0.77, "grad_norm": 0.7347672581672668, "learning_rate": 2.5475129951127197e-06, "loss": 1.9887, "step": 23190 }, { "epoch": 0.77, "grad_norm": 0.734441876411438, "learning_rate": 2.546804314439839e-06, "loss": 2.0273, "step": 23191 }, { "epoch": 0.77, "grad_norm": 0.7524778246879578, "learning_rate": 2.546095717968795e-06, "loss": 2.0643, "step": 23192 }, { "epoch": 0.77, "grad_norm": 0.754393994808197, "learning_rate": 2.545387205707599e-06, "loss": 2.0395, "step": 23193 }, { "epoch": 0.77, "grad_norm": 0.7166599631309509, "learning_rate": 2.5446787776642436e-06, "loss": 2.0141, "step": 23194 }, { "epoch": 0.77, "grad_norm": 0.7594064474105835, "learning_rate": 2.5439704338467377e-06, "loss": 2.0152, "step": 23195 }, { "epoch": 0.77, "grad_norm": 0.7374521493911743, "learning_rate": 2.543262174263087e-06, "loss": 2.0053, "step": 23196 }, { "epoch": 0.77, "grad_norm": 0.7470736503601074, "learning_rate": 2.5425539989212913e-06, "loss": 2.0334, "step": 23197 }, { "epoch": 0.77, "grad_norm": 0.7143168449401855, "learning_rate": 2.5418459078293458e-06, "loss": 2.0485, "step": 23198 }, { "epoch": 0.77, "grad_norm": 0.760317862033844, "learning_rate": 2.5411379009952573e-06, "loss": 2.0388, "step": 23199 }, { "epoch": 0.77, "grad_norm": 0.7659141421318054, "learning_rate": 2.5404299784270193e-06, "loss": 2.0568, "step": 23200 }, { "epoch": 0.77, "grad_norm": 0.757943868637085, "learning_rate": 2.539722140132634e-06, "loss": 2.0628, "step": 23201 }, { "epoch": 0.77, "grad_norm": 0.7608553767204285, "learning_rate": 2.5390143861200932e-06, "loss": 2.0473, "step": 23202 }, { "epoch": 0.77, "grad_norm": 0.7445995211601257, "learning_rate": 2.5383067163973983e-06, "loss": 2.0883, "step": 23203 }, { "epoch": 0.77, "grad_norm": 0.7729456424713135, "learning_rate": 2.5375991309725388e-06, "loss": 2.0178, "step": 23204 }, { "epoch": 0.77, "grad_norm": 0.7609541416168213, "learning_rate": 2.536891629853513e-06, "loss": 1.94, "step": 23205 }, { "epoch": 0.77, "grad_norm": 0.7459778189659119, "learning_rate": 2.5361842130483116e-06, "loss": 1.9653, "step": 23206 }, { "epoch": 0.77, "grad_norm": 0.744012713432312, "learning_rate": 2.5354768805649245e-06, "loss": 1.9696, "step": 23207 }, { "epoch": 0.77, "grad_norm": 0.7421319484710693, "learning_rate": 2.534769632411345e-06, "loss": 2.0124, "step": 23208 }, { "epoch": 0.77, "grad_norm": 0.7398131489753723, "learning_rate": 2.534062468595565e-06, "loss": 2.0859, "step": 23209 }, { "epoch": 0.77, "grad_norm": 0.7619217038154602, "learning_rate": 2.5333553891255722e-06, "loss": 2.1159, "step": 23210 }, { "epoch": 0.77, "grad_norm": 0.7545625567436218, "learning_rate": 2.5326483940093526e-06, "loss": 2.0154, "step": 23211 }, { "epoch": 0.77, "grad_norm": 0.7888229489326477, "learning_rate": 2.5319414832548973e-06, "loss": 2.0269, "step": 23212 }, { "epoch": 0.77, "grad_norm": 0.7586806416511536, "learning_rate": 2.5312346568701874e-06, "loss": 2.031, "step": 23213 }, { "epoch": 0.77, "grad_norm": 0.7270857691764832, "learning_rate": 2.5305279148632113e-06, "loss": 2.0826, "step": 23214 }, { "epoch": 0.77, "grad_norm": 0.7326899170875549, "learning_rate": 2.529821257241959e-06, "loss": 2.0701, "step": 23215 }, { "epoch": 0.77, "grad_norm": 0.7331536412239075, "learning_rate": 2.529114684014402e-06, "loss": 2.0617, "step": 23216 }, { "epoch": 0.77, "grad_norm": 0.748709499835968, "learning_rate": 2.5284081951885288e-06, "loss": 2.0316, "step": 23217 }, { "epoch": 0.77, "grad_norm": 0.7637634873390198, "learning_rate": 2.5277017907723245e-06, "loss": 2.0473, "step": 23218 }, { "epoch": 0.77, "grad_norm": 0.7509251832962036, "learning_rate": 2.5269954707737667e-06, "loss": 2.0162, "step": 23219 }, { "epoch": 0.77, "grad_norm": 0.7376219034194946, "learning_rate": 2.5262892352008305e-06, "loss": 1.996, "step": 23220 }, { "epoch": 0.77, "grad_norm": 0.7695110440254211, "learning_rate": 2.5255830840615014e-06, "loss": 2.051, "step": 23221 }, { "epoch": 0.77, "grad_norm": 0.7476853728294373, "learning_rate": 2.5248770173637516e-06, "loss": 2.0835, "step": 23222 }, { "epoch": 0.77, "grad_norm": 0.7460974454879761, "learning_rate": 2.524171035115561e-06, "loss": 2.0642, "step": 23223 }, { "epoch": 0.77, "grad_norm": 0.7163519263267517, "learning_rate": 2.5234651373249076e-06, "loss": 2.0019, "step": 23224 }, { "epoch": 0.77, "grad_norm": 0.7410582304000854, "learning_rate": 2.522759323999763e-06, "loss": 2.0135, "step": 23225 }, { "epoch": 0.77, "grad_norm": 0.7593340277671814, "learning_rate": 2.5220535951480985e-06, "loss": 2.0287, "step": 23226 }, { "epoch": 0.77, "grad_norm": 0.746727705001831, "learning_rate": 2.521347950777889e-06, "loss": 2.114, "step": 23227 }, { "epoch": 0.77, "grad_norm": 0.7773048281669617, "learning_rate": 2.5206423908971145e-06, "loss": 2.0817, "step": 23228 }, { "epoch": 0.77, "grad_norm": 0.7414720058441162, "learning_rate": 2.519936915513733e-06, "loss": 2.0001, "step": 23229 }, { "epoch": 0.77, "grad_norm": 0.7342730760574341, "learning_rate": 2.51923152463572e-06, "loss": 2.0192, "step": 23230 }, { "epoch": 0.77, "grad_norm": 0.7706368565559387, "learning_rate": 2.518526218271049e-06, "loss": 2.016, "step": 23231 }, { "epoch": 0.77, "grad_norm": 0.7652897834777832, "learning_rate": 2.5178209964276832e-06, "loss": 2.0796, "step": 23232 }, { "epoch": 0.77, "grad_norm": 0.7546705603599548, "learning_rate": 2.517115859113588e-06, "loss": 2.0078, "step": 23233 }, { "epoch": 0.77, "grad_norm": 0.7734571695327759, "learning_rate": 2.5164108063367356e-06, "loss": 2.0088, "step": 23234 }, { "epoch": 0.77, "grad_norm": 0.735771894454956, "learning_rate": 2.515705838105086e-06, "loss": 2.0191, "step": 23235 }, { "epoch": 0.77, "grad_norm": 0.7152687311172485, "learning_rate": 2.5150009544266043e-06, "loss": 2.031, "step": 23236 }, { "epoch": 0.77, "grad_norm": 0.7341321706771851, "learning_rate": 2.5142961553092614e-06, "loss": 2.0122, "step": 23237 }, { "epoch": 0.77, "grad_norm": 0.7482951283454895, "learning_rate": 2.5135914407610073e-06, "loss": 2.0583, "step": 23238 }, { "epoch": 0.77, "grad_norm": 0.76332688331604, "learning_rate": 2.5128868107898107e-06, "loss": 2.0034, "step": 23239 }, { "epoch": 0.77, "grad_norm": 0.7647858262062073, "learning_rate": 2.512182265403633e-06, "loss": 2.1061, "step": 23240 }, { "epoch": 0.77, "grad_norm": 0.7258380055427551, "learning_rate": 2.5114778046104325e-06, "loss": 2.0308, "step": 23241 }, { "epoch": 0.77, "grad_norm": 0.747856080532074, "learning_rate": 2.510773428418164e-06, "loss": 2.0873, "step": 23242 }, { "epoch": 0.77, "grad_norm": 0.7488059401512146, "learning_rate": 2.5100691368347876e-06, "loss": 2.1389, "step": 23243 }, { "epoch": 0.77, "grad_norm": 0.7324599623680115, "learning_rate": 2.509364929868263e-06, "loss": 2.0919, "step": 23244 }, { "epoch": 0.77, "grad_norm": 0.7501180171966553, "learning_rate": 2.5086608075265415e-06, "loss": 2.059, "step": 23245 }, { "epoch": 0.77, "grad_norm": 0.7471052408218384, "learning_rate": 2.5079567698175835e-06, "loss": 1.9456, "step": 23246 }, { "epoch": 0.77, "grad_norm": 0.7212860584259033, "learning_rate": 2.5072528167493383e-06, "loss": 2.0323, "step": 23247 }, { "epoch": 0.77, "grad_norm": 0.7633925080299377, "learning_rate": 2.5065489483297556e-06, "loss": 2.0374, "step": 23248 }, { "epoch": 0.77, "grad_norm": 0.7668169736862183, "learning_rate": 2.5058451645667927e-06, "loss": 2.0166, "step": 23249 }, { "epoch": 0.77, "grad_norm": 0.7470428943634033, "learning_rate": 2.505141465468405e-06, "loss": 2.0623, "step": 23250 }, { "epoch": 0.77, "grad_norm": 0.7245231866836548, "learning_rate": 2.5044378510425303e-06, "loss": 2.0282, "step": 23251 }, { "epoch": 0.77, "grad_norm": 0.7383379340171814, "learning_rate": 2.5037343212971232e-06, "loss": 2.012, "step": 23252 }, { "epoch": 0.77, "grad_norm": 0.7377979159355164, "learning_rate": 2.5030308762401366e-06, "loss": 2.0409, "step": 23253 }, { "epoch": 0.77, "grad_norm": 0.7442428469657898, "learning_rate": 2.50232751587951e-06, "loss": 2.0598, "step": 23254 }, { "epoch": 0.77, "grad_norm": 0.7664313316345215, "learning_rate": 2.501624240223196e-06, "loss": 1.959, "step": 23255 }, { "epoch": 0.77, "grad_norm": 0.7458236813545227, "learning_rate": 2.500921049279137e-06, "loss": 2.0385, "step": 23256 }, { "epoch": 0.77, "grad_norm": 0.7332466244697571, "learning_rate": 2.500217943055274e-06, "loss": 2.0324, "step": 23257 }, { "epoch": 0.77, "grad_norm": 0.7495574355125427, "learning_rate": 2.499514921559554e-06, "loss": 2.0457, "step": 23258 }, { "epoch": 0.77, "grad_norm": 0.7925146818161011, "learning_rate": 2.4988119847999214e-06, "loss": 2.1001, "step": 23259 }, { "epoch": 0.77, "grad_norm": 0.7526647448539734, "learning_rate": 2.4981091327843143e-06, "loss": 1.9662, "step": 23260 }, { "epoch": 0.77, "grad_norm": 0.7690455913543701, "learning_rate": 2.4974063655206717e-06, "loss": 2.0489, "step": 23261 }, { "epoch": 0.77, "grad_norm": 0.7405413389205933, "learning_rate": 2.4967036830169365e-06, "loss": 2.0072, "step": 23262 }, { "epoch": 0.77, "grad_norm": 0.7440643906593323, "learning_rate": 2.4960010852810467e-06, "loss": 2.0227, "step": 23263 }, { "epoch": 0.77, "grad_norm": 0.726311206817627, "learning_rate": 2.4952985723209365e-06, "loss": 2.0435, "step": 23264 }, { "epoch": 0.77, "grad_norm": 0.7640082240104675, "learning_rate": 2.4945961441445443e-06, "loss": 2.0244, "step": 23265 }, { "epoch": 0.77, "grad_norm": 0.7541269063949585, "learning_rate": 2.4938938007598092e-06, "loss": 2.0845, "step": 23266 }, { "epoch": 0.77, "grad_norm": 0.7884525060653687, "learning_rate": 2.4931915421746588e-06, "loss": 2.1212, "step": 23267 }, { "epoch": 0.77, "grad_norm": 0.7179670333862305, "learning_rate": 2.492489368397035e-06, "loss": 2.0578, "step": 23268 }, { "epoch": 0.77, "grad_norm": 0.7450252175331116, "learning_rate": 2.4917872794348673e-06, "loss": 2.0444, "step": 23269 }, { "epoch": 0.77, "grad_norm": 0.766559898853302, "learning_rate": 2.4910852752960823e-06, "loss": 2.0504, "step": 23270 }, { "epoch": 0.77, "grad_norm": 0.7627468109130859, "learning_rate": 2.490383355988616e-06, "loss": 2.1085, "step": 23271 }, { "epoch": 0.77, "grad_norm": 0.738466203212738, "learning_rate": 2.489681521520403e-06, "loss": 2.0616, "step": 23272 }, { "epoch": 0.77, "grad_norm": 0.7826035618782043, "learning_rate": 2.488979771899361e-06, "loss": 2.0134, "step": 23273 }, { "epoch": 0.77, "grad_norm": 0.7297622561454773, "learning_rate": 2.488278107133424e-06, "loss": 2.0166, "step": 23274 }, { "epoch": 0.77, "grad_norm": 0.7571211457252502, "learning_rate": 2.4875765272305206e-06, "loss": 2.0195, "step": 23275 }, { "epoch": 0.77, "grad_norm": 0.7165980339050293, "learning_rate": 2.4868750321985724e-06, "loss": 1.9945, "step": 23276 }, { "epoch": 0.77, "grad_norm": 0.7498269081115723, "learning_rate": 2.48617362204551e-06, "loss": 2.0576, "step": 23277 }, { "epoch": 0.77, "grad_norm": 0.7377191781997681, "learning_rate": 2.4854722967792543e-06, "loss": 2.076, "step": 23278 }, { "epoch": 0.77, "grad_norm": 0.7650166153907776, "learning_rate": 2.4847710564077265e-06, "loss": 2.0403, "step": 23279 }, { "epoch": 0.77, "grad_norm": 0.7682573795318604, "learning_rate": 2.48406990093885e-06, "loss": 2.0373, "step": 23280 }, { "epoch": 0.77, "grad_norm": 0.7513816356658936, "learning_rate": 2.4833688303805503e-06, "loss": 2.033, "step": 23281 }, { "epoch": 0.77, "grad_norm": 0.7702515721321106, "learning_rate": 2.4826678447407436e-06, "loss": 2.1125, "step": 23282 }, { "epoch": 0.77, "grad_norm": 0.7328887581825256, "learning_rate": 2.4819669440273486e-06, "loss": 2.009, "step": 23283 }, { "epoch": 0.77, "grad_norm": 0.7822847366333008, "learning_rate": 2.4812661282482876e-06, "loss": 2.0695, "step": 23284 }, { "epoch": 0.77, "grad_norm": 0.7292688488960266, "learning_rate": 2.480565397411474e-06, "loss": 2.0433, "step": 23285 }, { "epoch": 0.77, "grad_norm": 0.7741066813468933, "learning_rate": 2.479864751524824e-06, "loss": 2.0319, "step": 23286 }, { "epoch": 0.77, "grad_norm": 0.7526381611824036, "learning_rate": 2.479164190596255e-06, "loss": 2.0949, "step": 23287 }, { "epoch": 0.77, "grad_norm": 0.7720553278923035, "learning_rate": 2.4784637146336844e-06, "loss": 2.0102, "step": 23288 }, { "epoch": 0.77, "grad_norm": 0.7315800189971924, "learning_rate": 2.4777633236450193e-06, "loss": 2.0366, "step": 23289 }, { "epoch": 0.77, "grad_norm": 0.768068253993988, "learning_rate": 2.4770630176381783e-06, "loss": 2.1025, "step": 23290 }, { "epoch": 0.77, "grad_norm": 0.7632604241371155, "learning_rate": 2.4763627966210702e-06, "loss": 2.0998, "step": 23291 }, { "epoch": 0.77, "grad_norm": 0.8026905655860901, "learning_rate": 2.4756626606016042e-06, "loss": 2.0132, "step": 23292 }, { "epoch": 0.77, "grad_norm": 0.7429221868515015, "learning_rate": 2.474962609587691e-06, "loss": 2.0537, "step": 23293 }, { "epoch": 0.77, "grad_norm": 0.7574822306632996, "learning_rate": 2.474262643587246e-06, "loss": 2.023, "step": 23294 }, { "epoch": 0.78, "grad_norm": 0.7176968455314636, "learning_rate": 2.473562762608166e-06, "loss": 2.0575, "step": 23295 }, { "epoch": 0.78, "grad_norm": 0.7388931512832642, "learning_rate": 2.4728629666583616e-06, "loss": 1.9961, "step": 23296 }, { "epoch": 0.78, "grad_norm": 0.744503915309906, "learning_rate": 2.4721632557457444e-06, "loss": 2.0611, "step": 23297 }, { "epoch": 0.78, "grad_norm": 0.7026798725128174, "learning_rate": 2.4714636298782114e-06, "loss": 2.0184, "step": 23298 }, { "epoch": 0.78, "grad_norm": 0.7590958476066589, "learning_rate": 2.470764089063673e-06, "loss": 2.0043, "step": 23299 }, { "epoch": 0.78, "grad_norm": 0.7769315838813782, "learning_rate": 2.47006463331003e-06, "loss": 2.0498, "step": 23300 }, { "epoch": 0.78, "grad_norm": 0.7500719428062439, "learning_rate": 2.4693652626251798e-06, "loss": 2.036, "step": 23301 }, { "epoch": 0.78, "grad_norm": 0.7416940927505493, "learning_rate": 2.4686659770170287e-06, "loss": 2.0659, "step": 23302 }, { "epoch": 0.78, "grad_norm": 0.7317529320716858, "learning_rate": 2.4679667764934777e-06, "loss": 1.9662, "step": 23303 }, { "epoch": 0.78, "grad_norm": 0.7453404068946838, "learning_rate": 2.4672676610624233e-06, "loss": 2.0871, "step": 23304 }, { "epoch": 0.78, "grad_norm": 0.736285924911499, "learning_rate": 2.4665686307317625e-06, "loss": 2.0153, "step": 23305 }, { "epoch": 0.78, "grad_norm": 0.7598045468330383, "learning_rate": 2.4658696855093967e-06, "loss": 2.0575, "step": 23306 }, { "epoch": 0.78, "grad_norm": 0.7303100228309631, "learning_rate": 2.465170825403217e-06, "loss": 2.0246, "step": 23307 }, { "epoch": 0.78, "grad_norm": 0.7114728093147278, "learning_rate": 2.464472050421124e-06, "loss": 1.9969, "step": 23308 }, { "epoch": 0.78, "grad_norm": 0.7648562788963318, "learning_rate": 2.463773360571007e-06, "loss": 2.0485, "step": 23309 }, { "epoch": 0.78, "grad_norm": 0.7444080710411072, "learning_rate": 2.463074755860765e-06, "loss": 1.9549, "step": 23310 }, { "epoch": 0.78, "grad_norm": 0.7734590768814087, "learning_rate": 2.462376236298284e-06, "loss": 2.1079, "step": 23311 }, { "epoch": 0.78, "grad_norm": 0.7090965509414673, "learning_rate": 2.4616778018914623e-06, "loss": 2.0665, "step": 23312 }, { "epoch": 0.78, "grad_norm": 0.70216304063797, "learning_rate": 2.4609794526481854e-06, "loss": 2.009, "step": 23313 }, { "epoch": 0.78, "grad_norm": 0.764106810092926, "learning_rate": 2.460281188576343e-06, "loss": 2.0504, "step": 23314 }, { "epoch": 0.78, "grad_norm": 0.7769772410392761, "learning_rate": 2.4595830096838247e-06, "loss": 2.0354, "step": 23315 }, { "epoch": 0.78, "grad_norm": 0.7483271956443787, "learning_rate": 2.4588849159785245e-06, "loss": 2.003, "step": 23316 }, { "epoch": 0.78, "grad_norm": 0.7399762868881226, "learning_rate": 2.458186907468316e-06, "loss": 2.0285, "step": 23317 }, { "epoch": 0.78, "grad_norm": 0.7650138139724731, "learning_rate": 2.4574889841610926e-06, "loss": 2.0827, "step": 23318 }, { "epoch": 0.78, "grad_norm": 0.7572110295295715, "learning_rate": 2.456791146064741e-06, "loss": 2.0287, "step": 23319 }, { "epoch": 0.78, "grad_norm": 0.7604380249977112, "learning_rate": 2.4560933931871402e-06, "loss": 2.0631, "step": 23320 }, { "epoch": 0.78, "grad_norm": 0.7237051129341125, "learning_rate": 2.4553957255361772e-06, "loss": 2.0057, "step": 23321 }, { "epoch": 0.78, "grad_norm": 0.7260814309120178, "learning_rate": 2.4546981431197316e-06, "loss": 2.0788, "step": 23322 }, { "epoch": 0.78, "grad_norm": 0.7824169397354126, "learning_rate": 2.454000645945682e-06, "loss": 2.106, "step": 23323 }, { "epoch": 0.78, "grad_norm": 0.7276002764701843, "learning_rate": 2.453303234021911e-06, "loss": 2.0964, "step": 23324 }, { "epoch": 0.78, "grad_norm": 0.7538354992866516, "learning_rate": 2.4526059073562993e-06, "loss": 2.0705, "step": 23325 }, { "epoch": 0.78, "grad_norm": 0.7723227143287659, "learning_rate": 2.451908665956724e-06, "loss": 2.0597, "step": 23326 }, { "epoch": 0.78, "grad_norm": 0.7338346838951111, "learning_rate": 2.4512115098310563e-06, "loss": 2.0574, "step": 23327 }, { "epoch": 0.78, "grad_norm": 0.7396935224533081, "learning_rate": 2.450514438987178e-06, "loss": 2.1167, "step": 23328 }, { "epoch": 0.78, "grad_norm": 0.7737733721733093, "learning_rate": 2.4498174534329667e-06, "loss": 1.9939, "step": 23329 }, { "epoch": 0.78, "grad_norm": 0.7365962266921997, "learning_rate": 2.449120553176292e-06, "loss": 2.024, "step": 23330 }, { "epoch": 0.78, "grad_norm": 0.7541148662567139, "learning_rate": 2.4484237382250254e-06, "loss": 1.9948, "step": 23331 }, { "epoch": 0.78, "grad_norm": 0.7303698658943176, "learning_rate": 2.4477270085870442e-06, "loss": 1.9956, "step": 23332 }, { "epoch": 0.78, "grad_norm": 0.7327603101730347, "learning_rate": 2.4470303642702154e-06, "loss": 1.9941, "step": 23333 }, { "epoch": 0.78, "grad_norm": 0.7330635786056519, "learning_rate": 2.4463338052824125e-06, "loss": 2.0497, "step": 23334 }, { "epoch": 0.78, "grad_norm": 0.7537268400192261, "learning_rate": 2.4456373316315053e-06, "loss": 2.0435, "step": 23335 }, { "epoch": 0.78, "grad_norm": 0.7413952946662903, "learning_rate": 2.4449409433253556e-06, "loss": 2.0638, "step": 23336 }, { "epoch": 0.78, "grad_norm": 0.7319790124893188, "learning_rate": 2.4442446403718356e-06, "loss": 2.0423, "step": 23337 }, { "epoch": 0.78, "grad_norm": 0.7612069249153137, "learning_rate": 2.443548422778815e-06, "loss": 1.9948, "step": 23338 }, { "epoch": 0.78, "grad_norm": 0.7573962807655334, "learning_rate": 2.4428522905541564e-06, "loss": 1.9124, "step": 23339 }, { "epoch": 0.78, "grad_norm": 0.7579341530799866, "learning_rate": 2.44215624370572e-06, "loss": 2.0787, "step": 23340 }, { "epoch": 0.78, "grad_norm": 0.7847955822944641, "learning_rate": 2.441460282241376e-06, "loss": 2.0189, "step": 23341 }, { "epoch": 0.78, "grad_norm": 0.7587438821792603, "learning_rate": 2.440764406168981e-06, "loss": 2.058, "step": 23342 }, { "epoch": 0.78, "grad_norm": 0.7209582924842834, "learning_rate": 2.4400686154964027e-06, "loss": 2.0353, "step": 23343 }, { "epoch": 0.78, "grad_norm": 0.7249478697776794, "learning_rate": 2.4393729102314955e-06, "loss": 2.0482, "step": 23344 }, { "epoch": 0.78, "grad_norm": 0.7305390238761902, "learning_rate": 2.4386772903821254e-06, "loss": 2.08, "step": 23345 }, { "epoch": 0.78, "grad_norm": 0.7638789415359497, "learning_rate": 2.4379817559561445e-06, "loss": 2.0371, "step": 23346 }, { "epoch": 0.78, "grad_norm": 0.7391451597213745, "learning_rate": 2.437286306961417e-06, "loss": 2.0086, "step": 23347 }, { "epoch": 0.78, "grad_norm": 0.7408673763275146, "learning_rate": 2.4365909434057978e-06, "loss": 2.0424, "step": 23348 }, { "epoch": 0.78, "grad_norm": 0.750360906124115, "learning_rate": 2.4358956652971367e-06, "loss": 2.051, "step": 23349 }, { "epoch": 0.78, "grad_norm": 0.7405845522880554, "learning_rate": 2.4352004726432944e-06, "loss": 2.1148, "step": 23350 }, { "epoch": 0.78, "grad_norm": 0.7701925039291382, "learning_rate": 2.4345053654521267e-06, "loss": 2.092, "step": 23351 }, { "epoch": 0.78, "grad_norm": 0.7555758357048035, "learning_rate": 2.433810343731483e-06, "loss": 2.0093, "step": 23352 }, { "epoch": 0.78, "grad_norm": 0.7479721903800964, "learning_rate": 2.4331154074892138e-06, "loss": 2.0814, "step": 23353 }, { "epoch": 0.78, "grad_norm": 0.7460130453109741, "learning_rate": 2.432420556733175e-06, "loss": 1.9991, "step": 23354 }, { "epoch": 0.78, "grad_norm": 0.732802152633667, "learning_rate": 2.43172579147121e-06, "loss": 2.0471, "step": 23355 }, { "epoch": 0.78, "grad_norm": 0.7451066970825195, "learning_rate": 2.431031111711175e-06, "loss": 2.0138, "step": 23356 }, { "epoch": 0.78, "grad_norm": 0.7391600012779236, "learning_rate": 2.430336517460914e-06, "loss": 2.0588, "step": 23357 }, { "epoch": 0.78, "grad_norm": 0.7460057735443115, "learning_rate": 2.4296420087282725e-06, "loss": 2.1024, "step": 23358 }, { "epoch": 0.78, "grad_norm": 0.7350541353225708, "learning_rate": 2.4289475855210988e-06, "loss": 2.0431, "step": 23359 }, { "epoch": 0.78, "grad_norm": 0.7730778455734253, "learning_rate": 2.428253247847241e-06, "loss": 2.0329, "step": 23360 }, { "epoch": 0.78, "grad_norm": 0.7575381994247437, "learning_rate": 2.4275589957145408e-06, "loss": 2.0201, "step": 23361 }, { "epoch": 0.78, "grad_norm": 0.7797570824623108, "learning_rate": 2.4268648291308384e-06, "loss": 2.0699, "step": 23362 }, { "epoch": 0.78, "grad_norm": 0.7381778955459595, "learning_rate": 2.426170748103981e-06, "loss": 1.9464, "step": 23363 }, { "epoch": 0.78, "grad_norm": 0.7415148615837097, "learning_rate": 2.4254767526418056e-06, "loss": 1.9797, "step": 23364 }, { "epoch": 0.78, "grad_norm": 0.7837454676628113, "learning_rate": 2.424782842752157e-06, "loss": 2.0331, "step": 23365 }, { "epoch": 0.78, "grad_norm": 0.7485068440437317, "learning_rate": 2.42408901844287e-06, "loss": 2.0165, "step": 23366 }, { "epoch": 0.78, "grad_norm": 0.7541255950927734, "learning_rate": 2.4233952797217876e-06, "loss": 2.0068, "step": 23367 }, { "epoch": 0.78, "grad_norm": 0.7533348202705383, "learning_rate": 2.422701626596743e-06, "loss": 1.9922, "step": 23368 }, { "epoch": 0.78, "grad_norm": 0.7371532320976257, "learning_rate": 2.422008059075577e-06, "loss": 2.0418, "step": 23369 }, { "epoch": 0.78, "grad_norm": 0.7413358092308044, "learning_rate": 2.421314577166123e-06, "loss": 2.067, "step": 23370 }, { "epoch": 0.78, "grad_norm": 0.7418953776359558, "learning_rate": 2.4206211808762127e-06, "loss": 2.0421, "step": 23371 }, { "epoch": 0.78, "grad_norm": 0.7781485319137573, "learning_rate": 2.419927870213682e-06, "loss": 2.1183, "step": 23372 }, { "epoch": 0.78, "grad_norm": 0.7504087686538696, "learning_rate": 2.419234645186367e-06, "loss": 2.0605, "step": 23373 }, { "epoch": 0.78, "grad_norm": 0.7601073384284973, "learning_rate": 2.4185415058020956e-06, "loss": 2.0531, "step": 23374 }, { "epoch": 0.78, "grad_norm": 0.7696033120155334, "learning_rate": 2.417848452068696e-06, "loss": 2.0622, "step": 23375 }, { "epoch": 0.78, "grad_norm": 0.770108163356781, "learning_rate": 2.417155483994005e-06, "loss": 2.0496, "step": 23376 }, { "epoch": 0.78, "grad_norm": 0.7269140481948853, "learning_rate": 2.416462601585844e-06, "loss": 2.0671, "step": 23377 }, { "epoch": 0.78, "grad_norm": 0.738740861415863, "learning_rate": 2.4157698048520473e-06, "loss": 2.0339, "step": 23378 }, { "epoch": 0.78, "grad_norm": 0.7243169546127319, "learning_rate": 2.4150770938004374e-06, "loss": 2.0766, "step": 23379 }, { "epoch": 0.78, "grad_norm": 0.7414936423301697, "learning_rate": 2.4143844684388394e-06, "loss": 1.996, "step": 23380 }, { "epoch": 0.78, "grad_norm": 0.7001222968101501, "learning_rate": 2.4136919287750803e-06, "loss": 2.0642, "step": 23381 }, { "epoch": 0.78, "grad_norm": 0.7368388772010803, "learning_rate": 2.412999474816986e-06, "loss": 2.0173, "step": 23382 }, { "epoch": 0.78, "grad_norm": 0.752288281917572, "learning_rate": 2.412307106572378e-06, "loss": 2.0088, "step": 23383 }, { "epoch": 0.78, "grad_norm": 0.7396050095558167, "learning_rate": 2.4116148240490745e-06, "loss": 2.0576, "step": 23384 }, { "epoch": 0.78, "grad_norm": 0.7667602896690369, "learning_rate": 2.4109226272549015e-06, "loss": 2.0893, "step": 23385 }, { "epoch": 0.78, "grad_norm": 0.7315330505371094, "learning_rate": 2.4102305161976746e-06, "loss": 1.9714, "step": 23386 }, { "epoch": 0.78, "grad_norm": 0.7568930983543396, "learning_rate": 2.409538490885218e-06, "loss": 2.0445, "step": 23387 }, { "epoch": 0.78, "grad_norm": 0.748307466506958, "learning_rate": 2.4088465513253446e-06, "loss": 2.0716, "step": 23388 }, { "epoch": 0.78, "grad_norm": 0.710921585559845, "learning_rate": 2.408154697525876e-06, "loss": 2.0552, "step": 23389 }, { "epoch": 0.78, "grad_norm": 0.7548015713691711, "learning_rate": 2.407462929494625e-06, "loss": 2.0957, "step": 23390 }, { "epoch": 0.78, "grad_norm": 0.733204185962677, "learning_rate": 2.40677124723941e-06, "loss": 1.9471, "step": 23391 }, { "epoch": 0.78, "grad_norm": 0.7375377416610718, "learning_rate": 2.406079650768044e-06, "loss": 2.1176, "step": 23392 }, { "epoch": 0.78, "grad_norm": 0.7408237457275391, "learning_rate": 2.4053881400883363e-06, "loss": 1.9548, "step": 23393 }, { "epoch": 0.78, "grad_norm": 0.7223044633865356, "learning_rate": 2.4046967152081025e-06, "loss": 2.0716, "step": 23394 }, { "epoch": 0.78, "grad_norm": 0.7372767329216003, "learning_rate": 2.4040053761351566e-06, "loss": 2.0356, "step": 23395 }, { "epoch": 0.78, "grad_norm": 0.7458848357200623, "learning_rate": 2.4033141228773073e-06, "loss": 2.0837, "step": 23396 }, { "epoch": 0.78, "grad_norm": 0.7781601548194885, "learning_rate": 2.4026229554423588e-06, "loss": 1.9209, "step": 23397 }, { "epoch": 0.78, "grad_norm": 0.756543755531311, "learning_rate": 2.401931873838127e-06, "loss": 1.9509, "step": 23398 }, { "epoch": 0.78, "grad_norm": 0.7493532299995422, "learning_rate": 2.4012408780724127e-06, "loss": 2.0678, "step": 23399 }, { "epoch": 0.78, "grad_norm": 0.76082843542099, "learning_rate": 2.4005499681530264e-06, "loss": 2.1175, "step": 23400 }, { "epoch": 0.78, "grad_norm": 0.7614288926124573, "learning_rate": 2.399859144087778e-06, "loss": 2.0424, "step": 23401 }, { "epoch": 0.78, "grad_norm": 0.748488187789917, "learning_rate": 2.3991684058844624e-06, "loss": 2.0088, "step": 23402 }, { "epoch": 0.78, "grad_norm": 0.729911744594574, "learning_rate": 2.398477753550886e-06, "loss": 2.03, "step": 23403 }, { "epoch": 0.78, "grad_norm": 0.7355022430419922, "learning_rate": 2.3977871870948566e-06, "loss": 2.0538, "step": 23404 }, { "epoch": 0.78, "grad_norm": 0.7184388041496277, "learning_rate": 2.3970967065241724e-06, "loss": 2.0912, "step": 23405 }, { "epoch": 0.78, "grad_norm": 0.7494268417358398, "learning_rate": 2.3964063118466308e-06, "loss": 2.0468, "step": 23406 }, { "epoch": 0.78, "grad_norm": 0.7325255274772644, "learning_rate": 2.3957160030700364e-06, "loss": 2.1015, "step": 23407 }, { "epoch": 0.78, "grad_norm": 0.7660855650901794, "learning_rate": 2.395025780202185e-06, "loss": 2.0885, "step": 23408 }, { "epoch": 0.78, "grad_norm": 0.7530097961425781, "learning_rate": 2.3943356432508767e-06, "loss": 2.0354, "step": 23409 }, { "epoch": 0.78, "grad_norm": 0.7253831028938293, "learning_rate": 2.3936455922239056e-06, "loss": 1.9908, "step": 23410 }, { "epoch": 0.78, "grad_norm": 0.7819858193397522, "learning_rate": 2.39295562712907e-06, "loss": 2.0413, "step": 23411 }, { "epoch": 0.78, "grad_norm": 0.7732041478157043, "learning_rate": 2.392265747974162e-06, "loss": 2.0594, "step": 23412 }, { "epoch": 0.78, "grad_norm": 0.7459157109260559, "learning_rate": 2.391575954766977e-06, "loss": 2.0722, "step": 23413 }, { "epoch": 0.78, "grad_norm": 0.7450140118598938, "learning_rate": 2.390886247515313e-06, "loss": 2.0477, "step": 23414 }, { "epoch": 0.78, "grad_norm": 0.7548434138298035, "learning_rate": 2.3901966262269505e-06, "loss": 2.0879, "step": 23415 }, { "epoch": 0.78, "grad_norm": 0.7473630905151367, "learning_rate": 2.3895070909096887e-06, "loss": 2.0838, "step": 23416 }, { "epoch": 0.78, "grad_norm": 0.9641106128692627, "learning_rate": 2.388817641571316e-06, "loss": 2.0609, "step": 23417 }, { "epoch": 0.78, "grad_norm": 0.7401993274688721, "learning_rate": 2.3881282782196224e-06, "loss": 1.972, "step": 23418 }, { "epoch": 0.78, "grad_norm": 0.7498778700828552, "learning_rate": 2.3874390008623916e-06, "loss": 2.0879, "step": 23419 }, { "epoch": 0.78, "grad_norm": 0.7570474147796631, "learning_rate": 2.386749809507417e-06, "loss": 2.0973, "step": 23420 }, { "epoch": 0.78, "grad_norm": 0.7704286575317383, "learning_rate": 2.386060704162477e-06, "loss": 2.0625, "step": 23421 }, { "epoch": 0.78, "grad_norm": 0.7458311915397644, "learning_rate": 2.3853716848353624e-06, "loss": 2.1214, "step": 23422 }, { "epoch": 0.78, "grad_norm": 0.7479351758956909, "learning_rate": 2.384682751533861e-06, "loss": 1.9162, "step": 23423 }, { "epoch": 0.78, "grad_norm": 0.7799244523048401, "learning_rate": 2.3839939042657446e-06, "loss": 2.0325, "step": 23424 }, { "epoch": 0.78, "grad_norm": 0.737411618232727, "learning_rate": 2.383305143038802e-06, "loss": 2.0586, "step": 23425 }, { "epoch": 0.78, "grad_norm": 0.7311658263206482, "learning_rate": 2.3826164678608167e-06, "loss": 2.0939, "step": 23426 }, { "epoch": 0.78, "grad_norm": 0.7360967993736267, "learning_rate": 2.381927878739567e-06, "loss": 2.0271, "step": 23427 }, { "epoch": 0.78, "grad_norm": 0.7492108345031738, "learning_rate": 2.381239375682828e-06, "loss": 2.0716, "step": 23428 }, { "epoch": 0.78, "grad_norm": 0.7420879006385803, "learning_rate": 2.380550958698382e-06, "loss": 2.0557, "step": 23429 }, { "epoch": 0.78, "grad_norm": 0.7333759665489197, "learning_rate": 2.3798626277940086e-06, "loss": 2.1041, "step": 23430 }, { "epoch": 0.78, "grad_norm": 0.7620464563369751, "learning_rate": 2.379174382977478e-06, "loss": 2.0313, "step": 23431 }, { "epoch": 0.78, "grad_norm": 0.7438222169876099, "learning_rate": 2.378486224256572e-06, "loss": 2.0513, "step": 23432 }, { "epoch": 0.78, "grad_norm": 0.7130557894706726, "learning_rate": 2.3777981516390623e-06, "loss": 1.9545, "step": 23433 }, { "epoch": 0.78, "grad_norm": 0.7504770159721375, "learning_rate": 2.37711016513272e-06, "loss": 2.0279, "step": 23434 }, { "epoch": 0.78, "grad_norm": 0.7847603559494019, "learning_rate": 2.3764222647453184e-06, "loss": 1.9978, "step": 23435 }, { "epoch": 0.78, "grad_norm": 0.7502672076225281, "learning_rate": 2.3757344504846356e-06, "loss": 2.0158, "step": 23436 }, { "epoch": 0.78, "grad_norm": 0.7489727735519409, "learning_rate": 2.3750467223584317e-06, "loss": 2.0997, "step": 23437 }, { "epoch": 0.78, "grad_norm": 0.7641331553459167, "learning_rate": 2.374359080374482e-06, "loss": 2.0409, "step": 23438 }, { "epoch": 0.78, "grad_norm": 0.756416916847229, "learning_rate": 2.373671524540556e-06, "loss": 2.0563, "step": 23439 }, { "epoch": 0.78, "grad_norm": 0.7475770115852356, "learning_rate": 2.37298405486442e-06, "loss": 2.0735, "step": 23440 }, { "epoch": 0.78, "grad_norm": 0.7600544095039368, "learning_rate": 2.3722966713538375e-06, "loss": 2.0694, "step": 23441 }, { "epoch": 0.78, "grad_norm": 0.7446238398551941, "learning_rate": 2.37160937401658e-06, "loss": 2.0503, "step": 23442 }, { "epoch": 0.78, "grad_norm": 0.8005606532096863, "learning_rate": 2.370922162860406e-06, "loss": 2.0794, "step": 23443 }, { "epoch": 0.78, "grad_norm": 0.7287567257881165, "learning_rate": 2.370235037893083e-06, "loss": 2.0188, "step": 23444 }, { "epoch": 0.78, "grad_norm": 0.7734804153442383, "learning_rate": 2.3695479991223747e-06, "loss": 2.0343, "step": 23445 }, { "epoch": 0.78, "grad_norm": 0.7170252799987793, "learning_rate": 2.3688610465560414e-06, "loss": 2.0584, "step": 23446 }, { "epoch": 0.78, "grad_norm": 0.7365675568580627, "learning_rate": 2.368174180201841e-06, "loss": 2.0675, "step": 23447 }, { "epoch": 0.78, "grad_norm": 0.7450243830680847, "learning_rate": 2.3674874000675397e-06, "loss": 2.0613, "step": 23448 }, { "epoch": 0.78, "grad_norm": 0.7802800536155701, "learning_rate": 2.3668007061608924e-06, "loss": 2.1809, "step": 23449 }, { "epoch": 0.78, "grad_norm": 0.7641650438308716, "learning_rate": 2.3661140984896534e-06, "loss": 2.0813, "step": 23450 }, { "epoch": 0.78, "grad_norm": 0.739465057849884, "learning_rate": 2.365427577061584e-06, "loss": 2.0677, "step": 23451 }, { "epoch": 0.78, "grad_norm": 0.7329521775245667, "learning_rate": 2.364741141884442e-06, "loss": 2.0213, "step": 23452 }, { "epoch": 0.78, "grad_norm": 0.7570412158966064, "learning_rate": 2.3640547929659787e-06, "loss": 2.0032, "step": 23453 }, { "epoch": 0.78, "grad_norm": 0.7617425918579102, "learning_rate": 2.3633685303139507e-06, "loss": 2.0221, "step": 23454 }, { "epoch": 0.78, "grad_norm": 0.7156916260719299, "learning_rate": 2.36268235393611e-06, "loss": 1.989, "step": 23455 }, { "epoch": 0.78, "grad_norm": 0.7371447086334229, "learning_rate": 2.361996263840205e-06, "loss": 2.0197, "step": 23456 }, { "epoch": 0.78, "grad_norm": 0.7214310169219971, "learning_rate": 2.36131026003399e-06, "loss": 2.0602, "step": 23457 }, { "epoch": 0.78, "grad_norm": 0.7453267574310303, "learning_rate": 2.3606243425252196e-06, "loss": 2.0373, "step": 23458 }, { "epoch": 0.78, "grad_norm": 0.7647746801376343, "learning_rate": 2.3599385113216346e-06, "loss": 2.0568, "step": 23459 }, { "epoch": 0.78, "grad_norm": 0.7448199391365051, "learning_rate": 2.359252766430985e-06, "loss": 2.088, "step": 23460 }, { "epoch": 0.78, "grad_norm": 0.7889588475227356, "learning_rate": 2.3585671078610238e-06, "loss": 2.0462, "step": 23461 }, { "epoch": 0.78, "grad_norm": 0.754124641418457, "learning_rate": 2.3578815356194927e-06, "loss": 2.0746, "step": 23462 }, { "epoch": 0.78, "grad_norm": 0.7604997158050537, "learning_rate": 2.3571960497141344e-06, "loss": 2.0842, "step": 23463 }, { "epoch": 0.78, "grad_norm": 0.7162451148033142, "learning_rate": 2.356510650152698e-06, "loss": 2.0913, "step": 23464 }, { "epoch": 0.78, "grad_norm": 0.7356861233711243, "learning_rate": 2.355825336942923e-06, "loss": 2.0857, "step": 23465 }, { "epoch": 0.78, "grad_norm": 0.7419259548187256, "learning_rate": 2.355140110092553e-06, "loss": 2.0265, "step": 23466 }, { "epoch": 0.78, "grad_norm": 0.7603479027748108, "learning_rate": 2.354454969609333e-06, "loss": 2.0763, "step": 23467 }, { "epoch": 0.78, "grad_norm": 0.7977761626243591, "learning_rate": 2.3537699155009997e-06, "loss": 2.0737, "step": 23468 }, { "epoch": 0.78, "grad_norm": 0.7733137011528015, "learning_rate": 2.35308494777529e-06, "loss": 2.0103, "step": 23469 }, { "epoch": 0.78, "grad_norm": 0.7497432827949524, "learning_rate": 2.3524000664399482e-06, "loss": 2.0103, "step": 23470 }, { "epoch": 0.78, "grad_norm": 0.7742173671722412, "learning_rate": 2.351715271502708e-06, "loss": 2.0208, "step": 23471 }, { "epoch": 0.78, "grad_norm": 0.7715641260147095, "learning_rate": 2.351030562971304e-06, "loss": 2.156, "step": 23472 }, { "epoch": 0.78, "grad_norm": 0.7167426943778992, "learning_rate": 2.3503459408534725e-06, "loss": 2.0661, "step": 23473 }, { "epoch": 0.78, "grad_norm": 0.7894922494888306, "learning_rate": 2.3496614051569533e-06, "loss": 2.0157, "step": 23474 }, { "epoch": 0.78, "grad_norm": 0.7574504613876343, "learning_rate": 2.3489769558894738e-06, "loss": 2.0289, "step": 23475 }, { "epoch": 0.78, "grad_norm": 0.733540415763855, "learning_rate": 2.3482925930587707e-06, "loss": 2.0207, "step": 23476 }, { "epoch": 0.78, "grad_norm": 0.7561203241348267, "learning_rate": 2.3476083166725737e-06, "loss": 2.0243, "step": 23477 }, { "epoch": 0.78, "grad_norm": 0.7321612238883972, "learning_rate": 2.34692412673861e-06, "loss": 2.1282, "step": 23478 }, { "epoch": 0.78, "grad_norm": 0.7351642847061157, "learning_rate": 2.346240023264613e-06, "loss": 2.0434, "step": 23479 }, { "epoch": 0.78, "grad_norm": 0.7331467270851135, "learning_rate": 2.345556006258316e-06, "loss": 1.9734, "step": 23480 }, { "epoch": 0.78, "grad_norm": 0.7624969482421875, "learning_rate": 2.3448720757274368e-06, "loss": 2.1269, "step": 23481 }, { "epoch": 0.78, "grad_norm": 0.7439268827438354, "learning_rate": 2.3441882316797047e-06, "loss": 2.0016, "step": 23482 }, { "epoch": 0.78, "grad_norm": 0.7550613284111023, "learning_rate": 2.3435044741228507e-06, "loss": 2.0949, "step": 23483 }, { "epoch": 0.78, "grad_norm": 0.7514625191688538, "learning_rate": 2.342820803064594e-06, "loss": 2.0599, "step": 23484 }, { "epoch": 0.78, "grad_norm": 0.7443356513977051, "learning_rate": 2.342137218512662e-06, "loss": 1.9982, "step": 23485 }, { "epoch": 0.78, "grad_norm": 0.7426179647445679, "learning_rate": 2.3414537204747766e-06, "loss": 2.0104, "step": 23486 }, { "epoch": 0.78, "grad_norm": 0.7371115684509277, "learning_rate": 2.3407703089586553e-06, "loss": 2.0275, "step": 23487 }, { "epoch": 0.78, "grad_norm": 0.8575018644332886, "learning_rate": 2.340086983972023e-06, "loss": 2.0714, "step": 23488 }, { "epoch": 0.78, "grad_norm": 0.735481858253479, "learning_rate": 2.3394037455226015e-06, "loss": 2.0951, "step": 23489 }, { "epoch": 0.78, "grad_norm": 0.7563784122467041, "learning_rate": 2.338720593618107e-06, "loss": 2.0867, "step": 23490 }, { "epoch": 0.78, "grad_norm": 0.7404896020889282, "learning_rate": 2.338037528266254e-06, "loss": 2.0283, "step": 23491 }, { "epoch": 0.78, "grad_norm": 0.7541285157203674, "learning_rate": 2.337354549474765e-06, "loss": 2.0797, "step": 23492 }, { "epoch": 0.78, "grad_norm": 0.7350534200668335, "learning_rate": 2.3366716572513536e-06, "loss": 2.0432, "step": 23493 }, { "epoch": 0.78, "grad_norm": 0.7396516799926758, "learning_rate": 2.3359888516037334e-06, "loss": 2.0811, "step": 23494 }, { "epoch": 0.78, "grad_norm": 0.7381443977355957, "learning_rate": 2.3353061325396177e-06, "loss": 1.9687, "step": 23495 }, { "epoch": 0.78, "grad_norm": 0.728945255279541, "learning_rate": 2.334623500066725e-06, "loss": 2.0456, "step": 23496 }, { "epoch": 0.78, "grad_norm": 0.7696000337600708, "learning_rate": 2.3339409541927617e-06, "loss": 2.0247, "step": 23497 }, { "epoch": 0.78, "grad_norm": 0.7517894506454468, "learning_rate": 2.333258494925442e-06, "loss": 1.9846, "step": 23498 }, { "epoch": 0.78, "grad_norm": 0.7358246445655823, "learning_rate": 2.332576122272475e-06, "loss": 2.0646, "step": 23499 }, { "epoch": 0.78, "grad_norm": 0.8261101841926575, "learning_rate": 2.3318938362415676e-06, "loss": 1.9982, "step": 23500 }, { "epoch": 0.78, "grad_norm": 0.766869306564331, "learning_rate": 2.331211636840429e-06, "loss": 2.0694, "step": 23501 }, { "epoch": 0.78, "grad_norm": 0.7412298917770386, "learning_rate": 2.3305295240767724e-06, "loss": 2.0765, "step": 23502 }, { "epoch": 0.78, "grad_norm": 0.7593595385551453, "learning_rate": 2.329847497958293e-06, "loss": 2.0911, "step": 23503 }, { "epoch": 0.78, "grad_norm": 0.7239635586738586, "learning_rate": 2.329165558492702e-06, "loss": 2.0081, "step": 23504 }, { "epoch": 0.78, "grad_norm": 0.7685195803642273, "learning_rate": 2.328483705687705e-06, "loss": 2.082, "step": 23505 }, { "epoch": 0.78, "grad_norm": 0.75153648853302, "learning_rate": 2.3278019395510008e-06, "loss": 2.0658, "step": 23506 }, { "epoch": 0.78, "grad_norm": 0.7516559958457947, "learning_rate": 2.3271202600902966e-06, "loss": 2.0606, "step": 23507 }, { "epoch": 0.78, "grad_norm": 0.7454524040222168, "learning_rate": 2.326438667313291e-06, "loss": 2.1215, "step": 23508 }, { "epoch": 0.78, "grad_norm": 0.7594374418258667, "learning_rate": 2.3257571612276818e-06, "loss": 1.9674, "step": 23509 }, { "epoch": 0.78, "grad_norm": 0.7282988429069519, "learning_rate": 2.3250757418411698e-06, "loss": 2.0581, "step": 23510 }, { "epoch": 0.78, "grad_norm": 0.7189530730247498, "learning_rate": 2.3243944091614577e-06, "loss": 2.0032, "step": 23511 }, { "epoch": 0.78, "grad_norm": 0.7365098595619202, "learning_rate": 2.3237131631962383e-06, "loss": 2.0631, "step": 23512 }, { "epoch": 0.78, "grad_norm": 0.7153369188308716, "learning_rate": 2.3230320039532074e-06, "loss": 2.0036, "step": 23513 }, { "epoch": 0.78, "grad_norm": 0.7661045789718628, "learning_rate": 2.3223509314400637e-06, "loss": 2.0484, "step": 23514 }, { "epoch": 0.78, "grad_norm": 0.7179904580116272, "learning_rate": 2.3216699456644964e-06, "loss": 2.0522, "step": 23515 }, { "epoch": 0.78, "grad_norm": 0.7751089334487915, "learning_rate": 2.3209890466342055e-06, "loss": 2.0347, "step": 23516 }, { "epoch": 0.78, "grad_norm": 0.7579507827758789, "learning_rate": 2.320308234356877e-06, "loss": 2.0363, "step": 23517 }, { "epoch": 0.78, "grad_norm": 0.7656630873680115, "learning_rate": 2.319627508840209e-06, "loss": 2.0334, "step": 23518 }, { "epoch": 0.78, "grad_norm": 0.738566517829895, "learning_rate": 2.318946870091884e-06, "loss": 2.0511, "step": 23519 }, { "epoch": 0.78, "grad_norm": 0.7315531373023987, "learning_rate": 2.3182663181195997e-06, "loss": 2.039, "step": 23520 }, { "epoch": 0.78, "grad_norm": 0.7282350659370422, "learning_rate": 2.3175858529310404e-06, "loss": 1.9789, "step": 23521 }, { "epoch": 0.78, "grad_norm": 0.74435955286026, "learning_rate": 2.3169054745338903e-06, "loss": 1.9935, "step": 23522 }, { "epoch": 0.78, "grad_norm": 0.748498797416687, "learning_rate": 2.3162251829358397e-06, "loss": 2.0311, "step": 23523 }, { "epoch": 0.78, "grad_norm": 0.7749775052070618, "learning_rate": 2.315544978144579e-06, "loss": 2.0406, "step": 23524 }, { "epoch": 0.78, "grad_norm": 0.7482819557189941, "learning_rate": 2.3148648601677825e-06, "loss": 1.9789, "step": 23525 }, { "epoch": 0.78, "grad_norm": 0.729158878326416, "learning_rate": 2.3141848290131397e-06, "loss": 1.9937, "step": 23526 }, { "epoch": 0.78, "grad_norm": 0.7662271857261658, "learning_rate": 2.3135048846883344e-06, "loss": 1.9859, "step": 23527 }, { "epoch": 0.78, "grad_norm": 0.7428956627845764, "learning_rate": 2.3128250272010432e-06, "loss": 2.0283, "step": 23528 }, { "epoch": 0.78, "grad_norm": 0.7326934337615967, "learning_rate": 2.312145256558953e-06, "loss": 2.1012, "step": 23529 }, { "epoch": 0.78, "grad_norm": 0.7199211716651917, "learning_rate": 2.3114655727697364e-06, "loss": 2.0119, "step": 23530 }, { "epoch": 0.78, "grad_norm": 0.7496190071105957, "learning_rate": 2.31078597584108e-06, "loss": 2.0334, "step": 23531 }, { "epoch": 0.78, "grad_norm": 0.7249230146408081, "learning_rate": 2.3101064657806537e-06, "loss": 2.0073, "step": 23532 }, { "epoch": 0.78, "grad_norm": 0.7357186675071716, "learning_rate": 2.309427042596141e-06, "loss": 2.0029, "step": 23533 }, { "epoch": 0.78, "grad_norm": 0.7654830813407898, "learning_rate": 2.3087477062952135e-06, "loss": 2.0161, "step": 23534 }, { "epoch": 0.78, "grad_norm": 0.7393261790275574, "learning_rate": 2.308068456885545e-06, "loss": 2.0584, "step": 23535 }, { "epoch": 0.78, "grad_norm": 0.7441073656082153, "learning_rate": 2.3073892943748113e-06, "loss": 2.0306, "step": 23536 }, { "epoch": 0.78, "grad_norm": 0.7413317561149597, "learning_rate": 2.306710218770688e-06, "loss": 2.0593, "step": 23537 }, { "epoch": 0.78, "grad_norm": 0.7435477375984192, "learning_rate": 2.306031230080843e-06, "loss": 2.0113, "step": 23538 }, { "epoch": 0.78, "grad_norm": 0.7806531190872192, "learning_rate": 2.3053523283129455e-06, "loss": 2.0082, "step": 23539 }, { "epoch": 0.78, "grad_norm": 0.7768741846084595, "learning_rate": 2.304673513474671e-06, "loss": 2.0074, "step": 23540 }, { "epoch": 0.78, "grad_norm": 0.7511498928070068, "learning_rate": 2.303994785573682e-06, "loss": 2.0672, "step": 23541 }, { "epoch": 0.78, "grad_norm": 0.7799888849258423, "learning_rate": 2.303316144617653e-06, "loss": 2.1485, "step": 23542 }, { "epoch": 0.78, "grad_norm": 0.7495607733726501, "learning_rate": 2.302637590614247e-06, "loss": 2.1416, "step": 23543 }, { "epoch": 0.78, "grad_norm": 0.7572643160820007, "learning_rate": 2.301959123571128e-06, "loss": 1.9855, "step": 23544 }, { "epoch": 0.78, "grad_norm": 0.7390634417533875, "learning_rate": 2.301280743495964e-06, "loss": 2.1307, "step": 23545 }, { "epoch": 0.78, "grad_norm": 0.7172415256500244, "learning_rate": 2.3006024503964197e-06, "loss": 2.0289, "step": 23546 }, { "epoch": 0.78, "grad_norm": 0.7477096915245056, "learning_rate": 2.299924244280157e-06, "loss": 2.0893, "step": 23547 }, { "epoch": 0.78, "grad_norm": 0.7351077198982239, "learning_rate": 2.299246125154835e-06, "loss": 2.0521, "step": 23548 }, { "epoch": 0.78, "grad_norm": 0.7521158456802368, "learning_rate": 2.2985680930281207e-06, "loss": 2.031, "step": 23549 }, { "epoch": 0.78, "grad_norm": 0.7830166220664978, "learning_rate": 2.2978901479076665e-06, "loss": 2.1102, "step": 23550 }, { "epoch": 0.78, "grad_norm": 0.7598782181739807, "learning_rate": 2.2972122898011384e-06, "loss": 2.0407, "step": 23551 }, { "epoch": 0.78, "grad_norm": 0.7617616057395935, "learning_rate": 2.2965345187161892e-06, "loss": 1.9989, "step": 23552 }, { "epoch": 0.78, "grad_norm": 0.7692623734474182, "learning_rate": 2.2958568346604814e-06, "loss": 2.0405, "step": 23553 }, { "epoch": 0.78, "grad_norm": 0.7600765228271484, "learning_rate": 2.2951792376416648e-06, "loss": 2.0424, "step": 23554 }, { "epoch": 0.78, "grad_norm": 0.7529767751693726, "learning_rate": 2.294501727667401e-06, "loss": 2.0905, "step": 23555 }, { "epoch": 0.78, "grad_norm": 0.7281036376953125, "learning_rate": 2.2938243047453403e-06, "loss": 2.0321, "step": 23556 }, { "epoch": 0.78, "grad_norm": 0.7528734803199768, "learning_rate": 2.293146968883134e-06, "loss": 2.0315, "step": 23557 }, { "epoch": 0.78, "grad_norm": 0.735353946685791, "learning_rate": 2.292469720088436e-06, "loss": 2.0322, "step": 23558 }, { "epoch": 0.78, "grad_norm": 0.731687605381012, "learning_rate": 2.2917925583689016e-06, "loss": 2.0563, "step": 23559 }, { "epoch": 0.78, "grad_norm": 0.7748310565948486, "learning_rate": 2.291115483732177e-06, "loss": 2.0684, "step": 23560 }, { "epoch": 0.78, "grad_norm": 0.7436119318008423, "learning_rate": 2.2904384961859085e-06, "loss": 2.0256, "step": 23561 }, { "epoch": 0.78, "grad_norm": 0.7371118664741516, "learning_rate": 2.2897615957377507e-06, "loss": 2.0151, "step": 23562 }, { "epoch": 0.78, "grad_norm": 0.7507503628730774, "learning_rate": 2.2890847823953453e-06, "loss": 2.0297, "step": 23563 }, { "epoch": 0.78, "grad_norm": 0.7812598347663879, "learning_rate": 2.2884080561663437e-06, "loss": 2.0846, "step": 23564 }, { "epoch": 0.78, "grad_norm": 0.7584078311920166, "learning_rate": 2.2877314170583886e-06, "loss": 2.0885, "step": 23565 }, { "epoch": 0.78, "grad_norm": 0.7463155388832092, "learning_rate": 2.2870548650791215e-06, "loss": 2.0675, "step": 23566 }, { "epoch": 0.78, "grad_norm": 0.7308967709541321, "learning_rate": 2.2863784002361878e-06, "loss": 2.038, "step": 23567 }, { "epoch": 0.78, "grad_norm": 0.7545138001441956, "learning_rate": 2.2857020225372327e-06, "loss": 2.0659, "step": 23568 }, { "epoch": 0.78, "grad_norm": 0.7375462651252747, "learning_rate": 2.285025731989896e-06, "loss": 2.0525, "step": 23569 }, { "epoch": 0.78, "grad_norm": 0.7747974991798401, "learning_rate": 2.2843495286018135e-06, "loss": 2.0786, "step": 23570 }, { "epoch": 0.78, "grad_norm": 0.7513628602027893, "learning_rate": 2.2836734123806316e-06, "loss": 2.0756, "step": 23571 }, { "epoch": 0.78, "grad_norm": 0.755552351474762, "learning_rate": 2.2829973833339825e-06, "loss": 2.025, "step": 23572 }, { "epoch": 0.78, "grad_norm": 0.7485615611076355, "learning_rate": 2.2823214414695094e-06, "loss": 2.0224, "step": 23573 }, { "epoch": 0.78, "grad_norm": 0.7787623405456543, "learning_rate": 2.2816455867948416e-06, "loss": 1.9945, "step": 23574 }, { "epoch": 0.78, "grad_norm": 0.7569385766983032, "learning_rate": 2.2809698193176223e-06, "loss": 2.0853, "step": 23575 }, { "epoch": 0.78, "grad_norm": 0.7888135313987732, "learning_rate": 2.2802941390454793e-06, "loss": 2.1015, "step": 23576 }, { "epoch": 0.78, "grad_norm": 0.7930558919906616, "learning_rate": 2.2796185459860522e-06, "loss": 2.0597, "step": 23577 }, { "epoch": 0.78, "grad_norm": 0.7524420022964478, "learning_rate": 2.2789430401469693e-06, "loss": 2.0357, "step": 23578 }, { "epoch": 0.78, "grad_norm": 0.739093005657196, "learning_rate": 2.278267621535861e-06, "loss": 2.0106, "step": 23579 }, { "epoch": 0.78, "grad_norm": 0.7531952261924744, "learning_rate": 2.277592290160359e-06, "loss": 2.0562, "step": 23580 }, { "epoch": 0.78, "grad_norm": 0.7313003540039062, "learning_rate": 2.2769170460280965e-06, "loss": 2.0459, "step": 23581 }, { "epoch": 0.78, "grad_norm": 0.766413688659668, "learning_rate": 2.2762418891467e-06, "loss": 2.0402, "step": 23582 }, { "epoch": 0.78, "grad_norm": 0.7260028719902039, "learning_rate": 2.2755668195237924e-06, "loss": 2.0818, "step": 23583 }, { "epoch": 0.78, "grad_norm": 0.7636836767196655, "learning_rate": 2.274891837167006e-06, "loss": 2.1054, "step": 23584 }, { "epoch": 0.78, "grad_norm": 0.7263052463531494, "learning_rate": 2.274216942083962e-06, "loss": 2.071, "step": 23585 }, { "epoch": 0.78, "grad_norm": 0.7351335883140564, "learning_rate": 2.2735421342822903e-06, "loss": 2.0161, "step": 23586 }, { "epoch": 0.78, "grad_norm": 0.7323692440986633, "learning_rate": 2.2728674137696117e-06, "loss": 1.9184, "step": 23587 }, { "epoch": 0.78, "grad_norm": 0.75522780418396, "learning_rate": 2.272192780553546e-06, "loss": 2.0274, "step": 23588 }, { "epoch": 0.78, "grad_norm": 0.7576169967651367, "learning_rate": 2.2715182346417164e-06, "loss": 2.0378, "step": 23589 }, { "epoch": 0.78, "grad_norm": 0.738373875617981, "learning_rate": 2.270843776041748e-06, "loss": 2.0251, "step": 23590 }, { "epoch": 0.78, "grad_norm": 0.7415732145309448, "learning_rate": 2.2701694047612555e-06, "loss": 2.0661, "step": 23591 }, { "epoch": 0.78, "grad_norm": 0.7485771775245667, "learning_rate": 2.2694951208078574e-06, "loss": 2.0615, "step": 23592 }, { "epoch": 0.78, "grad_norm": 0.753497838973999, "learning_rate": 2.2688209241891758e-06, "loss": 1.9826, "step": 23593 }, { "epoch": 0.78, "grad_norm": 0.76187664270401, "learning_rate": 2.2681468149128217e-06, "loss": 2.1067, "step": 23594 }, { "epoch": 0.79, "grad_norm": 0.7242427468299866, "learning_rate": 2.267472792986415e-06, "loss": 2.0387, "step": 23595 }, { "epoch": 0.79, "grad_norm": 0.7302752733230591, "learning_rate": 2.2667988584175673e-06, "loss": 2.0757, "step": 23596 }, { "epoch": 0.79, "grad_norm": 0.7430997490882874, "learning_rate": 2.2661250112138966e-06, "loss": 2.065, "step": 23597 }, { "epoch": 0.79, "grad_norm": 0.7281623482704163, "learning_rate": 2.2654512513830105e-06, "loss": 2.0584, "step": 23598 }, { "epoch": 0.79, "grad_norm": 0.7423932552337646, "learning_rate": 2.2647775789325253e-06, "loss": 1.9977, "step": 23599 }, { "epoch": 0.79, "grad_norm": 0.7709879279136658, "learning_rate": 2.2641039938700503e-06, "loss": 2.0482, "step": 23600 }, { "epoch": 0.79, "grad_norm": 0.7609750032424927, "learning_rate": 2.263430496203193e-06, "loss": 2.091, "step": 23601 }, { "epoch": 0.79, "grad_norm": 0.7479986548423767, "learning_rate": 2.262757085939562e-06, "loss": 2.0246, "step": 23602 }, { "epoch": 0.79, "grad_norm": 0.7440741658210754, "learning_rate": 2.262083763086771e-06, "loss": 2.1033, "step": 23603 }, { "epoch": 0.79, "grad_norm": 0.7406244277954102, "learning_rate": 2.2614105276524223e-06, "loss": 2.0291, "step": 23604 }, { "epoch": 0.79, "grad_norm": 0.7464913725852966, "learning_rate": 2.260737379644119e-06, "loss": 2.0048, "step": 23605 }, { "epoch": 0.79, "grad_norm": 0.7784124612808228, "learning_rate": 2.260064319069473e-06, "loss": 1.992, "step": 23606 }, { "epoch": 0.79, "grad_norm": 0.7339638471603394, "learning_rate": 2.2593913459360804e-06, "loss": 2.071, "step": 23607 }, { "epoch": 0.79, "grad_norm": 0.7853075861930847, "learning_rate": 2.258718460251551e-06, "loss": 2.0552, "step": 23608 }, { "epoch": 0.79, "grad_norm": 0.7653780579566956, "learning_rate": 2.2580456620234836e-06, "loss": 2.0327, "step": 23609 }, { "epoch": 0.79, "grad_norm": 0.7785540223121643, "learning_rate": 2.2573729512594767e-06, "loss": 2.0599, "step": 23610 }, { "epoch": 0.79, "grad_norm": 0.7656276226043701, "learning_rate": 2.2567003279671316e-06, "loss": 2.1037, "step": 23611 }, { "epoch": 0.79, "grad_norm": 0.7550110816955566, "learning_rate": 2.2560277921540517e-06, "loss": 2.0342, "step": 23612 }, { "epoch": 0.79, "grad_norm": 0.7178660035133362, "learning_rate": 2.255355343827832e-06, "loss": 2.0894, "step": 23613 }, { "epoch": 0.79, "grad_norm": 0.774670422077179, "learning_rate": 2.2546829829960647e-06, "loss": 2.0018, "step": 23614 }, { "epoch": 0.79, "grad_norm": 0.7617074251174927, "learning_rate": 2.2540107096663533e-06, "loss": 2.0502, "step": 23615 }, { "epoch": 0.79, "grad_norm": 0.7612568736076355, "learning_rate": 2.253338523846287e-06, "loss": 2.0414, "step": 23616 }, { "epoch": 0.79, "grad_norm": 0.7606356739997864, "learning_rate": 2.2526664255434637e-06, "loss": 2.0436, "step": 23617 }, { "epoch": 0.79, "grad_norm": 0.7578970789909363, "learning_rate": 2.251994414765474e-06, "loss": 2.0382, "step": 23618 }, { "epoch": 0.79, "grad_norm": 0.7440611124038696, "learning_rate": 2.2513224915199117e-06, "loss": 2.1006, "step": 23619 }, { "epoch": 0.79, "grad_norm": 0.711390495300293, "learning_rate": 2.250650655814364e-06, "loss": 2.0406, "step": 23620 }, { "epoch": 0.79, "grad_norm": 0.7530195116996765, "learning_rate": 2.2499789076564237e-06, "loss": 2.042, "step": 23621 }, { "epoch": 0.79, "grad_norm": 0.7395023107528687, "learning_rate": 2.2493072470536857e-06, "loss": 2.0039, "step": 23622 }, { "epoch": 0.79, "grad_norm": 0.7774317860603333, "learning_rate": 2.248635674013726e-06, "loss": 2.0914, "step": 23623 }, { "epoch": 0.79, "grad_norm": 0.7292062640190125, "learning_rate": 2.2479641885441382e-06, "loss": 2.0086, "step": 23624 }, { "epoch": 0.79, "grad_norm": 0.7318323850631714, "learning_rate": 2.247292790652511e-06, "loss": 2.0655, "step": 23625 }, { "epoch": 0.79, "grad_norm": 0.7490631341934204, "learning_rate": 2.246621480346426e-06, "loss": 2.0168, "step": 23626 }, { "epoch": 0.79, "grad_norm": 0.7465776205062866, "learning_rate": 2.2459502576334634e-06, "loss": 2.0273, "step": 23627 }, { "epoch": 0.79, "grad_norm": 0.7453117370605469, "learning_rate": 2.2452791225212156e-06, "loss": 2.0067, "step": 23628 }, { "epoch": 0.79, "grad_norm": 0.7839908003807068, "learning_rate": 2.244608075017255e-06, "loss": 2.0539, "step": 23629 }, { "epoch": 0.79, "grad_norm": 0.7341722249984741, "learning_rate": 2.2439371151291677e-06, "loss": 2.0237, "step": 23630 }, { "epoch": 0.79, "grad_norm": 0.7251865863800049, "learning_rate": 2.243266242864537e-06, "loss": 2.0363, "step": 23631 }, { "epoch": 0.79, "grad_norm": 0.755972146987915, "learning_rate": 2.2425954582309374e-06, "loss": 2.0175, "step": 23632 }, { "epoch": 0.79, "grad_norm": 0.7262917160987854, "learning_rate": 2.2419247612359453e-06, "loss": 2.0502, "step": 23633 }, { "epoch": 0.79, "grad_norm": 0.7265647649765015, "learning_rate": 2.2412541518871445e-06, "loss": 2.0678, "step": 23634 }, { "epoch": 0.79, "grad_norm": 0.7309658527374268, "learning_rate": 2.2405836301921057e-06, "loss": 2.0521, "step": 23635 }, { "epoch": 0.79, "grad_norm": 0.7781156897544861, "learning_rate": 2.239913196158403e-06, "loss": 2.0395, "step": 23636 }, { "epoch": 0.79, "grad_norm": 0.7624852657318115, "learning_rate": 2.239242849793615e-06, "loss": 2.029, "step": 23637 }, { "epoch": 0.79, "grad_norm": 0.7409361004829407, "learning_rate": 2.2385725911053136e-06, "loss": 2.0583, "step": 23638 }, { "epoch": 0.79, "grad_norm": 0.7424526810646057, "learning_rate": 2.2379024201010715e-06, "loss": 2.0906, "step": 23639 }, { "epoch": 0.79, "grad_norm": 0.7443002462387085, "learning_rate": 2.237232336788455e-06, "loss": 2.0245, "step": 23640 }, { "epoch": 0.79, "grad_norm": 0.7554900050163269, "learning_rate": 2.2365623411750427e-06, "loss": 2.0864, "step": 23641 }, { "epoch": 0.79, "grad_norm": 0.7053746581077576, "learning_rate": 2.235892433268395e-06, "loss": 2.0403, "step": 23642 }, { "epoch": 0.79, "grad_norm": 0.7602192759513855, "learning_rate": 2.2352226130760847e-06, "loss": 2.0881, "step": 23643 }, { "epoch": 0.79, "grad_norm": 0.7537850141525269, "learning_rate": 2.234552880605685e-06, "loss": 2.0079, "step": 23644 }, { "epoch": 0.79, "grad_norm": 0.7397011518478394, "learning_rate": 2.23388323586475e-06, "loss": 1.9865, "step": 23645 }, { "epoch": 0.79, "grad_norm": 0.7676513195037842, "learning_rate": 2.2332136788608505e-06, "loss": 2.0262, "step": 23646 }, { "epoch": 0.79, "grad_norm": 0.7051846981048584, "learning_rate": 2.232544209601554e-06, "loss": 2.0205, "step": 23647 }, { "epoch": 0.79, "grad_norm": 0.761152982711792, "learning_rate": 2.2318748280944204e-06, "loss": 2.0273, "step": 23648 }, { "epoch": 0.79, "grad_norm": 0.7555580735206604, "learning_rate": 2.23120553434701e-06, "loss": 2.0543, "step": 23649 }, { "epoch": 0.79, "grad_norm": 0.7850946187973022, "learning_rate": 2.230536328366889e-06, "loss": 2.0376, "step": 23650 }, { "epoch": 0.79, "grad_norm": 0.753035843372345, "learning_rate": 2.2298672101616125e-06, "loss": 2.1189, "step": 23651 }, { "epoch": 0.79, "grad_norm": 0.7970600128173828, "learning_rate": 2.229198179738743e-06, "loss": 2.0299, "step": 23652 }, { "epoch": 0.79, "grad_norm": 0.7501860857009888, "learning_rate": 2.228529237105841e-06, "loss": 2.0395, "step": 23653 }, { "epoch": 0.79, "grad_norm": 0.7318435907363892, "learning_rate": 2.22786038227046e-06, "loss": 1.9822, "step": 23654 }, { "epoch": 0.79, "grad_norm": 0.7863469123840332, "learning_rate": 2.227191615240156e-06, "loss": 2.0715, "step": 23655 }, { "epoch": 0.79, "grad_norm": 0.7523480653762817, "learning_rate": 2.2265229360224883e-06, "loss": 2.0674, "step": 23656 }, { "epoch": 0.79, "grad_norm": 0.76875239610672, "learning_rate": 2.2258543446250092e-06, "loss": 2.0494, "step": 23657 }, { "epoch": 0.79, "grad_norm": 0.7588374614715576, "learning_rate": 2.2251858410552686e-06, "loss": 2.0795, "step": 23658 }, { "epoch": 0.79, "grad_norm": 0.7442683577537537, "learning_rate": 2.2245174253208214e-06, "loss": 1.9968, "step": 23659 }, { "epoch": 0.79, "grad_norm": 0.7446786165237427, "learning_rate": 2.2238490974292224e-06, "loss": 2.0726, "step": 23660 }, { "epoch": 0.79, "grad_norm": 0.7287999391555786, "learning_rate": 2.2231808573880165e-06, "loss": 2.0646, "step": 23661 }, { "epoch": 0.79, "grad_norm": 0.7380803823471069, "learning_rate": 2.222512705204758e-06, "loss": 2.1259, "step": 23662 }, { "epoch": 0.79, "grad_norm": 0.7377799153327942, "learning_rate": 2.221844640886993e-06, "loss": 2.0361, "step": 23663 }, { "epoch": 0.79, "grad_norm": 0.7310378551483154, "learning_rate": 2.221176664442266e-06, "loss": 2.0203, "step": 23664 }, { "epoch": 0.79, "grad_norm": 0.7672024369239807, "learning_rate": 2.220508775878126e-06, "loss": 2.0275, "step": 23665 }, { "epoch": 0.79, "grad_norm": 0.7604436874389648, "learning_rate": 2.2198409752021245e-06, "loss": 2.0541, "step": 23666 }, { "epoch": 0.79, "grad_norm": 0.7680114507675171, "learning_rate": 2.2191732624217954e-06, "loss": 2.0955, "step": 23667 }, { "epoch": 0.79, "grad_norm": 0.7599309682846069, "learning_rate": 2.2185056375446854e-06, "loss": 2.0192, "step": 23668 }, { "epoch": 0.79, "grad_norm": 0.7287400364875793, "learning_rate": 2.2178381005783413e-06, "loss": 2.0135, "step": 23669 }, { "epoch": 0.79, "grad_norm": 0.749250590801239, "learning_rate": 2.2171706515303016e-06, "loss": 2.0442, "step": 23670 }, { "epoch": 0.79, "grad_norm": 0.759213924407959, "learning_rate": 2.216503290408104e-06, "loss": 1.9958, "step": 23671 }, { "epoch": 0.79, "grad_norm": 0.7460319995880127, "learning_rate": 2.215836017219294e-06, "loss": 2.1775, "step": 23672 }, { "epoch": 0.79, "grad_norm": 0.7417365312576294, "learning_rate": 2.2151688319714037e-06, "loss": 2.0132, "step": 23673 }, { "epoch": 0.79, "grad_norm": 0.7209345102310181, "learning_rate": 2.214501734671973e-06, "loss": 2.1216, "step": 23674 }, { "epoch": 0.79, "grad_norm": 0.7342620491981506, "learning_rate": 2.213834725328542e-06, "loss": 2.0249, "step": 23675 }, { "epoch": 0.79, "grad_norm": 0.7343745827674866, "learning_rate": 2.213167803948644e-06, "loss": 1.9812, "step": 23676 }, { "epoch": 0.79, "grad_norm": 0.7436458468437195, "learning_rate": 2.212500970539808e-06, "loss": 1.9753, "step": 23677 }, { "epoch": 0.79, "grad_norm": 0.7423403263092041, "learning_rate": 2.211834225109576e-06, "loss": 2.0253, "step": 23678 }, { "epoch": 0.79, "grad_norm": 0.7360560894012451, "learning_rate": 2.2111675676654764e-06, "loss": 2.0582, "step": 23679 }, { "epoch": 0.79, "grad_norm": 0.7348859310150146, "learning_rate": 2.2105009982150395e-06, "loss": 2.0268, "step": 23680 }, { "epoch": 0.79, "grad_norm": 0.7292592525482178, "learning_rate": 2.209834516765795e-06, "loss": 1.9193, "step": 23681 }, { "epoch": 0.79, "grad_norm": 0.7509409189224243, "learning_rate": 2.2091681233252793e-06, "loss": 2.0115, "step": 23682 }, { "epoch": 0.79, "grad_norm": 0.7474276423454285, "learning_rate": 2.208501817901012e-06, "loss": 1.9757, "step": 23683 }, { "epoch": 0.79, "grad_norm": 0.7274956107139587, "learning_rate": 2.2078356005005285e-06, "loss": 2.0437, "step": 23684 }, { "epoch": 0.79, "grad_norm": 0.7594448328018188, "learning_rate": 2.2071694711313516e-06, "loss": 2.0406, "step": 23685 }, { "epoch": 0.79, "grad_norm": 0.7546924352645874, "learning_rate": 2.2065034298010035e-06, "loss": 2.0556, "step": 23686 }, { "epoch": 0.79, "grad_norm": 0.7417657375335693, "learning_rate": 2.2058374765170134e-06, "loss": 2.0704, "step": 23687 }, { "epoch": 0.79, "grad_norm": 0.7451030611991882, "learning_rate": 2.205171611286908e-06, "loss": 2.0421, "step": 23688 }, { "epoch": 0.79, "grad_norm": 0.750341534614563, "learning_rate": 2.2045058341182013e-06, "loss": 2.044, "step": 23689 }, { "epoch": 0.79, "grad_norm": 0.7353470921516418, "learning_rate": 2.2038401450184177e-06, "loss": 1.971, "step": 23690 }, { "epoch": 0.79, "grad_norm": 0.7611587047576904, "learning_rate": 2.2031745439950837e-06, "loss": 2.1121, "step": 23691 }, { "epoch": 0.79, "grad_norm": 0.7636519074440002, "learning_rate": 2.2025090310557097e-06, "loss": 2.0848, "step": 23692 }, { "epoch": 0.79, "grad_norm": 0.8071422576904297, "learning_rate": 2.201843606207823e-06, "loss": 2.0832, "step": 23693 }, { "epoch": 0.79, "grad_norm": 0.7979187369346619, "learning_rate": 2.2011782694589356e-06, "loss": 2.0846, "step": 23694 }, { "epoch": 0.79, "grad_norm": 0.7547232508659363, "learning_rate": 2.2005130208165636e-06, "loss": 2.0401, "step": 23695 }, { "epoch": 0.79, "grad_norm": 0.759938657283783, "learning_rate": 2.1998478602882255e-06, "loss": 2.1031, "step": 23696 }, { "epoch": 0.79, "grad_norm": 0.7636541724205017, "learning_rate": 2.1991827878814364e-06, "loss": 2.0809, "step": 23697 }, { "epoch": 0.79, "grad_norm": 0.722571611404419, "learning_rate": 2.1985178036037093e-06, "loss": 2.0154, "step": 23698 }, { "epoch": 0.79, "grad_norm": 0.7514976859092712, "learning_rate": 2.197852907462552e-06, "loss": 2.0722, "step": 23699 }, { "epoch": 0.79, "grad_norm": 0.748512327671051, "learning_rate": 2.1971880994654836e-06, "loss": 2.0605, "step": 23700 }, { "epoch": 0.79, "grad_norm": 0.7539935111999512, "learning_rate": 2.1965233796200114e-06, "loss": 2.0568, "step": 23701 }, { "epoch": 0.79, "grad_norm": 0.7448989152908325, "learning_rate": 2.195858747933641e-06, "loss": 2.1012, "step": 23702 }, { "epoch": 0.79, "grad_norm": 0.7734174132347107, "learning_rate": 2.1951942044138865e-06, "loss": 2.0843, "step": 23703 }, { "epoch": 0.79, "grad_norm": 0.7483270168304443, "learning_rate": 2.194529749068255e-06, "loss": 2.0506, "step": 23704 }, { "epoch": 0.79, "grad_norm": 0.7270799279212952, "learning_rate": 2.19386538190425e-06, "loss": 2.0559, "step": 23705 }, { "epoch": 0.79, "grad_norm": 0.7467303276062012, "learning_rate": 2.193201102929381e-06, "loss": 2.1433, "step": 23706 }, { "epoch": 0.79, "grad_norm": 0.7677479982376099, "learning_rate": 2.192536912151152e-06, "loss": 2.0396, "step": 23707 }, { "epoch": 0.79, "grad_norm": 0.7468218803405762, "learning_rate": 2.191872809577061e-06, "loss": 2.0162, "step": 23708 }, { "epoch": 0.79, "grad_norm": 0.754470705986023, "learning_rate": 2.1912087952146167e-06, "loss": 2.0498, "step": 23709 }, { "epoch": 0.79, "grad_norm": 0.7728621363639832, "learning_rate": 2.1905448690713236e-06, "loss": 2.0292, "step": 23710 }, { "epoch": 0.79, "grad_norm": 0.7209801077842712, "learning_rate": 2.1898810311546723e-06, "loss": 2.0394, "step": 23711 }, { "epoch": 0.79, "grad_norm": 0.7537707090377808, "learning_rate": 2.189217281472168e-06, "loss": 2.0933, "step": 23712 }, { "epoch": 0.79, "grad_norm": 0.7559280395507812, "learning_rate": 2.188553620031312e-06, "loss": 2.0732, "step": 23713 }, { "epoch": 0.79, "grad_norm": 0.7252349853515625, "learning_rate": 2.187890046839596e-06, "loss": 2.0368, "step": 23714 }, { "epoch": 0.79, "grad_norm": 0.7561346292495728, "learning_rate": 2.187226561904523e-06, "loss": 2.0321, "step": 23715 }, { "epoch": 0.79, "grad_norm": 0.7298838496208191, "learning_rate": 2.1865631652335863e-06, "loss": 1.9998, "step": 23716 }, { "epoch": 0.79, "grad_norm": 0.7268658876419067, "learning_rate": 2.185899856834276e-06, "loss": 2.0838, "step": 23717 }, { "epoch": 0.79, "grad_norm": 0.7621577978134155, "learning_rate": 2.185236636714091e-06, "loss": 2.042, "step": 23718 }, { "epoch": 0.79, "grad_norm": 0.7524171471595764, "learning_rate": 2.184573504880524e-06, "loss": 1.9774, "step": 23719 }, { "epoch": 0.79, "grad_norm": 0.7304760217666626, "learning_rate": 2.1839104613410655e-06, "loss": 2.0832, "step": 23720 }, { "epoch": 0.79, "grad_norm": 0.7413729429244995, "learning_rate": 2.183247506103203e-06, "loss": 2.0261, "step": 23721 }, { "epoch": 0.79, "grad_norm": 0.7955942153930664, "learning_rate": 2.18258463917443e-06, "loss": 2.0674, "step": 23722 }, { "epoch": 0.79, "grad_norm": 0.7688320875167847, "learning_rate": 2.1819218605622362e-06, "loss": 1.9601, "step": 23723 }, { "epoch": 0.79, "grad_norm": 0.7308465838432312, "learning_rate": 2.181259170274108e-06, "loss": 2.1032, "step": 23724 }, { "epoch": 0.79, "grad_norm": 0.7447122931480408, "learning_rate": 2.180596568317528e-06, "loss": 2.088, "step": 23725 }, { "epoch": 0.79, "grad_norm": 0.7433052062988281, "learning_rate": 2.179934054699989e-06, "loss": 2.0439, "step": 23726 }, { "epoch": 0.79, "grad_norm": 0.7689745426177979, "learning_rate": 2.1792716294289683e-06, "loss": 2.0674, "step": 23727 }, { "epoch": 0.79, "grad_norm": 0.7288118600845337, "learning_rate": 2.1786092925119573e-06, "loss": 2.0643, "step": 23728 }, { "epoch": 0.79, "grad_norm": 0.750744104385376, "learning_rate": 2.1779470439564345e-06, "loss": 2.0287, "step": 23729 }, { "epoch": 0.79, "grad_norm": 0.7266230583190918, "learning_rate": 2.1772848837698778e-06, "loss": 2.005, "step": 23730 }, { "epoch": 0.79, "grad_norm": 0.7595853805541992, "learning_rate": 2.1766228119597733e-06, "loss": 2.0618, "step": 23731 }, { "epoch": 0.79, "grad_norm": 0.7518106698989868, "learning_rate": 2.175960828533601e-06, "loss": 1.998, "step": 23732 }, { "epoch": 0.79, "grad_norm": 0.7265350818634033, "learning_rate": 2.1752989334988385e-06, "loss": 2.0376, "step": 23733 }, { "epoch": 0.79, "grad_norm": 0.7308914661407471, "learning_rate": 2.1746371268629594e-06, "loss": 2.0497, "step": 23734 }, { "epoch": 0.79, "grad_norm": 0.769344687461853, "learning_rate": 2.1739754086334477e-06, "loss": 2.0446, "step": 23735 }, { "epoch": 0.79, "grad_norm": 0.7268562316894531, "learning_rate": 2.173313778817773e-06, "loss": 2.018, "step": 23736 }, { "epoch": 0.79, "grad_norm": 0.7512324452400208, "learning_rate": 2.172652237423414e-06, "loss": 2.0167, "step": 23737 }, { "epoch": 0.79, "grad_norm": 0.7773889303207397, "learning_rate": 2.17199078445784e-06, "loss": 2.0793, "step": 23738 }, { "epoch": 0.79, "grad_norm": 0.7406313419342041, "learning_rate": 2.1713294199285293e-06, "loss": 2.0244, "step": 23739 }, { "epoch": 0.79, "grad_norm": 0.7373641133308411, "learning_rate": 2.170668143842949e-06, "loss": 1.973, "step": 23740 }, { "epoch": 0.79, "grad_norm": 0.7507232427597046, "learning_rate": 2.1700069562085736e-06, "loss": 2.0154, "step": 23741 }, { "epoch": 0.79, "grad_norm": 0.7756134867668152, "learning_rate": 2.1693458570328707e-06, "loss": 2.0568, "step": 23742 }, { "epoch": 0.79, "grad_norm": 0.7778127193450928, "learning_rate": 2.1686848463233057e-06, "loss": 2.099, "step": 23743 }, { "epoch": 0.79, "grad_norm": 0.76261967420578, "learning_rate": 2.16802392408735e-06, "loss": 2.0976, "step": 23744 }, { "epoch": 0.79, "grad_norm": 0.7695951461791992, "learning_rate": 2.167363090332474e-06, "loss": 2.0611, "step": 23745 }, { "epoch": 0.79, "grad_norm": 0.7367326617240906, "learning_rate": 2.1667023450661383e-06, "loss": 1.9828, "step": 23746 }, { "epoch": 0.79, "grad_norm": 0.7277584075927734, "learning_rate": 2.166041688295807e-06, "loss": 2.0213, "step": 23747 }, { "epoch": 0.79, "grad_norm": 0.7555390000343323, "learning_rate": 2.1653811200289467e-06, "loss": 2.0159, "step": 23748 }, { "epoch": 0.79, "grad_norm": 0.7677854895591736, "learning_rate": 2.164720640273017e-06, "loss": 2.0691, "step": 23749 }, { "epoch": 0.79, "grad_norm": 0.7738224267959595, "learning_rate": 2.1640602490354846e-06, "loss": 2.0039, "step": 23750 }, { "epoch": 0.79, "grad_norm": 0.7399836182594299, "learning_rate": 2.1633999463238075e-06, "loss": 1.9797, "step": 23751 }, { "epoch": 0.79, "grad_norm": 0.7431377172470093, "learning_rate": 2.1627397321454413e-06, "loss": 2.0607, "step": 23752 }, { "epoch": 0.79, "grad_norm": 0.7499891519546509, "learning_rate": 2.1620796065078496e-06, "loss": 2.0752, "step": 23753 }, { "epoch": 0.79, "grad_norm": 0.7329750657081604, "learning_rate": 2.1614195694184914e-06, "loss": 2.0212, "step": 23754 }, { "epoch": 0.79, "grad_norm": 0.7458094358444214, "learning_rate": 2.16075962088482e-06, "loss": 1.9728, "step": 23755 }, { "epoch": 0.79, "grad_norm": 0.7337527275085449, "learning_rate": 2.1600997609142914e-06, "loss": 2.1137, "step": 23756 }, { "epoch": 0.79, "grad_norm": 0.8071608543395996, "learning_rate": 2.1594399895143626e-06, "loss": 2.1056, "step": 23757 }, { "epoch": 0.79, "grad_norm": 0.7372292876243591, "learning_rate": 2.158780306692483e-06, "loss": 2.0795, "step": 23758 }, { "epoch": 0.79, "grad_norm": 0.7833998799324036, "learning_rate": 2.158120712456111e-06, "loss": 2.0649, "step": 23759 }, { "epoch": 0.79, "grad_norm": 0.7692787647247314, "learning_rate": 2.157461206812693e-06, "loss": 2.0633, "step": 23760 }, { "epoch": 0.79, "grad_norm": 0.7255768179893494, "learning_rate": 2.1568017897696847e-06, "loss": 2.0256, "step": 23761 }, { "epoch": 0.79, "grad_norm": 0.7694878578186035, "learning_rate": 2.1561424613345295e-06, "loss": 2.0442, "step": 23762 }, { "epoch": 0.79, "grad_norm": 0.7284373044967651, "learning_rate": 2.155483221514684e-06, "loss": 2.0622, "step": 23763 }, { "epoch": 0.79, "grad_norm": 0.7694969177246094, "learning_rate": 2.1548240703175903e-06, "loss": 2.0582, "step": 23764 }, { "epoch": 0.79, "grad_norm": 0.7884646654129028, "learning_rate": 2.1541650077506947e-06, "loss": 2.0272, "step": 23765 }, { "epoch": 0.79, "grad_norm": 0.7542934417724609, "learning_rate": 2.1535060338214453e-06, "loss": 2.1055, "step": 23766 }, { "epoch": 0.79, "grad_norm": 0.7202092409133911, "learning_rate": 2.152847148537288e-06, "loss": 2.0707, "step": 23767 }, { "epoch": 0.79, "grad_norm": 0.7285504341125488, "learning_rate": 2.152188351905665e-06, "loss": 1.9957, "step": 23768 }, { "epoch": 0.79, "grad_norm": 0.7719646096229553, "learning_rate": 2.151529643934016e-06, "loss": 2.1484, "step": 23769 }, { "epoch": 0.79, "grad_norm": 0.7532100081443787, "learning_rate": 2.150871024629788e-06, "loss": 2.0807, "step": 23770 }, { "epoch": 0.79, "grad_norm": 0.7245051264762878, "learning_rate": 2.1502124940004167e-06, "loss": 2.049, "step": 23771 }, { "epoch": 0.79, "grad_norm": 0.7317708134651184, "learning_rate": 2.1495540520533465e-06, "loss": 2.0418, "step": 23772 }, { "epoch": 0.79, "grad_norm": 0.7627023458480835, "learning_rate": 2.148895698796014e-06, "loss": 2.0688, "step": 23773 }, { "epoch": 0.79, "grad_norm": 0.7387031316757202, "learning_rate": 2.1482374342358547e-06, "loss": 2.0787, "step": 23774 }, { "epoch": 0.79, "grad_norm": 0.742496907711029, "learning_rate": 2.1475792583803067e-06, "loss": 2.116, "step": 23775 }, { "epoch": 0.79, "grad_norm": 0.7471238970756531, "learning_rate": 2.1469211712368088e-06, "loss": 2.0464, "step": 23776 }, { "epoch": 0.79, "grad_norm": 0.7389901280403137, "learning_rate": 2.1462631728127937e-06, "loss": 2.0708, "step": 23777 }, { "epoch": 0.79, "grad_norm": 0.7288243174552917, "learning_rate": 2.145605263115691e-06, "loss": 2.0862, "step": 23778 }, { "epoch": 0.79, "grad_norm": 0.732835590839386, "learning_rate": 2.14494744215294e-06, "loss": 2.0359, "step": 23779 }, { "epoch": 0.79, "grad_norm": 0.720294713973999, "learning_rate": 2.1442897099319673e-06, "loss": 2.0091, "step": 23780 }, { "epoch": 0.79, "grad_norm": 0.7516170144081116, "learning_rate": 2.143632066460207e-06, "loss": 2.0379, "step": 23781 }, { "epoch": 0.79, "grad_norm": 0.7479332089424133, "learning_rate": 2.142974511745085e-06, "loss": 1.9876, "step": 23782 }, { "epoch": 0.79, "grad_norm": 0.7255694270133972, "learning_rate": 2.1423170457940355e-06, "loss": 1.9969, "step": 23783 }, { "epoch": 0.79, "grad_norm": 0.7325586080551147, "learning_rate": 2.1416596686144796e-06, "loss": 2.0446, "step": 23784 }, { "epoch": 0.79, "grad_norm": 0.7238690257072449, "learning_rate": 2.1410023802138513e-06, "loss": 2.0215, "step": 23785 }, { "epoch": 0.79, "grad_norm": 0.7633205652236938, "learning_rate": 2.140345180599571e-06, "loss": 2.0511, "step": 23786 }, { "epoch": 0.79, "grad_norm": 0.7324872612953186, "learning_rate": 2.139688069779062e-06, "loss": 2.0091, "step": 23787 }, { "epoch": 0.79, "grad_norm": 0.7247252464294434, "learning_rate": 2.1390310477597507e-06, "loss": 2.0108, "step": 23788 }, { "epoch": 0.79, "grad_norm": 0.7717952132225037, "learning_rate": 2.1383741145490633e-06, "loss": 2.068, "step": 23789 }, { "epoch": 0.79, "grad_norm": 0.7341840863227844, "learning_rate": 2.1377172701544167e-06, "loss": 2.0133, "step": 23790 }, { "epoch": 0.79, "grad_norm": 0.7492052316665649, "learning_rate": 2.13706051458323e-06, "loss": 2.0768, "step": 23791 }, { "epoch": 0.79, "grad_norm": 0.71937096118927, "learning_rate": 2.1364038478429283e-06, "loss": 2.0228, "step": 23792 }, { "epoch": 0.79, "grad_norm": 0.777675986289978, "learning_rate": 2.1357472699409253e-06, "loss": 2.064, "step": 23793 }, { "epoch": 0.79, "grad_norm": 0.7439678907394409, "learning_rate": 2.1350907808846434e-06, "loss": 1.9987, "step": 23794 }, { "epoch": 0.79, "grad_norm": 0.7402308583259583, "learning_rate": 2.134434380681496e-06, "loss": 2.0568, "step": 23795 }, { "epoch": 0.79, "grad_norm": 0.7478187084197998, "learning_rate": 2.1337780693388964e-06, "loss": 2.058, "step": 23796 }, { "epoch": 0.79, "grad_norm": 0.7419453859329224, "learning_rate": 2.1331218468642622e-06, "loss": 2.0201, "step": 23797 }, { "epoch": 0.79, "grad_norm": 0.747994065284729, "learning_rate": 2.1324657132650107e-06, "loss": 2.0919, "step": 23798 }, { "epoch": 0.79, "grad_norm": 0.7557610273361206, "learning_rate": 2.13180966854855e-06, "loss": 2.0827, "step": 23799 }, { "epoch": 0.79, "grad_norm": 0.7382601499557495, "learning_rate": 2.1311537127222894e-06, "loss": 2.0355, "step": 23800 }, { "epoch": 0.79, "grad_norm": 0.7647411823272705, "learning_rate": 2.130497845793645e-06, "loss": 2.0151, "step": 23801 }, { "epoch": 0.79, "grad_norm": 0.7414058446884155, "learning_rate": 2.1298420677700226e-06, "loss": 2.0359, "step": 23802 }, { "epoch": 0.79, "grad_norm": 0.7301093339920044, "learning_rate": 2.129186378658834e-06, "loss": 2.0681, "step": 23803 }, { "epoch": 0.79, "grad_norm": 0.7572154402732849, "learning_rate": 2.1285307784674827e-06, "loss": 2.0703, "step": 23804 }, { "epoch": 0.79, "grad_norm": 0.7398231029510498, "learning_rate": 2.1278752672033787e-06, "loss": 2.042, "step": 23805 }, { "epoch": 0.79, "grad_norm": 0.7268497943878174, "learning_rate": 2.1272198448739255e-06, "loss": 2.0548, "step": 23806 }, { "epoch": 0.79, "grad_norm": 0.7275376915931702, "learning_rate": 2.1265645114865275e-06, "loss": 2.0329, "step": 23807 }, { "epoch": 0.79, "grad_norm": 0.7279407382011414, "learning_rate": 2.125909267048596e-06, "loss": 2.0143, "step": 23808 }, { "epoch": 0.79, "grad_norm": 0.7743759751319885, "learning_rate": 2.125254111567521e-06, "loss": 2.0332, "step": 23809 }, { "epoch": 0.79, "grad_norm": 0.7421111464500427, "learning_rate": 2.124599045050709e-06, "loss": 2.0219, "step": 23810 }, { "epoch": 0.79, "grad_norm": 0.7650473117828369, "learning_rate": 2.1239440675055643e-06, "loss": 2.0855, "step": 23811 }, { "epoch": 0.79, "grad_norm": 0.7282496094703674, "learning_rate": 2.123289178939485e-06, "loss": 1.9756, "step": 23812 }, { "epoch": 0.79, "grad_norm": 0.7483336925506592, "learning_rate": 2.1226343793598646e-06, "loss": 1.9975, "step": 23813 }, { "epoch": 0.79, "grad_norm": 0.7448161244392395, "learning_rate": 2.1219796687741078e-06, "loss": 2.0612, "step": 23814 }, { "epoch": 0.79, "grad_norm": 0.7480015754699707, "learning_rate": 2.121325047189605e-06, "loss": 2.034, "step": 23815 }, { "epoch": 0.79, "grad_norm": 0.7583790421485901, "learning_rate": 2.1206705146137574e-06, "loss": 2.0845, "step": 23816 }, { "epoch": 0.79, "grad_norm": 0.7382501363754272, "learning_rate": 2.120016071053955e-06, "loss": 2.0328, "step": 23817 }, { "epoch": 0.79, "grad_norm": 0.7349620461463928, "learning_rate": 2.119361716517592e-06, "loss": 2.047, "step": 23818 }, { "epoch": 0.79, "grad_norm": 0.7276656031608582, "learning_rate": 2.118707451012061e-06, "loss": 1.9819, "step": 23819 }, { "epoch": 0.79, "grad_norm": 0.7503131628036499, "learning_rate": 2.1180532745447568e-06, "loss": 2.0497, "step": 23820 }, { "epoch": 0.79, "grad_norm": 0.7267643809318542, "learning_rate": 2.1173991871230683e-06, "loss": 2.0598, "step": 23821 }, { "epoch": 0.79, "grad_norm": 0.7460064888000488, "learning_rate": 2.116745188754381e-06, "loss": 2.0343, "step": 23822 }, { "epoch": 0.79, "grad_norm": 0.7642011642456055, "learning_rate": 2.1160912794460863e-06, "loss": 1.9927, "step": 23823 }, { "epoch": 0.79, "grad_norm": 0.8003596067428589, "learning_rate": 2.115437459205575e-06, "loss": 2.0492, "step": 23824 }, { "epoch": 0.79, "grad_norm": 0.7773311734199524, "learning_rate": 2.1147837280402293e-06, "loss": 1.9894, "step": 23825 }, { "epoch": 0.79, "grad_norm": 0.7727688550949097, "learning_rate": 2.1141300859574344e-06, "loss": 2.0933, "step": 23826 }, { "epoch": 0.79, "grad_norm": 0.747826099395752, "learning_rate": 2.113476532964579e-06, "loss": 2.0333, "step": 23827 }, { "epoch": 0.79, "grad_norm": 0.742027759552002, "learning_rate": 2.11282306906904e-06, "loss": 2.0364, "step": 23828 }, { "epoch": 0.79, "grad_norm": 0.7422628402709961, "learning_rate": 2.1121696942782044e-06, "loss": 2.0187, "step": 23829 }, { "epoch": 0.79, "grad_norm": 0.7441149353981018, "learning_rate": 2.111516408599459e-06, "loss": 2.0868, "step": 23830 }, { "epoch": 0.79, "grad_norm": 0.7450019717216492, "learning_rate": 2.1108632120401718e-06, "loss": 2.0348, "step": 23831 }, { "epoch": 0.79, "grad_norm": 0.78092360496521, "learning_rate": 2.1102101046077283e-06, "loss": 2.0353, "step": 23832 }, { "epoch": 0.79, "grad_norm": 0.7648126482963562, "learning_rate": 2.1095570863095093e-06, "loss": 2.0767, "step": 23833 }, { "epoch": 0.79, "grad_norm": 0.7563747763633728, "learning_rate": 2.108904157152891e-06, "loss": 2.0619, "step": 23834 }, { "epoch": 0.79, "grad_norm": 0.7490023970603943, "learning_rate": 2.1082513171452468e-06, "loss": 2.0452, "step": 23835 }, { "epoch": 0.79, "grad_norm": 0.7652599811553955, "learning_rate": 2.1075985662939556e-06, "loss": 2.1034, "step": 23836 }, { "epoch": 0.79, "grad_norm": 0.7646187543869019, "learning_rate": 2.106945904606389e-06, "loss": 2.0556, "step": 23837 }, { "epoch": 0.79, "grad_norm": 0.7383307218551636, "learning_rate": 2.10629333208992e-06, "loss": 1.9899, "step": 23838 }, { "epoch": 0.79, "grad_norm": 0.7453235387802124, "learning_rate": 2.1056408487519274e-06, "loss": 2.0454, "step": 23839 }, { "epoch": 0.79, "grad_norm": 0.7448431849479675, "learning_rate": 2.104988454599777e-06, "loss": 2.0557, "step": 23840 }, { "epoch": 0.79, "grad_norm": 0.7670080065727234, "learning_rate": 2.1043361496408377e-06, "loss": 2.0013, "step": 23841 }, { "epoch": 0.79, "grad_norm": 0.7708730101585388, "learning_rate": 2.1036839338824846e-06, "loss": 2.0311, "step": 23842 }, { "epoch": 0.79, "grad_norm": 0.7660013437271118, "learning_rate": 2.103031807332081e-06, "loss": 2.0166, "step": 23843 }, { "epoch": 0.79, "grad_norm": 0.7123951315879822, "learning_rate": 2.102379769996994e-06, "loss": 2.0468, "step": 23844 }, { "epoch": 0.79, "grad_norm": 0.7426466345787048, "learning_rate": 2.1017278218845927e-06, "loss": 1.975, "step": 23845 }, { "epoch": 0.79, "grad_norm": 0.7411551475524902, "learning_rate": 2.1010759630022436e-06, "loss": 2.0212, "step": 23846 }, { "epoch": 0.79, "grad_norm": 0.7469449043273926, "learning_rate": 2.100424193357309e-06, "loss": 2.0405, "step": 23847 }, { "epoch": 0.79, "grad_norm": 0.7269944548606873, "learning_rate": 2.0997725129571502e-06, "loss": 2.0441, "step": 23848 }, { "epoch": 0.79, "grad_norm": 0.7457621097564697, "learning_rate": 2.0991209218091336e-06, "loss": 1.982, "step": 23849 }, { "epoch": 0.79, "grad_norm": 0.7549929022789001, "learning_rate": 2.0984694199206156e-06, "loss": 2.0134, "step": 23850 }, { "epoch": 0.79, "grad_norm": 0.7349221110343933, "learning_rate": 2.0978180072989597e-06, "loss": 2.021, "step": 23851 }, { "epoch": 0.79, "grad_norm": 0.7361078262329102, "learning_rate": 2.0971666839515305e-06, "loss": 2.0203, "step": 23852 }, { "epoch": 0.79, "grad_norm": 0.770860493183136, "learning_rate": 2.0965154498856744e-06, "loss": 2.0032, "step": 23853 }, { "epoch": 0.79, "grad_norm": 0.7455860376358032, "learning_rate": 2.0958643051087558e-06, "loss": 2.0389, "step": 23854 }, { "epoch": 0.79, "grad_norm": 0.7228385210037231, "learning_rate": 2.095213249628132e-06, "loss": 2.0126, "step": 23855 }, { "epoch": 0.79, "grad_norm": 0.7507182359695435, "learning_rate": 2.094562283451157e-06, "loss": 2.1085, "step": 23856 }, { "epoch": 0.79, "grad_norm": 0.7216410636901855, "learning_rate": 2.093911406585181e-06, "loss": 2.0554, "step": 23857 }, { "epoch": 0.79, "grad_norm": 0.7724502682685852, "learning_rate": 2.0932606190375624e-06, "loss": 2.0705, "step": 23858 }, { "epoch": 0.79, "grad_norm": 0.7391021251678467, "learning_rate": 2.0926099208156505e-06, "loss": 2.0404, "step": 23859 }, { "epoch": 0.79, "grad_norm": 0.7532942891120911, "learning_rate": 2.0919593119267967e-06, "loss": 1.9934, "step": 23860 }, { "epoch": 0.79, "grad_norm": 0.7816571593284607, "learning_rate": 2.0913087923783547e-06, "loss": 2.0617, "step": 23861 }, { "epoch": 0.79, "grad_norm": 0.7489032745361328, "learning_rate": 2.090658362177671e-06, "loss": 2.0111, "step": 23862 }, { "epoch": 0.79, "grad_norm": 0.7545264959335327, "learning_rate": 2.0900080213320904e-06, "loss": 2.004, "step": 23863 }, { "epoch": 0.79, "grad_norm": 0.7162925601005554, "learning_rate": 2.0893577698489674e-06, "loss": 2.013, "step": 23864 }, { "epoch": 0.79, "grad_norm": 0.7426259517669678, "learning_rate": 2.088707607735644e-06, "loss": 2.0845, "step": 23865 }, { "epoch": 0.79, "grad_norm": 0.7458286881446838, "learning_rate": 2.0880575349994623e-06, "loss": 2.1092, "step": 23866 }, { "epoch": 0.79, "grad_norm": 0.7328277230262756, "learning_rate": 2.08740755164777e-06, "loss": 2.0587, "step": 23867 }, { "epoch": 0.79, "grad_norm": 0.7315108776092529, "learning_rate": 2.0867576576879133e-06, "loss": 1.9971, "step": 23868 }, { "epoch": 0.79, "grad_norm": 0.7838161587715149, "learning_rate": 2.086107853127227e-06, "loss": 2.062, "step": 23869 }, { "epoch": 0.79, "grad_norm": 0.7372422218322754, "learning_rate": 2.08545813797306e-06, "loss": 2.0665, "step": 23870 }, { "epoch": 0.79, "grad_norm": 0.7289823889732361, "learning_rate": 2.0848085122327476e-06, "loss": 2.0465, "step": 23871 }, { "epoch": 0.79, "grad_norm": 0.7478293776512146, "learning_rate": 2.084158975913628e-06, "loss": 2.0552, "step": 23872 }, { "epoch": 0.79, "grad_norm": 0.7334038615226746, "learning_rate": 2.083509529023041e-06, "loss": 1.957, "step": 23873 }, { "epoch": 0.79, "grad_norm": 0.7341417074203491, "learning_rate": 2.0828601715683295e-06, "loss": 2.0813, "step": 23874 }, { "epoch": 0.79, "grad_norm": 0.7505955696105957, "learning_rate": 2.082210903556817e-06, "loss": 2.0594, "step": 23875 }, { "epoch": 0.79, "grad_norm": 0.7447357773780823, "learning_rate": 2.0815617249958462e-06, "loss": 2.0586, "step": 23876 }, { "epoch": 0.79, "grad_norm": 0.7629621624946594, "learning_rate": 2.0809126358927546e-06, "loss": 2.0422, "step": 23877 }, { "epoch": 0.79, "grad_norm": 0.7442324161529541, "learning_rate": 2.080263636254869e-06, "loss": 2.0934, "step": 23878 }, { "epoch": 0.79, "grad_norm": 0.7598207592964172, "learning_rate": 2.0796147260895214e-06, "loss": 2.1284, "step": 23879 }, { "epoch": 0.79, "grad_norm": 0.7288111448287964, "learning_rate": 2.078965905404048e-06, "loss": 2.0641, "step": 23880 }, { "epoch": 0.79, "grad_norm": 0.7293943166732788, "learning_rate": 2.078317174205773e-06, "loss": 2.0925, "step": 23881 }, { "epoch": 0.79, "grad_norm": 0.7395011186599731, "learning_rate": 2.0776685325020273e-06, "loss": 2.0267, "step": 23882 }, { "epoch": 0.79, "grad_norm": 0.7300148606300354, "learning_rate": 2.077019980300142e-06, "loss": 2.0615, "step": 23883 }, { "epoch": 0.79, "grad_norm": 0.730800211429596, "learning_rate": 2.0763715176074417e-06, "loss": 1.9932, "step": 23884 }, { "epoch": 0.79, "grad_norm": 0.758759081363678, "learning_rate": 2.0757231444312507e-06, "loss": 2.0387, "step": 23885 }, { "epoch": 0.79, "grad_norm": 1.052169919013977, "learning_rate": 2.0750748607788973e-06, "loss": 2.0176, "step": 23886 }, { "epoch": 0.79, "grad_norm": 0.7489136457443237, "learning_rate": 2.074426666657704e-06, "loss": 1.983, "step": 23887 }, { "epoch": 0.79, "grad_norm": 0.7223396897315979, "learning_rate": 2.0737785620749907e-06, "loss": 2.0205, "step": 23888 }, { "epoch": 0.79, "grad_norm": 0.7384513020515442, "learning_rate": 2.0731305470380814e-06, "loss": 2.0235, "step": 23889 }, { "epoch": 0.79, "grad_norm": 0.7136140465736389, "learning_rate": 2.0724826215543013e-06, "loss": 2.0506, "step": 23890 }, { "epoch": 0.79, "grad_norm": 0.7408373951911926, "learning_rate": 2.071834785630963e-06, "loss": 2.0729, "step": 23891 }, { "epoch": 0.79, "grad_norm": 0.7481176853179932, "learning_rate": 2.071187039275392e-06, "loss": 2.0885, "step": 23892 }, { "epoch": 0.79, "grad_norm": 0.7725847363471985, "learning_rate": 2.0705393824949025e-06, "loss": 2.0118, "step": 23893 }, { "epoch": 0.79, "grad_norm": 0.7322697639465332, "learning_rate": 2.0698918152968104e-06, "loss": 2.0367, "step": 23894 }, { "epoch": 0.79, "grad_norm": 0.7533455491065979, "learning_rate": 2.0692443376884318e-06, "loss": 1.982, "step": 23895 }, { "epoch": 0.8, "grad_norm": 0.755894124507904, "learning_rate": 2.0685969496770896e-06, "loss": 2.0116, "step": 23896 }, { "epoch": 0.8, "grad_norm": 0.7433171272277832, "learning_rate": 2.067949651270085e-06, "loss": 1.9765, "step": 23897 }, { "epoch": 0.8, "grad_norm": 0.7190411686897278, "learning_rate": 2.0673024424747356e-06, "loss": 1.9739, "step": 23898 }, { "epoch": 0.8, "grad_norm": 0.7932247519493103, "learning_rate": 2.066655323298358e-06, "loss": 2.1058, "step": 23899 }, { "epoch": 0.8, "grad_norm": 0.7437026500701904, "learning_rate": 2.066008293748255e-06, "loss": 2.1022, "step": 23900 }, { "epoch": 0.8, "grad_norm": 0.7600690126419067, "learning_rate": 2.065361353831744e-06, "loss": 2.0009, "step": 23901 }, { "epoch": 0.8, "grad_norm": 0.8063015937805176, "learning_rate": 2.064714503556131e-06, "loss": 2.0684, "step": 23902 }, { "epoch": 0.8, "grad_norm": 0.7964978814125061, "learning_rate": 2.0640677429287203e-06, "loss": 2.098, "step": 23903 }, { "epoch": 0.8, "grad_norm": 0.7567841410636902, "learning_rate": 2.0634210719568206e-06, "loss": 2.0607, "step": 23904 }, { "epoch": 0.8, "grad_norm": 0.7546612024307251, "learning_rate": 2.062774490647741e-06, "loss": 2.084, "step": 23905 }, { "epoch": 0.8, "grad_norm": 0.7798894643783569, "learning_rate": 2.062127999008784e-06, "loss": 2.0694, "step": 23906 }, { "epoch": 0.8, "grad_norm": 0.7356494665145874, "learning_rate": 2.06148159704725e-06, "loss": 2.0194, "step": 23907 }, { "epoch": 0.8, "grad_norm": 0.7583564519882202, "learning_rate": 2.0608352847704437e-06, "loss": 2.0258, "step": 23908 }, { "epoch": 0.8, "grad_norm": 0.7442474961280823, "learning_rate": 2.0601890621856736e-06, "loss": 2.0348, "step": 23909 }, { "epoch": 0.8, "grad_norm": 0.7236694693565369, "learning_rate": 2.059542929300229e-06, "loss": 1.9994, "step": 23910 }, { "epoch": 0.8, "grad_norm": 0.778202474117279, "learning_rate": 2.058896886121415e-06, "loss": 2.0915, "step": 23911 }, { "epoch": 0.8, "grad_norm": 0.7426068782806396, "learning_rate": 2.0582509326565324e-06, "loss": 2.0977, "step": 23912 }, { "epoch": 0.8, "grad_norm": 0.7896616458892822, "learning_rate": 2.0576050689128734e-06, "loss": 2.0654, "step": 23913 }, { "epoch": 0.8, "grad_norm": 0.760249137878418, "learning_rate": 2.0569592948977413e-06, "loss": 2.0675, "step": 23914 }, { "epoch": 0.8, "grad_norm": 0.7379205226898193, "learning_rate": 2.056313610618428e-06, "loss": 2.0539, "step": 23915 }, { "epoch": 0.8, "grad_norm": 0.7622074484825134, "learning_rate": 2.055668016082224e-06, "loss": 2.0356, "step": 23916 }, { "epoch": 0.8, "grad_norm": 0.7344079613685608, "learning_rate": 2.0550225112964283e-06, "loss": 2.0482, "step": 23917 }, { "epoch": 0.8, "grad_norm": 0.726259708404541, "learning_rate": 2.0543770962683363e-06, "loss": 1.9909, "step": 23918 }, { "epoch": 0.8, "grad_norm": 0.7238035798072815, "learning_rate": 2.0537317710052305e-06, "loss": 2.0489, "step": 23919 }, { "epoch": 0.8, "grad_norm": 0.7438998818397522, "learning_rate": 2.053086535514406e-06, "loss": 2.0523, "step": 23920 }, { "epoch": 0.8, "grad_norm": 0.7411240339279175, "learning_rate": 2.052441389803156e-06, "loss": 1.9882, "step": 23921 }, { "epoch": 0.8, "grad_norm": 0.7896168231964111, "learning_rate": 2.0517963338787617e-06, "loss": 2.1195, "step": 23922 }, { "epoch": 0.8, "grad_norm": 0.7211143970489502, "learning_rate": 2.0511513677485173e-06, "loss": 1.9445, "step": 23923 }, { "epoch": 0.8, "grad_norm": 0.7509300112724304, "learning_rate": 2.0505064914197036e-06, "loss": 2.0168, "step": 23924 }, { "epoch": 0.8, "grad_norm": 0.7656954526901245, "learning_rate": 2.0498617048996117e-06, "loss": 2.059, "step": 23925 }, { "epoch": 0.8, "grad_norm": 0.7557909488677979, "learning_rate": 2.04921700819552e-06, "loss": 2.0314, "step": 23926 }, { "epoch": 0.8, "grad_norm": 0.7507382035255432, "learning_rate": 2.048572401314718e-06, "loss": 2.0292, "step": 23927 }, { "epoch": 0.8, "grad_norm": 0.7456069588661194, "learning_rate": 2.0479278842644846e-06, "loss": 2.0447, "step": 23928 }, { "epoch": 0.8, "grad_norm": 0.741738498210907, "learning_rate": 2.0472834570520983e-06, "loss": 2.0075, "step": 23929 }, { "epoch": 0.8, "grad_norm": 0.7369498610496521, "learning_rate": 2.0466391196848432e-06, "loss": 2.0073, "step": 23930 }, { "epoch": 0.8, "grad_norm": 0.7572205066680908, "learning_rate": 2.0459948721700016e-06, "loss": 1.9837, "step": 23931 }, { "epoch": 0.8, "grad_norm": 0.7596014738082886, "learning_rate": 2.0453507145148487e-06, "loss": 2.1312, "step": 23932 }, { "epoch": 0.8, "grad_norm": 0.7297394871711731, "learning_rate": 2.0447066467266576e-06, "loss": 2.0573, "step": 23933 }, { "epoch": 0.8, "grad_norm": 0.7534695863723755, "learning_rate": 2.0440626688127117e-06, "loss": 2.021, "step": 23934 }, { "epoch": 0.8, "grad_norm": 0.7488219141960144, "learning_rate": 2.043418780780281e-06, "loss": 2.0653, "step": 23935 }, { "epoch": 0.8, "grad_norm": 0.7560086250305176, "learning_rate": 2.0427749826366438e-06, "loss": 2.0519, "step": 23936 }, { "epoch": 0.8, "grad_norm": 0.7344833612442017, "learning_rate": 2.042131274389072e-06, "loss": 2.0144, "step": 23937 }, { "epoch": 0.8, "grad_norm": 0.7489877343177795, "learning_rate": 2.041487656044834e-06, "loss": 2.0662, "step": 23938 }, { "epoch": 0.8, "grad_norm": 0.7554758787155151, "learning_rate": 2.0408441276112047e-06, "loss": 2.001, "step": 23939 }, { "epoch": 0.8, "grad_norm": 0.7404783964157104, "learning_rate": 2.040200689095456e-06, "loss": 2.0867, "step": 23940 }, { "epoch": 0.8, "grad_norm": 0.762158989906311, "learning_rate": 2.0395573405048564e-06, "loss": 2.0866, "step": 23941 }, { "epoch": 0.8, "grad_norm": 0.8025885224342346, "learning_rate": 2.038914081846669e-06, "loss": 2.014, "step": 23942 }, { "epoch": 0.8, "grad_norm": 0.7783696055412292, "learning_rate": 2.0382709131281674e-06, "loss": 2.0228, "step": 23943 }, { "epoch": 0.8, "grad_norm": 0.7527107000350952, "learning_rate": 2.0376278343566125e-06, "loss": 2.0065, "step": 23944 }, { "epoch": 0.8, "grad_norm": 0.7369482517242432, "learning_rate": 2.036984845539275e-06, "loss": 2.0645, "step": 23945 }, { "epoch": 0.8, "grad_norm": 0.7583514451980591, "learning_rate": 2.0363419466834122e-06, "loss": 1.9463, "step": 23946 }, { "epoch": 0.8, "grad_norm": 0.7693171501159668, "learning_rate": 2.0356991377962944e-06, "loss": 1.9586, "step": 23947 }, { "epoch": 0.8, "grad_norm": 0.7182508111000061, "learning_rate": 2.0350564188851773e-06, "loss": 2.0296, "step": 23948 }, { "epoch": 0.8, "grad_norm": 0.737296998500824, "learning_rate": 2.034413789957328e-06, "loss": 1.9869, "step": 23949 }, { "epoch": 0.8, "grad_norm": 0.7439113259315491, "learning_rate": 2.0337712510200026e-06, "loss": 2.0601, "step": 23950 }, { "epoch": 0.8, "grad_norm": 0.7364987134933472, "learning_rate": 2.0331288020804585e-06, "loss": 2.031, "step": 23951 }, { "epoch": 0.8, "grad_norm": 0.739911675453186, "learning_rate": 2.032486443145957e-06, "loss": 2.0718, "step": 23952 }, { "epoch": 0.8, "grad_norm": 0.7292237877845764, "learning_rate": 2.031844174223756e-06, "loss": 2.0703, "step": 23953 }, { "epoch": 0.8, "grad_norm": 0.7598031163215637, "learning_rate": 2.031201995321109e-06, "loss": 2.0274, "step": 23954 }, { "epoch": 0.8, "grad_norm": 0.7216752171516418, "learning_rate": 2.030559906445271e-06, "loss": 1.9849, "step": 23955 }, { "epoch": 0.8, "grad_norm": 0.7204724550247192, "learning_rate": 2.029917907603498e-06, "loss": 2.0308, "step": 23956 }, { "epoch": 0.8, "grad_norm": 0.7365097403526306, "learning_rate": 2.0292759988030386e-06, "loss": 2.0945, "step": 23957 }, { "epoch": 0.8, "grad_norm": 0.7813359498977661, "learning_rate": 2.028634180051151e-06, "loss": 2.0448, "step": 23958 }, { "epoch": 0.8, "grad_norm": 0.7346799969673157, "learning_rate": 2.027992451355083e-06, "loss": 2.0198, "step": 23959 }, { "epoch": 0.8, "grad_norm": 0.7495178580284119, "learning_rate": 2.027350812722081e-06, "loss": 1.9696, "step": 23960 }, { "epoch": 0.8, "grad_norm": 0.764894425868988, "learning_rate": 2.0267092641593965e-06, "loss": 2.0763, "step": 23961 }, { "epoch": 0.8, "grad_norm": 0.7430404424667358, "learning_rate": 2.0260678056742822e-06, "loss": 1.9986, "step": 23962 }, { "epoch": 0.8, "grad_norm": 0.7402697801589966, "learning_rate": 2.0254264372739798e-06, "loss": 2.0868, "step": 23963 }, { "epoch": 0.8, "grad_norm": 0.7406750917434692, "learning_rate": 2.024785158965733e-06, "loss": 2.0541, "step": 23964 }, { "epoch": 0.8, "grad_norm": 0.7259573340415955, "learning_rate": 2.0241439707567925e-06, "loss": 2.0298, "step": 23965 }, { "epoch": 0.8, "grad_norm": 0.7383511066436768, "learning_rate": 2.023502872654396e-06, "loss": 2.0554, "step": 23966 }, { "epoch": 0.8, "grad_norm": 0.7651811838150024, "learning_rate": 2.0228618646657928e-06, "loss": 2.0339, "step": 23967 }, { "epoch": 0.8, "grad_norm": 0.7312690019607544, "learning_rate": 2.022220946798218e-06, "loss": 2.0476, "step": 23968 }, { "epoch": 0.8, "grad_norm": 0.7294860482215881, "learning_rate": 2.0215801190589177e-06, "loss": 2.0607, "step": 23969 }, { "epoch": 0.8, "grad_norm": 0.7321736812591553, "learning_rate": 2.020939381455128e-06, "loss": 2.0551, "step": 23970 }, { "epoch": 0.8, "grad_norm": 0.7419860363006592, "learning_rate": 2.0202987339940918e-06, "loss": 2.0776, "step": 23971 }, { "epoch": 0.8, "grad_norm": 0.7457311153411865, "learning_rate": 2.0196581766830425e-06, "loss": 2.0614, "step": 23972 }, { "epoch": 0.8, "grad_norm": 0.7435533404350281, "learning_rate": 2.0190177095292163e-06, "loss": 2.0195, "step": 23973 }, { "epoch": 0.8, "grad_norm": 0.7520368695259094, "learning_rate": 2.0183773325398505e-06, "loss": 2.1257, "step": 23974 }, { "epoch": 0.8, "grad_norm": 0.778453528881073, "learning_rate": 2.017737045722182e-06, "loss": 2.0282, "step": 23975 }, { "epoch": 0.8, "grad_norm": 0.7452737092971802, "learning_rate": 2.017096849083443e-06, "loss": 2.1122, "step": 23976 }, { "epoch": 0.8, "grad_norm": 0.7365851998329163, "learning_rate": 2.0164567426308634e-06, "loss": 1.9858, "step": 23977 }, { "epoch": 0.8, "grad_norm": 0.7520131468772888, "learning_rate": 2.0158167263716786e-06, "loss": 2.0711, "step": 23978 }, { "epoch": 0.8, "grad_norm": 0.7548579573631287, "learning_rate": 2.0151768003131145e-06, "loss": 2.0226, "step": 23979 }, { "epoch": 0.8, "grad_norm": 0.7525720000267029, "learning_rate": 2.0145369644624056e-06, "loss": 2.0466, "step": 23980 }, { "epoch": 0.8, "grad_norm": 0.7186271548271179, "learning_rate": 2.0138972188267793e-06, "loss": 2.0355, "step": 23981 }, { "epoch": 0.8, "grad_norm": 0.7449716925621033, "learning_rate": 2.0132575634134577e-06, "loss": 2.0929, "step": 23982 }, { "epoch": 0.8, "grad_norm": 0.7404311299324036, "learning_rate": 2.0126179982296724e-06, "loss": 1.9712, "step": 23983 }, { "epoch": 0.8, "grad_norm": 0.7164410948753357, "learning_rate": 2.0119785232826503e-06, "loss": 2.0172, "step": 23984 }, { "epoch": 0.8, "grad_norm": 0.7498258352279663, "learning_rate": 2.0113391385796145e-06, "loss": 2.0528, "step": 23985 }, { "epoch": 0.8, "grad_norm": 0.7182027101516724, "learning_rate": 2.0106998441277837e-06, "loss": 2.0678, "step": 23986 }, { "epoch": 0.8, "grad_norm": 0.7292402982711792, "learning_rate": 2.010060639934386e-06, "loss": 2.0712, "step": 23987 }, { "epoch": 0.8, "grad_norm": 0.7549788355827332, "learning_rate": 2.0094215260066383e-06, "loss": 1.9644, "step": 23988 }, { "epoch": 0.8, "grad_norm": 0.7361125349998474, "learning_rate": 2.0087825023517673e-06, "loss": 2.0284, "step": 23989 }, { "epoch": 0.8, "grad_norm": 0.7673788070678711, "learning_rate": 2.0081435689769834e-06, "loss": 2.0623, "step": 23990 }, { "epoch": 0.8, "grad_norm": 0.7537578344345093, "learning_rate": 2.007504725889514e-06, "loss": 2.0111, "step": 23991 }, { "epoch": 0.8, "grad_norm": 0.7365519404411316, "learning_rate": 2.006865973096569e-06, "loss": 2.0056, "step": 23992 }, { "epoch": 0.8, "grad_norm": 0.740678608417511, "learning_rate": 2.00622731060537e-06, "loss": 2.0053, "step": 23993 }, { "epoch": 0.8, "grad_norm": 0.7439206838607788, "learning_rate": 2.005588738423131e-06, "loss": 2.1465, "step": 23994 }, { "epoch": 0.8, "grad_norm": 0.7328673601150513, "learning_rate": 2.0049502565570612e-06, "loss": 2.0286, "step": 23995 }, { "epoch": 0.8, "grad_norm": 0.7167171835899353, "learning_rate": 2.004311865014379e-06, "loss": 2.0352, "step": 23996 }, { "epoch": 0.8, "grad_norm": 0.7395989894866943, "learning_rate": 2.0036735638022976e-06, "loss": 2.098, "step": 23997 }, { "epoch": 0.8, "grad_norm": 0.7301743030548096, "learning_rate": 2.0030353529280267e-06, "loss": 1.9987, "step": 23998 }, { "epoch": 0.8, "grad_norm": 0.7665370106697083, "learning_rate": 2.002397232398772e-06, "loss": 2.0615, "step": 23999 }, { "epoch": 0.8, "grad_norm": 0.7669080495834351, "learning_rate": 2.001759202221749e-06, "loss": 1.9261, "step": 24000 }, { "epoch": 0.8, "grad_norm": 0.7636668682098389, "learning_rate": 2.0011212624041622e-06, "loss": 2.0242, "step": 24001 }, { "epoch": 0.8, "grad_norm": 0.7591378688812256, "learning_rate": 2.00048341295322e-06, "loss": 2.0487, "step": 24002 }, { "epoch": 0.8, "grad_norm": 0.752450704574585, "learning_rate": 1.999845653876129e-06, "loss": 1.9827, "step": 24003 }, { "epoch": 0.8, "grad_norm": 0.7688359022140503, "learning_rate": 1.9992079851800905e-06, "loss": 2.0877, "step": 24004 }, { "epoch": 0.8, "grad_norm": 0.7523253560066223, "learning_rate": 1.998570406872311e-06, "loss": 2.0538, "step": 24005 }, { "epoch": 0.8, "grad_norm": 0.751998782157898, "learning_rate": 1.9979329189599972e-06, "loss": 2.0463, "step": 24006 }, { "epoch": 0.8, "grad_norm": 0.7635302543640137, "learning_rate": 1.9972955214503476e-06, "loss": 2.0943, "step": 24007 }, { "epoch": 0.8, "grad_norm": 0.7524195313453674, "learning_rate": 1.9966582143505595e-06, "loss": 2.1036, "step": 24008 }, { "epoch": 0.8, "grad_norm": 0.7478085160255432, "learning_rate": 1.996020997667837e-06, "loss": 2.0446, "step": 24009 }, { "epoch": 0.8, "grad_norm": 0.7267443537712097, "learning_rate": 1.995383871409381e-06, "loss": 2.0668, "step": 24010 }, { "epoch": 0.8, "grad_norm": 0.7589496970176697, "learning_rate": 1.9947468355823876e-06, "loss": 2.0455, "step": 24011 }, { "epoch": 0.8, "grad_norm": 0.7430739998817444, "learning_rate": 1.994109890194049e-06, "loss": 2.0173, "step": 24012 }, { "epoch": 0.8, "grad_norm": 0.7363247871398926, "learning_rate": 1.9934730352515685e-06, "loss": 2.0629, "step": 24013 }, { "epoch": 0.8, "grad_norm": 0.7687974572181702, "learning_rate": 1.992836270762134e-06, "loss": 2.0244, "step": 24014 }, { "epoch": 0.8, "grad_norm": 0.7530408501625061, "learning_rate": 1.992199596732943e-06, "loss": 2.0465, "step": 24015 }, { "epoch": 0.8, "grad_norm": 0.7589410543441772, "learning_rate": 1.991563013171194e-06, "loss": 2.034, "step": 24016 }, { "epoch": 0.8, "grad_norm": 0.7424665093421936, "learning_rate": 1.9909265200840667e-06, "loss": 2.1034, "step": 24017 }, { "epoch": 0.8, "grad_norm": 0.7523882389068604, "learning_rate": 1.990290117478757e-06, "loss": 2.1157, "step": 24018 }, { "epoch": 0.8, "grad_norm": 0.7241427898406982, "learning_rate": 1.989653805362459e-06, "loss": 2.034, "step": 24019 }, { "epoch": 0.8, "grad_norm": 0.7740873098373413, "learning_rate": 1.9890175837423573e-06, "loss": 2.0913, "step": 24020 }, { "epoch": 0.8, "grad_norm": 0.7444481253623962, "learning_rate": 1.9883814526256384e-06, "loss": 1.991, "step": 24021 }, { "epoch": 0.8, "grad_norm": 0.7467379570007324, "learning_rate": 1.987745412019493e-06, "loss": 1.9274, "step": 24022 }, { "epoch": 0.8, "grad_norm": 0.7424873113632202, "learning_rate": 1.9871094619311005e-06, "loss": 2.0276, "step": 24023 }, { "epoch": 0.8, "grad_norm": 0.7617087960243225, "learning_rate": 1.9864736023676522e-06, "loss": 2.1217, "step": 24024 }, { "epoch": 0.8, "grad_norm": 0.7388319969177246, "learning_rate": 1.985837833336327e-06, "loss": 2.0002, "step": 24025 }, { "epoch": 0.8, "grad_norm": 0.7753557562828064, "learning_rate": 1.985202154844311e-06, "loss": 2.0652, "step": 24026 }, { "epoch": 0.8, "grad_norm": 0.7638347148895264, "learning_rate": 1.9845665668987825e-06, "loss": 2.0427, "step": 24027 }, { "epoch": 0.8, "grad_norm": 0.759086549282074, "learning_rate": 1.9839310695069248e-06, "loss": 2.0997, "step": 24028 }, { "epoch": 0.8, "grad_norm": 0.752033531665802, "learning_rate": 1.983295662675916e-06, "loss": 1.9378, "step": 24029 }, { "epoch": 0.8, "grad_norm": 0.735769510269165, "learning_rate": 1.9826603464129324e-06, "loss": 2.0728, "step": 24030 }, { "epoch": 0.8, "grad_norm": 0.7381555438041687, "learning_rate": 1.982025120725154e-06, "loss": 1.9881, "step": 24031 }, { "epoch": 0.8, "grad_norm": 0.7404716610908508, "learning_rate": 1.981389985619758e-06, "loss": 2.058, "step": 24032 }, { "epoch": 0.8, "grad_norm": 0.7416315674781799, "learning_rate": 1.9807549411039204e-06, "loss": 1.9967, "step": 24033 }, { "epoch": 0.8, "grad_norm": 0.7627708315849304, "learning_rate": 1.98011998718481e-06, "loss": 1.9754, "step": 24034 }, { "epoch": 0.8, "grad_norm": 0.749401867389679, "learning_rate": 1.9794851238696066e-06, "loss": 2.0708, "step": 24035 }, { "epoch": 0.8, "grad_norm": 0.7820358872413635, "learning_rate": 1.978850351165478e-06, "loss": 1.96, "step": 24036 }, { "epoch": 0.8, "grad_norm": 0.7129834890365601, "learning_rate": 1.978215669079596e-06, "loss": 1.9931, "step": 24037 }, { "epoch": 0.8, "grad_norm": 0.764801561832428, "learning_rate": 1.9775810776191372e-06, "loss": 2.0179, "step": 24038 }, { "epoch": 0.8, "grad_norm": 0.7585146427154541, "learning_rate": 1.9769465767912622e-06, "loss": 2.0483, "step": 24039 }, { "epoch": 0.8, "grad_norm": 0.7263974547386169, "learning_rate": 1.9763121666031416e-06, "loss": 2.0331, "step": 24040 }, { "epoch": 0.8, "grad_norm": 0.7646559476852417, "learning_rate": 1.9756778470619463e-06, "loss": 2.0819, "step": 24041 }, { "epoch": 0.8, "grad_norm": 0.7539601922035217, "learning_rate": 1.9750436181748413e-06, "loss": 2.0248, "step": 24042 }, { "epoch": 0.8, "grad_norm": 0.762263834476471, "learning_rate": 1.9744094799489868e-06, "loss": 1.9964, "step": 24043 }, { "epoch": 0.8, "grad_norm": 0.7268050312995911, "learning_rate": 1.9737754323915527e-06, "loss": 2.0301, "step": 24044 }, { "epoch": 0.8, "grad_norm": 0.7425455451011658, "learning_rate": 1.973141475509698e-06, "loss": 1.9889, "step": 24045 }, { "epoch": 0.8, "grad_norm": 0.74796062707901, "learning_rate": 1.972507609310587e-06, "loss": 2.0551, "step": 24046 }, { "epoch": 0.8, "grad_norm": 0.7582500576972961, "learning_rate": 1.971873833801382e-06, "loss": 2.0166, "step": 24047 }, { "epoch": 0.8, "grad_norm": 0.7322655320167542, "learning_rate": 1.971240148989242e-06, "loss": 2.0465, "step": 24048 }, { "epoch": 0.8, "grad_norm": 0.73333340883255, "learning_rate": 1.9706065548813235e-06, "loss": 2.03, "step": 24049 }, { "epoch": 0.8, "grad_norm": 0.7201894521713257, "learning_rate": 1.9699730514847882e-06, "loss": 2.0407, "step": 24050 }, { "epoch": 0.8, "grad_norm": 0.7542328834533691, "learning_rate": 1.969339638806792e-06, "loss": 2.0515, "step": 24051 }, { "epoch": 0.8, "grad_norm": 0.726930558681488, "learning_rate": 1.968706316854487e-06, "loss": 2.0366, "step": 24052 }, { "epoch": 0.8, "grad_norm": 0.7783737778663635, "learning_rate": 1.9680730856350315e-06, "loss": 2.044, "step": 24053 }, { "epoch": 0.8, "grad_norm": 0.7424106001853943, "learning_rate": 1.9674399451555813e-06, "loss": 2.0433, "step": 24054 }, { "epoch": 0.8, "grad_norm": 0.7547112703323364, "learning_rate": 1.966806895423288e-06, "loss": 2.0384, "step": 24055 }, { "epoch": 0.8, "grad_norm": 0.7293472290039062, "learning_rate": 1.966173936445299e-06, "loss": 2.1143, "step": 24056 }, { "epoch": 0.8, "grad_norm": 0.7412411570549011, "learning_rate": 1.9655410682287713e-06, "loss": 2.0596, "step": 24057 }, { "epoch": 0.8, "grad_norm": 0.7491490244865417, "learning_rate": 1.9649082907808494e-06, "loss": 2.0692, "step": 24058 }, { "epoch": 0.8, "grad_norm": 0.7542949914932251, "learning_rate": 1.964275604108684e-06, "loss": 2.0711, "step": 24059 }, { "epoch": 0.8, "grad_norm": 0.7532376646995544, "learning_rate": 1.96364300821943e-06, "loss": 2.0434, "step": 24060 }, { "epoch": 0.8, "grad_norm": 0.7357166409492493, "learning_rate": 1.9630105031202217e-06, "loss": 2.065, "step": 24061 }, { "epoch": 0.8, "grad_norm": 0.737655520439148, "learning_rate": 1.9623780888182107e-06, "loss": 2.0546, "step": 24062 }, { "epoch": 0.8, "grad_norm": 0.7477555274963379, "learning_rate": 1.961745765320544e-06, "loss": 2.0553, "step": 24063 }, { "epoch": 0.8, "grad_norm": 0.742662787437439, "learning_rate": 1.961113532634362e-06, "loss": 2.0169, "step": 24064 }, { "epoch": 0.8, "grad_norm": 0.7546602487564087, "learning_rate": 1.9604813907668064e-06, "loss": 2.0364, "step": 24065 }, { "epoch": 0.8, "grad_norm": 0.7241734266281128, "learning_rate": 1.9598493397250227e-06, "loss": 2.0045, "step": 24066 }, { "epoch": 0.8, "grad_norm": 0.7701930999755859, "learning_rate": 1.9592173795161474e-06, "loss": 2.0467, "step": 24067 }, { "epoch": 0.8, "grad_norm": 0.7520945072174072, "learning_rate": 1.9585855101473206e-06, "loss": 2.0838, "step": 24068 }, { "epoch": 0.8, "grad_norm": 0.7686367630958557, "learning_rate": 1.957953731625686e-06, "loss": 2.003, "step": 24069 }, { "epoch": 0.8, "grad_norm": 0.7123660445213318, "learning_rate": 1.957322043958375e-06, "loss": 1.9866, "step": 24070 }, { "epoch": 0.8, "grad_norm": 0.7771791815757751, "learning_rate": 1.956690447152525e-06, "loss": 2.0625, "step": 24071 }, { "epoch": 0.8, "grad_norm": 0.7729154229164124, "learning_rate": 1.956058941215274e-06, "loss": 2.0498, "step": 24072 }, { "epoch": 0.8, "grad_norm": 0.7548573613166809, "learning_rate": 1.955427526153756e-06, "loss": 2.1009, "step": 24073 }, { "epoch": 0.8, "grad_norm": 0.7320715188980103, "learning_rate": 1.9547962019751e-06, "loss": 1.9936, "step": 24074 }, { "epoch": 0.8, "grad_norm": 0.7527363896369934, "learning_rate": 1.9541649686864417e-06, "loss": 1.9876, "step": 24075 }, { "epoch": 0.8, "grad_norm": 0.7464690804481506, "learning_rate": 1.9535338262949154e-06, "loss": 2.0665, "step": 24076 }, { "epoch": 0.8, "grad_norm": 0.764543354511261, "learning_rate": 1.9529027748076447e-06, "loss": 2.0181, "step": 24077 }, { "epoch": 0.8, "grad_norm": 0.7448415756225586, "learning_rate": 1.9522718142317655e-06, "loss": 1.9835, "step": 24078 }, { "epoch": 0.8, "grad_norm": 0.7496123909950256, "learning_rate": 1.9516409445744035e-06, "loss": 2.0568, "step": 24079 }, { "epoch": 0.8, "grad_norm": 0.7689026594161987, "learning_rate": 1.9510101658426817e-06, "loss": 2.1025, "step": 24080 }, { "epoch": 0.8, "grad_norm": 0.7579641342163086, "learning_rate": 1.95037947804373e-06, "loss": 2.0651, "step": 24081 }, { "epoch": 0.8, "grad_norm": 0.7261137962341309, "learning_rate": 1.949748881184679e-06, "loss": 2.0402, "step": 24082 }, { "epoch": 0.8, "grad_norm": 0.7577558159828186, "learning_rate": 1.9491183752726416e-06, "loss": 2.0883, "step": 24083 }, { "epoch": 0.8, "grad_norm": 0.7400363683700562, "learning_rate": 1.9484879603147464e-06, "loss": 2.0284, "step": 24084 }, { "epoch": 0.8, "grad_norm": 0.754889726638794, "learning_rate": 1.947857636318119e-06, "loss": 2.0648, "step": 24085 }, { "epoch": 0.8, "grad_norm": 0.7724670171737671, "learning_rate": 1.9472274032898764e-06, "loss": 2.1193, "step": 24086 }, { "epoch": 0.8, "grad_norm": 0.7386720776557922, "learning_rate": 1.9465972612371364e-06, "loss": 2.036, "step": 24087 }, { "epoch": 0.8, "grad_norm": 0.7616339921951294, "learning_rate": 1.9459672101670247e-06, "loss": 2.0859, "step": 24088 }, { "epoch": 0.8, "grad_norm": 0.7527587413787842, "learning_rate": 1.9453372500866507e-06, "loss": 2.0723, "step": 24089 }, { "epoch": 0.8, "grad_norm": 0.7683152556419373, "learning_rate": 1.944707381003138e-06, "loss": 2.0509, "step": 24090 }, { "epoch": 0.8, "grad_norm": 0.7492933869361877, "learning_rate": 1.9440776029236018e-06, "loss": 2.0478, "step": 24091 }, { "epoch": 0.8, "grad_norm": 0.7381847500801086, "learning_rate": 1.9434479158551557e-06, "loss": 2.0987, "step": 24092 }, { "epoch": 0.8, "grad_norm": 0.7532115578651428, "learning_rate": 1.942818319804911e-06, "loss": 2.0831, "step": 24093 }, { "epoch": 0.8, "grad_norm": 0.7682026624679565, "learning_rate": 1.942188814779986e-06, "loss": 2.0932, "step": 24094 }, { "epoch": 0.8, "grad_norm": 0.7594884634017944, "learning_rate": 1.941559400787488e-06, "loss": 2.0073, "step": 24095 }, { "epoch": 0.8, "grad_norm": 0.7494890689849854, "learning_rate": 1.9409300778345287e-06, "loss": 2.0255, "step": 24096 }, { "epoch": 0.8, "grad_norm": 0.7350327372550964, "learning_rate": 1.9403008459282167e-06, "loss": 1.9862, "step": 24097 }, { "epoch": 0.8, "grad_norm": 0.746798574924469, "learning_rate": 1.9396717050756654e-06, "loss": 2.1184, "step": 24098 }, { "epoch": 0.8, "grad_norm": 0.7848989963531494, "learning_rate": 1.939042655283977e-06, "loss": 2.0574, "step": 24099 }, { "epoch": 0.8, "grad_norm": 0.7219083309173584, "learning_rate": 1.938413696560263e-06, "loss": 2.0618, "step": 24100 }, { "epoch": 0.8, "grad_norm": 0.7361511588096619, "learning_rate": 1.9377848289116263e-06, "loss": 2.0771, "step": 24101 }, { "epoch": 0.8, "grad_norm": 0.7353638410568237, "learning_rate": 1.937156052345168e-06, "loss": 2.0397, "step": 24102 }, { "epoch": 0.8, "grad_norm": 0.737454891204834, "learning_rate": 1.9365273668679974e-06, "loss": 2.0409, "step": 24103 }, { "epoch": 0.8, "grad_norm": 0.7594074606895447, "learning_rate": 1.935898772487219e-06, "loss": 2.0741, "step": 24104 }, { "epoch": 0.8, "grad_norm": 0.7397553324699402, "learning_rate": 1.9352702692099256e-06, "loss": 2.0583, "step": 24105 }, { "epoch": 0.8, "grad_norm": 0.7256757020950317, "learning_rate": 1.9346418570432213e-06, "loss": 2.0659, "step": 24106 }, { "epoch": 0.8, "grad_norm": 0.7591580748558044, "learning_rate": 1.93401353599421e-06, "loss": 1.9941, "step": 24107 }, { "epoch": 0.8, "grad_norm": 0.7584403157234192, "learning_rate": 1.933385306069986e-06, "loss": 1.9703, "step": 24108 }, { "epoch": 0.8, "grad_norm": 0.7407048940658569, "learning_rate": 1.9327571672776456e-06, "loss": 2.0749, "step": 24109 }, { "epoch": 0.8, "grad_norm": 0.7671871781349182, "learning_rate": 1.9321291196242865e-06, "loss": 2.0419, "step": 24110 }, { "epoch": 0.8, "grad_norm": 0.7658242583274841, "learning_rate": 1.9315011631170067e-06, "loss": 2.0648, "step": 24111 }, { "epoch": 0.8, "grad_norm": 0.7539470791816711, "learning_rate": 1.930873297762895e-06, "loss": 2.0399, "step": 24112 }, { "epoch": 0.8, "grad_norm": 0.7473941445350647, "learning_rate": 1.9302455235690522e-06, "loss": 2.0335, "step": 24113 }, { "epoch": 0.8, "grad_norm": 0.7680037617683411, "learning_rate": 1.929617840542565e-06, "loss": 2.0427, "step": 24114 }, { "epoch": 0.8, "grad_norm": 0.7454906105995178, "learning_rate": 1.928990248690523e-06, "loss": 2.065, "step": 24115 }, { "epoch": 0.8, "grad_norm": 0.7474365830421448, "learning_rate": 1.9283627480200196e-06, "loss": 2.0427, "step": 24116 }, { "epoch": 0.8, "grad_norm": 0.7344571948051453, "learning_rate": 1.9277353385381483e-06, "loss": 1.9783, "step": 24117 }, { "epoch": 0.8, "grad_norm": 0.7475095987319946, "learning_rate": 1.9271080202519864e-06, "loss": 2.0415, "step": 24118 }, { "epoch": 0.8, "grad_norm": 0.7664642930030823, "learning_rate": 1.926480793168628e-06, "loss": 2.0616, "step": 24119 }, { "epoch": 0.8, "grad_norm": 0.7399535179138184, "learning_rate": 1.9258536572951605e-06, "loss": 2.1128, "step": 24120 }, { "epoch": 0.8, "grad_norm": 0.7486439943313599, "learning_rate": 1.925226612638663e-06, "loss": 2.0144, "step": 24121 }, { "epoch": 0.8, "grad_norm": 0.7437174916267395, "learning_rate": 1.9245996592062266e-06, "loss": 2.0846, "step": 24122 }, { "epoch": 0.8, "grad_norm": 0.723362922668457, "learning_rate": 1.9239727970049306e-06, "loss": 2.0121, "step": 24123 }, { "epoch": 0.8, "grad_norm": 0.7494576573371887, "learning_rate": 1.9233460260418533e-06, "loss": 2.1364, "step": 24124 }, { "epoch": 0.8, "grad_norm": 0.7557364106178284, "learning_rate": 1.9227193463240802e-06, "loss": 2.0217, "step": 24125 }, { "epoch": 0.8, "grad_norm": 0.7501938343048096, "learning_rate": 1.9220927578586924e-06, "loss": 2.0727, "step": 24126 }, { "epoch": 0.8, "grad_norm": 0.7384154796600342, "learning_rate": 1.921466260652767e-06, "loss": 1.9668, "step": 24127 }, { "epoch": 0.8, "grad_norm": 0.7558481097221375, "learning_rate": 1.9208398547133778e-06, "loss": 2.0331, "step": 24128 }, { "epoch": 0.8, "grad_norm": 0.7469013333320618, "learning_rate": 1.9202135400476073e-06, "loss": 2.112, "step": 24129 }, { "epoch": 0.8, "grad_norm": 0.7902927994728088, "learning_rate": 1.919587316662528e-06, "loss": 2.0786, "step": 24130 }, { "epoch": 0.8, "grad_norm": 0.7178947329521179, "learning_rate": 1.9189611845652166e-06, "loss": 1.9932, "step": 24131 }, { "epoch": 0.8, "grad_norm": 0.7487528920173645, "learning_rate": 1.918335143762744e-06, "loss": 2.1002, "step": 24132 }, { "epoch": 0.8, "grad_norm": 0.7504995465278625, "learning_rate": 1.917709194262187e-06, "loss": 2.0312, "step": 24133 }, { "epoch": 0.8, "grad_norm": 0.7460945844650269, "learning_rate": 1.9170833360706133e-06, "loss": 2.0274, "step": 24134 }, { "epoch": 0.8, "grad_norm": 0.7296907305717468, "learning_rate": 1.916457569195097e-06, "loss": 1.9545, "step": 24135 }, { "epoch": 0.8, "grad_norm": 0.7328383922576904, "learning_rate": 1.9158318936427044e-06, "loss": 2.0897, "step": 24136 }, { "epoch": 0.8, "grad_norm": 0.746177077293396, "learning_rate": 1.9152063094205042e-06, "loss": 2.033, "step": 24137 }, { "epoch": 0.8, "grad_norm": 0.7522748112678528, "learning_rate": 1.914580816535565e-06, "loss": 2.0787, "step": 24138 }, { "epoch": 0.8, "grad_norm": 0.7220520377159119, "learning_rate": 1.913955414994958e-06, "loss": 2.006, "step": 24139 }, { "epoch": 0.8, "grad_norm": 0.730384349822998, "learning_rate": 1.9133301048057383e-06, "loss": 2.0606, "step": 24140 }, { "epoch": 0.8, "grad_norm": 0.7597929835319519, "learning_rate": 1.9127048859749753e-06, "loss": 2.0293, "step": 24141 }, { "epoch": 0.8, "grad_norm": 0.7728990912437439, "learning_rate": 1.9120797585097363e-06, "loss": 2.0415, "step": 24142 }, { "epoch": 0.8, "grad_norm": 0.755099892616272, "learning_rate": 1.9114547224170774e-06, "loss": 2.0398, "step": 24143 }, { "epoch": 0.8, "grad_norm": 0.7230612635612488, "learning_rate": 1.9108297777040664e-06, "loss": 2.1003, "step": 24144 }, { "epoch": 0.8, "grad_norm": 0.7630913853645325, "learning_rate": 1.910204924377759e-06, "loss": 2.1595, "step": 24145 }, { "epoch": 0.8, "grad_norm": 0.7458087205886841, "learning_rate": 1.9095801624452117e-06, "loss": 2.0185, "step": 24146 }, { "epoch": 0.8, "grad_norm": 0.7574073672294617, "learning_rate": 1.9089554919134868e-06, "loss": 2.0602, "step": 24147 }, { "epoch": 0.8, "grad_norm": 0.7308272123336792, "learning_rate": 1.9083309127896443e-06, "loss": 2.0527, "step": 24148 }, { "epoch": 0.8, "grad_norm": 0.7744826078414917, "learning_rate": 1.9077064250807365e-06, "loss": 2.1446, "step": 24149 }, { "epoch": 0.8, "grad_norm": 0.7543187737464905, "learning_rate": 1.9070820287938164e-06, "loss": 2.0392, "step": 24150 }, { "epoch": 0.8, "grad_norm": 0.733694851398468, "learning_rate": 1.9064577239359428e-06, "loss": 2.072, "step": 24151 }, { "epoch": 0.8, "grad_norm": 0.7475532293319702, "learning_rate": 1.9058335105141645e-06, "loss": 2.057, "step": 24152 }, { "epoch": 0.8, "grad_norm": 0.7242964506149292, "learning_rate": 1.9052093885355382e-06, "loss": 2.0223, "step": 24153 }, { "epoch": 0.8, "grad_norm": 0.7716489434242249, "learning_rate": 1.9045853580071093e-06, "loss": 2.0581, "step": 24154 }, { "epoch": 0.8, "grad_norm": 0.7742037177085876, "learning_rate": 1.9039614189359334e-06, "loss": 2.0087, "step": 24155 }, { "epoch": 0.8, "grad_norm": 0.7815915942192078, "learning_rate": 1.9033375713290535e-06, "loss": 2.0686, "step": 24156 }, { "epoch": 0.8, "grad_norm": 0.7441413402557373, "learning_rate": 1.9027138151935242e-06, "loss": 2.0397, "step": 24157 }, { "epoch": 0.8, "grad_norm": 0.7537321448326111, "learning_rate": 1.9020901505363887e-06, "loss": 1.9799, "step": 24158 }, { "epoch": 0.8, "grad_norm": 0.7504726648330688, "learning_rate": 1.9014665773646889e-06, "loss": 2.0355, "step": 24159 }, { "epoch": 0.8, "grad_norm": 0.7748067378997803, "learning_rate": 1.900843095685475e-06, "loss": 2.0549, "step": 24160 }, { "epoch": 0.8, "grad_norm": 0.7617127895355225, "learning_rate": 1.9002197055057914e-06, "loss": 2.0635, "step": 24161 }, { "epoch": 0.8, "grad_norm": 0.756274938583374, "learning_rate": 1.8995964068326777e-06, "loss": 2.0603, "step": 24162 }, { "epoch": 0.8, "grad_norm": 0.7469486594200134, "learning_rate": 1.8989731996731752e-06, "loss": 2.0262, "step": 24163 }, { "epoch": 0.8, "grad_norm": 0.7266932725906372, "learning_rate": 1.8983500840343282e-06, "loss": 2.0538, "step": 24164 }, { "epoch": 0.8, "grad_norm": 0.7373751997947693, "learning_rate": 1.89772705992317e-06, "loss": 2.1132, "step": 24165 }, { "epoch": 0.8, "grad_norm": 0.748246967792511, "learning_rate": 1.8971041273467472e-06, "loss": 2.0472, "step": 24166 }, { "epoch": 0.8, "grad_norm": 0.7331543564796448, "learning_rate": 1.896481286312093e-06, "loss": 2.0709, "step": 24167 }, { "epoch": 0.8, "grad_norm": 0.7441941499710083, "learning_rate": 1.8958585368262405e-06, "loss": 2.0342, "step": 24168 }, { "epoch": 0.8, "grad_norm": 0.7428725957870483, "learning_rate": 1.8952358788962299e-06, "loss": 2.1066, "step": 24169 }, { "epoch": 0.8, "grad_norm": 0.7549260854721069, "learning_rate": 1.8946133125290966e-06, "loss": 2.0823, "step": 24170 }, { "epoch": 0.8, "grad_norm": 0.7421898245811462, "learning_rate": 1.8939908377318717e-06, "loss": 2.0553, "step": 24171 }, { "epoch": 0.8, "grad_norm": 0.7397654056549072, "learning_rate": 1.893368454511585e-06, "loss": 1.9952, "step": 24172 }, { "epoch": 0.8, "grad_norm": 0.7762250900268555, "learning_rate": 1.8927461628752741e-06, "loss": 2.167, "step": 24173 }, { "epoch": 0.8, "grad_norm": 0.7685840129852295, "learning_rate": 1.8921239628299626e-06, "loss": 2.0001, "step": 24174 }, { "epoch": 0.8, "grad_norm": 0.7320700883865356, "learning_rate": 1.8915018543826846e-06, "loss": 2.0365, "step": 24175 }, { "epoch": 0.8, "grad_norm": 0.7516846656799316, "learning_rate": 1.8908798375404646e-06, "loss": 2.094, "step": 24176 }, { "epoch": 0.8, "grad_norm": 0.7538803815841675, "learning_rate": 1.8902579123103348e-06, "loss": 2.0586, "step": 24177 }, { "epoch": 0.8, "grad_norm": 0.7362875938415527, "learning_rate": 1.8896360786993162e-06, "loss": 2.0555, "step": 24178 }, { "epoch": 0.8, "grad_norm": 0.723981499671936, "learning_rate": 1.8890143367144375e-06, "loss": 2.0026, "step": 24179 }, { "epoch": 0.8, "grad_norm": 0.7648736834526062, "learning_rate": 1.8883926863627223e-06, "loss": 2.0623, "step": 24180 }, { "epoch": 0.8, "grad_norm": 0.7295845746994019, "learning_rate": 1.887771127651189e-06, "loss": 2.0973, "step": 24181 }, { "epoch": 0.8, "grad_norm": 0.7280145287513733, "learning_rate": 1.8871496605868634e-06, "loss": 2.1062, "step": 24182 }, { "epoch": 0.8, "grad_norm": 0.7307288646697998, "learning_rate": 1.8865282851767697e-06, "loss": 2.0041, "step": 24183 }, { "epoch": 0.8, "grad_norm": 0.7433872222900391, "learning_rate": 1.8859070014279245e-06, "loss": 2.0079, "step": 24184 }, { "epoch": 0.8, "grad_norm": 0.7533906102180481, "learning_rate": 1.8852858093473437e-06, "loss": 2.0088, "step": 24185 }, { "epoch": 0.8, "grad_norm": 0.7069349884986877, "learning_rate": 1.88466470894205e-06, "loss": 2.003, "step": 24186 }, { "epoch": 0.8, "grad_norm": 0.7571207284927368, "learning_rate": 1.8840437002190571e-06, "loss": 2.0912, "step": 24187 }, { "epoch": 0.8, "grad_norm": 0.751305103302002, "learning_rate": 1.8834227831853835e-06, "loss": 2.0636, "step": 24188 }, { "epoch": 0.8, "grad_norm": 0.7948052883148193, "learning_rate": 1.8828019578480428e-06, "loss": 2.0791, "step": 24189 }, { "epoch": 0.8, "grad_norm": 0.7479168772697449, "learning_rate": 1.882181224214047e-06, "loss": 2.0507, "step": 24190 }, { "epoch": 0.8, "grad_norm": 0.7845588326454163, "learning_rate": 1.8815605822904093e-06, "loss": 2.1017, "step": 24191 }, { "epoch": 0.8, "grad_norm": 0.774436354637146, "learning_rate": 1.8809400320841443e-06, "loss": 1.9819, "step": 24192 }, { "epoch": 0.8, "grad_norm": 0.7503799796104431, "learning_rate": 1.8803195736022618e-06, "loss": 2.0706, "step": 24193 }, { "epoch": 0.8, "grad_norm": 0.7276161313056946, "learning_rate": 1.879699206851766e-06, "loss": 1.9625, "step": 24194 }, { "epoch": 0.8, "grad_norm": 0.7501667737960815, "learning_rate": 1.879078931839673e-06, "loss": 2.0434, "step": 24195 }, { "epoch": 0.81, "grad_norm": 0.7379202246665955, "learning_rate": 1.8784587485729843e-06, "loss": 2.0551, "step": 24196 }, { "epoch": 0.81, "grad_norm": 0.7356285452842712, "learning_rate": 1.8778386570587125e-06, "loss": 2.0099, "step": 24197 }, { "epoch": 0.81, "grad_norm": 0.7162164449691772, "learning_rate": 1.8772186573038553e-06, "loss": 2.0268, "step": 24198 }, { "epoch": 0.81, "grad_norm": 0.7319517135620117, "learning_rate": 1.8765987493154247e-06, "loss": 2.0886, "step": 24199 }, { "epoch": 0.81, "grad_norm": 0.7349777817726135, "learning_rate": 1.8759789331004185e-06, "loss": 2.0355, "step": 24200 }, { "epoch": 0.81, "grad_norm": 0.7635425329208374, "learning_rate": 1.8753592086658434e-06, "loss": 2.0042, "step": 24201 }, { "epoch": 0.81, "grad_norm": 0.7250452637672424, "learning_rate": 1.874739576018698e-06, "loss": 2.0276, "step": 24202 }, { "epoch": 0.81, "grad_norm": 0.7262831330299377, "learning_rate": 1.8741200351659805e-06, "loss": 2.0616, "step": 24203 }, { "epoch": 0.81, "grad_norm": 0.7490938305854797, "learning_rate": 1.8735005861146927e-06, "loss": 2.0727, "step": 24204 }, { "epoch": 0.81, "grad_norm": 0.765129804611206, "learning_rate": 1.8728812288718357e-06, "loss": 2.0386, "step": 24205 }, { "epoch": 0.81, "grad_norm": 0.7243542671203613, "learning_rate": 1.872261963444404e-06, "loss": 2.0339, "step": 24206 }, { "epoch": 0.81, "grad_norm": 0.7413199543952942, "learning_rate": 1.8716427898393896e-06, "loss": 1.9846, "step": 24207 }, { "epoch": 0.81, "grad_norm": 0.7519736886024475, "learning_rate": 1.8710237080637938e-06, "loss": 1.9928, "step": 24208 }, { "epoch": 0.81, "grad_norm": 0.7478240728378296, "learning_rate": 1.8704047181246065e-06, "loss": 2.0685, "step": 24209 }, { "epoch": 0.81, "grad_norm": 0.7507136464118958, "learning_rate": 1.8697858200288244e-06, "loss": 2.051, "step": 24210 }, { "epoch": 0.81, "grad_norm": 0.7429404258728027, "learning_rate": 1.869167013783435e-06, "loss": 2.0438, "step": 24211 }, { "epoch": 0.81, "grad_norm": 0.7333064079284668, "learning_rate": 1.8685482993954341e-06, "loss": 2.004, "step": 24212 }, { "epoch": 0.81, "grad_norm": 0.7925693988800049, "learning_rate": 1.867929676871806e-06, "loss": 2.0626, "step": 24213 }, { "epoch": 0.81, "grad_norm": 0.731850266456604, "learning_rate": 1.8673111462195449e-06, "loss": 1.987, "step": 24214 }, { "epoch": 0.81, "grad_norm": 0.7237173318862915, "learning_rate": 1.8666927074456365e-06, "loss": 1.9953, "step": 24215 }, { "epoch": 0.81, "grad_norm": 0.7579448819160461, "learning_rate": 1.8660743605570652e-06, "loss": 2.0952, "step": 24216 }, { "epoch": 0.81, "grad_norm": 0.7799676656723022, "learning_rate": 1.865456105560819e-06, "loss": 2.0487, "step": 24217 }, { "epoch": 0.81, "grad_norm": 0.7362228631973267, "learning_rate": 1.864837942463884e-06, "loss": 1.9845, "step": 24218 }, { "epoch": 0.81, "grad_norm": 0.7605487704277039, "learning_rate": 1.864219871273243e-06, "loss": 2.0962, "step": 24219 }, { "epoch": 0.81, "grad_norm": 0.7434645295143127, "learning_rate": 1.8636018919958753e-06, "loss": 2.02, "step": 24220 }, { "epoch": 0.81, "grad_norm": 0.8152747750282288, "learning_rate": 1.862984004638767e-06, "loss": 2.0741, "step": 24221 }, { "epoch": 0.81, "grad_norm": 0.7325339317321777, "learning_rate": 1.8623662092088945e-06, "loss": 2.0053, "step": 24222 }, { "epoch": 0.81, "grad_norm": 0.7410983443260193, "learning_rate": 1.861748505713239e-06, "loss": 2.0838, "step": 24223 }, { "epoch": 0.81, "grad_norm": 0.7392004728317261, "learning_rate": 1.8611308941587858e-06, "loss": 1.964, "step": 24224 }, { "epoch": 0.81, "grad_norm": 0.7942520976066589, "learning_rate": 1.8605133745524995e-06, "loss": 2.005, "step": 24225 }, { "epoch": 0.81, "grad_norm": 0.7314419150352478, "learning_rate": 1.8598959469013634e-06, "loss": 1.9997, "step": 24226 }, { "epoch": 0.81, "grad_norm": 0.7434040904045105, "learning_rate": 1.859278611212354e-06, "loss": 2.0482, "step": 24227 }, { "epoch": 0.81, "grad_norm": 0.7133690714836121, "learning_rate": 1.8586613674924447e-06, "loss": 1.9944, "step": 24228 }, { "epoch": 0.81, "grad_norm": 0.748551607131958, "learning_rate": 1.8580442157486056e-06, "loss": 2.014, "step": 24229 }, { "epoch": 0.81, "grad_norm": 0.7272062301635742, "learning_rate": 1.8574271559878144e-06, "loss": 2.0472, "step": 24230 }, { "epoch": 0.81, "grad_norm": 0.7297928333282471, "learning_rate": 1.8568101882170353e-06, "loss": 2.0206, "step": 24231 }, { "epoch": 0.81, "grad_norm": 0.7379338145256042, "learning_rate": 1.8561933124432451e-06, "loss": 2.0572, "step": 24232 }, { "epoch": 0.81, "grad_norm": 0.7397128939628601, "learning_rate": 1.8555765286734084e-06, "loss": 2.012, "step": 24233 }, { "epoch": 0.81, "grad_norm": 0.7521272897720337, "learning_rate": 1.8549598369144972e-06, "loss": 2.002, "step": 24234 }, { "epoch": 0.81, "grad_norm": 0.7513400912284851, "learning_rate": 1.8543432371734738e-06, "loss": 2.0356, "step": 24235 }, { "epoch": 0.81, "grad_norm": 0.7428647875785828, "learning_rate": 1.853726729457309e-06, "loss": 2.1105, "step": 24236 }, { "epoch": 0.81, "grad_norm": 0.7444289922714233, "learning_rate": 1.8531103137729656e-06, "loss": 2.0222, "step": 24237 }, { "epoch": 0.81, "grad_norm": 0.7444307804107666, "learning_rate": 1.8524939901274042e-06, "loss": 1.9939, "step": 24238 }, { "epoch": 0.81, "grad_norm": 0.7816635370254517, "learning_rate": 1.8518777585275916e-06, "loss": 2.0135, "step": 24239 }, { "epoch": 0.81, "grad_norm": 0.7352049946784973, "learning_rate": 1.851261618980491e-06, "loss": 2.0776, "step": 24240 }, { "epoch": 0.81, "grad_norm": 0.7782276272773743, "learning_rate": 1.8506455714930604e-06, "loss": 2.1488, "step": 24241 }, { "epoch": 0.81, "grad_norm": 0.7558673620223999, "learning_rate": 1.8500296160722586e-06, "loss": 2.0549, "step": 24242 }, { "epoch": 0.81, "grad_norm": 0.7305112481117249, "learning_rate": 1.8494137527250476e-06, "loss": 2.0475, "step": 24243 }, { "epoch": 0.81, "grad_norm": 0.7354997992515564, "learning_rate": 1.8487979814583812e-06, "loss": 2.0819, "step": 24244 }, { "epoch": 0.81, "grad_norm": 0.7304483652114868, "learning_rate": 1.8481823022792177e-06, "loss": 2.0333, "step": 24245 }, { "epoch": 0.81, "grad_norm": 0.7496348023414612, "learning_rate": 1.8475667151945187e-06, "loss": 1.999, "step": 24246 }, { "epoch": 0.81, "grad_norm": 0.7594172358512878, "learning_rate": 1.8469512202112283e-06, "loss": 1.9934, "step": 24247 }, { "epoch": 0.81, "grad_norm": 0.7247515320777893, "learning_rate": 1.8463358173363045e-06, "loss": 2.138, "step": 24248 }, { "epoch": 0.81, "grad_norm": 0.7295111417770386, "learning_rate": 1.8457205065767026e-06, "loss": 2.0598, "step": 24249 }, { "epoch": 0.81, "grad_norm": 0.7577369809150696, "learning_rate": 1.8451052879393715e-06, "loss": 2.0408, "step": 24250 }, { "epoch": 0.81, "grad_norm": 0.735059916973114, "learning_rate": 1.8444901614312593e-06, "loss": 2.086, "step": 24251 }, { "epoch": 0.81, "grad_norm": 0.7383008003234863, "learning_rate": 1.8438751270593202e-06, "loss": 2.0402, "step": 24252 }, { "epoch": 0.81, "grad_norm": 0.7569987177848816, "learning_rate": 1.8432601848304976e-06, "loss": 2.0279, "step": 24253 }, { "epoch": 0.81, "grad_norm": 0.7318865656852722, "learning_rate": 1.8426453347517403e-06, "loss": 2.0462, "step": 24254 }, { "epoch": 0.81, "grad_norm": 0.7530444860458374, "learning_rate": 1.8420305768299983e-06, "loss": 1.9845, "step": 24255 }, { "epoch": 0.81, "grad_norm": 0.7730318903923035, "learning_rate": 1.841415911072214e-06, "loss": 2.1055, "step": 24256 }, { "epoch": 0.81, "grad_norm": 0.7818357348442078, "learning_rate": 1.8408013374853284e-06, "loss": 2.0696, "step": 24257 }, { "epoch": 0.81, "grad_norm": 0.7372931838035583, "learning_rate": 1.8401868560762903e-06, "loss": 2.0524, "step": 24258 }, { "epoch": 0.81, "grad_norm": 0.79176926612854, "learning_rate": 1.8395724668520398e-06, "loss": 2.1298, "step": 24259 }, { "epoch": 0.81, "grad_norm": 0.7549452185630798, "learning_rate": 1.8389581698195136e-06, "loss": 2.0029, "step": 24260 }, { "epoch": 0.81, "grad_norm": 0.7337024211883545, "learning_rate": 1.8383439649856548e-06, "loss": 2.0061, "step": 24261 }, { "epoch": 0.81, "grad_norm": 0.8021436333656311, "learning_rate": 1.837729852357406e-06, "loss": 2.0347, "step": 24262 }, { "epoch": 0.81, "grad_norm": 0.7428921461105347, "learning_rate": 1.8371158319417015e-06, "loss": 2.0107, "step": 24263 }, { "epoch": 0.81, "grad_norm": 0.7261497974395752, "learning_rate": 1.8365019037454757e-06, "loss": 2.0697, "step": 24264 }, { "epoch": 0.81, "grad_norm": 0.77569180727005, "learning_rate": 1.8358880677756707e-06, "loss": 2.1293, "step": 24265 }, { "epoch": 0.81, "grad_norm": 0.7533355951309204, "learning_rate": 1.8352743240392135e-06, "loss": 2.1046, "step": 24266 }, { "epoch": 0.81, "grad_norm": 0.7461055517196655, "learning_rate": 1.8346606725430426e-06, "loss": 2.1007, "step": 24267 }, { "epoch": 0.81, "grad_norm": 0.7565174698829651, "learning_rate": 1.8340471132940962e-06, "loss": 2.0734, "step": 24268 }, { "epoch": 0.81, "grad_norm": 0.7349610924720764, "learning_rate": 1.833433646299293e-06, "loss": 2.0042, "step": 24269 }, { "epoch": 0.81, "grad_norm": 0.7646200060844421, "learning_rate": 1.832820271565572e-06, "loss": 2.1137, "step": 24270 }, { "epoch": 0.81, "grad_norm": 0.7635456919670105, "learning_rate": 1.832206989099863e-06, "loss": 2.0842, "step": 24271 }, { "epoch": 0.81, "grad_norm": 0.7432968020439148, "learning_rate": 1.8315937989090926e-06, "loss": 2.0698, "step": 24272 }, { "epoch": 0.81, "grad_norm": 0.7361358404159546, "learning_rate": 1.8309807010001856e-06, "loss": 2.0567, "step": 24273 }, { "epoch": 0.81, "grad_norm": 0.7535871863365173, "learning_rate": 1.8303676953800731e-06, "loss": 1.9669, "step": 24274 }, { "epoch": 0.81, "grad_norm": 0.7485437393188477, "learning_rate": 1.829754782055677e-06, "loss": 2.0713, "step": 24275 }, { "epoch": 0.81, "grad_norm": 0.7223719954490662, "learning_rate": 1.8291419610339222e-06, "loss": 2.0482, "step": 24276 }, { "epoch": 0.81, "grad_norm": 0.7448598146438599, "learning_rate": 1.8285292323217362e-06, "loss": 2.0152, "step": 24277 }, { "epoch": 0.81, "grad_norm": 0.7425165176391602, "learning_rate": 1.827916595926038e-06, "loss": 2.0454, "step": 24278 }, { "epoch": 0.81, "grad_norm": 0.7546934485435486, "learning_rate": 1.8273040518537466e-06, "loss": 2.0462, "step": 24279 }, { "epoch": 0.81, "grad_norm": 0.7458992004394531, "learning_rate": 1.8266916001117862e-06, "loss": 2.051, "step": 24280 }, { "epoch": 0.81, "grad_norm": 0.7696303725242615, "learning_rate": 1.8260792407070737e-06, "loss": 2.0479, "step": 24281 }, { "epoch": 0.81, "grad_norm": 0.7640809416770935, "learning_rate": 1.8254669736465257e-06, "loss": 2.0034, "step": 24282 }, { "epoch": 0.81, "grad_norm": 0.7493027448654175, "learning_rate": 1.8248547989370614e-06, "loss": 2.0333, "step": 24283 }, { "epoch": 0.81, "grad_norm": 0.729155421257019, "learning_rate": 1.8242427165855981e-06, "loss": 2.0756, "step": 24284 }, { "epoch": 0.81, "grad_norm": 0.7434857487678528, "learning_rate": 1.8236307265990493e-06, "loss": 2.0386, "step": 24285 }, { "epoch": 0.81, "grad_norm": 0.7426034212112427, "learning_rate": 1.8230188289843265e-06, "loss": 2.0366, "step": 24286 }, { "epoch": 0.81, "grad_norm": 0.7583605051040649, "learning_rate": 1.8224070237483471e-06, "loss": 2.0197, "step": 24287 }, { "epoch": 0.81, "grad_norm": 0.7473025918006897, "learning_rate": 1.821795310898019e-06, "loss": 2.0556, "step": 24288 }, { "epoch": 0.81, "grad_norm": 0.7454568147659302, "learning_rate": 1.8211836904402536e-06, "loss": 2.157, "step": 24289 }, { "epoch": 0.81, "grad_norm": 0.7470406293869019, "learning_rate": 1.8205721623819672e-06, "loss": 2.0367, "step": 24290 }, { "epoch": 0.81, "grad_norm": 0.7272387146949768, "learning_rate": 1.8199607267300568e-06, "loss": 2.0466, "step": 24291 }, { "epoch": 0.81, "grad_norm": 0.7336909174919128, "learning_rate": 1.8193493834914366e-06, "loss": 2.0512, "step": 24292 }, { "epoch": 0.81, "grad_norm": 0.7517204880714417, "learning_rate": 1.8187381326730158e-06, "loss": 2.0297, "step": 24293 }, { "epoch": 0.81, "grad_norm": 0.7709594964981079, "learning_rate": 1.8181269742816965e-06, "loss": 2.075, "step": 24294 }, { "epoch": 0.81, "grad_norm": 0.7747277021408081, "learning_rate": 1.8175159083243809e-06, "loss": 2.0304, "step": 24295 }, { "epoch": 0.81, "grad_norm": 0.7449919581413269, "learning_rate": 1.8169049348079782e-06, "loss": 2.0595, "step": 24296 }, { "epoch": 0.81, "grad_norm": 0.7367678880691528, "learning_rate": 1.8162940537393859e-06, "loss": 1.9926, "step": 24297 }, { "epoch": 0.81, "grad_norm": 0.7377228736877441, "learning_rate": 1.8156832651255064e-06, "loss": 2.0452, "step": 24298 }, { "epoch": 0.81, "grad_norm": 0.721007227897644, "learning_rate": 1.815072568973243e-06, "loss": 2.0229, "step": 24299 }, { "epoch": 0.81, "grad_norm": 0.7533445358276367, "learning_rate": 1.8144619652894936e-06, "loss": 2.0375, "step": 24300 }, { "epoch": 0.81, "grad_norm": 0.7165380716323853, "learning_rate": 1.8138514540811525e-06, "loss": 2.0677, "step": 24301 }, { "epoch": 0.81, "grad_norm": 0.7336419820785522, "learning_rate": 1.81324103535512e-06, "loss": 1.9819, "step": 24302 }, { "epoch": 0.81, "grad_norm": 0.7546417713165283, "learning_rate": 1.8126307091182982e-06, "loss": 2.054, "step": 24303 }, { "epoch": 0.81, "grad_norm": 0.7387623190879822, "learning_rate": 1.8120204753775717e-06, "loss": 2.0184, "step": 24304 }, { "epoch": 0.81, "grad_norm": 0.7303354740142822, "learning_rate": 1.811410334139838e-06, "loss": 2.0145, "step": 24305 }, { "epoch": 0.81, "grad_norm": 0.7097549438476562, "learning_rate": 1.8108002854119945e-06, "loss": 2.0453, "step": 24306 }, { "epoch": 0.81, "grad_norm": 0.7465159893035889, "learning_rate": 1.810190329200927e-06, "loss": 2.0736, "step": 24307 }, { "epoch": 0.81, "grad_norm": 0.7227551937103271, "learning_rate": 1.809580465513533e-06, "loss": 2.09, "step": 24308 }, { "epoch": 0.81, "grad_norm": 0.7380690574645996, "learning_rate": 1.8089706943566987e-06, "loss": 2.0706, "step": 24309 }, { "epoch": 0.81, "grad_norm": 0.7471098303794861, "learning_rate": 1.8083610157373098e-06, "loss": 2.0924, "step": 24310 }, { "epoch": 0.81, "grad_norm": 0.7550942897796631, "learning_rate": 1.8077514296622578e-06, "loss": 2.0029, "step": 24311 }, { "epoch": 0.81, "grad_norm": 0.7423034310340881, "learning_rate": 1.8071419361384335e-06, "loss": 2.0569, "step": 24312 }, { "epoch": 0.81, "grad_norm": 0.7246367931365967, "learning_rate": 1.8065325351727136e-06, "loss": 2.0088, "step": 24313 }, { "epoch": 0.81, "grad_norm": 0.7443142533302307, "learning_rate": 1.8059232267719872e-06, "loss": 2.0058, "step": 24314 }, { "epoch": 0.81, "grad_norm": 0.7584161758422852, "learning_rate": 1.805314010943141e-06, "loss": 2.0581, "step": 24315 }, { "epoch": 0.81, "grad_norm": 0.7913779020309448, "learning_rate": 1.804704887693054e-06, "loss": 2.1017, "step": 24316 }, { "epoch": 0.81, "grad_norm": 0.7487267255783081, "learning_rate": 1.804095857028606e-06, "loss": 2.0235, "step": 24317 }, { "epoch": 0.81, "grad_norm": 0.7411563992500305, "learning_rate": 1.8034869189566794e-06, "loss": 2.083, "step": 24318 }, { "epoch": 0.81, "grad_norm": 0.7459330558776855, "learning_rate": 1.8028780734841567e-06, "loss": 1.9975, "step": 24319 }, { "epoch": 0.81, "grad_norm": 0.7521233558654785, "learning_rate": 1.802269320617911e-06, "loss": 2.0539, "step": 24320 }, { "epoch": 0.81, "grad_norm": 0.7269390821456909, "learning_rate": 1.8016606603648246e-06, "loss": 2.0699, "step": 24321 }, { "epoch": 0.81, "grad_norm": 0.7520357966423035, "learning_rate": 1.8010520927317709e-06, "loss": 2.0059, "step": 24322 }, { "epoch": 0.81, "grad_norm": 0.7402631044387817, "learning_rate": 1.8004436177256236e-06, "loss": 2.0631, "step": 24323 }, { "epoch": 0.81, "grad_norm": 0.7807687520980835, "learning_rate": 1.7998352353532588e-06, "loss": 2.0677, "step": 24324 }, { "epoch": 0.81, "grad_norm": 0.748363196849823, "learning_rate": 1.799226945621555e-06, "loss": 2.0269, "step": 24325 }, { "epoch": 0.81, "grad_norm": 0.76650470495224, "learning_rate": 1.798618748537374e-06, "loss": 2.1512, "step": 24326 }, { "epoch": 0.81, "grad_norm": 0.7281331419944763, "learning_rate": 1.7980106441075917e-06, "loss": 1.9759, "step": 24327 }, { "epoch": 0.81, "grad_norm": 0.7175100445747375, "learning_rate": 1.7974026323390814e-06, "loss": 2.0111, "step": 24328 }, { "epoch": 0.81, "grad_norm": 0.7649305462837219, "learning_rate": 1.7967947132387054e-06, "loss": 2.1579, "step": 24329 }, { "epoch": 0.81, "grad_norm": 0.7481555342674255, "learning_rate": 1.7961868868133392e-06, "loss": 2.0544, "step": 24330 }, { "epoch": 0.81, "grad_norm": 0.7388166189193726, "learning_rate": 1.795579153069844e-06, "loss": 2.0633, "step": 24331 }, { "epoch": 0.81, "grad_norm": 0.7547290921211243, "learning_rate": 1.7949715120150856e-06, "loss": 2.1221, "step": 24332 }, { "epoch": 0.81, "grad_norm": 0.768934428691864, "learning_rate": 1.7943639636559306e-06, "loss": 2.0204, "step": 24333 }, { "epoch": 0.81, "grad_norm": 0.7694311141967773, "learning_rate": 1.7937565079992447e-06, "loss": 2.0482, "step": 24334 }, { "epoch": 0.81, "grad_norm": 0.7396800518035889, "learning_rate": 1.7931491450518879e-06, "loss": 2.0407, "step": 24335 }, { "epoch": 0.81, "grad_norm": 0.724143922328949, "learning_rate": 1.7925418748207212e-06, "loss": 2.0506, "step": 24336 }, { "epoch": 0.81, "grad_norm": 0.7323275208473206, "learning_rate": 1.7919346973126074e-06, "loss": 2.0657, "step": 24337 }, { "epoch": 0.81, "grad_norm": 0.7558744549751282, "learning_rate": 1.7913276125344038e-06, "loss": 2.0874, "step": 24338 }, { "epoch": 0.81, "grad_norm": 0.713801920413971, "learning_rate": 1.7907206204929716e-06, "loss": 2.0723, "step": 24339 }, { "epoch": 0.81, "grad_norm": 0.7538487911224365, "learning_rate": 1.7901137211951648e-06, "loss": 2.0333, "step": 24340 }, { "epoch": 0.81, "grad_norm": 0.7670883536338806, "learning_rate": 1.789506914647844e-06, "loss": 2.0802, "step": 24341 }, { "epoch": 0.81, "grad_norm": 0.7542937994003296, "learning_rate": 1.7889002008578593e-06, "loss": 2.0642, "step": 24342 }, { "epoch": 0.81, "grad_norm": 0.7365074157714844, "learning_rate": 1.7882935798320712e-06, "loss": 1.9792, "step": 24343 }, { "epoch": 0.81, "grad_norm": 0.7487741708755493, "learning_rate": 1.7876870515773292e-06, "loss": 2.075, "step": 24344 }, { "epoch": 0.81, "grad_norm": 0.7365841269493103, "learning_rate": 1.787080616100484e-06, "loss": 2.0661, "step": 24345 }, { "epoch": 0.81, "grad_norm": 0.7152438163757324, "learning_rate": 1.7864742734083884e-06, "loss": 1.9892, "step": 24346 }, { "epoch": 0.81, "grad_norm": 0.7796722054481506, "learning_rate": 1.7858680235078984e-06, "loss": 2.0126, "step": 24347 }, { "epoch": 0.81, "grad_norm": 0.7420482039451599, "learning_rate": 1.7852618664058518e-06, "loss": 2.0385, "step": 24348 }, { "epoch": 0.81, "grad_norm": 0.7448991537094116, "learning_rate": 1.7846558021091032e-06, "loss": 2.0307, "step": 24349 }, { "epoch": 0.81, "grad_norm": 0.7769578099250793, "learning_rate": 1.7840498306245001e-06, "loss": 2.0363, "step": 24350 }, { "epoch": 0.81, "grad_norm": 0.7710939645767212, "learning_rate": 1.7834439519588854e-06, "loss": 2.0827, "step": 24351 }, { "epoch": 0.81, "grad_norm": 0.7319108843803406, "learning_rate": 1.7828381661191075e-06, "loss": 2.0423, "step": 24352 }, { "epoch": 0.81, "grad_norm": 0.7591949701309204, "learning_rate": 1.7822324731120078e-06, "loss": 2.09, "step": 24353 }, { "epoch": 0.81, "grad_norm": 0.777458667755127, "learning_rate": 1.7816268729444287e-06, "loss": 2.1064, "step": 24354 }, { "epoch": 0.81, "grad_norm": 0.7271568775177002, "learning_rate": 1.7810213656232111e-06, "loss": 2.0306, "step": 24355 }, { "epoch": 0.81, "grad_norm": 0.746362030506134, "learning_rate": 1.7804159511552e-06, "loss": 1.9994, "step": 24356 }, { "epoch": 0.81, "grad_norm": 0.7325948476791382, "learning_rate": 1.7798106295472328e-06, "loss": 2.0616, "step": 24357 }, { "epoch": 0.81, "grad_norm": 0.753436267375946, "learning_rate": 1.7792054008061456e-06, "loss": 2.0934, "step": 24358 }, { "epoch": 0.81, "grad_norm": 0.7316683530807495, "learning_rate": 1.77860026493878e-06, "loss": 2.0174, "step": 24359 }, { "epoch": 0.81, "grad_norm": 0.7557212114334106, "learning_rate": 1.7779952219519669e-06, "loss": 2.0863, "step": 24360 }, { "epoch": 0.81, "grad_norm": 0.7890271544456482, "learning_rate": 1.7773902718525493e-06, "loss": 2.0682, "step": 24361 }, { "epoch": 0.81, "grad_norm": 0.7312085628509521, "learning_rate": 1.776785414647354e-06, "loss": 1.9849, "step": 24362 }, { "epoch": 0.81, "grad_norm": 0.7421086430549622, "learning_rate": 1.776180650343221e-06, "loss": 2.0384, "step": 24363 }, { "epoch": 0.81, "grad_norm": 0.7418829202651978, "learning_rate": 1.7755759789469762e-06, "loss": 2.0585, "step": 24364 }, { "epoch": 0.81, "grad_norm": 0.7719664573669434, "learning_rate": 1.7749714004654562e-06, "loss": 1.9915, "step": 24365 }, { "epoch": 0.81, "grad_norm": 0.7319421768188477, "learning_rate": 1.7743669149054898e-06, "loss": 2.0369, "step": 24366 }, { "epoch": 0.81, "grad_norm": 0.7165801525115967, "learning_rate": 1.773762522273903e-06, "loss": 2.0054, "step": 24367 }, { "epoch": 0.81, "grad_norm": 0.7573260068893433, "learning_rate": 1.7731582225775256e-06, "loss": 2.0323, "step": 24368 }, { "epoch": 0.81, "grad_norm": 0.7286785244941711, "learning_rate": 1.772554015823188e-06, "loss": 2.0224, "step": 24369 }, { "epoch": 0.81, "grad_norm": 0.7448521256446838, "learning_rate": 1.7719499020177122e-06, "loss": 2.0357, "step": 24370 }, { "epoch": 0.81, "grad_norm": 0.7336699366569519, "learning_rate": 1.771345881167923e-06, "loss": 2.079, "step": 24371 }, { "epoch": 0.81, "grad_norm": 0.7484866380691528, "learning_rate": 1.770741953280648e-06, "loss": 2.0529, "step": 24372 }, { "epoch": 0.81, "grad_norm": 0.7529163360595703, "learning_rate": 1.7701381183627052e-06, "loss": 2.0525, "step": 24373 }, { "epoch": 0.81, "grad_norm": 0.738771378993988, "learning_rate": 1.7695343764209205e-06, "loss": 2.0645, "step": 24374 }, { "epoch": 0.81, "grad_norm": 0.7359669804573059, "learning_rate": 1.7689307274621137e-06, "loss": 2.0542, "step": 24375 }, { "epoch": 0.81, "grad_norm": 0.7188895344734192, "learning_rate": 1.7683271714931005e-06, "loss": 2.0076, "step": 24376 }, { "epoch": 0.81, "grad_norm": 0.7403409481048584, "learning_rate": 1.7677237085207034e-06, "loss": 2.0245, "step": 24377 }, { "epoch": 0.81, "grad_norm": 0.7194327712059021, "learning_rate": 1.767120338551741e-06, "loss": 2.0119, "step": 24378 }, { "epoch": 0.81, "grad_norm": 0.7731668949127197, "learning_rate": 1.7665170615930295e-06, "loss": 2.045, "step": 24379 }, { "epoch": 0.81, "grad_norm": 0.7777301073074341, "learning_rate": 1.7659138776513784e-06, "loss": 2.0728, "step": 24380 }, { "epoch": 0.81, "grad_norm": 0.7535193562507629, "learning_rate": 1.7653107867336106e-06, "loss": 2.0004, "step": 24381 }, { "epoch": 0.81, "grad_norm": 0.729555070400238, "learning_rate": 1.7647077888465325e-06, "loss": 2.045, "step": 24382 }, { "epoch": 0.81, "grad_norm": 0.7446118593215942, "learning_rate": 1.764104883996962e-06, "loss": 2.0728, "step": 24383 }, { "epoch": 0.81, "grad_norm": 0.7420564293861389, "learning_rate": 1.7635020721917052e-06, "loss": 2.0523, "step": 24384 }, { "epoch": 0.81, "grad_norm": 0.7345011830329895, "learning_rate": 1.7628993534375783e-06, "loss": 2.0804, "step": 24385 }, { "epoch": 0.81, "grad_norm": 0.7475037574768066, "learning_rate": 1.7622967277413837e-06, "loss": 2.0768, "step": 24386 }, { "epoch": 0.81, "grad_norm": 0.7558488845825195, "learning_rate": 1.7616941951099354e-06, "loss": 2.0538, "step": 24387 }, { "epoch": 0.81, "grad_norm": 0.7316545248031616, "learning_rate": 1.7610917555500384e-06, "loss": 2.0174, "step": 24388 }, { "epoch": 0.81, "grad_norm": 0.7688521146774292, "learning_rate": 1.7604894090684955e-06, "loss": 2.0717, "step": 24389 }, { "epoch": 0.81, "grad_norm": 0.7228105068206787, "learning_rate": 1.7598871556721143e-06, "loss": 1.9627, "step": 24390 }, { "epoch": 0.81, "grad_norm": 0.7335205674171448, "learning_rate": 1.7592849953677016e-06, "loss": 2.0189, "step": 24391 }, { "epoch": 0.81, "grad_norm": 0.7589786052703857, "learning_rate": 1.7586829281620566e-06, "loss": 2.0804, "step": 24392 }, { "epoch": 0.81, "grad_norm": 0.7369424104690552, "learning_rate": 1.7580809540619803e-06, "loss": 2.0434, "step": 24393 }, { "epoch": 0.81, "grad_norm": 0.7552473545074463, "learning_rate": 1.7574790730742775e-06, "loss": 2.099, "step": 24394 }, { "epoch": 0.81, "grad_norm": 0.732036292552948, "learning_rate": 1.7568772852057436e-06, "loss": 2.0671, "step": 24395 }, { "epoch": 0.81, "grad_norm": 0.736139178276062, "learning_rate": 1.7562755904631811e-06, "loss": 2.0631, "step": 24396 }, { "epoch": 0.81, "grad_norm": 0.7451645135879517, "learning_rate": 1.7556739888533858e-06, "loss": 2.0529, "step": 24397 }, { "epoch": 0.81, "grad_norm": 0.7382019758224487, "learning_rate": 1.755072480383152e-06, "loss": 1.9722, "step": 24398 }, { "epoch": 0.81, "grad_norm": 0.7661020755767822, "learning_rate": 1.7544710650592767e-06, "loss": 1.9932, "step": 24399 }, { "epoch": 0.81, "grad_norm": 0.7296252250671387, "learning_rate": 1.7538697428885577e-06, "loss": 2.0048, "step": 24400 }, { "epoch": 0.81, "grad_norm": 0.7404804229736328, "learning_rate": 1.753268513877786e-06, "loss": 2.0838, "step": 24401 }, { "epoch": 0.81, "grad_norm": 0.7494199275970459, "learning_rate": 1.75266737803375e-06, "loss": 2.0682, "step": 24402 }, { "epoch": 0.81, "grad_norm": 0.7747084498405457, "learning_rate": 1.7520663353632461e-06, "loss": 2.0458, "step": 24403 }, { "epoch": 0.81, "grad_norm": 0.7472316026687622, "learning_rate": 1.7514653858730646e-06, "loss": 2.0449, "step": 24404 }, { "epoch": 0.81, "grad_norm": 0.7199827432632446, "learning_rate": 1.7508645295699922e-06, "loss": 2.0145, "step": 24405 }, { "epoch": 0.81, "grad_norm": 0.7447462677955627, "learning_rate": 1.750263766460817e-06, "loss": 2.0239, "step": 24406 }, { "epoch": 0.81, "grad_norm": 0.7584530115127563, "learning_rate": 1.7496630965523287e-06, "loss": 2.0929, "step": 24407 }, { "epoch": 0.81, "grad_norm": 0.7105960845947266, "learning_rate": 1.749062519851309e-06, "loss": 2.0289, "step": 24408 }, { "epoch": 0.81, "grad_norm": 0.7256761193275452, "learning_rate": 1.7484620363645477e-06, "loss": 1.9687, "step": 24409 }, { "epoch": 0.81, "grad_norm": 0.7138117551803589, "learning_rate": 1.7478616460988274e-06, "loss": 2.0241, "step": 24410 }, { "epoch": 0.81, "grad_norm": 0.7528107762336731, "learning_rate": 1.7472613490609259e-06, "loss": 2.0375, "step": 24411 }, { "epoch": 0.81, "grad_norm": 0.747986912727356, "learning_rate": 1.7466611452576299e-06, "loss": 2.1121, "step": 24412 }, { "epoch": 0.81, "grad_norm": 0.7571225762367249, "learning_rate": 1.746061034695723e-06, "loss": 2.0614, "step": 24413 }, { "epoch": 0.81, "grad_norm": 0.7674474120140076, "learning_rate": 1.7454610173819797e-06, "loss": 2.0404, "step": 24414 }, { "epoch": 0.81, "grad_norm": 0.7537508010864258, "learning_rate": 1.744861093323178e-06, "loss": 2.0357, "step": 24415 }, { "epoch": 0.81, "grad_norm": 0.7553339004516602, "learning_rate": 1.7442612625261003e-06, "loss": 2.0646, "step": 24416 }, { "epoch": 0.81, "grad_norm": 0.7386004328727722, "learning_rate": 1.7436615249975186e-06, "loss": 1.9859, "step": 24417 }, { "epoch": 0.81, "grad_norm": 0.7623127698898315, "learning_rate": 1.7430618807442123e-06, "loss": 2.031, "step": 24418 }, { "epoch": 0.81, "grad_norm": 0.7410336136817932, "learning_rate": 1.7424623297729515e-06, "loss": 1.985, "step": 24419 }, { "epoch": 0.81, "grad_norm": 0.7407962679862976, "learning_rate": 1.741862872090514e-06, "loss": 1.948, "step": 24420 }, { "epoch": 0.81, "grad_norm": 0.7624854445457458, "learning_rate": 1.7412635077036678e-06, "loss": 2.0483, "step": 24421 }, { "epoch": 0.81, "grad_norm": 0.7255503535270691, "learning_rate": 1.7406642366191883e-06, "loss": 1.9762, "step": 24422 }, { "epoch": 0.81, "grad_norm": 0.7390708327293396, "learning_rate": 1.740065058843845e-06, "loss": 2.1457, "step": 24423 }, { "epoch": 0.81, "grad_norm": 0.7563819289207458, "learning_rate": 1.7394659743844022e-06, "loss": 1.9916, "step": 24424 }, { "epoch": 0.81, "grad_norm": 0.7777734398841858, "learning_rate": 1.7388669832476324e-06, "loss": 2.0669, "step": 24425 }, { "epoch": 0.81, "grad_norm": 0.7708575129508972, "learning_rate": 1.7382680854403044e-06, "loss": 2.0379, "step": 24426 }, { "epoch": 0.81, "grad_norm": 0.7511506676673889, "learning_rate": 1.737669280969182e-06, "loss": 1.9981, "step": 24427 }, { "epoch": 0.81, "grad_norm": 0.7509694695472717, "learning_rate": 1.7370705698410261e-06, "loss": 2.1004, "step": 24428 }, { "epoch": 0.81, "grad_norm": 0.7440566420555115, "learning_rate": 1.7364719520626084e-06, "loss": 1.9691, "step": 24429 }, { "epoch": 0.81, "grad_norm": 0.7336593270301819, "learning_rate": 1.7358734276406841e-06, "loss": 2.0368, "step": 24430 }, { "epoch": 0.81, "grad_norm": 0.7451946139335632, "learning_rate": 1.7352749965820214e-06, "loss": 2.0238, "step": 24431 }, { "epoch": 0.81, "grad_norm": 0.7545853853225708, "learning_rate": 1.7346766588933783e-06, "loss": 2.0358, "step": 24432 }, { "epoch": 0.81, "grad_norm": 0.7318029403686523, "learning_rate": 1.7340784145815115e-06, "loss": 2.0155, "step": 24433 }, { "epoch": 0.81, "grad_norm": 0.723529577255249, "learning_rate": 1.7334802636531834e-06, "loss": 2.1013, "step": 24434 }, { "epoch": 0.81, "grad_norm": 0.747162401676178, "learning_rate": 1.7328822061151518e-06, "loss": 2.0689, "step": 24435 }, { "epoch": 0.81, "grad_norm": 0.7324395179748535, "learning_rate": 1.7322842419741726e-06, "loss": 2.0203, "step": 24436 }, { "epoch": 0.81, "grad_norm": 0.7566588521003723, "learning_rate": 1.731686371236999e-06, "loss": 2.1046, "step": 24437 }, { "epoch": 0.81, "grad_norm": 0.7557225227355957, "learning_rate": 1.7310885939103883e-06, "loss": 2.0895, "step": 24438 }, { "epoch": 0.81, "grad_norm": 0.7387061715126038, "learning_rate": 1.7304909100010902e-06, "loss": 2.0108, "step": 24439 }, { "epoch": 0.81, "grad_norm": 0.7409157752990723, "learning_rate": 1.729893319515863e-06, "loss": 2.0526, "step": 24440 }, { "epoch": 0.81, "grad_norm": 0.7638505697250366, "learning_rate": 1.7292958224614508e-06, "loss": 2.0655, "step": 24441 }, { "epoch": 0.81, "grad_norm": 0.7757713198661804, "learning_rate": 1.7286984188446098e-06, "loss": 2.0892, "step": 24442 }, { "epoch": 0.81, "grad_norm": 0.7393655776977539, "learning_rate": 1.7281011086720855e-06, "loss": 2.0184, "step": 24443 }, { "epoch": 0.81, "grad_norm": 0.7504480481147766, "learning_rate": 1.7275038919506283e-06, "loss": 2.0951, "step": 24444 }, { "epoch": 0.81, "grad_norm": 0.757816731929779, "learning_rate": 1.7269067686869835e-06, "loss": 2.0633, "step": 24445 }, { "epoch": 0.81, "grad_norm": 0.7457289099693298, "learning_rate": 1.7263097388878958e-06, "loss": 2.1361, "step": 24446 }, { "epoch": 0.81, "grad_norm": 0.7603092789649963, "learning_rate": 1.7257128025601123e-06, "loss": 2.0448, "step": 24447 }, { "epoch": 0.81, "grad_norm": 0.747394859790802, "learning_rate": 1.7251159597103784e-06, "loss": 2.0193, "step": 24448 }, { "epoch": 0.81, "grad_norm": 0.7410550713539124, "learning_rate": 1.7245192103454344e-06, "loss": 1.9671, "step": 24449 }, { "epoch": 0.81, "grad_norm": 0.7555094361305237, "learning_rate": 1.7239225544720205e-06, "loss": 1.9878, "step": 24450 }, { "epoch": 0.81, "grad_norm": 0.7479246258735657, "learning_rate": 1.7233259920968814e-06, "loss": 2.1862, "step": 24451 }, { "epoch": 0.81, "grad_norm": 0.7452386021614075, "learning_rate": 1.7227295232267517e-06, "loss": 2.1099, "step": 24452 }, { "epoch": 0.81, "grad_norm": 0.7817735075950623, "learning_rate": 1.7221331478683734e-06, "loss": 2.0426, "step": 24453 }, { "epoch": 0.81, "grad_norm": 0.7606265544891357, "learning_rate": 1.7215368660284892e-06, "loss": 2.1183, "step": 24454 }, { "epoch": 0.81, "grad_norm": 0.7569113969802856, "learning_rate": 1.7209406777138239e-06, "loss": 2.1123, "step": 24455 }, { "epoch": 0.81, "grad_norm": 0.7665259838104248, "learning_rate": 1.7203445829311194e-06, "loss": 2.0558, "step": 24456 }, { "epoch": 0.81, "grad_norm": 0.7542934417724609, "learning_rate": 1.7197485816871118e-06, "loss": 2.0138, "step": 24457 }, { "epoch": 0.81, "grad_norm": 0.7507808804512024, "learning_rate": 1.7191526739885312e-06, "loss": 2.1118, "step": 24458 }, { "epoch": 0.81, "grad_norm": 0.7366945147514343, "learning_rate": 1.7185568598421088e-06, "loss": 1.9905, "step": 24459 }, { "epoch": 0.81, "grad_norm": 0.7167713642120361, "learning_rate": 1.7179611392545803e-06, "loss": 2.0673, "step": 24460 }, { "epoch": 0.81, "grad_norm": 0.7365698218345642, "learning_rate": 1.7173655122326705e-06, "loss": 2.0804, "step": 24461 }, { "epoch": 0.81, "grad_norm": 0.7615119218826294, "learning_rate": 1.716769978783114e-06, "loss": 2.0751, "step": 24462 }, { "epoch": 0.81, "grad_norm": 0.7753332853317261, "learning_rate": 1.7161745389126328e-06, "loss": 2.0026, "step": 24463 }, { "epoch": 0.81, "grad_norm": 0.7452757954597473, "learning_rate": 1.7155791926279585e-06, "loss": 2.0641, "step": 24464 }, { "epoch": 0.81, "grad_norm": 0.7272971272468567, "learning_rate": 1.7149839399358136e-06, "loss": 2.0771, "step": 24465 }, { "epoch": 0.81, "grad_norm": 0.7489147782325745, "learning_rate": 1.7143887808429272e-06, "loss": 2.1361, "step": 24466 }, { "epoch": 0.81, "grad_norm": 0.7432457208633423, "learning_rate": 1.7137937153560213e-06, "loss": 2.0825, "step": 24467 }, { "epoch": 0.81, "grad_norm": 0.7778719663619995, "learning_rate": 1.7131987434818143e-06, "loss": 2.0472, "step": 24468 }, { "epoch": 0.81, "grad_norm": 0.7567726969718933, "learning_rate": 1.7126038652270316e-06, "loss": 2.075, "step": 24469 }, { "epoch": 0.81, "grad_norm": 0.7548190951347351, "learning_rate": 1.7120090805983957e-06, "loss": 2.057, "step": 24470 }, { "epoch": 0.81, "grad_norm": 0.728968620300293, "learning_rate": 1.7114143896026248e-06, "loss": 2.0193, "step": 24471 }, { "epoch": 0.81, "grad_norm": 0.7624780535697937, "learning_rate": 1.7108197922464332e-06, "loss": 2.0192, "step": 24472 }, { "epoch": 0.81, "grad_norm": 0.7407897710800171, "learning_rate": 1.7102252885365445e-06, "loss": 2.0273, "step": 24473 }, { "epoch": 0.81, "grad_norm": 0.8049396276473999, "learning_rate": 1.70963087847967e-06, "loss": 2.0163, "step": 24474 }, { "epoch": 0.81, "grad_norm": 0.7493434548377991, "learning_rate": 1.7090365620825266e-06, "loss": 2.0633, "step": 24475 }, { "epoch": 0.81, "grad_norm": 0.7303407788276672, "learning_rate": 1.7084423393518346e-06, "loss": 2.0232, "step": 24476 }, { "epoch": 0.81, "grad_norm": 0.7913075685501099, "learning_rate": 1.7078482102942963e-06, "loss": 2.0741, "step": 24477 }, { "epoch": 0.81, "grad_norm": 0.7184578776359558, "learning_rate": 1.7072541749166293e-06, "loss": 2.0333, "step": 24478 }, { "epoch": 0.81, "grad_norm": 0.7623928189277649, "learning_rate": 1.7066602332255477e-06, "loss": 2.0524, "step": 24479 }, { "epoch": 0.81, "grad_norm": 0.7485759854316711, "learning_rate": 1.706066385227758e-06, "loss": 2.0497, "step": 24480 }, { "epoch": 0.81, "grad_norm": 0.7492665648460388, "learning_rate": 1.7054726309299675e-06, "loss": 2.0434, "step": 24481 }, { "epoch": 0.81, "grad_norm": 0.7363805770874023, "learning_rate": 1.7048789703388878e-06, "loss": 2.0188, "step": 24482 }, { "epoch": 0.81, "grad_norm": 0.7778322100639343, "learning_rate": 1.7042854034612222e-06, "loss": 2.0066, "step": 24483 }, { "epoch": 0.81, "grad_norm": 0.7408059239387512, "learning_rate": 1.703691930303678e-06, "loss": 2.0571, "step": 24484 }, { "epoch": 0.81, "grad_norm": 0.7532210946083069, "learning_rate": 1.7030985508729624e-06, "loss": 1.9983, "step": 24485 }, { "epoch": 0.81, "grad_norm": 0.7453421950340271, "learning_rate": 1.702505265175778e-06, "loss": 1.9613, "step": 24486 }, { "epoch": 0.81, "grad_norm": 0.7172645330429077, "learning_rate": 1.7019120732188232e-06, "loss": 2.0386, "step": 24487 }, { "epoch": 0.81, "grad_norm": 0.7340813875198364, "learning_rate": 1.7013189750088032e-06, "loss": 1.9774, "step": 24488 }, { "epoch": 0.81, "grad_norm": 0.7267454862594604, "learning_rate": 1.7007259705524216e-06, "loss": 2.0986, "step": 24489 }, { "epoch": 0.81, "grad_norm": 0.7326659560203552, "learning_rate": 1.7001330598563704e-06, "loss": 2.0468, "step": 24490 }, { "epoch": 0.81, "grad_norm": 0.7686564922332764, "learning_rate": 1.699540242927351e-06, "loss": 1.9749, "step": 24491 }, { "epoch": 0.81, "grad_norm": 0.770440936088562, "learning_rate": 1.698947519772064e-06, "loss": 1.9953, "step": 24492 }, { "epoch": 0.81, "grad_norm": 0.7358247637748718, "learning_rate": 1.6983548903972036e-06, "loss": 2.0582, "step": 24493 }, { "epoch": 0.81, "grad_norm": 0.7365022897720337, "learning_rate": 1.6977623548094612e-06, "loss": 2.016, "step": 24494 }, { "epoch": 0.81, "grad_norm": 0.726660430431366, "learning_rate": 1.6971699130155361e-06, "loss": 1.9653, "step": 24495 }, { "epoch": 0.81, "grad_norm": 0.7718369960784912, "learning_rate": 1.6965775650221184e-06, "loss": 2.0485, "step": 24496 }, { "epoch": 0.82, "grad_norm": 0.7348105311393738, "learning_rate": 1.6959853108359004e-06, "loss": 2.0279, "step": 24497 }, { "epoch": 0.82, "grad_norm": 0.734720766544342, "learning_rate": 1.695393150463578e-06, "loss": 2.0755, "step": 24498 }, { "epoch": 0.82, "grad_norm": 0.7499367594718933, "learning_rate": 1.6948010839118323e-06, "loss": 2.0526, "step": 24499 }, { "epoch": 0.82, "grad_norm": 0.7268011569976807, "learning_rate": 1.694209111187357e-06, "loss": 2.0762, "step": 24500 }, { "epoch": 0.82, "grad_norm": 0.7263720035552979, "learning_rate": 1.6936172322968425e-06, "loss": 1.9717, "step": 24501 }, { "epoch": 0.82, "grad_norm": 0.7753645777702332, "learning_rate": 1.6930254472469709e-06, "loss": 2.0984, "step": 24502 }, { "epoch": 0.82, "grad_norm": 0.7486563324928284, "learning_rate": 1.692433756044428e-06, "loss": 2.0686, "step": 24503 }, { "epoch": 0.82, "grad_norm": 0.7719558477401733, "learning_rate": 1.6918421586959e-06, "loss": 2.0588, "step": 24504 }, { "epoch": 0.82, "grad_norm": 0.7465249300003052, "learning_rate": 1.691250655208072e-06, "loss": 2.0795, "step": 24505 }, { "epoch": 0.82, "grad_norm": 0.7705626487731934, "learning_rate": 1.6906592455876226e-06, "loss": 2.1261, "step": 24506 }, { "epoch": 0.82, "grad_norm": 0.7503687143325806, "learning_rate": 1.6900679298412381e-06, "loss": 1.9558, "step": 24507 }, { "epoch": 0.82, "grad_norm": 0.7387207746505737, "learning_rate": 1.6894767079755958e-06, "loss": 2.0652, "step": 24508 }, { "epoch": 0.82, "grad_norm": 0.740697979927063, "learning_rate": 1.6888855799973725e-06, "loss": 2.0591, "step": 24509 }, { "epoch": 0.82, "grad_norm": 0.770946741104126, "learning_rate": 1.6882945459132493e-06, "loss": 2.0607, "step": 24510 }, { "epoch": 0.82, "grad_norm": 0.7290916442871094, "learning_rate": 1.6877036057299078e-06, "loss": 2.0489, "step": 24511 }, { "epoch": 0.82, "grad_norm": 0.7611687183380127, "learning_rate": 1.6871127594540149e-06, "loss": 2.0628, "step": 24512 }, { "epoch": 0.82, "grad_norm": 0.7313723564147949, "learning_rate": 1.6865220070922495e-06, "loss": 1.9598, "step": 24513 }, { "epoch": 0.82, "grad_norm": 0.7581969499588013, "learning_rate": 1.6859313486512896e-06, "loss": 2.0088, "step": 24514 }, { "epoch": 0.82, "grad_norm": 0.7544588446617126, "learning_rate": 1.6853407841378022e-06, "loss": 2.03, "step": 24515 }, { "epoch": 0.82, "grad_norm": 0.7417830228805542, "learning_rate": 1.684750313558464e-06, "loss": 2.0674, "step": 24516 }, { "epoch": 0.82, "grad_norm": 0.7411788702011108, "learning_rate": 1.6841599369199435e-06, "loss": 2.0408, "step": 24517 }, { "epoch": 0.82, "grad_norm": 0.7454735636711121, "learning_rate": 1.6835696542289082e-06, "loss": 2.141, "step": 24518 }, { "epoch": 0.82, "grad_norm": 0.7467008829116821, "learning_rate": 1.6829794654920295e-06, "loss": 1.9263, "step": 24519 }, { "epoch": 0.82, "grad_norm": 0.7584688663482666, "learning_rate": 1.6823893707159755e-06, "loss": 2.1133, "step": 24520 }, { "epoch": 0.82, "grad_norm": 0.7470139861106873, "learning_rate": 1.6817993699074131e-06, "loss": 2.0816, "step": 24521 }, { "epoch": 0.82, "grad_norm": 0.7449947595596313, "learning_rate": 1.6812094630730037e-06, "loss": 2.0221, "step": 24522 }, { "epoch": 0.82, "grad_norm": 0.7774048447608948, "learning_rate": 1.6806196502194173e-06, "loss": 1.971, "step": 24523 }, { "epoch": 0.82, "grad_norm": 0.783819317817688, "learning_rate": 1.6800299313533142e-06, "loss": 2.069, "step": 24524 }, { "epoch": 0.82, "grad_norm": 0.7619116306304932, "learning_rate": 1.6794403064813536e-06, "loss": 2.0478, "step": 24525 }, { "epoch": 0.82, "grad_norm": 0.7302319407463074, "learning_rate": 1.6788507756102012e-06, "loss": 2.0364, "step": 24526 }, { "epoch": 0.82, "grad_norm": 0.7524310350418091, "learning_rate": 1.6782613387465185e-06, "loss": 2.0727, "step": 24527 }, { "epoch": 0.82, "grad_norm": 0.747408390045166, "learning_rate": 1.677671995896959e-06, "loss": 2.0217, "step": 24528 }, { "epoch": 0.82, "grad_norm": 0.7467747926712036, "learning_rate": 1.6770827470681872e-06, "loss": 2.0312, "step": 24529 }, { "epoch": 0.82, "grad_norm": 0.759817898273468, "learning_rate": 1.6764935922668569e-06, "loss": 2.1025, "step": 24530 }, { "epoch": 0.82, "grad_norm": 0.7594185471534729, "learning_rate": 1.6759045314996202e-06, "loss": 2.0648, "step": 24531 }, { "epoch": 0.82, "grad_norm": 0.7296598553657532, "learning_rate": 1.6753155647731367e-06, "loss": 2.0653, "step": 24532 }, { "epoch": 0.82, "grad_norm": 0.7643938660621643, "learning_rate": 1.6747266920940642e-06, "loss": 2.075, "step": 24533 }, { "epoch": 0.82, "grad_norm": 0.7425946593284607, "learning_rate": 1.674137913469045e-06, "loss": 2.0569, "step": 24534 }, { "epoch": 0.82, "grad_norm": 0.7510147094726562, "learning_rate": 1.6735492289047362e-06, "loss": 2.1427, "step": 24535 }, { "epoch": 0.82, "grad_norm": 0.7498977184295654, "learning_rate": 1.6729606384077912e-06, "loss": 2.1012, "step": 24536 }, { "epoch": 0.82, "grad_norm": 0.7713295817375183, "learning_rate": 1.6723721419848549e-06, "loss": 2.001, "step": 24537 }, { "epoch": 0.82, "grad_norm": 0.735720694065094, "learning_rate": 1.6717837396425795e-06, "loss": 2.0349, "step": 24538 }, { "epoch": 0.82, "grad_norm": 0.7762858271598816, "learning_rate": 1.67119543138761e-06, "loss": 2.0703, "step": 24539 }, { "epoch": 0.82, "grad_norm": 0.7564401626586914, "learning_rate": 1.6706072172265919e-06, "loss": 2.0349, "step": 24540 }, { "epoch": 0.82, "grad_norm": 0.732455313205719, "learning_rate": 1.6700190971661712e-06, "loss": 2.0174, "step": 24541 }, { "epoch": 0.82, "grad_norm": 0.7477617859840393, "learning_rate": 1.6694310712129958e-06, "loss": 2.0383, "step": 24542 }, { "epoch": 0.82, "grad_norm": 0.7415032982826233, "learning_rate": 1.668843139373706e-06, "loss": 2.0026, "step": 24543 }, { "epoch": 0.82, "grad_norm": 0.7218418121337891, "learning_rate": 1.668255301654942e-06, "loss": 2.0263, "step": 24544 }, { "epoch": 0.82, "grad_norm": 0.7402378916740417, "learning_rate": 1.6676675580633483e-06, "loss": 2.0089, "step": 24545 }, { "epoch": 0.82, "grad_norm": 0.7382590770721436, "learning_rate": 1.667079908605561e-06, "loss": 2.0129, "step": 24546 }, { "epoch": 0.82, "grad_norm": 0.7498797178268433, "learning_rate": 1.6664923532882239e-06, "loss": 2.1123, "step": 24547 }, { "epoch": 0.82, "grad_norm": 0.7568278908729553, "learning_rate": 1.6659048921179698e-06, "loss": 2.0583, "step": 24548 }, { "epoch": 0.82, "grad_norm": 0.7213453054428101, "learning_rate": 1.6653175251014397e-06, "loss": 1.9896, "step": 24549 }, { "epoch": 0.82, "grad_norm": 0.7355955839157104, "learning_rate": 1.6647302522452658e-06, "loss": 2.0218, "step": 24550 }, { "epoch": 0.82, "grad_norm": 0.7454813718795776, "learning_rate": 1.664143073556087e-06, "loss": 2.0689, "step": 24551 }, { "epoch": 0.82, "grad_norm": 0.732250452041626, "learning_rate": 1.6635559890405351e-06, "loss": 2.0107, "step": 24552 }, { "epoch": 0.82, "grad_norm": 0.7605438828468323, "learning_rate": 1.6629689987052388e-06, "loss": 2.0175, "step": 24553 }, { "epoch": 0.82, "grad_norm": 0.7370153665542603, "learning_rate": 1.6623821025568331e-06, "loss": 2.0675, "step": 24554 }, { "epoch": 0.82, "grad_norm": 0.764571487903595, "learning_rate": 1.6617953006019527e-06, "loss": 2.0031, "step": 24555 }, { "epoch": 0.82, "grad_norm": 0.7388578057289124, "learning_rate": 1.6612085928472177e-06, "loss": 2.1111, "step": 24556 }, { "epoch": 0.82, "grad_norm": 0.7645792365074158, "learning_rate": 1.6606219792992606e-06, "loss": 2.0759, "step": 24557 }, { "epoch": 0.82, "grad_norm": 0.734192430973053, "learning_rate": 1.6600354599647116e-06, "loss": 2.0682, "step": 24558 }, { "epoch": 0.82, "grad_norm": 0.729652464389801, "learning_rate": 1.6594490348501913e-06, "loss": 2.0657, "step": 24559 }, { "epoch": 0.82, "grad_norm": 0.7525078058242798, "learning_rate": 1.6588627039623317e-06, "loss": 2.0815, "step": 24560 }, { "epoch": 0.82, "grad_norm": 0.7328199744224548, "learning_rate": 1.6582764673077511e-06, "loss": 2.0226, "step": 24561 }, { "epoch": 0.82, "grad_norm": 0.7466614246368408, "learning_rate": 1.657690324893073e-06, "loss": 2.0374, "step": 24562 }, { "epoch": 0.82, "grad_norm": 0.749165415763855, "learning_rate": 1.65710427672492e-06, "loss": 2.0256, "step": 24563 }, { "epoch": 0.82, "grad_norm": 0.7522661685943604, "learning_rate": 1.656518322809917e-06, "loss": 2.0906, "step": 24564 }, { "epoch": 0.82, "grad_norm": 0.761629581451416, "learning_rate": 1.6559324631546792e-06, "loss": 2.0879, "step": 24565 }, { "epoch": 0.82, "grad_norm": 0.7588980197906494, "learning_rate": 1.6553466977658238e-06, "loss": 2.0592, "step": 24566 }, { "epoch": 0.82, "grad_norm": 0.7484273910522461, "learning_rate": 1.6547610266499736e-06, "loss": 2.0421, "step": 24567 }, { "epoch": 0.82, "grad_norm": 0.7427703738212585, "learning_rate": 1.6541754498137396e-06, "loss": 2.0455, "step": 24568 }, { "epoch": 0.82, "grad_norm": 0.7355174422264099, "learning_rate": 1.6535899672637435e-06, "loss": 2.0351, "step": 24569 }, { "epoch": 0.82, "grad_norm": 0.7615810036659241, "learning_rate": 1.653004579006594e-06, "loss": 2.052, "step": 24570 }, { "epoch": 0.82, "grad_norm": 0.7332801222801208, "learning_rate": 1.6524192850489096e-06, "loss": 1.9977, "step": 24571 }, { "epoch": 0.82, "grad_norm": 0.7462335228919983, "learning_rate": 1.6518340853972969e-06, "loss": 1.9745, "step": 24572 }, { "epoch": 0.82, "grad_norm": 0.7507336735725403, "learning_rate": 1.651248980058373e-06, "loss": 2.0417, "step": 24573 }, { "epoch": 0.82, "grad_norm": 0.7614277601242065, "learning_rate": 1.650663969038745e-06, "loss": 2.0428, "step": 24574 }, { "epoch": 0.82, "grad_norm": 0.7457935214042664, "learning_rate": 1.6500790523450195e-06, "loss": 2.0367, "step": 24575 }, { "epoch": 0.82, "grad_norm": 0.7891905307769775, "learning_rate": 1.649494229983808e-06, "loss": 2.111, "step": 24576 }, { "epoch": 0.82, "grad_norm": 0.7668673396110535, "learning_rate": 1.6489095019617185e-06, "loss": 1.9939, "step": 24577 }, { "epoch": 0.82, "grad_norm": 0.7392739653587341, "learning_rate": 1.6483248682853558e-06, "loss": 2.1285, "step": 24578 }, { "epoch": 0.82, "grad_norm": 0.7716066837310791, "learning_rate": 1.6477403289613215e-06, "loss": 2.0639, "step": 24579 }, { "epoch": 0.82, "grad_norm": 0.7748960256576538, "learning_rate": 1.6471558839962253e-06, "loss": 2.0407, "step": 24580 }, { "epoch": 0.82, "grad_norm": 0.7532209753990173, "learning_rate": 1.6465715333966636e-06, "loss": 2.1106, "step": 24581 }, { "epoch": 0.82, "grad_norm": 0.7628597617149353, "learning_rate": 1.645987277169243e-06, "loss": 2.0539, "step": 24582 }, { "epoch": 0.82, "grad_norm": 0.7131446003913879, "learning_rate": 1.645403115320563e-06, "loss": 2.0718, "step": 24583 }, { "epoch": 0.82, "grad_norm": 0.7546490430831909, "learning_rate": 1.64481904785722e-06, "loss": 2.1012, "step": 24584 }, { "epoch": 0.82, "grad_norm": 0.7365564703941345, "learning_rate": 1.6442350747858139e-06, "loss": 2.0327, "step": 24585 }, { "epoch": 0.82, "grad_norm": 0.7771188020706177, "learning_rate": 1.6436511961129464e-06, "loss": 2.0507, "step": 24586 }, { "epoch": 0.82, "grad_norm": 0.7132853865623474, "learning_rate": 1.6430674118452095e-06, "loss": 2.0557, "step": 24587 }, { "epoch": 0.82, "grad_norm": 0.7698830366134644, "learning_rate": 1.6424837219891976e-06, "loss": 2.0177, "step": 24588 }, { "epoch": 0.82, "grad_norm": 0.7774745225906372, "learning_rate": 1.6419001265515067e-06, "loss": 2.0127, "step": 24589 }, { "epoch": 0.82, "grad_norm": 0.733350932598114, "learning_rate": 1.6413166255387313e-06, "loss": 2.0329, "step": 24590 }, { "epoch": 0.82, "grad_norm": 0.7675386667251587, "learning_rate": 1.6407332189574632e-06, "loss": 2.1012, "step": 24591 }, { "epoch": 0.82, "grad_norm": 0.712372362613678, "learning_rate": 1.640149906814289e-06, "loss": 2.0402, "step": 24592 }, { "epoch": 0.82, "grad_norm": 0.7344722151756287, "learning_rate": 1.6395666891158046e-06, "loss": 2.0297, "step": 24593 }, { "epoch": 0.82, "grad_norm": 0.7221224308013916, "learning_rate": 1.6389835658685938e-06, "loss": 2.0168, "step": 24594 }, { "epoch": 0.82, "grad_norm": 0.7723449468612671, "learning_rate": 1.6384005370792478e-06, "loss": 2.0901, "step": 24595 }, { "epoch": 0.82, "grad_norm": 0.7529548406600952, "learning_rate": 1.6378176027543535e-06, "loss": 2.06, "step": 24596 }, { "epoch": 0.82, "grad_norm": 0.7539964914321899, "learning_rate": 1.6372347629004924e-06, "loss": 2.0227, "step": 24597 }, { "epoch": 0.82, "grad_norm": 0.7455288171768188, "learning_rate": 1.6366520175242518e-06, "loss": 2.0104, "step": 24598 }, { "epoch": 0.82, "grad_norm": 0.763960063457489, "learning_rate": 1.6360693666322181e-06, "loss": 2.0274, "step": 24599 }, { "epoch": 0.82, "grad_norm": 0.723447322845459, "learning_rate": 1.6354868102309696e-06, "loss": 2.0127, "step": 24600 }, { "epoch": 0.82, "grad_norm": 0.7618283629417419, "learning_rate": 1.6349043483270876e-06, "loss": 2.097, "step": 24601 }, { "epoch": 0.82, "grad_norm": 0.7468940615653992, "learning_rate": 1.634321980927157e-06, "loss": 2.0887, "step": 24602 }, { "epoch": 0.82, "grad_norm": 0.7379149794578552, "learning_rate": 1.6337397080377503e-06, "loss": 2.1098, "step": 24603 }, { "epoch": 0.82, "grad_norm": 0.7264783382415771, "learning_rate": 1.6331575296654522e-06, "loss": 2.0211, "step": 24604 }, { "epoch": 0.82, "grad_norm": 0.7703182101249695, "learning_rate": 1.6325754458168341e-06, "loss": 2.04, "step": 24605 }, { "epoch": 0.82, "grad_norm": 0.774222195148468, "learning_rate": 1.6319934564984774e-06, "loss": 2.059, "step": 24606 }, { "epoch": 0.82, "grad_norm": 0.7240437269210815, "learning_rate": 1.6314115617169523e-06, "loss": 2.0653, "step": 24607 }, { "epoch": 0.82, "grad_norm": 0.7502554655075073, "learning_rate": 1.630829761478837e-06, "loss": 1.9935, "step": 24608 }, { "epoch": 0.82, "grad_norm": 0.7653800249099731, "learning_rate": 1.630248055790703e-06, "loss": 2.0466, "step": 24609 }, { "epoch": 0.82, "grad_norm": 0.7408431768417358, "learning_rate": 1.6296664446591181e-06, "loss": 2.1171, "step": 24610 }, { "epoch": 0.82, "grad_norm": 0.7434965372085571, "learning_rate": 1.6290849280906573e-06, "loss": 2.0596, "step": 24611 }, { "epoch": 0.82, "grad_norm": 0.7596830129623413, "learning_rate": 1.6285035060918908e-06, "loss": 2.024, "step": 24612 }, { "epoch": 0.82, "grad_norm": 0.768607497215271, "learning_rate": 1.6279221786693844e-06, "loss": 2.0239, "step": 24613 }, { "epoch": 0.82, "grad_norm": 0.7548766732215881, "learning_rate": 1.6273409458297063e-06, "loss": 2.0567, "step": 24614 }, { "epoch": 0.82, "grad_norm": 0.7422938942909241, "learning_rate": 1.6267598075794244e-06, "loss": 1.979, "step": 24615 }, { "epoch": 0.82, "grad_norm": 0.7561812996864319, "learning_rate": 1.6261787639251003e-06, "loss": 2.0135, "step": 24616 }, { "epoch": 0.82, "grad_norm": 0.7785735726356506, "learning_rate": 1.6255978148733042e-06, "loss": 2.0132, "step": 24617 }, { "epoch": 0.82, "grad_norm": 0.7781122326850891, "learning_rate": 1.6250169604305966e-06, "loss": 1.9659, "step": 24618 }, { "epoch": 0.82, "grad_norm": 0.7320306301116943, "learning_rate": 1.6244362006035363e-06, "loss": 2.0405, "step": 24619 }, { "epoch": 0.82, "grad_norm": 0.7523961067199707, "learning_rate": 1.6238555353986863e-06, "loss": 2.0589, "step": 24620 }, { "epoch": 0.82, "grad_norm": 0.7618944644927979, "learning_rate": 1.6232749648226109e-06, "loss": 2.0587, "step": 24621 }, { "epoch": 0.82, "grad_norm": 0.7323446869850159, "learning_rate": 1.6226944888818651e-06, "loss": 2.0014, "step": 24622 }, { "epoch": 0.82, "grad_norm": 0.7585964798927307, "learning_rate": 1.622114107583006e-06, "loss": 1.9995, "step": 24623 }, { "epoch": 0.82, "grad_norm": 0.7511516213417053, "learning_rate": 1.6215338209325938e-06, "loss": 2.0951, "step": 24624 }, { "epoch": 0.82, "grad_norm": 0.7365175485610962, "learning_rate": 1.6209536289371796e-06, "loss": 1.9801, "step": 24625 }, { "epoch": 0.82, "grad_norm": 0.7180129885673523, "learning_rate": 1.6203735316033231e-06, "loss": 2.0315, "step": 24626 }, { "epoch": 0.82, "grad_norm": 0.7375575304031372, "learning_rate": 1.6197935289375733e-06, "loss": 2.0412, "step": 24627 }, { "epoch": 0.82, "grad_norm": 0.7332069873809814, "learning_rate": 1.619213620946487e-06, "loss": 2.0702, "step": 24628 }, { "epoch": 0.82, "grad_norm": 0.7303208112716675, "learning_rate": 1.6186338076366115e-06, "loss": 2.0235, "step": 24629 }, { "epoch": 0.82, "grad_norm": 0.7562184929847717, "learning_rate": 1.6180540890145014e-06, "loss": 2.0742, "step": 24630 }, { "epoch": 0.82, "grad_norm": 0.707876443862915, "learning_rate": 1.6174744650867036e-06, "loss": 2.0176, "step": 24631 }, { "epoch": 0.82, "grad_norm": 0.796810507774353, "learning_rate": 1.6168949358597652e-06, "loss": 2.0847, "step": 24632 }, { "epoch": 0.82, "grad_norm": 0.7438734769821167, "learning_rate": 1.6163155013402331e-06, "loss": 2.0357, "step": 24633 }, { "epoch": 0.82, "grad_norm": 0.7598115801811218, "learning_rate": 1.6157361615346589e-06, "loss": 2.014, "step": 24634 }, { "epoch": 0.82, "grad_norm": 0.7524226903915405, "learning_rate": 1.615156916449584e-06, "loss": 1.9684, "step": 24635 }, { "epoch": 0.82, "grad_norm": 0.7687348127365112, "learning_rate": 1.6145777660915496e-06, "loss": 2.0852, "step": 24636 }, { "epoch": 0.82, "grad_norm": 0.7468103766441345, "learning_rate": 1.613998710467104e-06, "loss": 2.0784, "step": 24637 }, { "epoch": 0.82, "grad_norm": 0.7184838056564331, "learning_rate": 1.613419749582783e-06, "loss": 2.0654, "step": 24638 }, { "epoch": 0.82, "grad_norm": 0.7395575046539307, "learning_rate": 1.6128408834451336e-06, "loss": 2.0154, "step": 24639 }, { "epoch": 0.82, "grad_norm": 0.7380672097206116, "learning_rate": 1.6122621120606929e-06, "loss": 2.0648, "step": 24640 }, { "epoch": 0.82, "grad_norm": 0.7515471577644348, "learning_rate": 1.6116834354359968e-06, "loss": 2.0914, "step": 24641 }, { "epoch": 0.82, "grad_norm": 0.7638505101203918, "learning_rate": 1.6111048535775842e-06, "loss": 2.0562, "step": 24642 }, { "epoch": 0.82, "grad_norm": 0.7853606343269348, "learning_rate": 1.6105263664919957e-06, "loss": 2.0915, "step": 24643 }, { "epoch": 0.82, "grad_norm": 0.7652499079704285, "learning_rate": 1.6099479741857639e-06, "loss": 2.0473, "step": 24644 }, { "epoch": 0.82, "grad_norm": 0.7278482913970947, "learning_rate": 1.6093696766654199e-06, "loss": 2.0304, "step": 24645 }, { "epoch": 0.82, "grad_norm": 0.7803865075111389, "learning_rate": 1.608791473937502e-06, "loss": 2.0633, "step": 24646 }, { "epoch": 0.82, "grad_norm": 0.753241777420044, "learning_rate": 1.6082133660085386e-06, "loss": 2.0355, "step": 24647 }, { "epoch": 0.82, "grad_norm": 0.7282392382621765, "learning_rate": 1.607635352885064e-06, "loss": 1.996, "step": 24648 }, { "epoch": 0.82, "grad_norm": 0.7931697368621826, "learning_rate": 1.6070574345736056e-06, "loss": 2.0253, "step": 24649 }, { "epoch": 0.82, "grad_norm": 0.7466763257980347, "learning_rate": 1.6064796110806945e-06, "loss": 2.1404, "step": 24650 }, { "epoch": 0.82, "grad_norm": 0.7644801139831543, "learning_rate": 1.605901882412857e-06, "loss": 2.0728, "step": 24651 }, { "epoch": 0.82, "grad_norm": 0.7290539741516113, "learning_rate": 1.605324248576622e-06, "loss": 2.049, "step": 24652 }, { "epoch": 0.82, "grad_norm": 0.7890642881393433, "learning_rate": 1.6047467095785142e-06, "loss": 2.0635, "step": 24653 }, { "epoch": 0.82, "grad_norm": 0.7434118390083313, "learning_rate": 1.6041692654250551e-06, "loss": 2.0504, "step": 24654 }, { "epoch": 0.82, "grad_norm": 0.7930094003677368, "learning_rate": 1.603591916122771e-06, "loss": 2.0756, "step": 24655 }, { "epoch": 0.82, "grad_norm": 0.7471768260002136, "learning_rate": 1.6030146616781882e-06, "loss": 2.0862, "step": 24656 }, { "epoch": 0.82, "grad_norm": 0.7720146179199219, "learning_rate": 1.6024375020978234e-06, "loss": 2.0835, "step": 24657 }, { "epoch": 0.82, "grad_norm": 0.7535285949707031, "learning_rate": 1.6018604373881963e-06, "loss": 2.001, "step": 24658 }, { "epoch": 0.82, "grad_norm": 0.7511516809463501, "learning_rate": 1.601283467555831e-06, "loss": 2.0435, "step": 24659 }, { "epoch": 0.82, "grad_norm": 0.7551955580711365, "learning_rate": 1.6007065926072406e-06, "loss": 2.0902, "step": 24660 }, { "epoch": 0.82, "grad_norm": 0.759741485118866, "learning_rate": 1.600129812548944e-06, "loss": 2.0654, "step": 24661 }, { "epoch": 0.82, "grad_norm": 0.7355949878692627, "learning_rate": 1.5995531273874632e-06, "loss": 1.9998, "step": 24662 }, { "epoch": 0.82, "grad_norm": 0.7355269193649292, "learning_rate": 1.5989765371293032e-06, "loss": 2.0449, "step": 24663 }, { "epoch": 0.82, "grad_norm": 0.7465232610702515, "learning_rate": 1.598400041780982e-06, "loss": 2.0437, "step": 24664 }, { "epoch": 0.82, "grad_norm": 0.7346485257148743, "learning_rate": 1.5978236413490166e-06, "loss": 2.045, "step": 24665 }, { "epoch": 0.82, "grad_norm": 0.7511296272277832, "learning_rate": 1.5972473358399153e-06, "loss": 2.0944, "step": 24666 }, { "epoch": 0.82, "grad_norm": 0.7130091190338135, "learning_rate": 1.5966711252601874e-06, "loss": 2.0097, "step": 24667 }, { "epoch": 0.82, "grad_norm": 0.7297332882881165, "learning_rate": 1.5960950096163453e-06, "loss": 1.9637, "step": 24668 }, { "epoch": 0.82, "grad_norm": 0.7632662057876587, "learning_rate": 1.5955189889148948e-06, "loss": 2.0724, "step": 24669 }, { "epoch": 0.82, "grad_norm": 0.760145366191864, "learning_rate": 1.5949430631623487e-06, "loss": 2.0578, "step": 24670 }, { "epoch": 0.82, "grad_norm": 0.7408374547958374, "learning_rate": 1.594367232365206e-06, "loss": 1.9997, "step": 24671 }, { "epoch": 0.82, "grad_norm": 0.7327712178230286, "learning_rate": 1.5937914965299794e-06, "loss": 1.9802, "step": 24672 }, { "epoch": 0.82, "grad_norm": 0.7711045742034912, "learning_rate": 1.5932158556631672e-06, "loss": 2.0774, "step": 24673 }, { "epoch": 0.82, "grad_norm": 0.7370678186416626, "learning_rate": 1.5926403097712784e-06, "loss": 2.0583, "step": 24674 }, { "epoch": 0.82, "grad_norm": 0.7402954697608948, "learning_rate": 1.5920648588608112e-06, "loss": 2.0183, "step": 24675 }, { "epoch": 0.82, "grad_norm": 0.7355253100395203, "learning_rate": 1.591489502938266e-06, "loss": 2.0246, "step": 24676 }, { "epoch": 0.82, "grad_norm": 0.7816030979156494, "learning_rate": 1.5909142420101442e-06, "loss": 2.0399, "step": 24677 }, { "epoch": 0.82, "grad_norm": 0.7639210224151611, "learning_rate": 1.5903390760829484e-06, "loss": 2.0457, "step": 24678 }, { "epoch": 0.82, "grad_norm": 0.7285211682319641, "learning_rate": 1.5897640051631724e-06, "loss": 2.0555, "step": 24679 }, { "epoch": 0.82, "grad_norm": 0.7460406422615051, "learning_rate": 1.589189029257311e-06, "loss": 2.0524, "step": 24680 }, { "epoch": 0.82, "grad_norm": 0.750711977481842, "learning_rate": 1.5886141483718665e-06, "loss": 2.0406, "step": 24681 }, { "epoch": 0.82, "grad_norm": 0.7633406519889832, "learning_rate": 1.588039362513326e-06, "loss": 2.0773, "step": 24682 }, { "epoch": 0.82, "grad_norm": 0.749678373336792, "learning_rate": 1.587464671688187e-06, "loss": 2.0937, "step": 24683 }, { "epoch": 0.82, "grad_norm": 0.7542497515678406, "learning_rate": 1.5868900759029472e-06, "loss": 2.0733, "step": 24684 }, { "epoch": 0.82, "grad_norm": 0.7439835071563721, "learning_rate": 1.5863155751640879e-06, "loss": 2.0887, "step": 24685 }, { "epoch": 0.82, "grad_norm": 0.7492047548294067, "learning_rate": 1.5857411694781044e-06, "loss": 2.0871, "step": 24686 }, { "epoch": 0.82, "grad_norm": 0.7941245436668396, "learning_rate": 1.5851668588514878e-06, "loss": 2.1572, "step": 24687 }, { "epoch": 0.82, "grad_norm": 0.7431154847145081, "learning_rate": 1.5845926432907256e-06, "loss": 2.108, "step": 24688 }, { "epoch": 0.82, "grad_norm": 0.774812638759613, "learning_rate": 1.5840185228022997e-06, "loss": 2.0755, "step": 24689 }, { "epoch": 0.82, "grad_norm": 0.7613157629966736, "learning_rate": 1.5834444973927043e-06, "loss": 2.1219, "step": 24690 }, { "epoch": 0.82, "grad_norm": 0.7448336482048035, "learning_rate": 1.5828705670684174e-06, "loss": 2.0647, "step": 24691 }, { "epoch": 0.82, "grad_norm": 0.7452444434165955, "learning_rate": 1.582296731835925e-06, "loss": 2.0769, "step": 24692 }, { "epoch": 0.82, "grad_norm": 0.7306809425354004, "learning_rate": 1.581722991701714e-06, "loss": 2.0093, "step": 24693 }, { "epoch": 0.82, "grad_norm": 0.7399933338165283, "learning_rate": 1.5811493466722638e-06, "loss": 2.018, "step": 24694 }, { "epoch": 0.82, "grad_norm": 0.7592442035675049, "learning_rate": 1.5805757967540514e-06, "loss": 1.9878, "step": 24695 }, { "epoch": 0.82, "grad_norm": 0.7524027228355408, "learning_rate": 1.5800023419535592e-06, "loss": 2.0334, "step": 24696 }, { "epoch": 0.82, "grad_norm": 0.7307648658752441, "learning_rate": 1.57942898227727e-06, "loss": 2.0556, "step": 24697 }, { "epoch": 0.82, "grad_norm": 0.7876189351081848, "learning_rate": 1.5788557177316533e-06, "loss": 2.0471, "step": 24698 }, { "epoch": 0.82, "grad_norm": 0.7385796904563904, "learning_rate": 1.57828254832319e-06, "loss": 2.0906, "step": 24699 }, { "epoch": 0.82, "grad_norm": 0.729205310344696, "learning_rate": 1.5777094740583566e-06, "loss": 2.1308, "step": 24700 }, { "epoch": 0.82, "grad_norm": 0.7614633440971375, "learning_rate": 1.5771364949436251e-06, "loss": 1.9896, "step": 24701 }, { "epoch": 0.82, "grad_norm": 0.753969132900238, "learning_rate": 1.5765636109854676e-06, "loss": 2.0598, "step": 24702 }, { "epoch": 0.82, "grad_norm": 0.7359817624092102, "learning_rate": 1.5759908221903596e-06, "loss": 2.0906, "step": 24703 }, { "epoch": 0.82, "grad_norm": 0.7458236217498779, "learning_rate": 1.5754181285647684e-06, "loss": 1.9773, "step": 24704 }, { "epoch": 0.82, "grad_norm": 0.7319135665893555, "learning_rate": 1.5748455301151655e-06, "loss": 2.0767, "step": 24705 }, { "epoch": 0.82, "grad_norm": 0.7432857155799866, "learning_rate": 1.5742730268480232e-06, "loss": 2.0654, "step": 24706 }, { "epoch": 0.82, "grad_norm": 0.7468056082725525, "learning_rate": 1.5737006187698055e-06, "loss": 2.0182, "step": 24707 }, { "epoch": 0.82, "grad_norm": 0.7329849600791931, "learning_rate": 1.5731283058869785e-06, "loss": 2.059, "step": 24708 }, { "epoch": 0.82, "grad_norm": 0.8048843741416931, "learning_rate": 1.5725560882060108e-06, "loss": 2.0675, "step": 24709 }, { "epoch": 0.82, "grad_norm": 0.7300288677215576, "learning_rate": 1.5719839657333657e-06, "loss": 1.9894, "step": 24710 }, { "epoch": 0.82, "grad_norm": 0.7429967522621155, "learning_rate": 1.5714119384755044e-06, "loss": 2.0676, "step": 24711 }, { "epoch": 0.82, "grad_norm": 0.7354058623313904, "learning_rate": 1.5708400064388907e-06, "loss": 2.0972, "step": 24712 }, { "epoch": 0.82, "grad_norm": 0.7282070517539978, "learning_rate": 1.5702681696299893e-06, "loss": 2.0124, "step": 24713 }, { "epoch": 0.82, "grad_norm": 0.7163010239601135, "learning_rate": 1.569696428055255e-06, "loss": 2.0033, "step": 24714 }, { "epoch": 0.82, "grad_norm": 0.7472782135009766, "learning_rate": 1.569124781721153e-06, "loss": 2.0888, "step": 24715 }, { "epoch": 0.82, "grad_norm": 0.7678632736206055, "learning_rate": 1.5685532306341379e-06, "loss": 2.0586, "step": 24716 }, { "epoch": 0.82, "grad_norm": 0.7533635497093201, "learning_rate": 1.5679817748006653e-06, "loss": 2.071, "step": 24717 }, { "epoch": 0.82, "grad_norm": 0.7370835542678833, "learning_rate": 1.5674104142271917e-06, "loss": 2.0918, "step": 24718 }, { "epoch": 0.82, "grad_norm": 0.7656005620956421, "learning_rate": 1.5668391489201794e-06, "loss": 2.0586, "step": 24719 }, { "epoch": 0.82, "grad_norm": 0.7605054378509521, "learning_rate": 1.5662679788860702e-06, "loss": 2.0463, "step": 24720 }, { "epoch": 0.82, "grad_norm": 0.7772300243377686, "learning_rate": 1.565696904131323e-06, "loss": 2.0422, "step": 24721 }, { "epoch": 0.82, "grad_norm": 0.7316023111343384, "learning_rate": 1.5651259246623917e-06, "loss": 1.9821, "step": 24722 }, { "epoch": 0.82, "grad_norm": 0.7254810333251953, "learning_rate": 1.5645550404857223e-06, "loss": 2.0376, "step": 24723 }, { "epoch": 0.82, "grad_norm": 0.7456212043762207, "learning_rate": 1.5639842516077685e-06, "loss": 2.0267, "step": 24724 }, { "epoch": 0.82, "grad_norm": 0.7336885333061218, "learning_rate": 1.5634135580349763e-06, "loss": 2.0925, "step": 24725 }, { "epoch": 0.82, "grad_norm": 0.7532624006271362, "learning_rate": 1.5628429597737915e-06, "loss": 2.1057, "step": 24726 }, { "epoch": 0.82, "grad_norm": 0.7245036959648132, "learning_rate": 1.5622724568306624e-06, "loss": 2.0111, "step": 24727 }, { "epoch": 0.82, "grad_norm": 0.7442290782928467, "learning_rate": 1.561702049212036e-06, "loss": 2.0203, "step": 24728 }, { "epoch": 0.82, "grad_norm": 0.7539690732955933, "learning_rate": 1.561131736924355e-06, "loss": 2.0244, "step": 24729 }, { "epoch": 0.82, "grad_norm": 0.7333362102508545, "learning_rate": 1.5605615199740597e-06, "loss": 2.0148, "step": 24730 }, { "epoch": 0.82, "grad_norm": 0.7597872018814087, "learning_rate": 1.5599913983675962e-06, "loss": 2.0934, "step": 24731 }, { "epoch": 0.82, "grad_norm": 0.7553220391273499, "learning_rate": 1.5594213721114038e-06, "loss": 2.046, "step": 24732 }, { "epoch": 0.82, "grad_norm": 0.7654428482055664, "learning_rate": 1.5588514412119193e-06, "loss": 2.0349, "step": 24733 }, { "epoch": 0.82, "grad_norm": 0.7535482048988342, "learning_rate": 1.5582816056755844e-06, "loss": 2.0332, "step": 24734 }, { "epoch": 0.82, "grad_norm": 0.7631350755691528, "learning_rate": 1.5577118655088397e-06, "loss": 1.9968, "step": 24735 }, { "epoch": 0.82, "grad_norm": 0.7596483826637268, "learning_rate": 1.5571422207181153e-06, "loss": 2.1403, "step": 24736 }, { "epoch": 0.82, "grad_norm": 0.7418236136436462, "learning_rate": 1.5565726713098528e-06, "loss": 2.0334, "step": 24737 }, { "epoch": 0.82, "grad_norm": 0.7330083250999451, "learning_rate": 1.5560032172904837e-06, "loss": 2.1, "step": 24738 }, { "epoch": 0.82, "grad_norm": 0.7567529082298279, "learning_rate": 1.5554338586664398e-06, "loss": 2.0523, "step": 24739 }, { "epoch": 0.82, "grad_norm": 0.7426371574401855, "learning_rate": 1.5548645954441544e-06, "loss": 2.0813, "step": 24740 }, { "epoch": 0.82, "grad_norm": 0.783898651599884, "learning_rate": 1.5542954276300647e-06, "loss": 2.0269, "step": 24741 }, { "epoch": 0.82, "grad_norm": 0.7807183265686035, "learning_rate": 1.5537263552305914e-06, "loss": 2.0393, "step": 24742 }, { "epoch": 0.82, "grad_norm": 0.7230624556541443, "learning_rate": 1.553157378252167e-06, "loss": 2.0387, "step": 24743 }, { "epoch": 0.82, "grad_norm": 0.7492788434028625, "learning_rate": 1.5525884967012227e-06, "loss": 2.0346, "step": 24744 }, { "epoch": 0.82, "grad_norm": 0.7256644368171692, "learning_rate": 1.5520197105841805e-06, "loss": 2.031, "step": 24745 }, { "epoch": 0.82, "grad_norm": 0.7339571714401245, "learning_rate": 1.5514510199074706e-06, "loss": 2.0019, "step": 24746 }, { "epoch": 0.82, "grad_norm": 0.7513602375984192, "learning_rate": 1.5508824246775167e-06, "loss": 2.0119, "step": 24747 }, { "epoch": 0.82, "grad_norm": 0.8020707368850708, "learning_rate": 1.5503139249007381e-06, "loss": 2.0437, "step": 24748 }, { "epoch": 0.82, "grad_norm": 0.7423878312110901, "learning_rate": 1.549745520583562e-06, "loss": 2.0606, "step": 24749 }, { "epoch": 0.82, "grad_norm": 0.7596179842948914, "learning_rate": 1.54917721173241e-06, "loss": 2.0923, "step": 24750 }, { "epoch": 0.82, "grad_norm": 0.7379164695739746, "learning_rate": 1.5486089983537012e-06, "loss": 2.0101, "step": 24751 }, { "epoch": 0.82, "grad_norm": 0.7871154546737671, "learning_rate": 1.5480408804538526e-06, "loss": 2.0415, "step": 24752 }, { "epoch": 0.82, "grad_norm": 0.7450940012931824, "learning_rate": 1.5474728580392884e-06, "loss": 2.0465, "step": 24753 }, { "epoch": 0.82, "grad_norm": 0.7255831360816956, "learning_rate": 1.5469049311164208e-06, "loss": 2.0271, "step": 24754 }, { "epoch": 0.82, "grad_norm": 0.7373485565185547, "learning_rate": 1.546337099691665e-06, "loss": 2.0237, "step": 24755 }, { "epoch": 0.82, "grad_norm": 0.75111323595047, "learning_rate": 1.5457693637714389e-06, "loss": 2.076, "step": 24756 }, { "epoch": 0.82, "grad_norm": 0.7407294511795044, "learning_rate": 1.5452017233621575e-06, "loss": 2.0809, "step": 24757 }, { "epoch": 0.82, "grad_norm": 0.7730043530464172, "learning_rate": 1.54463417847023e-06, "loss": 2.0748, "step": 24758 }, { "epoch": 0.82, "grad_norm": 0.7276864051818848, "learning_rate": 1.5440667291020728e-06, "loss": 2.0172, "step": 24759 }, { "epoch": 0.82, "grad_norm": 0.7292165160179138, "learning_rate": 1.5434993752640948e-06, "loss": 2.0197, "step": 24760 }, { "epoch": 0.82, "grad_norm": 0.7439363598823547, "learning_rate": 1.542932116962701e-06, "loss": 2.0465, "step": 24761 }, { "epoch": 0.82, "grad_norm": 0.7563647627830505, "learning_rate": 1.5423649542043052e-06, "loss": 2.0331, "step": 24762 }, { "epoch": 0.82, "grad_norm": 0.7628288865089417, "learning_rate": 1.5417978869953166e-06, "loss": 2.0561, "step": 24763 }, { "epoch": 0.82, "grad_norm": 0.7728883624076843, "learning_rate": 1.5412309153421346e-06, "loss": 2.0358, "step": 24764 }, { "epoch": 0.82, "grad_norm": 0.7875117063522339, "learning_rate": 1.5406640392511684e-06, "loss": 2.1614, "step": 24765 }, { "epoch": 0.82, "grad_norm": 0.7547198534011841, "learning_rate": 1.5400972587288254e-06, "loss": 1.969, "step": 24766 }, { "epoch": 0.82, "grad_norm": 0.7479784488677979, "learning_rate": 1.5395305737815025e-06, "loss": 2.1037, "step": 24767 }, { "epoch": 0.82, "grad_norm": 0.7540486454963684, "learning_rate": 1.5389639844156069e-06, "loss": 2.084, "step": 24768 }, { "epoch": 0.82, "grad_norm": 0.7532678246498108, "learning_rate": 1.5383974906375377e-06, "loss": 2.0476, "step": 24769 }, { "epoch": 0.82, "grad_norm": 0.7456028461456299, "learning_rate": 1.537831092453692e-06, "loss": 2.0746, "step": 24770 }, { "epoch": 0.82, "grad_norm": 0.7707963585853577, "learning_rate": 1.5372647898704718e-06, "loss": 2.0424, "step": 24771 }, { "epoch": 0.82, "grad_norm": 0.7400874495506287, "learning_rate": 1.536698582894277e-06, "loss": 2.0405, "step": 24772 }, { "epoch": 0.82, "grad_norm": 0.7456061244010925, "learning_rate": 1.5361324715315006e-06, "loss": 2.0877, "step": 24773 }, { "epoch": 0.82, "grad_norm": 0.7650687098503113, "learning_rate": 1.5355664557885385e-06, "loss": 2.0744, "step": 24774 }, { "epoch": 0.82, "grad_norm": 0.7734844088554382, "learning_rate": 1.5350005356717868e-06, "loss": 2.0255, "step": 24775 }, { "epoch": 0.82, "grad_norm": 0.7319495677947998, "learning_rate": 1.5344347111876367e-06, "loss": 2.0651, "step": 24776 }, { "epoch": 0.82, "grad_norm": 0.7321783304214478, "learning_rate": 1.5338689823424836e-06, "loss": 2.0174, "step": 24777 }, { "epoch": 0.82, "grad_norm": 0.7307380437850952, "learning_rate": 1.533303349142715e-06, "loss": 2.0201, "step": 24778 }, { "epoch": 0.82, "grad_norm": 0.760513186454773, "learning_rate": 1.5327378115947255e-06, "loss": 2.0534, "step": 24779 }, { "epoch": 0.82, "grad_norm": 0.7649939656257629, "learning_rate": 1.5321723697048995e-06, "loss": 1.9236, "step": 24780 }, { "epoch": 0.82, "grad_norm": 0.751089870929718, "learning_rate": 1.53160702347963e-06, "loss": 2.1068, "step": 24781 }, { "epoch": 0.82, "grad_norm": 0.7439225912094116, "learning_rate": 1.5310417729253013e-06, "loss": 2.0866, "step": 24782 }, { "epoch": 0.82, "grad_norm": 0.7315003275871277, "learning_rate": 1.5304766180482966e-06, "loss": 2.0164, "step": 24783 }, { "epoch": 0.82, "grad_norm": 0.7567814588546753, "learning_rate": 1.529911558855004e-06, "loss": 1.9752, "step": 24784 }, { "epoch": 0.82, "grad_norm": 0.7418934106826782, "learning_rate": 1.5293465953518105e-06, "loss": 1.9813, "step": 24785 }, { "epoch": 0.82, "grad_norm": 0.7305530309677124, "learning_rate": 1.528781727545091e-06, "loss": 1.9998, "step": 24786 }, { "epoch": 0.82, "grad_norm": 0.759048342704773, "learning_rate": 1.5282169554412307e-06, "loss": 2.1039, "step": 24787 }, { "epoch": 0.82, "grad_norm": 0.7438389658927917, "learning_rate": 1.527652279046613e-06, "loss": 2.0109, "step": 24788 }, { "epoch": 0.82, "grad_norm": 0.7528771758079529, "learning_rate": 1.5270876983676108e-06, "loss": 2.1201, "step": 24789 }, { "epoch": 0.82, "grad_norm": 0.7434794902801514, "learning_rate": 1.52652321341061e-06, "loss": 2.052, "step": 24790 }, { "epoch": 0.82, "grad_norm": 0.7505081295967102, "learning_rate": 1.5259588241819833e-06, "loss": 1.9835, "step": 24791 }, { "epoch": 0.82, "grad_norm": 0.7506682276725769, "learning_rate": 1.5253945306881057e-06, "loss": 2.0943, "step": 24792 }, { "epoch": 0.82, "grad_norm": 0.7411180734634399, "learning_rate": 1.5248303329353543e-06, "loss": 2.0692, "step": 24793 }, { "epoch": 0.82, "grad_norm": 0.7329487800598145, "learning_rate": 1.524266230930105e-06, "loss": 2.0651, "step": 24794 }, { "epoch": 0.82, "grad_norm": 0.7391970753669739, "learning_rate": 1.523702224678728e-06, "loss": 1.9865, "step": 24795 }, { "epoch": 0.82, "grad_norm": 0.7162978649139404, "learning_rate": 1.5231383141875934e-06, "loss": 2.058, "step": 24796 }, { "epoch": 0.82, "grad_norm": 0.7709743976593018, "learning_rate": 1.5225744994630742e-06, "loss": 2.0065, "step": 24797 }, { "epoch": 0.83, "grad_norm": 0.7408787608146667, "learning_rate": 1.5220107805115424e-06, "loss": 1.9629, "step": 24798 }, { "epoch": 0.83, "grad_norm": 0.7529571652412415, "learning_rate": 1.5214471573393653e-06, "loss": 2.0759, "step": 24799 }, { "epoch": 0.83, "grad_norm": 0.7181904315948486, "learning_rate": 1.520883629952905e-06, "loss": 2.0087, "step": 24800 }, { "epoch": 0.83, "grad_norm": 0.7279215455055237, "learning_rate": 1.5203201983585358e-06, "loss": 2.0452, "step": 24801 }, { "epoch": 0.83, "grad_norm": 0.7429754734039307, "learning_rate": 1.519756862562617e-06, "loss": 2.0596, "step": 24802 }, { "epoch": 0.83, "grad_norm": 0.7268272638320923, "learning_rate": 1.5191936225715176e-06, "loss": 1.9947, "step": 24803 }, { "epoch": 0.83, "grad_norm": 0.7643913626670837, "learning_rate": 1.5186304783915983e-06, "loss": 2.0526, "step": 24804 }, { "epoch": 0.83, "grad_norm": 0.7359586358070374, "learning_rate": 1.5180674300292185e-06, "loss": 2.0297, "step": 24805 }, { "epoch": 0.83, "grad_norm": 0.7537996172904968, "learning_rate": 1.5175044774907433e-06, "loss": 2.0977, "step": 24806 }, { "epoch": 0.83, "grad_norm": 0.7732250690460205, "learning_rate": 1.5169416207825327e-06, "loss": 2.0812, "step": 24807 }, { "epoch": 0.83, "grad_norm": 0.7159727215766907, "learning_rate": 1.5163788599109442e-06, "loss": 2.0327, "step": 24808 }, { "epoch": 0.83, "grad_norm": 0.7774404883384705, "learning_rate": 1.5158161948823325e-06, "loss": 2.0758, "step": 24809 }, { "epoch": 0.83, "grad_norm": 0.7422087788581848, "learning_rate": 1.5152536257030604e-06, "loss": 1.9764, "step": 24810 }, { "epoch": 0.83, "grad_norm": 0.7168338894844055, "learning_rate": 1.514691152379477e-06, "loss": 2.0369, "step": 24811 }, { "epoch": 0.83, "grad_norm": 0.7576701641082764, "learning_rate": 1.5141287749179434e-06, "loss": 2.0207, "step": 24812 }, { "epoch": 0.83, "grad_norm": 0.7526422142982483, "learning_rate": 1.5135664933248074e-06, "loss": 2.086, "step": 24813 }, { "epoch": 0.83, "grad_norm": 0.7316453456878662, "learning_rate": 1.513004307606425e-06, "loss": 2.032, "step": 24814 }, { "epoch": 0.83, "grad_norm": 0.7603732347488403, "learning_rate": 1.5124422177691445e-06, "loss": 2.0756, "step": 24815 }, { "epoch": 0.83, "grad_norm": 0.7363866567611694, "learning_rate": 1.5118802238193197e-06, "loss": 2.0393, "step": 24816 }, { "epoch": 0.83, "grad_norm": 0.7656158804893494, "learning_rate": 1.511318325763298e-06, "loss": 1.975, "step": 24817 }, { "epoch": 0.83, "grad_norm": 0.7169625759124756, "learning_rate": 1.510756523607424e-06, "loss": 2.0373, "step": 24818 }, { "epoch": 0.83, "grad_norm": 0.743604302406311, "learning_rate": 1.5101948173580483e-06, "loss": 2.0897, "step": 24819 }, { "epoch": 0.83, "grad_norm": 0.7340902090072632, "learning_rate": 1.509633207021518e-06, "loss": 2.0081, "step": 24820 }, { "epoch": 0.83, "grad_norm": 0.7773296236991882, "learning_rate": 1.509071692604176e-06, "loss": 2.1129, "step": 24821 }, { "epoch": 0.83, "grad_norm": 0.7375178337097168, "learning_rate": 1.5085102741123626e-06, "loss": 2.0029, "step": 24822 }, { "epoch": 0.83, "grad_norm": 0.7441858053207397, "learning_rate": 1.507948951552427e-06, "loss": 2.0036, "step": 24823 }, { "epoch": 0.83, "grad_norm": 0.7257186770439148, "learning_rate": 1.5073877249307045e-06, "loss": 2.0964, "step": 24824 }, { "epoch": 0.83, "grad_norm": 0.7642390727996826, "learning_rate": 1.506826594253541e-06, "loss": 2.0762, "step": 24825 }, { "epoch": 0.83, "grad_norm": 0.748098611831665, "learning_rate": 1.5062655595272735e-06, "loss": 2.126, "step": 24826 }, { "epoch": 0.83, "grad_norm": 0.7200208902359009, "learning_rate": 1.505704620758237e-06, "loss": 1.9989, "step": 24827 }, { "epoch": 0.83, "grad_norm": 0.7468613982200623, "learning_rate": 1.5051437779527722e-06, "loss": 2.007, "step": 24828 }, { "epoch": 0.83, "grad_norm": 0.7690402865409851, "learning_rate": 1.504583031117216e-06, "loss": 2.0562, "step": 24829 }, { "epoch": 0.83, "grad_norm": 0.7629485130310059, "learning_rate": 1.5040223802579025e-06, "loss": 1.962, "step": 24830 }, { "epoch": 0.83, "grad_norm": 0.7495771050453186, "learning_rate": 1.5034618253811616e-06, "loss": 2.0722, "step": 24831 }, { "epoch": 0.83, "grad_norm": 0.7416913509368896, "learning_rate": 1.5029013664933335e-06, "loss": 2.0417, "step": 24832 }, { "epoch": 0.83, "grad_norm": 0.7573280930519104, "learning_rate": 1.5023410036007424e-06, "loss": 2.0891, "step": 24833 }, { "epoch": 0.83, "grad_norm": 0.7307788133621216, "learning_rate": 1.5017807367097248e-06, "loss": 1.9758, "step": 24834 }, { "epoch": 0.83, "grad_norm": 0.7629086971282959, "learning_rate": 1.5012205658266066e-06, "loss": 1.9765, "step": 24835 }, { "epoch": 0.83, "grad_norm": 0.7630726099014282, "learning_rate": 1.5006604909577193e-06, "loss": 2.1183, "step": 24836 }, { "epoch": 0.83, "grad_norm": 0.7442490458488464, "learning_rate": 1.500100512109387e-06, "loss": 2.0619, "step": 24837 }, { "epoch": 0.83, "grad_norm": 0.7420192956924438, "learning_rate": 1.4995406292879388e-06, "loss": 1.9632, "step": 24838 }, { "epoch": 0.83, "grad_norm": 0.7361243963241577, "learning_rate": 1.4989808424996998e-06, "loss": 2.0383, "step": 24839 }, { "epoch": 0.83, "grad_norm": 0.7191910147666931, "learning_rate": 1.4984211517509905e-06, "loss": 2.0243, "step": 24840 }, { "epoch": 0.83, "grad_norm": 0.7341801524162292, "learning_rate": 1.497861557048137e-06, "loss": 2.0634, "step": 24841 }, { "epoch": 0.83, "grad_norm": 0.732101321220398, "learning_rate": 1.497302058397463e-06, "loss": 2.0156, "step": 24842 }, { "epoch": 0.83, "grad_norm": 0.7340344786643982, "learning_rate": 1.4967426558052878e-06, "loss": 2.0131, "step": 24843 }, { "epoch": 0.83, "grad_norm": 0.7823872566223145, "learning_rate": 1.4961833492779276e-06, "loss": 2.0495, "step": 24844 }, { "epoch": 0.83, "grad_norm": 0.7334010004997253, "learning_rate": 1.4956241388217063e-06, "loss": 1.9814, "step": 24845 }, { "epoch": 0.83, "grad_norm": 0.7264289259910583, "learning_rate": 1.4950650244429377e-06, "loss": 2.0014, "step": 24846 }, { "epoch": 0.83, "grad_norm": 0.7532520294189453, "learning_rate": 1.4945060061479422e-06, "loss": 2.0829, "step": 24847 }, { "epoch": 0.83, "grad_norm": 0.8003633618354797, "learning_rate": 1.4939470839430338e-06, "loss": 2.0344, "step": 24848 }, { "epoch": 0.83, "grad_norm": 0.7563724517822266, "learning_rate": 1.4933882578345227e-06, "loss": 2.0137, "step": 24849 }, { "epoch": 0.83, "grad_norm": 0.7585241198539734, "learning_rate": 1.4928295278287264e-06, "loss": 2.0346, "step": 24850 }, { "epoch": 0.83, "grad_norm": 0.7512881755828857, "learning_rate": 1.4922708939319587e-06, "loss": 2.0594, "step": 24851 }, { "epoch": 0.83, "grad_norm": 0.7724229097366333, "learning_rate": 1.4917123561505275e-06, "loss": 2.1233, "step": 24852 }, { "epoch": 0.83, "grad_norm": 0.7541494965553284, "learning_rate": 1.4911539144907429e-06, "loss": 2.015, "step": 24853 }, { "epoch": 0.83, "grad_norm": 0.7142285704612732, "learning_rate": 1.4905955689589158e-06, "loss": 1.9862, "step": 24854 }, { "epoch": 0.83, "grad_norm": 0.756924569606781, "learning_rate": 1.4900373195613515e-06, "loss": 2.0334, "step": 24855 }, { "epoch": 0.83, "grad_norm": 0.7574272155761719, "learning_rate": 1.4894791663043596e-06, "loss": 2.0707, "step": 24856 }, { "epoch": 0.83, "grad_norm": 0.7171395421028137, "learning_rate": 1.4889211091942436e-06, "loss": 1.9911, "step": 24857 }, { "epoch": 0.83, "grad_norm": 0.7445770502090454, "learning_rate": 1.4883631482373096e-06, "loss": 1.9962, "step": 24858 }, { "epoch": 0.83, "grad_norm": 0.7773033976554871, "learning_rate": 1.4878052834398593e-06, "loss": 2.0264, "step": 24859 }, { "epoch": 0.83, "grad_norm": 0.7424682378768921, "learning_rate": 1.4872475148081977e-06, "loss": 1.9937, "step": 24860 }, { "epoch": 0.83, "grad_norm": 0.7593292593955994, "learning_rate": 1.4866898423486253e-06, "loss": 2.0969, "step": 24861 }, { "epoch": 0.83, "grad_norm": 0.7616005539894104, "learning_rate": 1.4861322660674393e-06, "loss": 1.9281, "step": 24862 }, { "epoch": 0.83, "grad_norm": 0.7516399621963501, "learning_rate": 1.4855747859709413e-06, "loss": 2.0553, "step": 24863 }, { "epoch": 0.83, "grad_norm": 0.735551118850708, "learning_rate": 1.4850174020654318e-06, "loss": 2.0519, "step": 24864 }, { "epoch": 0.83, "grad_norm": 0.7431236505508423, "learning_rate": 1.4844601143572057e-06, "loss": 2.0001, "step": 24865 }, { "epoch": 0.83, "grad_norm": 0.7233752608299255, "learning_rate": 1.483902922852556e-06, "loss": 1.9788, "step": 24866 }, { "epoch": 0.83, "grad_norm": 0.7340803742408752, "learning_rate": 1.4833458275577828e-06, "loss": 1.9875, "step": 24867 }, { "epoch": 0.83, "grad_norm": 0.7438344359397888, "learning_rate": 1.4827888284791747e-06, "loss": 2.0066, "step": 24868 }, { "epoch": 0.83, "grad_norm": 0.7409992814064026, "learning_rate": 1.4822319256230267e-06, "loss": 2.0211, "step": 24869 }, { "epoch": 0.83, "grad_norm": 0.7075291275978088, "learning_rate": 1.4816751189956346e-06, "loss": 2.0319, "step": 24870 }, { "epoch": 0.83, "grad_norm": 0.7767680883407593, "learning_rate": 1.4811184086032814e-06, "loss": 2.0157, "step": 24871 }, { "epoch": 0.83, "grad_norm": 0.7490136623382568, "learning_rate": 1.4805617944522588e-06, "loss": 2.0005, "step": 24872 }, { "epoch": 0.83, "grad_norm": 0.7313303351402283, "learning_rate": 1.4800052765488592e-06, "loss": 2.0331, "step": 24873 }, { "epoch": 0.83, "grad_norm": 0.7321137189865112, "learning_rate": 1.4794488548993668e-06, "loss": 2.0209, "step": 24874 }, { "epoch": 0.83, "grad_norm": 0.7748061418533325, "learning_rate": 1.4788925295100642e-06, "loss": 2.0565, "step": 24875 }, { "epoch": 0.83, "grad_norm": 0.7291797399520874, "learning_rate": 1.478336300387243e-06, "loss": 2.0961, "step": 24876 }, { "epoch": 0.83, "grad_norm": 0.7370447516441345, "learning_rate": 1.4777801675371828e-06, "loss": 2.1036, "step": 24877 }, { "epoch": 0.83, "grad_norm": 0.7600078582763672, "learning_rate": 1.4772241309661684e-06, "loss": 1.9991, "step": 24878 }, { "epoch": 0.83, "grad_norm": 0.7252472043037415, "learning_rate": 1.4766681906804792e-06, "loss": 2.0259, "step": 24879 }, { "epoch": 0.83, "grad_norm": 0.7419667840003967, "learning_rate": 1.4761123466864002e-06, "loss": 2.0226, "step": 24880 }, { "epoch": 0.83, "grad_norm": 0.7757155895233154, "learning_rate": 1.4755565989902065e-06, "loss": 2.1047, "step": 24881 }, { "epoch": 0.83, "grad_norm": 0.7458028793334961, "learning_rate": 1.4750009475981774e-06, "loss": 2.0538, "step": 24882 }, { "epoch": 0.83, "grad_norm": 0.7708866000175476, "learning_rate": 1.4744453925165969e-06, "loss": 1.9739, "step": 24883 }, { "epoch": 0.83, "grad_norm": 0.7627978920936584, "learning_rate": 1.473889933751731e-06, "loss": 2.0803, "step": 24884 }, { "epoch": 0.83, "grad_norm": 0.7578418850898743, "learning_rate": 1.4733345713098602e-06, "loss": 2.0904, "step": 24885 }, { "epoch": 0.83, "grad_norm": 0.7354166507720947, "learning_rate": 1.4727793051972605e-06, "loss": 2.0154, "step": 24886 }, { "epoch": 0.83, "grad_norm": 0.7664469480514526, "learning_rate": 1.4722241354202027e-06, "loss": 2.0588, "step": 24887 }, { "epoch": 0.83, "grad_norm": 0.7416092157363892, "learning_rate": 1.471669061984956e-06, "loss": 2.0182, "step": 24888 }, { "epoch": 0.83, "grad_norm": 0.7276415228843689, "learning_rate": 1.4711140848977967e-06, "loss": 2.0257, "step": 24889 }, { "epoch": 0.83, "grad_norm": 0.7509539127349854, "learning_rate": 1.4705592041649908e-06, "loss": 2.0165, "step": 24890 }, { "epoch": 0.83, "grad_norm": 0.7533541321754456, "learning_rate": 1.4700044197928065e-06, "loss": 2.0176, "step": 24891 }, { "epoch": 0.83, "grad_norm": 0.7541894912719727, "learning_rate": 1.4694497317875189e-06, "loss": 2.1044, "step": 24892 }, { "epoch": 0.83, "grad_norm": 0.7428129315376282, "learning_rate": 1.4688951401553841e-06, "loss": 2.1016, "step": 24893 }, { "epoch": 0.83, "grad_norm": 0.7337577939033508, "learning_rate": 1.4683406449026727e-06, "loss": 2.0864, "step": 24894 }, { "epoch": 0.83, "grad_norm": 0.732695996761322, "learning_rate": 1.4677862460356506e-06, "loss": 2.0473, "step": 24895 }, { "epoch": 0.83, "grad_norm": 0.7389583587646484, "learning_rate": 1.4672319435605787e-06, "loss": 2.0654, "step": 24896 }, { "epoch": 0.83, "grad_norm": 0.7543737292289734, "learning_rate": 1.4666777374837171e-06, "loss": 2.0691, "step": 24897 }, { "epoch": 0.83, "grad_norm": 0.738282322883606, "learning_rate": 1.46612362781133e-06, "loss": 1.9842, "step": 24898 }, { "epoch": 0.83, "grad_norm": 0.7562292814254761, "learning_rate": 1.465569614549679e-06, "loss": 2.0029, "step": 24899 }, { "epoch": 0.83, "grad_norm": 0.7625837922096252, "learning_rate": 1.465015697705019e-06, "loss": 2.0722, "step": 24900 }, { "epoch": 0.83, "grad_norm": 0.7556712627410889, "learning_rate": 1.4644618772836116e-06, "loss": 2.0218, "step": 24901 }, { "epoch": 0.83, "grad_norm": 0.7502215504646301, "learning_rate": 1.463908153291711e-06, "loss": 2.0919, "step": 24902 }, { "epoch": 0.83, "grad_norm": 0.7650898694992065, "learning_rate": 1.4633545257355718e-06, "loss": 2.019, "step": 24903 }, { "epoch": 0.83, "grad_norm": 0.75739985704422, "learning_rate": 1.4628009946214505e-06, "loss": 2.08, "step": 24904 }, { "epoch": 0.83, "grad_norm": 0.7352570295333862, "learning_rate": 1.4622475599556041e-06, "loss": 2.0552, "step": 24905 }, { "epoch": 0.83, "grad_norm": 0.7424171566963196, "learning_rate": 1.4616942217442764e-06, "loss": 2.0248, "step": 24906 }, { "epoch": 0.83, "grad_norm": 0.8046366572380066, "learning_rate": 1.4611409799937248e-06, "loss": 2.0401, "step": 24907 }, { "epoch": 0.83, "grad_norm": 0.754127025604248, "learning_rate": 1.4605878347101988e-06, "loss": 2.0694, "step": 24908 }, { "epoch": 0.83, "grad_norm": 0.7638735771179199, "learning_rate": 1.4600347858999476e-06, "loss": 2.0488, "step": 24909 }, { "epoch": 0.83, "grad_norm": 0.7526469230651855, "learning_rate": 1.4594818335692163e-06, "loss": 2.0556, "step": 24910 }, { "epoch": 0.83, "grad_norm": 0.7513940930366516, "learning_rate": 1.4589289777242565e-06, "loss": 2.0731, "step": 24911 }, { "epoch": 0.83, "grad_norm": 0.7152296900749207, "learning_rate": 1.458376218371309e-06, "loss": 2.0295, "step": 24912 }, { "epoch": 0.83, "grad_norm": 0.7388644218444824, "learning_rate": 1.457823555516621e-06, "loss": 2.1876, "step": 24913 }, { "epoch": 0.83, "grad_norm": 0.7313733696937561, "learning_rate": 1.4572709891664383e-06, "loss": 2.0041, "step": 24914 }, { "epoch": 0.83, "grad_norm": 0.7303443551063538, "learning_rate": 1.4567185193270016e-06, "loss": 2.0401, "step": 24915 }, { "epoch": 0.83, "grad_norm": 0.754470944404602, "learning_rate": 1.4561661460045506e-06, "loss": 1.9954, "step": 24916 }, { "epoch": 0.83, "grad_norm": 0.7471160888671875, "learning_rate": 1.455613869205329e-06, "loss": 1.9838, "step": 24917 }, { "epoch": 0.83, "grad_norm": 0.7645959258079529, "learning_rate": 1.455061688935574e-06, "loss": 1.9776, "step": 24918 }, { "epoch": 0.83, "grad_norm": 0.7683794498443604, "learning_rate": 1.454509605201523e-06, "loss": 2.0778, "step": 24919 }, { "epoch": 0.83, "grad_norm": 0.7820815443992615, "learning_rate": 1.4539576180094139e-06, "loss": 2.0469, "step": 24920 }, { "epoch": 0.83, "grad_norm": 0.72707200050354, "learning_rate": 1.4534057273654844e-06, "loss": 2.0536, "step": 24921 }, { "epoch": 0.83, "grad_norm": 0.7351446151733398, "learning_rate": 1.4528539332759673e-06, "loss": 2.037, "step": 24922 }, { "epoch": 0.83, "grad_norm": 0.7233196496963501, "learning_rate": 1.4523022357470996e-06, "loss": 2.0081, "step": 24923 }, { "epoch": 0.83, "grad_norm": 0.7394496202468872, "learning_rate": 1.4517506347851107e-06, "loss": 2.0643, "step": 24924 }, { "epoch": 0.83, "grad_norm": 0.7514690160751343, "learning_rate": 1.4511991303962314e-06, "loss": 2.1315, "step": 24925 }, { "epoch": 0.83, "grad_norm": 0.7188594341278076, "learning_rate": 1.4506477225866944e-06, "loss": 2.0507, "step": 24926 }, { "epoch": 0.83, "grad_norm": 0.7346506118774414, "learning_rate": 1.4500964113627337e-06, "loss": 2.0562, "step": 24927 }, { "epoch": 0.83, "grad_norm": 0.744236409664154, "learning_rate": 1.4495451967305686e-06, "loss": 2.0267, "step": 24928 }, { "epoch": 0.83, "grad_norm": 0.7599499821662903, "learning_rate": 1.4489940786964306e-06, "loss": 1.9929, "step": 24929 }, { "epoch": 0.83, "grad_norm": 0.7448164820671082, "learning_rate": 1.4484430572665486e-06, "loss": 2.072, "step": 24930 }, { "epoch": 0.83, "grad_norm": 0.7348343133926392, "learning_rate": 1.447892132447145e-06, "loss": 2.0068, "step": 24931 }, { "epoch": 0.83, "grad_norm": 0.7368728518486023, "learning_rate": 1.4473413042444416e-06, "loss": 2.0235, "step": 24932 }, { "epoch": 0.83, "grad_norm": 0.7424443364143372, "learning_rate": 1.446790572664667e-06, "loss": 2.0791, "step": 24933 }, { "epoch": 0.83, "grad_norm": 0.7498221397399902, "learning_rate": 1.4462399377140369e-06, "loss": 2.0938, "step": 24934 }, { "epoch": 0.83, "grad_norm": 0.7571089863777161, "learning_rate": 1.4456893993987752e-06, "loss": 2.0636, "step": 24935 }, { "epoch": 0.83, "grad_norm": 0.7512771487236023, "learning_rate": 1.4451389577251029e-06, "loss": 2.0469, "step": 24936 }, { "epoch": 0.83, "grad_norm": 0.7596918940544128, "learning_rate": 1.444588612699238e-06, "loss": 2.0534, "step": 24937 }, { "epoch": 0.83, "grad_norm": 0.7547183036804199, "learning_rate": 1.4440383643273936e-06, "loss": 2.0612, "step": 24938 }, { "epoch": 0.83, "grad_norm": 0.8248137831687927, "learning_rate": 1.4434882126157924e-06, "loss": 2.0551, "step": 24939 }, { "epoch": 0.83, "grad_norm": 0.7174193263053894, "learning_rate": 1.442938157570647e-06, "loss": 1.9518, "step": 24940 }, { "epoch": 0.83, "grad_norm": 0.766645073890686, "learning_rate": 1.442388199198169e-06, "loss": 2.0177, "step": 24941 }, { "epoch": 0.83, "grad_norm": 0.7730458974838257, "learning_rate": 1.441838337504573e-06, "loss": 2.059, "step": 24942 }, { "epoch": 0.83, "grad_norm": 0.7574564218521118, "learning_rate": 1.4412885724960758e-06, "loss": 2.0624, "step": 24943 }, { "epoch": 0.83, "grad_norm": 0.7196966409683228, "learning_rate": 1.440738904178881e-06, "loss": 2.0211, "step": 24944 }, { "epoch": 0.83, "grad_norm": 0.746587336063385, "learning_rate": 1.4401893325592042e-06, "loss": 2.0056, "step": 24945 }, { "epoch": 0.83, "grad_norm": 0.7573887705802917, "learning_rate": 1.4396398576432525e-06, "loss": 2.0744, "step": 24946 }, { "epoch": 0.83, "grad_norm": 0.753665566444397, "learning_rate": 1.4390904794372295e-06, "loss": 2.0426, "step": 24947 }, { "epoch": 0.83, "grad_norm": 0.7425118088722229, "learning_rate": 1.438541197947345e-06, "loss": 2.0638, "step": 24948 }, { "epoch": 0.83, "grad_norm": 0.7178659439086914, "learning_rate": 1.4379920131798098e-06, "loss": 2.0334, "step": 24949 }, { "epoch": 0.83, "grad_norm": 0.7429017424583435, "learning_rate": 1.4374429251408183e-06, "loss": 2.117, "step": 24950 }, { "epoch": 0.83, "grad_norm": 0.7457799911499023, "learning_rate": 1.4368939338365783e-06, "loss": 2.0503, "step": 24951 }, { "epoch": 0.83, "grad_norm": 0.7323011159896851, "learning_rate": 1.4363450392732947e-06, "loss": 2.0531, "step": 24952 }, { "epoch": 0.83, "grad_norm": 0.746109664440155, "learning_rate": 1.4357962414571635e-06, "loss": 1.9997, "step": 24953 }, { "epoch": 0.83, "grad_norm": 0.7290377020835876, "learning_rate": 1.4352475403943899e-06, "loss": 1.9965, "step": 24954 }, { "epoch": 0.83, "grad_norm": 0.7234928607940674, "learning_rate": 1.4346989360911701e-06, "loss": 2.0306, "step": 24955 }, { "epoch": 0.83, "grad_norm": 0.742479145526886, "learning_rate": 1.4341504285537e-06, "loss": 2.0342, "step": 24956 }, { "epoch": 0.83, "grad_norm": 0.7563445568084717, "learning_rate": 1.433602017788177e-06, "loss": 2.0051, "step": 24957 }, { "epoch": 0.83, "grad_norm": 0.7501334547996521, "learning_rate": 1.4330537038008019e-06, "loss": 2.0572, "step": 24958 }, { "epoch": 0.83, "grad_norm": 0.7636805772781372, "learning_rate": 1.432505486597764e-06, "loss": 2.0643, "step": 24959 }, { "epoch": 0.83, "grad_norm": 0.795948326587677, "learning_rate": 1.431957366185256e-06, "loss": 2.0661, "step": 24960 }, { "epoch": 0.83, "grad_norm": 0.7754184007644653, "learning_rate": 1.4314093425694753e-06, "loss": 1.9942, "step": 24961 }, { "epoch": 0.83, "grad_norm": 0.7141072154045105, "learning_rate": 1.4308614157566103e-06, "loss": 2.0012, "step": 24962 }, { "epoch": 0.83, "grad_norm": 0.7436487674713135, "learning_rate": 1.4303135857528473e-06, "loss": 2.0306, "step": 24963 }, { "epoch": 0.83, "grad_norm": 0.7525427937507629, "learning_rate": 1.4297658525643798e-06, "loss": 2.0413, "step": 24964 }, { "epoch": 0.83, "grad_norm": 0.7549144625663757, "learning_rate": 1.4292182161973977e-06, "loss": 2.0298, "step": 24965 }, { "epoch": 0.83, "grad_norm": 0.7496790885925293, "learning_rate": 1.4286706766580827e-06, "loss": 2.0711, "step": 24966 }, { "epoch": 0.83, "grad_norm": 0.7328413128852844, "learning_rate": 1.4281232339526262e-06, "loss": 2.0377, "step": 24967 }, { "epoch": 0.83, "grad_norm": 0.7330312132835388, "learning_rate": 1.427575888087208e-06, "loss": 2.0458, "step": 24968 }, { "epoch": 0.83, "grad_norm": 0.7308694124221802, "learning_rate": 1.4270286390680132e-06, "loss": 1.9903, "step": 24969 }, { "epoch": 0.83, "grad_norm": 0.7718901634216309, "learning_rate": 1.4264814869012234e-06, "loss": 2.0611, "step": 24970 }, { "epoch": 0.83, "grad_norm": 0.7286138534545898, "learning_rate": 1.4259344315930256e-06, "loss": 2.0341, "step": 24971 }, { "epoch": 0.83, "grad_norm": 0.7309878468513489, "learning_rate": 1.425387473149592e-06, "loss": 2.0449, "step": 24972 }, { "epoch": 0.83, "grad_norm": 0.7500165104866028, "learning_rate": 1.424840611577105e-06, "loss": 2.0348, "step": 24973 }, { "epoch": 0.83, "grad_norm": 0.7308431267738342, "learning_rate": 1.4242938468817448e-06, "loss": 2.0223, "step": 24974 }, { "epoch": 0.83, "grad_norm": 0.7213445901870728, "learning_rate": 1.4237471790696856e-06, "loss": 2.1098, "step": 24975 }, { "epoch": 0.83, "grad_norm": 0.7381265759468079, "learning_rate": 1.4232006081471062e-06, "loss": 2.0409, "step": 24976 }, { "epoch": 0.83, "grad_norm": 0.7249129414558411, "learning_rate": 1.4226541341201804e-06, "loss": 2.0475, "step": 24977 }, { "epoch": 0.83, "grad_norm": 0.7500261664390564, "learning_rate": 1.4221077569950791e-06, "loss": 2.0179, "step": 24978 }, { "epoch": 0.83, "grad_norm": 0.741402268409729, "learning_rate": 1.4215614767779772e-06, "loss": 1.9908, "step": 24979 }, { "epoch": 0.83, "grad_norm": 0.7662835717201233, "learning_rate": 1.4210152934750475e-06, "loss": 2.058, "step": 24980 }, { "epoch": 0.83, "grad_norm": 0.7358508110046387, "learning_rate": 1.4204692070924608e-06, "loss": 2.0655, "step": 24981 }, { "epoch": 0.83, "grad_norm": 0.73835289478302, "learning_rate": 1.419923217636382e-06, "loss": 2.0055, "step": 24982 }, { "epoch": 0.83, "grad_norm": 0.8847495317459106, "learning_rate": 1.4193773251129816e-06, "loss": 2.0292, "step": 24983 }, { "epoch": 0.83, "grad_norm": 0.7387908697128296, "learning_rate": 1.4188315295284306e-06, "loss": 2.0312, "step": 24984 }, { "epoch": 0.83, "grad_norm": 0.7960773706436157, "learning_rate": 1.4182858308888913e-06, "loss": 2.0179, "step": 24985 }, { "epoch": 0.83, "grad_norm": 0.74830162525177, "learning_rate": 1.4177402292005282e-06, "loss": 2.0621, "step": 24986 }, { "epoch": 0.83, "grad_norm": 0.7734114527702332, "learning_rate": 1.4171947244695073e-06, "loss": 2.0515, "step": 24987 }, { "epoch": 0.83, "grad_norm": 0.7556977272033691, "learning_rate": 1.416649316701989e-06, "loss": 2.0092, "step": 24988 }, { "epoch": 0.83, "grad_norm": 0.7361196279525757, "learning_rate": 1.4161040059041375e-06, "loss": 2.0846, "step": 24989 }, { "epoch": 0.83, "grad_norm": 0.7426493167877197, "learning_rate": 1.4155587920821133e-06, "loss": 2.0394, "step": 24990 }, { "epoch": 0.83, "grad_norm": 0.7707929611206055, "learning_rate": 1.4150136752420718e-06, "loss": 2.0515, "step": 24991 }, { "epoch": 0.83, "grad_norm": 0.7358521819114685, "learning_rate": 1.4144686553901754e-06, "loss": 2.0682, "step": 24992 }, { "epoch": 0.83, "grad_norm": 0.754878044128418, "learning_rate": 1.413923732532585e-06, "loss": 2.0505, "step": 24993 }, { "epoch": 0.83, "grad_norm": 0.75506991147995, "learning_rate": 1.4133789066754465e-06, "loss": 2.075, "step": 24994 }, { "epoch": 0.83, "grad_norm": 0.750690758228302, "learning_rate": 1.4128341778249223e-06, "loss": 2.0804, "step": 24995 }, { "epoch": 0.83, "grad_norm": 0.742994487285614, "learning_rate": 1.4122895459871666e-06, "loss": 2.1061, "step": 24996 }, { "epoch": 0.83, "grad_norm": 0.7485726475715637, "learning_rate": 1.4117450111683284e-06, "loss": 2.0613, "step": 24997 }, { "epoch": 0.83, "grad_norm": 0.7496106028556824, "learning_rate": 1.4112005733745647e-06, "loss": 1.9947, "step": 24998 }, { "epoch": 0.83, "grad_norm": 0.7527778148651123, "learning_rate": 1.410656232612021e-06, "loss": 2.1145, "step": 24999 }, { "epoch": 0.83, "grad_norm": 0.7567235231399536, "learning_rate": 1.4101119888868508e-06, "loss": 2.0735, "step": 25000 }, { "epoch": 0.83, "grad_norm": 0.7352137565612793, "learning_rate": 1.4095678422051995e-06, "loss": 2.0364, "step": 25001 }, { "epoch": 0.83, "grad_norm": 0.7519750595092773, "learning_rate": 1.4090237925732186e-06, "loss": 2.0808, "step": 25002 }, { "epoch": 0.83, "grad_norm": 0.7570549845695496, "learning_rate": 1.4084798399970522e-06, "loss": 2.0972, "step": 25003 }, { "epoch": 0.83, "grad_norm": 0.7711437940597534, "learning_rate": 1.4079359844828433e-06, "loss": 2.0498, "step": 25004 }, { "epoch": 0.83, "grad_norm": 0.7512153387069702, "learning_rate": 1.4073922260367378e-06, "loss": 2.0929, "step": 25005 }, { "epoch": 0.83, "grad_norm": 0.7496294379234314, "learning_rate": 1.406848564664881e-06, "loss": 2.0579, "step": 25006 }, { "epoch": 0.83, "grad_norm": 0.758820116519928, "learning_rate": 1.4063050003734135e-06, "loss": 2.0524, "step": 25007 }, { "epoch": 0.83, "grad_norm": 0.7568273544311523, "learning_rate": 1.4057615331684736e-06, "loss": 2.0548, "step": 25008 }, { "epoch": 0.83, "grad_norm": 0.7226709127426147, "learning_rate": 1.4052181630562055e-06, "loss": 1.9856, "step": 25009 }, { "epoch": 0.83, "grad_norm": 0.7358258962631226, "learning_rate": 1.4046748900427432e-06, "loss": 2.0966, "step": 25010 }, { "epoch": 0.83, "grad_norm": 0.7271834015846252, "learning_rate": 1.4041317141342281e-06, "loss": 2.0687, "step": 25011 }, { "epoch": 0.83, "grad_norm": 0.747855007648468, "learning_rate": 1.4035886353367968e-06, "loss": 2.0317, "step": 25012 }, { "epoch": 0.83, "grad_norm": 0.7373946309089661, "learning_rate": 1.4030456536565796e-06, "loss": 2.0498, "step": 25013 }, { "epoch": 0.83, "grad_norm": 0.7466525435447693, "learning_rate": 1.4025027690997139e-06, "loss": 2.0656, "step": 25014 }, { "epoch": 0.83, "grad_norm": 0.7209491729736328, "learning_rate": 1.401959981672336e-06, "loss": 2.0519, "step": 25015 }, { "epoch": 0.83, "grad_norm": 0.7503870725631714, "learning_rate": 1.4014172913805768e-06, "loss": 1.9966, "step": 25016 }, { "epoch": 0.83, "grad_norm": 0.7859004735946655, "learning_rate": 1.400874698230562e-06, "loss": 2.0211, "step": 25017 }, { "epoch": 0.83, "grad_norm": 0.7427493929862976, "learning_rate": 1.400332202228427e-06, "loss": 2.0806, "step": 25018 }, { "epoch": 0.83, "grad_norm": 0.7610830664634705, "learning_rate": 1.3997898033802982e-06, "loss": 2.0425, "step": 25019 }, { "epoch": 0.83, "grad_norm": 0.7572808265686035, "learning_rate": 1.3992475016923058e-06, "loss": 2.0957, "step": 25020 }, { "epoch": 0.83, "grad_norm": 0.7164633274078369, "learning_rate": 1.3987052971705718e-06, "loss": 2.0712, "step": 25021 }, { "epoch": 0.83, "grad_norm": 0.7434188723564148, "learning_rate": 1.3981631898212266e-06, "loss": 2.1078, "step": 25022 }, { "epoch": 0.83, "grad_norm": 0.7677701711654663, "learning_rate": 1.3976211796503903e-06, "loss": 2.022, "step": 25023 }, { "epoch": 0.83, "grad_norm": 0.7595756649971008, "learning_rate": 1.3970792666641919e-06, "loss": 2.0448, "step": 25024 }, { "epoch": 0.83, "grad_norm": 0.7680371999740601, "learning_rate": 1.396537450868749e-06, "loss": 2.0045, "step": 25025 }, { "epoch": 0.83, "grad_norm": 0.7705698013305664, "learning_rate": 1.395995732270181e-06, "loss": 1.9945, "step": 25026 }, { "epoch": 0.83, "grad_norm": 0.7396299839019775, "learning_rate": 1.3954541108746123e-06, "loss": 2.0314, "step": 25027 }, { "epoch": 0.83, "grad_norm": 0.7372692227363586, "learning_rate": 1.3949125866881619e-06, "loss": 2.0022, "step": 25028 }, { "epoch": 0.83, "grad_norm": 0.7675361633300781, "learning_rate": 1.3943711597169463e-06, "loss": 2.1005, "step": 25029 }, { "epoch": 0.83, "grad_norm": 0.7285292744636536, "learning_rate": 1.3938298299670793e-06, "loss": 2.0845, "step": 25030 }, { "epoch": 0.83, "grad_norm": 0.7436361312866211, "learning_rate": 1.3932885974446808e-06, "loss": 2.0589, "step": 25031 }, { "epoch": 0.83, "grad_norm": 0.7167371511459351, "learning_rate": 1.3927474621558624e-06, "loss": 2.031, "step": 25032 }, { "epoch": 0.83, "grad_norm": 0.748346745967865, "learning_rate": 1.3922064241067412e-06, "loss": 2.0842, "step": 25033 }, { "epoch": 0.83, "grad_norm": 0.7404475808143616, "learning_rate": 1.391665483303426e-06, "loss": 2.0288, "step": 25034 }, { "epoch": 0.83, "grad_norm": 0.7668644189834595, "learning_rate": 1.3911246397520285e-06, "loss": 2.0329, "step": 25035 }, { "epoch": 0.83, "grad_norm": 0.718517541885376, "learning_rate": 1.390583893458658e-06, "loss": 2.0551, "step": 25036 }, { "epoch": 0.83, "grad_norm": 0.7408729791641235, "learning_rate": 1.3900432444294288e-06, "loss": 2.0518, "step": 25037 }, { "epoch": 0.83, "grad_norm": 0.7193726897239685, "learning_rate": 1.3895026926704435e-06, "loss": 1.977, "step": 25038 }, { "epoch": 0.83, "grad_norm": 0.7746989727020264, "learning_rate": 1.3889622381878098e-06, "loss": 2.085, "step": 25039 }, { "epoch": 0.83, "grad_norm": 0.7408944964408875, "learning_rate": 1.388421880987636e-06, "loss": 2.0045, "step": 25040 }, { "epoch": 0.83, "grad_norm": 0.7758175730705261, "learning_rate": 1.3878816210760214e-06, "loss": 2.0335, "step": 25041 }, { "epoch": 0.83, "grad_norm": 0.7779616117477417, "learning_rate": 1.3873414584590771e-06, "loss": 2.0596, "step": 25042 }, { "epoch": 0.83, "grad_norm": 0.7642082571983337, "learning_rate": 1.3868013931428981e-06, "loss": 1.9832, "step": 25043 }, { "epoch": 0.83, "grad_norm": 0.7376130223274231, "learning_rate": 1.3862614251335916e-06, "loss": 2.0607, "step": 25044 }, { "epoch": 0.83, "grad_norm": 0.7548394799232483, "learning_rate": 1.3857215544372538e-06, "loss": 2.0191, "step": 25045 }, { "epoch": 0.83, "grad_norm": 0.7503734230995178, "learning_rate": 1.3851817810599866e-06, "loss": 2.0048, "step": 25046 }, { "epoch": 0.83, "grad_norm": 0.7692141532897949, "learning_rate": 1.3846421050078874e-06, "loss": 2.0006, "step": 25047 }, { "epoch": 0.83, "grad_norm": 0.7268210053443909, "learning_rate": 1.38410252628705e-06, "loss": 2.0142, "step": 25048 }, { "epoch": 0.83, "grad_norm": 0.7160589694976807, "learning_rate": 1.383563044903573e-06, "loss": 1.9728, "step": 25049 }, { "epoch": 0.83, "grad_norm": 0.746710479259491, "learning_rate": 1.383023660863554e-06, "loss": 2.0054, "step": 25050 }, { "epoch": 0.83, "grad_norm": 0.7268765568733215, "learning_rate": 1.382484374173083e-06, "loss": 2.0274, "step": 25051 }, { "epoch": 0.83, "grad_norm": 0.786273717880249, "learning_rate": 1.3819451848382514e-06, "loss": 2.018, "step": 25052 }, { "epoch": 0.83, "grad_norm": 0.741789698600769, "learning_rate": 1.381406092865154e-06, "loss": 2.1063, "step": 25053 }, { "epoch": 0.83, "grad_norm": 0.7721714973449707, "learning_rate": 1.3808670982598772e-06, "loss": 2.1069, "step": 25054 }, { "epoch": 0.83, "grad_norm": 0.7567890882492065, "learning_rate": 1.3803282010285156e-06, "loss": 2.0109, "step": 25055 }, { "epoch": 0.83, "grad_norm": 0.7555682063102722, "learning_rate": 1.379789401177154e-06, "loss": 2.0941, "step": 25056 }, { "epoch": 0.83, "grad_norm": 0.7459248900413513, "learning_rate": 1.379250698711877e-06, "loss": 2.0621, "step": 25057 }, { "epoch": 0.83, "grad_norm": 0.7545117735862732, "learning_rate": 1.3787120936387744e-06, "loss": 2.1046, "step": 25058 }, { "epoch": 0.83, "grad_norm": 0.7548273205757141, "learning_rate": 1.3781735859639311e-06, "loss": 2.036, "step": 25059 }, { "epoch": 0.83, "grad_norm": 0.737248420715332, "learning_rate": 1.3776351756934313e-06, "loss": 2.0652, "step": 25060 }, { "epoch": 0.83, "grad_norm": 0.7423721551895142, "learning_rate": 1.3770968628333525e-06, "loss": 2.044, "step": 25061 }, { "epoch": 0.83, "grad_norm": 0.7539017200469971, "learning_rate": 1.3765586473897829e-06, "loss": 1.9974, "step": 25062 }, { "epoch": 0.83, "grad_norm": 0.7847672700881958, "learning_rate": 1.3760205293687967e-06, "loss": 2.0586, "step": 25063 }, { "epoch": 0.83, "grad_norm": 0.765710711479187, "learning_rate": 1.375482508776479e-06, "loss": 2.0291, "step": 25064 }, { "epoch": 0.83, "grad_norm": 0.7743377089500427, "learning_rate": 1.3749445856189037e-06, "loss": 2.0433, "step": 25065 }, { "epoch": 0.83, "grad_norm": 0.7551912665367126, "learning_rate": 1.3744067599021515e-06, "loss": 2.0475, "step": 25066 }, { "epoch": 0.83, "grad_norm": 0.8225637078285217, "learning_rate": 1.3738690316322945e-06, "loss": 2.053, "step": 25067 }, { "epoch": 0.83, "grad_norm": 0.767142117023468, "learning_rate": 1.373331400815412e-06, "loss": 2.011, "step": 25068 }, { "epoch": 0.83, "grad_norm": 0.7446503043174744, "learning_rate": 1.3727938674575758e-06, "loss": 2.0475, "step": 25069 }, { "epoch": 0.83, "grad_norm": 0.7426686882972717, "learning_rate": 1.3722564315648556e-06, "loss": 2.0591, "step": 25070 }, { "epoch": 0.83, "grad_norm": 0.7285228967666626, "learning_rate": 1.3717190931433267e-06, "loss": 2.0245, "step": 25071 }, { "epoch": 0.83, "grad_norm": 0.7637831568717957, "learning_rate": 1.3711818521990605e-06, "loss": 1.9677, "step": 25072 }, { "epoch": 0.83, "grad_norm": 0.7469715476036072, "learning_rate": 1.3706447087381247e-06, "loss": 2.0318, "step": 25073 }, { "epoch": 0.83, "grad_norm": 0.7488355040550232, "learning_rate": 1.3701076627665854e-06, "loss": 2.1217, "step": 25074 }, { "epoch": 0.83, "grad_norm": 0.7682512402534485, "learning_rate": 1.3695707142905156e-06, "loss": 2.0657, "step": 25075 }, { "epoch": 0.83, "grad_norm": 0.7365228533744812, "learning_rate": 1.3690338633159738e-06, "loss": 2.0337, "step": 25076 }, { "epoch": 0.83, "grad_norm": 0.7702351212501526, "learning_rate": 1.368497109849033e-06, "loss": 2.0487, "step": 25077 }, { "epoch": 0.83, "grad_norm": 0.7207365036010742, "learning_rate": 1.3679604538957525e-06, "loss": 2.0108, "step": 25078 }, { "epoch": 0.83, "grad_norm": 0.7382466197013855, "learning_rate": 1.3674238954621933e-06, "loss": 2.0503, "step": 25079 }, { "epoch": 0.83, "grad_norm": 0.8027022480964661, "learning_rate": 1.3668874345544203e-06, "loss": 2.1247, "step": 25080 }, { "epoch": 0.83, "grad_norm": 0.7402763962745667, "learning_rate": 1.3663510711784965e-06, "loss": 2.1203, "step": 25081 }, { "epoch": 0.83, "grad_norm": 0.7324920296669006, "learning_rate": 1.3658148053404773e-06, "loss": 2.0241, "step": 25082 }, { "epoch": 0.83, "grad_norm": 0.7351951599121094, "learning_rate": 1.365278637046421e-06, "loss": 2.0138, "step": 25083 }, { "epoch": 0.83, "grad_norm": 0.7668065428733826, "learning_rate": 1.364742566302385e-06, "loss": 2.1655, "step": 25084 }, { "epoch": 0.83, "grad_norm": 0.7341428995132446, "learning_rate": 1.36420659311443e-06, "loss": 2.0219, "step": 25085 }, { "epoch": 0.83, "grad_norm": 0.7541335821151733, "learning_rate": 1.3636707174886077e-06, "loss": 2.0122, "step": 25086 }, { "epoch": 0.83, "grad_norm": 0.7372667193412781, "learning_rate": 1.36313493943097e-06, "loss": 2.0151, "step": 25087 }, { "epoch": 0.83, "grad_norm": 0.746778666973114, "learning_rate": 1.3625992589475734e-06, "loss": 2.0215, "step": 25088 }, { "epoch": 0.83, "grad_norm": 0.7581526041030884, "learning_rate": 1.3620636760444671e-06, "loss": 2.0215, "step": 25089 }, { "epoch": 0.83, "grad_norm": 0.739772617816925, "learning_rate": 1.3615281907277034e-06, "loss": 2.0281, "step": 25090 }, { "epoch": 0.83, "grad_norm": 0.7470480799674988, "learning_rate": 1.3609928030033348e-06, "loss": 1.9735, "step": 25091 }, { "epoch": 0.83, "grad_norm": 0.7609680891036987, "learning_rate": 1.3604575128774022e-06, "loss": 2.0576, "step": 25092 }, { "epoch": 0.83, "grad_norm": 0.7715288996696472, "learning_rate": 1.3599223203559586e-06, "loss": 2.0492, "step": 25093 }, { "epoch": 0.83, "grad_norm": 0.7324267625808716, "learning_rate": 1.3593872254450502e-06, "loss": 2.0502, "step": 25094 }, { "epoch": 0.83, "grad_norm": 0.7684316635131836, "learning_rate": 1.358852228150721e-06, "loss": 2.0692, "step": 25095 }, { "epoch": 0.83, "grad_norm": 0.763430118560791, "learning_rate": 1.358317328479014e-06, "loss": 2.0342, "step": 25096 }, { "epoch": 0.83, "grad_norm": 0.734992504119873, "learning_rate": 1.3577825264359745e-06, "loss": 2.0323, "step": 25097 }, { "epoch": 0.84, "grad_norm": 0.7526440620422363, "learning_rate": 1.3572478220276407e-06, "loss": 2.063, "step": 25098 }, { "epoch": 0.84, "grad_norm": 0.7658821940422058, "learning_rate": 1.3567132152600572e-06, "loss": 2.0379, "step": 25099 }, { "epoch": 0.84, "grad_norm": 0.7326551675796509, "learning_rate": 1.356178706139264e-06, "loss": 2.0663, "step": 25100 }, { "epoch": 0.84, "grad_norm": 0.7488975524902344, "learning_rate": 1.3556442946712977e-06, "loss": 2.0524, "step": 25101 }, { "epoch": 0.84, "grad_norm": 0.728011965751648, "learning_rate": 1.3551099808621937e-06, "loss": 1.9557, "step": 25102 }, { "epoch": 0.84, "grad_norm": 0.7403274774551392, "learning_rate": 1.3545757647179924e-06, "loss": 2.0569, "step": 25103 }, { "epoch": 0.84, "grad_norm": 0.7323578000068665, "learning_rate": 1.354041646244728e-06, "loss": 2.0384, "step": 25104 }, { "epoch": 0.84, "grad_norm": 0.7479914426803589, "learning_rate": 1.3535076254484324e-06, "loss": 2.0648, "step": 25105 }, { "epoch": 0.84, "grad_norm": 0.7221078276634216, "learning_rate": 1.3529737023351397e-06, "loss": 2.041, "step": 25106 }, { "epoch": 0.84, "grad_norm": 0.7198437452316284, "learning_rate": 1.3524398769108848e-06, "loss": 2.0005, "step": 25107 }, { "epoch": 0.84, "grad_norm": 0.7207092046737671, "learning_rate": 1.3519061491816965e-06, "loss": 1.9997, "step": 25108 }, { "epoch": 0.84, "grad_norm": 0.7637871503829956, "learning_rate": 1.351372519153602e-06, "loss": 1.985, "step": 25109 }, { "epoch": 0.84, "grad_norm": 0.7236823439598083, "learning_rate": 1.3508389868326345e-06, "loss": 2.0269, "step": 25110 }, { "epoch": 0.84, "grad_norm": 0.7510977387428284, "learning_rate": 1.3503055522248166e-06, "loss": 2.011, "step": 25111 }, { "epoch": 0.84, "grad_norm": 0.7671706676483154, "learning_rate": 1.3497722153361769e-06, "loss": 2.0469, "step": 25112 }, { "epoch": 0.84, "grad_norm": 0.7745264768600464, "learning_rate": 1.3492389761727465e-06, "loss": 2.1386, "step": 25113 }, { "epoch": 0.84, "grad_norm": 0.7537067532539368, "learning_rate": 1.348705834740539e-06, "loss": 2.078, "step": 25114 }, { "epoch": 0.84, "grad_norm": 0.7596760988235474, "learning_rate": 1.3481727910455832e-06, "loss": 1.9844, "step": 25115 }, { "epoch": 0.84, "grad_norm": 0.7658923268318176, "learning_rate": 1.3476398450939032e-06, "loss": 2.0315, "step": 25116 }, { "epoch": 0.84, "grad_norm": 0.7806052565574646, "learning_rate": 1.3471069968915174e-06, "loss": 2.0479, "step": 25117 }, { "epoch": 0.84, "grad_norm": 0.7459091544151306, "learning_rate": 1.3465742464444442e-06, "loss": 2.015, "step": 25118 }, { "epoch": 0.84, "grad_norm": 0.7538060545921326, "learning_rate": 1.3460415937587047e-06, "loss": 1.9798, "step": 25119 }, { "epoch": 0.84, "grad_norm": 0.7477102875709534, "learning_rate": 1.3455090388403137e-06, "loss": 2.0532, "step": 25120 }, { "epoch": 0.84, "grad_norm": 0.7364475727081299, "learning_rate": 1.3449765816952899e-06, "loss": 2.0606, "step": 25121 }, { "epoch": 0.84, "grad_norm": 0.7414153814315796, "learning_rate": 1.3444442223296505e-06, "loss": 1.9697, "step": 25122 }, { "epoch": 0.84, "grad_norm": 0.7764838933944702, "learning_rate": 1.3439119607494077e-06, "loss": 2.0112, "step": 25123 }, { "epoch": 0.84, "grad_norm": 0.7431680560112, "learning_rate": 1.3433797969605721e-06, "loss": 2.0358, "step": 25124 }, { "epoch": 0.84, "grad_norm": 0.7583439350128174, "learning_rate": 1.34284773096916e-06, "loss": 2.0889, "step": 25125 }, { "epoch": 0.84, "grad_norm": 0.7658635973930359, "learning_rate": 1.342315762781181e-06, "loss": 2.1617, "step": 25126 }, { "epoch": 0.84, "grad_norm": 0.741082489490509, "learning_rate": 1.3417838924026426e-06, "loss": 2.0625, "step": 25127 }, { "epoch": 0.84, "grad_norm": 0.7264873385429382, "learning_rate": 1.3412521198395556e-06, "loss": 2.0956, "step": 25128 }, { "epoch": 0.84, "grad_norm": 0.7705053091049194, "learning_rate": 1.3407204450979294e-06, "loss": 2.0349, "step": 25129 }, { "epoch": 0.84, "grad_norm": 0.7603328227996826, "learning_rate": 1.3401888681837671e-06, "loss": 2.0912, "step": 25130 }, { "epoch": 0.84, "grad_norm": 0.7482497692108154, "learning_rate": 1.3396573891030784e-06, "loss": 2.0427, "step": 25131 }, { "epoch": 0.84, "grad_norm": 0.7430740594863892, "learning_rate": 1.3391260078618639e-06, "loss": 2.0554, "step": 25132 }, { "epoch": 0.84, "grad_norm": 0.7279108166694641, "learning_rate": 1.3385947244661268e-06, "loss": 2.0681, "step": 25133 }, { "epoch": 0.84, "grad_norm": 0.74683678150177, "learning_rate": 1.3380635389218698e-06, "loss": 2.0495, "step": 25134 }, { "epoch": 0.84, "grad_norm": 0.7670799493789673, "learning_rate": 1.3375324512350995e-06, "loss": 2.0581, "step": 25135 }, { "epoch": 0.84, "grad_norm": 0.7483343482017517, "learning_rate": 1.3370014614118054e-06, "loss": 2.0282, "step": 25136 }, { "epoch": 0.84, "grad_norm": 0.7364310622215271, "learning_rate": 1.3364705694579927e-06, "loss": 2.0663, "step": 25137 }, { "epoch": 0.84, "grad_norm": 0.7254992127418518, "learning_rate": 1.335939775379661e-06, "loss": 2.0496, "step": 25138 }, { "epoch": 0.84, "grad_norm": 0.7599915862083435, "learning_rate": 1.3354090791828024e-06, "loss": 2.0782, "step": 25139 }, { "epoch": 0.84, "grad_norm": 0.7428900599479675, "learning_rate": 1.3348784808734127e-06, "loss": 2.0817, "step": 25140 }, { "epoch": 0.84, "grad_norm": 0.7594741582870483, "learning_rate": 1.33434798045749e-06, "loss": 2.0127, "step": 25141 }, { "epoch": 0.84, "grad_norm": 0.7383279204368591, "learning_rate": 1.3338175779410235e-06, "loss": 2.056, "step": 25142 }, { "epoch": 0.84, "grad_norm": 0.7319151163101196, "learning_rate": 1.3332872733300063e-06, "loss": 1.9589, "step": 25143 }, { "epoch": 0.84, "grad_norm": 0.7256348133087158, "learning_rate": 1.3327570666304323e-06, "loss": 2.0905, "step": 25144 }, { "epoch": 0.84, "grad_norm": 0.7703343033790588, "learning_rate": 1.3322269578482906e-06, "loss": 1.9656, "step": 25145 }, { "epoch": 0.84, "grad_norm": 0.7417930960655212, "learning_rate": 1.3316969469895658e-06, "loss": 2.0519, "step": 25146 }, { "epoch": 0.84, "grad_norm": 0.7719858884811401, "learning_rate": 1.331167034060251e-06, "loss": 2.0331, "step": 25147 }, { "epoch": 0.84, "grad_norm": 0.727957010269165, "learning_rate": 1.3306372190663308e-06, "loss": 2.023, "step": 25148 }, { "epoch": 0.84, "grad_norm": 0.761118471622467, "learning_rate": 1.3301075020137887e-06, "loss": 1.9945, "step": 25149 }, { "epoch": 0.84, "grad_norm": 0.770431637763977, "learning_rate": 1.3295778829086103e-06, "loss": 2.0878, "step": 25150 }, { "epoch": 0.84, "grad_norm": 0.7343979477882385, "learning_rate": 1.329048361756783e-06, "loss": 2.08, "step": 25151 }, { "epoch": 0.84, "grad_norm": 0.7670046091079712, "learning_rate": 1.3285189385642816e-06, "loss": 2.0506, "step": 25152 }, { "epoch": 0.84, "grad_norm": 0.7407084107398987, "learning_rate": 1.3279896133370951e-06, "loss": 2.0495, "step": 25153 }, { "epoch": 0.84, "grad_norm": 0.7505419850349426, "learning_rate": 1.3274603860811986e-06, "loss": 2.0012, "step": 25154 }, { "epoch": 0.84, "grad_norm": 0.7217773199081421, "learning_rate": 1.3269312568025705e-06, "loss": 1.9906, "step": 25155 }, { "epoch": 0.84, "grad_norm": 0.7171909213066101, "learning_rate": 1.3264022255071896e-06, "loss": 2.0937, "step": 25156 }, { "epoch": 0.84, "grad_norm": 0.7457893490791321, "learning_rate": 1.3258732922010375e-06, "loss": 2.0706, "step": 25157 }, { "epoch": 0.84, "grad_norm": 0.7622553706169128, "learning_rate": 1.3253444568900819e-06, "loss": 2.1563, "step": 25158 }, { "epoch": 0.84, "grad_norm": 0.7559348344802856, "learning_rate": 1.3248157195803001e-06, "loss": 2.0674, "step": 25159 }, { "epoch": 0.84, "grad_norm": 0.7268890142440796, "learning_rate": 1.3242870802776685e-06, "loss": 2.0461, "step": 25160 }, { "epoch": 0.84, "grad_norm": 0.7106286287307739, "learning_rate": 1.3237585389881547e-06, "loss": 2.0459, "step": 25161 }, { "epoch": 0.84, "grad_norm": 0.7390077710151672, "learning_rate": 1.3232300957177347e-06, "loss": 2.0975, "step": 25162 }, { "epoch": 0.84, "grad_norm": 0.7644327282905579, "learning_rate": 1.3227017504723749e-06, "loss": 2.0579, "step": 25163 }, { "epoch": 0.84, "grad_norm": 0.7560936808586121, "learning_rate": 1.322173503258044e-06, "loss": 1.9858, "step": 25164 }, { "epoch": 0.84, "grad_norm": 0.7526465654373169, "learning_rate": 1.3216453540807116e-06, "loss": 2.0127, "step": 25165 }, { "epoch": 0.84, "grad_norm": 0.7458640336990356, "learning_rate": 1.3211173029463453e-06, "loss": 1.9934, "step": 25166 }, { "epoch": 0.84, "grad_norm": 0.7520620226860046, "learning_rate": 1.3205893498609102e-06, "loss": 2.0462, "step": 25167 }, { "epoch": 0.84, "grad_norm": 0.745143473148346, "learning_rate": 1.3200614948303669e-06, "loss": 1.9849, "step": 25168 }, { "epoch": 0.84, "grad_norm": 0.7449918389320374, "learning_rate": 1.3195337378606843e-06, "loss": 2.0482, "step": 25169 }, { "epoch": 0.84, "grad_norm": 0.7413531541824341, "learning_rate": 1.319006078957823e-06, "loss": 2.0464, "step": 25170 }, { "epoch": 0.84, "grad_norm": 0.7508883476257324, "learning_rate": 1.3184785181277404e-06, "loss": 2.0807, "step": 25171 }, { "epoch": 0.84, "grad_norm": 0.7754811644554138, "learning_rate": 1.3179510553763998e-06, "loss": 2.2027, "step": 25172 }, { "epoch": 0.84, "grad_norm": 0.7478004693984985, "learning_rate": 1.3174236907097626e-06, "loss": 2.1024, "step": 25173 }, { "epoch": 0.84, "grad_norm": 0.7371152639389038, "learning_rate": 1.3168964241337823e-06, "loss": 1.9881, "step": 25174 }, { "epoch": 0.84, "grad_norm": 0.7109218239784241, "learning_rate": 1.3163692556544183e-06, "loss": 1.9963, "step": 25175 }, { "epoch": 0.84, "grad_norm": 0.7128262519836426, "learning_rate": 1.315842185277626e-06, "loss": 2.0836, "step": 25176 }, { "epoch": 0.84, "grad_norm": 0.7441182136535645, "learning_rate": 1.3153152130093571e-06, "loss": 2.1338, "step": 25177 }, { "epoch": 0.84, "grad_norm": 0.7657666206359863, "learning_rate": 1.3147883388555672e-06, "loss": 2.0506, "step": 25178 }, { "epoch": 0.84, "grad_norm": 0.7295756340026855, "learning_rate": 1.3142615628222134e-06, "loss": 2.0486, "step": 25179 }, { "epoch": 0.84, "grad_norm": 0.7198320627212524, "learning_rate": 1.3137348849152364e-06, "loss": 2.0144, "step": 25180 }, { "epoch": 0.84, "grad_norm": 0.723551332950592, "learning_rate": 1.313208305140593e-06, "loss": 2.0104, "step": 25181 }, { "epoch": 0.84, "grad_norm": 0.7326354384422302, "learning_rate": 1.3126818235042338e-06, "loss": 1.9969, "step": 25182 }, { "epoch": 0.84, "grad_norm": 0.7440686821937561, "learning_rate": 1.3121554400121016e-06, "loss": 2.0032, "step": 25183 }, { "epoch": 0.84, "grad_norm": 0.7371004819869995, "learning_rate": 1.3116291546701476e-06, "loss": 2.1082, "step": 25184 }, { "epoch": 0.84, "grad_norm": 0.7588104009628296, "learning_rate": 1.3111029674843134e-06, "loss": 2.0658, "step": 25185 }, { "epoch": 0.84, "grad_norm": 0.7330766916275024, "learning_rate": 1.3105768784605477e-06, "loss": 2.0499, "step": 25186 }, { "epoch": 0.84, "grad_norm": 0.73399418592453, "learning_rate": 1.3100508876047902e-06, "loss": 1.9807, "step": 25187 }, { "epoch": 0.84, "grad_norm": 0.7637670040130615, "learning_rate": 1.309524994922986e-06, "loss": 2.0705, "step": 25188 }, { "epoch": 0.84, "grad_norm": 0.7333109378814697, "learning_rate": 1.3089992004210761e-06, "loss": 2.0158, "step": 25189 }, { "epoch": 0.84, "grad_norm": 0.7543295621871948, "learning_rate": 1.3084735041049979e-06, "loss": 2.0092, "step": 25190 }, { "epoch": 0.84, "grad_norm": 0.738252580165863, "learning_rate": 1.307947905980692e-06, "loss": 2.0517, "step": 25191 }, { "epoch": 0.84, "grad_norm": 0.7534932494163513, "learning_rate": 1.3074224060540984e-06, "loss": 2.0021, "step": 25192 }, { "epoch": 0.84, "grad_norm": 0.7674052715301514, "learning_rate": 1.3068970043311535e-06, "loss": 2.0153, "step": 25193 }, { "epoch": 0.84, "grad_norm": 0.7250233292579651, "learning_rate": 1.3063717008177878e-06, "loss": 2.0141, "step": 25194 }, { "epoch": 0.84, "grad_norm": 0.7527196407318115, "learning_rate": 1.3058464955199423e-06, "loss": 2.0941, "step": 25195 }, { "epoch": 0.84, "grad_norm": 0.7555750012397766, "learning_rate": 1.3053213884435468e-06, "loss": 2.0499, "step": 25196 }, { "epoch": 0.84, "grad_norm": 0.7417261004447937, "learning_rate": 1.304796379594535e-06, "loss": 1.9856, "step": 25197 }, { "epoch": 0.84, "grad_norm": 0.7267795205116272, "learning_rate": 1.3042714689788393e-06, "loss": 2.0699, "step": 25198 }, { "epoch": 0.84, "grad_norm": 0.7626472115516663, "learning_rate": 1.303746656602386e-06, "loss": 2.0021, "step": 25199 }, { "epoch": 0.84, "grad_norm": 0.7490193843841553, "learning_rate": 1.3032219424711056e-06, "loss": 2.0302, "step": 25200 }, { "epoch": 0.84, "grad_norm": 0.7557934522628784, "learning_rate": 1.3026973265909292e-06, "loss": 2.0784, "step": 25201 }, { "epoch": 0.84, "grad_norm": 0.7751348614692688, "learning_rate": 1.302172808967782e-06, "loss": 2.0299, "step": 25202 }, { "epoch": 0.84, "grad_norm": 0.7343829274177551, "learning_rate": 1.301648389607586e-06, "loss": 1.9857, "step": 25203 }, { "epoch": 0.84, "grad_norm": 0.7598791122436523, "learning_rate": 1.3011240685162719e-06, "loss": 2.0832, "step": 25204 }, { "epoch": 0.84, "grad_norm": 0.7633927464485168, "learning_rate": 1.300599845699757e-06, "loss": 2.045, "step": 25205 }, { "epoch": 0.84, "grad_norm": 0.7455004453659058, "learning_rate": 1.3000757211639692e-06, "loss": 2.0556, "step": 25206 }, { "epoch": 0.84, "grad_norm": 0.7329928278923035, "learning_rate": 1.2995516949148245e-06, "loss": 2.0547, "step": 25207 }, { "epoch": 0.84, "grad_norm": 0.7779415845870972, "learning_rate": 1.2990277669582485e-06, "loss": 2.0382, "step": 25208 }, { "epoch": 0.84, "grad_norm": 0.7289942502975464, "learning_rate": 1.2985039373001562e-06, "loss": 2.0465, "step": 25209 }, { "epoch": 0.84, "grad_norm": 0.7401782274246216, "learning_rate": 1.2979802059464674e-06, "loss": 2.0995, "step": 25210 }, { "epoch": 0.84, "grad_norm": 0.7250211834907532, "learning_rate": 1.2974565729030996e-06, "loss": 2.1211, "step": 25211 }, { "epoch": 0.84, "grad_norm": 0.748907208442688, "learning_rate": 1.2969330381759648e-06, "loss": 1.9701, "step": 25212 }, { "epoch": 0.84, "grad_norm": 0.7727431654930115, "learning_rate": 1.2964096017709793e-06, "loss": 1.9948, "step": 25213 }, { "epoch": 0.84, "grad_norm": 0.7299375534057617, "learning_rate": 1.2958862636940605e-06, "loss": 2.0656, "step": 25214 }, { "epoch": 0.84, "grad_norm": 0.7606677412986755, "learning_rate": 1.2953630239511173e-06, "loss": 2.0532, "step": 25215 }, { "epoch": 0.84, "grad_norm": 0.7589752078056335, "learning_rate": 1.294839882548058e-06, "loss": 2.0201, "step": 25216 }, { "epoch": 0.84, "grad_norm": 0.7460601329803467, "learning_rate": 1.294316839490799e-06, "loss": 1.9689, "step": 25217 }, { "epoch": 0.84, "grad_norm": 0.7449535131454468, "learning_rate": 1.2937938947852447e-06, "loss": 2.0113, "step": 25218 }, { "epoch": 0.84, "grad_norm": 0.7384112477302551, "learning_rate": 1.2932710484373057e-06, "loss": 2.1658, "step": 25219 }, { "epoch": 0.84, "grad_norm": 0.7341070175170898, "learning_rate": 1.2927483004528884e-06, "loss": 2.0777, "step": 25220 }, { "epoch": 0.84, "grad_norm": 0.768615186214447, "learning_rate": 1.2922256508378962e-06, "loss": 2.0054, "step": 25221 }, { "epoch": 0.84, "grad_norm": 0.7648570537567139, "learning_rate": 1.2917030995982337e-06, "loss": 2.0126, "step": 25222 }, { "epoch": 0.84, "grad_norm": 0.7308836579322815, "learning_rate": 1.2911806467398103e-06, "loss": 2.0569, "step": 25223 }, { "epoch": 0.84, "grad_norm": 0.7563140988349915, "learning_rate": 1.2906582922685229e-06, "loss": 2.0939, "step": 25224 }, { "epoch": 0.84, "grad_norm": 0.7246943712234497, "learning_rate": 1.2901360361902716e-06, "loss": 1.9814, "step": 25225 }, { "epoch": 0.84, "grad_norm": 0.7550070881843567, "learning_rate": 1.2896138785109612e-06, "loss": 2.0744, "step": 25226 }, { "epoch": 0.84, "grad_norm": 0.7848421335220337, "learning_rate": 1.2890918192364865e-06, "loss": 2.0405, "step": 25227 }, { "epoch": 0.84, "grad_norm": 0.7906895279884338, "learning_rate": 1.288569858372749e-06, "loss": 2.1167, "step": 25228 }, { "epoch": 0.84, "grad_norm": 0.7479054927825928, "learning_rate": 1.288047995925642e-06, "loss": 1.9669, "step": 25229 }, { "epoch": 0.84, "grad_norm": 0.7528484463691711, "learning_rate": 1.287526231901065e-06, "loss": 2.0632, "step": 25230 }, { "epoch": 0.84, "grad_norm": 0.7277924418449402, "learning_rate": 1.2870045663049092e-06, "loss": 2.0269, "step": 25231 }, { "epoch": 0.84, "grad_norm": 0.7739904522895813, "learning_rate": 1.2864829991430706e-06, "loss": 2.0439, "step": 25232 }, { "epoch": 0.84, "grad_norm": 0.7670672535896301, "learning_rate": 1.2859615304214413e-06, "loss": 2.0756, "step": 25233 }, { "epoch": 0.84, "grad_norm": 0.785698413848877, "learning_rate": 1.2854401601459088e-06, "loss": 2.0274, "step": 25234 }, { "epoch": 0.84, "grad_norm": 0.7571924328804016, "learning_rate": 1.2849188883223673e-06, "loss": 1.987, "step": 25235 }, { "epoch": 0.84, "grad_norm": 0.7543885707855225, "learning_rate": 1.2843977149567056e-06, "loss": 2.0392, "step": 25236 }, { "epoch": 0.84, "grad_norm": 0.7375682592391968, "learning_rate": 1.283876640054812e-06, "loss": 2.0427, "step": 25237 }, { "epoch": 0.84, "grad_norm": 0.7282437682151794, "learning_rate": 1.2833556636225686e-06, "loss": 2.1309, "step": 25238 }, { "epoch": 0.84, "grad_norm": 0.7729713320732117, "learning_rate": 1.2828347856658663e-06, "loss": 2.1235, "step": 25239 }, { "epoch": 0.84, "grad_norm": 0.750762939453125, "learning_rate": 1.282314006190587e-06, "loss": 1.9929, "step": 25240 }, { "epoch": 0.84, "grad_norm": 0.7610924243927002, "learning_rate": 1.281793325202616e-06, "loss": 2.0974, "step": 25241 }, { "epoch": 0.84, "grad_norm": 0.741794228553772, "learning_rate": 1.2812727427078353e-06, "loss": 2.0974, "step": 25242 }, { "epoch": 0.84, "grad_norm": 0.7532973289489746, "learning_rate": 1.2807522587121236e-06, "loss": 2.0535, "step": 25243 }, { "epoch": 0.84, "grad_norm": 0.7604883313179016, "learning_rate": 1.2802318732213625e-06, "loss": 2.0072, "step": 25244 }, { "epoch": 0.84, "grad_norm": 0.766514003276825, "learning_rate": 1.2797115862414333e-06, "loss": 2.075, "step": 25245 }, { "epoch": 0.84, "grad_norm": 0.771040678024292, "learning_rate": 1.2791913977782121e-06, "loss": 2.0886, "step": 25246 }, { "epoch": 0.84, "grad_norm": 0.7392578721046448, "learning_rate": 1.2786713078375734e-06, "loss": 2.087, "step": 25247 }, { "epoch": 0.84, "grad_norm": 0.7273556590080261, "learning_rate": 1.2781513164253978e-06, "loss": 2.0445, "step": 25248 }, { "epoch": 0.84, "grad_norm": 0.7258641123771667, "learning_rate": 1.2776314235475551e-06, "loss": 2.0761, "step": 25249 }, { "epoch": 0.84, "grad_norm": 0.7360250949859619, "learning_rate": 1.2771116292099216e-06, "loss": 2.0986, "step": 25250 }, { "epoch": 0.84, "grad_norm": 0.728259265422821, "learning_rate": 1.2765919334183674e-06, "loss": 2.0551, "step": 25251 }, { "epoch": 0.84, "grad_norm": 0.7631586194038391, "learning_rate": 1.2760723361787675e-06, "loss": 1.9561, "step": 25252 }, { "epoch": 0.84, "grad_norm": 0.7701394557952881, "learning_rate": 1.2755528374969873e-06, "loss": 2.0956, "step": 25253 }, { "epoch": 0.84, "grad_norm": 0.7619103789329529, "learning_rate": 1.2750334373789008e-06, "loss": 2.0163, "step": 25254 }, { "epoch": 0.84, "grad_norm": 0.7387005686759949, "learning_rate": 1.2745141358303726e-06, "loss": 2.0108, "step": 25255 }, { "epoch": 0.84, "grad_norm": 0.7507039308547974, "learning_rate": 1.2739949328572677e-06, "loss": 2.0603, "step": 25256 }, { "epoch": 0.84, "grad_norm": 0.7563625574111938, "learning_rate": 1.2734758284654547e-06, "loss": 2.0701, "step": 25257 }, { "epoch": 0.84, "grad_norm": 0.7437933683395386, "learning_rate": 1.2729568226607992e-06, "loss": 2.1404, "step": 25258 }, { "epoch": 0.84, "grad_norm": 0.7523148059844971, "learning_rate": 1.2724379154491628e-06, "loss": 2.0542, "step": 25259 }, { "epoch": 0.84, "grad_norm": 0.7422696352005005, "learning_rate": 1.2719191068364056e-06, "loss": 2.1243, "step": 25260 }, { "epoch": 0.84, "grad_norm": 0.7384955883026123, "learning_rate": 1.271400396828394e-06, "loss": 2.0639, "step": 25261 }, { "epoch": 0.84, "grad_norm": 0.7350273728370667, "learning_rate": 1.270881785430983e-06, "loss": 2.0462, "step": 25262 }, { "epoch": 0.84, "grad_norm": 0.747558057308197, "learning_rate": 1.2703632726500359e-06, "loss": 2.0259, "step": 25263 }, { "epoch": 0.84, "grad_norm": 0.7409343719482422, "learning_rate": 1.2698448584914091e-06, "loss": 2.07, "step": 25264 }, { "epoch": 0.84, "grad_norm": 0.7559645771980286, "learning_rate": 1.269326542960956e-06, "loss": 2.0607, "step": 25265 }, { "epoch": 0.84, "grad_norm": 0.7659122943878174, "learning_rate": 1.2688083260645345e-06, "loss": 2.0673, "step": 25266 }, { "epoch": 0.84, "grad_norm": 0.7683814764022827, "learning_rate": 1.2682902078080029e-06, "loss": 2.0438, "step": 25267 }, { "epoch": 0.84, "grad_norm": 0.7599559426307678, "learning_rate": 1.2677721881972095e-06, "loss": 1.9757, "step": 25268 }, { "epoch": 0.84, "grad_norm": 0.741051197052002, "learning_rate": 1.2672542672380073e-06, "loss": 2.0733, "step": 25269 }, { "epoch": 0.84, "grad_norm": 0.7399287223815918, "learning_rate": 1.2667364449362507e-06, "loss": 1.9815, "step": 25270 }, { "epoch": 0.84, "grad_norm": 0.7443737983703613, "learning_rate": 1.266218721297785e-06, "loss": 2.0422, "step": 25271 }, { "epoch": 0.84, "grad_norm": 0.7541748285293579, "learning_rate": 1.2657010963284643e-06, "loss": 2.0424, "step": 25272 }, { "epoch": 0.84, "grad_norm": 0.754728376865387, "learning_rate": 1.2651835700341309e-06, "loss": 2.006, "step": 25273 }, { "epoch": 0.84, "grad_norm": 0.7470657229423523, "learning_rate": 1.2646661424206376e-06, "loss": 2.0421, "step": 25274 }, { "epoch": 0.84, "grad_norm": 0.7627711892127991, "learning_rate": 1.264148813493824e-06, "loss": 1.994, "step": 25275 }, { "epoch": 0.84, "grad_norm": 0.7268960475921631, "learning_rate": 1.263631583259538e-06, "loss": 2.0474, "step": 25276 }, { "epoch": 0.84, "grad_norm": 0.738702654838562, "learning_rate": 1.263114451723626e-06, "loss": 1.9915, "step": 25277 }, { "epoch": 0.84, "grad_norm": 0.7314087748527527, "learning_rate": 1.262597418891922e-06, "loss": 2.0319, "step": 25278 }, { "epoch": 0.84, "grad_norm": 0.7452148795127869, "learning_rate": 1.2620804847702728e-06, "loss": 1.9888, "step": 25279 }, { "epoch": 0.84, "grad_norm": 0.7460310459136963, "learning_rate": 1.261563649364519e-06, "loss": 1.9824, "step": 25280 }, { "epoch": 0.84, "grad_norm": 0.7457327246665955, "learning_rate": 1.261046912680497e-06, "loss": 2.0799, "step": 25281 }, { "epoch": 0.84, "grad_norm": 0.7303289771080017, "learning_rate": 1.2605302747240444e-06, "loss": 1.9896, "step": 25282 }, { "epoch": 0.84, "grad_norm": 0.7576649785041809, "learning_rate": 1.260013735501001e-06, "loss": 2.0018, "step": 25283 }, { "epoch": 0.84, "grad_norm": 0.7163962125778198, "learning_rate": 1.259497295017198e-06, "loss": 1.9952, "step": 25284 }, { "epoch": 0.84, "grad_norm": 0.737267255783081, "learning_rate": 1.2589809532784735e-06, "loss": 2.0394, "step": 25285 }, { "epoch": 0.84, "grad_norm": 0.737406313419342, "learning_rate": 1.258464710290659e-06, "loss": 1.9981, "step": 25286 }, { "epoch": 0.84, "grad_norm": 0.7562631964683533, "learning_rate": 1.257948566059588e-06, "loss": 2.0538, "step": 25287 }, { "epoch": 0.84, "grad_norm": 0.7464264035224915, "learning_rate": 1.2574325205910886e-06, "loss": 2.0702, "step": 25288 }, { "epoch": 0.84, "grad_norm": 0.7192738056182861, "learning_rate": 1.2569165738909949e-06, "loss": 2.0709, "step": 25289 }, { "epoch": 0.84, "grad_norm": 0.7267374396324158, "learning_rate": 1.2564007259651345e-06, "loss": 2.035, "step": 25290 }, { "epoch": 0.84, "grad_norm": 0.7254093885421753, "learning_rate": 1.2558849768193327e-06, "loss": 2.0336, "step": 25291 }, { "epoch": 0.84, "grad_norm": 0.7453846335411072, "learning_rate": 1.2553693264594169e-06, "loss": 2.0156, "step": 25292 }, { "epoch": 0.84, "grad_norm": 0.7652398347854614, "learning_rate": 1.254853774891216e-06, "loss": 2.0094, "step": 25293 }, { "epoch": 0.84, "grad_norm": 0.7694460153579712, "learning_rate": 1.254338322120552e-06, "loss": 2.056, "step": 25294 }, { "epoch": 0.84, "grad_norm": 0.735845148563385, "learning_rate": 1.2538229681532465e-06, "loss": 2.0873, "step": 25295 }, { "epoch": 0.84, "grad_norm": 0.755448043346405, "learning_rate": 1.2533077129951254e-06, "loss": 2.0066, "step": 25296 }, { "epoch": 0.84, "grad_norm": 0.7351285219192505, "learning_rate": 1.2527925566520049e-06, "loss": 1.9966, "step": 25297 }, { "epoch": 0.84, "grad_norm": 0.7506215572357178, "learning_rate": 1.2522774991297081e-06, "loss": 2.0479, "step": 25298 }, { "epoch": 0.84, "grad_norm": 0.7476725578308105, "learning_rate": 1.2517625404340573e-06, "loss": 2.0759, "step": 25299 }, { "epoch": 0.84, "grad_norm": 0.7444807291030884, "learning_rate": 1.2512476805708629e-06, "loss": 2.0032, "step": 25300 }, { "epoch": 0.84, "grad_norm": 0.7494280338287354, "learning_rate": 1.2507329195459439e-06, "loss": 1.9735, "step": 25301 }, { "epoch": 0.84, "grad_norm": 0.7901392579078674, "learning_rate": 1.25021825736512e-06, "loss": 1.9824, "step": 25302 }, { "epoch": 0.84, "grad_norm": 0.7447489500045776, "learning_rate": 1.2497036940342023e-06, "loss": 2.0627, "step": 25303 }, { "epoch": 0.84, "grad_norm": 0.7485110759735107, "learning_rate": 1.2491892295590013e-06, "loss": 2.0416, "step": 25304 }, { "epoch": 0.84, "grad_norm": 0.7399124503135681, "learning_rate": 1.2486748639453339e-06, "loss": 2.0654, "step": 25305 }, { "epoch": 0.84, "grad_norm": 0.7590696215629578, "learning_rate": 1.2481605971990073e-06, "loss": 1.9205, "step": 25306 }, { "epoch": 0.84, "grad_norm": 0.7470648884773254, "learning_rate": 1.247646429325834e-06, "loss": 2.0402, "step": 25307 }, { "epoch": 0.84, "grad_norm": 0.7450483441352844, "learning_rate": 1.2471323603316233e-06, "loss": 2.0092, "step": 25308 }, { "epoch": 0.84, "grad_norm": 0.7480080127716064, "learning_rate": 1.2466183902221819e-06, "loss": 2.0238, "step": 25309 }, { "epoch": 0.84, "grad_norm": 0.7603583335876465, "learning_rate": 1.2461045190033127e-06, "loss": 2.1106, "step": 25310 }, { "epoch": 0.84, "grad_norm": 0.7747641205787659, "learning_rate": 1.245590746680827e-06, "loss": 2.0094, "step": 25311 }, { "epoch": 0.84, "grad_norm": 0.7354148030281067, "learning_rate": 1.2450770732605267e-06, "loss": 2.0197, "step": 25312 }, { "epoch": 0.84, "grad_norm": 0.7383816242218018, "learning_rate": 1.2445634987482124e-06, "loss": 2.0221, "step": 25313 }, { "epoch": 0.84, "grad_norm": 0.7253024578094482, "learning_rate": 1.2440500231496889e-06, "loss": 2.0521, "step": 25314 }, { "epoch": 0.84, "grad_norm": 0.7561866641044617, "learning_rate": 1.2435366464707589e-06, "loss": 2.0048, "step": 25315 }, { "epoch": 0.84, "grad_norm": 0.751228928565979, "learning_rate": 1.24302336871722e-06, "loss": 2.0114, "step": 25316 }, { "epoch": 0.84, "grad_norm": 0.7449452877044678, "learning_rate": 1.2425101898948689e-06, "loss": 2.0114, "step": 25317 }, { "epoch": 0.84, "grad_norm": 0.7391721606254578, "learning_rate": 1.2419971100095073e-06, "loss": 2.1011, "step": 25318 }, { "epoch": 0.84, "grad_norm": 0.7433247566223145, "learning_rate": 1.2414841290669277e-06, "loss": 2.0663, "step": 25319 }, { "epoch": 0.84, "grad_norm": 0.7417053580284119, "learning_rate": 1.2409712470729275e-06, "loss": 2.0872, "step": 25320 }, { "epoch": 0.84, "grad_norm": 0.7357958555221558, "learning_rate": 1.240458464033304e-06, "loss": 1.9746, "step": 25321 }, { "epoch": 0.84, "grad_norm": 0.765435516834259, "learning_rate": 1.239945779953844e-06, "loss": 2.0587, "step": 25322 }, { "epoch": 0.84, "grad_norm": 0.7597655653953552, "learning_rate": 1.2394331948403427e-06, "loss": 2.1195, "step": 25323 }, { "epoch": 0.84, "grad_norm": 0.7379902005195618, "learning_rate": 1.2389207086985922e-06, "loss": 1.9721, "step": 25324 }, { "epoch": 0.84, "grad_norm": 0.7225934863090515, "learning_rate": 1.2384083215343824e-06, "loss": 2.0483, "step": 25325 }, { "epoch": 0.84, "grad_norm": 0.7396114468574524, "learning_rate": 1.2378960333534973e-06, "loss": 2.0112, "step": 25326 }, { "epoch": 0.84, "grad_norm": 0.7500832676887512, "learning_rate": 1.2373838441617302e-06, "loss": 2.0159, "step": 25327 }, { "epoch": 0.84, "grad_norm": 0.7272298336029053, "learning_rate": 1.2368717539648634e-06, "loss": 2.0042, "step": 25328 }, { "epoch": 0.84, "grad_norm": 0.7270601391792297, "learning_rate": 1.236359762768683e-06, "loss": 1.9794, "step": 25329 }, { "epoch": 0.84, "grad_norm": 0.7376267910003662, "learning_rate": 1.2358478705789768e-06, "loss": 2.0539, "step": 25330 }, { "epoch": 0.84, "grad_norm": 0.7854896187782288, "learning_rate": 1.2353360774015245e-06, "loss": 2.0913, "step": 25331 }, { "epoch": 0.84, "grad_norm": 0.7512280941009521, "learning_rate": 1.234824383242107e-06, "loss": 2.1103, "step": 25332 }, { "epoch": 0.84, "grad_norm": 0.7399800419807434, "learning_rate": 1.2343127881065076e-06, "loss": 2.0801, "step": 25333 }, { "epoch": 0.84, "grad_norm": 0.7520300149917603, "learning_rate": 1.2338012920005071e-06, "loss": 2.118, "step": 25334 }, { "epoch": 0.84, "grad_norm": 0.7485512495040894, "learning_rate": 1.2332898949298788e-06, "loss": 2.0594, "step": 25335 }, { "epoch": 0.84, "grad_norm": 0.7656073570251465, "learning_rate": 1.2327785969004036e-06, "loss": 2.0, "step": 25336 }, { "epoch": 0.84, "grad_norm": 0.7756001353263855, "learning_rate": 1.2322673979178602e-06, "loss": 2.1244, "step": 25337 }, { "epoch": 0.84, "grad_norm": 0.7258834838867188, "learning_rate": 1.2317562979880182e-06, "loss": 2.0007, "step": 25338 }, { "epoch": 0.84, "grad_norm": 0.7529205679893494, "learning_rate": 1.2312452971166577e-06, "loss": 2.0551, "step": 25339 }, { "epoch": 0.84, "grad_norm": 0.764609158039093, "learning_rate": 1.2307343953095485e-06, "loss": 2.0322, "step": 25340 }, { "epoch": 0.84, "grad_norm": 0.7463908195495605, "learning_rate": 1.2302235925724614e-06, "loss": 2.0443, "step": 25341 }, { "epoch": 0.84, "grad_norm": 0.7505912184715271, "learning_rate": 1.2297128889111686e-06, "loss": 1.9558, "step": 25342 }, { "epoch": 0.84, "grad_norm": 0.7852020263671875, "learning_rate": 1.2292022843314432e-06, "loss": 2.0434, "step": 25343 }, { "epoch": 0.84, "grad_norm": 0.7296972274780273, "learning_rate": 1.2286917788390463e-06, "loss": 1.954, "step": 25344 }, { "epoch": 0.84, "grad_norm": 0.7459531426429749, "learning_rate": 1.2281813724397496e-06, "loss": 2.0337, "step": 25345 }, { "epoch": 0.84, "grad_norm": 0.741602897644043, "learning_rate": 1.2276710651393199e-06, "loss": 2.0515, "step": 25346 }, { "epoch": 0.84, "grad_norm": 0.7596555352210999, "learning_rate": 1.2271608569435222e-06, "loss": 2.0496, "step": 25347 }, { "epoch": 0.84, "grad_norm": 0.7494763731956482, "learning_rate": 1.226650747858118e-06, "loss": 2.046, "step": 25348 }, { "epoch": 0.84, "grad_norm": 0.7505432367324829, "learning_rate": 1.2261407378888735e-06, "loss": 2.0402, "step": 25349 }, { "epoch": 0.84, "grad_norm": 0.7442536354064941, "learning_rate": 1.2256308270415473e-06, "loss": 2.0134, "step": 25350 }, { "epoch": 0.84, "grad_norm": 0.7627303600311279, "learning_rate": 1.2251210153219007e-06, "loss": 1.9723, "step": 25351 }, { "epoch": 0.84, "grad_norm": 0.7572183609008789, "learning_rate": 1.2246113027356977e-06, "loss": 2.0238, "step": 25352 }, { "epoch": 0.84, "grad_norm": 0.729246199131012, "learning_rate": 1.2241016892886925e-06, "loss": 2.0024, "step": 25353 }, { "epoch": 0.84, "grad_norm": 0.7345984578132629, "learning_rate": 1.223592174986641e-06, "loss": 1.9236, "step": 25354 }, { "epoch": 0.84, "grad_norm": 0.7105081081390381, "learning_rate": 1.2230827598353045e-06, "loss": 2.0069, "step": 25355 }, { "epoch": 0.84, "grad_norm": 0.7124063372612, "learning_rate": 1.2225734438404346e-06, "loss": 2.0313, "step": 25356 }, { "epoch": 0.84, "grad_norm": 0.770212709903717, "learning_rate": 1.2220642270077843e-06, "loss": 2.0581, "step": 25357 }, { "epoch": 0.84, "grad_norm": 0.736091136932373, "learning_rate": 1.2215551093431078e-06, "loss": 2.0062, "step": 25358 }, { "epoch": 0.84, "grad_norm": 0.7458770871162415, "learning_rate": 1.2210460908521583e-06, "loss": 2.0713, "step": 25359 }, { "epoch": 0.84, "grad_norm": 0.7418363094329834, "learning_rate": 1.2205371715406845e-06, "loss": 2.0245, "step": 25360 }, { "epoch": 0.84, "grad_norm": 0.7704051733016968, "learning_rate": 1.2200283514144373e-06, "loss": 1.9436, "step": 25361 }, { "epoch": 0.84, "grad_norm": 0.745011568069458, "learning_rate": 1.2195196304791646e-06, "loss": 1.9866, "step": 25362 }, { "epoch": 0.84, "grad_norm": 0.7637007236480713, "learning_rate": 1.2190110087406115e-06, "loss": 2.0355, "step": 25363 }, { "epoch": 0.84, "grad_norm": 0.746093213558197, "learning_rate": 1.2185024862045248e-06, "loss": 2.0312, "step": 25364 }, { "epoch": 0.84, "grad_norm": 0.740727961063385, "learning_rate": 1.2179940628766563e-06, "loss": 2.0646, "step": 25365 }, { "epoch": 0.84, "grad_norm": 0.7488850355148315, "learning_rate": 1.217485738762738e-06, "loss": 1.9912, "step": 25366 }, { "epoch": 0.84, "grad_norm": 0.7682657241821289, "learning_rate": 1.2169775138685203e-06, "loss": 2.063, "step": 25367 }, { "epoch": 0.84, "grad_norm": 0.7529699802398682, "learning_rate": 1.2164693881997446e-06, "loss": 2.1372, "step": 25368 }, { "epoch": 0.84, "grad_norm": 0.7560436129570007, "learning_rate": 1.215961361762149e-06, "loss": 2.1288, "step": 25369 }, { "epoch": 0.84, "grad_norm": 0.7511946558952332, "learning_rate": 1.2154534345614754e-06, "loss": 1.9689, "step": 25370 }, { "epoch": 0.84, "grad_norm": 0.7513819336891174, "learning_rate": 1.2149456066034604e-06, "loss": 2.0852, "step": 25371 }, { "epoch": 0.84, "grad_norm": 0.7603201270103455, "learning_rate": 1.2144378778938392e-06, "loss": 1.9777, "step": 25372 }, { "epoch": 0.84, "grad_norm": 0.7423480749130249, "learning_rate": 1.2139302484383507e-06, "loss": 2.1297, "step": 25373 }, { "epoch": 0.84, "grad_norm": 0.7612413763999939, "learning_rate": 1.2134227182427306e-06, "loss": 2.0268, "step": 25374 }, { "epoch": 0.84, "grad_norm": 0.7494702935218811, "learning_rate": 1.2129152873127114e-06, "loss": 2.0919, "step": 25375 }, { "epoch": 0.84, "grad_norm": 0.7470599412918091, "learning_rate": 1.2124079556540236e-06, "loss": 2.0686, "step": 25376 }, { "epoch": 0.84, "grad_norm": 0.7497073411941528, "learning_rate": 1.211900723272401e-06, "loss": 2.0722, "step": 25377 }, { "epoch": 0.84, "grad_norm": 0.7279725670814514, "learning_rate": 1.2113935901735774e-06, "loss": 2.0114, "step": 25378 }, { "epoch": 0.84, "grad_norm": 0.7560218572616577, "learning_rate": 1.2108865563632743e-06, "loss": 2.0566, "step": 25379 }, { "epoch": 0.84, "grad_norm": 0.7198745012283325, "learning_rate": 1.2103796218472241e-06, "loss": 1.9964, "step": 25380 }, { "epoch": 0.84, "grad_norm": 0.7433822154998779, "learning_rate": 1.2098727866311554e-06, "loss": 2.0214, "step": 25381 }, { "epoch": 0.84, "grad_norm": 0.7389475703239441, "learning_rate": 1.2093660507207904e-06, "loss": 2.0621, "step": 25382 }, { "epoch": 0.84, "grad_norm": 0.7363719344139099, "learning_rate": 1.208859414121859e-06, "loss": 2.0229, "step": 25383 }, { "epoch": 0.84, "grad_norm": 0.7337396144866943, "learning_rate": 1.208352876840081e-06, "loss": 1.9386, "step": 25384 }, { "epoch": 0.84, "grad_norm": 0.7351333498954773, "learning_rate": 1.2078464388811773e-06, "loss": 2.0608, "step": 25385 }, { "epoch": 0.84, "grad_norm": 0.7564588189125061, "learning_rate": 1.2073401002508722e-06, "loss": 2.041, "step": 25386 }, { "epoch": 0.84, "grad_norm": 0.7403412461280823, "learning_rate": 1.206833860954888e-06, "loss": 2.0892, "step": 25387 }, { "epoch": 0.84, "grad_norm": 0.7586153745651245, "learning_rate": 1.206327720998941e-06, "loss": 2.0587, "step": 25388 }, { "epoch": 0.84, "grad_norm": 0.7551866769790649, "learning_rate": 1.205821680388749e-06, "loss": 2.1187, "step": 25389 }, { "epoch": 0.84, "grad_norm": 0.7998202443122864, "learning_rate": 1.2053157391300307e-06, "loss": 2.0948, "step": 25390 }, { "epoch": 0.84, "grad_norm": 0.7638534903526306, "learning_rate": 1.2048098972284993e-06, "loss": 2.0874, "step": 25391 }, { "epoch": 0.84, "grad_norm": 0.7591213583946228, "learning_rate": 1.2043041546898726e-06, "loss": 2.0091, "step": 25392 }, { "epoch": 0.84, "grad_norm": 0.7547223567962646, "learning_rate": 1.2037985115198614e-06, "loss": 2.0146, "step": 25393 }, { "epoch": 0.84, "grad_norm": 0.7368488907814026, "learning_rate": 1.2032929677241812e-06, "loss": 2.0462, "step": 25394 }, { "epoch": 0.84, "grad_norm": 0.7529860734939575, "learning_rate": 1.2027875233085395e-06, "loss": 2.0089, "step": 25395 }, { "epoch": 0.84, "grad_norm": 0.7599092125892639, "learning_rate": 1.2022821782786508e-06, "loss": 2.0013, "step": 25396 }, { "epoch": 0.84, "grad_norm": 0.745557427406311, "learning_rate": 1.201776932640223e-06, "loss": 2.1181, "step": 25397 }, { "epoch": 0.84, "grad_norm": 0.7367281317710876, "learning_rate": 1.201271786398961e-06, "loss": 2.0721, "step": 25398 }, { "epoch": 0.85, "grad_norm": 0.7321810126304626, "learning_rate": 1.2007667395605727e-06, "loss": 2.1054, "step": 25399 }, { "epoch": 0.85, "grad_norm": 0.750800609588623, "learning_rate": 1.200261792130767e-06, "loss": 2.0265, "step": 25400 }, { "epoch": 0.85, "grad_norm": 0.7458320260047913, "learning_rate": 1.199756944115248e-06, "loss": 2.0403, "step": 25401 }, { "epoch": 0.85, "grad_norm": 0.7594524025917053, "learning_rate": 1.1992521955197134e-06, "loss": 2.1205, "step": 25402 }, { "epoch": 0.85, "grad_norm": 0.7131485342979431, "learning_rate": 1.1987475463498733e-06, "loss": 2.0077, "step": 25403 }, { "epoch": 0.85, "grad_norm": 0.7324795126914978, "learning_rate": 1.1982429966114228e-06, "loss": 2.0722, "step": 25404 }, { "epoch": 0.85, "grad_norm": 0.7647042274475098, "learning_rate": 1.1977385463100666e-06, "loss": 2.0393, "step": 25405 }, { "epoch": 0.85, "grad_norm": 0.7673027515411377, "learning_rate": 1.197234195451502e-06, "loss": 2.0297, "step": 25406 }, { "epoch": 0.85, "grad_norm": 0.753304660320282, "learning_rate": 1.1967299440414249e-06, "loss": 2.0804, "step": 25407 }, { "epoch": 0.85, "grad_norm": 0.7348125576972961, "learning_rate": 1.1962257920855324e-06, "loss": 2.0497, "step": 25408 }, { "epoch": 0.85, "grad_norm": 0.7185177803039551, "learning_rate": 1.1957217395895237e-06, "loss": 2.0238, "step": 25409 }, { "epoch": 0.85, "grad_norm": 0.7549484372138977, "learning_rate": 1.1952177865590919e-06, "loss": 2.0855, "step": 25410 }, { "epoch": 0.85, "grad_norm": 0.7673759460449219, "learning_rate": 1.194713932999927e-06, "loss": 2.0337, "step": 25411 }, { "epoch": 0.85, "grad_norm": 0.7153233885765076, "learning_rate": 1.1942101789177253e-06, "loss": 2.0342, "step": 25412 }, { "epoch": 0.85, "grad_norm": 0.7404223084449768, "learning_rate": 1.1937065243181744e-06, "loss": 2.0713, "step": 25413 }, { "epoch": 0.85, "grad_norm": 0.7522891759872437, "learning_rate": 1.193202969206969e-06, "loss": 2.0454, "step": 25414 }, { "epoch": 0.85, "grad_norm": 0.7712404131889343, "learning_rate": 1.1926995135897923e-06, "loss": 2.0972, "step": 25415 }, { "epoch": 0.85, "grad_norm": 0.7347492575645447, "learning_rate": 1.1921961574723373e-06, "loss": 2.1077, "step": 25416 }, { "epoch": 0.85, "grad_norm": 0.7498459815979004, "learning_rate": 1.1916929008602863e-06, "loss": 1.9918, "step": 25417 }, { "epoch": 0.85, "grad_norm": 0.7276119589805603, "learning_rate": 1.1911897437593279e-06, "loss": 2.0384, "step": 25418 }, { "epoch": 0.85, "grad_norm": 0.7372073531150818, "learning_rate": 1.1906866861751466e-06, "loss": 2.0247, "step": 25419 }, { "epoch": 0.85, "grad_norm": 0.7364742755889893, "learning_rate": 1.190183728113421e-06, "loss": 1.9952, "step": 25420 }, { "epoch": 0.85, "grad_norm": 0.7318270206451416, "learning_rate": 1.1896808695798368e-06, "loss": 2.0588, "step": 25421 }, { "epoch": 0.85, "grad_norm": 0.7443020343780518, "learning_rate": 1.1891781105800782e-06, "loss": 1.9573, "step": 25422 }, { "epoch": 0.85, "grad_norm": 0.7799516916275024, "learning_rate": 1.1886754511198206e-06, "loss": 2.0403, "step": 25423 }, { "epoch": 0.85, "grad_norm": 0.7437310218811035, "learning_rate": 1.188172891204742e-06, "loss": 2.1055, "step": 25424 }, { "epoch": 0.85, "grad_norm": 0.7208044528961182, "learning_rate": 1.1876704308405228e-06, "loss": 2.0234, "step": 25425 }, { "epoch": 0.85, "grad_norm": 0.7307316660881042, "learning_rate": 1.187168070032838e-06, "loss": 2.0262, "step": 25426 }, { "epoch": 0.85, "grad_norm": 0.7535862922668457, "learning_rate": 1.186665808787365e-06, "loss": 2.0262, "step": 25427 }, { "epoch": 0.85, "grad_norm": 0.7238076329231262, "learning_rate": 1.186163647109776e-06, "loss": 1.9986, "step": 25428 }, { "epoch": 0.85, "grad_norm": 0.7503595352172852, "learning_rate": 1.185661585005744e-06, "loss": 2.0501, "step": 25429 }, { "epoch": 0.85, "grad_norm": 0.7441965937614441, "learning_rate": 1.1851596224809404e-06, "loss": 1.9945, "step": 25430 }, { "epoch": 0.85, "grad_norm": 0.7329748272895813, "learning_rate": 1.1846577595410402e-06, "loss": 2.0297, "step": 25431 }, { "epoch": 0.85, "grad_norm": 0.7497860789299011, "learning_rate": 1.1841559961917103e-06, "loss": 2.0665, "step": 25432 }, { "epoch": 0.85, "grad_norm": 0.7643478512763977, "learning_rate": 1.1836543324386162e-06, "loss": 2.09, "step": 25433 }, { "epoch": 0.85, "grad_norm": 0.7383556365966797, "learning_rate": 1.183152768287432e-06, "loss": 2.0804, "step": 25434 }, { "epoch": 0.85, "grad_norm": 0.760520339012146, "learning_rate": 1.1826513037438182e-06, "loss": 2.0188, "step": 25435 }, { "epoch": 0.85, "grad_norm": 0.7337445020675659, "learning_rate": 1.1821499388134449e-06, "loss": 2.0435, "step": 25436 }, { "epoch": 0.85, "grad_norm": 0.7420154809951782, "learning_rate": 1.1816486735019705e-06, "loss": 2.0938, "step": 25437 }, { "epoch": 0.85, "grad_norm": 0.7261763215065002, "learning_rate": 1.1811475078150647e-06, "loss": 2.0524, "step": 25438 }, { "epoch": 0.85, "grad_norm": 0.7407971024513245, "learning_rate": 1.1806464417583829e-06, "loss": 2.0171, "step": 25439 }, { "epoch": 0.85, "grad_norm": 0.7220339775085449, "learning_rate": 1.1801454753375918e-06, "loss": 2.0709, "step": 25440 }, { "epoch": 0.85, "grad_norm": 0.7758349776268005, "learning_rate": 1.179644608558348e-06, "loss": 2.0864, "step": 25441 }, { "epoch": 0.85, "grad_norm": 0.747570812702179, "learning_rate": 1.1791438414263078e-06, "loss": 1.9923, "step": 25442 }, { "epoch": 0.85, "grad_norm": 0.7489310503005981, "learning_rate": 1.1786431739471315e-06, "loss": 1.9762, "step": 25443 }, { "epoch": 0.85, "grad_norm": 0.7479124665260315, "learning_rate": 1.1781426061264766e-06, "loss": 2.0005, "step": 25444 }, { "epoch": 0.85, "grad_norm": 0.78728848695755, "learning_rate": 1.1776421379699965e-06, "loss": 2.0808, "step": 25445 }, { "epoch": 0.85, "grad_norm": 0.7510107159614563, "learning_rate": 1.1771417694833432e-06, "loss": 2.1014, "step": 25446 }, { "epoch": 0.85, "grad_norm": 0.7366072535514832, "learning_rate": 1.1766415006721732e-06, "loss": 2.0393, "step": 25447 }, { "epoch": 0.85, "grad_norm": 0.7826735973358154, "learning_rate": 1.1761413315421343e-06, "loss": 2.1219, "step": 25448 }, { "epoch": 0.85, "grad_norm": 0.7491790056228638, "learning_rate": 1.1756412620988822e-06, "loss": 2.0614, "step": 25449 }, { "epoch": 0.85, "grad_norm": 0.7468246817588806, "learning_rate": 1.175141292348062e-06, "loss": 2.0325, "step": 25450 }, { "epoch": 0.85, "grad_norm": 0.7504499554634094, "learning_rate": 1.1746414222953228e-06, "loss": 2.073, "step": 25451 }, { "epoch": 0.85, "grad_norm": 0.7411946654319763, "learning_rate": 1.1741416519463123e-06, "loss": 2.0335, "step": 25452 }, { "epoch": 0.85, "grad_norm": 0.7617331743240356, "learning_rate": 1.173641981306679e-06, "loss": 2.0529, "step": 25453 }, { "epoch": 0.85, "grad_norm": 0.7682652473449707, "learning_rate": 1.1731424103820666e-06, "loss": 2.1454, "step": 25454 }, { "epoch": 0.85, "grad_norm": 0.7573553323745728, "learning_rate": 1.1726429391781158e-06, "loss": 2.0809, "step": 25455 }, { "epoch": 0.85, "grad_norm": 0.7370169162750244, "learning_rate": 1.1721435677004733e-06, "loss": 2.0488, "step": 25456 }, { "epoch": 0.85, "grad_norm": 0.758801281452179, "learning_rate": 1.171644295954777e-06, "loss": 2.0894, "step": 25457 }, { "epoch": 0.85, "grad_norm": 0.7704338431358337, "learning_rate": 1.171145123946672e-06, "loss": 2.0871, "step": 25458 }, { "epoch": 0.85, "grad_norm": 0.7466415762901306, "learning_rate": 1.170646051681793e-06, "loss": 2.0652, "step": 25459 }, { "epoch": 0.85, "grad_norm": 0.7362510561943054, "learning_rate": 1.1701470791657822e-06, "loss": 2.0318, "step": 25460 }, { "epoch": 0.85, "grad_norm": 0.7413967847824097, "learning_rate": 1.1696482064042735e-06, "loss": 1.9645, "step": 25461 }, { "epoch": 0.85, "grad_norm": 0.7412198185920715, "learning_rate": 1.1691494334029052e-06, "loss": 2.0693, "step": 25462 }, { "epoch": 0.85, "grad_norm": 0.7685073018074036, "learning_rate": 1.1686507601673125e-06, "loss": 2.0323, "step": 25463 }, { "epoch": 0.85, "grad_norm": 0.7570863366127014, "learning_rate": 1.1681521867031253e-06, "loss": 2.0706, "step": 25464 }, { "epoch": 0.85, "grad_norm": 0.7335966229438782, "learning_rate": 1.1676537130159782e-06, "loss": 2.1258, "step": 25465 }, { "epoch": 0.85, "grad_norm": 0.7508025765419006, "learning_rate": 1.1671553391115054e-06, "loss": 2.0174, "step": 25466 }, { "epoch": 0.85, "grad_norm": 0.7532062530517578, "learning_rate": 1.1666570649953358e-06, "loss": 2.09, "step": 25467 }, { "epoch": 0.85, "grad_norm": 0.7381278276443481, "learning_rate": 1.1661588906730946e-06, "loss": 2.012, "step": 25468 }, { "epoch": 0.85, "grad_norm": 0.8015879392623901, "learning_rate": 1.1656608161504158e-06, "loss": 2.0267, "step": 25469 }, { "epoch": 0.85, "grad_norm": 0.7692397832870483, "learning_rate": 1.165162841432922e-06, "loss": 2.0398, "step": 25470 }, { "epoch": 0.85, "grad_norm": 0.7887807488441467, "learning_rate": 1.164664966526242e-06, "loss": 2.0039, "step": 25471 }, { "epoch": 0.85, "grad_norm": 0.7189185619354248, "learning_rate": 1.1641671914359997e-06, "loss": 1.9984, "step": 25472 }, { "epoch": 0.85, "grad_norm": 0.7554293274879456, "learning_rate": 1.1636695161678158e-06, "loss": 2.0581, "step": 25473 }, { "epoch": 0.85, "grad_norm": 0.7260330319404602, "learning_rate": 1.1631719407273156e-06, "loss": 2.037, "step": 25474 }, { "epoch": 0.85, "grad_norm": 0.7498303651809692, "learning_rate": 1.1626744651201217e-06, "loss": 2.0873, "step": 25475 }, { "epoch": 0.85, "grad_norm": 0.7247176766395569, "learning_rate": 1.1621770893518525e-06, "loss": 2.0212, "step": 25476 }, { "epoch": 0.85, "grad_norm": 0.7451639771461487, "learning_rate": 1.161679813428125e-06, "loss": 2.0387, "step": 25477 }, { "epoch": 0.85, "grad_norm": 0.7324416637420654, "learning_rate": 1.1611826373545587e-06, "loss": 2.0459, "step": 25478 }, { "epoch": 0.85, "grad_norm": 0.7474309206008911, "learning_rate": 1.160685561136774e-06, "loss": 2.0287, "step": 25479 }, { "epoch": 0.85, "grad_norm": 0.7723796367645264, "learning_rate": 1.160188584780383e-06, "loss": 2.0064, "step": 25480 }, { "epoch": 0.85, "grad_norm": 0.7569363713264465, "learning_rate": 1.1596917082909987e-06, "loss": 2.0708, "step": 25481 }, { "epoch": 0.85, "grad_norm": 0.759850800037384, "learning_rate": 1.159194931674238e-06, "loss": 2.0356, "step": 25482 }, { "epoch": 0.85, "grad_norm": 0.7253577709197998, "learning_rate": 1.1586982549357106e-06, "loss": 2.0503, "step": 25483 }, { "epoch": 0.85, "grad_norm": 0.7198708057403564, "learning_rate": 1.158201678081028e-06, "loss": 1.9566, "step": 25484 }, { "epoch": 0.85, "grad_norm": 0.7418367266654968, "learning_rate": 1.1577052011158064e-06, "loss": 2.0321, "step": 25485 }, { "epoch": 0.85, "grad_norm": 0.7481715083122253, "learning_rate": 1.1572088240456436e-06, "loss": 2.1149, "step": 25486 }, { "epoch": 0.85, "grad_norm": 0.76832115650177, "learning_rate": 1.1567125468761542e-06, "loss": 2.0518, "step": 25487 }, { "epoch": 0.85, "grad_norm": 0.732866644859314, "learning_rate": 1.1562163696129459e-06, "loss": 1.9125, "step": 25488 }, { "epoch": 0.85, "grad_norm": 0.7413383722305298, "learning_rate": 1.1557202922616217e-06, "loss": 2.0417, "step": 25489 }, { "epoch": 0.85, "grad_norm": 0.7307530045509338, "learning_rate": 1.1552243148277842e-06, "loss": 2.0541, "step": 25490 }, { "epoch": 0.85, "grad_norm": 0.7439472675323486, "learning_rate": 1.154728437317041e-06, "loss": 2.0177, "step": 25491 }, { "epoch": 0.85, "grad_norm": 0.7621258497238159, "learning_rate": 1.1542326597349896e-06, "loss": 2.0715, "step": 25492 }, { "epoch": 0.85, "grad_norm": 0.7660601735115051, "learning_rate": 1.1537369820872367e-06, "loss": 1.9851, "step": 25493 }, { "epoch": 0.85, "grad_norm": 0.7690000534057617, "learning_rate": 1.153241404379376e-06, "loss": 2.058, "step": 25494 }, { "epoch": 0.85, "grad_norm": 0.7577580809593201, "learning_rate": 1.1527459266170116e-06, "loss": 2.0319, "step": 25495 }, { "epoch": 0.85, "grad_norm": 0.7869577407836914, "learning_rate": 1.1522505488057366e-06, "loss": 2.0772, "step": 25496 }, { "epoch": 0.85, "grad_norm": 0.7485555410385132, "learning_rate": 1.1517552709511514e-06, "loss": 2.1006, "step": 25497 }, { "epoch": 0.85, "grad_norm": 0.7327066659927368, "learning_rate": 1.1512600930588492e-06, "loss": 2.1332, "step": 25498 }, { "epoch": 0.85, "grad_norm": 0.7441847324371338, "learning_rate": 1.150765015134424e-06, "loss": 2.0125, "step": 25499 }, { "epoch": 0.85, "grad_norm": 0.7413938045501709, "learning_rate": 1.1502700371834685e-06, "loss": 2.0255, "step": 25500 }, { "epoch": 0.85, "grad_norm": 0.7364518642425537, "learning_rate": 1.149775159211578e-06, "loss": 2.0028, "step": 25501 }, { "epoch": 0.85, "grad_norm": 0.7678237557411194, "learning_rate": 1.1492803812243403e-06, "loss": 2.0314, "step": 25502 }, { "epoch": 0.85, "grad_norm": 0.8155337572097778, "learning_rate": 1.148785703227344e-06, "loss": 2.0438, "step": 25503 }, { "epoch": 0.85, "grad_norm": 0.7294251322746277, "learning_rate": 1.148291125226182e-06, "loss": 2.0447, "step": 25504 }, { "epoch": 0.85, "grad_norm": 0.7276601791381836, "learning_rate": 1.1477966472264367e-06, "loss": 2.0183, "step": 25505 }, { "epoch": 0.85, "grad_norm": 0.7848840951919556, "learning_rate": 1.1473022692336977e-06, "loss": 2.0941, "step": 25506 }, { "epoch": 0.85, "grad_norm": 0.7748706936836243, "learning_rate": 1.1468079912535534e-06, "loss": 2.0363, "step": 25507 }, { "epoch": 0.85, "grad_norm": 0.7341156601905823, "learning_rate": 1.1463138132915796e-06, "loss": 2.0216, "step": 25508 }, { "epoch": 0.85, "grad_norm": 0.73978590965271, "learning_rate": 1.145819735353364e-06, "loss": 1.9652, "step": 25509 }, { "epoch": 0.85, "grad_norm": 0.728164553642273, "learning_rate": 1.1453257574444899e-06, "loss": 1.9596, "step": 25510 }, { "epoch": 0.85, "grad_norm": 0.7132040858268738, "learning_rate": 1.1448318795705349e-06, "loss": 1.9824, "step": 25511 }, { "epoch": 0.85, "grad_norm": 0.7461531758308411, "learning_rate": 1.1443381017370792e-06, "loss": 2.0458, "step": 25512 }, { "epoch": 0.85, "grad_norm": 0.7205559611320496, "learning_rate": 1.1438444239497027e-06, "loss": 2.0345, "step": 25513 }, { "epoch": 0.85, "grad_norm": 0.7151537537574768, "learning_rate": 1.1433508462139797e-06, "loss": 2.0313, "step": 25514 }, { "epoch": 0.85, "grad_norm": 0.731884241104126, "learning_rate": 1.1428573685354894e-06, "loss": 2.0558, "step": 25515 }, { "epoch": 0.85, "grad_norm": 0.7399733066558838, "learning_rate": 1.142363990919807e-06, "loss": 2.0367, "step": 25516 }, { "epoch": 0.85, "grad_norm": 0.7168453931808472, "learning_rate": 1.141870713372505e-06, "loss": 2.0329, "step": 25517 }, { "epoch": 0.85, "grad_norm": 0.7173789143562317, "learning_rate": 1.1413775358991542e-06, "loss": 2.0684, "step": 25518 }, { "epoch": 0.85, "grad_norm": 0.7331874966621399, "learning_rate": 1.1408844585053302e-06, "loss": 2.0618, "step": 25519 }, { "epoch": 0.85, "grad_norm": 0.7190109491348267, "learning_rate": 1.140391481196602e-06, "loss": 2.0154, "step": 25520 }, { "epoch": 0.85, "grad_norm": 0.7759547829627991, "learning_rate": 1.1398986039785376e-06, "loss": 2.0494, "step": 25521 }, { "epoch": 0.85, "grad_norm": 0.757175862789154, "learning_rate": 1.1394058268567054e-06, "loss": 2.0845, "step": 25522 }, { "epoch": 0.85, "grad_norm": 0.7645297050476074, "learning_rate": 1.1389131498366745e-06, "loss": 2.0331, "step": 25523 }, { "epoch": 0.85, "grad_norm": 0.7445845007896423, "learning_rate": 1.1384205729240105e-06, "loss": 2.0453, "step": 25524 }, { "epoch": 0.85, "grad_norm": 0.7423800826072693, "learning_rate": 1.1379280961242756e-06, "loss": 2.0441, "step": 25525 }, { "epoch": 0.85, "grad_norm": 0.7272167205810547, "learning_rate": 1.1374357194430374e-06, "loss": 2.0796, "step": 25526 }, { "epoch": 0.85, "grad_norm": 0.7231281995773315, "learning_rate": 1.1369434428858539e-06, "loss": 1.9872, "step": 25527 }, { "epoch": 0.85, "grad_norm": 0.7214996814727783, "learning_rate": 1.1364512664582894e-06, "loss": 2.112, "step": 25528 }, { "epoch": 0.85, "grad_norm": 0.7728033661842346, "learning_rate": 1.1359591901659083e-06, "loss": 2.1227, "step": 25529 }, { "epoch": 0.85, "grad_norm": 0.7460951805114746, "learning_rate": 1.135467214014262e-06, "loss": 2.0895, "step": 25530 }, { "epoch": 0.85, "grad_norm": 0.7359938025474548, "learning_rate": 1.1349753380089102e-06, "loss": 2.0748, "step": 25531 }, { "epoch": 0.85, "grad_norm": 0.7507954835891724, "learning_rate": 1.134483562155415e-06, "loss": 2.0368, "step": 25532 }, { "epoch": 0.85, "grad_norm": 0.7548166513442993, "learning_rate": 1.133991886459328e-06, "loss": 2.0295, "step": 25533 }, { "epoch": 0.85, "grad_norm": 0.7229151725769043, "learning_rate": 1.1335003109262033e-06, "loss": 2.0778, "step": 25534 }, { "epoch": 0.85, "grad_norm": 0.774015486240387, "learning_rate": 1.1330088355615976e-06, "loss": 2.0328, "step": 25535 }, { "epoch": 0.85, "grad_norm": 0.7786744236946106, "learning_rate": 1.13251746037106e-06, "loss": 1.996, "step": 25536 }, { "epoch": 0.85, "grad_norm": 0.7786620855331421, "learning_rate": 1.1320261853601422e-06, "loss": 2.0005, "step": 25537 }, { "epoch": 0.85, "grad_norm": 0.7331827878952026, "learning_rate": 1.131535010534398e-06, "loss": 2.0167, "step": 25538 }, { "epoch": 0.85, "grad_norm": 0.7627809047698975, "learning_rate": 1.1310439358993742e-06, "loss": 2.017, "step": 25539 }, { "epoch": 0.85, "grad_norm": 0.7448964715003967, "learning_rate": 1.1305529614606158e-06, "loss": 2.0677, "step": 25540 }, { "epoch": 0.85, "grad_norm": 0.756600558757782, "learning_rate": 1.1300620872236745e-06, "loss": 2.064, "step": 25541 }, { "epoch": 0.85, "grad_norm": 0.7472687363624573, "learning_rate": 1.1295713131940933e-06, "loss": 2.0349, "step": 25542 }, { "epoch": 0.85, "grad_norm": 0.7513116002082825, "learning_rate": 1.1290806393774145e-06, "loss": 2.0647, "step": 25543 }, { "epoch": 0.85, "grad_norm": 0.7636187672615051, "learning_rate": 1.1285900657791836e-06, "loss": 2.1422, "step": 25544 }, { "epoch": 0.85, "grad_norm": 0.7481735348701477, "learning_rate": 1.1280995924049453e-06, "loss": 2.0318, "step": 25545 }, { "epoch": 0.85, "grad_norm": 0.7470899224281311, "learning_rate": 1.1276092192602362e-06, "loss": 2.1109, "step": 25546 }, { "epoch": 0.85, "grad_norm": 0.7598119974136353, "learning_rate": 1.1271189463506006e-06, "loss": 1.9597, "step": 25547 }, { "epoch": 0.85, "grad_norm": 0.7258020043373108, "learning_rate": 1.126628773681575e-06, "loss": 2.0004, "step": 25548 }, { "epoch": 0.85, "grad_norm": 0.7323870658874512, "learning_rate": 1.1261387012586955e-06, "loss": 2.0275, "step": 25549 }, { "epoch": 0.85, "grad_norm": 0.7854240536689758, "learning_rate": 1.1256487290874995e-06, "loss": 2.0649, "step": 25550 }, { "epoch": 0.85, "grad_norm": 0.7275902032852173, "learning_rate": 1.1251588571735284e-06, "loss": 1.993, "step": 25551 }, { "epoch": 0.85, "grad_norm": 0.773851215839386, "learning_rate": 1.1246690855223063e-06, "loss": 1.9964, "step": 25552 }, { "epoch": 0.85, "grad_norm": 0.7523597478866577, "learning_rate": 1.1241794141393725e-06, "loss": 2.0783, "step": 25553 }, { "epoch": 0.85, "grad_norm": 0.7775759100914001, "learning_rate": 1.123689843030259e-06, "loss": 2.0098, "step": 25554 }, { "epoch": 0.85, "grad_norm": 0.7536785006523132, "learning_rate": 1.123200372200497e-06, "loss": 2.1355, "step": 25555 }, { "epoch": 0.85, "grad_norm": 0.7498986721038818, "learning_rate": 1.122711001655611e-06, "loss": 2.0739, "step": 25556 }, { "epoch": 0.85, "grad_norm": 0.7649057507514954, "learning_rate": 1.1222217314011364e-06, "loss": 1.9741, "step": 25557 }, { "epoch": 0.85, "grad_norm": 0.7573047280311584, "learning_rate": 1.1217325614425966e-06, "loss": 2.166, "step": 25558 }, { "epoch": 0.85, "grad_norm": 0.7349489331245422, "learning_rate": 1.1212434917855175e-06, "loss": 2.079, "step": 25559 }, { "epoch": 0.85, "grad_norm": 0.7193424701690674, "learning_rate": 1.1207545224354288e-06, "loss": 1.9917, "step": 25560 }, { "epoch": 0.85, "grad_norm": 0.7615953683853149, "learning_rate": 1.1202656533978517e-06, "loss": 2.0525, "step": 25561 }, { "epoch": 0.85, "grad_norm": 0.7715849280357361, "learning_rate": 1.1197768846783074e-06, "loss": 2.1047, "step": 25562 }, { "epoch": 0.85, "grad_norm": 0.7319328784942627, "learning_rate": 1.1192882162823193e-06, "loss": 2.0586, "step": 25563 }, { "epoch": 0.85, "grad_norm": 0.7264840602874756, "learning_rate": 1.118799648215413e-06, "loss": 2.0003, "step": 25564 }, { "epoch": 0.85, "grad_norm": 0.7499474883079529, "learning_rate": 1.1183111804830994e-06, "loss": 2.0188, "step": 25565 }, { "epoch": 0.85, "grad_norm": 0.7261382341384888, "learning_rate": 1.1178228130908997e-06, "loss": 1.9712, "step": 25566 }, { "epoch": 0.85, "grad_norm": 0.73532634973526, "learning_rate": 1.1173345460443352e-06, "loss": 2.0289, "step": 25567 }, { "epoch": 0.85, "grad_norm": 0.7755469679832458, "learning_rate": 1.116846379348917e-06, "loss": 2.0226, "step": 25568 }, { "epoch": 0.85, "grad_norm": 0.7395761609077454, "learning_rate": 1.116358313010164e-06, "loss": 2.0151, "step": 25569 }, { "epoch": 0.85, "grad_norm": 0.7293194532394409, "learning_rate": 1.1158703470335896e-06, "loss": 1.9611, "step": 25570 }, { "epoch": 0.85, "grad_norm": 0.7172329425811768, "learning_rate": 1.1153824814247028e-06, "loss": 1.9672, "step": 25571 }, { "epoch": 0.85, "grad_norm": 0.7232022285461426, "learning_rate": 1.1148947161890177e-06, "loss": 2.0181, "step": 25572 }, { "epoch": 0.85, "grad_norm": 0.7606463432312012, "learning_rate": 1.1144070513320483e-06, "loss": 1.986, "step": 25573 }, { "epoch": 0.85, "grad_norm": 0.7412828803062439, "learning_rate": 1.1139194868592973e-06, "loss": 2.0179, "step": 25574 }, { "epoch": 0.85, "grad_norm": 0.7248949408531189, "learning_rate": 1.1134320227762762e-06, "loss": 2.0345, "step": 25575 }, { "epoch": 0.85, "grad_norm": 0.7832808494567871, "learning_rate": 1.112944659088494e-06, "loss": 2.0477, "step": 25576 }, { "epoch": 0.85, "grad_norm": 0.7331267595291138, "learning_rate": 1.112457395801455e-06, "loss": 2.0571, "step": 25577 }, { "epoch": 0.85, "grad_norm": 0.7391887307167053, "learning_rate": 1.1119702329206616e-06, "loss": 2.0632, "step": 25578 }, { "epoch": 0.85, "grad_norm": 0.7343783378601074, "learning_rate": 1.1114831704516193e-06, "loss": 2.0278, "step": 25579 }, { "epoch": 0.85, "grad_norm": 0.7450145483016968, "learning_rate": 1.1109962083998326e-06, "loss": 2.0649, "step": 25580 }, { "epoch": 0.85, "grad_norm": 0.7479609847068787, "learning_rate": 1.1105093467707994e-06, "loss": 2.0122, "step": 25581 }, { "epoch": 0.85, "grad_norm": 0.7185435891151428, "learning_rate": 1.1100225855700242e-06, "loss": 2.0279, "step": 25582 }, { "epoch": 0.85, "grad_norm": 0.7416254281997681, "learning_rate": 1.1095359248030046e-06, "loss": 2.0537, "step": 25583 }, { "epoch": 0.85, "grad_norm": 0.7201782464981079, "learning_rate": 1.1090493644752342e-06, "loss": 2.0156, "step": 25584 }, { "epoch": 0.85, "grad_norm": 0.7546603083610535, "learning_rate": 1.108562904592214e-06, "loss": 2.0141, "step": 25585 }, { "epoch": 0.85, "grad_norm": 0.7132443189620972, "learning_rate": 1.1080765451594444e-06, "loss": 1.9846, "step": 25586 }, { "epoch": 0.85, "grad_norm": 0.7304291129112244, "learning_rate": 1.1075902861824095e-06, "loss": 1.9411, "step": 25587 }, { "epoch": 0.85, "grad_norm": 0.7557387948036194, "learning_rate": 1.1071041276666084e-06, "loss": 1.9618, "step": 25588 }, { "epoch": 0.85, "grad_norm": 0.7824674248695374, "learning_rate": 1.1066180696175354e-06, "loss": 2.0808, "step": 25589 }, { "epoch": 0.85, "grad_norm": 0.7659381628036499, "learning_rate": 1.1061321120406776e-06, "loss": 2.0571, "step": 25590 }, { "epoch": 0.85, "grad_norm": 0.739253044128418, "learning_rate": 1.105646254941528e-06, "loss": 2.056, "step": 25591 }, { "epoch": 0.85, "grad_norm": 0.7618462443351746, "learning_rate": 1.1051604983255748e-06, "loss": 2.0337, "step": 25592 }, { "epoch": 0.85, "grad_norm": 0.7802045941352844, "learning_rate": 1.1046748421983033e-06, "loss": 2.0515, "step": 25593 }, { "epoch": 0.85, "grad_norm": 0.7340210676193237, "learning_rate": 1.1041892865652027e-06, "loss": 2.0862, "step": 25594 }, { "epoch": 0.85, "grad_norm": 0.7337337732315063, "learning_rate": 1.1037038314317593e-06, "loss": 2.0938, "step": 25595 }, { "epoch": 0.85, "grad_norm": 0.7595854997634888, "learning_rate": 1.103218476803457e-06, "loss": 2.065, "step": 25596 }, { "epoch": 0.85, "grad_norm": 0.802827775478363, "learning_rate": 1.1027332226857768e-06, "loss": 1.9826, "step": 25597 }, { "epoch": 0.85, "grad_norm": 0.7528466582298279, "learning_rate": 1.102248069084203e-06, "loss": 2.0853, "step": 25598 }, { "epoch": 0.85, "grad_norm": 0.7504701018333435, "learning_rate": 1.1017630160042147e-06, "loss": 2.0801, "step": 25599 }, { "epoch": 0.85, "grad_norm": 0.730383038520813, "learning_rate": 1.1012780634512954e-06, "loss": 2.0854, "step": 25600 }, { "epoch": 0.85, "grad_norm": 0.7294831275939941, "learning_rate": 1.1007932114309184e-06, "loss": 2.0935, "step": 25601 }, { "epoch": 0.85, "grad_norm": 0.7153273224830627, "learning_rate": 1.100308459948567e-06, "loss": 2.0768, "step": 25602 }, { "epoch": 0.85, "grad_norm": 0.7667094469070435, "learning_rate": 1.0998238090097124e-06, "loss": 2.0447, "step": 25603 }, { "epoch": 0.85, "grad_norm": 0.7227137088775635, "learning_rate": 1.0993392586198349e-06, "loss": 1.9862, "step": 25604 }, { "epoch": 0.85, "grad_norm": 0.7257834076881409, "learning_rate": 1.0988548087844054e-06, "loss": 1.9713, "step": 25605 }, { "epoch": 0.85, "grad_norm": 0.7546156048774719, "learning_rate": 1.0983704595088962e-06, "loss": 2.0447, "step": 25606 }, { "epoch": 0.85, "grad_norm": 0.7466554641723633, "learning_rate": 1.0978862107987799e-06, "loss": 2.0435, "step": 25607 }, { "epoch": 0.85, "grad_norm": 0.7298524975776672, "learning_rate": 1.0974020626595328e-06, "loss": 1.9683, "step": 25608 }, { "epoch": 0.85, "grad_norm": 0.7515607476234436, "learning_rate": 1.0969180150966162e-06, "loss": 2.0234, "step": 25609 }, { "epoch": 0.85, "grad_norm": 0.7550762891769409, "learning_rate": 1.0964340681155017e-06, "loss": 1.9821, "step": 25610 }, { "epoch": 0.85, "grad_norm": 0.7641791701316833, "learning_rate": 1.0959502217216589e-06, "loss": 2.0348, "step": 25611 }, { "epoch": 0.85, "grad_norm": 0.7577338218688965, "learning_rate": 1.0954664759205503e-06, "loss": 2.0436, "step": 25612 }, { "epoch": 0.85, "grad_norm": 0.7384804487228394, "learning_rate": 1.0949828307176447e-06, "loss": 2.0559, "step": 25613 }, { "epoch": 0.85, "grad_norm": 0.7456777095794678, "learning_rate": 1.0944992861184044e-06, "loss": 2.0315, "step": 25614 }, { "epoch": 0.85, "grad_norm": 0.7368715405464172, "learning_rate": 1.0940158421282898e-06, "loss": 2.0549, "step": 25615 }, { "epoch": 0.85, "grad_norm": 0.7639155983924866, "learning_rate": 1.093532498752765e-06, "loss": 2.0194, "step": 25616 }, { "epoch": 0.85, "grad_norm": 0.7549625039100647, "learning_rate": 1.0930492559972928e-06, "loss": 2.0362, "step": 25617 }, { "epoch": 0.85, "grad_norm": 0.7858089208602905, "learning_rate": 1.0925661138673293e-06, "loss": 2.0368, "step": 25618 }, { "epoch": 0.85, "grad_norm": 0.7371587753295898, "learning_rate": 1.0920830723683328e-06, "loss": 2.039, "step": 25619 }, { "epoch": 0.85, "grad_norm": 0.7286743521690369, "learning_rate": 1.0916001315057623e-06, "loss": 2.0835, "step": 25620 }, { "epoch": 0.85, "grad_norm": 0.7272665500640869, "learning_rate": 1.091117291285071e-06, "loss": 2.086, "step": 25621 }, { "epoch": 0.85, "grad_norm": 0.7482417821884155, "learning_rate": 1.0906345517117167e-06, "loss": 1.9871, "step": 25622 }, { "epoch": 0.85, "grad_norm": 0.7229847311973572, "learning_rate": 1.0901519127911497e-06, "loss": 2.023, "step": 25623 }, { "epoch": 0.85, "grad_norm": 0.7484353184700012, "learning_rate": 1.0896693745288279e-06, "loss": 2.0203, "step": 25624 }, { "epoch": 0.85, "grad_norm": 0.7271804213523865, "learning_rate": 1.0891869369301967e-06, "loss": 2.0478, "step": 25625 }, { "epoch": 0.85, "grad_norm": 0.7588217854499817, "learning_rate": 1.0887046000007117e-06, "loss": 2.0441, "step": 25626 }, { "epoch": 0.85, "grad_norm": 0.7179100513458252, "learning_rate": 1.088222363745819e-06, "loss": 1.9992, "step": 25627 }, { "epoch": 0.85, "grad_norm": 0.7399671673774719, "learning_rate": 1.087740228170966e-06, "loss": 2.0602, "step": 25628 }, { "epoch": 0.85, "grad_norm": 0.7445325255393982, "learning_rate": 1.0872581932816006e-06, "loss": 2.0829, "step": 25629 }, { "epoch": 0.85, "grad_norm": 0.7651894688606262, "learning_rate": 1.0867762590831709e-06, "loss": 2.0974, "step": 25630 }, { "epoch": 0.85, "grad_norm": 0.7661094069480896, "learning_rate": 1.086294425581118e-06, "loss": 2.0087, "step": 25631 }, { "epoch": 0.85, "grad_norm": 0.7849562168121338, "learning_rate": 1.0858126927808866e-06, "loss": 2.0331, "step": 25632 }, { "epoch": 0.85, "grad_norm": 0.7501575350761414, "learning_rate": 1.0853310606879197e-06, "loss": 2.0808, "step": 25633 }, { "epoch": 0.85, "grad_norm": 0.7664833664894104, "learning_rate": 1.0848495293076567e-06, "loss": 2.0106, "step": 25634 }, { "epoch": 0.85, "grad_norm": 0.7493854761123657, "learning_rate": 1.0843680986455408e-06, "loss": 2.0815, "step": 25635 }, { "epoch": 0.85, "grad_norm": 0.7543217539787292, "learning_rate": 1.08388676870701e-06, "loss": 2.0215, "step": 25636 }, { "epoch": 0.85, "grad_norm": 0.7357943058013916, "learning_rate": 1.0834055394974973e-06, "loss": 2.0267, "step": 25637 }, { "epoch": 0.85, "grad_norm": 0.7303991317749023, "learning_rate": 1.0829244110224447e-06, "loss": 2.0159, "step": 25638 }, { "epoch": 0.85, "grad_norm": 0.7448989152908325, "learning_rate": 1.0824433832872873e-06, "loss": 1.9924, "step": 25639 }, { "epoch": 0.85, "grad_norm": 0.7493236064910889, "learning_rate": 1.0819624562974584e-06, "loss": 2.0316, "step": 25640 }, { "epoch": 0.85, "grad_norm": 0.7773019075393677, "learning_rate": 1.0814816300583896e-06, "loss": 2.1019, "step": 25641 }, { "epoch": 0.85, "grad_norm": 0.7728946208953857, "learning_rate": 1.0810009045755165e-06, "loss": 2.1127, "step": 25642 }, { "epoch": 0.85, "grad_norm": 0.7427574396133423, "learning_rate": 1.0805202798542658e-06, "loss": 2.0737, "step": 25643 }, { "epoch": 0.85, "grad_norm": 0.7579944729804993, "learning_rate": 1.080039755900072e-06, "loss": 1.9549, "step": 25644 }, { "epoch": 0.85, "grad_norm": 0.7827484011650085, "learning_rate": 1.0795593327183596e-06, "loss": 2.1541, "step": 25645 }, { "epoch": 0.85, "grad_norm": 0.747428834438324, "learning_rate": 1.07907901031456e-06, "loss": 2.0369, "step": 25646 }, { "epoch": 0.85, "grad_norm": 0.7605673670768738, "learning_rate": 1.0785987886940952e-06, "loss": 2.0971, "step": 25647 }, { "epoch": 0.85, "grad_norm": 0.7300199270248413, "learning_rate": 1.0781186678623945e-06, "loss": 2.059, "step": 25648 }, { "epoch": 0.85, "grad_norm": 0.7193981409072876, "learning_rate": 1.0776386478248803e-06, "loss": 2.0527, "step": 25649 }, { "epoch": 0.85, "grad_norm": 0.7398879528045654, "learning_rate": 1.0771587285869744e-06, "loss": 2.0604, "step": 25650 }, { "epoch": 0.85, "grad_norm": 0.7175271511077881, "learning_rate": 1.0766789101540998e-06, "loss": 1.9924, "step": 25651 }, { "epoch": 0.85, "grad_norm": 0.7435488700866699, "learning_rate": 1.0761991925316783e-06, "loss": 1.9703, "step": 25652 }, { "epoch": 0.85, "grad_norm": 0.7405022382736206, "learning_rate": 1.0757195757251293e-06, "loss": 2.0373, "step": 25653 }, { "epoch": 0.85, "grad_norm": 0.7471997737884521, "learning_rate": 1.0752400597398683e-06, "loss": 2.1105, "step": 25654 }, { "epoch": 0.85, "grad_norm": 0.7272506952285767, "learning_rate": 1.0747606445813175e-06, "loss": 1.9889, "step": 25655 }, { "epoch": 0.85, "grad_norm": 0.7743155360221863, "learning_rate": 1.074281330254887e-06, "loss": 2.0184, "step": 25656 }, { "epoch": 0.85, "grad_norm": 0.7350564002990723, "learning_rate": 1.0738021167659974e-06, "loss": 2.0917, "step": 25657 }, { "epoch": 0.85, "grad_norm": 0.7490598559379578, "learning_rate": 1.0733230041200603e-06, "loss": 2.0851, "step": 25658 }, { "epoch": 0.85, "grad_norm": 0.7429494857788086, "learning_rate": 1.0728439923224865e-06, "loss": 2.0853, "step": 25659 }, { "epoch": 0.85, "grad_norm": 0.7397154569625854, "learning_rate": 1.072365081378689e-06, "loss": 2.0059, "step": 25660 }, { "epoch": 0.85, "grad_norm": 0.7442455291748047, "learning_rate": 1.0718862712940815e-06, "loss": 2.0145, "step": 25661 }, { "epoch": 0.85, "grad_norm": 0.7686842083930969, "learning_rate": 1.0714075620740694e-06, "loss": 2.0001, "step": 25662 }, { "epoch": 0.85, "grad_norm": 0.7437859177589417, "learning_rate": 1.0709289537240608e-06, "loss": 2.0724, "step": 25663 }, { "epoch": 0.85, "grad_norm": 0.7169320583343506, "learning_rate": 1.0704504462494637e-06, "loss": 2.0237, "step": 25664 }, { "epoch": 0.85, "grad_norm": 0.761655330657959, "learning_rate": 1.0699720396556868e-06, "loss": 2.0384, "step": 25665 }, { "epoch": 0.85, "grad_norm": 0.730991542339325, "learning_rate": 1.0694937339481315e-06, "loss": 2.0318, "step": 25666 }, { "epoch": 0.85, "grad_norm": 0.743136465549469, "learning_rate": 1.0690155291322002e-06, "loss": 2.0051, "step": 25667 }, { "epoch": 0.85, "grad_norm": 0.7378832697868347, "learning_rate": 1.0685374252132996e-06, "loss": 2.1331, "step": 25668 }, { "epoch": 0.85, "grad_norm": 0.7237527966499329, "learning_rate": 1.0680594221968265e-06, "loss": 2.0007, "step": 25669 }, { "epoch": 0.85, "grad_norm": 0.7638330459594727, "learning_rate": 1.0675815200881868e-06, "loss": 2.0611, "step": 25670 }, { "epoch": 0.85, "grad_norm": 0.7427726984024048, "learning_rate": 1.0671037188927747e-06, "loss": 2.0765, "step": 25671 }, { "epoch": 0.85, "grad_norm": 0.7686672806739807, "learning_rate": 1.0666260186159882e-06, "loss": 2.0555, "step": 25672 }, { "epoch": 0.85, "grad_norm": 0.7554507851600647, "learning_rate": 1.0661484192632255e-06, "loss": 2.1012, "step": 25673 }, { "epoch": 0.85, "grad_norm": 0.7501378059387207, "learning_rate": 1.065670920839883e-06, "loss": 2.0263, "step": 25674 }, { "epoch": 0.85, "grad_norm": 0.7537559866905212, "learning_rate": 1.0651935233513555e-06, "loss": 2.0542, "step": 25675 }, { "epoch": 0.85, "grad_norm": 0.7471948266029358, "learning_rate": 1.0647162268030331e-06, "loss": 1.9957, "step": 25676 }, { "epoch": 0.85, "grad_norm": 0.7303323149681091, "learning_rate": 1.0642390312003126e-06, "loss": 2.0349, "step": 25677 }, { "epoch": 0.85, "grad_norm": 0.7317492961883545, "learning_rate": 1.0637619365485808e-06, "loss": 2.0118, "step": 25678 }, { "epoch": 0.85, "grad_norm": 0.7261767387390137, "learning_rate": 1.063284942853231e-06, "loss": 2.0939, "step": 25679 }, { "epoch": 0.85, "grad_norm": 0.7600923180580139, "learning_rate": 1.062808050119648e-06, "loss": 2.01, "step": 25680 }, { "epoch": 0.85, "grad_norm": 0.7691116333007812, "learning_rate": 1.0623312583532242e-06, "loss": 2.0853, "step": 25681 }, { "epoch": 0.85, "grad_norm": 0.7224286198616028, "learning_rate": 1.0618545675593429e-06, "loss": 2.076, "step": 25682 }, { "epoch": 0.85, "grad_norm": 0.7406932711601257, "learning_rate": 1.0613779777433908e-06, "loss": 2.0914, "step": 25683 }, { "epoch": 0.85, "grad_norm": 0.7692009806632996, "learning_rate": 1.0609014889107527e-06, "loss": 1.9773, "step": 25684 }, { "epoch": 0.85, "grad_norm": 0.7442301511764526, "learning_rate": 1.0604251010668088e-06, "loss": 2.0408, "step": 25685 }, { "epoch": 0.85, "grad_norm": 0.7615671157836914, "learning_rate": 1.0599488142169434e-06, "loss": 1.9826, "step": 25686 }, { "epoch": 0.85, "grad_norm": 0.7385686039924622, "learning_rate": 1.059472628366538e-06, "loss": 2.0635, "step": 25687 }, { "epoch": 0.85, "grad_norm": 0.7261627316474915, "learning_rate": 1.0589965435209714e-06, "loss": 2.0735, "step": 25688 }, { "epoch": 0.85, "grad_norm": 0.741863489151001, "learning_rate": 1.0585205596856196e-06, "loss": 2.0633, "step": 25689 }, { "epoch": 0.85, "grad_norm": 0.7368066310882568, "learning_rate": 1.0580446768658648e-06, "loss": 2.0331, "step": 25690 }, { "epoch": 0.85, "grad_norm": 0.7324717044830322, "learning_rate": 1.0575688950670793e-06, "loss": 2.0119, "step": 25691 }, { "epoch": 0.85, "grad_norm": 0.7567073702812195, "learning_rate": 1.0570932142946389e-06, "loss": 2.0125, "step": 25692 }, { "epoch": 0.85, "grad_norm": 0.7309162020683289, "learning_rate": 1.0566176345539225e-06, "loss": 2.0627, "step": 25693 }, { "epoch": 0.85, "grad_norm": 0.7520669102668762, "learning_rate": 1.056142155850295e-06, "loss": 2.0009, "step": 25694 }, { "epoch": 0.85, "grad_norm": 0.7557404637336731, "learning_rate": 1.0556667781891305e-06, "loss": 2.0363, "step": 25695 }, { "epoch": 0.85, "grad_norm": 0.7563972473144531, "learning_rate": 1.0551915015758042e-06, "loss": 2.0721, "step": 25696 }, { "epoch": 0.85, "grad_norm": 0.7686523795127869, "learning_rate": 1.054716326015681e-06, "loss": 2.0981, "step": 25697 }, { "epoch": 0.85, "grad_norm": 0.7877217531204224, "learning_rate": 1.0542412515141298e-06, "loss": 2.0941, "step": 25698 }, { "epoch": 0.86, "grad_norm": 0.7257724404335022, "learning_rate": 1.053766278076519e-06, "loss": 2.091, "step": 25699 }, { "epoch": 0.86, "grad_norm": 0.7235658764839172, "learning_rate": 1.0532914057082123e-06, "loss": 2.0676, "step": 25700 }, { "epoch": 0.86, "grad_norm": 0.7406724095344543, "learning_rate": 1.0528166344145785e-06, "loss": 1.9995, "step": 25701 }, { "epoch": 0.86, "grad_norm": 0.7324613332748413, "learning_rate": 1.0523419642009758e-06, "loss": 1.9248, "step": 25702 }, { "epoch": 0.86, "grad_norm": 0.7732944488525391, "learning_rate": 1.051867395072772e-06, "loss": 2.0682, "step": 25703 }, { "epoch": 0.86, "grad_norm": 0.7542123794555664, "learning_rate": 1.0513929270353252e-06, "loss": 2.0648, "step": 25704 }, { "epoch": 0.86, "grad_norm": 0.7391310334205627, "learning_rate": 1.0509185600939975e-06, "loss": 2.0359, "step": 25705 }, { "epoch": 0.86, "grad_norm": 0.720277726650238, "learning_rate": 1.050444294254148e-06, "loss": 1.9842, "step": 25706 }, { "epoch": 0.86, "grad_norm": 0.731803834438324, "learning_rate": 1.0499701295211318e-06, "loss": 2.0203, "step": 25707 }, { "epoch": 0.86, "grad_norm": 0.7510703802108765, "learning_rate": 1.0494960659003072e-06, "loss": 2.0453, "step": 25708 }, { "epoch": 0.86, "grad_norm": 0.7525373101234436, "learning_rate": 1.0490221033970328e-06, "loss": 1.9853, "step": 25709 }, { "epoch": 0.86, "grad_norm": 0.7505938410758972, "learning_rate": 1.0485482420166614e-06, "loss": 2.0651, "step": 25710 }, { "epoch": 0.86, "grad_norm": 0.7590008974075317, "learning_rate": 1.0480744817645438e-06, "loss": 2.0369, "step": 25711 }, { "epoch": 0.86, "grad_norm": 0.7414840459823608, "learning_rate": 1.0476008226460354e-06, "loss": 2.0474, "step": 25712 }, { "epoch": 0.86, "grad_norm": 0.7427039742469788, "learning_rate": 1.0471272646664854e-06, "loss": 2.0688, "step": 25713 }, { "epoch": 0.86, "grad_norm": 0.7563658356666565, "learning_rate": 1.0466538078312437e-06, "loss": 2.0729, "step": 25714 }, { "epoch": 0.86, "grad_norm": 0.7695289254188538, "learning_rate": 1.0461804521456654e-06, "loss": 2.0176, "step": 25715 }, { "epoch": 0.86, "grad_norm": 0.759555459022522, "learning_rate": 1.045707197615088e-06, "loss": 2.034, "step": 25716 }, { "epoch": 0.86, "grad_norm": 0.729200005531311, "learning_rate": 1.0452340442448628e-06, "loss": 1.9496, "step": 25717 }, { "epoch": 0.86, "grad_norm": 0.7555876970291138, "learning_rate": 1.044760992040338e-06, "loss": 2.1093, "step": 25718 }, { "epoch": 0.86, "grad_norm": 0.7286893725395203, "learning_rate": 1.0442880410068546e-06, "loss": 2.0027, "step": 25719 }, { "epoch": 0.86, "grad_norm": 0.7448348999023438, "learning_rate": 1.0438151911497552e-06, "loss": 1.9858, "step": 25720 }, { "epoch": 0.86, "grad_norm": 0.7271620035171509, "learning_rate": 1.0433424424743842e-06, "loss": 2.102, "step": 25721 }, { "epoch": 0.86, "grad_norm": 0.7297613620758057, "learning_rate": 1.0428697949860788e-06, "loss": 2.0057, "step": 25722 }, { "epoch": 0.86, "grad_norm": 0.7493481636047363, "learning_rate": 1.0423972486901823e-06, "loss": 2.0531, "step": 25723 }, { "epoch": 0.86, "grad_norm": 0.7563741207122803, "learning_rate": 1.0419248035920326e-06, "loss": 1.9966, "step": 25724 }, { "epoch": 0.86, "grad_norm": 0.7450403571128845, "learning_rate": 1.0414524596969676e-06, "loss": 2.0886, "step": 25725 }, { "epoch": 0.86, "grad_norm": 0.7410711646080017, "learning_rate": 1.04098021701032e-06, "loss": 1.9734, "step": 25726 }, { "epoch": 0.86, "grad_norm": 0.7379253506660461, "learning_rate": 1.0405080755374297e-06, "loss": 2.0136, "step": 25727 }, { "epoch": 0.86, "grad_norm": 0.7556507587432861, "learning_rate": 1.040036035283628e-06, "loss": 2.0431, "step": 25728 }, { "epoch": 0.86, "grad_norm": 0.7782156467437744, "learning_rate": 1.0395640962542464e-06, "loss": 2.0762, "step": 25729 }, { "epoch": 0.86, "grad_norm": 0.7497164011001587, "learning_rate": 1.039092258454618e-06, "loss": 2.0076, "step": 25730 }, { "epoch": 0.86, "grad_norm": 0.7253671288490295, "learning_rate": 1.0386205218900759e-06, "loss": 2.0952, "step": 25731 }, { "epoch": 0.86, "grad_norm": 0.7224748730659485, "learning_rate": 1.0381488865659473e-06, "loss": 1.9896, "step": 25732 }, { "epoch": 0.86, "grad_norm": 0.7455363273620605, "learning_rate": 1.0376773524875583e-06, "loss": 2.0748, "step": 25733 }, { "epoch": 0.86, "grad_norm": 0.760480523109436, "learning_rate": 1.0372059196602401e-06, "loss": 1.9962, "step": 25734 }, { "epoch": 0.86, "grad_norm": 0.7405458688735962, "learning_rate": 1.0367345880893155e-06, "loss": 2.1212, "step": 25735 }, { "epoch": 0.86, "grad_norm": 0.7357072830200195, "learning_rate": 1.0362633577801096e-06, "loss": 2.0512, "step": 25736 }, { "epoch": 0.86, "grad_norm": 0.7341716289520264, "learning_rate": 1.035792228737952e-06, "loss": 2.0868, "step": 25737 }, { "epoch": 0.86, "grad_norm": 0.7235838770866394, "learning_rate": 1.0353212009681545e-06, "loss": 2.0778, "step": 25738 }, { "epoch": 0.86, "grad_norm": 0.7489319443702698, "learning_rate": 1.0348502744760457e-06, "loss": 2.0375, "step": 25739 }, { "epoch": 0.86, "grad_norm": 0.7543151378631592, "learning_rate": 1.0343794492669457e-06, "loss": 2.0705, "step": 25740 }, { "epoch": 0.86, "grad_norm": 0.7414613366127014, "learning_rate": 1.0339087253461732e-06, "loss": 2.0184, "step": 25741 }, { "epoch": 0.86, "grad_norm": 0.7648339867591858, "learning_rate": 1.033438102719042e-06, "loss": 2.065, "step": 25742 }, { "epoch": 0.86, "grad_norm": 0.7445924878120422, "learning_rate": 1.0329675813908758e-06, "loss": 2.0883, "step": 25743 }, { "epoch": 0.86, "grad_norm": 0.7281510829925537, "learning_rate": 1.0324971613669832e-06, "loss": 2.0415, "step": 25744 }, { "epoch": 0.86, "grad_norm": 0.762367308139801, "learning_rate": 1.0320268426526825e-06, "loss": 2.065, "step": 25745 }, { "epoch": 0.86, "grad_norm": 0.7247112989425659, "learning_rate": 1.0315566252532894e-06, "loss": 1.9791, "step": 25746 }, { "epoch": 0.86, "grad_norm": 0.7449166774749756, "learning_rate": 1.031086509174113e-06, "loss": 2.0674, "step": 25747 }, { "epoch": 0.86, "grad_norm": 0.727767825126648, "learning_rate": 1.0306164944204634e-06, "loss": 2.0274, "step": 25748 }, { "epoch": 0.86, "grad_norm": 0.764346182346344, "learning_rate": 1.0301465809976541e-06, "loss": 2.0318, "step": 25749 }, { "epoch": 0.86, "grad_norm": 0.7388762831687927, "learning_rate": 1.029676768910991e-06, "loss": 1.9891, "step": 25750 }, { "epoch": 0.86, "grad_norm": 0.7313951849937439, "learning_rate": 1.0292070581657809e-06, "loss": 2.0054, "step": 25751 }, { "epoch": 0.86, "grad_norm": 0.7080071568489075, "learning_rate": 1.028737448767333e-06, "loss": 2.0571, "step": 25752 }, { "epoch": 0.86, "grad_norm": 0.7233836650848389, "learning_rate": 1.0282679407209529e-06, "loss": 2.0532, "step": 25753 }, { "epoch": 0.86, "grad_norm": 0.7567445039749146, "learning_rate": 1.027798534031944e-06, "loss": 2.0188, "step": 25754 }, { "epoch": 0.86, "grad_norm": 0.74405437707901, "learning_rate": 1.0273292287056069e-06, "loss": 2.0173, "step": 25755 }, { "epoch": 0.86, "grad_norm": 0.7492372989654541, "learning_rate": 1.026860024747247e-06, "loss": 2.0079, "step": 25756 }, { "epoch": 0.86, "grad_norm": 0.7637950778007507, "learning_rate": 1.0263909221621627e-06, "loss": 2.0465, "step": 25757 }, { "epoch": 0.86, "grad_norm": 0.7583557367324829, "learning_rate": 1.0259219209556536e-06, "loss": 2.0225, "step": 25758 }, { "epoch": 0.86, "grad_norm": 0.7359915971755981, "learning_rate": 1.025453021133025e-06, "loss": 2.0514, "step": 25759 }, { "epoch": 0.86, "grad_norm": 0.7662783861160278, "learning_rate": 1.0249842226995633e-06, "loss": 2.0096, "step": 25760 }, { "epoch": 0.86, "grad_norm": 0.7452043890953064, "learning_rate": 1.0245155256605699e-06, "loss": 2.0323, "step": 25761 }, { "epoch": 0.86, "grad_norm": 0.7338219285011292, "learning_rate": 1.024046930021343e-06, "loss": 2.0483, "step": 25762 }, { "epoch": 0.86, "grad_norm": 0.7277237176895142, "learning_rate": 1.0235784357871725e-06, "loss": 2.071, "step": 25763 }, { "epoch": 0.86, "grad_norm": 0.7613467574119568, "learning_rate": 1.0231100429633511e-06, "loss": 2.0356, "step": 25764 }, { "epoch": 0.86, "grad_norm": 0.7483755350112915, "learning_rate": 1.0226417515551724e-06, "loss": 2.0242, "step": 25765 }, { "epoch": 0.86, "grad_norm": 0.7352116107940674, "learning_rate": 1.022173561567924e-06, "loss": 2.0541, "step": 25766 }, { "epoch": 0.86, "grad_norm": 0.7330235838890076, "learning_rate": 1.0217054730068975e-06, "loss": 2.0134, "step": 25767 }, { "epoch": 0.86, "grad_norm": 0.7904192209243774, "learning_rate": 1.0212374858773832e-06, "loss": 2.0129, "step": 25768 }, { "epoch": 0.86, "grad_norm": 0.7245510220527649, "learning_rate": 1.0207696001846655e-06, "loss": 1.9769, "step": 25769 }, { "epoch": 0.86, "grad_norm": 0.7236512899398804, "learning_rate": 1.0203018159340282e-06, "loss": 2.0261, "step": 25770 }, { "epoch": 0.86, "grad_norm": 0.7691099643707275, "learning_rate": 1.0198341331307582e-06, "loss": 2.0609, "step": 25771 }, { "epoch": 0.86, "grad_norm": 0.7410522103309631, "learning_rate": 1.0193665517801443e-06, "loss": 1.9685, "step": 25772 }, { "epoch": 0.86, "grad_norm": 0.7280692458152771, "learning_rate": 1.018899071887459e-06, "loss": 2.0092, "step": 25773 }, { "epoch": 0.86, "grad_norm": 0.7256282567977905, "learning_rate": 1.0184316934579885e-06, "loss": 1.982, "step": 25774 }, { "epoch": 0.86, "grad_norm": 0.7744770050048828, "learning_rate": 1.017964416497015e-06, "loss": 2.011, "step": 25775 }, { "epoch": 0.86, "grad_norm": 0.724440336227417, "learning_rate": 1.0174972410098138e-06, "loss": 2.0261, "step": 25776 }, { "epoch": 0.86, "grad_norm": 0.7367421388626099, "learning_rate": 1.017030167001667e-06, "loss": 2.0665, "step": 25777 }, { "epoch": 0.86, "grad_norm": 0.7193717956542969, "learning_rate": 1.0165631944778475e-06, "loss": 2.0794, "step": 25778 }, { "epoch": 0.86, "grad_norm": 0.7565749883651733, "learning_rate": 1.0160963234436316e-06, "loss": 2.0793, "step": 25779 }, { "epoch": 0.86, "grad_norm": 0.7250953316688538, "learning_rate": 1.0156295539042937e-06, "loss": 2.0222, "step": 25780 }, { "epoch": 0.86, "grad_norm": 0.7378321290016174, "learning_rate": 1.0151628858651097e-06, "loss": 2.0266, "step": 25781 }, { "epoch": 0.86, "grad_norm": 0.746042013168335, "learning_rate": 1.014696319331351e-06, "loss": 2.0462, "step": 25782 }, { "epoch": 0.86, "grad_norm": 0.7468606233596802, "learning_rate": 1.014229854308284e-06, "loss": 2.092, "step": 25783 }, { "epoch": 0.86, "grad_norm": 0.7180263996124268, "learning_rate": 1.013763490801184e-06, "loss": 2.045, "step": 25784 }, { "epoch": 0.86, "grad_norm": 0.7249382734298706, "learning_rate": 1.0132972288153175e-06, "loss": 2.0714, "step": 25785 }, { "epoch": 0.86, "grad_norm": 0.7478383779525757, "learning_rate": 1.0128310683559507e-06, "loss": 2.0022, "step": 25786 }, { "epoch": 0.86, "grad_norm": 0.7205400466918945, "learning_rate": 1.012365009428351e-06, "loss": 1.992, "step": 25787 }, { "epoch": 0.86, "grad_norm": 0.7346038222312927, "learning_rate": 1.011899052037787e-06, "loss": 2.0128, "step": 25788 }, { "epoch": 0.86, "grad_norm": 0.73969966173172, "learning_rate": 1.0114331961895174e-06, "loss": 2.0471, "step": 25789 }, { "epoch": 0.86, "grad_norm": 0.7428418397903442, "learning_rate": 1.0109674418888093e-06, "loss": 2.0247, "step": 25790 }, { "epoch": 0.86, "grad_norm": 0.7601402401924133, "learning_rate": 1.0105017891409242e-06, "loss": 2.0143, "step": 25791 }, { "epoch": 0.86, "grad_norm": 0.7239035964012146, "learning_rate": 1.0100362379511186e-06, "loss": 2.0355, "step": 25792 }, { "epoch": 0.86, "grad_norm": 0.7687256932258606, "learning_rate": 1.0095707883246552e-06, "loss": 2.0433, "step": 25793 }, { "epoch": 0.86, "grad_norm": 0.754247784614563, "learning_rate": 1.0091054402667955e-06, "loss": 2.0838, "step": 25794 }, { "epoch": 0.86, "grad_norm": 0.7615288496017456, "learning_rate": 1.0086401937827906e-06, "loss": 2.0499, "step": 25795 }, { "epoch": 0.86, "grad_norm": 0.7577502131462097, "learning_rate": 1.0081750488778985e-06, "loss": 2.0797, "step": 25796 }, { "epoch": 0.86, "grad_norm": 0.755070149898529, "learning_rate": 1.0077100055573774e-06, "loss": 2.0639, "step": 25797 }, { "epoch": 0.86, "grad_norm": 0.7582204937934875, "learning_rate": 1.0072450638264763e-06, "loss": 2.0444, "step": 25798 }, { "epoch": 0.86, "grad_norm": 0.7567570209503174, "learning_rate": 1.0067802236904523e-06, "loss": 2.0173, "step": 25799 }, { "epoch": 0.86, "grad_norm": 0.7594166994094849, "learning_rate": 1.0063154851545542e-06, "loss": 2.0239, "step": 25800 }, { "epoch": 0.86, "grad_norm": 0.7277630567550659, "learning_rate": 1.0058508482240315e-06, "loss": 1.9733, "step": 25801 }, { "epoch": 0.86, "grad_norm": 0.7470248937606812, "learning_rate": 1.0053863129041353e-06, "loss": 2.0243, "step": 25802 }, { "epoch": 0.86, "grad_norm": 0.7536552548408508, "learning_rate": 1.0049218792001147e-06, "loss": 2.0387, "step": 25803 }, { "epoch": 0.86, "grad_norm": 0.7575001120567322, "learning_rate": 1.0044575471172147e-06, "loss": 2.0467, "step": 25804 }, { "epoch": 0.86, "grad_norm": 0.7614201307296753, "learning_rate": 1.0039933166606797e-06, "loss": 2.0721, "step": 25805 }, { "epoch": 0.86, "grad_norm": 0.7290003299713135, "learning_rate": 1.0035291878357578e-06, "loss": 1.9898, "step": 25806 }, { "epoch": 0.86, "grad_norm": 0.7385231256484985, "learning_rate": 1.0030651606476893e-06, "loss": 2.0349, "step": 25807 }, { "epoch": 0.86, "grad_norm": 0.7457908391952515, "learning_rate": 1.00260123510172e-06, "loss": 2.0494, "step": 25808 }, { "epoch": 0.86, "grad_norm": 0.7281091809272766, "learning_rate": 1.0021374112030857e-06, "loss": 2.0246, "step": 25809 }, { "epoch": 0.86, "grad_norm": 0.7556902170181274, "learning_rate": 1.0016736889570332e-06, "loss": 2.0554, "step": 25810 }, { "epoch": 0.86, "grad_norm": 0.722694456577301, "learning_rate": 1.0012100683687953e-06, "loss": 2.0215, "step": 25811 }, { "epoch": 0.86, "grad_norm": 0.7761778831481934, "learning_rate": 1.0007465494436142e-06, "loss": 2.0971, "step": 25812 }, { "epoch": 0.86, "grad_norm": 0.7278135418891907, "learning_rate": 1.0002831321867235e-06, "loss": 2.0636, "step": 25813 }, { "epoch": 0.86, "grad_norm": 0.7265075445175171, "learning_rate": 9.99819816603359e-07, "loss": 2.0126, "step": 25814 }, { "epoch": 0.86, "grad_norm": 0.7465983629226685, "learning_rate": 9.993566026987544e-07, "loss": 1.9358, "step": 25815 }, { "epoch": 0.86, "grad_norm": 0.7712118029594421, "learning_rate": 9.988934904781488e-07, "loss": 2.0723, "step": 25816 }, { "epoch": 0.86, "grad_norm": 0.7292450070381165, "learning_rate": 9.98430479946766e-07, "loss": 1.9269, "step": 25817 }, { "epoch": 0.86, "grad_norm": 0.7559846043586731, "learning_rate": 9.97967571109839e-07, "loss": 2.0205, "step": 25818 }, { "epoch": 0.86, "grad_norm": 0.7426708340644836, "learning_rate": 9.97504763972601e-07, "loss": 2.1079, "step": 25819 }, { "epoch": 0.86, "grad_norm": 0.766106903553009, "learning_rate": 9.970420585402762e-07, "loss": 2.0437, "step": 25820 }, { "epoch": 0.86, "grad_norm": 0.7611443996429443, "learning_rate": 9.965794548180963e-07, "loss": 2.0476, "step": 25821 }, { "epoch": 0.86, "grad_norm": 0.7937203049659729, "learning_rate": 9.961169528112847e-07, "loss": 2.0654, "step": 25822 }, { "epoch": 0.86, "grad_norm": 0.7490450143814087, "learning_rate": 9.956545525250661e-07, "loss": 2.0061, "step": 25823 }, { "epoch": 0.86, "grad_norm": 0.7592965364456177, "learning_rate": 9.951922539646642e-07, "loss": 2.0987, "step": 25824 }, { "epoch": 0.86, "grad_norm": 0.7501606941223145, "learning_rate": 9.947300571353047e-07, "loss": 2.0633, "step": 25825 }, { "epoch": 0.86, "grad_norm": 0.7309877872467041, "learning_rate": 9.94267962042208e-07, "loss": 2.0924, "step": 25826 }, { "epoch": 0.86, "grad_norm": 0.7499247193336487, "learning_rate": 9.93805968690592e-07, "loss": 2.0134, "step": 25827 }, { "epoch": 0.86, "grad_norm": 0.7084293365478516, "learning_rate": 9.933440770856795e-07, "loss": 2.0226, "step": 25828 }, { "epoch": 0.86, "grad_norm": 0.7412239909172058, "learning_rate": 9.92882287232686e-07, "loss": 2.0339, "step": 25829 }, { "epoch": 0.86, "grad_norm": 0.7101403474807739, "learning_rate": 9.92420599136832e-07, "loss": 2.0756, "step": 25830 }, { "epoch": 0.86, "grad_norm": 0.7837666273117065, "learning_rate": 9.919590128033275e-07, "loss": 2.1011, "step": 25831 }, { "epoch": 0.86, "grad_norm": 0.7392111420631409, "learning_rate": 9.91497528237394e-07, "loss": 1.9371, "step": 25832 }, { "epoch": 0.86, "grad_norm": 0.7430040240287781, "learning_rate": 9.910361454442396e-07, "loss": 2.0728, "step": 25833 }, { "epoch": 0.86, "grad_norm": 0.752509593963623, "learning_rate": 9.905748644290815e-07, "loss": 2.0866, "step": 25834 }, { "epoch": 0.86, "grad_norm": 0.7462926506996155, "learning_rate": 9.901136851971283e-07, "loss": 1.9459, "step": 25835 }, { "epoch": 0.86, "grad_norm": 0.7393240332603455, "learning_rate": 9.896526077535884e-07, "loss": 2.0053, "step": 25836 }, { "epoch": 0.86, "grad_norm": 0.7462487816810608, "learning_rate": 9.89191632103672e-07, "loss": 2.0342, "step": 25837 }, { "epoch": 0.86, "grad_norm": 0.7826508283615112, "learning_rate": 9.887307582525907e-07, "loss": 2.1251, "step": 25838 }, { "epoch": 0.86, "grad_norm": 0.7426804304122925, "learning_rate": 9.882699862055479e-07, "loss": 2.0986, "step": 25839 }, { "epoch": 0.86, "grad_norm": 0.7832672595977783, "learning_rate": 9.878093159677471e-07, "loss": 2.0264, "step": 25840 }, { "epoch": 0.86, "grad_norm": 0.7444686889648438, "learning_rate": 9.873487475443965e-07, "loss": 2.0664, "step": 25841 }, { "epoch": 0.86, "grad_norm": 0.7687342166900635, "learning_rate": 9.868882809406964e-07, "loss": 2.0925, "step": 25842 }, { "epoch": 0.86, "grad_norm": 0.7473678588867188, "learning_rate": 9.864279161618528e-07, "loss": 2.0797, "step": 25843 }, { "epoch": 0.86, "grad_norm": 0.7303598523139954, "learning_rate": 9.859676532130636e-07, "loss": 2.0988, "step": 25844 }, { "epoch": 0.86, "grad_norm": 0.7247107625007629, "learning_rate": 9.855074920995278e-07, "loss": 2.0321, "step": 25845 }, { "epoch": 0.86, "grad_norm": 0.7181931138038635, "learning_rate": 9.850474328264448e-07, "loss": 2.0315, "step": 25846 }, { "epoch": 0.86, "grad_norm": 0.7480742335319519, "learning_rate": 9.845874753990138e-07, "loss": 2.0487, "step": 25847 }, { "epoch": 0.86, "grad_norm": 0.7156351804733276, "learning_rate": 9.841276198224315e-07, "loss": 2.0008, "step": 25848 }, { "epoch": 0.86, "grad_norm": 0.7483587861061096, "learning_rate": 9.836678661018884e-07, "loss": 2.0755, "step": 25849 }, { "epoch": 0.86, "grad_norm": 0.7326157093048096, "learning_rate": 9.832082142425836e-07, "loss": 1.9071, "step": 25850 }, { "epoch": 0.86, "grad_norm": 0.7745359539985657, "learning_rate": 9.827486642497064e-07, "loss": 2.1483, "step": 25851 }, { "epoch": 0.86, "grad_norm": 0.7388675808906555, "learning_rate": 9.822892161284525e-07, "loss": 2.1346, "step": 25852 }, { "epoch": 0.86, "grad_norm": 0.7546560168266296, "learning_rate": 9.81829869884008e-07, "loss": 2.0854, "step": 25853 }, { "epoch": 0.86, "grad_norm": 0.727989912033081, "learning_rate": 9.81370625521566e-07, "loss": 2.0088, "step": 25854 }, { "epoch": 0.86, "grad_norm": 0.7250304818153381, "learning_rate": 9.809114830463118e-07, "loss": 2.0199, "step": 25855 }, { "epoch": 0.86, "grad_norm": 0.7670961618423462, "learning_rate": 9.804524424634355e-07, "loss": 2.139, "step": 25856 }, { "epoch": 0.86, "grad_norm": 0.7555557489395142, "learning_rate": 9.799935037781217e-07, "loss": 2.0057, "step": 25857 }, { "epoch": 0.86, "grad_norm": 0.765159010887146, "learning_rate": 9.79534666995553e-07, "loss": 1.9568, "step": 25858 }, { "epoch": 0.86, "grad_norm": 0.7768719792366028, "learning_rate": 9.790759321209152e-07, "loss": 2.0334, "step": 25859 }, { "epoch": 0.86, "grad_norm": 0.7363049983978271, "learning_rate": 9.786172991593934e-07, "loss": 2.0006, "step": 25860 }, { "epoch": 0.86, "grad_norm": 0.7451250553131104, "learning_rate": 9.781587681161652e-07, "loss": 2.0176, "step": 25861 }, { "epoch": 0.86, "grad_norm": 0.7400102615356445, "learning_rate": 9.7770033899641e-07, "loss": 2.0444, "step": 25862 }, { "epoch": 0.86, "grad_norm": 0.7256300449371338, "learning_rate": 9.772420118053117e-07, "loss": 1.964, "step": 25863 }, { "epoch": 0.86, "grad_norm": 0.7720984816551208, "learning_rate": 9.767837865480434e-07, "loss": 2.0688, "step": 25864 }, { "epoch": 0.86, "grad_norm": 0.7473896741867065, "learning_rate": 9.763256632297847e-07, "loss": 2.0841, "step": 25865 }, { "epoch": 0.86, "grad_norm": 0.7628147006034851, "learning_rate": 9.7586764185571e-07, "loss": 2.0016, "step": 25866 }, { "epoch": 0.86, "grad_norm": 0.7686033844947815, "learning_rate": 9.754097224309934e-07, "loss": 2.0025, "step": 25867 }, { "epoch": 0.86, "grad_norm": 0.7593132853507996, "learning_rate": 9.749519049608081e-07, "loss": 2.1048, "step": 25868 }, { "epoch": 0.86, "grad_norm": 0.7308956384658813, "learning_rate": 9.74494189450329e-07, "loss": 1.9692, "step": 25869 }, { "epoch": 0.86, "grad_norm": 0.736372709274292, "learning_rate": 9.740365759047255e-07, "loss": 1.9992, "step": 25870 }, { "epoch": 0.86, "grad_norm": 0.7395648956298828, "learning_rate": 9.735790643291644e-07, "loss": 2.0302, "step": 25871 }, { "epoch": 0.86, "grad_norm": 0.7363121509552002, "learning_rate": 9.731216547288168e-07, "loss": 2.0476, "step": 25872 }, { "epoch": 0.86, "grad_norm": 0.7756577134132385, "learning_rate": 9.726643471088526e-07, "loss": 2.0699, "step": 25873 }, { "epoch": 0.86, "grad_norm": 0.7218660712242126, "learning_rate": 9.72207141474437e-07, "loss": 2.0283, "step": 25874 }, { "epoch": 0.86, "grad_norm": 0.7502326369285583, "learning_rate": 9.71750037830732e-07, "loss": 2.063, "step": 25875 }, { "epoch": 0.86, "grad_norm": 0.7390763163566589, "learning_rate": 9.712930361829054e-07, "loss": 2.023, "step": 25876 }, { "epoch": 0.86, "grad_norm": 0.7505145072937012, "learning_rate": 9.708361365361173e-07, "loss": 2.0673, "step": 25877 }, { "epoch": 0.86, "grad_norm": 0.7782769203186035, "learning_rate": 9.703793388955318e-07, "loss": 2.1059, "step": 25878 }, { "epoch": 0.86, "grad_norm": 0.739403486251831, "learning_rate": 9.6992264326631e-07, "loss": 1.936, "step": 25879 }, { "epoch": 0.86, "grad_norm": 0.7706981897354126, "learning_rate": 9.694660496536079e-07, "loss": 2.0477, "step": 25880 }, { "epoch": 0.86, "grad_norm": 0.7154578566551208, "learning_rate": 9.690095580625859e-07, "loss": 2.0233, "step": 25881 }, { "epoch": 0.86, "grad_norm": 0.7544198036193848, "learning_rate": 9.68553168498403e-07, "loss": 2.1212, "step": 25882 }, { "epoch": 0.86, "grad_norm": 0.7208190560340881, "learning_rate": 9.680968809662128e-07, "loss": 2.0165, "step": 25883 }, { "epoch": 0.86, "grad_norm": 0.7738233208656311, "learning_rate": 9.676406954711693e-07, "loss": 2.0242, "step": 25884 }, { "epoch": 0.86, "grad_norm": 0.7672053575515747, "learning_rate": 9.671846120184303e-07, "loss": 2.0341, "step": 25885 }, { "epoch": 0.86, "grad_norm": 0.7381328344345093, "learning_rate": 9.667286306131429e-07, "loss": 2.0451, "step": 25886 }, { "epoch": 0.86, "grad_norm": 0.7485707998275757, "learning_rate": 9.662727512604642e-07, "loss": 2.0537, "step": 25887 }, { "epoch": 0.86, "grad_norm": 0.7471725940704346, "learning_rate": 9.658169739655388e-07, "loss": 2.0671, "step": 25888 }, { "epoch": 0.86, "grad_norm": 0.7442446947097778, "learning_rate": 9.653612987335215e-07, "loss": 1.9365, "step": 25889 }, { "epoch": 0.86, "grad_norm": 0.7283656001091003, "learning_rate": 9.649057255695549e-07, "loss": 2.0293, "step": 25890 }, { "epoch": 0.86, "grad_norm": 0.7624244093894958, "learning_rate": 9.644502544787905e-07, "loss": 2.1114, "step": 25891 }, { "epoch": 0.86, "grad_norm": 0.7553475499153137, "learning_rate": 9.639948854663717e-07, "loss": 2.0159, "step": 25892 }, { "epoch": 0.86, "grad_norm": 0.7623302936553955, "learning_rate": 9.635396185374412e-07, "loss": 2.0533, "step": 25893 }, { "epoch": 0.86, "grad_norm": 0.7714210748672485, "learning_rate": 9.630844536971451e-07, "loss": 2.0709, "step": 25894 }, { "epoch": 0.86, "grad_norm": 0.7370160818099976, "learning_rate": 9.626293909506257e-07, "loss": 2.0286, "step": 25895 }, { "epoch": 0.86, "grad_norm": 0.7419747710227966, "learning_rate": 9.621744303030223e-07, "loss": 2.0401, "step": 25896 }, { "epoch": 0.86, "grad_norm": 0.750767707824707, "learning_rate": 9.617195717594751e-07, "loss": 2.0733, "step": 25897 }, { "epoch": 0.86, "grad_norm": 0.7311986088752747, "learning_rate": 9.612648153251236e-07, "loss": 2.0728, "step": 25898 }, { "epoch": 0.86, "grad_norm": 0.76406329870224, "learning_rate": 9.608101610051045e-07, "loss": 2.0218, "step": 25899 }, { "epoch": 0.86, "grad_norm": 0.7233788967132568, "learning_rate": 9.603556088045562e-07, "loss": 2.0704, "step": 25900 }, { "epoch": 0.86, "grad_norm": 0.7302941679954529, "learning_rate": 9.599011587286123e-07, "loss": 2.0494, "step": 25901 }, { "epoch": 0.86, "grad_norm": 0.7442104816436768, "learning_rate": 9.59446810782405e-07, "loss": 2.0337, "step": 25902 }, { "epoch": 0.86, "grad_norm": 0.7410232424736023, "learning_rate": 9.589925649710697e-07, "loss": 2.0261, "step": 25903 }, { "epoch": 0.86, "grad_norm": 0.7334192991256714, "learning_rate": 9.585384212997395e-07, "loss": 2.0647, "step": 25904 }, { "epoch": 0.86, "grad_norm": 0.7352466583251953, "learning_rate": 9.580843797735428e-07, "loss": 2.0479, "step": 25905 }, { "epoch": 0.86, "grad_norm": 0.7660627961158752, "learning_rate": 9.57630440397609e-07, "loss": 2.1313, "step": 25906 }, { "epoch": 0.86, "grad_norm": 0.7452356219291687, "learning_rate": 9.571766031770691e-07, "loss": 2.0611, "step": 25907 }, { "epoch": 0.86, "grad_norm": 0.7804729342460632, "learning_rate": 9.567228681170447e-07, "loss": 2.006, "step": 25908 }, { "epoch": 0.86, "grad_norm": 0.7775230407714844, "learning_rate": 9.562692352226688e-07, "loss": 2.1375, "step": 25909 }, { "epoch": 0.86, "grad_norm": 0.7514261603355408, "learning_rate": 9.558157044990612e-07, "loss": 1.9799, "step": 25910 }, { "epoch": 0.86, "grad_norm": 0.7650182843208313, "learning_rate": 9.55362275951348e-07, "loss": 2.0262, "step": 25911 }, { "epoch": 0.86, "grad_norm": 0.7443432211875916, "learning_rate": 9.549089495846509e-07, "loss": 2.0577, "step": 25912 }, { "epoch": 0.86, "grad_norm": 0.7523942589759827, "learning_rate": 9.54455725404092e-07, "loss": 2.0983, "step": 25913 }, { "epoch": 0.86, "grad_norm": 0.7565601468086243, "learning_rate": 9.54002603414792e-07, "loss": 2.0746, "step": 25914 }, { "epoch": 0.86, "grad_norm": 0.7297800779342651, "learning_rate": 9.535495836218666e-07, "loss": 2.0166, "step": 25915 }, { "epoch": 0.86, "grad_norm": 0.7498037219047546, "learning_rate": 9.530966660304363e-07, "loss": 2.0344, "step": 25916 }, { "epoch": 0.86, "grad_norm": 0.7590672969818115, "learning_rate": 9.526438506456204e-07, "loss": 2.0668, "step": 25917 }, { "epoch": 0.86, "grad_norm": 0.7453739047050476, "learning_rate": 9.521911374725312e-07, "loss": 2.1032, "step": 25918 }, { "epoch": 0.86, "grad_norm": 0.7258577942848206, "learning_rate": 9.517385265162826e-07, "loss": 2.0448, "step": 25919 }, { "epoch": 0.86, "grad_norm": 0.7842622995376587, "learning_rate": 9.512860177819916e-07, "loss": 2.0296, "step": 25920 }, { "epoch": 0.86, "grad_norm": 0.7398545742034912, "learning_rate": 9.508336112747651e-07, "loss": 2.0571, "step": 25921 }, { "epoch": 0.86, "grad_norm": 0.7325178980827332, "learning_rate": 9.50381306999718e-07, "loss": 2.0263, "step": 25922 }, { "epoch": 0.86, "grad_norm": 0.7357069849967957, "learning_rate": 9.49929104961963e-07, "loss": 2.0517, "step": 25923 }, { "epoch": 0.86, "grad_norm": 0.7440596222877502, "learning_rate": 9.494770051666013e-07, "loss": 2.0202, "step": 25924 }, { "epoch": 0.86, "grad_norm": 0.7203040719032288, "learning_rate": 9.490250076187446e-07, "loss": 1.9925, "step": 25925 }, { "epoch": 0.86, "grad_norm": 0.7143058776855469, "learning_rate": 9.485731123234998e-07, "loss": 2.0501, "step": 25926 }, { "epoch": 0.86, "grad_norm": 0.7274224162101746, "learning_rate": 9.481213192859717e-07, "loss": 1.9855, "step": 25927 }, { "epoch": 0.86, "grad_norm": 0.7742795348167419, "learning_rate": 9.476696285112629e-07, "loss": 2.0799, "step": 25928 }, { "epoch": 0.86, "grad_norm": 0.7491094470024109, "learning_rate": 9.472180400044784e-07, "loss": 2.0947, "step": 25929 }, { "epoch": 0.86, "grad_norm": 0.7620345950126648, "learning_rate": 9.467665537707182e-07, "loss": 2.0564, "step": 25930 }, { "epoch": 0.86, "grad_norm": 0.7194277048110962, "learning_rate": 9.46315169815084e-07, "loss": 2.0973, "step": 25931 }, { "epoch": 0.86, "grad_norm": 0.7297775149345398, "learning_rate": 9.45863888142674e-07, "loss": 1.9733, "step": 25932 }, { "epoch": 0.86, "grad_norm": 0.8071901202201843, "learning_rate": 9.454127087585896e-07, "loss": 2.0795, "step": 25933 }, { "epoch": 0.86, "grad_norm": 0.7592566013336182, "learning_rate": 9.449616316679244e-07, "loss": 2.0599, "step": 25934 }, { "epoch": 0.86, "grad_norm": 0.7243884205818176, "learning_rate": 9.445106568757778e-07, "loss": 1.9771, "step": 25935 }, { "epoch": 0.86, "grad_norm": 0.7671956419944763, "learning_rate": 9.440597843872423e-07, "loss": 2.0919, "step": 25936 }, { "epoch": 0.86, "grad_norm": 0.7702879905700684, "learning_rate": 9.436090142074095e-07, "loss": 2.1229, "step": 25937 }, { "epoch": 0.86, "grad_norm": 0.7729118466377258, "learning_rate": 9.431583463413752e-07, "loss": 2.0515, "step": 25938 }, { "epoch": 0.86, "grad_norm": 0.7131680250167847, "learning_rate": 9.427077807942319e-07, "loss": 1.9665, "step": 25939 }, { "epoch": 0.86, "grad_norm": 0.7983318567276001, "learning_rate": 9.422573175710681e-07, "loss": 2.0788, "step": 25940 }, { "epoch": 0.86, "grad_norm": 0.7479981184005737, "learning_rate": 9.418069566769717e-07, "loss": 2.0392, "step": 25941 }, { "epoch": 0.86, "grad_norm": 0.7848258018493652, "learning_rate": 9.413566981170319e-07, "loss": 2.0931, "step": 25942 }, { "epoch": 0.86, "grad_norm": 0.7295976877212524, "learning_rate": 9.409065418963348e-07, "loss": 2.0633, "step": 25943 }, { "epoch": 0.86, "grad_norm": 0.7395895719528198, "learning_rate": 9.404564880199652e-07, "loss": 2.1118, "step": 25944 }, { "epoch": 0.86, "grad_norm": 0.7482845783233643, "learning_rate": 9.400065364930133e-07, "loss": 2.0222, "step": 25945 }, { "epoch": 0.86, "grad_norm": 0.7485872507095337, "learning_rate": 9.395566873205542e-07, "loss": 2.0474, "step": 25946 }, { "epoch": 0.86, "grad_norm": 0.7452778220176697, "learning_rate": 9.391069405076736e-07, "loss": 2.042, "step": 25947 }, { "epoch": 0.86, "grad_norm": 0.7633265256881714, "learning_rate": 9.386572960594542e-07, "loss": 2.0135, "step": 25948 }, { "epoch": 0.86, "grad_norm": 0.7407006621360779, "learning_rate": 9.382077539809742e-07, "loss": 2.0604, "step": 25949 }, { "epoch": 0.86, "grad_norm": 0.7550188899040222, "learning_rate": 9.377583142773106e-07, "loss": 2.094, "step": 25950 }, { "epoch": 0.86, "grad_norm": 0.7337163686752319, "learning_rate": 9.373089769535437e-07, "loss": 2.0062, "step": 25951 }, { "epoch": 0.86, "grad_norm": 0.7670320272445679, "learning_rate": 9.368597420147474e-07, "loss": 1.9772, "step": 25952 }, { "epoch": 0.86, "grad_norm": 0.7380965948104858, "learning_rate": 9.364106094659986e-07, "loss": 1.997, "step": 25953 }, { "epoch": 0.86, "grad_norm": 0.740817666053772, "learning_rate": 9.359615793123722e-07, "loss": 2.0461, "step": 25954 }, { "epoch": 0.86, "grad_norm": 0.7396414875984192, "learning_rate": 9.355126515589408e-07, "loss": 2.0321, "step": 25955 }, { "epoch": 0.86, "grad_norm": 0.7441156506538391, "learning_rate": 9.350638262107725e-07, "loss": 1.985, "step": 25956 }, { "epoch": 0.86, "grad_norm": 0.8011876344680786, "learning_rate": 9.3461510327294e-07, "loss": 2.0597, "step": 25957 }, { "epoch": 0.86, "grad_norm": 0.7383939623832703, "learning_rate": 9.341664827505182e-07, "loss": 2.0736, "step": 25958 }, { "epoch": 0.86, "grad_norm": 0.7388496994972229, "learning_rate": 9.337179646485661e-07, "loss": 1.9517, "step": 25959 }, { "epoch": 0.86, "grad_norm": 0.7420719265937805, "learning_rate": 9.332695489721555e-07, "loss": 2.0257, "step": 25960 }, { "epoch": 0.86, "grad_norm": 0.7358705997467041, "learning_rate": 9.328212357263533e-07, "loss": 2.0545, "step": 25961 }, { "epoch": 0.86, "grad_norm": 0.7533799409866333, "learning_rate": 9.323730249162221e-07, "loss": 2.0047, "step": 25962 }, { "epoch": 0.86, "grad_norm": 0.7479290962219238, "learning_rate": 9.319249165468258e-07, "loss": 2.0478, "step": 25963 }, { "epoch": 0.86, "grad_norm": 0.7249380946159363, "learning_rate": 9.31476910623228e-07, "loss": 2.0365, "step": 25964 }, { "epoch": 0.86, "grad_norm": 0.7362590432167053, "learning_rate": 9.31029007150489e-07, "loss": 2.013, "step": 25965 }, { "epoch": 0.86, "grad_norm": 0.7964341044425964, "learning_rate": 9.305812061336683e-07, "loss": 2.0904, "step": 25966 }, { "epoch": 0.86, "grad_norm": 0.7415114641189575, "learning_rate": 9.301335075778295e-07, "loss": 2.0732, "step": 25967 }, { "epoch": 0.86, "grad_norm": 0.7472359538078308, "learning_rate": 9.296859114880241e-07, "loss": 2.0096, "step": 25968 }, { "epoch": 0.86, "grad_norm": 0.778104305267334, "learning_rate": 9.292384178693104e-07, "loss": 2.0436, "step": 25969 }, { "epoch": 0.86, "grad_norm": 0.7336345314979553, "learning_rate": 9.287910267267474e-07, "loss": 2.0037, "step": 25970 }, { "epoch": 0.86, "grad_norm": 0.7477326989173889, "learning_rate": 9.283437380653881e-07, "loss": 2.0689, "step": 25971 }, { "epoch": 0.86, "grad_norm": 0.7556303143501282, "learning_rate": 9.278965518902816e-07, "loss": 2.0544, "step": 25972 }, { "epoch": 0.86, "grad_norm": 0.7354278564453125, "learning_rate": 9.274494682064839e-07, "loss": 2.0762, "step": 25973 }, { "epoch": 0.86, "grad_norm": 0.742210328578949, "learning_rate": 9.270024870190464e-07, "loss": 1.9872, "step": 25974 }, { "epoch": 0.86, "grad_norm": 0.7174685001373291, "learning_rate": 9.265556083330152e-07, "loss": 2.0545, "step": 25975 }, { "epoch": 0.86, "grad_norm": 0.7708245515823364, "learning_rate": 9.261088321534439e-07, "loss": 2.083, "step": 25976 }, { "epoch": 0.86, "grad_norm": 0.7628448009490967, "learning_rate": 9.256621584853764e-07, "loss": 2.0397, "step": 25977 }, { "epoch": 0.86, "grad_norm": 0.724772036075592, "learning_rate": 9.252155873338586e-07, "loss": 1.9871, "step": 25978 }, { "epoch": 0.86, "grad_norm": 0.7278578281402588, "learning_rate": 9.247691187039365e-07, "loss": 2.0186, "step": 25979 }, { "epoch": 0.86, "grad_norm": 0.7602989673614502, "learning_rate": 9.243227526006582e-07, "loss": 2.0489, "step": 25980 }, { "epoch": 0.86, "grad_norm": 0.747604250907898, "learning_rate": 9.238764890290585e-07, "loss": 2.0594, "step": 25981 }, { "epoch": 0.86, "grad_norm": 0.7269222140312195, "learning_rate": 9.234303279941837e-07, "loss": 2.0494, "step": 25982 }, { "epoch": 0.86, "grad_norm": 0.7612643837928772, "learning_rate": 9.229842695010749e-07, "loss": 2.0512, "step": 25983 }, { "epoch": 0.86, "grad_norm": 0.7487745881080627, "learning_rate": 9.225383135547683e-07, "loss": 2.0949, "step": 25984 }, { "epoch": 0.86, "grad_norm": 0.7577506303787231, "learning_rate": 9.220924601603065e-07, "loss": 2.0275, "step": 25985 }, { "epoch": 0.86, "grad_norm": 0.7573544383049011, "learning_rate": 9.216467093227233e-07, "loss": 2.0131, "step": 25986 }, { "epoch": 0.86, "grad_norm": 0.7388461232185364, "learning_rate": 9.212010610470534e-07, "loss": 2.1298, "step": 25987 }, { "epoch": 0.86, "grad_norm": 0.7494735717773438, "learning_rate": 9.207555153383329e-07, "loss": 2.0481, "step": 25988 }, { "epoch": 0.86, "grad_norm": 0.7471747994422913, "learning_rate": 9.203100722015979e-07, "loss": 2.0191, "step": 25989 }, { "epoch": 0.86, "grad_norm": 0.7673196792602539, "learning_rate": 9.198647316418785e-07, "loss": 2.1281, "step": 25990 }, { "epoch": 0.86, "grad_norm": 0.7598997950553894, "learning_rate": 9.194194936642042e-07, "loss": 1.991, "step": 25991 }, { "epoch": 0.86, "grad_norm": 0.7313405871391296, "learning_rate": 9.189743582736077e-07, "loss": 2.0613, "step": 25992 }, { "epoch": 0.86, "grad_norm": 0.7763650417327881, "learning_rate": 9.185293254751182e-07, "loss": 2.035, "step": 25993 }, { "epoch": 0.86, "grad_norm": 0.7444823384284973, "learning_rate": 9.180843952737594e-07, "loss": 2.0085, "step": 25994 }, { "epoch": 0.86, "grad_norm": 0.7560100555419922, "learning_rate": 9.176395676745608e-07, "loss": 1.9957, "step": 25995 }, { "epoch": 0.86, "grad_norm": 0.7714390158653259, "learning_rate": 9.171948426825494e-07, "loss": 2.0559, "step": 25996 }, { "epoch": 0.86, "grad_norm": 0.7365151643753052, "learning_rate": 9.167502203027457e-07, "loss": 2.0816, "step": 25997 }, { "epoch": 0.86, "grad_norm": 0.783757209777832, "learning_rate": 9.163057005401766e-07, "loss": 2.0629, "step": 25998 }, { "epoch": 0.86, "grad_norm": 0.7707266211509705, "learning_rate": 9.158612833998614e-07, "loss": 1.9864, "step": 25999 }, { "epoch": 0.87, "grad_norm": 0.7445627450942993, "learning_rate": 9.154169688868208e-07, "loss": 2.0197, "step": 26000 }, { "epoch": 0.87, "grad_norm": 0.7357868552207947, "learning_rate": 9.149727570060751e-07, "loss": 1.9896, "step": 26001 }, { "epoch": 0.87, "grad_norm": 0.7368590235710144, "learning_rate": 9.145286477626458e-07, "loss": 2.0328, "step": 26002 }, { "epoch": 0.87, "grad_norm": 0.7526722550392151, "learning_rate": 9.140846411615445e-07, "loss": 2.011, "step": 26003 }, { "epoch": 0.87, "grad_norm": 0.7341996431350708, "learning_rate": 9.136407372077894e-07, "loss": 2.0538, "step": 26004 }, { "epoch": 0.87, "grad_norm": 0.7241879105567932, "learning_rate": 9.131969359063986e-07, "loss": 2.0562, "step": 26005 }, { "epoch": 0.87, "grad_norm": 0.7429711222648621, "learning_rate": 9.127532372623804e-07, "loss": 2.0869, "step": 26006 }, { "epoch": 0.87, "grad_norm": 0.7392231225967407, "learning_rate": 9.123096412807531e-07, "loss": 2.0512, "step": 26007 }, { "epoch": 0.87, "grad_norm": 0.8000741600990295, "learning_rate": 9.118661479665258e-07, "loss": 2.0234, "step": 26008 }, { "epoch": 0.87, "grad_norm": 0.7803536653518677, "learning_rate": 9.114227573247059e-07, "loss": 2.0425, "step": 26009 }, { "epoch": 0.87, "grad_norm": 0.7460272908210754, "learning_rate": 9.109794693603058e-07, "loss": 2.0104, "step": 26010 }, { "epoch": 0.87, "grad_norm": 0.7896559834480286, "learning_rate": 9.105362840783349e-07, "loss": 2.0597, "step": 26011 }, { "epoch": 0.87, "grad_norm": 0.7576748728752136, "learning_rate": 9.100932014837982e-07, "loss": 2.07, "step": 26012 }, { "epoch": 0.87, "grad_norm": 0.765677809715271, "learning_rate": 9.096502215816994e-07, "loss": 1.9985, "step": 26013 }, { "epoch": 0.87, "grad_norm": 0.7342962026596069, "learning_rate": 9.092073443770466e-07, "loss": 2.01, "step": 26014 }, { "epoch": 0.87, "grad_norm": 0.7366447448730469, "learning_rate": 9.087645698748393e-07, "loss": 2.0426, "step": 26015 }, { "epoch": 0.87, "grad_norm": 0.7369386553764343, "learning_rate": 9.083218980800845e-07, "loss": 2.0283, "step": 26016 }, { "epoch": 0.87, "grad_norm": 0.7423820495605469, "learning_rate": 9.078793289977783e-07, "loss": 2.0624, "step": 26017 }, { "epoch": 0.87, "grad_norm": 0.7445455193519592, "learning_rate": 9.074368626329255e-07, "loss": 2.0795, "step": 26018 }, { "epoch": 0.87, "grad_norm": 0.7274477481842041, "learning_rate": 9.069944989905199e-07, "loss": 2.1314, "step": 26019 }, { "epoch": 0.87, "grad_norm": 0.7674857378005981, "learning_rate": 9.065522380755632e-07, "loss": 2.0815, "step": 26020 }, { "epoch": 0.87, "grad_norm": 0.7458963990211487, "learning_rate": 9.0611007989305e-07, "loss": 2.0525, "step": 26021 }, { "epoch": 0.87, "grad_norm": 0.7949268817901611, "learning_rate": 9.056680244479732e-07, "loss": 2.0891, "step": 26022 }, { "epoch": 0.87, "grad_norm": 0.7394000887870789, "learning_rate": 9.052260717453299e-07, "loss": 2.0723, "step": 26023 }, { "epoch": 0.87, "grad_norm": 0.746723473072052, "learning_rate": 9.04784221790116e-07, "loss": 2.0797, "step": 26024 }, { "epoch": 0.87, "grad_norm": 0.7523587942123413, "learning_rate": 9.043424745873164e-07, "loss": 1.9896, "step": 26025 }, { "epoch": 0.87, "grad_norm": 0.7480191588401794, "learning_rate": 9.03900830141925e-07, "loss": 2.0271, "step": 26026 }, { "epoch": 0.87, "grad_norm": 0.7278357148170471, "learning_rate": 9.034592884589321e-07, "loss": 2.0142, "step": 26027 }, { "epoch": 0.87, "grad_norm": 0.7568182945251465, "learning_rate": 9.030178495433239e-07, "loss": 2.0695, "step": 26028 }, { "epoch": 0.87, "grad_norm": 0.7750005125999451, "learning_rate": 9.025765134000896e-07, "loss": 2.043, "step": 26029 }, { "epoch": 0.87, "grad_norm": 0.7172654867172241, "learning_rate": 9.021352800342153e-07, "loss": 2.0281, "step": 26030 }, { "epoch": 0.87, "grad_norm": 0.7657871246337891, "learning_rate": 9.016941494506826e-07, "loss": 2.0212, "step": 26031 }, { "epoch": 0.87, "grad_norm": 0.7468778491020203, "learning_rate": 9.012531216544773e-07, "loss": 2.0757, "step": 26032 }, { "epoch": 0.87, "grad_norm": 0.7230513095855713, "learning_rate": 9.008121966505834e-07, "loss": 2.0772, "step": 26033 }, { "epoch": 0.87, "grad_norm": 0.7486144304275513, "learning_rate": 9.003713744439802e-07, "loss": 1.9658, "step": 26034 }, { "epoch": 0.87, "grad_norm": 0.751187801361084, "learning_rate": 8.99930655039648e-07, "loss": 2.0811, "step": 26035 }, { "epoch": 0.87, "grad_norm": 0.7990497350692749, "learning_rate": 8.994900384425665e-07, "loss": 2.0311, "step": 26036 }, { "epoch": 0.87, "grad_norm": 0.748857319355011, "learning_rate": 8.990495246577125e-07, "loss": 2.0092, "step": 26037 }, { "epoch": 0.87, "grad_norm": 0.7557612657546997, "learning_rate": 8.986091136900643e-07, "loss": 2.0454, "step": 26038 }, { "epoch": 0.87, "grad_norm": 0.7426319718360901, "learning_rate": 8.981688055445947e-07, "loss": 2.0857, "step": 26039 }, { "epoch": 0.87, "grad_norm": 0.744658887386322, "learning_rate": 8.977286002262808e-07, "loss": 2.0048, "step": 26040 }, { "epoch": 0.87, "grad_norm": 0.7669386863708496, "learning_rate": 8.972884977400941e-07, "loss": 2.0881, "step": 26041 }, { "epoch": 0.87, "grad_norm": 0.7294650673866272, "learning_rate": 8.968484980910086e-07, "loss": 1.9825, "step": 26042 }, { "epoch": 0.87, "grad_norm": 0.7116085886955261, "learning_rate": 8.964086012839934e-07, "loss": 2.0087, "step": 26043 }, { "epoch": 0.87, "grad_norm": 0.7089520692825317, "learning_rate": 8.959688073240169e-07, "loss": 1.9918, "step": 26044 }, { "epoch": 0.87, "grad_norm": 0.7657942175865173, "learning_rate": 8.955291162160507e-07, "loss": 2.0619, "step": 26045 }, { "epoch": 0.87, "grad_norm": 0.7596400380134583, "learning_rate": 8.950895279650607e-07, "loss": 2.126, "step": 26046 }, { "epoch": 0.87, "grad_norm": 0.7539043426513672, "learning_rate": 8.946500425760141e-07, "loss": 2.1561, "step": 26047 }, { "epoch": 0.87, "grad_norm": 0.7769302129745483, "learning_rate": 8.942106600538736e-07, "loss": 2.1091, "step": 26048 }, { "epoch": 0.87, "grad_norm": 0.7355259656906128, "learning_rate": 8.937713804036052e-07, "loss": 1.9492, "step": 26049 }, { "epoch": 0.87, "grad_norm": 0.7223173379898071, "learning_rate": 8.933322036301706e-07, "loss": 2.0049, "step": 26050 }, { "epoch": 0.87, "grad_norm": 0.7520372271537781, "learning_rate": 8.928931297385324e-07, "loss": 2.0237, "step": 26051 }, { "epoch": 0.87, "grad_norm": 0.7419622540473938, "learning_rate": 8.92454158733651e-07, "loss": 2.0397, "step": 26052 }, { "epoch": 0.87, "grad_norm": 0.750339150428772, "learning_rate": 8.920152906204826e-07, "loss": 1.9908, "step": 26053 }, { "epoch": 0.87, "grad_norm": 0.7568366527557373, "learning_rate": 8.915765254039888e-07, "loss": 2.1255, "step": 26054 }, { "epoch": 0.87, "grad_norm": 0.7394478917121887, "learning_rate": 8.911378630891266e-07, "loss": 1.9806, "step": 26055 }, { "epoch": 0.87, "grad_norm": 0.7275696396827698, "learning_rate": 8.906993036808498e-07, "loss": 2.0212, "step": 26056 }, { "epoch": 0.87, "grad_norm": 0.7336542010307312, "learning_rate": 8.902608471841123e-07, "loss": 1.999, "step": 26057 }, { "epoch": 0.87, "grad_norm": 0.7188431024551392, "learning_rate": 8.898224936038691e-07, "loss": 2.0794, "step": 26058 }, { "epoch": 0.87, "grad_norm": 0.7493748664855957, "learning_rate": 8.893842429450739e-07, "loss": 2.0353, "step": 26059 }, { "epoch": 0.87, "grad_norm": 0.7361557483673096, "learning_rate": 8.889460952126761e-07, "loss": 2.0286, "step": 26060 }, { "epoch": 0.87, "grad_norm": 0.7839362025260925, "learning_rate": 8.885080504116239e-07, "loss": 2.0065, "step": 26061 }, { "epoch": 0.87, "grad_norm": 0.7731884121894836, "learning_rate": 8.880701085468701e-07, "loss": 2.0355, "step": 26062 }, { "epoch": 0.87, "grad_norm": 0.7372866868972778, "learning_rate": 8.876322696233574e-07, "loss": 2.0836, "step": 26063 }, { "epoch": 0.87, "grad_norm": 0.7773445248603821, "learning_rate": 8.871945336460375e-07, "loss": 2.0786, "step": 26064 }, { "epoch": 0.87, "grad_norm": 0.7269904613494873, "learning_rate": 8.867569006198528e-07, "loss": 1.9948, "step": 26065 }, { "epoch": 0.87, "grad_norm": 0.764481782913208, "learning_rate": 8.863193705497464e-07, "loss": 2.0884, "step": 26066 }, { "epoch": 0.87, "grad_norm": 0.73249351978302, "learning_rate": 8.85881943440663e-07, "loss": 2.0115, "step": 26067 }, { "epoch": 0.87, "grad_norm": 0.7192801833152771, "learning_rate": 8.854446192975441e-07, "loss": 2.0556, "step": 26068 }, { "epoch": 0.87, "grad_norm": 0.7057031989097595, "learning_rate": 8.850073981253315e-07, "loss": 1.9782, "step": 26069 }, { "epoch": 0.87, "grad_norm": 0.7374588251113892, "learning_rate": 8.845702799289613e-07, "loss": 1.9507, "step": 26070 }, { "epoch": 0.87, "grad_norm": 0.733747661113739, "learning_rate": 8.84133264713376e-07, "loss": 2.0647, "step": 26071 }, { "epoch": 0.87, "grad_norm": 0.7654053568840027, "learning_rate": 8.836963524835085e-07, "loss": 2.0836, "step": 26072 }, { "epoch": 0.87, "grad_norm": 0.7582880258560181, "learning_rate": 8.832595432442992e-07, "loss": 2.0838, "step": 26073 }, { "epoch": 0.87, "grad_norm": 0.754362165927887, "learning_rate": 8.828228370006786e-07, "loss": 1.9989, "step": 26074 }, { "epoch": 0.87, "grad_norm": 0.7118877172470093, "learning_rate": 8.823862337575839e-07, "loss": 2.012, "step": 26075 }, { "epoch": 0.87, "grad_norm": 0.7350810170173645, "learning_rate": 8.819497335199445e-07, "loss": 2.0774, "step": 26076 }, { "epoch": 0.87, "grad_norm": 0.7620537281036377, "learning_rate": 8.815133362926953e-07, "loss": 1.9649, "step": 26077 }, { "epoch": 0.87, "grad_norm": 0.7511849403381348, "learning_rate": 8.810770420807647e-07, "loss": 2.0153, "step": 26078 }, { "epoch": 0.87, "grad_norm": 0.7317536473274231, "learning_rate": 8.806408508890796e-07, "loss": 1.9758, "step": 26079 }, { "epoch": 0.87, "grad_norm": 0.7334953546524048, "learning_rate": 8.802047627225685e-07, "loss": 2.0397, "step": 26080 }, { "epoch": 0.87, "grad_norm": 0.7556837201118469, "learning_rate": 8.797687775861619e-07, "loss": 2.0181, "step": 26081 }, { "epoch": 0.87, "grad_norm": 0.7242208123207092, "learning_rate": 8.793328954847835e-07, "loss": 1.9877, "step": 26082 }, { "epoch": 0.87, "grad_norm": 0.7341518402099609, "learning_rate": 8.788971164233539e-07, "loss": 2.0684, "step": 26083 }, { "epoch": 0.87, "grad_norm": 0.7540560364723206, "learning_rate": 8.784614404068015e-07, "loss": 2.0349, "step": 26084 }, { "epoch": 0.87, "grad_norm": 0.7437508702278137, "learning_rate": 8.780258674400433e-07, "loss": 1.9549, "step": 26085 }, { "epoch": 0.87, "grad_norm": 0.7480018138885498, "learning_rate": 8.775903975280054e-07, "loss": 2.0204, "step": 26086 }, { "epoch": 0.87, "grad_norm": 0.7488433122634888, "learning_rate": 8.771550306756049e-07, "loss": 2.1124, "step": 26087 }, { "epoch": 0.87, "grad_norm": 0.7354269623756409, "learning_rate": 8.767197668877592e-07, "loss": 2.0443, "step": 26088 }, { "epoch": 0.87, "grad_norm": 0.7319847941398621, "learning_rate": 8.762846061693852e-07, "loss": 2.0784, "step": 26089 }, { "epoch": 0.87, "grad_norm": 0.7533643245697021, "learning_rate": 8.758495485254037e-07, "loss": 2.0749, "step": 26090 }, { "epoch": 0.87, "grad_norm": 0.7987291812896729, "learning_rate": 8.754145939607262e-07, "loss": 2.1008, "step": 26091 }, { "epoch": 0.87, "grad_norm": 0.7291937470436096, "learning_rate": 8.749797424802664e-07, "loss": 2.0296, "step": 26092 }, { "epoch": 0.87, "grad_norm": 0.7204707860946655, "learning_rate": 8.745449940889384e-07, "loss": 2.0139, "step": 26093 }, { "epoch": 0.87, "grad_norm": 0.7761337161064148, "learning_rate": 8.741103487916514e-07, "loss": 2.0459, "step": 26094 }, { "epoch": 0.87, "grad_norm": 0.7239903211593628, "learning_rate": 8.736758065933193e-07, "loss": 1.9895, "step": 26095 }, { "epoch": 0.87, "grad_norm": 0.7312216758728027, "learning_rate": 8.732413674988471e-07, "loss": 1.9608, "step": 26096 }, { "epoch": 0.87, "grad_norm": 0.7601781487464905, "learning_rate": 8.728070315131476e-07, "loss": 2.0202, "step": 26097 }, { "epoch": 0.87, "grad_norm": 0.7548706531524658, "learning_rate": 8.723727986411235e-07, "loss": 2.0258, "step": 26098 }, { "epoch": 0.87, "grad_norm": 0.7665787935256958, "learning_rate": 8.71938668887683e-07, "loss": 2.1145, "step": 26099 }, { "epoch": 0.87, "grad_norm": 0.7559893131256104, "learning_rate": 8.715046422577311e-07, "loss": 2.0669, "step": 26100 }, { "epoch": 0.87, "grad_norm": 0.7597693204879761, "learning_rate": 8.710707187561674e-07, "loss": 2.0784, "step": 26101 }, { "epoch": 0.87, "grad_norm": 0.761865496635437, "learning_rate": 8.706368983878965e-07, "loss": 2.0152, "step": 26102 }, { "epoch": 0.87, "grad_norm": 0.7484170794487, "learning_rate": 8.702031811578216e-07, "loss": 2.0396, "step": 26103 }, { "epoch": 0.87, "grad_norm": 0.7326580882072449, "learning_rate": 8.697695670708406e-07, "loss": 2.0098, "step": 26104 }, { "epoch": 0.87, "grad_norm": 0.7145528793334961, "learning_rate": 8.693360561318509e-07, "loss": 2.0391, "step": 26105 }, { "epoch": 0.87, "grad_norm": 0.7399336099624634, "learning_rate": 8.689026483457519e-07, "loss": 2.0888, "step": 26106 }, { "epoch": 0.87, "grad_norm": 0.750868558883667, "learning_rate": 8.684693437174385e-07, "loss": 2.0159, "step": 26107 }, { "epoch": 0.87, "grad_norm": 0.7159035205841064, "learning_rate": 8.680361422518091e-07, "loss": 2.01, "step": 26108 }, { "epoch": 0.87, "grad_norm": 0.7498615980148315, "learning_rate": 8.676030439537542e-07, "loss": 1.9582, "step": 26109 }, { "epoch": 0.87, "grad_norm": 0.7738543748855591, "learning_rate": 8.671700488281675e-07, "loss": 2.0222, "step": 26110 }, { "epoch": 0.87, "grad_norm": 0.7434841394424438, "learning_rate": 8.667371568799399e-07, "loss": 2.04, "step": 26111 }, { "epoch": 0.87, "grad_norm": 0.7654726505279541, "learning_rate": 8.66304368113966e-07, "loss": 2.0962, "step": 26112 }, { "epoch": 0.87, "grad_norm": 0.7408851981163025, "learning_rate": 8.658716825351332e-07, "loss": 1.9954, "step": 26113 }, { "epoch": 0.87, "grad_norm": 0.7858261466026306, "learning_rate": 8.654391001483253e-07, "loss": 2.0439, "step": 26114 }, { "epoch": 0.87, "grad_norm": 0.7424229979515076, "learning_rate": 8.650066209584363e-07, "loss": 2.0303, "step": 26115 }, { "epoch": 0.87, "grad_norm": 0.7670367956161499, "learning_rate": 8.645742449703464e-07, "loss": 2.0298, "step": 26116 }, { "epoch": 0.87, "grad_norm": 0.7460410594940186, "learning_rate": 8.641419721889454e-07, "loss": 2.0373, "step": 26117 }, { "epoch": 0.87, "grad_norm": 0.7535685300827026, "learning_rate": 8.637098026191115e-07, "loss": 2.0677, "step": 26118 }, { "epoch": 0.87, "grad_norm": 0.7521857619285583, "learning_rate": 8.632777362657319e-07, "loss": 1.9949, "step": 26119 }, { "epoch": 0.87, "grad_norm": 0.7349094152450562, "learning_rate": 8.628457731336848e-07, "loss": 2.0223, "step": 26120 }, { "epoch": 0.87, "grad_norm": 0.7516452074050903, "learning_rate": 8.624139132278519e-07, "loss": 2.0757, "step": 26121 }, { "epoch": 0.87, "grad_norm": 0.733060359954834, "learning_rate": 8.619821565531128e-07, "loss": 1.9759, "step": 26122 }, { "epoch": 0.87, "grad_norm": 0.7503489255905151, "learning_rate": 8.615505031143411e-07, "loss": 2.0838, "step": 26123 }, { "epoch": 0.87, "grad_norm": 0.7479836344718933, "learning_rate": 8.611189529164165e-07, "loss": 2.0401, "step": 26124 }, { "epoch": 0.87, "grad_norm": 0.7196901440620422, "learning_rate": 8.60687505964215e-07, "loss": 2.0121, "step": 26125 }, { "epoch": 0.87, "grad_norm": 0.7345967292785645, "learning_rate": 8.602561622626104e-07, "loss": 2.007, "step": 26126 }, { "epoch": 0.87, "grad_norm": 0.738929033279419, "learning_rate": 8.598249218164745e-07, "loss": 1.9625, "step": 26127 }, { "epoch": 0.87, "grad_norm": 0.7429989576339722, "learning_rate": 8.593937846306799e-07, "loss": 2.0564, "step": 26128 }, { "epoch": 0.87, "grad_norm": 0.7296868562698364, "learning_rate": 8.589627507100973e-07, "loss": 1.9579, "step": 26129 }, { "epoch": 0.87, "grad_norm": 0.7939447164535522, "learning_rate": 8.58531820059596e-07, "loss": 2.0316, "step": 26130 }, { "epoch": 0.87, "grad_norm": 0.7418543696403503, "learning_rate": 8.581009926840478e-07, "loss": 2.004, "step": 26131 }, { "epoch": 0.87, "grad_norm": 0.7300820350646973, "learning_rate": 8.576702685883132e-07, "loss": 2.0828, "step": 26132 }, { "epoch": 0.87, "grad_norm": 0.7351304292678833, "learning_rate": 8.572396477772627e-07, "loss": 2.0364, "step": 26133 }, { "epoch": 0.87, "grad_norm": 0.7615849375724792, "learning_rate": 8.568091302557613e-07, "loss": 2.0511, "step": 26134 }, { "epoch": 0.87, "grad_norm": 0.7370851039886475, "learning_rate": 8.56378716028673e-07, "loss": 2.0435, "step": 26135 }, { "epoch": 0.87, "grad_norm": 0.7111318707466125, "learning_rate": 8.559484051008571e-07, "loss": 2.0558, "step": 26136 }, { "epoch": 0.87, "grad_norm": 0.7329342365264893, "learning_rate": 8.555181974771787e-07, "loss": 2.0508, "step": 26137 }, { "epoch": 0.87, "grad_norm": 0.7335216999053955, "learning_rate": 8.55088093162495e-07, "loss": 2.0649, "step": 26138 }, { "epoch": 0.87, "grad_norm": 0.7488418817520142, "learning_rate": 8.546580921616676e-07, "loss": 2.0519, "step": 26139 }, { "epoch": 0.87, "grad_norm": 0.7692394852638245, "learning_rate": 8.542281944795528e-07, "loss": 2.0899, "step": 26140 }, { "epoch": 0.87, "grad_norm": 0.7547497153282166, "learning_rate": 8.537984001210087e-07, "loss": 2.0866, "step": 26141 }, { "epoch": 0.87, "grad_norm": 0.7116230726242065, "learning_rate": 8.533687090908893e-07, "loss": 2.0333, "step": 26142 }, { "epoch": 0.87, "grad_norm": 0.7162382006645203, "learning_rate": 8.529391213940508e-07, "loss": 1.9313, "step": 26143 }, { "epoch": 0.87, "grad_norm": 0.7477768659591675, "learning_rate": 8.525096370353458e-07, "loss": 2.0117, "step": 26144 }, { "epoch": 0.87, "grad_norm": 0.7796523571014404, "learning_rate": 8.52080256019624e-07, "loss": 2.0074, "step": 26145 }, { "epoch": 0.87, "grad_norm": 0.7533775568008423, "learning_rate": 8.516509783517379e-07, "loss": 2.0713, "step": 26146 }, { "epoch": 0.87, "grad_norm": 0.7249441742897034, "learning_rate": 8.512218040365394e-07, "loss": 2.0241, "step": 26147 }, { "epoch": 0.87, "grad_norm": 0.7516166567802429, "learning_rate": 8.507927330788757e-07, "loss": 2.0644, "step": 26148 }, { "epoch": 0.87, "grad_norm": 0.7337638139724731, "learning_rate": 8.503637654835916e-07, "loss": 2.0677, "step": 26149 }, { "epoch": 0.87, "grad_norm": 0.7596551775932312, "learning_rate": 8.499349012555381e-07, "loss": 2.0866, "step": 26150 }, { "epoch": 0.87, "grad_norm": 0.7552168965339661, "learning_rate": 8.495061403995553e-07, "loss": 2.0333, "step": 26151 }, { "epoch": 0.87, "grad_norm": 0.7323265671730042, "learning_rate": 8.490774829204896e-07, "loss": 2.0358, "step": 26152 }, { "epoch": 0.87, "grad_norm": 0.7373947501182556, "learning_rate": 8.486489288231858e-07, "loss": 2.009, "step": 26153 }, { "epoch": 0.87, "grad_norm": 0.729453980922699, "learning_rate": 8.482204781124815e-07, "loss": 2.0263, "step": 26154 }, { "epoch": 0.87, "grad_norm": 0.7041894197463989, "learning_rate": 8.47792130793218e-07, "loss": 1.9992, "step": 26155 }, { "epoch": 0.87, "grad_norm": 0.7723562717437744, "learning_rate": 8.47363886870236e-07, "loss": 2.0725, "step": 26156 }, { "epoch": 0.87, "grad_norm": 0.7094305753707886, "learning_rate": 8.46935746348374e-07, "loss": 2.0232, "step": 26157 }, { "epoch": 0.87, "grad_norm": 0.7519320249557495, "learning_rate": 8.465077092324658e-07, "loss": 2.0903, "step": 26158 }, { "epoch": 0.87, "grad_norm": 0.7269750833511353, "learning_rate": 8.460797755273487e-07, "loss": 2.0108, "step": 26159 }, { "epoch": 0.87, "grad_norm": 0.7357923984527588, "learning_rate": 8.456519452378597e-07, "loss": 1.9933, "step": 26160 }, { "epoch": 0.87, "grad_norm": 0.7666115164756775, "learning_rate": 8.452242183688286e-07, "loss": 2.0505, "step": 26161 }, { "epoch": 0.87, "grad_norm": 0.7499266862869263, "learning_rate": 8.447965949250903e-07, "loss": 2.0694, "step": 26162 }, { "epoch": 0.87, "grad_norm": 0.739158570766449, "learning_rate": 8.443690749114741e-07, "loss": 2.0768, "step": 26163 }, { "epoch": 0.87, "grad_norm": 0.7603508234024048, "learning_rate": 8.439416583328097e-07, "loss": 2.081, "step": 26164 }, { "epoch": 0.87, "grad_norm": 0.7535427808761597, "learning_rate": 8.435143451939265e-07, "loss": 2.0125, "step": 26165 }, { "epoch": 0.87, "grad_norm": 0.7088193297386169, "learning_rate": 8.43087135499655e-07, "loss": 2.0141, "step": 26166 }, { "epoch": 0.87, "grad_norm": 0.76165372133255, "learning_rate": 8.426600292548148e-07, "loss": 2.0372, "step": 26167 }, { "epoch": 0.87, "grad_norm": 0.7223299741744995, "learning_rate": 8.422330264642354e-07, "loss": 2.0241, "step": 26168 }, { "epoch": 0.87, "grad_norm": 0.7322620153427124, "learning_rate": 8.418061271327415e-07, "loss": 1.9941, "step": 26169 }, { "epoch": 0.87, "grad_norm": 0.7736354470252991, "learning_rate": 8.413793312651541e-07, "loss": 2.0472, "step": 26170 }, { "epoch": 0.87, "grad_norm": 0.7302213311195374, "learning_rate": 8.409526388662947e-07, "loss": 2.0829, "step": 26171 }, { "epoch": 0.87, "grad_norm": 0.7282377481460571, "learning_rate": 8.405260499409873e-07, "loss": 2.0153, "step": 26172 }, { "epoch": 0.87, "grad_norm": 0.7290664911270142, "learning_rate": 8.400995644940457e-07, "loss": 2.0074, "step": 26173 }, { "epoch": 0.87, "grad_norm": 0.7530043721199036, "learning_rate": 8.396731825302906e-07, "loss": 2.0752, "step": 26174 }, { "epoch": 0.87, "grad_norm": 0.7531362175941467, "learning_rate": 8.392469040545426e-07, "loss": 2.0102, "step": 26175 }, { "epoch": 0.87, "grad_norm": 0.7369971871376038, "learning_rate": 8.388207290716133e-07, "loss": 2.0001, "step": 26176 }, { "epoch": 0.87, "grad_norm": 0.7616905570030212, "learning_rate": 8.383946575863166e-07, "loss": 2.0335, "step": 26177 }, { "epoch": 0.87, "grad_norm": 0.7397873401641846, "learning_rate": 8.3796868960347e-07, "loss": 2.0452, "step": 26178 }, { "epoch": 0.87, "grad_norm": 0.735589325428009, "learning_rate": 8.375428251278839e-07, "loss": 2.0234, "step": 26179 }, { "epoch": 0.87, "grad_norm": 0.7499776482582092, "learning_rate": 8.371170641643667e-07, "loss": 1.9932, "step": 26180 }, { "epoch": 0.87, "grad_norm": 0.7413488626480103, "learning_rate": 8.366914067177312e-07, "loss": 1.9895, "step": 26181 }, { "epoch": 0.87, "grad_norm": 0.7432525157928467, "learning_rate": 8.362658527927881e-07, "loss": 2.0616, "step": 26182 }, { "epoch": 0.87, "grad_norm": 0.7550211548805237, "learning_rate": 8.358404023943412e-07, "loss": 1.9723, "step": 26183 }, { "epoch": 0.87, "grad_norm": 0.7545579075813293, "learning_rate": 8.354150555272e-07, "loss": 2.1081, "step": 26184 }, { "epoch": 0.87, "grad_norm": 0.7402257323265076, "learning_rate": 8.349898121961686e-07, "loss": 2.0711, "step": 26185 }, { "epoch": 0.87, "grad_norm": 0.7684229612350464, "learning_rate": 8.345646724060497e-07, "loss": 2.0657, "step": 26186 }, { "epoch": 0.87, "grad_norm": 0.7262281179428101, "learning_rate": 8.341396361616472e-07, "loss": 2.0329, "step": 26187 }, { "epoch": 0.87, "grad_norm": 0.7382645606994629, "learning_rate": 8.337147034677673e-07, "loss": 2.0115, "step": 26188 }, { "epoch": 0.87, "grad_norm": 0.7389950752258301, "learning_rate": 8.332898743292028e-07, "loss": 2.0129, "step": 26189 }, { "epoch": 0.87, "grad_norm": 0.7545232772827148, "learning_rate": 8.328651487507577e-07, "loss": 2.0219, "step": 26190 }, { "epoch": 0.87, "grad_norm": 0.7239479422569275, "learning_rate": 8.324405267372304e-07, "loss": 2.0535, "step": 26191 }, { "epoch": 0.87, "grad_norm": 0.7266597151756287, "learning_rate": 8.32016008293417e-07, "loss": 1.9762, "step": 26192 }, { "epoch": 0.87, "grad_norm": 0.7659448981285095, "learning_rate": 8.315915934241148e-07, "loss": 2.0832, "step": 26193 }, { "epoch": 0.87, "grad_norm": 0.730658769607544, "learning_rate": 8.311672821341165e-07, "loss": 2.0987, "step": 26194 }, { "epoch": 0.87, "grad_norm": 0.716567873954773, "learning_rate": 8.307430744282164e-07, "loss": 2.0384, "step": 26195 }, { "epoch": 0.87, "grad_norm": 0.754006564617157, "learning_rate": 8.303189703112069e-07, "loss": 2.0655, "step": 26196 }, { "epoch": 0.87, "grad_norm": 0.7325912117958069, "learning_rate": 8.298949697878811e-07, "loss": 2.0416, "step": 26197 }, { "epoch": 0.87, "grad_norm": 0.7155423760414124, "learning_rate": 8.294710728630284e-07, "loss": 2.034, "step": 26198 }, { "epoch": 0.87, "grad_norm": 0.7548443078994751, "learning_rate": 8.29047279541435e-07, "loss": 2.0181, "step": 26199 }, { "epoch": 0.87, "grad_norm": 0.7647164463996887, "learning_rate": 8.286235898278927e-07, "loss": 2.0371, "step": 26200 }, { "epoch": 0.87, "grad_norm": 0.7409639358520508, "learning_rate": 8.282000037271864e-07, "loss": 2.0341, "step": 26201 }, { "epoch": 0.87, "grad_norm": 0.7623330950737, "learning_rate": 8.277765212440981e-07, "loss": 2.0262, "step": 26202 }, { "epoch": 0.87, "grad_norm": 0.7453184127807617, "learning_rate": 8.27353142383417e-07, "loss": 2.0066, "step": 26203 }, { "epoch": 0.87, "grad_norm": 0.7607576251029968, "learning_rate": 8.269298671499248e-07, "loss": 2.0288, "step": 26204 }, { "epoch": 0.87, "grad_norm": 0.7142048478126526, "learning_rate": 8.265066955484013e-07, "loss": 2.0262, "step": 26205 }, { "epoch": 0.87, "grad_norm": 0.7428783178329468, "learning_rate": 8.260836275836315e-07, "loss": 2.0931, "step": 26206 }, { "epoch": 0.87, "grad_norm": 0.7461102604866028, "learning_rate": 8.256606632603926e-07, "loss": 1.973, "step": 26207 }, { "epoch": 0.87, "grad_norm": 0.7251810431480408, "learning_rate": 8.252378025834606e-07, "loss": 2.0485, "step": 26208 }, { "epoch": 0.87, "grad_norm": 0.7395917773246765, "learning_rate": 8.248150455576143e-07, "loss": 2.0417, "step": 26209 }, { "epoch": 0.87, "grad_norm": 0.7333256006240845, "learning_rate": 8.243923921876351e-07, "loss": 2.0379, "step": 26210 }, { "epoch": 0.87, "grad_norm": 0.7454107999801636, "learning_rate": 8.239698424782894e-07, "loss": 2.0641, "step": 26211 }, { "epoch": 0.87, "grad_norm": 0.7295187711715698, "learning_rate": 8.235473964343543e-07, "loss": 2.0835, "step": 26212 }, { "epoch": 0.87, "grad_norm": 0.7689849734306335, "learning_rate": 8.23125054060605e-07, "loss": 2.0303, "step": 26213 }, { "epoch": 0.87, "grad_norm": 0.7851645946502686, "learning_rate": 8.227028153618077e-07, "loss": 1.9667, "step": 26214 }, { "epoch": 0.87, "grad_norm": 0.7135246396064758, "learning_rate": 8.222806803427386e-07, "loss": 1.9613, "step": 26215 }, { "epoch": 0.87, "grad_norm": 0.7843337655067444, "learning_rate": 8.218586490081636e-07, "loss": 2.0539, "step": 26216 }, { "epoch": 0.87, "grad_norm": 0.7629027366638184, "learning_rate": 8.214367213628493e-07, "loss": 2.1019, "step": 26217 }, { "epoch": 0.87, "grad_norm": 0.793875515460968, "learning_rate": 8.210148974115628e-07, "loss": 2.0441, "step": 26218 }, { "epoch": 0.87, "grad_norm": 0.7373671531677246, "learning_rate": 8.205931771590725e-07, "loss": 2.0249, "step": 26219 }, { "epoch": 0.87, "grad_norm": 0.7603422403335571, "learning_rate": 8.201715606101413e-07, "loss": 2.0431, "step": 26220 }, { "epoch": 0.87, "grad_norm": 0.7383587956428528, "learning_rate": 8.197500477695297e-07, "loss": 2.0717, "step": 26221 }, { "epoch": 0.87, "grad_norm": 0.722493052482605, "learning_rate": 8.19328638642004e-07, "loss": 2.0437, "step": 26222 }, { "epoch": 0.87, "grad_norm": 0.7490050792694092, "learning_rate": 8.189073332323227e-07, "loss": 2.0308, "step": 26223 }, { "epoch": 0.87, "grad_norm": 0.7656798958778381, "learning_rate": 8.184861315452442e-07, "loss": 2.0752, "step": 26224 }, { "epoch": 0.87, "grad_norm": 0.7712162137031555, "learning_rate": 8.180650335855278e-07, "loss": 2.1078, "step": 26225 }, { "epoch": 0.87, "grad_norm": 0.739031195640564, "learning_rate": 8.176440393579343e-07, "loss": 2.0361, "step": 26226 }, { "epoch": 0.87, "grad_norm": 0.7344566583633423, "learning_rate": 8.172231488672145e-07, "loss": 2.1097, "step": 26227 }, { "epoch": 0.87, "grad_norm": 0.7639145851135254, "learning_rate": 8.168023621181276e-07, "loss": 2.0441, "step": 26228 }, { "epoch": 0.87, "grad_norm": 0.7794142961502075, "learning_rate": 8.163816791154266e-07, "loss": 1.9493, "step": 26229 }, { "epoch": 0.87, "grad_norm": 0.7477810978889465, "learning_rate": 8.159610998638612e-07, "loss": 2.0378, "step": 26230 }, { "epoch": 0.87, "grad_norm": 0.7755833864212036, "learning_rate": 8.15540624368184e-07, "loss": 2.0844, "step": 26231 }, { "epoch": 0.87, "grad_norm": 0.7709144353866577, "learning_rate": 8.151202526331503e-07, "loss": 2.0444, "step": 26232 }, { "epoch": 0.87, "grad_norm": 0.7284353375434875, "learning_rate": 8.146999846635017e-07, "loss": 2.0114, "step": 26233 }, { "epoch": 0.87, "grad_norm": 0.7367143630981445, "learning_rate": 8.142798204639901e-07, "loss": 2.0379, "step": 26234 }, { "epoch": 0.87, "grad_norm": 0.7553098797798157, "learning_rate": 8.138597600393628e-07, "loss": 2.0363, "step": 26235 }, { "epoch": 0.87, "grad_norm": 0.7275480628013611, "learning_rate": 8.134398033943624e-07, "loss": 1.9812, "step": 26236 }, { "epoch": 0.87, "grad_norm": 0.7098502516746521, "learning_rate": 8.130199505337377e-07, "loss": 1.994, "step": 26237 }, { "epoch": 0.87, "grad_norm": 0.7587746381759644, "learning_rate": 8.126002014622292e-07, "loss": 2.0298, "step": 26238 }, { "epoch": 0.87, "grad_norm": 0.7535634636878967, "learning_rate": 8.121805561845775e-07, "loss": 2.01, "step": 26239 }, { "epoch": 0.87, "grad_norm": 0.7612124085426331, "learning_rate": 8.117610147055254e-07, "loss": 2.0403, "step": 26240 }, { "epoch": 0.87, "grad_norm": 0.7397382259368896, "learning_rate": 8.113415770298139e-07, "loss": 2.0126, "step": 26241 }, { "epoch": 0.87, "grad_norm": 0.743455708026886, "learning_rate": 8.1092224316218e-07, "loss": 2.0553, "step": 26242 }, { "epoch": 0.87, "grad_norm": 0.7954873442649841, "learning_rate": 8.105030131073599e-07, "loss": 2.1584, "step": 26243 }, { "epoch": 0.87, "grad_norm": 0.8024573922157288, "learning_rate": 8.100838868700933e-07, "loss": 2.0194, "step": 26244 }, { "epoch": 0.87, "grad_norm": 0.7332099676132202, "learning_rate": 8.096648644551109e-07, "loss": 2.0095, "step": 26245 }, { "epoch": 0.87, "grad_norm": 0.7480826377868652, "learning_rate": 8.092459458671509e-07, "loss": 2.0844, "step": 26246 }, { "epoch": 0.87, "grad_norm": 0.7248606085777283, "learning_rate": 8.088271311109419e-07, "loss": 2.0156, "step": 26247 }, { "epoch": 0.87, "grad_norm": 0.7645732760429382, "learning_rate": 8.08408420191219e-07, "loss": 2.073, "step": 26248 }, { "epoch": 0.87, "grad_norm": 0.737801194190979, "learning_rate": 8.079898131127095e-07, "loss": 2.1071, "step": 26249 }, { "epoch": 0.87, "grad_norm": 0.741977870464325, "learning_rate": 8.075713098801463e-07, "loss": 2.0763, "step": 26250 }, { "epoch": 0.87, "grad_norm": 0.7460431456565857, "learning_rate": 8.071529104982545e-07, "loss": 2.0682, "step": 26251 }, { "epoch": 0.87, "grad_norm": 0.7416695356369019, "learning_rate": 8.067346149717592e-07, "loss": 2.1023, "step": 26252 }, { "epoch": 0.87, "grad_norm": 0.7273126840591431, "learning_rate": 8.063164233053888e-07, "loss": 2.0643, "step": 26253 }, { "epoch": 0.87, "grad_norm": 0.7504833340644836, "learning_rate": 8.058983355038718e-07, "loss": 2.0328, "step": 26254 }, { "epoch": 0.87, "grad_norm": 0.7454068064689636, "learning_rate": 8.054803515719234e-07, "loss": 2.1017, "step": 26255 }, { "epoch": 0.87, "grad_norm": 0.7185887098312378, "learning_rate": 8.050624715142685e-07, "loss": 2.0735, "step": 26256 }, { "epoch": 0.87, "grad_norm": 0.7821540236473083, "learning_rate": 8.046446953356313e-07, "loss": 1.9751, "step": 26257 }, { "epoch": 0.87, "grad_norm": 0.7268336415290833, "learning_rate": 8.042270230407278e-07, "loss": 2.0231, "step": 26258 }, { "epoch": 0.87, "grad_norm": 0.7370143532752991, "learning_rate": 8.038094546342801e-07, "loss": 2.0075, "step": 26259 }, { "epoch": 0.87, "grad_norm": 0.7629903554916382, "learning_rate": 8.033919901210019e-07, "loss": 1.9786, "step": 26260 }, { "epoch": 0.87, "grad_norm": 0.7801638841629028, "learning_rate": 8.029746295056129e-07, "loss": 2.033, "step": 26261 }, { "epoch": 0.87, "grad_norm": 0.7574886083602905, "learning_rate": 8.025573727928238e-07, "loss": 2.0515, "step": 26262 }, { "epoch": 0.87, "grad_norm": 0.7380183935165405, "learning_rate": 8.02140219987354e-07, "loss": 2.0275, "step": 26263 }, { "epoch": 0.87, "grad_norm": 0.7454653978347778, "learning_rate": 8.017231710939133e-07, "loss": 2.0361, "step": 26264 }, { "epoch": 0.87, "grad_norm": 0.7465541958808899, "learning_rate": 8.013062261172122e-07, "loss": 2.0171, "step": 26265 }, { "epoch": 0.87, "grad_norm": 0.7393627762794495, "learning_rate": 8.008893850619615e-07, "loss": 2.0589, "step": 26266 }, { "epoch": 0.87, "grad_norm": 0.7551132440567017, "learning_rate": 8.004726479328739e-07, "loss": 2.0531, "step": 26267 }, { "epoch": 0.87, "grad_norm": 0.7648429870605469, "learning_rate": 8.000560147346547e-07, "loss": 2.0298, "step": 26268 }, { "epoch": 0.87, "grad_norm": 0.7581826448440552, "learning_rate": 7.996394854720091e-07, "loss": 2.0969, "step": 26269 }, { "epoch": 0.87, "grad_norm": 0.7409756779670715, "learning_rate": 7.992230601496465e-07, "loss": 2.0619, "step": 26270 }, { "epoch": 0.87, "grad_norm": 0.7358968257904053, "learning_rate": 7.988067387722675e-07, "loss": 2.0637, "step": 26271 }, { "epoch": 0.87, "grad_norm": 0.7573010921478271, "learning_rate": 7.983905213445798e-07, "loss": 2.0174, "step": 26272 }, { "epoch": 0.87, "grad_norm": 0.7365456223487854, "learning_rate": 7.979744078712826e-07, "loss": 2.0299, "step": 26273 }, { "epoch": 0.87, "grad_norm": 1.1528874635696411, "learning_rate": 7.975583983570768e-07, "loss": 2.0402, "step": 26274 }, { "epoch": 0.87, "grad_norm": 0.7599830031394958, "learning_rate": 7.971424928066618e-07, "loss": 2.0056, "step": 26275 }, { "epoch": 0.87, "grad_norm": 0.7673853039741516, "learning_rate": 7.967266912247395e-07, "loss": 2.0138, "step": 26276 }, { "epoch": 0.87, "grad_norm": 0.7377079725265503, "learning_rate": 7.963109936160063e-07, "loss": 2.0608, "step": 26277 }, { "epoch": 0.87, "grad_norm": 0.7319782376289368, "learning_rate": 7.95895399985156e-07, "loss": 1.9687, "step": 26278 }, { "epoch": 0.87, "grad_norm": 0.7344086170196533, "learning_rate": 7.95479910336886e-07, "loss": 2.0712, "step": 26279 }, { "epoch": 0.87, "grad_norm": 0.7579789161682129, "learning_rate": 7.950645246758881e-07, "loss": 2.0154, "step": 26280 }, { "epoch": 0.87, "grad_norm": 0.7293540239334106, "learning_rate": 7.946492430068586e-07, "loss": 2.1091, "step": 26281 }, { "epoch": 0.87, "grad_norm": 0.7341769337654114, "learning_rate": 7.94234065334486e-07, "loss": 2.086, "step": 26282 }, { "epoch": 0.87, "grad_norm": 0.7867773175239563, "learning_rate": 7.938189916634619e-07, "loss": 2.0153, "step": 26283 }, { "epoch": 0.87, "grad_norm": 0.7890391945838928, "learning_rate": 7.934040219984751e-07, "loss": 2.0293, "step": 26284 }, { "epoch": 0.87, "grad_norm": 0.7641716003417969, "learning_rate": 7.92989156344216e-07, "loss": 2.0952, "step": 26285 }, { "epoch": 0.87, "grad_norm": 0.7397662401199341, "learning_rate": 7.925743947053688e-07, "loss": 2.0082, "step": 26286 }, { "epoch": 0.87, "grad_norm": 0.7331759333610535, "learning_rate": 7.921597370866185e-07, "loss": 2.0325, "step": 26287 }, { "epoch": 0.87, "grad_norm": 0.7580739259719849, "learning_rate": 7.917451834926515e-07, "loss": 1.9758, "step": 26288 }, { "epoch": 0.87, "grad_norm": 0.7553606629371643, "learning_rate": 7.913307339281517e-07, "loss": 2.0906, "step": 26289 }, { "epoch": 0.87, "grad_norm": 0.7575421929359436, "learning_rate": 7.90916388397801e-07, "loss": 2.0479, "step": 26290 }, { "epoch": 0.87, "grad_norm": 0.7442637085914612, "learning_rate": 7.905021469062779e-07, "loss": 2.0806, "step": 26291 }, { "epoch": 0.87, "grad_norm": 0.7745006084442139, "learning_rate": 7.900880094582664e-07, "loss": 2.0739, "step": 26292 }, { "epoch": 0.87, "grad_norm": 0.746653139591217, "learning_rate": 7.896739760584415e-07, "loss": 2.1147, "step": 26293 }, { "epoch": 0.87, "grad_norm": 0.7388312220573425, "learning_rate": 7.89260046711483e-07, "loss": 2.032, "step": 26294 }, { "epoch": 0.87, "grad_norm": 0.7484868168830872, "learning_rate": 7.888462214220671e-07, "loss": 1.9632, "step": 26295 }, { "epoch": 0.87, "grad_norm": 0.758126437664032, "learning_rate": 7.884325001948667e-07, "loss": 2.0639, "step": 26296 }, { "epoch": 0.87, "grad_norm": 0.7262428402900696, "learning_rate": 7.880188830345569e-07, "loss": 2.0655, "step": 26297 }, { "epoch": 0.87, "grad_norm": 0.7202563285827637, "learning_rate": 7.876053699458131e-07, "loss": 2.0383, "step": 26298 }, { "epoch": 0.87, "grad_norm": 0.7157536149024963, "learning_rate": 7.871919609333056e-07, "loss": 1.9861, "step": 26299 }, { "epoch": 0.88, "grad_norm": 0.7857599854469299, "learning_rate": 7.86778656001701e-07, "loss": 2.0625, "step": 26300 }, { "epoch": 0.88, "grad_norm": 0.7309787273406982, "learning_rate": 7.863654551556743e-07, "loss": 2.0554, "step": 26301 }, { "epoch": 0.88, "grad_norm": 0.7530249357223511, "learning_rate": 7.859523583998884e-07, "loss": 2.0996, "step": 26302 }, { "epoch": 0.88, "grad_norm": 0.7495779395103455, "learning_rate": 7.855393657390154e-07, "loss": 1.9862, "step": 26303 }, { "epoch": 0.88, "grad_norm": 0.7540934681892395, "learning_rate": 7.851264771777167e-07, "loss": 2.0219, "step": 26304 }, { "epoch": 0.88, "grad_norm": 0.7269781231880188, "learning_rate": 7.8471369272066e-07, "loss": 2.0525, "step": 26305 }, { "epoch": 0.88, "grad_norm": 0.7454487681388855, "learning_rate": 7.843010123725048e-07, "loss": 2.1233, "step": 26306 }, { "epoch": 0.88, "grad_norm": 0.7507216930389404, "learning_rate": 7.838884361379185e-07, "loss": 2.0871, "step": 26307 }, { "epoch": 0.88, "grad_norm": 0.7801713943481445, "learning_rate": 7.834759640215595e-07, "loss": 2.0646, "step": 26308 }, { "epoch": 0.88, "grad_norm": 0.7474452257156372, "learning_rate": 7.830635960280852e-07, "loss": 2.0944, "step": 26309 }, { "epoch": 0.88, "grad_norm": 0.7268065214157104, "learning_rate": 7.826513321621576e-07, "loss": 2.0131, "step": 26310 }, { "epoch": 0.88, "grad_norm": 0.7813860774040222, "learning_rate": 7.822391724284351e-07, "loss": 1.9443, "step": 26311 }, { "epoch": 0.88, "grad_norm": 0.7407920956611633, "learning_rate": 7.818271168315716e-07, "loss": 2.0948, "step": 26312 }, { "epoch": 0.88, "grad_norm": 0.7049204111099243, "learning_rate": 7.814151653762214e-07, "loss": 2.0573, "step": 26313 }, { "epoch": 0.88, "grad_norm": 0.7173821926116943, "learning_rate": 7.810033180670429e-07, "loss": 1.9916, "step": 26314 }, { "epoch": 0.88, "grad_norm": 0.7453610301017761, "learning_rate": 7.805915749086824e-07, "loss": 1.9909, "step": 26315 }, { "epoch": 0.88, "grad_norm": 0.7367833852767944, "learning_rate": 7.801799359057982e-07, "loss": 2.0535, "step": 26316 }, { "epoch": 0.88, "grad_norm": 0.7298067808151245, "learning_rate": 7.79768401063038e-07, "loss": 2.0149, "step": 26317 }, { "epoch": 0.88, "grad_norm": 0.7338241338729858, "learning_rate": 7.793569703850479e-07, "loss": 2.0353, "step": 26318 }, { "epoch": 0.88, "grad_norm": 0.7431285381317139, "learning_rate": 7.789456438764798e-07, "loss": 2.1085, "step": 26319 }, { "epoch": 0.88, "grad_norm": 0.7254428267478943, "learning_rate": 7.78534421541981e-07, "loss": 2.0804, "step": 26320 }, { "epoch": 0.88, "grad_norm": 0.7380424737930298, "learning_rate": 7.781233033861957e-07, "loss": 2.0775, "step": 26321 }, { "epoch": 0.88, "grad_norm": 0.7428423166275024, "learning_rate": 7.777122894137679e-07, "loss": 2.1024, "step": 26322 }, { "epoch": 0.88, "grad_norm": 0.7416144609451294, "learning_rate": 7.773013796293439e-07, "loss": 1.9696, "step": 26323 }, { "epoch": 0.88, "grad_norm": 0.7438064217567444, "learning_rate": 7.76890574037561e-07, "loss": 1.9977, "step": 26324 }, { "epoch": 0.88, "grad_norm": 0.733548104763031, "learning_rate": 7.764798726430655e-07, "loss": 2.0378, "step": 26325 }, { "epoch": 0.88, "grad_norm": 0.7357192039489746, "learning_rate": 7.760692754504928e-07, "loss": 1.994, "step": 26326 }, { "epoch": 0.88, "grad_norm": 0.7355829477310181, "learning_rate": 7.756587824644857e-07, "loss": 2.0088, "step": 26327 }, { "epoch": 0.88, "grad_norm": 0.742100179195404, "learning_rate": 7.752483936896771e-07, "loss": 2.0375, "step": 26328 }, { "epoch": 0.88, "grad_norm": 0.7232990860939026, "learning_rate": 7.748381091307089e-07, "loss": 2.0771, "step": 26329 }, { "epoch": 0.88, "grad_norm": 0.7615727782249451, "learning_rate": 7.74427928792213e-07, "loss": 2.1025, "step": 26330 }, { "epoch": 0.88, "grad_norm": 0.7557610273361206, "learning_rate": 7.740178526788211e-07, "loss": 2.0423, "step": 26331 }, { "epoch": 0.88, "grad_norm": 0.7456275224685669, "learning_rate": 7.736078807951696e-07, "loss": 2.0228, "step": 26332 }, { "epoch": 0.88, "grad_norm": 0.7457318902015686, "learning_rate": 7.731980131458905e-07, "loss": 2.0496, "step": 26333 }, { "epoch": 0.88, "grad_norm": 0.7533623576164246, "learning_rate": 7.727882497356121e-07, "loss": 2.0745, "step": 26334 }, { "epoch": 0.88, "grad_norm": 0.7600063681602478, "learning_rate": 7.723785905689629e-07, "loss": 2.0028, "step": 26335 }, { "epoch": 0.88, "grad_norm": 0.8000017404556274, "learning_rate": 7.719690356505749e-07, "loss": 2.1033, "step": 26336 }, { "epoch": 0.88, "grad_norm": 0.745420515537262, "learning_rate": 7.715595849850699e-07, "loss": 1.9973, "step": 26337 }, { "epoch": 0.88, "grad_norm": 0.7538821697235107, "learning_rate": 7.711502385770774e-07, "loss": 2.0988, "step": 26338 }, { "epoch": 0.88, "grad_norm": 0.7479807734489441, "learning_rate": 7.70740996431224e-07, "loss": 2.1192, "step": 26339 }, { "epoch": 0.88, "grad_norm": 0.740813672542572, "learning_rate": 7.703318585521257e-07, "loss": 2.1006, "step": 26340 }, { "epoch": 0.88, "grad_norm": 0.7505103349685669, "learning_rate": 7.69922824944409e-07, "loss": 2.0687, "step": 26341 }, { "epoch": 0.88, "grad_norm": 0.7700856924057007, "learning_rate": 7.69513895612698e-07, "loss": 2.0877, "step": 26342 }, { "epoch": 0.88, "grad_norm": 0.7559993863105774, "learning_rate": 7.691050705616077e-07, "loss": 2.0941, "step": 26343 }, { "epoch": 0.88, "grad_norm": 0.7382938265800476, "learning_rate": 7.686963497957578e-07, "loss": 2.0838, "step": 26344 }, { "epoch": 0.88, "grad_norm": 0.7610565423965454, "learning_rate": 7.682877333197681e-07, "loss": 1.9983, "step": 26345 }, { "epoch": 0.88, "grad_norm": 0.7649804353713989, "learning_rate": 7.678792211382513e-07, "loss": 2.0044, "step": 26346 }, { "epoch": 0.88, "grad_norm": 0.7679364085197449, "learning_rate": 7.674708132558261e-07, "loss": 2.0719, "step": 26347 }, { "epoch": 0.88, "grad_norm": 0.7587807178497314, "learning_rate": 7.670625096771034e-07, "loss": 2.0953, "step": 26348 }, { "epoch": 0.88, "grad_norm": 0.7314034104347229, "learning_rate": 7.666543104066992e-07, "loss": 2.027, "step": 26349 }, { "epoch": 0.88, "grad_norm": 0.7628390789031982, "learning_rate": 7.662462154492212e-07, "loss": 2.0085, "step": 26350 }, { "epoch": 0.88, "grad_norm": 0.7543811202049255, "learning_rate": 7.658382248092822e-07, "loss": 2.0245, "step": 26351 }, { "epoch": 0.88, "grad_norm": 0.7537243962287903, "learning_rate": 7.654303384914952e-07, "loss": 2.007, "step": 26352 }, { "epoch": 0.88, "grad_norm": 0.7362833619117737, "learning_rate": 7.650225565004598e-07, "loss": 1.9959, "step": 26353 }, { "epoch": 0.88, "grad_norm": 0.728485643863678, "learning_rate": 7.646148788407881e-07, "loss": 2.0026, "step": 26354 }, { "epoch": 0.88, "grad_norm": 0.7340567111968994, "learning_rate": 7.642073055170862e-07, "loss": 2.0399, "step": 26355 }, { "epoch": 0.88, "grad_norm": 0.7563779950141907, "learning_rate": 7.637998365339583e-07, "loss": 2.038, "step": 26356 }, { "epoch": 0.88, "grad_norm": 0.7270867824554443, "learning_rate": 7.633924718960039e-07, "loss": 2.0107, "step": 26357 }, { "epoch": 0.88, "grad_norm": 0.7485671639442444, "learning_rate": 7.629852116078307e-07, "loss": 2.0621, "step": 26358 }, { "epoch": 0.88, "grad_norm": 0.738422691822052, "learning_rate": 7.625780556740358e-07, "loss": 2.0656, "step": 26359 }, { "epoch": 0.88, "grad_norm": 0.7685933113098145, "learning_rate": 7.621710040992192e-07, "loss": 2.0174, "step": 26360 }, { "epoch": 0.88, "grad_norm": 0.7431742548942566, "learning_rate": 7.617640568879836e-07, "loss": 2.04, "step": 26361 }, { "epoch": 0.88, "grad_norm": 0.7814656496047974, "learning_rate": 7.613572140449233e-07, "loss": 2.0558, "step": 26362 }, { "epoch": 0.88, "grad_norm": 0.8179240822792053, "learning_rate": 7.609504755746322e-07, "loss": 2.0996, "step": 26363 }, { "epoch": 0.88, "grad_norm": 0.7479534149169922, "learning_rate": 7.605438414817101e-07, "loss": 2.0883, "step": 26364 }, { "epoch": 0.88, "grad_norm": 0.7574878931045532, "learning_rate": 7.6013731177075e-07, "loss": 2.0474, "step": 26365 }, { "epoch": 0.88, "grad_norm": 0.7467697262763977, "learning_rate": 7.597308864463404e-07, "loss": 2.0546, "step": 26366 }, { "epoch": 0.88, "grad_norm": 0.7816771864891052, "learning_rate": 7.593245655130766e-07, "loss": 2.0325, "step": 26367 }, { "epoch": 0.88, "grad_norm": 0.7302454710006714, "learning_rate": 7.589183489755491e-07, "loss": 1.9852, "step": 26368 }, { "epoch": 0.88, "grad_norm": 0.7293674945831299, "learning_rate": 7.585122368383457e-07, "loss": 2.0286, "step": 26369 }, { "epoch": 0.88, "grad_norm": 0.7350574135780334, "learning_rate": 7.581062291060559e-07, "loss": 2.0225, "step": 26370 }, { "epoch": 0.88, "grad_norm": 0.7589457035064697, "learning_rate": 7.577003257832661e-07, "loss": 2.1276, "step": 26371 }, { "epoch": 0.88, "grad_norm": 0.7650185823440552, "learning_rate": 7.572945268745602e-07, "loss": 2.0265, "step": 26372 }, { "epoch": 0.88, "grad_norm": 0.7286154627799988, "learning_rate": 7.568888323845236e-07, "loss": 1.9871, "step": 26373 }, { "epoch": 0.88, "grad_norm": 0.7368381023406982, "learning_rate": 7.564832423177427e-07, "loss": 2.0295, "step": 26374 }, { "epoch": 0.88, "grad_norm": 0.7433767318725586, "learning_rate": 7.560777566787947e-07, "loss": 2.0099, "step": 26375 }, { "epoch": 0.88, "grad_norm": 0.7593421936035156, "learning_rate": 7.556723754722617e-07, "loss": 1.9895, "step": 26376 }, { "epoch": 0.88, "grad_norm": 0.7208841443061829, "learning_rate": 7.552670987027267e-07, "loss": 1.9887, "step": 26377 }, { "epoch": 0.88, "grad_norm": 0.7333230376243591, "learning_rate": 7.548619263747658e-07, "loss": 2.0753, "step": 26378 }, { "epoch": 0.88, "grad_norm": 0.7555572986602783, "learning_rate": 7.544568584929546e-07, "loss": 2.0366, "step": 26379 }, { "epoch": 0.88, "grad_norm": 0.7341757416725159, "learning_rate": 7.540518950618736e-07, "loss": 2.0295, "step": 26380 }, { "epoch": 0.88, "grad_norm": 0.7260982990264893, "learning_rate": 7.536470360860948e-07, "loss": 1.9792, "step": 26381 }, { "epoch": 0.88, "grad_norm": 0.7694514393806458, "learning_rate": 7.532422815701912e-07, "loss": 2.0932, "step": 26382 }, { "epoch": 0.88, "grad_norm": 0.7871598601341248, "learning_rate": 7.528376315187403e-07, "loss": 2.0693, "step": 26383 }, { "epoch": 0.88, "grad_norm": 0.76329505443573, "learning_rate": 7.524330859363094e-07, "loss": 2.051, "step": 26384 }, { "epoch": 0.88, "grad_norm": 0.778018593788147, "learning_rate": 7.520286448274694e-07, "loss": 2.0293, "step": 26385 }, { "epoch": 0.88, "grad_norm": 0.7390135526657104, "learning_rate": 7.5162430819679e-07, "loss": 2.0497, "step": 26386 }, { "epoch": 0.88, "grad_norm": 0.739269495010376, "learning_rate": 7.512200760488409e-07, "loss": 2.0223, "step": 26387 }, { "epoch": 0.88, "grad_norm": 0.7386905550956726, "learning_rate": 7.508159483881839e-07, "loss": 2.0541, "step": 26388 }, { "epoch": 0.88, "grad_norm": 0.7574033737182617, "learning_rate": 7.504119252193886e-07, "loss": 2.0616, "step": 26389 }, { "epoch": 0.88, "grad_norm": 0.7468547821044922, "learning_rate": 7.500080065470194e-07, "loss": 2.0074, "step": 26390 }, { "epoch": 0.88, "grad_norm": 0.7471441626548767, "learning_rate": 7.496041923756381e-07, "loss": 1.9615, "step": 26391 }, { "epoch": 0.88, "grad_norm": 0.7442898154258728, "learning_rate": 7.492004827098088e-07, "loss": 2.0362, "step": 26392 }, { "epoch": 0.88, "grad_norm": 0.7246881127357483, "learning_rate": 7.487968775540899e-07, "loss": 2.0067, "step": 26393 }, { "epoch": 0.88, "grad_norm": 0.7344276905059814, "learning_rate": 7.483933769130414e-07, "loss": 2.0322, "step": 26394 }, { "epoch": 0.88, "grad_norm": 0.7571027278900146, "learning_rate": 7.479899807912227e-07, "loss": 2.0514, "step": 26395 }, { "epoch": 0.88, "grad_norm": 0.7393615245819092, "learning_rate": 7.475866891931938e-07, "loss": 1.9932, "step": 26396 }, { "epoch": 0.88, "grad_norm": 0.7445781826972961, "learning_rate": 7.471835021235052e-07, "loss": 2.0727, "step": 26397 }, { "epoch": 0.88, "grad_norm": 0.7247915267944336, "learning_rate": 7.467804195867145e-07, "loss": 2.0197, "step": 26398 }, { "epoch": 0.88, "grad_norm": 0.7440077066421509, "learning_rate": 7.46377441587377e-07, "loss": 2.1172, "step": 26399 }, { "epoch": 0.88, "grad_norm": 0.7396876811981201, "learning_rate": 7.459745681300445e-07, "loss": 2.0435, "step": 26400 }, { "epoch": 0.88, "grad_norm": 0.7395563721656799, "learning_rate": 7.455717992192657e-07, "loss": 2.0289, "step": 26401 }, { "epoch": 0.88, "grad_norm": 0.741050660610199, "learning_rate": 7.451691348595957e-07, "loss": 2.123, "step": 26402 }, { "epoch": 0.88, "grad_norm": 0.7924953103065491, "learning_rate": 7.447665750555788e-07, "loss": 2.0279, "step": 26403 }, { "epoch": 0.88, "grad_norm": 0.808619499206543, "learning_rate": 7.443641198117646e-07, "loss": 1.9745, "step": 26404 }, { "epoch": 0.88, "grad_norm": 0.7439736127853394, "learning_rate": 7.439617691327028e-07, "loss": 2.0293, "step": 26405 }, { "epoch": 0.88, "grad_norm": 0.7520738244056702, "learning_rate": 7.435595230229353e-07, "loss": 2.0804, "step": 26406 }, { "epoch": 0.88, "grad_norm": 0.732196569442749, "learning_rate": 7.431573814870064e-07, "loss": 2.0431, "step": 26407 }, { "epoch": 0.88, "grad_norm": 0.7328974604606628, "learning_rate": 7.427553445294622e-07, "loss": 2.0551, "step": 26408 }, { "epoch": 0.88, "grad_norm": 0.7316774129867554, "learning_rate": 7.423534121548426e-07, "loss": 2.0681, "step": 26409 }, { "epoch": 0.88, "grad_norm": 0.7727085947990417, "learning_rate": 7.419515843676872e-07, "loss": 1.9518, "step": 26410 }, { "epoch": 0.88, "grad_norm": 0.7414073944091797, "learning_rate": 7.41549861172537e-07, "loss": 2.054, "step": 26411 }, { "epoch": 0.88, "grad_norm": 0.7564743161201477, "learning_rate": 7.411482425739314e-07, "loss": 2.0816, "step": 26412 }, { "epoch": 0.88, "grad_norm": 0.7419377565383911, "learning_rate": 7.407467285764059e-07, "loss": 2.0969, "step": 26413 }, { "epoch": 0.88, "grad_norm": 0.7510760426521301, "learning_rate": 7.403453191844989e-07, "loss": 2.0543, "step": 26414 }, { "epoch": 0.88, "grad_norm": 0.7635425329208374, "learning_rate": 7.399440144027436e-07, "loss": 2.0898, "step": 26415 }, { "epoch": 0.88, "grad_norm": 0.7701706290245056, "learning_rate": 7.395428142356731e-07, "loss": 2.074, "step": 26416 }, { "epoch": 0.88, "grad_norm": 0.7268241047859192, "learning_rate": 7.391417186878191e-07, "loss": 2.0605, "step": 26417 }, { "epoch": 0.88, "grad_norm": 0.7377090454101562, "learning_rate": 7.387407277637193e-07, "loss": 2.0572, "step": 26418 }, { "epoch": 0.88, "grad_norm": 0.7595619559288025, "learning_rate": 7.383398414678955e-07, "loss": 2.0374, "step": 26419 }, { "epoch": 0.88, "grad_norm": 0.7361778020858765, "learning_rate": 7.379390598048797e-07, "loss": 2.0078, "step": 26420 }, { "epoch": 0.88, "grad_norm": 0.7585855722427368, "learning_rate": 7.375383827792027e-07, "loss": 2.0092, "step": 26421 }, { "epoch": 0.88, "grad_norm": 0.7654544115066528, "learning_rate": 7.371378103953863e-07, "loss": 2.0294, "step": 26422 }, { "epoch": 0.88, "grad_norm": 0.7516477704048157, "learning_rate": 7.367373426579605e-07, "loss": 2.0207, "step": 26423 }, { "epoch": 0.88, "grad_norm": 0.7430716753005981, "learning_rate": 7.36336979571447e-07, "loss": 1.9939, "step": 26424 }, { "epoch": 0.88, "grad_norm": 0.7393102049827576, "learning_rate": 7.359367211403679e-07, "loss": 2.0631, "step": 26425 }, { "epoch": 0.88, "grad_norm": 0.7286345362663269, "learning_rate": 7.355365673692471e-07, "loss": 2.0357, "step": 26426 }, { "epoch": 0.88, "grad_norm": 0.7644442319869995, "learning_rate": 7.351365182626058e-07, "loss": 2.0613, "step": 26427 }, { "epoch": 0.88, "grad_norm": 0.7338255643844604, "learning_rate": 7.347365738249624e-07, "loss": 1.972, "step": 26428 }, { "epoch": 0.88, "grad_norm": 0.77260422706604, "learning_rate": 7.343367340608331e-07, "loss": 1.9653, "step": 26429 }, { "epoch": 0.88, "grad_norm": 0.74418044090271, "learning_rate": 7.339369989747392e-07, "loss": 2.0189, "step": 26430 }, { "epoch": 0.88, "grad_norm": 0.7452181577682495, "learning_rate": 7.335373685711944e-07, "loss": 2.0163, "step": 26431 }, { "epoch": 0.88, "grad_norm": 0.7488622069358826, "learning_rate": 7.33137842854712e-07, "loss": 2.0367, "step": 26432 }, { "epoch": 0.88, "grad_norm": 0.758019745349884, "learning_rate": 7.327384218298083e-07, "loss": 2.0248, "step": 26433 }, { "epoch": 0.88, "grad_norm": 0.7595903277397156, "learning_rate": 7.323391055009954e-07, "loss": 2.0775, "step": 26434 }, { "epoch": 0.88, "grad_norm": 0.7825447916984558, "learning_rate": 7.319398938727829e-07, "loss": 2.0385, "step": 26435 }, { "epoch": 0.88, "grad_norm": 0.7442269921302795, "learning_rate": 7.315407869496827e-07, "loss": 1.9977, "step": 26436 }, { "epoch": 0.88, "grad_norm": 0.7422558665275574, "learning_rate": 7.311417847362035e-07, "loss": 2.118, "step": 26437 }, { "epoch": 0.88, "grad_norm": 0.7676902413368225, "learning_rate": 7.307428872368505e-07, "loss": 2.0272, "step": 26438 }, { "epoch": 0.88, "grad_norm": 0.7200085520744324, "learning_rate": 7.303440944561324e-07, "loss": 2.0021, "step": 26439 }, { "epoch": 0.88, "grad_norm": 0.7337111830711365, "learning_rate": 7.299454063985567e-07, "loss": 2.0585, "step": 26440 }, { "epoch": 0.88, "grad_norm": 0.7280609607696533, "learning_rate": 7.29546823068622e-07, "loss": 2.0754, "step": 26441 }, { "epoch": 0.88, "grad_norm": 0.7327431440353394, "learning_rate": 7.291483444708347e-07, "loss": 1.9796, "step": 26442 }, { "epoch": 0.88, "grad_norm": 0.7320011854171753, "learning_rate": 7.287499706096968e-07, "loss": 2.0279, "step": 26443 }, { "epoch": 0.88, "grad_norm": 0.7411849498748779, "learning_rate": 7.283517014897079e-07, "loss": 2.0144, "step": 26444 }, { "epoch": 0.88, "grad_norm": 0.7268558740615845, "learning_rate": 7.279535371153689e-07, "loss": 2.0447, "step": 26445 }, { "epoch": 0.88, "grad_norm": 0.7433812022209167, "learning_rate": 7.275554774911764e-07, "loss": 1.979, "step": 26446 }, { "epoch": 0.88, "grad_norm": 0.7657671570777893, "learning_rate": 7.271575226216265e-07, "loss": 2.0262, "step": 26447 }, { "epoch": 0.88, "grad_norm": 0.7456580400466919, "learning_rate": 7.267596725112169e-07, "loss": 2.0297, "step": 26448 }, { "epoch": 0.88, "grad_norm": 0.7219790816307068, "learning_rate": 7.263619271644429e-07, "loss": 2.1402, "step": 26449 }, { "epoch": 0.88, "grad_norm": 0.7132549285888672, "learning_rate": 7.259642865857975e-07, "loss": 2.0373, "step": 26450 }, { "epoch": 0.88, "grad_norm": 0.7332443594932556, "learning_rate": 7.255667507797703e-07, "loss": 1.946, "step": 26451 }, { "epoch": 0.88, "grad_norm": 0.7466189861297607, "learning_rate": 7.251693197508558e-07, "loss": 2.0259, "step": 26452 }, { "epoch": 0.88, "grad_norm": 0.7678950428962708, "learning_rate": 7.247719935035436e-07, "loss": 2.0984, "step": 26453 }, { "epoch": 0.88, "grad_norm": 0.7300118803977966, "learning_rate": 7.243747720423211e-07, "loss": 2.0864, "step": 26454 }, { "epoch": 0.88, "grad_norm": 0.7387140393257141, "learning_rate": 7.239776553716749e-07, "loss": 2.0322, "step": 26455 }, { "epoch": 0.88, "grad_norm": 0.7114147543907166, "learning_rate": 7.235806434960946e-07, "loss": 2.0451, "step": 26456 }, { "epoch": 0.88, "grad_norm": 0.765238344669342, "learning_rate": 7.231837364200623e-07, "loss": 2.0628, "step": 26457 }, { "epoch": 0.88, "grad_norm": 0.7328755259513855, "learning_rate": 7.227869341480653e-07, "loss": 1.9951, "step": 26458 }, { "epoch": 0.88, "grad_norm": 0.7428206205368042, "learning_rate": 7.223902366845847e-07, "loss": 2.1181, "step": 26459 }, { "epoch": 0.88, "grad_norm": 0.7576242089271545, "learning_rate": 7.219936440340991e-07, "loss": 2.0453, "step": 26460 }, { "epoch": 0.88, "grad_norm": 0.7584467530250549, "learning_rate": 7.215971562010926e-07, "loss": 2.0591, "step": 26461 }, { "epoch": 0.88, "grad_norm": 0.7323764562606812, "learning_rate": 7.212007731900462e-07, "loss": 2.0014, "step": 26462 }, { "epoch": 0.88, "grad_norm": 0.7369996905326843, "learning_rate": 7.208044950054338e-07, "loss": 2.0177, "step": 26463 }, { "epoch": 0.88, "grad_norm": 0.7372731566429138, "learning_rate": 7.204083216517344e-07, "loss": 2.0571, "step": 26464 }, { "epoch": 0.88, "grad_norm": 0.7596994042396545, "learning_rate": 7.20012253133423e-07, "loss": 2.0655, "step": 26465 }, { "epoch": 0.88, "grad_norm": 0.7575705051422119, "learning_rate": 7.196162894549741e-07, "loss": 2.0325, "step": 26466 }, { "epoch": 0.88, "grad_norm": 0.7497710585594177, "learning_rate": 7.192204306208628e-07, "loss": 2.0041, "step": 26467 }, { "epoch": 0.88, "grad_norm": 0.803541362285614, "learning_rate": 7.188246766355588e-07, "loss": 2.0959, "step": 26468 }, { "epoch": 0.88, "grad_norm": 0.725372314453125, "learning_rate": 7.184290275035355e-07, "loss": 1.9858, "step": 26469 }, { "epoch": 0.88, "grad_norm": 0.7647196054458618, "learning_rate": 7.180334832292601e-07, "loss": 2.1081, "step": 26470 }, { "epoch": 0.88, "grad_norm": 0.7598264813423157, "learning_rate": 7.176380438172048e-07, "loss": 2.0326, "step": 26471 }, { "epoch": 0.88, "grad_norm": 0.7464932203292847, "learning_rate": 7.172427092718348e-07, "loss": 2.0343, "step": 26472 }, { "epoch": 0.88, "grad_norm": 0.7246004939079285, "learning_rate": 7.168474795976144e-07, "loss": 1.9777, "step": 26473 }, { "epoch": 0.88, "grad_norm": 0.7622198462486267, "learning_rate": 7.164523547990099e-07, "loss": 2.1171, "step": 26474 }, { "epoch": 0.88, "grad_norm": 0.7321404218673706, "learning_rate": 7.160573348804889e-07, "loss": 2.0479, "step": 26475 }, { "epoch": 0.88, "grad_norm": 0.7492873668670654, "learning_rate": 7.1566241984651e-07, "loss": 2.0976, "step": 26476 }, { "epoch": 0.88, "grad_norm": 0.7410077452659607, "learning_rate": 7.152676097015354e-07, "loss": 1.9861, "step": 26477 }, { "epoch": 0.88, "grad_norm": 0.7286153435707092, "learning_rate": 7.148729044500269e-07, "loss": 2.0148, "step": 26478 }, { "epoch": 0.88, "grad_norm": 0.75808185338974, "learning_rate": 7.144783040964421e-07, "loss": 2.0016, "step": 26479 }, { "epoch": 0.88, "grad_norm": 0.7379709482192993, "learning_rate": 7.140838086452395e-07, "loss": 2.0684, "step": 26480 }, { "epoch": 0.88, "grad_norm": 0.7698858380317688, "learning_rate": 7.136894181008768e-07, "loss": 1.9937, "step": 26481 }, { "epoch": 0.88, "grad_norm": 0.7318554520606995, "learning_rate": 7.132951324678084e-07, "loss": 2.0797, "step": 26482 }, { "epoch": 0.88, "grad_norm": 0.7562581300735474, "learning_rate": 7.12900951750487e-07, "loss": 2.0592, "step": 26483 }, { "epoch": 0.88, "grad_norm": 0.7352113127708435, "learning_rate": 7.125068759533704e-07, "loss": 2.0741, "step": 26484 }, { "epoch": 0.88, "grad_norm": 0.7350488901138306, "learning_rate": 7.121129050809083e-07, "loss": 2.0109, "step": 26485 }, { "epoch": 0.88, "grad_norm": 0.7458146214485168, "learning_rate": 7.117190391375495e-07, "loss": 2.0796, "step": 26486 }, { "epoch": 0.88, "grad_norm": 0.7478554844856262, "learning_rate": 7.113252781277468e-07, "loss": 2.0186, "step": 26487 }, { "epoch": 0.88, "grad_norm": 0.7593786120414734, "learning_rate": 7.10931622055947e-07, "loss": 2.0402, "step": 26488 }, { "epoch": 0.88, "grad_norm": 0.7420054078102112, "learning_rate": 7.105380709265985e-07, "loss": 2.0895, "step": 26489 }, { "epoch": 0.88, "grad_norm": 0.7457179427146912, "learning_rate": 7.101446247441446e-07, "loss": 2.0319, "step": 26490 }, { "epoch": 0.88, "grad_norm": 0.7534802556037903, "learning_rate": 7.097512835130349e-07, "loss": 2.1183, "step": 26491 }, { "epoch": 0.88, "grad_norm": 0.7339127659797668, "learning_rate": 7.093580472377082e-07, "loss": 2.066, "step": 26492 }, { "epoch": 0.88, "grad_norm": 0.7291773557662964, "learning_rate": 7.08964915922612e-07, "loss": 2.0812, "step": 26493 }, { "epoch": 0.88, "grad_norm": 0.7058745622634888, "learning_rate": 7.085718895721849e-07, "loss": 2.0509, "step": 26494 }, { "epoch": 0.88, "grad_norm": 0.7566458582878113, "learning_rate": 7.081789681908646e-07, "loss": 2.1196, "step": 26495 }, { "epoch": 0.88, "grad_norm": 0.7218544483184814, "learning_rate": 7.077861517830942e-07, "loss": 1.984, "step": 26496 }, { "epoch": 0.88, "grad_norm": 0.7245951294898987, "learning_rate": 7.073934403533101e-07, "loss": 1.9591, "step": 26497 }, { "epoch": 0.88, "grad_norm": 0.756462574005127, "learning_rate": 7.070008339059497e-07, "loss": 2.035, "step": 26498 }, { "epoch": 0.88, "grad_norm": 0.7721906900405884, "learning_rate": 7.066083324454465e-07, "loss": 1.9546, "step": 26499 }, { "epoch": 0.88, "grad_norm": 0.7380475997924805, "learning_rate": 7.062159359762378e-07, "loss": 2.0238, "step": 26500 }, { "epoch": 0.88, "grad_norm": 0.7506248950958252, "learning_rate": 7.058236445027522e-07, "loss": 2.0171, "step": 26501 }, { "epoch": 0.88, "grad_norm": 0.7781503796577454, "learning_rate": 7.054314580294252e-07, "loss": 2.1069, "step": 26502 }, { "epoch": 0.88, "grad_norm": 0.7556978464126587, "learning_rate": 7.050393765606878e-07, "loss": 1.9994, "step": 26503 }, { "epoch": 0.88, "grad_norm": 0.7589229345321655, "learning_rate": 7.046474001009662e-07, "loss": 2.1239, "step": 26504 }, { "epoch": 0.88, "grad_norm": 0.7527299523353577, "learning_rate": 7.042555286546904e-07, "loss": 2.0423, "step": 26505 }, { "epoch": 0.88, "grad_norm": 0.7582868337631226, "learning_rate": 7.03863762226289e-07, "loss": 1.9939, "step": 26506 }, { "epoch": 0.88, "grad_norm": 0.7491586804389954, "learning_rate": 7.034721008201872e-07, "loss": 2.0746, "step": 26507 }, { "epoch": 0.88, "grad_norm": 0.7429332137107849, "learning_rate": 7.030805444408085e-07, "loss": 2.0528, "step": 26508 }, { "epoch": 0.88, "grad_norm": 0.7391231656074524, "learning_rate": 7.026890930925778e-07, "loss": 1.9935, "step": 26509 }, { "epoch": 0.88, "grad_norm": 0.7251815795898438, "learning_rate": 7.022977467799152e-07, "loss": 2.0139, "step": 26510 }, { "epoch": 0.88, "grad_norm": 0.7659398317337036, "learning_rate": 7.01906505507246e-07, "loss": 1.9828, "step": 26511 }, { "epoch": 0.88, "grad_norm": 0.7408603429794312, "learning_rate": 7.015153692789866e-07, "loss": 2.0568, "step": 26512 }, { "epoch": 0.88, "grad_norm": 0.7994125485420227, "learning_rate": 7.011243380995581e-07, "loss": 2.0755, "step": 26513 }, { "epoch": 0.88, "grad_norm": 0.7625914812088013, "learning_rate": 7.007334119733755e-07, "loss": 2.0435, "step": 26514 }, { "epoch": 0.88, "grad_norm": 0.7196329832077026, "learning_rate": 7.003425909048578e-07, "loss": 2.0066, "step": 26515 }, { "epoch": 0.88, "grad_norm": 0.7342898845672607, "learning_rate": 6.999518748984202e-07, "loss": 1.9848, "step": 26516 }, { "epoch": 0.88, "grad_norm": 0.7342272400856018, "learning_rate": 6.995612639584748e-07, "loss": 2.0463, "step": 26517 }, { "epoch": 0.88, "grad_norm": 0.7354905605316162, "learning_rate": 6.991707580894346e-07, "loss": 2.1275, "step": 26518 }, { "epoch": 0.88, "grad_norm": 0.7462214231491089, "learning_rate": 6.987803572957153e-07, "loss": 2.1382, "step": 26519 }, { "epoch": 0.88, "grad_norm": 0.7806128859519958, "learning_rate": 6.98390061581723e-07, "loss": 2.0094, "step": 26520 }, { "epoch": 0.88, "grad_norm": 0.7756829261779785, "learning_rate": 6.979998709518677e-07, "loss": 2.0605, "step": 26521 }, { "epoch": 0.88, "grad_norm": 0.7664603590965271, "learning_rate": 6.976097854105602e-07, "loss": 1.9872, "step": 26522 }, { "epoch": 0.88, "grad_norm": 0.746811032295227, "learning_rate": 6.972198049622036e-07, "loss": 2.0335, "step": 26523 }, { "epoch": 0.88, "grad_norm": 0.7430893778800964, "learning_rate": 6.968299296112069e-07, "loss": 2.0475, "step": 26524 }, { "epoch": 0.88, "grad_norm": 0.7427177429199219, "learning_rate": 6.96440159361974e-07, "loss": 2.0005, "step": 26525 }, { "epoch": 0.88, "grad_norm": 0.7456814050674438, "learning_rate": 6.960504942189061e-07, "loss": 2.1197, "step": 26526 }, { "epoch": 0.88, "grad_norm": 0.7501267194747925, "learning_rate": 6.956609341864073e-07, "loss": 2.0186, "step": 26527 }, { "epoch": 0.88, "grad_norm": 0.7502648234367371, "learning_rate": 6.952714792688797e-07, "loss": 2.0351, "step": 26528 }, { "epoch": 0.88, "grad_norm": 0.7430863380432129, "learning_rate": 6.94882129470722e-07, "loss": 1.9725, "step": 26529 }, { "epoch": 0.88, "grad_norm": 0.7320888042449951, "learning_rate": 6.944928847963306e-07, "loss": 2.0059, "step": 26530 }, { "epoch": 0.88, "grad_norm": 0.7501510977745056, "learning_rate": 6.941037452501076e-07, "loss": 1.9586, "step": 26531 }, { "epoch": 0.88, "grad_norm": 0.7599371671676636, "learning_rate": 6.937147108364439e-07, "loss": 2.0579, "step": 26532 }, { "epoch": 0.88, "grad_norm": 0.7800934314727783, "learning_rate": 6.933257815597394e-07, "loss": 1.979, "step": 26533 }, { "epoch": 0.88, "grad_norm": 0.7491557002067566, "learning_rate": 6.92936957424385e-07, "loss": 2.0172, "step": 26534 }, { "epoch": 0.88, "grad_norm": 0.7377597689628601, "learning_rate": 6.925482384347748e-07, "loss": 2.0603, "step": 26535 }, { "epoch": 0.88, "grad_norm": 0.7333504557609558, "learning_rate": 6.921596245952989e-07, "loss": 2.0038, "step": 26536 }, { "epoch": 0.88, "grad_norm": 0.7402035593986511, "learning_rate": 6.917711159103491e-07, "loss": 2.041, "step": 26537 }, { "epoch": 0.88, "grad_norm": 0.7260160446166992, "learning_rate": 6.913827123843165e-07, "loss": 2.0761, "step": 26538 }, { "epoch": 0.88, "grad_norm": 0.7535466551780701, "learning_rate": 6.909944140215841e-07, "loss": 2.1587, "step": 26539 }, { "epoch": 0.88, "grad_norm": 0.7271521687507629, "learning_rate": 6.906062208265407e-07, "loss": 2.0885, "step": 26540 }, { "epoch": 0.88, "grad_norm": 0.748368501663208, "learning_rate": 6.90218132803574e-07, "loss": 2.0732, "step": 26541 }, { "epoch": 0.88, "grad_norm": 0.7486216425895691, "learning_rate": 6.89830149957067e-07, "loss": 2.0661, "step": 26542 }, { "epoch": 0.88, "grad_norm": 0.7737284302711487, "learning_rate": 6.894422722914007e-07, "loss": 2.0627, "step": 26543 }, { "epoch": 0.88, "grad_norm": 0.9019026756286621, "learning_rate": 6.890544998109616e-07, "loss": 2.1682, "step": 26544 }, { "epoch": 0.88, "grad_norm": 0.7330599427223206, "learning_rate": 6.886668325201252e-07, "loss": 1.9643, "step": 26545 }, { "epoch": 0.88, "grad_norm": 0.7535129189491272, "learning_rate": 6.882792704232743e-07, "loss": 2.0018, "step": 26546 }, { "epoch": 0.88, "grad_norm": 0.7671772837638855, "learning_rate": 6.878918135247914e-07, "loss": 2.0099, "step": 26547 }, { "epoch": 0.88, "grad_norm": 0.7297726273536682, "learning_rate": 6.87504461829045e-07, "loss": 2.0821, "step": 26548 }, { "epoch": 0.88, "grad_norm": 0.7395918369293213, "learning_rate": 6.87117215340416e-07, "loss": 1.9655, "step": 26549 }, { "epoch": 0.88, "grad_norm": 0.7843712568283081, "learning_rate": 6.867300740632799e-07, "loss": 2.1788, "step": 26550 }, { "epoch": 0.88, "grad_norm": 0.7352198958396912, "learning_rate": 6.863430380020097e-07, "loss": 2.0272, "step": 26551 }, { "epoch": 0.88, "grad_norm": 0.7744807004928589, "learning_rate": 6.859561071609755e-07, "loss": 2.0674, "step": 26552 }, { "epoch": 0.88, "grad_norm": 0.7316256165504456, "learning_rate": 6.855692815445503e-07, "loss": 2.0129, "step": 26553 }, { "epoch": 0.88, "grad_norm": 0.7733190655708313, "learning_rate": 6.851825611571072e-07, "loss": 1.9968, "step": 26554 }, { "epoch": 0.88, "grad_norm": 0.7294735312461853, "learning_rate": 6.847959460030118e-07, "loss": 2.0529, "step": 26555 }, { "epoch": 0.88, "grad_norm": 0.7523104548454285, "learning_rate": 6.844094360866316e-07, "loss": 1.9091, "step": 26556 }, { "epoch": 0.88, "grad_norm": 0.7517918348312378, "learning_rate": 6.840230314123353e-07, "loss": 2.0483, "step": 26557 }, { "epoch": 0.88, "grad_norm": 0.7677464485168457, "learning_rate": 6.836367319844861e-07, "loss": 2.0387, "step": 26558 }, { "epoch": 0.88, "grad_norm": 0.7304145693778992, "learning_rate": 6.832505378074484e-07, "loss": 1.9952, "step": 26559 }, { "epoch": 0.88, "grad_norm": 0.7390241622924805, "learning_rate": 6.828644488855895e-07, "loss": 2.0343, "step": 26560 }, { "epoch": 0.88, "grad_norm": 0.7728503942489624, "learning_rate": 6.824784652232641e-07, "loss": 2.0574, "step": 26561 }, { "epoch": 0.88, "grad_norm": 0.7333219647407532, "learning_rate": 6.820925868248362e-07, "loss": 2.0294, "step": 26562 }, { "epoch": 0.88, "grad_norm": 0.7299553155899048, "learning_rate": 6.81706813694668e-07, "loss": 2.1594, "step": 26563 }, { "epoch": 0.88, "grad_norm": 0.740734338760376, "learning_rate": 6.813211458371149e-07, "loss": 2.0646, "step": 26564 }, { "epoch": 0.88, "grad_norm": 0.7588878273963928, "learning_rate": 6.809355832565323e-07, "loss": 1.9621, "step": 26565 }, { "epoch": 0.88, "grad_norm": 0.7782971262931824, "learning_rate": 6.805501259572789e-07, "loss": 2.0286, "step": 26566 }, { "epoch": 0.88, "grad_norm": 0.7436327934265137, "learning_rate": 6.801647739437079e-07, "loss": 2.0094, "step": 26567 }, { "epoch": 0.88, "grad_norm": 0.7539224028587341, "learning_rate": 6.797795272201735e-07, "loss": 2.125, "step": 26568 }, { "epoch": 0.88, "grad_norm": 0.7687482833862305, "learning_rate": 6.79394385791029e-07, "loss": 2.0988, "step": 26569 }, { "epoch": 0.88, "grad_norm": 0.758817732334137, "learning_rate": 6.790093496606243e-07, "loss": 2.1148, "step": 26570 }, { "epoch": 0.88, "grad_norm": 0.7613086700439453, "learning_rate": 6.786244188333069e-07, "loss": 2.0495, "step": 26571 }, { "epoch": 0.88, "grad_norm": 0.7290220856666565, "learning_rate": 6.7823959331343e-07, "loss": 2.0902, "step": 26572 }, { "epoch": 0.88, "grad_norm": 0.7710554003715515, "learning_rate": 6.778548731053403e-07, "loss": 1.9939, "step": 26573 }, { "epoch": 0.88, "grad_norm": 0.7453715801239014, "learning_rate": 6.774702582133796e-07, "loss": 2.0528, "step": 26574 }, { "epoch": 0.88, "grad_norm": 0.7385093569755554, "learning_rate": 6.77085748641898e-07, "loss": 2.0809, "step": 26575 }, { "epoch": 0.88, "grad_norm": 0.7329203486442566, "learning_rate": 6.767013443952386e-07, "loss": 2.0828, "step": 26576 }, { "epoch": 0.88, "grad_norm": 0.7378323078155518, "learning_rate": 6.763170454777435e-07, "loss": 2.0382, "step": 26577 }, { "epoch": 0.88, "grad_norm": 0.7477156519889832, "learning_rate": 6.759328518937524e-07, "loss": 1.9992, "step": 26578 }, { "epoch": 0.88, "grad_norm": 0.7646410465240479, "learning_rate": 6.755487636476088e-07, "loss": 2.0574, "step": 26579 }, { "epoch": 0.88, "grad_norm": 0.7296204566955566, "learning_rate": 6.751647807436501e-07, "loss": 1.9569, "step": 26580 }, { "epoch": 0.88, "grad_norm": 0.7768306136131287, "learning_rate": 6.74780903186214e-07, "loss": 2.079, "step": 26581 }, { "epoch": 0.88, "grad_norm": 0.7539736032485962, "learning_rate": 6.743971309796416e-07, "loss": 2.1453, "step": 26582 }, { "epoch": 0.88, "grad_norm": 0.7256012558937073, "learning_rate": 6.740134641282614e-07, "loss": 2.0746, "step": 26583 }, { "epoch": 0.88, "grad_norm": 0.7351658940315247, "learning_rate": 6.736299026364123e-07, "loss": 2.0537, "step": 26584 }, { "epoch": 0.88, "grad_norm": 0.7628863453865051, "learning_rate": 6.732464465084288e-07, "loss": 2.0341, "step": 26585 }, { "epoch": 0.88, "grad_norm": 0.7522174119949341, "learning_rate": 6.728630957486393e-07, "loss": 2.0637, "step": 26586 }, { "epoch": 0.88, "grad_norm": 0.7587566375732422, "learning_rate": 6.724798503613761e-07, "loss": 2.0603, "step": 26587 }, { "epoch": 0.88, "grad_norm": 0.767328679561615, "learning_rate": 6.720967103509701e-07, "loss": 2.057, "step": 26588 }, { "epoch": 0.88, "grad_norm": 0.7446606159210205, "learning_rate": 6.717136757217468e-07, "loss": 2.0484, "step": 26589 }, { "epoch": 0.88, "grad_norm": 0.7279035449028015, "learning_rate": 6.71330746478036e-07, "loss": 2.0178, "step": 26590 }, { "epoch": 0.88, "grad_norm": 0.7354421019554138, "learning_rate": 6.709479226241644e-07, "loss": 2.093, "step": 26591 }, { "epoch": 0.88, "grad_norm": 0.7535824775695801, "learning_rate": 6.705652041644562e-07, "loss": 2.0314, "step": 26592 }, { "epoch": 0.88, "grad_norm": 0.7514384984970093, "learning_rate": 6.701825911032333e-07, "loss": 2.1024, "step": 26593 }, { "epoch": 0.88, "grad_norm": 0.7247806787490845, "learning_rate": 6.698000834448215e-07, "loss": 1.9974, "step": 26594 }, { "epoch": 0.88, "grad_norm": 0.7281789183616638, "learning_rate": 6.694176811935394e-07, "loss": 2.0013, "step": 26595 }, { "epoch": 0.88, "grad_norm": 0.7524659037590027, "learning_rate": 6.690353843537078e-07, "loss": 1.9814, "step": 26596 }, { "epoch": 0.88, "grad_norm": 0.7157271504402161, "learning_rate": 6.686531929296447e-07, "loss": 2.0347, "step": 26597 }, { "epoch": 0.88, "grad_norm": 0.749940812587738, "learning_rate": 6.682711069256709e-07, "loss": 2.0708, "step": 26598 }, { "epoch": 0.88, "grad_norm": 0.7544182538986206, "learning_rate": 6.678891263461007e-07, "loss": 2.0366, "step": 26599 }, { "epoch": 0.88, "grad_norm": 0.7334120869636536, "learning_rate": 6.675072511952507e-07, "loss": 2.0014, "step": 26600 }, { "epoch": 0.89, "grad_norm": 0.7459714412689209, "learning_rate": 6.671254814774342e-07, "loss": 1.9703, "step": 26601 }, { "epoch": 0.89, "grad_norm": 0.749575138092041, "learning_rate": 6.667438171969631e-07, "loss": 2.0678, "step": 26602 }, { "epoch": 0.89, "grad_norm": 0.7445259094238281, "learning_rate": 6.663622583581508e-07, "loss": 2.0617, "step": 26603 }, { "epoch": 0.89, "grad_norm": 0.7420641183853149, "learning_rate": 6.659808049653105e-07, "loss": 2.1126, "step": 26604 }, { "epoch": 0.89, "grad_norm": 0.7307288646697998, "learning_rate": 6.655994570227453e-07, "loss": 2.0278, "step": 26605 }, { "epoch": 0.89, "grad_norm": 0.7178513407707214, "learning_rate": 6.652182145347675e-07, "loss": 2.0018, "step": 26606 }, { "epoch": 0.89, "grad_norm": 0.7375272512435913, "learning_rate": 6.648370775056845e-07, "loss": 2.0152, "step": 26607 }, { "epoch": 0.89, "grad_norm": 0.7567374110221863, "learning_rate": 6.64456045939802e-07, "loss": 2.0047, "step": 26608 }, { "epoch": 0.89, "grad_norm": 0.7590065598487854, "learning_rate": 6.64075119841422e-07, "loss": 2.0321, "step": 26609 }, { "epoch": 0.89, "grad_norm": 0.7293776869773865, "learning_rate": 6.636942992148521e-07, "loss": 2.0504, "step": 26610 }, { "epoch": 0.89, "grad_norm": 0.7744541764259338, "learning_rate": 6.633135840643901e-07, "loss": 2.0966, "step": 26611 }, { "epoch": 0.89, "grad_norm": 0.7227846384048462, "learning_rate": 6.629329743943392e-07, "loss": 2.0521, "step": 26612 }, { "epoch": 0.89, "grad_norm": 0.7318065762519836, "learning_rate": 6.625524702090013e-07, "loss": 2.0865, "step": 26613 }, { "epoch": 0.89, "grad_norm": 0.7435641288757324, "learning_rate": 6.621720715126745e-07, "loss": 2.065, "step": 26614 }, { "epoch": 0.89, "grad_norm": 0.7608646154403687, "learning_rate": 6.617917783096517e-07, "loss": 2.083, "step": 26615 }, { "epoch": 0.89, "grad_norm": 0.7389757037162781, "learning_rate": 6.614115906042351e-07, "loss": 2.0568, "step": 26616 }, { "epoch": 0.89, "grad_norm": 0.7342638373374939, "learning_rate": 6.610315084007179e-07, "loss": 2.0746, "step": 26617 }, { "epoch": 0.89, "grad_norm": 0.7387537360191345, "learning_rate": 6.606515317033912e-07, "loss": 2.0029, "step": 26618 }, { "epoch": 0.89, "grad_norm": 0.7389355897903442, "learning_rate": 6.602716605165504e-07, "loss": 2.1011, "step": 26619 }, { "epoch": 0.89, "grad_norm": 0.7701226472854614, "learning_rate": 6.598918948444877e-07, "loss": 2.1454, "step": 26620 }, { "epoch": 0.89, "grad_norm": 0.7223533987998962, "learning_rate": 6.595122346914918e-07, "loss": 2.034, "step": 26621 }, { "epoch": 0.89, "grad_norm": 0.7490042448043823, "learning_rate": 6.591326800618536e-07, "loss": 2.0712, "step": 26622 }, { "epoch": 0.89, "grad_norm": 0.764431893825531, "learning_rate": 6.58753230959861e-07, "loss": 2.0194, "step": 26623 }, { "epoch": 0.89, "grad_norm": 0.7420855760574341, "learning_rate": 6.583738873897971e-07, "loss": 1.9941, "step": 26624 }, { "epoch": 0.89, "grad_norm": 0.7364943623542786, "learning_rate": 6.579946493559519e-07, "loss": 2.0328, "step": 26625 }, { "epoch": 0.89, "grad_norm": 0.7223743796348572, "learning_rate": 6.576155168626097e-07, "loss": 2.0399, "step": 26626 }, { "epoch": 0.89, "grad_norm": 0.7158247232437134, "learning_rate": 6.572364899140505e-07, "loss": 1.9856, "step": 26627 }, { "epoch": 0.89, "grad_norm": 0.7540010213851929, "learning_rate": 6.568575685145561e-07, "loss": 2.061, "step": 26628 }, { "epoch": 0.89, "grad_norm": 0.7794114947319031, "learning_rate": 6.564787526684124e-07, "loss": 2.047, "step": 26629 }, { "epoch": 0.89, "grad_norm": 0.7541570067405701, "learning_rate": 6.561000423798935e-07, "loss": 1.9976, "step": 26630 }, { "epoch": 0.89, "grad_norm": 0.7274417877197266, "learning_rate": 6.557214376532828e-07, "loss": 2.0425, "step": 26631 }, { "epoch": 0.89, "grad_norm": 0.7578456401824951, "learning_rate": 6.553429384928545e-07, "loss": 1.9801, "step": 26632 }, { "epoch": 0.89, "grad_norm": 0.8011856079101562, "learning_rate": 6.549645449028841e-07, "loss": 1.972, "step": 26633 }, { "epoch": 0.89, "grad_norm": 0.7068088054656982, "learning_rate": 6.545862568876471e-07, "loss": 2.0893, "step": 26634 }, { "epoch": 0.89, "grad_norm": 0.7521716952323914, "learning_rate": 6.54208074451419e-07, "loss": 2.0226, "step": 26635 }, { "epoch": 0.89, "grad_norm": 0.7356855869293213, "learning_rate": 6.538299975984708e-07, "loss": 2.0948, "step": 26636 }, { "epoch": 0.89, "grad_norm": 0.7715505957603455, "learning_rate": 6.534520263330723e-07, "loss": 1.9535, "step": 26637 }, { "epoch": 0.89, "grad_norm": 0.7470303177833557, "learning_rate": 6.53074160659496e-07, "loss": 2.123, "step": 26638 }, { "epoch": 0.89, "grad_norm": 0.7485473155975342, "learning_rate": 6.526964005820124e-07, "loss": 2.0757, "step": 26639 }, { "epoch": 0.89, "grad_norm": 0.702972412109375, "learning_rate": 6.52318746104883e-07, "loss": 2.0808, "step": 26640 }, { "epoch": 0.89, "grad_norm": 0.7370464205741882, "learning_rate": 6.519411972323797e-07, "loss": 2.0023, "step": 26641 }, { "epoch": 0.89, "grad_norm": 0.7066406011581421, "learning_rate": 6.515637539687669e-07, "loss": 2.0181, "step": 26642 }, { "epoch": 0.89, "grad_norm": 0.7694652080535889, "learning_rate": 6.511864163183068e-07, "loss": 2.0334, "step": 26643 }, { "epoch": 0.89, "grad_norm": 0.7401752471923828, "learning_rate": 6.508091842852649e-07, "loss": 2.0354, "step": 26644 }, { "epoch": 0.89, "grad_norm": 0.7737911343574524, "learning_rate": 6.504320578739021e-07, "loss": 2.0406, "step": 26645 }, { "epoch": 0.89, "grad_norm": 0.7554739117622375, "learning_rate": 6.500550370884762e-07, "loss": 2.0095, "step": 26646 }, { "epoch": 0.89, "grad_norm": 0.7552582621574402, "learning_rate": 6.496781219332493e-07, "loss": 2.0524, "step": 26647 }, { "epoch": 0.89, "grad_norm": 0.7447125315666199, "learning_rate": 6.493013124124825e-07, "loss": 1.9991, "step": 26648 }, { "epoch": 0.89, "grad_norm": 0.7421413064002991, "learning_rate": 6.489246085304268e-07, "loss": 2.0082, "step": 26649 }, { "epoch": 0.89, "grad_norm": 0.7449020743370056, "learning_rate": 6.4854801029134e-07, "loss": 1.9932, "step": 26650 }, { "epoch": 0.89, "grad_norm": 0.7505519986152649, "learning_rate": 6.481715176994785e-07, "loss": 2.0261, "step": 26651 }, { "epoch": 0.89, "grad_norm": 0.7160792350769043, "learning_rate": 6.477951307590935e-07, "loss": 2.0164, "step": 26652 }, { "epoch": 0.89, "grad_norm": 0.7548861503601074, "learning_rate": 6.474188494744394e-07, "loss": 2.0215, "step": 26653 }, { "epoch": 0.89, "grad_norm": 0.7338448762893677, "learning_rate": 6.470426738497649e-07, "loss": 2.0608, "step": 26654 }, { "epoch": 0.89, "grad_norm": 0.7523289322853088, "learning_rate": 6.466666038893221e-07, "loss": 2.0878, "step": 26655 }, { "epoch": 0.89, "grad_norm": 0.7652626633644104, "learning_rate": 6.462906395973567e-07, "loss": 2.03, "step": 26656 }, { "epoch": 0.89, "grad_norm": 0.7147501111030579, "learning_rate": 6.459147809781196e-07, "loss": 2.0526, "step": 26657 }, { "epoch": 0.89, "grad_norm": 0.7607391476631165, "learning_rate": 6.455390280358553e-07, "loss": 2.0331, "step": 26658 }, { "epoch": 0.89, "grad_norm": 0.739440381526947, "learning_rate": 6.45163380774807e-07, "loss": 2.0174, "step": 26659 }, { "epoch": 0.89, "grad_norm": 0.7463353872299194, "learning_rate": 6.447878391992201e-07, "loss": 2.0591, "step": 26660 }, { "epoch": 0.89, "grad_norm": 0.7504194378852844, "learning_rate": 6.444124033133403e-07, "loss": 2.0646, "step": 26661 }, { "epoch": 0.89, "grad_norm": 0.7576066851615906, "learning_rate": 6.440370731214051e-07, "loss": 1.9744, "step": 26662 }, { "epoch": 0.89, "grad_norm": 0.7382180094718933, "learning_rate": 6.436618486276547e-07, "loss": 2.0781, "step": 26663 }, { "epoch": 0.89, "grad_norm": 0.7660902738571167, "learning_rate": 6.43286729836331e-07, "loss": 2.0142, "step": 26664 }, { "epoch": 0.89, "grad_norm": 0.7437325716018677, "learning_rate": 6.429117167516685e-07, "loss": 2.0495, "step": 26665 }, { "epoch": 0.89, "grad_norm": 0.7604483962059021, "learning_rate": 6.425368093779071e-07, "loss": 2.0756, "step": 26666 }, { "epoch": 0.89, "grad_norm": 0.7335903644561768, "learning_rate": 6.421620077192814e-07, "loss": 1.9702, "step": 26667 }, { "epoch": 0.89, "grad_norm": 0.7337174415588379, "learning_rate": 6.417873117800233e-07, "loss": 2.0081, "step": 26668 }, { "epoch": 0.89, "grad_norm": 0.7416128516197205, "learning_rate": 6.414127215643672e-07, "loss": 2.0037, "step": 26669 }, { "epoch": 0.89, "grad_norm": 0.7363864779472351, "learning_rate": 6.410382370765477e-07, "loss": 2.0788, "step": 26670 }, { "epoch": 0.89, "grad_norm": 0.789065420627594, "learning_rate": 6.406638583207935e-07, "loss": 2.0492, "step": 26671 }, { "epoch": 0.89, "grad_norm": 0.7818248271942139, "learning_rate": 6.402895853013314e-07, "loss": 2.0406, "step": 26672 }, { "epoch": 0.89, "grad_norm": 0.7138779163360596, "learning_rate": 6.399154180223943e-07, "loss": 2.0407, "step": 26673 }, { "epoch": 0.89, "grad_norm": 0.7794494032859802, "learning_rate": 6.395413564882058e-07, "loss": 2.028, "step": 26674 }, { "epoch": 0.89, "grad_norm": 0.7516987323760986, "learning_rate": 6.391674007029946e-07, "loss": 2.0956, "step": 26675 }, { "epoch": 0.89, "grad_norm": 0.772293210029602, "learning_rate": 6.38793550670983e-07, "loss": 2.0807, "step": 26676 }, { "epoch": 0.89, "grad_norm": 0.7267881035804749, "learning_rate": 6.384198063963964e-07, "loss": 2.0138, "step": 26677 }, { "epoch": 0.89, "grad_norm": 0.7654637694358826, "learning_rate": 6.380461678834559e-07, "loss": 2.0191, "step": 26678 }, { "epoch": 0.89, "grad_norm": 0.7666645646095276, "learning_rate": 6.376726351363837e-07, "loss": 2.0433, "step": 26679 }, { "epoch": 0.89, "grad_norm": 0.759799599647522, "learning_rate": 6.372992081593999e-07, "loss": 2.0525, "step": 26680 }, { "epoch": 0.89, "grad_norm": 0.7738442420959473, "learning_rate": 6.369258869567207e-07, "loss": 2.1088, "step": 26681 }, { "epoch": 0.89, "grad_norm": 0.7601478695869446, "learning_rate": 6.365526715325665e-07, "loss": 2.0233, "step": 26682 }, { "epoch": 0.89, "grad_norm": 0.7458545565605164, "learning_rate": 6.361795618911526e-07, "loss": 2.0189, "step": 26683 }, { "epoch": 0.89, "grad_norm": 0.7551502585411072, "learning_rate": 6.358065580366957e-07, "loss": 2.1416, "step": 26684 }, { "epoch": 0.89, "grad_norm": 0.7482866644859314, "learning_rate": 6.354336599734057e-07, "loss": 2.0372, "step": 26685 }, { "epoch": 0.89, "grad_norm": 0.7730600237846375, "learning_rate": 6.350608677055003e-07, "loss": 2.0285, "step": 26686 }, { "epoch": 0.89, "grad_norm": 0.738500714302063, "learning_rate": 6.346881812371875e-07, "loss": 2.0684, "step": 26687 }, { "epoch": 0.89, "grad_norm": 0.7263556718826294, "learning_rate": 6.343156005726791e-07, "loss": 2.0719, "step": 26688 }, { "epoch": 0.89, "grad_norm": 0.7477583885192871, "learning_rate": 6.339431257161854e-07, "loss": 2.0632, "step": 26689 }, { "epoch": 0.89, "grad_norm": 0.7344197630882263, "learning_rate": 6.335707566719118e-07, "loss": 2.0045, "step": 26690 }, { "epoch": 0.89, "grad_norm": 0.7668885588645935, "learning_rate": 6.33198493444066e-07, "loss": 2.0132, "step": 26691 }, { "epoch": 0.89, "grad_norm": 0.7566169500350952, "learning_rate": 6.328263360368558e-07, "loss": 2.0693, "step": 26692 }, { "epoch": 0.89, "grad_norm": 0.7373117804527283, "learning_rate": 6.324542844544846e-07, "loss": 2.0407, "step": 26693 }, { "epoch": 0.89, "grad_norm": 0.7705291509628296, "learning_rate": 6.320823387011521e-07, "loss": 2.0836, "step": 26694 }, { "epoch": 0.89, "grad_norm": 0.7576350569725037, "learning_rate": 6.317104987810662e-07, "loss": 2.0866, "step": 26695 }, { "epoch": 0.89, "grad_norm": 0.7461427450180054, "learning_rate": 6.313387646984226e-07, "loss": 1.9305, "step": 26696 }, { "epoch": 0.89, "grad_norm": 0.7482441663742065, "learning_rate": 6.309671364574243e-07, "loss": 1.9479, "step": 26697 }, { "epoch": 0.89, "grad_norm": 0.7405073046684265, "learning_rate": 6.305956140622671e-07, "loss": 2.0782, "step": 26698 }, { "epoch": 0.89, "grad_norm": 0.7503206729888916, "learning_rate": 6.302241975171508e-07, "loss": 2.1534, "step": 26699 }, { "epoch": 0.89, "grad_norm": 0.7325937151908875, "learning_rate": 6.298528868262699e-07, "loss": 2.0566, "step": 26700 }, { "epoch": 0.89, "grad_norm": 0.7592355608940125, "learning_rate": 6.294816819938198e-07, "loss": 2.053, "step": 26701 }, { "epoch": 0.89, "grad_norm": 0.7623224258422852, "learning_rate": 6.291105830239952e-07, "loss": 2.0841, "step": 26702 }, { "epoch": 0.89, "grad_norm": 0.7596186995506287, "learning_rate": 6.287395899209847e-07, "loss": 2.1019, "step": 26703 }, { "epoch": 0.89, "grad_norm": 0.7117161750793457, "learning_rate": 6.283687026889829e-07, "loss": 1.9914, "step": 26704 }, { "epoch": 0.89, "grad_norm": 0.7472636699676514, "learning_rate": 6.279979213321807e-07, "loss": 2.0721, "step": 26705 }, { "epoch": 0.89, "grad_norm": 0.7442077994346619, "learning_rate": 6.27627245854765e-07, "loss": 2.0556, "step": 26706 }, { "epoch": 0.89, "grad_norm": 0.741631805896759, "learning_rate": 6.272566762609234e-07, "loss": 1.9991, "step": 26707 }, { "epoch": 0.89, "grad_norm": 0.7421914935112, "learning_rate": 6.268862125548436e-07, "loss": 2.045, "step": 26708 }, { "epoch": 0.89, "grad_norm": 0.7307175993919373, "learning_rate": 6.265158547407091e-07, "loss": 2.0202, "step": 26709 }, { "epoch": 0.89, "grad_norm": 0.7356524467468262, "learning_rate": 6.261456028227064e-07, "loss": 2.1394, "step": 26710 }, { "epoch": 0.89, "grad_norm": 0.7400479316711426, "learning_rate": 6.257754568050167e-07, "loss": 2.0922, "step": 26711 }, { "epoch": 0.89, "grad_norm": 0.8011283874511719, "learning_rate": 6.254054166918211e-07, "loss": 1.9984, "step": 26712 }, { "epoch": 0.89, "grad_norm": 0.7177273035049438, "learning_rate": 6.250354824873006e-07, "loss": 1.9862, "step": 26713 }, { "epoch": 0.89, "grad_norm": 0.7417380809783936, "learning_rate": 6.246656541956364e-07, "loss": 2.0417, "step": 26714 }, { "epoch": 0.89, "grad_norm": 0.7765234708786011, "learning_rate": 6.242959318210052e-07, "loss": 2.0012, "step": 26715 }, { "epoch": 0.89, "grad_norm": 0.7295082807540894, "learning_rate": 6.239263153675823e-07, "loss": 2.05, "step": 26716 }, { "epoch": 0.89, "grad_norm": 0.7536152601242065, "learning_rate": 6.235568048395468e-07, "loss": 1.9934, "step": 26717 }, { "epoch": 0.89, "grad_norm": 0.7230278253555298, "learning_rate": 6.231874002410699e-07, "loss": 2.0462, "step": 26718 }, { "epoch": 0.89, "grad_norm": 0.767373263835907, "learning_rate": 6.228181015763279e-07, "loss": 2.0425, "step": 26719 }, { "epoch": 0.89, "grad_norm": 0.7331621646881104, "learning_rate": 6.2244890884949e-07, "loss": 2.0679, "step": 26720 }, { "epoch": 0.89, "grad_norm": 0.7705336213111877, "learning_rate": 6.220798220647295e-07, "loss": 2.0436, "step": 26721 }, { "epoch": 0.89, "grad_norm": 0.7567675113677979, "learning_rate": 6.217108412262141e-07, "loss": 2.1064, "step": 26722 }, { "epoch": 0.89, "grad_norm": 0.7405493259429932, "learning_rate": 6.213419663381149e-07, "loss": 2.05, "step": 26723 }, { "epoch": 0.89, "grad_norm": 0.7321170568466187, "learning_rate": 6.209731974045985e-07, "loss": 2.1023, "step": 26724 }, { "epoch": 0.89, "grad_norm": 0.7282808423042297, "learning_rate": 6.206045344298273e-07, "loss": 1.9804, "step": 26725 }, { "epoch": 0.89, "grad_norm": 0.7823128700256348, "learning_rate": 6.202359774179701e-07, "loss": 2.0998, "step": 26726 }, { "epoch": 0.89, "grad_norm": 0.7819924354553223, "learning_rate": 6.198675263731912e-07, "loss": 2.1141, "step": 26727 }, { "epoch": 0.89, "grad_norm": 0.7460556626319885, "learning_rate": 6.194991812996509e-07, "loss": 2.0672, "step": 26728 }, { "epoch": 0.89, "grad_norm": 0.7619464993476868, "learning_rate": 6.191309422015101e-07, "loss": 2.1034, "step": 26729 }, { "epoch": 0.89, "grad_norm": 0.7551543116569519, "learning_rate": 6.187628090829322e-07, "loss": 2.1001, "step": 26730 }, { "epoch": 0.89, "grad_norm": 0.7451285123825073, "learning_rate": 6.183947819480729e-07, "loss": 2.0541, "step": 26731 }, { "epoch": 0.89, "grad_norm": 0.7397496104240417, "learning_rate": 6.18026860801092e-07, "loss": 2.0694, "step": 26732 }, { "epoch": 0.89, "grad_norm": 0.7273218631744385, "learning_rate": 6.176590456461451e-07, "loss": 2.0041, "step": 26733 }, { "epoch": 0.89, "grad_norm": 0.746175229549408, "learning_rate": 6.172913364873867e-07, "loss": 2.0579, "step": 26734 }, { "epoch": 0.89, "grad_norm": 0.7669066786766052, "learning_rate": 6.169237333289723e-07, "loss": 2.0756, "step": 26735 }, { "epoch": 0.89, "grad_norm": 0.7393285632133484, "learning_rate": 6.165562361750555e-07, "loss": 2.0759, "step": 26736 }, { "epoch": 0.89, "grad_norm": 0.739879846572876, "learning_rate": 6.161888450297871e-07, "loss": 2.0505, "step": 26737 }, { "epoch": 0.89, "grad_norm": 0.7532406449317932, "learning_rate": 6.158215598973161e-07, "loss": 2.0687, "step": 26738 }, { "epoch": 0.89, "grad_norm": 0.7550314664840698, "learning_rate": 6.154543807817936e-07, "loss": 2.0667, "step": 26739 }, { "epoch": 0.89, "grad_norm": 0.7731047868728638, "learning_rate": 6.150873076873698e-07, "loss": 2.075, "step": 26740 }, { "epoch": 0.89, "grad_norm": 0.7553466558456421, "learning_rate": 6.14720340618189e-07, "loss": 2.0338, "step": 26741 }, { "epoch": 0.89, "grad_norm": 0.7297695279121399, "learning_rate": 6.143534795783956e-07, "loss": 2.0763, "step": 26742 }, { "epoch": 0.89, "grad_norm": 0.7507240176200867, "learning_rate": 6.139867245721376e-07, "loss": 2.0242, "step": 26743 }, { "epoch": 0.89, "grad_norm": 0.7413674592971802, "learning_rate": 6.13620075603556e-07, "loss": 2.0146, "step": 26744 }, { "epoch": 0.89, "grad_norm": 0.7299187183380127, "learning_rate": 6.13253532676793e-07, "loss": 2.0222, "step": 26745 }, { "epoch": 0.89, "grad_norm": 0.7330113053321838, "learning_rate": 6.128870957959932e-07, "loss": 2.0123, "step": 26746 }, { "epoch": 0.89, "grad_norm": 0.7312183380126953, "learning_rate": 6.12520764965292e-07, "loss": 2.0255, "step": 26747 }, { "epoch": 0.89, "grad_norm": 0.7326365113258362, "learning_rate": 6.121545401888285e-07, "loss": 2.0036, "step": 26748 }, { "epoch": 0.89, "grad_norm": 0.7596232891082764, "learning_rate": 6.117884214707426e-07, "loss": 2.0618, "step": 26749 }, { "epoch": 0.89, "grad_norm": 0.7574792504310608, "learning_rate": 6.114224088151698e-07, "loss": 2.0285, "step": 26750 }, { "epoch": 0.89, "grad_norm": 0.7369068264961243, "learning_rate": 6.110565022262426e-07, "loss": 1.9883, "step": 26751 }, { "epoch": 0.89, "grad_norm": 0.7699119448661804, "learning_rate": 6.106907017080976e-07, "loss": 2.0829, "step": 26752 }, { "epoch": 0.89, "grad_norm": 0.7466102242469788, "learning_rate": 6.103250072648659e-07, "loss": 2.084, "step": 26753 }, { "epoch": 0.89, "grad_norm": 0.7960851788520813, "learning_rate": 6.099594189006796e-07, "loss": 2.1026, "step": 26754 }, { "epoch": 0.89, "grad_norm": 0.7456473112106323, "learning_rate": 6.095939366196679e-07, "loss": 2.0467, "step": 26755 }, { "epoch": 0.89, "grad_norm": 0.7280260920524597, "learning_rate": 6.092285604259618e-07, "loss": 2.0334, "step": 26756 }, { "epoch": 0.89, "grad_norm": 0.7388403415679932, "learning_rate": 6.088632903236869e-07, "loss": 1.949, "step": 26757 }, { "epoch": 0.89, "grad_norm": 0.7621145248413086, "learning_rate": 6.084981263169721e-07, "loss": 2.0272, "step": 26758 }, { "epoch": 0.89, "grad_norm": 0.7531647086143494, "learning_rate": 6.081330684099418e-07, "loss": 2.036, "step": 26759 }, { "epoch": 0.89, "grad_norm": 0.7246076464653015, "learning_rate": 6.077681166067173e-07, "loss": 1.9869, "step": 26760 }, { "epoch": 0.89, "grad_norm": 0.7324761748313904, "learning_rate": 6.074032709114252e-07, "loss": 2.0079, "step": 26761 }, { "epoch": 0.89, "grad_norm": 0.7732455730438232, "learning_rate": 6.070385313281879e-07, "loss": 2.0995, "step": 26762 }, { "epoch": 0.89, "grad_norm": 0.7558773756027222, "learning_rate": 6.066738978611242e-07, "loss": 2.0689, "step": 26763 }, { "epoch": 0.89, "grad_norm": 0.7268763184547424, "learning_rate": 6.063093705143519e-07, "loss": 2.0867, "step": 26764 }, { "epoch": 0.89, "grad_norm": 0.716711699962616, "learning_rate": 6.059449492919933e-07, "loss": 2.0266, "step": 26765 }, { "epoch": 0.89, "grad_norm": 0.7511341571807861, "learning_rate": 6.055806341981607e-07, "loss": 2.1055, "step": 26766 }, { "epoch": 0.89, "grad_norm": 0.7269211411476135, "learning_rate": 6.052164252369729e-07, "loss": 2.0423, "step": 26767 }, { "epoch": 0.89, "grad_norm": 0.7347042560577393, "learning_rate": 6.04852322412548e-07, "loss": 2.0295, "step": 26768 }, { "epoch": 0.89, "grad_norm": 0.7339388728141785, "learning_rate": 6.044883257289913e-07, "loss": 2.0629, "step": 26769 }, { "epoch": 0.89, "grad_norm": 0.7583845853805542, "learning_rate": 6.041244351904197e-07, "loss": 2.0855, "step": 26770 }, { "epoch": 0.89, "grad_norm": 0.7364854216575623, "learning_rate": 6.037606508009453e-07, "loss": 2.0583, "step": 26771 }, { "epoch": 0.89, "grad_norm": 0.7215999960899353, "learning_rate": 6.03396972564676e-07, "loss": 1.9929, "step": 26772 }, { "epoch": 0.89, "grad_norm": 0.7856223583221436, "learning_rate": 6.030334004857186e-07, "loss": 2.001, "step": 26773 }, { "epoch": 0.89, "grad_norm": 0.7823106050491333, "learning_rate": 6.026699345681852e-07, "loss": 2.0884, "step": 26774 }, { "epoch": 0.89, "grad_norm": 0.7538570761680603, "learning_rate": 6.023065748161782e-07, "loss": 2.0205, "step": 26775 }, { "epoch": 0.89, "grad_norm": 0.7534996271133423, "learning_rate": 6.019433212338033e-07, "loss": 2.0608, "step": 26776 }, { "epoch": 0.89, "grad_norm": 0.7311971187591553, "learning_rate": 6.015801738251659e-07, "loss": 2.0954, "step": 26777 }, { "epoch": 0.89, "grad_norm": 0.7502958178520203, "learning_rate": 6.012171325943683e-07, "loss": 2.0221, "step": 26778 }, { "epoch": 0.89, "grad_norm": 0.7447825074195862, "learning_rate": 6.008541975455106e-07, "loss": 2.0173, "step": 26779 }, { "epoch": 0.89, "grad_norm": 0.7507491111755371, "learning_rate": 6.004913686826941e-07, "loss": 2.0259, "step": 26780 }, { "epoch": 0.89, "grad_norm": 0.789646327495575, "learning_rate": 6.001286460100186e-07, "loss": 2.0069, "step": 26781 }, { "epoch": 0.89, "grad_norm": 0.7772741913795471, "learning_rate": 5.997660295315777e-07, "loss": 2.094, "step": 26782 }, { "epoch": 0.89, "grad_norm": 0.7480953931808472, "learning_rate": 5.994035192514724e-07, "loss": 2.0513, "step": 26783 }, { "epoch": 0.89, "grad_norm": 0.7661371827125549, "learning_rate": 5.990411151737985e-07, "loss": 1.9886, "step": 26784 }, { "epoch": 0.89, "grad_norm": 0.7284296154975891, "learning_rate": 5.98678817302647e-07, "loss": 2.0631, "step": 26785 }, { "epoch": 0.89, "grad_norm": 0.7280776500701904, "learning_rate": 5.983166256421125e-07, "loss": 2.0381, "step": 26786 }, { "epoch": 0.89, "grad_norm": 0.7455608248710632, "learning_rate": 5.979545401962883e-07, "loss": 2.0862, "step": 26787 }, { "epoch": 0.89, "grad_norm": 0.7274338006973267, "learning_rate": 5.975925609692612e-07, "loss": 1.9799, "step": 26788 }, { "epoch": 0.89, "grad_norm": 0.75301194190979, "learning_rate": 5.972306879651235e-07, "loss": 2.0726, "step": 26789 }, { "epoch": 0.89, "grad_norm": 0.7474877238273621, "learning_rate": 5.968689211879653e-07, "loss": 2.1057, "step": 26790 }, { "epoch": 0.89, "grad_norm": 0.7693120241165161, "learning_rate": 5.965072606418698e-07, "loss": 2.1169, "step": 26791 }, { "epoch": 0.89, "grad_norm": 0.7455722093582153, "learning_rate": 5.961457063309228e-07, "loss": 2.0601, "step": 26792 }, { "epoch": 0.89, "grad_norm": 0.7424631714820862, "learning_rate": 5.957842582592121e-07, "loss": 2.0142, "step": 26793 }, { "epoch": 0.89, "grad_norm": 0.7237871885299683, "learning_rate": 5.954229164308201e-07, "loss": 1.998, "step": 26794 }, { "epoch": 0.89, "grad_norm": 0.7454896569252014, "learning_rate": 5.950616808498266e-07, "loss": 2.1098, "step": 26795 }, { "epoch": 0.89, "grad_norm": 0.7543712854385376, "learning_rate": 5.947005515203153e-07, "loss": 2.0629, "step": 26796 }, { "epoch": 0.89, "grad_norm": 0.7333405017852783, "learning_rate": 5.943395284463649e-07, "loss": 2.0216, "step": 26797 }, { "epoch": 0.89, "grad_norm": 0.7327441573143005, "learning_rate": 5.939786116320534e-07, "loss": 2.0697, "step": 26798 }, { "epoch": 0.89, "grad_norm": 0.7487781643867493, "learning_rate": 5.936178010814597e-07, "loss": 2.0238, "step": 26799 }, { "epoch": 0.89, "grad_norm": 0.7438755631446838, "learning_rate": 5.932570967986606e-07, "loss": 2.0097, "step": 26800 }, { "epoch": 0.89, "grad_norm": 0.7622124552726746, "learning_rate": 5.928964987877283e-07, "loss": 2.0436, "step": 26801 }, { "epoch": 0.89, "grad_norm": 0.7725982666015625, "learning_rate": 5.925360070527398e-07, "loss": 2.0693, "step": 26802 }, { "epoch": 0.89, "grad_norm": 0.7298283576965332, "learning_rate": 5.921756215977659e-07, "loss": 2.0449, "step": 26803 }, { "epoch": 0.89, "grad_norm": 0.7347543239593506, "learning_rate": 5.918153424268769e-07, "loss": 2.0505, "step": 26804 }, { "epoch": 0.89, "grad_norm": 0.765644907951355, "learning_rate": 5.914551695441451e-07, "loss": 2.0396, "step": 26805 }, { "epoch": 0.89, "grad_norm": 0.7311999201774597, "learning_rate": 5.910951029536394e-07, "loss": 2.0973, "step": 26806 }, { "epoch": 0.89, "grad_norm": 0.7443760633468628, "learning_rate": 5.907351426594254e-07, "loss": 2.0495, "step": 26807 }, { "epoch": 0.89, "grad_norm": 0.7606602311134338, "learning_rate": 5.903752886655733e-07, "loss": 2.0661, "step": 26808 }, { "epoch": 0.89, "grad_norm": 0.7304977178573608, "learning_rate": 5.900155409761465e-07, "loss": 2.06, "step": 26809 }, { "epoch": 0.89, "grad_norm": 0.7611575722694397, "learning_rate": 5.896558995952084e-07, "loss": 1.9904, "step": 26810 }, { "epoch": 0.89, "grad_norm": 0.7486984133720398, "learning_rate": 5.892963645268224e-07, "loss": 2.0651, "step": 26811 }, { "epoch": 0.89, "grad_norm": 0.7296981811523438, "learning_rate": 5.88936935775054e-07, "loss": 2.0673, "step": 26812 }, { "epoch": 0.89, "grad_norm": 0.7515977621078491, "learning_rate": 5.88577613343958e-07, "loss": 2.052, "step": 26813 }, { "epoch": 0.89, "grad_norm": 0.7565595507621765, "learning_rate": 5.882183972375955e-07, "loss": 2.0497, "step": 26814 }, { "epoch": 0.89, "grad_norm": 0.7510352730751038, "learning_rate": 5.878592874600275e-07, "loss": 2.056, "step": 26815 }, { "epoch": 0.89, "grad_norm": 0.7326372861862183, "learning_rate": 5.875002840153099e-07, "loss": 2.0916, "step": 26816 }, { "epoch": 0.89, "grad_norm": 0.7442916035652161, "learning_rate": 5.87141386907496e-07, "loss": 2.0523, "step": 26817 }, { "epoch": 0.89, "grad_norm": 0.7336155772209167, "learning_rate": 5.867825961406437e-07, "loss": 2.0282, "step": 26818 }, { "epoch": 0.89, "grad_norm": 0.759099006652832, "learning_rate": 5.864239117188031e-07, "loss": 2.0063, "step": 26819 }, { "epoch": 0.89, "grad_norm": 0.7481719851493835, "learning_rate": 5.860653336460287e-07, "loss": 2.1326, "step": 26820 }, { "epoch": 0.89, "grad_norm": 0.7478468418121338, "learning_rate": 5.857068619263728e-07, "loss": 2.033, "step": 26821 }, { "epoch": 0.89, "grad_norm": 0.7536263465881348, "learning_rate": 5.853484965638834e-07, "loss": 2.1152, "step": 26822 }, { "epoch": 0.89, "grad_norm": 0.7659785747528076, "learning_rate": 5.849902375626071e-07, "loss": 2.0592, "step": 26823 }, { "epoch": 0.89, "grad_norm": 0.7538750767707825, "learning_rate": 5.846320849265952e-07, "loss": 2.053, "step": 26824 }, { "epoch": 0.89, "grad_norm": 0.7700193524360657, "learning_rate": 5.842740386598921e-07, "loss": 2.0955, "step": 26825 }, { "epoch": 0.89, "grad_norm": 0.7565541863441467, "learning_rate": 5.839160987665404e-07, "loss": 1.9857, "step": 26826 }, { "epoch": 0.89, "grad_norm": 0.7457347512245178, "learning_rate": 5.835582652505877e-07, "loss": 1.9862, "step": 26827 }, { "epoch": 0.89, "grad_norm": 0.7410709261894226, "learning_rate": 5.832005381160755e-07, "loss": 2.0062, "step": 26828 }, { "epoch": 0.89, "grad_norm": 0.7709906697273254, "learning_rate": 5.828429173670436e-07, "loss": 2.0139, "step": 26829 }, { "epoch": 0.89, "grad_norm": 0.7411143779754639, "learning_rate": 5.824854030075355e-07, "loss": 1.9856, "step": 26830 }, { "epoch": 0.89, "grad_norm": 0.7281932830810547, "learning_rate": 5.821279950415882e-07, "loss": 2.0093, "step": 26831 }, { "epoch": 0.89, "grad_norm": 0.7799766659736633, "learning_rate": 5.81770693473237e-07, "loss": 2.0806, "step": 26832 }, { "epoch": 0.89, "grad_norm": 0.7608921527862549, "learning_rate": 5.814134983065212e-07, "loss": 2.0332, "step": 26833 }, { "epoch": 0.89, "grad_norm": 0.7611744403839111, "learning_rate": 5.810564095454785e-07, "loss": 2.0163, "step": 26834 }, { "epoch": 0.89, "grad_norm": 0.7796937823295593, "learning_rate": 5.80699427194138e-07, "loss": 1.976, "step": 26835 }, { "epoch": 0.89, "grad_norm": 0.744566798210144, "learning_rate": 5.803425512565353e-07, "loss": 2.0554, "step": 26836 }, { "epoch": 0.89, "grad_norm": 0.7287493348121643, "learning_rate": 5.799857817367027e-07, "loss": 2.0621, "step": 26837 }, { "epoch": 0.89, "grad_norm": 0.7415565848350525, "learning_rate": 5.796291186386693e-07, "loss": 2.0882, "step": 26838 }, { "epoch": 0.89, "grad_norm": 0.7295355200767517, "learning_rate": 5.792725619664663e-07, "loss": 2.0272, "step": 26839 }, { "epoch": 0.89, "grad_norm": 0.7915575504302979, "learning_rate": 5.789161117241193e-07, "loss": 2.1246, "step": 26840 }, { "epoch": 0.89, "grad_norm": 0.745983898639679, "learning_rate": 5.785597679156585e-07, "loss": 2.0926, "step": 26841 }, { "epoch": 0.89, "grad_norm": 0.768291711807251, "learning_rate": 5.782035305451072e-07, "loss": 2.0751, "step": 26842 }, { "epoch": 0.89, "grad_norm": 0.7417081594467163, "learning_rate": 5.778473996164913e-07, "loss": 2.0644, "step": 26843 }, { "epoch": 0.89, "grad_norm": 0.738188624382019, "learning_rate": 5.774913751338329e-07, "loss": 2.0945, "step": 26844 }, { "epoch": 0.89, "grad_norm": 0.7360116839408875, "learning_rate": 5.771354571011545e-07, "loss": 1.9655, "step": 26845 }, { "epoch": 0.89, "grad_norm": 0.7933596968650818, "learning_rate": 5.767796455224772e-07, "loss": 2.1241, "step": 26846 }, { "epoch": 0.89, "grad_norm": 0.7706090807914734, "learning_rate": 5.764239404018235e-07, "loss": 2.0484, "step": 26847 }, { "epoch": 0.89, "grad_norm": 0.7358858585357666, "learning_rate": 5.760683417432067e-07, "loss": 1.9973, "step": 26848 }, { "epoch": 0.89, "grad_norm": 0.7996397018432617, "learning_rate": 5.757128495506459e-07, "loss": 2.0207, "step": 26849 }, { "epoch": 0.89, "grad_norm": 0.7407367825508118, "learning_rate": 5.753574638281612e-07, "loss": 2.1165, "step": 26850 }, { "epoch": 0.89, "grad_norm": 0.7487921714782715, "learning_rate": 5.750021845797615e-07, "loss": 2.0401, "step": 26851 }, { "epoch": 0.89, "grad_norm": 0.706542432308197, "learning_rate": 5.74647011809466e-07, "loss": 1.9813, "step": 26852 }, { "epoch": 0.89, "grad_norm": 0.7501137852668762, "learning_rate": 5.742919455212848e-07, "loss": 2.0561, "step": 26853 }, { "epoch": 0.89, "grad_norm": 0.7457038164138794, "learning_rate": 5.739369857192267e-07, "loss": 2.0392, "step": 26854 }, { "epoch": 0.89, "grad_norm": 0.7404676079750061, "learning_rate": 5.735821324073054e-07, "loss": 1.9017, "step": 26855 }, { "epoch": 0.89, "grad_norm": 0.7670263648033142, "learning_rate": 5.732273855895298e-07, "loss": 2.0759, "step": 26856 }, { "epoch": 0.89, "grad_norm": 0.7539442181587219, "learning_rate": 5.728727452699068e-07, "loss": 1.9943, "step": 26857 }, { "epoch": 0.89, "grad_norm": 0.7237810492515564, "learning_rate": 5.725182114524408e-07, "loss": 1.9949, "step": 26858 }, { "epoch": 0.89, "grad_norm": 0.737122118473053, "learning_rate": 5.721637841411409e-07, "loss": 1.958, "step": 26859 }, { "epoch": 0.89, "grad_norm": 0.7268405556678772, "learning_rate": 5.718094633400073e-07, "loss": 2.0696, "step": 26860 }, { "epoch": 0.89, "grad_norm": 0.7415480017662048, "learning_rate": 5.714552490530467e-07, "loss": 2.0524, "step": 26861 }, { "epoch": 0.89, "grad_norm": 0.7366801500320435, "learning_rate": 5.711011412842571e-07, "loss": 2.0466, "step": 26862 }, { "epoch": 0.89, "grad_norm": 0.7471643090248108, "learning_rate": 5.70747140037643e-07, "loss": 2.0691, "step": 26863 }, { "epoch": 0.89, "grad_norm": 0.7320636510848999, "learning_rate": 5.703932453172001e-07, "loss": 2.084, "step": 26864 }, { "epoch": 0.89, "grad_norm": 0.728541374206543, "learning_rate": 5.700394571269296e-07, "loss": 2.0271, "step": 26865 }, { "epoch": 0.89, "grad_norm": 0.734802782535553, "learning_rate": 5.696857754708262e-07, "loss": 2.0262, "step": 26866 }, { "epoch": 0.89, "grad_norm": 0.769321620464325, "learning_rate": 5.693322003528856e-07, "loss": 2.0648, "step": 26867 }, { "epoch": 0.89, "grad_norm": 0.7468048930168152, "learning_rate": 5.689787317771022e-07, "loss": 2.0482, "step": 26868 }, { "epoch": 0.89, "grad_norm": 0.747482180595398, "learning_rate": 5.686253697474719e-07, "loss": 2.0558, "step": 26869 }, { "epoch": 0.89, "grad_norm": 0.7281048893928528, "learning_rate": 5.682721142679836e-07, "loss": 2.0481, "step": 26870 }, { "epoch": 0.89, "grad_norm": 0.7226170301437378, "learning_rate": 5.679189653426298e-07, "loss": 2.0714, "step": 26871 }, { "epoch": 0.89, "grad_norm": 0.7490057945251465, "learning_rate": 5.675659229754005e-07, "loss": 2.0726, "step": 26872 }, { "epoch": 0.89, "grad_norm": 0.7540087699890137, "learning_rate": 5.672129871702814e-07, "loss": 2.0771, "step": 26873 }, { "epoch": 0.89, "grad_norm": 0.7498872876167297, "learning_rate": 5.668601579312638e-07, "loss": 2.0655, "step": 26874 }, { "epoch": 0.89, "grad_norm": 0.7233832478523254, "learning_rate": 5.665074352623323e-07, "loss": 2.0508, "step": 26875 }, { "epoch": 0.89, "grad_norm": 0.7480850219726562, "learning_rate": 5.661548191674681e-07, "loss": 2.0366, "step": 26876 }, { "epoch": 0.89, "grad_norm": 0.7863582968711853, "learning_rate": 5.658023096506593e-07, "loss": 1.9977, "step": 26877 }, { "epoch": 0.89, "grad_norm": 0.7653627395629883, "learning_rate": 5.654499067158881e-07, "loss": 2.0342, "step": 26878 }, { "epoch": 0.89, "grad_norm": 0.7334886193275452, "learning_rate": 5.650976103671357e-07, "loss": 1.9953, "step": 26879 }, { "epoch": 0.89, "grad_norm": 0.7379988431930542, "learning_rate": 5.64745420608378e-07, "loss": 2.0902, "step": 26880 }, { "epoch": 0.89, "grad_norm": 0.7469719052314758, "learning_rate": 5.643933374435994e-07, "loss": 2.0244, "step": 26881 }, { "epoch": 0.89, "grad_norm": 0.7325426340103149, "learning_rate": 5.640413608767737e-07, "loss": 2.051, "step": 26882 }, { "epoch": 0.89, "grad_norm": 0.7712374329566956, "learning_rate": 5.636894909118796e-07, "loss": 2.0795, "step": 26883 }, { "epoch": 0.89, "grad_norm": 0.7337093353271484, "learning_rate": 5.633377275528906e-07, "loss": 2.0513, "step": 26884 }, { "epoch": 0.89, "grad_norm": 0.7074521780014038, "learning_rate": 5.629860708037826e-07, "loss": 1.9491, "step": 26885 }, { "epoch": 0.89, "grad_norm": 0.7709642052650452, "learning_rate": 5.626345206685257e-07, "loss": 2.0757, "step": 26886 }, { "epoch": 0.89, "grad_norm": 0.7714084386825562, "learning_rate": 5.622830771510945e-07, "loss": 2.0662, "step": 26887 }, { "epoch": 0.89, "grad_norm": 0.7319288849830627, "learning_rate": 5.619317402554581e-07, "loss": 2.0736, "step": 26888 }, { "epoch": 0.89, "grad_norm": 0.7358079552650452, "learning_rate": 5.615805099855842e-07, "loss": 2.1122, "step": 26889 }, { "epoch": 0.89, "grad_norm": 0.7885095477104187, "learning_rate": 5.612293863454432e-07, "loss": 2.0366, "step": 26890 }, { "epoch": 0.89, "grad_norm": 0.7655701041221619, "learning_rate": 5.608783693390008e-07, "loss": 2.0259, "step": 26891 }, { "epoch": 0.89, "grad_norm": 0.7609039545059204, "learning_rate": 5.605274589702237e-07, "loss": 2.1184, "step": 26892 }, { "epoch": 0.89, "grad_norm": 0.7649220824241638, "learning_rate": 5.601766552430743e-07, "loss": 2.1437, "step": 26893 }, { "epoch": 0.89, "grad_norm": 0.7477615475654602, "learning_rate": 5.598259581615173e-07, "loss": 2.0349, "step": 26894 }, { "epoch": 0.89, "grad_norm": 0.7641354203224182, "learning_rate": 5.594753677295128e-07, "loss": 2.0766, "step": 26895 }, { "epoch": 0.89, "grad_norm": 0.7420513033866882, "learning_rate": 5.591248839510244e-07, "loss": 2.075, "step": 26896 }, { "epoch": 0.89, "grad_norm": 0.7668627500534058, "learning_rate": 5.587745068300099e-07, "loss": 2.1119, "step": 26897 }, { "epoch": 0.89, "grad_norm": 0.7368153929710388, "learning_rate": 5.584242363704273e-07, "loss": 2.0023, "step": 26898 }, { "epoch": 0.89, "grad_norm": 0.762692391872406, "learning_rate": 5.580740725762335e-07, "loss": 2.0503, "step": 26899 }, { "epoch": 0.89, "grad_norm": 0.7256408333778381, "learning_rate": 5.577240154513874e-07, "loss": 1.969, "step": 26900 }, { "epoch": 0.89, "grad_norm": 0.7607681751251221, "learning_rate": 5.573740649998416e-07, "loss": 2.0373, "step": 26901 }, { "epoch": 0.9, "grad_norm": 0.7171739339828491, "learning_rate": 5.570242212255484e-07, "loss": 2.054, "step": 26902 }, { "epoch": 0.9, "grad_norm": 0.7665387392044067, "learning_rate": 5.566744841324623e-07, "loss": 2.0434, "step": 26903 }, { "epoch": 0.9, "grad_norm": 0.745210587978363, "learning_rate": 5.563248537245325e-07, "loss": 2.0277, "step": 26904 }, { "epoch": 0.9, "grad_norm": 0.7858266830444336, "learning_rate": 5.559753300057114e-07, "loss": 2.0415, "step": 26905 }, { "epoch": 0.9, "grad_norm": 0.7739076614379883, "learning_rate": 5.556259129799446e-07, "loss": 2.0099, "step": 26906 }, { "epoch": 0.9, "grad_norm": 0.7702251672744751, "learning_rate": 5.552766026511825e-07, "loss": 2.1132, "step": 26907 }, { "epoch": 0.9, "grad_norm": 0.7716066837310791, "learning_rate": 5.549273990233695e-07, "loss": 2.041, "step": 26908 }, { "epoch": 0.9, "grad_norm": 0.7275430560112, "learning_rate": 5.545783021004525e-07, "loss": 2.0059, "step": 26909 }, { "epoch": 0.9, "grad_norm": 0.7659602761268616, "learning_rate": 5.54229311886374e-07, "loss": 2.125, "step": 26910 }, { "epoch": 0.9, "grad_norm": 0.7582475543022156, "learning_rate": 5.538804283850763e-07, "loss": 2.0475, "step": 26911 }, { "epoch": 0.9, "grad_norm": 0.7267307639122009, "learning_rate": 5.535316516005007e-07, "loss": 2.0096, "step": 26912 }, { "epoch": 0.9, "grad_norm": 0.7591155171394348, "learning_rate": 5.531829815365897e-07, "loss": 1.9729, "step": 26913 }, { "epoch": 0.9, "grad_norm": 0.7996005415916443, "learning_rate": 5.528344181972811e-07, "loss": 2.0383, "step": 26914 }, { "epoch": 0.9, "grad_norm": 0.7296708822250366, "learning_rate": 5.524859615865119e-07, "loss": 2.0565, "step": 26915 }, { "epoch": 0.9, "grad_norm": 0.7162570357322693, "learning_rate": 5.5213761170822e-07, "loss": 2.0358, "step": 26916 }, { "epoch": 0.9, "grad_norm": 0.75110924243927, "learning_rate": 5.5178936856634e-07, "loss": 2.0785, "step": 26917 }, { "epoch": 0.9, "grad_norm": 0.7339128851890564, "learning_rate": 5.514412321648077e-07, "loss": 2.039, "step": 26918 }, { "epoch": 0.9, "grad_norm": 0.7672488689422607, "learning_rate": 5.510932025075543e-07, "loss": 2.0993, "step": 26919 }, { "epoch": 0.9, "grad_norm": 0.7108776569366455, "learning_rate": 5.507452795985114e-07, "loss": 1.9927, "step": 26920 }, { "epoch": 0.9, "grad_norm": 0.7517478466033936, "learning_rate": 5.503974634416098e-07, "loss": 1.9737, "step": 26921 }, { "epoch": 0.9, "grad_norm": 0.7337257862091064, "learning_rate": 5.500497540407823e-07, "loss": 2.108, "step": 26922 }, { "epoch": 0.9, "grad_norm": 0.7433484792709351, "learning_rate": 5.497021513999535e-07, "loss": 1.9872, "step": 26923 }, { "epoch": 0.9, "grad_norm": 0.7508088946342468, "learning_rate": 5.4935465552305e-07, "loss": 2.0254, "step": 26924 }, { "epoch": 0.9, "grad_norm": 0.7388478517532349, "learning_rate": 5.490072664140012e-07, "loss": 2.0837, "step": 26925 }, { "epoch": 0.9, "grad_norm": 0.7516533732414246, "learning_rate": 5.48659984076727e-07, "loss": 2.1046, "step": 26926 }, { "epoch": 0.9, "grad_norm": 0.7340182662010193, "learning_rate": 5.483128085151557e-07, "loss": 1.9676, "step": 26927 }, { "epoch": 0.9, "grad_norm": 0.7733637690544128, "learning_rate": 5.47965739733205e-07, "loss": 2.0736, "step": 26928 }, { "epoch": 0.9, "grad_norm": 0.7810878753662109, "learning_rate": 5.476187777347997e-07, "loss": 2.0118, "step": 26929 }, { "epoch": 0.9, "grad_norm": 0.7485711574554443, "learning_rate": 5.472719225238554e-07, "loss": 1.946, "step": 26930 }, { "epoch": 0.9, "grad_norm": 0.7349092960357666, "learning_rate": 5.469251741042958e-07, "loss": 2.0287, "step": 26931 }, { "epoch": 0.9, "grad_norm": 0.7228965759277344, "learning_rate": 5.465785324800354e-07, "loss": 2.0236, "step": 26932 }, { "epoch": 0.9, "grad_norm": 0.7300693988800049, "learning_rate": 5.462319976549879e-07, "loss": 2.0116, "step": 26933 }, { "epoch": 0.9, "grad_norm": 0.7541865706443787, "learning_rate": 5.458855696330723e-07, "loss": 2.0014, "step": 26934 }, { "epoch": 0.9, "grad_norm": 0.7615839242935181, "learning_rate": 5.455392484182009e-07, "loss": 2.063, "step": 26935 }, { "epoch": 0.9, "grad_norm": 0.7451556921005249, "learning_rate": 5.451930340142875e-07, "loss": 2.0744, "step": 26936 }, { "epoch": 0.9, "grad_norm": 0.7672232389450073, "learning_rate": 5.448469264252398e-07, "loss": 2.0018, "step": 26937 }, { "epoch": 0.9, "grad_norm": 0.7256489992141724, "learning_rate": 5.445009256549727e-07, "loss": 1.9996, "step": 26938 }, { "epoch": 0.9, "grad_norm": 0.732761800289154, "learning_rate": 5.441550317073896e-07, "loss": 2.0611, "step": 26939 }, { "epoch": 0.9, "grad_norm": 0.742633581161499, "learning_rate": 5.43809244586404e-07, "loss": 2.0334, "step": 26940 }, { "epoch": 0.9, "grad_norm": 0.7921823859214783, "learning_rate": 5.434635642959196e-07, "loss": 2.0549, "step": 26941 }, { "epoch": 0.9, "grad_norm": 0.7654536962509155, "learning_rate": 5.431179908398398e-07, "loss": 2.0826, "step": 26942 }, { "epoch": 0.9, "grad_norm": 0.7512344717979431, "learning_rate": 5.427725242220716e-07, "loss": 1.9456, "step": 26943 }, { "epoch": 0.9, "grad_norm": 0.756268322467804, "learning_rate": 5.424271644465173e-07, "loss": 2.0259, "step": 26944 }, { "epoch": 0.9, "grad_norm": 0.7653051614761353, "learning_rate": 5.420819115170783e-07, "loss": 2.0696, "step": 26945 }, { "epoch": 0.9, "grad_norm": 0.7791337966918945, "learning_rate": 5.417367654376548e-07, "loss": 2.0326, "step": 26946 }, { "epoch": 0.9, "grad_norm": 0.7451674938201904, "learning_rate": 5.413917262121449e-07, "loss": 1.996, "step": 26947 }, { "epoch": 0.9, "grad_norm": 0.7252696752548218, "learning_rate": 5.410467938444497e-07, "loss": 1.9873, "step": 26948 }, { "epoch": 0.9, "grad_norm": 0.7509819269180298, "learning_rate": 5.407019683384651e-07, "loss": 2.0311, "step": 26949 }, { "epoch": 0.9, "grad_norm": 0.7448418140411377, "learning_rate": 5.403572496980836e-07, "loss": 2.0671, "step": 26950 }, { "epoch": 0.9, "grad_norm": 0.79044508934021, "learning_rate": 5.400126379272042e-07, "loss": 2.025, "step": 26951 }, { "epoch": 0.9, "grad_norm": 0.7812781929969788, "learning_rate": 5.396681330297159e-07, "loss": 2.0099, "step": 26952 }, { "epoch": 0.9, "grad_norm": 0.7292900085449219, "learning_rate": 5.393237350095126e-07, "loss": 2.0463, "step": 26953 }, { "epoch": 0.9, "grad_norm": 0.7490742802619934, "learning_rate": 5.389794438704887e-07, "loss": 2.0502, "step": 26954 }, { "epoch": 0.9, "grad_norm": 0.7482472062110901, "learning_rate": 5.386352596165267e-07, "loss": 2.0796, "step": 26955 }, { "epoch": 0.9, "grad_norm": 0.7403907179832458, "learning_rate": 5.382911822515191e-07, "loss": 2.0875, "step": 26956 }, { "epoch": 0.9, "grad_norm": 0.7163010835647583, "learning_rate": 5.379472117793549e-07, "loss": 2.0715, "step": 26957 }, { "epoch": 0.9, "grad_norm": 0.7788358926773071, "learning_rate": 5.376033482039166e-07, "loss": 2.0491, "step": 26958 }, { "epoch": 0.9, "grad_norm": 0.7473604679107666, "learning_rate": 5.37259591529089e-07, "loss": 1.9643, "step": 26959 }, { "epoch": 0.9, "grad_norm": 0.7432107925415039, "learning_rate": 5.369159417587588e-07, "loss": 2.0535, "step": 26960 }, { "epoch": 0.9, "grad_norm": 0.7301928997039795, "learning_rate": 5.365723988968041e-07, "loss": 2.0612, "step": 26961 }, { "epoch": 0.9, "grad_norm": 0.7319395542144775, "learning_rate": 5.362289629471095e-07, "loss": 2.0067, "step": 26962 }, { "epoch": 0.9, "grad_norm": 0.7301526665687561, "learning_rate": 5.358856339135532e-07, "loss": 2.003, "step": 26963 }, { "epoch": 0.9, "grad_norm": 0.7434555888175964, "learning_rate": 5.355424118000163e-07, "loss": 2.031, "step": 26964 }, { "epoch": 0.9, "grad_norm": 0.757209300994873, "learning_rate": 5.351992966103725e-07, "loss": 2.1425, "step": 26965 }, { "epoch": 0.9, "grad_norm": 0.7407654523849487, "learning_rate": 5.348562883485009e-07, "loss": 2.0562, "step": 26966 }, { "epoch": 0.9, "grad_norm": 0.7699578404426575, "learning_rate": 5.345133870182773e-07, "loss": 2.0984, "step": 26967 }, { "epoch": 0.9, "grad_norm": 0.734982430934906, "learning_rate": 5.341705926235718e-07, "loss": 2.0833, "step": 26968 }, { "epoch": 0.9, "grad_norm": 0.7863766551017761, "learning_rate": 5.338279051682594e-07, "loss": 2.0752, "step": 26969 }, { "epoch": 0.9, "grad_norm": 0.7463781237602234, "learning_rate": 5.334853246562132e-07, "loss": 2.0632, "step": 26970 }, { "epoch": 0.9, "grad_norm": 0.7242060899734497, "learning_rate": 5.331428510913017e-07, "loss": 1.9176, "step": 26971 }, { "epoch": 0.9, "grad_norm": 0.7766664028167725, "learning_rate": 5.328004844773926e-07, "loss": 2.0673, "step": 26972 }, { "epoch": 0.9, "grad_norm": 0.7228651642799377, "learning_rate": 5.324582248183574e-07, "loss": 2.0354, "step": 26973 }, { "epoch": 0.9, "grad_norm": 0.7047490477561951, "learning_rate": 5.321160721180585e-07, "loss": 1.9829, "step": 26974 }, { "epoch": 0.9, "grad_norm": 0.7621657252311707, "learning_rate": 5.317740263803639e-07, "loss": 2.0329, "step": 26975 }, { "epoch": 0.9, "grad_norm": 0.7462449073791504, "learning_rate": 5.314320876091406e-07, "loss": 2.0401, "step": 26976 }, { "epoch": 0.9, "grad_norm": 0.7307231426239014, "learning_rate": 5.310902558082442e-07, "loss": 1.973, "step": 26977 }, { "epoch": 0.9, "grad_norm": 0.7253983020782471, "learning_rate": 5.307485309815419e-07, "loss": 2.0925, "step": 26978 }, { "epoch": 0.9, "grad_norm": 0.7262557744979858, "learning_rate": 5.304069131328948e-07, "loss": 2.0064, "step": 26979 }, { "epoch": 0.9, "grad_norm": 0.7326792478561401, "learning_rate": 5.3006540226616e-07, "loss": 1.997, "step": 26980 }, { "epoch": 0.9, "grad_norm": 0.7362827658653259, "learning_rate": 5.297239983851954e-07, "loss": 2.0084, "step": 26981 }, { "epoch": 0.9, "grad_norm": 0.7429168224334717, "learning_rate": 5.293827014938601e-07, "loss": 2.0587, "step": 26982 }, { "epoch": 0.9, "grad_norm": 0.7198291420936584, "learning_rate": 5.290415115960079e-07, "loss": 1.9758, "step": 26983 }, { "epoch": 0.9, "grad_norm": 0.7450733780860901, "learning_rate": 5.287004286954933e-07, "loss": 2.037, "step": 26984 }, { "epoch": 0.9, "grad_norm": 0.7643210291862488, "learning_rate": 5.283594527961722e-07, "loss": 1.9919, "step": 26985 }, { "epoch": 0.9, "grad_norm": 0.7423257827758789, "learning_rate": 5.280185839018948e-07, "loss": 2.0027, "step": 26986 }, { "epoch": 0.9, "grad_norm": 0.7475537061691284, "learning_rate": 5.276778220165113e-07, "loss": 1.9544, "step": 26987 }, { "epoch": 0.9, "grad_norm": 0.742992103099823, "learning_rate": 5.273371671438743e-07, "loss": 2.0181, "step": 26988 }, { "epoch": 0.9, "grad_norm": 0.7447389364242554, "learning_rate": 5.269966192878295e-07, "loss": 2.0092, "step": 26989 }, { "epoch": 0.9, "grad_norm": 0.7271109819412231, "learning_rate": 5.266561784522251e-07, "loss": 2.0369, "step": 26990 }, { "epoch": 0.9, "grad_norm": 0.7332441210746765, "learning_rate": 5.263158446409056e-07, "loss": 2.0461, "step": 26991 }, { "epoch": 0.9, "grad_norm": 0.7433024644851685, "learning_rate": 5.259756178577203e-07, "loss": 1.9958, "step": 26992 }, { "epoch": 0.9, "grad_norm": 0.730384349822998, "learning_rate": 5.256354981065092e-07, "loss": 2.0336, "step": 26993 }, { "epoch": 0.9, "grad_norm": 0.7674983143806458, "learning_rate": 5.252954853911151e-07, "loss": 2.0502, "step": 26994 }, { "epoch": 0.9, "grad_norm": 0.743308424949646, "learning_rate": 5.249555797153805e-07, "loss": 1.9863, "step": 26995 }, { "epoch": 0.9, "grad_norm": 0.7620336413383484, "learning_rate": 5.246157810831442e-07, "loss": 2.0189, "step": 26996 }, { "epoch": 0.9, "grad_norm": 0.7430989146232605, "learning_rate": 5.242760894982457e-07, "loss": 2.0512, "step": 26997 }, { "epoch": 0.9, "grad_norm": 0.7522795796394348, "learning_rate": 5.23936504964525e-07, "loss": 2.0942, "step": 26998 }, { "epoch": 0.9, "grad_norm": 0.7349939346313477, "learning_rate": 5.235970274858138e-07, "loss": 2.0659, "step": 26999 }, { "epoch": 0.9, "grad_norm": 0.7293635606765747, "learning_rate": 5.232576570659487e-07, "loss": 2.0173, "step": 27000 }, { "epoch": 0.9, "grad_norm": 0.7363609671592712, "learning_rate": 5.229183937087667e-07, "loss": 2.0796, "step": 27001 }, { "epoch": 0.9, "grad_norm": 0.7646094560623169, "learning_rate": 5.225792374180983e-07, "loss": 2.0562, "step": 27002 }, { "epoch": 0.9, "grad_norm": 0.7520452737808228, "learning_rate": 5.222401881977745e-07, "loss": 2.053, "step": 27003 }, { "epoch": 0.9, "grad_norm": 0.7562854886054993, "learning_rate": 5.219012460516271e-07, "loss": 2.0971, "step": 27004 }, { "epoch": 0.9, "grad_norm": 0.723134458065033, "learning_rate": 5.215624109834838e-07, "loss": 2.0249, "step": 27005 }, { "epoch": 0.9, "grad_norm": 0.7467735409736633, "learning_rate": 5.21223682997174e-07, "loss": 2.0645, "step": 27006 }, { "epoch": 0.9, "grad_norm": 0.7454159259796143, "learning_rate": 5.208850620965245e-07, "loss": 2.0222, "step": 27007 }, { "epoch": 0.9, "grad_norm": 0.7370786070823669, "learning_rate": 5.205465482853611e-07, "loss": 2.0671, "step": 27008 }, { "epoch": 0.9, "grad_norm": 0.7202531099319458, "learning_rate": 5.202081415675053e-07, "loss": 2.0037, "step": 27009 }, { "epoch": 0.9, "grad_norm": 0.7588405013084412, "learning_rate": 5.198698419467841e-07, "loss": 2.0351, "step": 27010 }, { "epoch": 0.9, "grad_norm": 0.7160064578056335, "learning_rate": 5.195316494270164e-07, "loss": 2.02, "step": 27011 }, { "epoch": 0.9, "grad_norm": 0.7371523380279541, "learning_rate": 5.191935640120238e-07, "loss": 2.0399, "step": 27012 }, { "epoch": 0.9, "grad_norm": 0.7569615244865417, "learning_rate": 5.188555857056254e-07, "loss": 2.0699, "step": 27013 }, { "epoch": 0.9, "grad_norm": 0.7328880429267883, "learning_rate": 5.185177145116415e-07, "loss": 2.0842, "step": 27014 }, { "epoch": 0.9, "grad_norm": 0.7620487213134766, "learning_rate": 5.181799504338869e-07, "loss": 2.0388, "step": 27015 }, { "epoch": 0.9, "grad_norm": 0.7487577795982361, "learning_rate": 5.178422934761795e-07, "loss": 1.9506, "step": 27016 }, { "epoch": 0.9, "grad_norm": 0.7214451432228088, "learning_rate": 5.175047436423319e-07, "loss": 2.1044, "step": 27017 }, { "epoch": 0.9, "grad_norm": 0.7533203959465027, "learning_rate": 5.171673009361589e-07, "loss": 2.0185, "step": 27018 }, { "epoch": 0.9, "grad_norm": 0.7581403851509094, "learning_rate": 5.168299653614706e-07, "loss": 2.0909, "step": 27019 }, { "epoch": 0.9, "grad_norm": 0.7517644762992859, "learning_rate": 5.164927369220829e-07, "loss": 2.0755, "step": 27020 }, { "epoch": 0.9, "grad_norm": 0.7404831647872925, "learning_rate": 5.161556156217995e-07, "loss": 2.0907, "step": 27021 }, { "epoch": 0.9, "grad_norm": 0.7361766695976257, "learning_rate": 5.158186014644318e-07, "loss": 2.0411, "step": 27022 }, { "epoch": 0.9, "grad_norm": 0.7281767129898071, "learning_rate": 5.154816944537889e-07, "loss": 2.0443, "step": 27023 }, { "epoch": 0.9, "grad_norm": 0.7476963400840759, "learning_rate": 5.151448945936754e-07, "loss": 2.0166, "step": 27024 }, { "epoch": 0.9, "grad_norm": 0.7690434455871582, "learning_rate": 5.148082018878952e-07, "loss": 2.0087, "step": 27025 }, { "epoch": 0.9, "grad_norm": 0.7568269371986389, "learning_rate": 5.144716163402542e-07, "loss": 2.0448, "step": 27026 }, { "epoch": 0.9, "grad_norm": 0.7555501461029053, "learning_rate": 5.141351379545523e-07, "loss": 1.9516, "step": 27027 }, { "epoch": 0.9, "grad_norm": 0.7793776392936707, "learning_rate": 5.137987667345923e-07, "loss": 2.0541, "step": 27028 }, { "epoch": 0.9, "grad_norm": 0.7565628290176392, "learning_rate": 5.134625026841755e-07, "loss": 2.0442, "step": 27029 }, { "epoch": 0.9, "grad_norm": 0.7423378229141235, "learning_rate": 5.131263458071001e-07, "loss": 2.0215, "step": 27030 }, { "epoch": 0.9, "grad_norm": 0.7259252667427063, "learning_rate": 5.127902961071618e-07, "loss": 1.9915, "step": 27031 }, { "epoch": 0.9, "grad_norm": 0.7894111275672913, "learning_rate": 5.124543535881588e-07, "loss": 2.1366, "step": 27032 }, { "epoch": 0.9, "grad_norm": 0.7414683699607849, "learning_rate": 5.12118518253889e-07, "loss": 2.0442, "step": 27033 }, { "epoch": 0.9, "grad_norm": 0.7552931904792786, "learning_rate": 5.117827901081406e-07, "loss": 2.0191, "step": 27034 }, { "epoch": 0.9, "grad_norm": 0.7368302345275879, "learning_rate": 5.114471691547096e-07, "loss": 1.9923, "step": 27035 }, { "epoch": 0.9, "grad_norm": 0.7382238507270813, "learning_rate": 5.111116553973894e-07, "loss": 2.0408, "step": 27036 }, { "epoch": 0.9, "grad_norm": 0.7565481662750244, "learning_rate": 5.10776248839967e-07, "loss": 2.0303, "step": 27037 }, { "epoch": 0.9, "grad_norm": 0.7438725829124451, "learning_rate": 5.10440949486235e-07, "loss": 2.0596, "step": 27038 }, { "epoch": 0.9, "grad_norm": 0.7720710039138794, "learning_rate": 5.101057573399803e-07, "loss": 1.9984, "step": 27039 }, { "epoch": 0.9, "grad_norm": 0.7454878091812134, "learning_rate": 5.097706724049867e-07, "loss": 2.0438, "step": 27040 }, { "epoch": 0.9, "grad_norm": 0.7662563323974609, "learning_rate": 5.094356946850431e-07, "loss": 2.0705, "step": 27041 }, { "epoch": 0.9, "grad_norm": 0.7354649305343628, "learning_rate": 5.091008241839346e-07, "loss": 2.0718, "step": 27042 }, { "epoch": 0.9, "grad_norm": 0.7660358548164368, "learning_rate": 5.087660609054412e-07, "loss": 2.1405, "step": 27043 }, { "epoch": 0.9, "grad_norm": 0.7287164330482483, "learning_rate": 5.084314048533456e-07, "loss": 2.0131, "step": 27044 }, { "epoch": 0.9, "grad_norm": 0.7514034509658813, "learning_rate": 5.080968560314315e-07, "loss": 2.0712, "step": 27045 }, { "epoch": 0.9, "grad_norm": 0.743866503238678, "learning_rate": 5.077624144434756e-07, "loss": 2.0889, "step": 27046 }, { "epoch": 0.9, "grad_norm": 0.7519726157188416, "learning_rate": 5.074280800932563e-07, "loss": 2.1144, "step": 27047 }, { "epoch": 0.9, "grad_norm": 0.7461344003677368, "learning_rate": 5.070938529845504e-07, "loss": 2.1008, "step": 27048 }, { "epoch": 0.9, "grad_norm": 0.7630887627601624, "learning_rate": 5.067597331211372e-07, "loss": 2.1109, "step": 27049 }, { "epoch": 0.9, "grad_norm": 0.702914834022522, "learning_rate": 5.064257205067869e-07, "loss": 2.0449, "step": 27050 }, { "epoch": 0.9, "grad_norm": 0.7731585502624512, "learning_rate": 5.060918151452765e-07, "loss": 2.0291, "step": 27051 }, { "epoch": 0.9, "grad_norm": 0.7206317186355591, "learning_rate": 5.057580170403775e-07, "loss": 1.9682, "step": 27052 }, { "epoch": 0.9, "grad_norm": 0.7426854968070984, "learning_rate": 5.054243261958581e-07, "loss": 2.0535, "step": 27053 }, { "epoch": 0.9, "grad_norm": 0.7682952880859375, "learning_rate": 5.050907426154905e-07, "loss": 2.0614, "step": 27054 }, { "epoch": 0.9, "grad_norm": 0.73529452085495, "learning_rate": 5.047572663030453e-07, "loss": 2.0544, "step": 27055 }, { "epoch": 0.9, "grad_norm": 0.7498229742050171, "learning_rate": 5.044238972622861e-07, "loss": 2.0773, "step": 27056 }, { "epoch": 0.9, "grad_norm": 0.7547264695167542, "learning_rate": 5.040906354969799e-07, "loss": 1.9999, "step": 27057 }, { "epoch": 0.9, "grad_norm": 0.7484133839607239, "learning_rate": 5.037574810108937e-07, "loss": 2.0022, "step": 27058 }, { "epoch": 0.9, "grad_norm": 0.751929759979248, "learning_rate": 5.034244338077898e-07, "loss": 2.1634, "step": 27059 }, { "epoch": 0.9, "grad_norm": 0.7217931151390076, "learning_rate": 5.030914938914321e-07, "loss": 2.0403, "step": 27060 }, { "epoch": 0.9, "grad_norm": 0.7354684472084045, "learning_rate": 5.027586612655799e-07, "loss": 2.006, "step": 27061 }, { "epoch": 0.9, "grad_norm": 0.7464145421981812, "learning_rate": 5.024259359339944e-07, "loss": 2.1009, "step": 27062 }, { "epoch": 0.9, "grad_norm": 0.7327633500099182, "learning_rate": 5.02093317900434e-07, "loss": 2.0259, "step": 27063 }, { "epoch": 0.9, "grad_norm": 0.7375994920730591, "learning_rate": 5.017608071686586e-07, "loss": 2.0765, "step": 27064 }, { "epoch": 0.9, "grad_norm": 0.7414315342903137, "learning_rate": 5.014284037424233e-07, "loss": 2.071, "step": 27065 }, { "epoch": 0.9, "grad_norm": 0.7649632692337036, "learning_rate": 5.010961076254806e-07, "loss": 2.0666, "step": 27066 }, { "epoch": 0.9, "grad_norm": 0.740566074848175, "learning_rate": 5.007639188215896e-07, "loss": 2.0483, "step": 27067 }, { "epoch": 0.9, "grad_norm": 0.7272589206695557, "learning_rate": 5.004318373344996e-07, "loss": 2.046, "step": 27068 }, { "epoch": 0.9, "grad_norm": 0.7377866506576538, "learning_rate": 5.000998631679643e-07, "loss": 1.9703, "step": 27069 }, { "epoch": 0.9, "grad_norm": 0.7418698668479919, "learning_rate": 4.997679963257318e-07, "loss": 2.0502, "step": 27070 }, { "epoch": 0.9, "grad_norm": 0.7199897766113281, "learning_rate": 4.994362368115546e-07, "loss": 1.9693, "step": 27071 }, { "epoch": 0.9, "grad_norm": 0.7556372284889221, "learning_rate": 4.991045846291764e-07, "loss": 2.0622, "step": 27072 }, { "epoch": 0.9, "grad_norm": 0.7479092478752136, "learning_rate": 4.987730397823487e-07, "loss": 2.0602, "step": 27073 }, { "epoch": 0.9, "grad_norm": 0.750408947467804, "learning_rate": 4.984416022748151e-07, "loss": 2.0325, "step": 27074 }, { "epoch": 0.9, "grad_norm": 0.7369655966758728, "learning_rate": 4.981102721103193e-07, "loss": 2.0629, "step": 27075 }, { "epoch": 0.9, "grad_norm": 0.7437099814414978, "learning_rate": 4.977790492926038e-07, "loss": 2.0585, "step": 27076 }, { "epoch": 0.9, "grad_norm": 0.7695946097373962, "learning_rate": 4.974479338254146e-07, "loss": 2.0772, "step": 27077 }, { "epoch": 0.9, "grad_norm": 0.7202904224395752, "learning_rate": 4.971169257124864e-07, "loss": 2.0062, "step": 27078 }, { "epoch": 0.9, "grad_norm": 0.7570523619651794, "learning_rate": 4.967860249575629e-07, "loss": 2.0616, "step": 27079 }, { "epoch": 0.9, "grad_norm": 0.7607750296592712, "learning_rate": 4.964552315643822e-07, "loss": 2.0256, "step": 27080 }, { "epoch": 0.9, "grad_norm": 0.7446511387825012, "learning_rate": 4.961245455366792e-07, "loss": 1.9807, "step": 27081 }, { "epoch": 0.9, "grad_norm": 0.7301918268203735, "learning_rate": 4.957939668781931e-07, "loss": 2.0482, "step": 27082 }, { "epoch": 0.9, "grad_norm": 0.7503844499588013, "learning_rate": 4.954634955926574e-07, "loss": 2.0197, "step": 27083 }, { "epoch": 0.9, "grad_norm": 0.7488322854042053, "learning_rate": 4.951331316838026e-07, "loss": 2.0211, "step": 27084 }, { "epoch": 0.9, "grad_norm": 0.7072362303733826, "learning_rate": 4.948028751553635e-07, "loss": 2.0552, "step": 27085 }, { "epoch": 0.9, "grad_norm": 0.7244053483009338, "learning_rate": 4.944727260110715e-07, "loss": 2.0492, "step": 27086 }, { "epoch": 0.9, "grad_norm": 0.7351243495941162, "learning_rate": 4.941426842546571e-07, "loss": 2.0142, "step": 27087 }, { "epoch": 0.9, "grad_norm": 0.7761110067367554, "learning_rate": 4.938127498898459e-07, "loss": 1.9812, "step": 27088 }, { "epoch": 0.9, "grad_norm": 0.75872802734375, "learning_rate": 4.934829229203685e-07, "loss": 2.0452, "step": 27089 }, { "epoch": 0.9, "grad_norm": 0.7456680536270142, "learning_rate": 4.931532033499475e-07, "loss": 2.0871, "step": 27090 }, { "epoch": 0.9, "grad_norm": 0.7709147930145264, "learning_rate": 4.928235911823121e-07, "loss": 2.0273, "step": 27091 }, { "epoch": 0.9, "grad_norm": 0.7303224205970764, "learning_rate": 4.924940864211825e-07, "loss": 2.079, "step": 27092 }, { "epoch": 0.9, "grad_norm": 0.7164412140846252, "learning_rate": 4.921646890702847e-07, "loss": 1.9756, "step": 27093 }, { "epoch": 0.9, "grad_norm": 0.7366651296615601, "learning_rate": 4.918353991333358e-07, "loss": 2.0164, "step": 27094 }, { "epoch": 0.9, "grad_norm": 0.7471129894256592, "learning_rate": 4.915062166140605e-07, "loss": 2.013, "step": 27095 }, { "epoch": 0.9, "grad_norm": 0.7312847375869751, "learning_rate": 4.911771415161748e-07, "loss": 1.9979, "step": 27096 }, { "epoch": 0.9, "grad_norm": 0.7334935665130615, "learning_rate": 4.908481738433957e-07, "loss": 1.9989, "step": 27097 }, { "epoch": 0.9, "grad_norm": 0.744944155216217, "learning_rate": 4.905193135994413e-07, "loss": 2.0499, "step": 27098 }, { "epoch": 0.9, "grad_norm": 0.8649877309799194, "learning_rate": 4.901905607880287e-07, "loss": 2.0129, "step": 27099 }, { "epoch": 0.9, "grad_norm": 0.7543284296989441, "learning_rate": 4.898619154128681e-07, "loss": 2.026, "step": 27100 }, { "epoch": 0.9, "grad_norm": 0.7839173078536987, "learning_rate": 4.895333774776745e-07, "loss": 1.9944, "step": 27101 }, { "epoch": 0.9, "grad_norm": 0.7556594014167786, "learning_rate": 4.892049469861592e-07, "loss": 2.0571, "step": 27102 }, { "epoch": 0.9, "grad_norm": 0.7706905603408813, "learning_rate": 4.888766239420317e-07, "loss": 2.0997, "step": 27103 }, { "epoch": 0.9, "grad_norm": 0.7642306685447693, "learning_rate": 4.885484083490033e-07, "loss": 2.1167, "step": 27104 }, { "epoch": 0.9, "grad_norm": 0.7485413551330566, "learning_rate": 4.882203002107811e-07, "loss": 2.0459, "step": 27105 }, { "epoch": 0.9, "grad_norm": 0.7663609385490417, "learning_rate": 4.878922995310698e-07, "loss": 2.138, "step": 27106 }, { "epoch": 0.9, "grad_norm": 0.7456453442573547, "learning_rate": 4.875644063135765e-07, "loss": 2.0618, "step": 27107 }, { "epoch": 0.9, "grad_norm": 0.7579767107963562, "learning_rate": 4.872366205620071e-07, "loss": 2.0206, "step": 27108 }, { "epoch": 0.9, "grad_norm": 0.7478576898574829, "learning_rate": 4.869089422800633e-07, "loss": 2.0518, "step": 27109 }, { "epoch": 0.9, "grad_norm": 0.7535732388496399, "learning_rate": 4.865813714714451e-07, "loss": 1.9959, "step": 27110 }, { "epoch": 0.9, "grad_norm": 0.7318512797355652, "learning_rate": 4.862539081398576e-07, "loss": 2.03, "step": 27111 }, { "epoch": 0.9, "grad_norm": 0.7211381793022156, "learning_rate": 4.859265522889967e-07, "loss": 2.0776, "step": 27112 }, { "epoch": 0.9, "grad_norm": 0.7655084133148193, "learning_rate": 4.855993039225626e-07, "loss": 2.0183, "step": 27113 }, { "epoch": 0.9, "grad_norm": 0.7552460432052612, "learning_rate": 4.852721630442492e-07, "loss": 2.0524, "step": 27114 }, { "epoch": 0.9, "grad_norm": 0.7510892748832703, "learning_rate": 4.849451296577578e-07, "loss": 2.0859, "step": 27115 }, { "epoch": 0.9, "grad_norm": 0.765746533870697, "learning_rate": 4.846182037667779e-07, "loss": 2.0827, "step": 27116 }, { "epoch": 0.9, "grad_norm": 0.7234635949134827, "learning_rate": 4.842913853750064e-07, "loss": 2.0496, "step": 27117 }, { "epoch": 0.9, "grad_norm": 0.7556241750717163, "learning_rate": 4.839646744861337e-07, "loss": 2.0458, "step": 27118 }, { "epoch": 0.9, "grad_norm": 0.7452849745750427, "learning_rate": 4.836380711038502e-07, "loss": 2.0201, "step": 27119 }, { "epoch": 0.9, "grad_norm": 0.7753193378448486, "learning_rate": 4.833115752318462e-07, "loss": 2.0913, "step": 27120 }, { "epoch": 0.9, "grad_norm": 0.726535439491272, "learning_rate": 4.829851868738122e-07, "loss": 2.0921, "step": 27121 }, { "epoch": 0.9, "grad_norm": 0.7456585764884949, "learning_rate": 4.826589060334341e-07, "loss": 2.0761, "step": 27122 }, { "epoch": 0.9, "grad_norm": 0.7307118773460388, "learning_rate": 4.823327327143968e-07, "loss": 2.1309, "step": 27123 }, { "epoch": 0.9, "grad_norm": 0.7597889304161072, "learning_rate": 4.820066669203883e-07, "loss": 2.0618, "step": 27124 }, { "epoch": 0.9, "grad_norm": 0.7671752572059631, "learning_rate": 4.816807086550879e-07, "loss": 2.0837, "step": 27125 }, { "epoch": 0.9, "grad_norm": 0.7538430094718933, "learning_rate": 4.813548579221828e-07, "loss": 1.9987, "step": 27126 }, { "epoch": 0.9, "grad_norm": 0.7858676314353943, "learning_rate": 4.810291147253521e-07, "loss": 2.0929, "step": 27127 }, { "epoch": 0.9, "grad_norm": 0.743984043598175, "learning_rate": 4.807034790682741e-07, "loss": 2.0459, "step": 27128 }, { "epoch": 0.9, "grad_norm": 0.7324937582015991, "learning_rate": 4.803779509546292e-07, "loss": 2.024, "step": 27129 }, { "epoch": 0.9, "grad_norm": 0.7180732488632202, "learning_rate": 4.800525303880966e-07, "loss": 1.9935, "step": 27130 }, { "epoch": 0.9, "grad_norm": 0.7210368514060974, "learning_rate": 4.79727217372351e-07, "loss": 2.0739, "step": 27131 }, { "epoch": 0.9, "grad_norm": 0.7431942224502563, "learning_rate": 4.794020119110665e-07, "loss": 2.0913, "step": 27132 }, { "epoch": 0.9, "grad_norm": 0.7325198650360107, "learning_rate": 4.790769140079188e-07, "loss": 1.9829, "step": 27133 }, { "epoch": 0.9, "grad_norm": 0.7760083675384521, "learning_rate": 4.787519236665816e-07, "loss": 2.0408, "step": 27134 }, { "epoch": 0.9, "grad_norm": 0.7466432452201843, "learning_rate": 4.784270408907254e-07, "loss": 2.0562, "step": 27135 }, { "epoch": 0.9, "grad_norm": 0.7381594777107239, "learning_rate": 4.781022656840184e-07, "loss": 2.0641, "step": 27136 }, { "epoch": 0.9, "grad_norm": 0.763980507850647, "learning_rate": 4.777775980501331e-07, "loss": 2.0445, "step": 27137 }, { "epoch": 0.9, "grad_norm": 0.7529333233833313, "learning_rate": 4.774530379927345e-07, "loss": 2.0408, "step": 27138 }, { "epoch": 0.9, "grad_norm": 0.746748149394989, "learning_rate": 4.771285855154928e-07, "loss": 2.0177, "step": 27139 }, { "epoch": 0.9, "grad_norm": 0.7413294315338135, "learning_rate": 4.7680424062207075e-07, "loss": 2.0067, "step": 27140 }, { "epoch": 0.9, "grad_norm": 0.7652919292449951, "learning_rate": 4.7648000331613207e-07, "loss": 2.1138, "step": 27141 }, { "epoch": 0.9, "grad_norm": 0.7411420345306396, "learning_rate": 4.761558736013405e-07, "loss": 2.0844, "step": 27142 }, { "epoch": 0.9, "grad_norm": 0.7302148938179016, "learning_rate": 4.758318514813609e-07, "loss": 2.0831, "step": 27143 }, { "epoch": 0.9, "grad_norm": 0.741340696811676, "learning_rate": 4.755079369598503e-07, "loss": 2.0703, "step": 27144 }, { "epoch": 0.9, "grad_norm": 0.7437747120857239, "learning_rate": 4.7518413004046805e-07, "loss": 2.0798, "step": 27145 }, { "epoch": 0.9, "grad_norm": 0.7413864731788635, "learning_rate": 4.7486043072687447e-07, "loss": 1.9819, "step": 27146 }, { "epoch": 0.9, "grad_norm": 0.7291920781135559, "learning_rate": 4.745368390227245e-07, "loss": 2.0232, "step": 27147 }, { "epoch": 0.9, "grad_norm": 0.7635176181793213, "learning_rate": 4.742133549316763e-07, "loss": 2.0904, "step": 27148 }, { "epoch": 0.9, "grad_norm": 0.7395678162574768, "learning_rate": 4.7388997845738137e-07, "loss": 2.0449, "step": 27149 }, { "epoch": 0.9, "grad_norm": 0.7517318725585938, "learning_rate": 4.735667096034957e-07, "loss": 1.995, "step": 27150 }, { "epoch": 0.9, "grad_norm": 0.7475565671920776, "learning_rate": 4.732435483736697e-07, "loss": 1.9816, "step": 27151 }, { "epoch": 0.9, "grad_norm": 0.7626528143882751, "learning_rate": 4.72920494771556e-07, "loss": 2.0995, "step": 27152 }, { "epoch": 0.9, "grad_norm": 0.7431235909461975, "learning_rate": 4.7259754880080277e-07, "loss": 2.0399, "step": 27153 }, { "epoch": 0.9, "grad_norm": 0.746757447719574, "learning_rate": 4.722747104650582e-07, "loss": 2.0557, "step": 27154 }, { "epoch": 0.9, "grad_norm": 0.7459179162979126, "learning_rate": 4.719519797679695e-07, "loss": 2.0516, "step": 27155 }, { "epoch": 0.9, "grad_norm": 0.7435001134872437, "learning_rate": 4.716293567131847e-07, "loss": 2.0622, "step": 27156 }, { "epoch": 0.9, "grad_norm": 0.7230681777000427, "learning_rate": 4.7130684130434755e-07, "loss": 2.0126, "step": 27157 }, { "epoch": 0.9, "grad_norm": 0.7326712012290955, "learning_rate": 4.7098443354509973e-07, "loss": 1.9997, "step": 27158 }, { "epoch": 0.9, "grad_norm": 0.7275022864341736, "learning_rate": 4.7066213343908596e-07, "loss": 2.0076, "step": 27159 }, { "epoch": 0.9, "grad_norm": 0.7304858565330505, "learning_rate": 4.7033994098994674e-07, "loss": 2.0196, "step": 27160 }, { "epoch": 0.9, "grad_norm": 0.7503100633621216, "learning_rate": 4.700178562013202e-07, "loss": 2.0079, "step": 27161 }, { "epoch": 0.9, "grad_norm": 0.7476886510848999, "learning_rate": 4.696958790768513e-07, "loss": 2.0234, "step": 27162 }, { "epoch": 0.9, "grad_norm": 0.765396773815155, "learning_rate": 4.693740096201693e-07, "loss": 1.9728, "step": 27163 }, { "epoch": 0.9, "grad_norm": 0.7505879998207092, "learning_rate": 4.6905224783491464e-07, "loss": 2.0694, "step": 27164 }, { "epoch": 0.9, "grad_norm": 0.7419947981834412, "learning_rate": 4.687305937247233e-07, "loss": 2.0781, "step": 27165 }, { "epoch": 0.9, "grad_norm": 0.7218696475028992, "learning_rate": 4.684090472932279e-07, "loss": 2.0785, "step": 27166 }, { "epoch": 0.9, "grad_norm": 0.7634477615356445, "learning_rate": 4.6808760854405885e-07, "loss": 2.0639, "step": 27167 }, { "epoch": 0.9, "grad_norm": 0.7482348084449768, "learning_rate": 4.6776627748085225e-07, "loss": 2.0637, "step": 27168 }, { "epoch": 0.9, "grad_norm": 0.7162985801696777, "learning_rate": 4.6744505410723397e-07, "loss": 2.1038, "step": 27169 }, { "epoch": 0.9, "grad_norm": 0.7516705393791199, "learning_rate": 4.671239384268356e-07, "loss": 2.0446, "step": 27170 }, { "epoch": 0.9, "grad_norm": 0.7414560317993164, "learning_rate": 4.6680293044328417e-07, "loss": 2.0335, "step": 27171 }, { "epoch": 0.9, "grad_norm": 0.7323722839355469, "learning_rate": 4.664820301602069e-07, "loss": 2.0858, "step": 27172 }, { "epoch": 0.9, "grad_norm": 0.7679982781410217, "learning_rate": 4.6616123758122636e-07, "loss": 2.0469, "step": 27173 }, { "epoch": 0.9, "grad_norm": 0.7764168381690979, "learning_rate": 4.6584055270997077e-07, "loss": 2.0976, "step": 27174 }, { "epoch": 0.9, "grad_norm": 0.745108962059021, "learning_rate": 4.655199755500617e-07, "loss": 2.0261, "step": 27175 }, { "epoch": 0.9, "grad_norm": 0.7494735717773438, "learning_rate": 4.6519950610511734e-07, "loss": 2.0027, "step": 27176 }, { "epoch": 0.9, "grad_norm": 0.7368234992027283, "learning_rate": 4.6487914437876257e-07, "loss": 2.0468, "step": 27177 }, { "epoch": 0.9, "grad_norm": 0.7543310523033142, "learning_rate": 4.6455889037461564e-07, "loss": 2.0878, "step": 27178 }, { "epoch": 0.9, "grad_norm": 0.7143838405609131, "learning_rate": 4.6423874409629365e-07, "loss": 1.9742, "step": 27179 }, { "epoch": 0.9, "grad_norm": 0.7212214469909668, "learning_rate": 4.639187055474137e-07, "loss": 1.9663, "step": 27180 }, { "epoch": 0.9, "grad_norm": 0.7832921743392944, "learning_rate": 4.635987747315918e-07, "loss": 2.119, "step": 27181 }, { "epoch": 0.9, "grad_norm": 0.7653928995132446, "learning_rate": 4.6327895165244164e-07, "loss": 2.1316, "step": 27182 }, { "epoch": 0.9, "grad_norm": 0.7269788384437561, "learning_rate": 4.62959236313576e-07, "loss": 2.08, "step": 27183 }, { "epoch": 0.9, "grad_norm": 0.7331037521362305, "learning_rate": 4.626396287186108e-07, "loss": 2.0744, "step": 27184 }, { "epoch": 0.9, "grad_norm": 0.7766017913818359, "learning_rate": 4.6232012887115095e-07, "loss": 2.0697, "step": 27185 }, { "epoch": 0.9, "grad_norm": 0.7590108513832092, "learning_rate": 4.6200073677480804e-07, "loss": 2.0469, "step": 27186 }, { "epoch": 0.9, "grad_norm": 0.7213303446769714, "learning_rate": 4.616814524331925e-07, "loss": 2.0911, "step": 27187 }, { "epoch": 0.9, "grad_norm": 0.7811734080314636, "learning_rate": 4.613622758499103e-07, "loss": 2.0417, "step": 27188 }, { "epoch": 0.9, "grad_norm": 0.7582060694694519, "learning_rate": 4.6104320702856533e-07, "loss": 2.1037, "step": 27189 }, { "epoch": 0.9, "grad_norm": 0.812232255935669, "learning_rate": 4.607242459727657e-07, "loss": 2.0454, "step": 27190 }, { "epoch": 0.9, "grad_norm": 0.7333725094795227, "learning_rate": 4.604053926861107e-07, "loss": 2.0382, "step": 27191 }, { "epoch": 0.9, "grad_norm": 0.7528991103172302, "learning_rate": 4.600866471722054e-07, "loss": 2.0403, "step": 27192 }, { "epoch": 0.9, "grad_norm": 0.7418551445007324, "learning_rate": 4.597680094346513e-07, "loss": 2.015, "step": 27193 }, { "epoch": 0.9, "grad_norm": 0.7414429187774658, "learning_rate": 4.5944947947704765e-07, "loss": 1.9767, "step": 27194 }, { "epoch": 0.9, "grad_norm": 0.7212668061256409, "learning_rate": 4.5913105730299055e-07, "loss": 2.0033, "step": 27195 }, { "epoch": 0.9, "grad_norm": 0.7240128517150879, "learning_rate": 4.588127429160816e-07, "loss": 2.115, "step": 27196 }, { "epoch": 0.9, "grad_norm": 0.7466779351234436, "learning_rate": 4.5849453631991335e-07, "loss": 2.0644, "step": 27197 }, { "epoch": 0.9, "grad_norm": 0.7503958940505981, "learning_rate": 4.5817643751808086e-07, "loss": 2.0272, "step": 27198 }, { "epoch": 0.9, "grad_norm": 0.7405818104743958, "learning_rate": 4.5785844651418e-07, "loss": 2.0446, "step": 27199 }, { "epoch": 0.9, "grad_norm": 0.7428597807884216, "learning_rate": 4.575405633118024e-07, "loss": 2.0814, "step": 27200 }, { "epoch": 0.9, "grad_norm": 0.7610568404197693, "learning_rate": 4.572227879145386e-07, "loss": 2.0398, "step": 27201 }, { "epoch": 0.91, "grad_norm": 0.7541273236274719, "learning_rate": 4.569051203259789e-07, "loss": 2.077, "step": 27202 }, { "epoch": 0.91, "grad_norm": 0.7375293374061584, "learning_rate": 4.565875605497139e-07, "loss": 2.0089, "step": 27203 }, { "epoch": 0.91, "grad_norm": 0.7622026801109314, "learning_rate": 4.562701085893273e-07, "loss": 2.0236, "step": 27204 }, { "epoch": 0.91, "grad_norm": 0.7403553128242493, "learning_rate": 4.559527644484085e-07, "loss": 2.052, "step": 27205 }, { "epoch": 0.91, "grad_norm": 0.7683464884757996, "learning_rate": 4.5563552813054466e-07, "loss": 2.0031, "step": 27206 }, { "epoch": 0.91, "grad_norm": 0.7548992037773132, "learning_rate": 4.55318399639314e-07, "loss": 2.0678, "step": 27207 }, { "epoch": 0.91, "grad_norm": 0.7375810146331787, "learning_rate": 4.550013789783025e-07, "loss": 2.0553, "step": 27208 }, { "epoch": 0.91, "grad_norm": 0.7473897933959961, "learning_rate": 4.5468446615109294e-07, "loss": 2.0127, "step": 27209 }, { "epoch": 0.91, "grad_norm": 0.7527197003364563, "learning_rate": 4.5436766116126353e-07, "loss": 2.0616, "step": 27210 }, { "epoch": 0.91, "grad_norm": 0.7621583342552185, "learning_rate": 4.5405096401239354e-07, "loss": 2.0477, "step": 27211 }, { "epoch": 0.91, "grad_norm": 0.7665872573852539, "learning_rate": 4.537343747080625e-07, "loss": 1.9802, "step": 27212 }, { "epoch": 0.91, "grad_norm": 0.7572036385536194, "learning_rate": 4.5341789325184404e-07, "loss": 2.0665, "step": 27213 }, { "epoch": 0.91, "grad_norm": 0.745315432548523, "learning_rate": 4.531015196473154e-07, "loss": 2.0378, "step": 27214 }, { "epoch": 0.91, "grad_norm": 0.7504767775535583, "learning_rate": 4.527852538980526e-07, "loss": 2.1323, "step": 27215 }, { "epoch": 0.91, "grad_norm": 0.7375007271766663, "learning_rate": 4.5246909600762613e-07, "loss": 2.0619, "step": 27216 }, { "epoch": 0.91, "grad_norm": 0.7386402487754822, "learning_rate": 4.5215304597960754e-07, "loss": 2.0014, "step": 27217 }, { "epoch": 0.91, "grad_norm": 0.7384512424468994, "learning_rate": 4.518371038175684e-07, "loss": 2.0429, "step": 27218 }, { "epoch": 0.91, "grad_norm": 0.7507937550544739, "learning_rate": 4.5152126952508034e-07, "loss": 2.0809, "step": 27219 }, { "epoch": 0.91, "grad_norm": 0.7525652647018433, "learning_rate": 4.5120554310570607e-07, "loss": 1.9981, "step": 27220 }, { "epoch": 0.91, "grad_norm": 0.7631418108940125, "learning_rate": 4.5088992456301605e-07, "loss": 2.0813, "step": 27221 }, { "epoch": 0.91, "grad_norm": 0.7673131823539734, "learning_rate": 4.5057441390057635e-07, "loss": 2.109, "step": 27222 }, { "epoch": 0.91, "grad_norm": 0.7537752389907837, "learning_rate": 4.502590111219507e-07, "loss": 2.1128, "step": 27223 }, { "epoch": 0.91, "grad_norm": 0.7530593276023865, "learning_rate": 4.4994371623069964e-07, "loss": 2.0165, "step": 27224 }, { "epoch": 0.91, "grad_norm": 0.7353842854499817, "learning_rate": 4.4962852923039036e-07, "loss": 2.0916, "step": 27225 }, { "epoch": 0.91, "grad_norm": 0.7565382719039917, "learning_rate": 4.493134501245788e-07, "loss": 2.0762, "step": 27226 }, { "epoch": 0.91, "grad_norm": 0.7596974968910217, "learning_rate": 4.489984789168267e-07, "loss": 2.0196, "step": 27227 }, { "epoch": 0.91, "grad_norm": 0.7390725612640381, "learning_rate": 4.486836156106944e-07, "loss": 2.0399, "step": 27228 }, { "epoch": 0.91, "grad_norm": 0.7634503841400146, "learning_rate": 4.483688602097358e-07, "loss": 2.1223, "step": 27229 }, { "epoch": 0.91, "grad_norm": 0.735130786895752, "learning_rate": 4.480542127175069e-07, "loss": 2.0037, "step": 27230 }, { "epoch": 0.91, "grad_norm": 0.7377784848213196, "learning_rate": 4.4773967313756497e-07, "loss": 2.074, "step": 27231 }, { "epoch": 0.91, "grad_norm": 0.7616094350814819, "learning_rate": 4.4742524147346254e-07, "loss": 2.0392, "step": 27232 }, { "epoch": 0.91, "grad_norm": 0.7815489172935486, "learning_rate": 4.4711091772874917e-07, "loss": 2.0044, "step": 27233 }, { "epoch": 0.91, "grad_norm": 0.7499237656593323, "learning_rate": 4.4679670190697853e-07, "loss": 2.0618, "step": 27234 }, { "epoch": 0.91, "grad_norm": 0.7361255884170532, "learning_rate": 4.4648259401170123e-07, "loss": 2.1037, "step": 27235 }, { "epoch": 0.91, "grad_norm": 0.74874347448349, "learning_rate": 4.461685940464644e-07, "loss": 2.0326, "step": 27236 }, { "epoch": 0.91, "grad_norm": 0.7651322484016418, "learning_rate": 4.458547020148163e-07, "loss": 2.0292, "step": 27237 }, { "epoch": 0.91, "grad_norm": 0.7285462617874146, "learning_rate": 4.455409179203019e-07, "loss": 2.0134, "step": 27238 }, { "epoch": 0.91, "grad_norm": 0.7600539326667786, "learning_rate": 4.4522724176646605e-07, "loss": 2.1237, "step": 27239 }, { "epoch": 0.91, "grad_norm": 0.7835752964019775, "learning_rate": 4.4491367355685266e-07, "loss": 1.9904, "step": 27240 }, { "epoch": 0.91, "grad_norm": 0.7645593285560608, "learning_rate": 4.446002132950078e-07, "loss": 2.0253, "step": 27241 }, { "epoch": 0.91, "grad_norm": 0.7423568367958069, "learning_rate": 4.442868609844675e-07, "loss": 2.0818, "step": 27242 }, { "epoch": 0.91, "grad_norm": 0.7289443016052246, "learning_rate": 4.439736166287734e-07, "loss": 2.0427, "step": 27243 }, { "epoch": 0.91, "grad_norm": 0.78445965051651, "learning_rate": 4.436604802314659e-07, "loss": 1.9468, "step": 27244 }, { "epoch": 0.91, "grad_norm": 0.7338494658470154, "learning_rate": 4.433474517960812e-07, "loss": 2.0992, "step": 27245 }, { "epoch": 0.91, "grad_norm": 0.757244348526001, "learning_rate": 4.430345313261575e-07, "loss": 2.0941, "step": 27246 }, { "epoch": 0.91, "grad_norm": 0.7690564393997192, "learning_rate": 4.4272171882522865e-07, "loss": 1.9839, "step": 27247 }, { "epoch": 0.91, "grad_norm": 0.760442852973938, "learning_rate": 4.4240901429682737e-07, "loss": 2.0739, "step": 27248 }, { "epoch": 0.91, "grad_norm": 0.7288614511489868, "learning_rate": 4.4209641774448753e-07, "loss": 2.0584, "step": 27249 }, { "epoch": 0.91, "grad_norm": 0.7467079162597656, "learning_rate": 4.4178392917174296e-07, "loss": 2.0638, "step": 27250 }, { "epoch": 0.91, "grad_norm": 0.7373058795928955, "learning_rate": 4.414715485821208e-07, "loss": 1.9754, "step": 27251 }, { "epoch": 0.91, "grad_norm": 0.7578360438346863, "learning_rate": 4.4115927597915053e-07, "loss": 2.0406, "step": 27252 }, { "epoch": 0.91, "grad_norm": 0.7696007490158081, "learning_rate": 4.408471113663615e-07, "loss": 2.0562, "step": 27253 }, { "epoch": 0.91, "grad_norm": 0.7486810088157654, "learning_rate": 4.4053505474728085e-07, "loss": 2.0643, "step": 27254 }, { "epoch": 0.91, "grad_norm": 0.7665430307388306, "learning_rate": 4.402231061254303e-07, "loss": 1.9978, "step": 27255 }, { "epoch": 0.91, "grad_norm": 0.746160089969635, "learning_rate": 4.399112655043369e-07, "loss": 2.0626, "step": 27256 }, { "epoch": 0.91, "grad_norm": 0.745145320892334, "learning_rate": 4.3959953288752466e-07, "loss": 1.9698, "step": 27257 }, { "epoch": 0.91, "grad_norm": 0.7654933929443359, "learning_rate": 4.3928790827851284e-07, "loss": 2.0736, "step": 27258 }, { "epoch": 0.91, "grad_norm": 0.7345343828201294, "learning_rate": 4.389763916808232e-07, "loss": 2.0881, "step": 27259 }, { "epoch": 0.91, "grad_norm": 0.7512836456298828, "learning_rate": 4.386649830979761e-07, "loss": 2.007, "step": 27260 }, { "epoch": 0.91, "grad_norm": 0.7301349639892578, "learning_rate": 4.383536825334866e-07, "loss": 2.0531, "step": 27261 }, { "epoch": 0.91, "grad_norm": 0.749341607093811, "learning_rate": 4.380424899908742e-07, "loss": 2.0299, "step": 27262 }, { "epoch": 0.91, "grad_norm": 0.7210928201675415, "learning_rate": 4.377314054736559e-07, "loss": 2.038, "step": 27263 }, { "epoch": 0.91, "grad_norm": 0.7523465752601624, "learning_rate": 4.3742042898534234e-07, "loss": 2.017, "step": 27264 }, { "epoch": 0.91, "grad_norm": 0.7519701719284058, "learning_rate": 4.371095605294484e-07, "loss": 2.0102, "step": 27265 }, { "epoch": 0.91, "grad_norm": 0.7561206817626953, "learning_rate": 4.3679880010948693e-07, "loss": 2.1069, "step": 27266 }, { "epoch": 0.91, "grad_norm": 0.7261016368865967, "learning_rate": 4.364881477289673e-07, "loss": 1.9687, "step": 27267 }, { "epoch": 0.91, "grad_norm": 0.7484026551246643, "learning_rate": 4.361776033914e-07, "loss": 2.0595, "step": 27268 }, { "epoch": 0.91, "grad_norm": 0.751403272151947, "learning_rate": 4.358671671002945e-07, "loss": 2.0661, "step": 27269 }, { "epoch": 0.91, "grad_norm": 0.7374343276023865, "learning_rate": 4.3555683885915466e-07, "loss": 1.9217, "step": 27270 }, { "epoch": 0.91, "grad_norm": 0.7459091544151306, "learning_rate": 4.3524661867148875e-07, "loss": 2.0409, "step": 27271 }, { "epoch": 0.91, "grad_norm": 0.7699228525161743, "learning_rate": 4.3493650654080177e-07, "loss": 2.108, "step": 27272 }, { "epoch": 0.91, "grad_norm": 0.7439596056938171, "learning_rate": 4.3462650247059646e-07, "loss": 2.0086, "step": 27273 }, { "epoch": 0.91, "grad_norm": 0.7643613219261169, "learning_rate": 4.3431660646437444e-07, "loss": 2.0718, "step": 27274 }, { "epoch": 0.91, "grad_norm": 0.7296448945999146, "learning_rate": 4.3400681852563853e-07, "loss": 1.9716, "step": 27275 }, { "epoch": 0.91, "grad_norm": 0.7166428565979004, "learning_rate": 4.3369713865788585e-07, "loss": 2.1054, "step": 27276 }, { "epoch": 0.91, "grad_norm": 0.7583434581756592, "learning_rate": 4.3338756686461704e-07, "loss": 2.0953, "step": 27277 }, { "epoch": 0.91, "grad_norm": 0.7275704741477966, "learning_rate": 4.3307810314932917e-07, "loss": 1.9988, "step": 27278 }, { "epoch": 0.91, "grad_norm": 0.7607447504997253, "learning_rate": 4.327687475155184e-07, "loss": 2.0739, "step": 27279 }, { "epoch": 0.91, "grad_norm": 0.7460162043571472, "learning_rate": 4.324594999666776e-07, "loss": 2.1326, "step": 27280 }, { "epoch": 0.91, "grad_norm": 0.737190306186676, "learning_rate": 4.321503605063049e-07, "loss": 2.0477, "step": 27281 }, { "epoch": 0.91, "grad_norm": 0.7477969527244568, "learning_rate": 4.318413291378887e-07, "loss": 2.0461, "step": 27282 }, { "epoch": 0.91, "grad_norm": 0.761574387550354, "learning_rate": 4.3153240586492174e-07, "loss": 2.0817, "step": 27283 }, { "epoch": 0.91, "grad_norm": 0.7492398619651794, "learning_rate": 4.3122359069089345e-07, "loss": 2.0609, "step": 27284 }, { "epoch": 0.91, "grad_norm": 0.7186716794967651, "learning_rate": 4.3091488361929556e-07, "loss": 2.0624, "step": 27285 }, { "epoch": 0.91, "grad_norm": 0.7704103589057922, "learning_rate": 4.306062846536108e-07, "loss": 2.0544, "step": 27286 }, { "epoch": 0.91, "grad_norm": 0.7424799203872681, "learning_rate": 4.302977937973274e-07, "loss": 2.0257, "step": 27287 }, { "epoch": 0.91, "grad_norm": 0.7350695729255676, "learning_rate": 4.2998941105393375e-07, "loss": 1.9568, "step": 27288 }, { "epoch": 0.91, "grad_norm": 0.7453460097312927, "learning_rate": 4.2968113642690933e-07, "loss": 2.0084, "step": 27289 }, { "epoch": 0.91, "grad_norm": 0.7324519157409668, "learning_rate": 4.293729699197391e-07, "loss": 2.0134, "step": 27290 }, { "epoch": 0.91, "grad_norm": 0.7294197678565979, "learning_rate": 4.290649115359047e-07, "loss": 2.0988, "step": 27291 }, { "epoch": 0.91, "grad_norm": 0.7409774661064148, "learning_rate": 4.2875696127888446e-07, "loss": 2.0085, "step": 27292 }, { "epoch": 0.91, "grad_norm": 0.7409294247627258, "learning_rate": 4.284491191521589e-07, "loss": 2.0331, "step": 27293 }, { "epoch": 0.91, "grad_norm": 0.7285907864570618, "learning_rate": 4.2814138515920753e-07, "loss": 2.0122, "step": 27294 }, { "epoch": 0.91, "grad_norm": 0.7287317514419556, "learning_rate": 4.2783375930350426e-07, "loss": 2.028, "step": 27295 }, { "epoch": 0.91, "grad_norm": 0.75761479139328, "learning_rate": 4.275262415885251e-07, "loss": 2.0531, "step": 27296 }, { "epoch": 0.91, "grad_norm": 0.7740064263343811, "learning_rate": 4.2721883201774506e-07, "loss": 2.0425, "step": 27297 }, { "epoch": 0.91, "grad_norm": 0.7537700533866882, "learning_rate": 4.2691153059463475e-07, "loss": 2.0344, "step": 27298 }, { "epoch": 0.91, "grad_norm": 0.7413243055343628, "learning_rate": 4.2660433732266913e-07, "loss": 2.0943, "step": 27299 }, { "epoch": 0.91, "grad_norm": 0.7568277716636658, "learning_rate": 4.262972522053166e-07, "loss": 2.0627, "step": 27300 }, { "epoch": 0.91, "grad_norm": 0.7404831647872925, "learning_rate": 4.259902752460476e-07, "loss": 2.0784, "step": 27301 }, { "epoch": 0.91, "grad_norm": 0.7655001878738403, "learning_rate": 4.256834064483284e-07, "loss": 1.9952, "step": 27302 }, { "epoch": 0.91, "grad_norm": 0.7347220778465271, "learning_rate": 4.253766458156283e-07, "loss": 2.0191, "step": 27303 }, { "epoch": 0.91, "grad_norm": 0.7642946243286133, "learning_rate": 4.250699933514113e-07, "loss": 2.0073, "step": 27304 }, { "epoch": 0.91, "grad_norm": 0.7280412912368774, "learning_rate": 4.247634490591401e-07, "loss": 2.0404, "step": 27305 }, { "epoch": 0.91, "grad_norm": 0.7524710893630981, "learning_rate": 4.2445701294228095e-07, "loss": 2.1265, "step": 27306 }, { "epoch": 0.91, "grad_norm": 0.7413941621780396, "learning_rate": 4.2415068500429646e-07, "loss": 2.0014, "step": 27307 }, { "epoch": 0.91, "grad_norm": 0.7534601092338562, "learning_rate": 4.23844465248644e-07, "loss": 2.0985, "step": 27308 }, { "epoch": 0.91, "grad_norm": 0.7424414753913879, "learning_rate": 4.2353835367878404e-07, "loss": 2.0539, "step": 27309 }, { "epoch": 0.91, "grad_norm": 0.7325050234794617, "learning_rate": 4.232323502981772e-07, "loss": 1.9935, "step": 27310 }, { "epoch": 0.91, "grad_norm": 0.7339297533035278, "learning_rate": 4.229264551102763e-07, "loss": 2.036, "step": 27311 }, { "epoch": 0.91, "grad_norm": 0.7558871507644653, "learning_rate": 4.2262066811854183e-07, "loss": 2.0183, "step": 27312 }, { "epoch": 0.91, "grad_norm": 0.7621297836303711, "learning_rate": 4.2231498932642555e-07, "loss": 2.014, "step": 27313 }, { "epoch": 0.91, "grad_norm": 0.7755623459815979, "learning_rate": 4.220094187373802e-07, "loss": 2.0973, "step": 27314 }, { "epoch": 0.91, "grad_norm": 0.7383571863174438, "learning_rate": 4.217039563548597e-07, "loss": 2.0071, "step": 27315 }, { "epoch": 0.91, "grad_norm": 0.7490650415420532, "learning_rate": 4.2139860218231575e-07, "loss": 2.1004, "step": 27316 }, { "epoch": 0.91, "grad_norm": 0.7200995683670044, "learning_rate": 4.210933562231967e-07, "loss": 2.0263, "step": 27317 }, { "epoch": 0.91, "grad_norm": 0.7488175630569458, "learning_rate": 4.2078821848094975e-07, "loss": 2.0978, "step": 27318 }, { "epoch": 0.91, "grad_norm": 0.768964946269989, "learning_rate": 4.204831889590244e-07, "loss": 2.0848, "step": 27319 }, { "epoch": 0.91, "grad_norm": 0.7332388162612915, "learning_rate": 4.2017826766086454e-07, "loss": 2.0546, "step": 27320 }, { "epoch": 0.91, "grad_norm": 0.7552720308303833, "learning_rate": 4.1987345458991747e-07, "loss": 2.0823, "step": 27321 }, { "epoch": 0.91, "grad_norm": 0.7609044909477234, "learning_rate": 4.195687497496248e-07, "loss": 2.0337, "step": 27322 }, { "epoch": 0.91, "grad_norm": 0.7537041902542114, "learning_rate": 4.192641531434316e-07, "loss": 1.9834, "step": 27323 }, { "epoch": 0.91, "grad_norm": 0.7487196922302246, "learning_rate": 4.1895966477477515e-07, "loss": 2.0635, "step": 27324 }, { "epoch": 0.91, "grad_norm": 0.7514746189117432, "learning_rate": 4.1865528464709814e-07, "loss": 2.0596, "step": 27325 }, { "epoch": 0.91, "grad_norm": 0.74385005235672, "learning_rate": 4.1835101276383907e-07, "loss": 2.0887, "step": 27326 }, { "epoch": 0.91, "grad_norm": 0.7372734546661377, "learning_rate": 4.18046849128434e-07, "loss": 2.0712, "step": 27327 }, { "epoch": 0.91, "grad_norm": 0.7318451404571533, "learning_rate": 4.1774279374431903e-07, "loss": 2.03, "step": 27328 }, { "epoch": 0.91, "grad_norm": 0.7508850693702698, "learning_rate": 4.1743884661493264e-07, "loss": 2.0176, "step": 27329 }, { "epoch": 0.91, "grad_norm": 0.7809758186340332, "learning_rate": 4.171350077437053e-07, "loss": 2.1629, "step": 27330 }, { "epoch": 0.91, "grad_norm": 0.7604416012763977, "learning_rate": 4.168312771340699e-07, "loss": 2.1007, "step": 27331 }, { "epoch": 0.91, "grad_norm": 0.7466316223144531, "learning_rate": 4.165276547894592e-07, "loss": 2.0649, "step": 27332 }, { "epoch": 0.91, "grad_norm": 0.7419121861457825, "learning_rate": 4.162241407133016e-07, "loss": 2.0836, "step": 27333 }, { "epoch": 0.91, "grad_norm": 0.7313079833984375, "learning_rate": 4.159207349090277e-07, "loss": 1.9826, "step": 27334 }, { "epoch": 0.91, "grad_norm": 0.7823788523674011, "learning_rate": 4.156174373800648e-07, "loss": 2.0522, "step": 27335 }, { "epoch": 0.91, "grad_norm": 0.7896131277084351, "learning_rate": 4.153142481298389e-07, "loss": 2.1371, "step": 27336 }, { "epoch": 0.91, "grad_norm": 0.7621791362762451, "learning_rate": 4.1501116716177515e-07, "loss": 1.9975, "step": 27337 }, { "epoch": 0.91, "grad_norm": 0.7754830121994019, "learning_rate": 4.1470819447929857e-07, "loss": 2.0859, "step": 27338 }, { "epoch": 0.91, "grad_norm": 0.7694870829582214, "learning_rate": 4.144053300858308e-07, "loss": 2.0778, "step": 27339 }, { "epoch": 0.91, "grad_norm": 0.7369939684867859, "learning_rate": 4.141025739847937e-07, "loss": 1.9806, "step": 27340 }, { "epoch": 0.91, "grad_norm": 0.7189335227012634, "learning_rate": 4.137999261796066e-07, "loss": 2.0064, "step": 27341 }, { "epoch": 0.91, "grad_norm": 0.755294144153595, "learning_rate": 4.1349738667369246e-07, "loss": 2.0422, "step": 27342 }, { "epoch": 0.91, "grad_norm": 0.7233870625495911, "learning_rate": 4.1319495547046506e-07, "loss": 2.0535, "step": 27343 }, { "epoch": 0.91, "grad_norm": 0.7535879015922546, "learning_rate": 4.128926325733429e-07, "loss": 2.019, "step": 27344 }, { "epoch": 0.91, "grad_norm": 0.7583195567131042, "learning_rate": 4.1259041798574205e-07, "loss": 2.089, "step": 27345 }, { "epoch": 0.91, "grad_norm": 0.7333357930183411, "learning_rate": 4.122883117110743e-07, "loss": 2.0201, "step": 27346 }, { "epoch": 0.91, "grad_norm": 0.7266172170639038, "learning_rate": 4.119863137527558e-07, "loss": 2.0556, "step": 27347 }, { "epoch": 0.91, "grad_norm": 0.7337165474891663, "learning_rate": 4.11684424114196e-07, "loss": 1.9964, "step": 27348 }, { "epoch": 0.91, "grad_norm": 0.7413678169250488, "learning_rate": 4.1138264279880546e-07, "loss": 1.9812, "step": 27349 }, { "epoch": 0.91, "grad_norm": 0.7808709740638733, "learning_rate": 4.110809698099949e-07, "loss": 2.0321, "step": 27350 }, { "epoch": 0.91, "grad_norm": 0.7807044386863708, "learning_rate": 4.1077940515117266e-07, "loss": 2.0957, "step": 27351 }, { "epoch": 0.91, "grad_norm": 0.7442800998687744, "learning_rate": 4.1047794882574487e-07, "loss": 2.0303, "step": 27352 }, { "epoch": 0.91, "grad_norm": 0.7498773336410522, "learning_rate": 4.101766008371155e-07, "loss": 2.1079, "step": 27353 }, { "epoch": 0.91, "grad_norm": 0.7489928007125854, "learning_rate": 4.0987536118869297e-07, "loss": 2.0257, "step": 27354 }, { "epoch": 0.91, "grad_norm": 0.766908586025238, "learning_rate": 4.0957422988387673e-07, "loss": 1.9821, "step": 27355 }, { "epoch": 0.91, "grad_norm": 0.741818368434906, "learning_rate": 4.0927320692607075e-07, "loss": 2.0424, "step": 27356 }, { "epoch": 0.91, "grad_norm": 0.7340573668479919, "learning_rate": 4.0897229231867454e-07, "loss": 2.0816, "step": 27357 }, { "epoch": 0.91, "grad_norm": 0.7337018251419067, "learning_rate": 4.086714860650909e-07, "loss": 2.0692, "step": 27358 }, { "epoch": 0.91, "grad_norm": 0.7399473190307617, "learning_rate": 4.0837078816871265e-07, "loss": 2.088, "step": 27359 }, { "epoch": 0.91, "grad_norm": 0.7470431923866272, "learning_rate": 4.0807019863294273e-07, "loss": 2.0544, "step": 27360 }, { "epoch": 0.91, "grad_norm": 0.7440586686134338, "learning_rate": 4.077697174611728e-07, "loss": 2.0089, "step": 27361 }, { "epoch": 0.91, "grad_norm": 0.7290531396865845, "learning_rate": 4.0746934465679897e-07, "loss": 2.0375, "step": 27362 }, { "epoch": 0.91, "grad_norm": 0.7470327615737915, "learning_rate": 4.071690802232142e-07, "loss": 2.0602, "step": 27363 }, { "epoch": 0.91, "grad_norm": 0.7562233805656433, "learning_rate": 4.068689241638124e-07, "loss": 2.0611, "step": 27364 }, { "epoch": 0.91, "grad_norm": 0.7457106709480286, "learning_rate": 4.0656887648198416e-07, "loss": 2.0812, "step": 27365 }, { "epoch": 0.91, "grad_norm": 0.732226550579071, "learning_rate": 4.062689371811157e-07, "loss": 2.1268, "step": 27366 }, { "epoch": 0.91, "grad_norm": 0.7533044219017029, "learning_rate": 4.0596910626460093e-07, "loss": 2.0027, "step": 27367 }, { "epoch": 0.91, "grad_norm": 0.7608362436294556, "learning_rate": 4.0566938373582275e-07, "loss": 2.1373, "step": 27368 }, { "epoch": 0.91, "grad_norm": 0.7218877673149109, "learning_rate": 4.0536976959817064e-07, "loss": 2.0762, "step": 27369 }, { "epoch": 0.91, "grad_norm": 0.741619348526001, "learning_rate": 4.0507026385502747e-07, "loss": 2.0588, "step": 27370 }, { "epoch": 0.91, "grad_norm": 0.7445287108421326, "learning_rate": 4.0477086650977604e-07, "loss": 2.0242, "step": 27371 }, { "epoch": 0.91, "grad_norm": 0.7644695043563843, "learning_rate": 4.0447157756580035e-07, "loss": 2.0634, "step": 27372 }, { "epoch": 0.91, "grad_norm": 0.7628416419029236, "learning_rate": 4.041723970264821e-07, "loss": 2.0781, "step": 27373 }, { "epoch": 0.91, "grad_norm": 0.743776261806488, "learning_rate": 4.038733248951998e-07, "loss": 2.0621, "step": 27374 }, { "epoch": 0.91, "grad_norm": 0.7691857814788818, "learning_rate": 4.035743611753329e-07, "loss": 2.0385, "step": 27375 }, { "epoch": 0.91, "grad_norm": 0.7516829371452332, "learning_rate": 4.0327550587025864e-07, "loss": 2.01, "step": 27376 }, { "epoch": 0.91, "grad_norm": 0.7452287077903748, "learning_rate": 4.0297675898335334e-07, "loss": 2.0465, "step": 27377 }, { "epoch": 0.91, "grad_norm": 0.7835694551467896, "learning_rate": 4.0267812051799306e-07, "loss": 2.0722, "step": 27378 }, { "epoch": 0.91, "grad_norm": 0.7340983748435974, "learning_rate": 4.023795904775496e-07, "loss": 2.0084, "step": 27379 }, { "epoch": 0.91, "grad_norm": 0.7549338936805725, "learning_rate": 4.0208116886539806e-07, "loss": 2.0562, "step": 27380 }, { "epoch": 0.91, "grad_norm": 0.7653358578681946, "learning_rate": 4.0178285568490795e-07, "loss": 2.0128, "step": 27381 }, { "epoch": 0.91, "grad_norm": 0.7582991719245911, "learning_rate": 4.014846509394499e-07, "loss": 2.0633, "step": 27382 }, { "epoch": 0.91, "grad_norm": 0.7238643765449524, "learning_rate": 4.0118655463239453e-07, "loss": 2.0409, "step": 27383 }, { "epoch": 0.91, "grad_norm": 0.7570232152938843, "learning_rate": 4.008885667671059e-07, "loss": 2.0149, "step": 27384 }, { "epoch": 0.91, "grad_norm": 0.7459805011749268, "learning_rate": 4.005906873469523e-07, "loss": 2.0786, "step": 27385 }, { "epoch": 0.91, "grad_norm": 0.7322351932525635, "learning_rate": 4.002929163753011e-07, "loss": 1.966, "step": 27386 }, { "epoch": 0.91, "grad_norm": 0.7392884492874146, "learning_rate": 3.999952538555141e-07, "loss": 2.0252, "step": 27387 }, { "epoch": 0.91, "grad_norm": 0.7593870162963867, "learning_rate": 3.99697699790953e-07, "loss": 2.1258, "step": 27388 }, { "epoch": 0.91, "grad_norm": 0.7252902388572693, "learning_rate": 3.9940025418498285e-07, "loss": 2.0032, "step": 27389 }, { "epoch": 0.91, "grad_norm": 0.7523022294044495, "learning_rate": 3.9910291704096104e-07, "loss": 2.0152, "step": 27390 }, { "epoch": 0.91, "grad_norm": 0.713740348815918, "learning_rate": 3.9880568836224707e-07, "loss": 2.0865, "step": 27391 }, { "epoch": 0.91, "grad_norm": 0.7317163944244385, "learning_rate": 3.9850856815220275e-07, "loss": 2.0257, "step": 27392 }, { "epoch": 0.91, "grad_norm": 0.7534809112548828, "learning_rate": 3.982115564141786e-07, "loss": 2.0585, "step": 27393 }, { "epoch": 0.91, "grad_norm": 0.7478227615356445, "learning_rate": 3.9791465315153434e-07, "loss": 2.0349, "step": 27394 }, { "epoch": 0.91, "grad_norm": 0.7133371233940125, "learning_rate": 3.9761785836762267e-07, "loss": 2.0511, "step": 27395 }, { "epoch": 0.91, "grad_norm": 0.7364675998687744, "learning_rate": 3.9732117206579765e-07, "loss": 2.063, "step": 27396 }, { "epoch": 0.91, "grad_norm": 0.7627322673797607, "learning_rate": 3.970245942494089e-07, "loss": 2.1047, "step": 27397 }, { "epoch": 0.91, "grad_norm": 0.7295727729797363, "learning_rate": 3.967281249218091e-07, "loss": 2.0218, "step": 27398 }, { "epoch": 0.91, "grad_norm": 0.7716072201728821, "learning_rate": 3.9643176408634686e-07, "loss": 2.0934, "step": 27399 }, { "epoch": 0.91, "grad_norm": 0.7506695985794067, "learning_rate": 3.9613551174637057e-07, "loss": 2.0594, "step": 27400 }, { "epoch": 0.91, "grad_norm": 0.7332156896591187, "learning_rate": 3.9583936790522523e-07, "loss": 2.0604, "step": 27401 }, { "epoch": 0.91, "grad_norm": 0.7541760206222534, "learning_rate": 3.9554333256626055e-07, "loss": 2.03, "step": 27402 }, { "epoch": 0.91, "grad_norm": 0.7483181953430176, "learning_rate": 3.9524740573281707e-07, "loss": 2.1044, "step": 27403 }, { "epoch": 0.91, "grad_norm": 0.7564271092414856, "learning_rate": 3.9495158740823993e-07, "loss": 1.9896, "step": 27404 }, { "epoch": 0.91, "grad_norm": 0.7870551347732544, "learning_rate": 3.946558775958709e-07, "loss": 2.1267, "step": 27405 }, { "epoch": 0.91, "grad_norm": 0.7321218848228455, "learning_rate": 3.9436027629904947e-07, "loss": 1.9923, "step": 27406 }, { "epoch": 0.91, "grad_norm": 0.7440309524536133, "learning_rate": 3.9406478352111644e-07, "loss": 2.0238, "step": 27407 }, { "epoch": 0.91, "grad_norm": 0.72625333070755, "learning_rate": 3.9376939926541015e-07, "loss": 2.0341, "step": 27408 }, { "epoch": 0.91, "grad_norm": 0.7353745698928833, "learning_rate": 3.9347412353526793e-07, "loss": 2.1006, "step": 27409 }, { "epoch": 0.91, "grad_norm": 0.7492586970329285, "learning_rate": 3.931789563340249e-07, "loss": 2.0836, "step": 27410 }, { "epoch": 0.91, "grad_norm": 0.7198688983917236, "learning_rate": 3.928838976650162e-07, "loss": 2.0569, "step": 27411 }, { "epoch": 0.91, "grad_norm": 0.7719637155532837, "learning_rate": 3.925889475315736e-07, "loss": 2.0878, "step": 27412 }, { "epoch": 0.91, "grad_norm": 0.8289550542831421, "learning_rate": 3.9229410593703e-07, "loss": 2.1032, "step": 27413 }, { "epoch": 0.91, "grad_norm": 0.7269850373268127, "learning_rate": 3.919993728847205e-07, "loss": 2.0151, "step": 27414 }, { "epoch": 0.91, "grad_norm": 0.7554654479026794, "learning_rate": 3.917047483779679e-07, "loss": 2.0866, "step": 27415 }, { "epoch": 0.91, "grad_norm": 0.7510951161384583, "learning_rate": 3.914102324201041e-07, "loss": 2.0472, "step": 27416 }, { "epoch": 0.91, "grad_norm": 0.7514237761497498, "learning_rate": 3.9111582501445755e-07, "loss": 2.023, "step": 27417 }, { "epoch": 0.91, "grad_norm": 0.7676913738250732, "learning_rate": 3.9082152616435333e-07, "loss": 2.0727, "step": 27418 }, { "epoch": 0.91, "grad_norm": 0.7341684699058533, "learning_rate": 3.9052733587311543e-07, "loss": 2.0779, "step": 27419 }, { "epoch": 0.91, "grad_norm": 0.7579126358032227, "learning_rate": 3.902332541440679e-07, "loss": 2.0354, "step": 27420 }, { "epoch": 0.91, "grad_norm": 0.7665988206863403, "learning_rate": 3.899392809805325e-07, "loss": 2.0964, "step": 27421 }, { "epoch": 0.91, "grad_norm": 0.7308052778244019, "learning_rate": 3.896454163858321e-07, "loss": 2.0097, "step": 27422 }, { "epoch": 0.91, "grad_norm": 0.7163789868354797, "learning_rate": 3.893516603632852e-07, "loss": 1.9979, "step": 27423 }, { "epoch": 0.91, "grad_norm": 0.7743442058563232, "learning_rate": 3.8905801291621135e-07, "loss": 2.0654, "step": 27424 }, { "epoch": 0.91, "grad_norm": 0.7607342600822449, "learning_rate": 3.8876447404792684e-07, "loss": 2.0592, "step": 27425 }, { "epoch": 0.91, "grad_norm": 0.7724065780639648, "learning_rate": 3.884710437617478e-07, "loss": 2.0487, "step": 27426 }, { "epoch": 0.91, "grad_norm": 0.703120231628418, "learning_rate": 3.881777220609928e-07, "loss": 1.9502, "step": 27427 }, { "epoch": 0.91, "grad_norm": 0.7442044019699097, "learning_rate": 3.8788450894897024e-07, "loss": 1.9645, "step": 27428 }, { "epoch": 0.91, "grad_norm": 0.7330692410469055, "learning_rate": 3.8759140442899524e-07, "loss": 2.046, "step": 27429 }, { "epoch": 0.91, "grad_norm": 0.732552170753479, "learning_rate": 3.872984085043807e-07, "loss": 2.0579, "step": 27430 }, { "epoch": 0.91, "grad_norm": 0.7150179743766785, "learning_rate": 3.8700552117843514e-07, "loss": 2.059, "step": 27431 }, { "epoch": 0.91, "grad_norm": 0.7221666574478149, "learning_rate": 3.867127424544659e-07, "loss": 2.0559, "step": 27432 }, { "epoch": 0.91, "grad_norm": 0.7662800550460815, "learning_rate": 3.864200723357836e-07, "loss": 2.0051, "step": 27433 }, { "epoch": 0.91, "grad_norm": 0.7387768030166626, "learning_rate": 3.861275108256912e-07, "loss": 1.9618, "step": 27434 }, { "epoch": 0.91, "grad_norm": 0.7429881691932678, "learning_rate": 3.858350579274972e-07, "loss": 2.0201, "step": 27435 }, { "epoch": 0.91, "grad_norm": 0.7195424437522888, "learning_rate": 3.8554271364450445e-07, "loss": 2.0139, "step": 27436 }, { "epoch": 0.91, "grad_norm": 0.7355684638023376, "learning_rate": 3.852504779800159e-07, "loss": 2.0776, "step": 27437 }, { "epoch": 0.91, "grad_norm": 0.7297601103782654, "learning_rate": 3.8495835093733113e-07, "loss": 2.0294, "step": 27438 }, { "epoch": 0.91, "grad_norm": 0.7359074950218201, "learning_rate": 3.846663325197542e-07, "loss": 2.0259, "step": 27439 }, { "epoch": 0.91, "grad_norm": 0.7576746940612793, "learning_rate": 3.843744227305812e-07, "loss": 2.0033, "step": 27440 }, { "epoch": 0.91, "grad_norm": 0.7370162606239319, "learning_rate": 3.840826215731086e-07, "loss": 2.0903, "step": 27441 }, { "epoch": 0.91, "grad_norm": 0.7371616363525391, "learning_rate": 3.8379092905063585e-07, "loss": 2.0553, "step": 27442 }, { "epoch": 0.91, "grad_norm": 0.7390612363815308, "learning_rate": 3.8349934516645925e-07, "loss": 2.003, "step": 27443 }, { "epoch": 0.91, "grad_norm": 0.7552176117897034, "learning_rate": 3.832078699238695e-07, "loss": 2.0444, "step": 27444 }, { "epoch": 0.91, "grad_norm": 0.7280820608139038, "learning_rate": 3.829165033261628e-07, "loss": 2.0326, "step": 27445 }, { "epoch": 0.91, "grad_norm": 0.7386280298233032, "learning_rate": 3.826252453766288e-07, "loss": 2.0001, "step": 27446 }, { "epoch": 0.91, "grad_norm": 0.7406368851661682, "learning_rate": 3.823340960785571e-07, "loss": 2.0674, "step": 27447 }, { "epoch": 0.91, "grad_norm": 0.7545284628868103, "learning_rate": 3.8204305543523837e-07, "loss": 2.0631, "step": 27448 }, { "epoch": 0.91, "grad_norm": 0.7496291399002075, "learning_rate": 3.817521234499633e-07, "loss": 2.0538, "step": 27449 }, { "epoch": 0.91, "grad_norm": 0.757919430732727, "learning_rate": 3.8146130012601365e-07, "loss": 2.045, "step": 27450 }, { "epoch": 0.91, "grad_norm": 0.7482235431671143, "learning_rate": 3.8117058546667695e-07, "loss": 2.0972, "step": 27451 }, { "epoch": 0.91, "grad_norm": 0.7504217028617859, "learning_rate": 3.8087997947523825e-07, "loss": 2.0438, "step": 27452 }, { "epoch": 0.91, "grad_norm": 0.7346357703208923, "learning_rate": 3.805894821549805e-07, "loss": 2.0221, "step": 27453 }, { "epoch": 0.91, "grad_norm": 0.7408124804496765, "learning_rate": 3.8029909350918547e-07, "loss": 2.0262, "step": 27454 }, { "epoch": 0.91, "grad_norm": 0.732194721698761, "learning_rate": 3.8000881354113283e-07, "loss": 2.0441, "step": 27455 }, { "epoch": 0.91, "grad_norm": 0.732491135597229, "learning_rate": 3.797186422541033e-07, "loss": 2.0155, "step": 27456 }, { "epoch": 0.91, "grad_norm": 0.7348154187202454, "learning_rate": 3.7942857965137303e-07, "loss": 2.0705, "step": 27457 }, { "epoch": 0.91, "grad_norm": 0.7364615797996521, "learning_rate": 3.7913862573622286e-07, "loss": 2.0768, "step": 27458 }, { "epoch": 0.91, "grad_norm": 0.7435455918312073, "learning_rate": 3.7884878051192565e-07, "loss": 2.0903, "step": 27459 }, { "epoch": 0.91, "grad_norm": 0.7162229418754578, "learning_rate": 3.785590439817544e-07, "loss": 2.0346, "step": 27460 }, { "epoch": 0.91, "grad_norm": 0.7298769354820251, "learning_rate": 3.782694161489864e-07, "loss": 2.0202, "step": 27461 }, { "epoch": 0.91, "grad_norm": 0.730826199054718, "learning_rate": 3.779798970168913e-07, "loss": 2.0027, "step": 27462 }, { "epoch": 0.91, "grad_norm": 0.7588793635368347, "learning_rate": 3.7769048658873984e-07, "loss": 2.0646, "step": 27463 }, { "epoch": 0.91, "grad_norm": 0.7409497499465942, "learning_rate": 3.7740118486780054e-07, "loss": 2.0698, "step": 27464 }, { "epoch": 0.91, "grad_norm": 0.7720558047294617, "learning_rate": 3.7711199185734514e-07, "loss": 2.0398, "step": 27465 }, { "epoch": 0.91, "grad_norm": 0.7268298864364624, "learning_rate": 3.768229075606389e-07, "loss": 2.0294, "step": 27466 }, { "epoch": 0.91, "grad_norm": 0.736134946346283, "learning_rate": 3.765339319809469e-07, "loss": 2.0648, "step": 27467 }, { "epoch": 0.91, "grad_norm": 0.7300928831100464, "learning_rate": 3.7624506512153656e-07, "loss": 1.9775, "step": 27468 }, { "epoch": 0.91, "grad_norm": 0.7649882435798645, "learning_rate": 3.7595630698566644e-07, "loss": 1.9447, "step": 27469 }, { "epoch": 0.91, "grad_norm": 0.7762765288352966, "learning_rate": 3.7566765757660275e-07, "loss": 2.0443, "step": 27470 }, { "epoch": 0.91, "grad_norm": 0.7147378921508789, "learning_rate": 3.753791168976073e-07, "loss": 2.0476, "step": 27471 }, { "epoch": 0.91, "grad_norm": 0.7329960465431213, "learning_rate": 3.7509068495193644e-07, "loss": 2.0488, "step": 27472 }, { "epoch": 0.91, "grad_norm": 0.760723888874054, "learning_rate": 3.748023617428498e-07, "loss": 2.0506, "step": 27473 }, { "epoch": 0.91, "grad_norm": 0.7451719641685486, "learning_rate": 3.745141472736058e-07, "loss": 2.0656, "step": 27474 }, { "epoch": 0.91, "grad_norm": 0.7724258899688721, "learning_rate": 3.7422604154745965e-07, "loss": 2.0151, "step": 27475 }, { "epoch": 0.91, "grad_norm": 0.7436041831970215, "learning_rate": 3.739380445676677e-07, "loss": 1.9797, "step": 27476 }, { "epoch": 0.91, "grad_norm": 0.7287425994873047, "learning_rate": 3.736501563374817e-07, "loss": 1.998, "step": 27477 }, { "epoch": 0.91, "grad_norm": 0.7354134321212769, "learning_rate": 3.7336237686015353e-07, "loss": 2.0845, "step": 27478 }, { "epoch": 0.91, "grad_norm": 0.7672419548034668, "learning_rate": 3.73074706138935e-07, "loss": 2.0152, "step": 27479 }, { "epoch": 0.91, "grad_norm": 0.7639938592910767, "learning_rate": 3.7278714417707807e-07, "loss": 2.0754, "step": 27480 }, { "epoch": 0.91, "grad_norm": 0.7598233819007874, "learning_rate": 3.7249969097783e-07, "loss": 2.0479, "step": 27481 }, { "epoch": 0.91, "grad_norm": 0.7292041182518005, "learning_rate": 3.7221234654443716e-07, "loss": 2.0923, "step": 27482 }, { "epoch": 0.91, "grad_norm": 0.744170606136322, "learning_rate": 3.7192511088014804e-07, "loss": 2.0461, "step": 27483 }, { "epoch": 0.91, "grad_norm": 0.7313699126243591, "learning_rate": 3.716379839882045e-07, "loss": 2.0563, "step": 27484 }, { "epoch": 0.91, "grad_norm": 0.7566964626312256, "learning_rate": 3.7135096587185393e-07, "loss": 2.0097, "step": 27485 }, { "epoch": 0.91, "grad_norm": 0.733971893787384, "learning_rate": 3.7106405653433595e-07, "loss": 2.0431, "step": 27486 }, { "epoch": 0.91, "grad_norm": 0.7231170535087585, "learning_rate": 3.707772559788947e-07, "loss": 2.0472, "step": 27487 }, { "epoch": 0.91, "grad_norm": 0.7284144759178162, "learning_rate": 3.7049056420876637e-07, "loss": 2.0215, "step": 27488 }, { "epoch": 0.91, "grad_norm": 0.7388703227043152, "learning_rate": 3.7020398122719516e-07, "loss": 2.0203, "step": 27489 }, { "epoch": 0.91, "grad_norm": 0.7505077123641968, "learning_rate": 3.699175070374139e-07, "loss": 2.0528, "step": 27490 }, { "epoch": 0.91, "grad_norm": 0.7713919281959534, "learning_rate": 3.696311416426612e-07, "loss": 2.0284, "step": 27491 }, { "epoch": 0.91, "grad_norm": 0.7308657169342041, "learning_rate": 3.6934488504617117e-07, "loss": 2.0246, "step": 27492 }, { "epoch": 0.91, "grad_norm": 0.7289283275604248, "learning_rate": 3.6905873725118113e-07, "loss": 2.0185, "step": 27493 }, { "epoch": 0.91, "grad_norm": 0.7859716415405273, "learning_rate": 3.687726982609185e-07, "loss": 2.0085, "step": 27494 }, { "epoch": 0.91, "grad_norm": 0.7605398297309875, "learning_rate": 3.684867680786175e-07, "loss": 2.0456, "step": 27495 }, { "epoch": 0.91, "grad_norm": 0.7533289790153503, "learning_rate": 3.6820094670750986e-07, "loss": 2.0315, "step": 27496 }, { "epoch": 0.91, "grad_norm": 0.7487311959266663, "learning_rate": 3.679152341508219e-07, "loss": 2.0796, "step": 27497 }, { "epoch": 0.91, "grad_norm": 0.7333808541297913, "learning_rate": 3.676296304117832e-07, "loss": 2.0661, "step": 27498 }, { "epoch": 0.91, "grad_norm": 0.7354040741920471, "learning_rate": 3.673441354936202e-07, "loss": 2.0388, "step": 27499 }, { "epoch": 0.91, "grad_norm": 0.7418214678764343, "learning_rate": 3.67058749399557e-07, "loss": 2.0787, "step": 27500 }, { "epoch": 0.91, "grad_norm": 0.7730532884597778, "learning_rate": 3.6677347213281867e-07, "loss": 2.0382, "step": 27501 }, { "epoch": 0.91, "grad_norm": 0.7209863066673279, "learning_rate": 3.6648830369662936e-07, "loss": 2.0533, "step": 27502 }, { "epoch": 0.92, "grad_norm": 0.7592591047286987, "learning_rate": 3.662032440942087e-07, "loss": 2.0223, "step": 27503 }, { "epoch": 0.92, "grad_norm": 0.7403962016105652, "learning_rate": 3.6591829332877747e-07, "loss": 2.0301, "step": 27504 }, { "epoch": 0.92, "grad_norm": 0.7473664283752441, "learning_rate": 3.6563345140355535e-07, "loss": 2.06, "step": 27505 }, { "epoch": 0.92, "grad_norm": 0.7360561490058899, "learning_rate": 3.653487183217597e-07, "loss": 2.0916, "step": 27506 }, { "epoch": 0.92, "grad_norm": 0.765838623046875, "learning_rate": 3.6506409408660906e-07, "loss": 1.9556, "step": 27507 }, { "epoch": 0.92, "grad_norm": 0.7543379068374634, "learning_rate": 3.647795787013164e-07, "loss": 2.0554, "step": 27508 }, { "epoch": 0.92, "grad_norm": 0.7381190657615662, "learning_rate": 3.6449517216909813e-07, "loss": 2.1212, "step": 27509 }, { "epoch": 0.92, "grad_norm": 0.7779924273490906, "learning_rate": 3.642108744931649e-07, "loss": 2.0452, "step": 27510 }, { "epoch": 0.92, "grad_norm": 0.7593322396278381, "learning_rate": 3.6392668567673205e-07, "loss": 2.0923, "step": 27511 }, { "epoch": 0.92, "grad_norm": 0.759253740310669, "learning_rate": 3.63642605723008e-07, "loss": 1.9928, "step": 27512 }, { "epoch": 0.92, "grad_norm": 0.7421619296073914, "learning_rate": 3.633586346352014e-07, "loss": 2.0057, "step": 27513 }, { "epoch": 0.92, "grad_norm": 0.7459618449211121, "learning_rate": 3.6307477241652065e-07, "loss": 2.0409, "step": 27514 }, { "epoch": 0.92, "grad_norm": 0.7399706840515137, "learning_rate": 3.6279101907017554e-07, "loss": 2.0659, "step": 27515 }, { "epoch": 0.92, "grad_norm": 0.7464442849159241, "learning_rate": 3.62507374599369e-07, "loss": 2.0636, "step": 27516 }, { "epoch": 0.92, "grad_norm": 0.7298858165740967, "learning_rate": 3.6222383900730404e-07, "loss": 1.9757, "step": 27517 }, { "epoch": 0.92, "grad_norm": 0.7626159191131592, "learning_rate": 3.619404122971881e-07, "loss": 1.9184, "step": 27518 }, { "epoch": 0.92, "grad_norm": 0.756585955619812, "learning_rate": 3.6165709447221866e-07, "loss": 1.9781, "step": 27519 }, { "epoch": 0.92, "grad_norm": 0.7326863408088684, "learning_rate": 3.613738855356008e-07, "loss": 2.0216, "step": 27520 }, { "epoch": 0.92, "grad_norm": 0.761835515499115, "learning_rate": 3.6109078549053213e-07, "loss": 2.0159, "step": 27521 }, { "epoch": 0.92, "grad_norm": 0.7639132738113403, "learning_rate": 3.6080779434020883e-07, "loss": 1.9892, "step": 27522 }, { "epoch": 0.92, "grad_norm": 0.7316587567329407, "learning_rate": 3.605249120878307e-07, "loss": 2.1365, "step": 27523 }, { "epoch": 0.92, "grad_norm": 0.7516165375709534, "learning_rate": 3.6024213873659396e-07, "loss": 2.0385, "step": 27524 }, { "epoch": 0.92, "grad_norm": 0.7540576457977295, "learning_rate": 3.5995947428969167e-07, "loss": 1.9717, "step": 27525 }, { "epoch": 0.92, "grad_norm": 0.7510865926742554, "learning_rate": 3.5967691875031686e-07, "loss": 2.0934, "step": 27526 }, { "epoch": 0.92, "grad_norm": 0.7325657606124878, "learning_rate": 3.593944721216624e-07, "loss": 2.0162, "step": 27527 }, { "epoch": 0.92, "grad_norm": 0.7787989974021912, "learning_rate": 3.591121344069204e-07, "loss": 2.0237, "step": 27528 }, { "epoch": 0.92, "grad_norm": 0.7360804080963135, "learning_rate": 3.5882990560927923e-07, "loss": 1.9765, "step": 27529 }, { "epoch": 0.92, "grad_norm": 0.7749325037002563, "learning_rate": 3.585477857319264e-07, "loss": 2.079, "step": 27530 }, { "epoch": 0.92, "grad_norm": 0.7415676116943359, "learning_rate": 3.582657747780505e-07, "loss": 2.1029, "step": 27531 }, { "epoch": 0.92, "grad_norm": 0.7974497079849243, "learning_rate": 3.579838727508378e-07, "loss": 2.0621, "step": 27532 }, { "epoch": 0.92, "grad_norm": 0.7780328392982483, "learning_rate": 3.577020796534725e-07, "loss": 2.0426, "step": 27533 }, { "epoch": 0.92, "grad_norm": 0.7363455891609192, "learning_rate": 3.5742039548913864e-07, "loss": 2.0367, "step": 27534 }, { "epoch": 0.92, "grad_norm": 0.7355459928512573, "learning_rate": 3.57138820261016e-07, "loss": 2.053, "step": 27535 }, { "epoch": 0.92, "grad_norm": 0.7832638621330261, "learning_rate": 3.568573539722886e-07, "loss": 2.0665, "step": 27536 }, { "epoch": 0.92, "grad_norm": 0.7744941115379333, "learning_rate": 3.565759966261362e-07, "loss": 2.1091, "step": 27537 }, { "epoch": 0.92, "grad_norm": 0.7403947114944458, "learning_rate": 3.562947482257373e-07, "loss": 2.0821, "step": 27538 }, { "epoch": 0.92, "grad_norm": 0.729976236820221, "learning_rate": 3.560136087742672e-07, "loss": 2.0122, "step": 27539 }, { "epoch": 0.92, "grad_norm": 0.7683988809585571, "learning_rate": 3.557325782749044e-07, "loss": 2.0616, "step": 27540 }, { "epoch": 0.92, "grad_norm": 0.7541755437850952, "learning_rate": 3.554516567308208e-07, "loss": 2.1573, "step": 27541 }, { "epoch": 0.92, "grad_norm": 0.7659843564033508, "learning_rate": 3.55170844145194e-07, "loss": 2.1084, "step": 27542 }, { "epoch": 0.92, "grad_norm": 0.7346780300140381, "learning_rate": 3.548901405211935e-07, "loss": 2.0981, "step": 27543 }, { "epoch": 0.92, "grad_norm": 0.7583374977111816, "learning_rate": 3.5460954586199246e-07, "loss": 2.0454, "step": 27544 }, { "epoch": 0.92, "grad_norm": 0.7600131034851074, "learning_rate": 3.543290601707594e-07, "loss": 2.0148, "step": 27545 }, { "epoch": 0.92, "grad_norm": 0.7440279722213745, "learning_rate": 3.540486834506651e-07, "loss": 2.0497, "step": 27546 }, { "epoch": 0.92, "grad_norm": 0.7115409970283508, "learning_rate": 3.5376841570487487e-07, "loss": 2.0523, "step": 27547 }, { "epoch": 0.92, "grad_norm": 0.7426609992980957, "learning_rate": 3.5348825693655496e-07, "loss": 2.0563, "step": 27548 }, { "epoch": 0.92, "grad_norm": 0.7468178868293762, "learning_rate": 3.5320820714887073e-07, "loss": 2.0064, "step": 27549 }, { "epoch": 0.92, "grad_norm": 0.7592133283615112, "learning_rate": 3.5292826634498845e-07, "loss": 1.9758, "step": 27550 }, { "epoch": 0.92, "grad_norm": 0.7255106568336487, "learning_rate": 3.5264843452806893e-07, "loss": 2.0593, "step": 27551 }, { "epoch": 0.92, "grad_norm": 0.7375859618186951, "learning_rate": 3.52368711701272e-07, "loss": 2.0762, "step": 27552 }, { "epoch": 0.92, "grad_norm": 0.7460224032402039, "learning_rate": 3.5208909786776046e-07, "loss": 2.0242, "step": 27553 }, { "epoch": 0.92, "grad_norm": 0.7159547209739685, "learning_rate": 3.5180959303069084e-07, "loss": 2.0084, "step": 27554 }, { "epoch": 0.92, "grad_norm": 0.738386332988739, "learning_rate": 3.515301971932239e-07, "loss": 1.9902, "step": 27555 }, { "epoch": 0.92, "grad_norm": 0.7599590420722961, "learning_rate": 3.512509103585138e-07, "loss": 2.0486, "step": 27556 }, { "epoch": 0.92, "grad_norm": 0.7833879590034485, "learning_rate": 3.509717325297146e-07, "loss": 2.0634, "step": 27557 }, { "epoch": 0.92, "grad_norm": 0.7348275780677795, "learning_rate": 3.506926637099828e-07, "loss": 2.0568, "step": 27558 }, { "epoch": 0.92, "grad_norm": 0.7912258505821228, "learning_rate": 3.504137039024702e-07, "loss": 2.0866, "step": 27559 }, { "epoch": 0.92, "grad_norm": 0.7673795819282532, "learning_rate": 3.501348531103299e-07, "loss": 1.9504, "step": 27560 }, { "epoch": 0.92, "grad_norm": 0.7692797780036926, "learning_rate": 3.4985611133670826e-07, "loss": 2.086, "step": 27561 }, { "epoch": 0.92, "grad_norm": 0.7537751793861389, "learning_rate": 3.4957747858475835e-07, "loss": 1.97, "step": 27562 }, { "epoch": 0.92, "grad_norm": 0.7188949584960938, "learning_rate": 3.492989548576253e-07, "loss": 2.0186, "step": 27563 }, { "epoch": 0.92, "grad_norm": 0.7401921153068542, "learning_rate": 3.490205401584579e-07, "loss": 2.0624, "step": 27564 }, { "epoch": 0.92, "grad_norm": 0.7180664539337158, "learning_rate": 3.4874223449039903e-07, "loss": 2.0415, "step": 27565 }, { "epoch": 0.92, "grad_norm": 0.7615936398506165, "learning_rate": 3.484640378565951e-07, "loss": 1.987, "step": 27566 }, { "epoch": 0.92, "grad_norm": 0.7585820555686951, "learning_rate": 3.48185950260187e-07, "loss": 2.0911, "step": 27567 }, { "epoch": 0.92, "grad_norm": 0.7129085063934326, "learning_rate": 3.479079717043188e-07, "loss": 1.9633, "step": 27568 }, { "epoch": 0.92, "grad_norm": 0.7468371391296387, "learning_rate": 3.476301021921302e-07, "loss": 2.0459, "step": 27569 }, { "epoch": 0.92, "grad_norm": 0.733013927936554, "learning_rate": 3.473523417267588e-07, "loss": 2.0765, "step": 27570 }, { "epoch": 0.92, "grad_norm": 0.7291478514671326, "learning_rate": 3.470746903113431e-07, "loss": 2.1096, "step": 27571 }, { "epoch": 0.92, "grad_norm": 0.7657207250595093, "learning_rate": 3.4679714794902173e-07, "loss": 2.037, "step": 27572 }, { "epoch": 0.92, "grad_norm": 0.7497923970222473, "learning_rate": 3.465197146429278e-07, "loss": 2.128, "step": 27573 }, { "epoch": 0.92, "grad_norm": 0.7357337474822998, "learning_rate": 3.4624239039619643e-07, "loss": 2.022, "step": 27574 }, { "epoch": 0.92, "grad_norm": 0.7296460270881653, "learning_rate": 3.4596517521196305e-07, "loss": 2.0955, "step": 27575 }, { "epoch": 0.92, "grad_norm": 0.7511003017425537, "learning_rate": 3.4568806909335505e-07, "loss": 2.0452, "step": 27576 }, { "epoch": 0.92, "grad_norm": 0.7694618701934814, "learning_rate": 3.4541107204350556e-07, "loss": 2.0505, "step": 27577 }, { "epoch": 0.92, "grad_norm": 0.7169292569160461, "learning_rate": 3.451341840655453e-07, "loss": 2.0159, "step": 27578 }, { "epoch": 0.92, "grad_norm": 0.7414395213127136, "learning_rate": 3.448574051625986e-07, "loss": 2.0076, "step": 27579 }, { "epoch": 0.92, "grad_norm": 0.742670476436615, "learning_rate": 3.44580735337795e-07, "loss": 2.0466, "step": 27580 }, { "epoch": 0.92, "grad_norm": 0.7397040128707886, "learning_rate": 3.443041745942599e-07, "loss": 2.0372, "step": 27581 }, { "epoch": 0.92, "grad_norm": 0.7517799139022827, "learning_rate": 3.4402772293511854e-07, "loss": 2.0758, "step": 27582 }, { "epoch": 0.92, "grad_norm": 0.7356666922569275, "learning_rate": 3.4375138036349066e-07, "loss": 2.0165, "step": 27583 }, { "epoch": 0.92, "grad_norm": 0.7546393275260925, "learning_rate": 3.4347514688250263e-07, "loss": 2.1016, "step": 27584 }, { "epoch": 0.92, "grad_norm": 0.751071035861969, "learning_rate": 3.431990224952719e-07, "loss": 2.0643, "step": 27585 }, { "epoch": 0.92, "grad_norm": 0.76637864112854, "learning_rate": 3.4292300720491946e-07, "loss": 2.0665, "step": 27586 }, { "epoch": 0.92, "grad_norm": 0.7580568194389343, "learning_rate": 3.4264710101456376e-07, "loss": 2.0098, "step": 27587 }, { "epoch": 0.92, "grad_norm": 0.7629618048667908, "learning_rate": 3.423713039273213e-07, "loss": 2.0591, "step": 27588 }, { "epoch": 0.92, "grad_norm": 0.7623186707496643, "learning_rate": 3.4209561594630627e-07, "loss": 2.0234, "step": 27589 }, { "epoch": 0.92, "grad_norm": 0.7312891483306885, "learning_rate": 3.4182003707463716e-07, "loss": 1.9804, "step": 27590 }, { "epoch": 0.92, "grad_norm": 0.7653946280479431, "learning_rate": 3.41544567315425e-07, "loss": 2.0957, "step": 27591 }, { "epoch": 0.92, "grad_norm": 0.7689878344535828, "learning_rate": 3.412692066717804e-07, "loss": 2.0525, "step": 27592 }, { "epoch": 0.92, "grad_norm": 0.7284693717956543, "learning_rate": 3.4099395514681557e-07, "loss": 2.0338, "step": 27593 }, { "epoch": 0.92, "grad_norm": 0.7579547762870789, "learning_rate": 3.4071881274364117e-07, "loss": 2.0128, "step": 27594 }, { "epoch": 0.92, "grad_norm": 0.7335493564605713, "learning_rate": 3.404437794653659e-07, "loss": 2.0216, "step": 27595 }, { "epoch": 0.92, "grad_norm": 0.7740117311477661, "learning_rate": 3.401688553150939e-07, "loss": 2.0914, "step": 27596 }, { "epoch": 0.92, "grad_norm": 0.7390198707580566, "learning_rate": 3.398940402959339e-07, "loss": 2.0646, "step": 27597 }, { "epoch": 0.92, "grad_norm": 0.7441772818565369, "learning_rate": 3.3961933441098884e-07, "loss": 2.0021, "step": 27598 }, { "epoch": 0.92, "grad_norm": 0.7458149790763855, "learning_rate": 3.3934473766336296e-07, "loss": 1.9979, "step": 27599 }, { "epoch": 0.92, "grad_norm": 0.7587944865226746, "learning_rate": 3.3907025005616044e-07, "loss": 2.0968, "step": 27600 }, { "epoch": 0.92, "grad_norm": 0.7294687032699585, "learning_rate": 3.3879587159247773e-07, "loss": 2.0624, "step": 27601 }, { "epoch": 0.92, "grad_norm": 0.775977373123169, "learning_rate": 3.385216022754179e-07, "loss": 2.0463, "step": 27602 }, { "epoch": 0.92, "grad_norm": 0.7988474369049072, "learning_rate": 3.382474421080806e-07, "loss": 2.0699, "step": 27603 }, { "epoch": 0.92, "grad_norm": 0.7609072923660278, "learning_rate": 3.379733910935601e-07, "loss": 2.047, "step": 27604 }, { "epoch": 0.92, "grad_norm": 0.7817708849906921, "learning_rate": 3.376994492349539e-07, "loss": 2.0579, "step": 27605 }, { "epoch": 0.92, "grad_norm": 0.7727806568145752, "learning_rate": 3.374256165353562e-07, "loss": 2.0627, "step": 27606 }, { "epoch": 0.92, "grad_norm": 0.7486349940299988, "learning_rate": 3.371518929978612e-07, "loss": 2.0649, "step": 27607 }, { "epoch": 0.92, "grad_norm": 0.7648556232452393, "learning_rate": 3.36878278625562e-07, "loss": 2.0431, "step": 27608 }, { "epoch": 0.92, "grad_norm": 0.7246996760368347, "learning_rate": 3.3660477342154716e-07, "loss": 2.0198, "step": 27609 }, { "epoch": 0.92, "grad_norm": 0.7365943789482117, "learning_rate": 3.3633137738890984e-07, "loss": 2.0918, "step": 27610 }, { "epoch": 0.92, "grad_norm": 0.7324069142341614, "learning_rate": 3.3605809053073646e-07, "loss": 1.977, "step": 27611 }, { "epoch": 0.92, "grad_norm": 0.7139624357223511, "learning_rate": 3.357849128501145e-07, "loss": 2.0517, "step": 27612 }, { "epoch": 0.92, "grad_norm": 0.7324069142341614, "learning_rate": 3.355118443501337e-07, "loss": 2.0159, "step": 27613 }, { "epoch": 0.92, "grad_norm": 0.7657989859580994, "learning_rate": 3.352388850338728e-07, "loss": 1.9641, "step": 27614 }, { "epoch": 0.92, "grad_norm": 0.7399185299873352, "learning_rate": 3.3496603490441927e-07, "loss": 2.0597, "step": 27615 }, { "epoch": 0.92, "grad_norm": 0.7372857928276062, "learning_rate": 3.3469329396485726e-07, "loss": 2.1119, "step": 27616 }, { "epoch": 0.92, "grad_norm": 0.7680679559707642, "learning_rate": 3.3442066221826553e-07, "loss": 2.0549, "step": 27617 }, { "epoch": 0.92, "grad_norm": 0.7426912188529968, "learning_rate": 3.341481396677226e-07, "loss": 2.1273, "step": 27618 }, { "epoch": 0.92, "grad_norm": 0.763296902179718, "learning_rate": 3.338757263163117e-07, "loss": 2.0342, "step": 27619 }, { "epoch": 0.92, "grad_norm": 0.7364580631256104, "learning_rate": 3.3360342216710583e-07, "loss": 2.0461, "step": 27620 }, { "epoch": 0.92, "grad_norm": 0.7506850361824036, "learning_rate": 3.333312272231837e-07, "loss": 2.0308, "step": 27621 }, { "epoch": 0.92, "grad_norm": 0.7369828820228577, "learning_rate": 3.3305914148762273e-07, "loss": 2.0333, "step": 27622 }, { "epoch": 0.92, "grad_norm": 0.702512800693512, "learning_rate": 3.327871649634906e-07, "loss": 2.0519, "step": 27623 }, { "epoch": 0.92, "grad_norm": 0.7778658866882324, "learning_rate": 3.325152976538648e-07, "loss": 2.0709, "step": 27624 }, { "epoch": 0.92, "grad_norm": 0.7795168161392212, "learning_rate": 3.322435395618162e-07, "loss": 2.0974, "step": 27625 }, { "epoch": 0.92, "grad_norm": 0.728728175163269, "learning_rate": 3.319718906904146e-07, "loss": 2.0842, "step": 27626 }, { "epoch": 0.92, "grad_norm": 0.7503560185432434, "learning_rate": 3.3170035104272745e-07, "loss": 2.0981, "step": 27627 }, { "epoch": 0.92, "grad_norm": 0.745243489742279, "learning_rate": 3.314289206218235e-07, "loss": 2.0928, "step": 27628 }, { "epoch": 0.92, "grad_norm": 0.7384575605392456, "learning_rate": 3.311575994307703e-07, "loss": 2.0207, "step": 27629 }, { "epoch": 0.92, "grad_norm": 0.7331790328025818, "learning_rate": 3.3088638747263093e-07, "loss": 2.0384, "step": 27630 }, { "epoch": 0.92, "grad_norm": 0.7511398792266846, "learning_rate": 3.306152847504707e-07, "loss": 2.1399, "step": 27631 }, { "epoch": 0.92, "grad_norm": 0.7215908169746399, "learning_rate": 3.303442912673538e-07, "loss": 2.0004, "step": 27632 }, { "epoch": 0.92, "grad_norm": 0.7458534240722656, "learning_rate": 3.300734070263378e-07, "loss": 2.0497, "step": 27633 }, { "epoch": 0.92, "grad_norm": 0.7726430892944336, "learning_rate": 3.298026320304859e-07, "loss": 2.1379, "step": 27634 }, { "epoch": 0.92, "grad_norm": 0.7475405931472778, "learning_rate": 3.2953196628285887e-07, "loss": 2.0623, "step": 27635 }, { "epoch": 0.92, "grad_norm": 0.7420817017555237, "learning_rate": 3.292614097865088e-07, "loss": 2.0471, "step": 27636 }, { "epoch": 0.92, "grad_norm": 0.7497652173042297, "learning_rate": 3.289909625444976e-07, "loss": 2.0386, "step": 27637 }, { "epoch": 0.92, "grad_norm": 0.718167245388031, "learning_rate": 3.287206245598784e-07, "loss": 1.9858, "step": 27638 }, { "epoch": 0.92, "grad_norm": 0.7252147197723389, "learning_rate": 3.284503958357066e-07, "loss": 2.0459, "step": 27639 }, { "epoch": 0.92, "grad_norm": 0.8097555041313171, "learning_rate": 3.281802763750319e-07, "loss": 2.0348, "step": 27640 }, { "epoch": 0.92, "grad_norm": 0.7462708950042725, "learning_rate": 3.279102661809108e-07, "loss": 2.0223, "step": 27641 }, { "epoch": 0.92, "grad_norm": 0.7451258897781372, "learning_rate": 3.2764036525638866e-07, "loss": 2.0061, "step": 27642 }, { "epoch": 0.92, "grad_norm": 0.7512140274047852, "learning_rate": 3.273705736045174e-07, "loss": 2.0513, "step": 27643 }, { "epoch": 0.92, "grad_norm": 0.7722046971321106, "learning_rate": 3.2710089122834686e-07, "loss": 2.0475, "step": 27644 }, { "epoch": 0.92, "grad_norm": 0.7464533448219299, "learning_rate": 3.2683131813092015e-07, "loss": 2.1396, "step": 27645 }, { "epoch": 0.92, "grad_norm": 0.7333582043647766, "learning_rate": 3.2656185431528375e-07, "loss": 2.0368, "step": 27646 }, { "epoch": 0.92, "grad_norm": 0.7492766976356506, "learning_rate": 3.2629249978448296e-07, "loss": 1.9906, "step": 27647 }, { "epoch": 0.92, "grad_norm": 0.785752534866333, "learning_rate": 3.260232545415609e-07, "loss": 2.0217, "step": 27648 }, { "epoch": 0.92, "grad_norm": 0.7555016875267029, "learning_rate": 3.257541185895574e-07, "loss": 2.0208, "step": 27649 }, { "epoch": 0.92, "grad_norm": 0.7398478388786316, "learning_rate": 3.2548509193151333e-07, "loss": 2.0333, "step": 27650 }, { "epoch": 0.92, "grad_norm": 0.7614901661872864, "learning_rate": 3.252161745704707e-07, "loss": 2.0467, "step": 27651 }, { "epoch": 0.92, "grad_norm": 0.7250102758407593, "learning_rate": 3.249473665094649e-07, "loss": 2.0108, "step": 27652 }, { "epoch": 0.92, "grad_norm": 0.7306356430053711, "learning_rate": 3.2467866775153454e-07, "loss": 1.951, "step": 27653 }, { "epoch": 0.92, "grad_norm": 0.7301570773124695, "learning_rate": 3.244100782997139e-07, "loss": 2.0853, "step": 27654 }, { "epoch": 0.92, "grad_norm": 0.7345647215843201, "learning_rate": 3.241415981570373e-07, "loss": 2.0381, "step": 27655 }, { "epoch": 0.92, "grad_norm": 0.7391906976699829, "learning_rate": 3.238732273265377e-07, "loss": 2.0072, "step": 27656 }, { "epoch": 0.92, "grad_norm": 0.752103865146637, "learning_rate": 3.2360496581124943e-07, "loss": 1.9838, "step": 27657 }, { "epoch": 0.92, "grad_norm": 0.7293895483016968, "learning_rate": 3.233368136142001e-07, "loss": 2.0094, "step": 27658 }, { "epoch": 0.92, "grad_norm": 0.7485709190368652, "learning_rate": 3.230687707384195e-07, "loss": 2.047, "step": 27659 }, { "epoch": 0.92, "grad_norm": 0.7544573545455933, "learning_rate": 3.2280083718693845e-07, "loss": 2.0979, "step": 27660 }, { "epoch": 0.92, "grad_norm": 0.7439588308334351, "learning_rate": 3.2253301296278125e-07, "loss": 2.0136, "step": 27661 }, { "epoch": 0.92, "grad_norm": 0.7464872598648071, "learning_rate": 3.2226529806897445e-07, "loss": 2.0009, "step": 27662 }, { "epoch": 0.92, "grad_norm": 0.7387205362319946, "learning_rate": 3.219976925085444e-07, "loss": 2.0664, "step": 27663 }, { "epoch": 0.92, "grad_norm": 0.7663566470146179, "learning_rate": 3.217301962845099e-07, "loss": 2.0833, "step": 27664 }, { "epoch": 0.92, "grad_norm": 0.743693470954895, "learning_rate": 3.2146280939989616e-07, "loss": 2.0177, "step": 27665 }, { "epoch": 0.92, "grad_norm": 0.7562993764877319, "learning_rate": 3.2119553185772423e-07, "loss": 2.0056, "step": 27666 }, { "epoch": 0.92, "grad_norm": 0.7402998208999634, "learning_rate": 3.2092836366101386e-07, "loss": 2.0537, "step": 27667 }, { "epoch": 0.92, "grad_norm": 0.7410950660705566, "learning_rate": 3.2066130481278045e-07, "loss": 2.0603, "step": 27668 }, { "epoch": 0.92, "grad_norm": 0.7640246748924255, "learning_rate": 3.203943553160438e-07, "loss": 2.0852, "step": 27669 }, { "epoch": 0.92, "grad_norm": 0.7431691288948059, "learning_rate": 3.2012751517381924e-07, "loss": 1.9777, "step": 27670 }, { "epoch": 0.92, "grad_norm": 0.7969907522201538, "learning_rate": 3.1986078438911994e-07, "loss": 2.0866, "step": 27671 }, { "epoch": 0.92, "grad_norm": 0.7685071229934692, "learning_rate": 3.1959416296496013e-07, "loss": 2.107, "step": 27672 }, { "epoch": 0.92, "grad_norm": 0.7259434461593628, "learning_rate": 3.1932765090435414e-07, "loss": 2.0303, "step": 27673 }, { "epoch": 0.92, "grad_norm": 0.7721937298774719, "learning_rate": 3.1906124821030836e-07, "loss": 2.0079, "step": 27674 }, { "epoch": 0.92, "grad_norm": 0.7577913403511047, "learning_rate": 3.187949548858371e-07, "loss": 2.047, "step": 27675 }, { "epoch": 0.92, "grad_norm": 0.7574318647384644, "learning_rate": 3.1852877093394686e-07, "loss": 2.0498, "step": 27676 }, { "epoch": 0.92, "grad_norm": 0.7250029444694519, "learning_rate": 3.1826269635764296e-07, "loss": 2.0528, "step": 27677 }, { "epoch": 0.92, "grad_norm": 0.7249597311019897, "learning_rate": 3.1799673115993413e-07, "loss": 1.9698, "step": 27678 }, { "epoch": 0.92, "grad_norm": 0.74311363697052, "learning_rate": 3.177308753438246e-07, "loss": 2.0123, "step": 27679 }, { "epoch": 0.92, "grad_norm": 0.7568889260292053, "learning_rate": 3.1746512891231653e-07, "loss": 2.065, "step": 27680 }, { "epoch": 0.92, "grad_norm": 0.7424436211585999, "learning_rate": 3.171994918684118e-07, "loss": 2.0081, "step": 27681 }, { "epoch": 0.92, "grad_norm": 0.7420914173126221, "learning_rate": 3.169339642151148e-07, "loss": 2.0871, "step": 27682 }, { "epoch": 0.92, "grad_norm": 0.7239668369293213, "learning_rate": 3.166685459554208e-07, "loss": 2.0417, "step": 27683 }, { "epoch": 0.92, "grad_norm": 0.7450498938560486, "learning_rate": 3.16403237092332e-07, "loss": 2.1023, "step": 27684 }, { "epoch": 0.92, "grad_norm": 0.7478615045547485, "learning_rate": 3.161380376288448e-07, "loss": 1.9884, "step": 27685 }, { "epoch": 0.92, "grad_norm": 0.7439897656440735, "learning_rate": 3.158729475679534e-07, "loss": 2.0532, "step": 27686 }, { "epoch": 0.92, "grad_norm": 0.7438661456108093, "learning_rate": 3.1560796691265326e-07, "loss": 2.0268, "step": 27687 }, { "epoch": 0.92, "grad_norm": 0.7466949224472046, "learning_rate": 3.153430956659409e-07, "loss": 2.0626, "step": 27688 }, { "epoch": 0.92, "grad_norm": 0.7543800473213196, "learning_rate": 3.150783338308061e-07, "loss": 2.1109, "step": 27689 }, { "epoch": 0.92, "grad_norm": 0.7582016587257385, "learning_rate": 3.148136814102387e-07, "loss": 2.0837, "step": 27690 }, { "epoch": 0.92, "grad_norm": 0.7708203196525574, "learning_rate": 3.145491384072319e-07, "loss": 2.08, "step": 27691 }, { "epoch": 0.92, "grad_norm": 0.7646574974060059, "learning_rate": 3.1428470482477324e-07, "loss": 2.0877, "step": 27692 }, { "epoch": 0.92, "grad_norm": 0.7319545149803162, "learning_rate": 3.1402038066584705e-07, "loss": 1.9771, "step": 27693 }, { "epoch": 0.92, "grad_norm": 0.7412433624267578, "learning_rate": 3.1375616593344316e-07, "loss": 2.05, "step": 27694 }, { "epoch": 0.92, "grad_norm": 0.7650107741355896, "learning_rate": 3.1349206063054693e-07, "loss": 2.0824, "step": 27695 }, { "epoch": 0.92, "grad_norm": 0.7498570084571838, "learning_rate": 3.1322806476013824e-07, "loss": 2.0909, "step": 27696 }, { "epoch": 0.92, "grad_norm": 0.7431207299232483, "learning_rate": 3.1296417832520356e-07, "loss": 1.9951, "step": 27697 }, { "epoch": 0.92, "grad_norm": 0.7238743305206299, "learning_rate": 3.127004013287227e-07, "loss": 2.0449, "step": 27698 }, { "epoch": 0.92, "grad_norm": 0.7596167922019958, "learning_rate": 3.1243673377367335e-07, "loss": 2.0155, "step": 27699 }, { "epoch": 0.92, "grad_norm": 0.7470327019691467, "learning_rate": 3.121731756630375e-07, "loss": 2.0852, "step": 27700 }, { "epoch": 0.92, "grad_norm": 0.7704834938049316, "learning_rate": 3.119097269997928e-07, "loss": 2.0289, "step": 27701 }, { "epoch": 0.92, "grad_norm": 0.7676897048950195, "learning_rate": 3.116463877869125e-07, "loss": 2.0345, "step": 27702 }, { "epoch": 0.92, "grad_norm": 0.7366531491279602, "learning_rate": 3.11383158027373e-07, "loss": 2.0723, "step": 27703 }, { "epoch": 0.92, "grad_norm": 0.7341530323028564, "learning_rate": 3.1112003772414967e-07, "loss": 2.0194, "step": 27704 }, { "epoch": 0.92, "grad_norm": 0.7782750725746155, "learning_rate": 3.108570268802125e-07, "loss": 1.958, "step": 27705 }, { "epoch": 0.92, "grad_norm": 0.7287595272064209, "learning_rate": 3.105941254985367e-07, "loss": 2.0083, "step": 27706 }, { "epoch": 0.92, "grad_norm": 0.7534686923027039, "learning_rate": 3.10331333582089e-07, "loss": 2.0975, "step": 27707 }, { "epoch": 0.92, "grad_norm": 0.7921737432479858, "learning_rate": 3.10068651133838e-07, "loss": 2.118, "step": 27708 }, { "epoch": 0.92, "grad_norm": 0.7504991292953491, "learning_rate": 3.0980607815675245e-07, "loss": 1.9919, "step": 27709 }, { "epoch": 0.92, "grad_norm": 0.7587846517562866, "learning_rate": 3.095436146538e-07, "loss": 2.0619, "step": 27710 }, { "epoch": 0.92, "grad_norm": 0.7559679746627808, "learning_rate": 3.0928126062794496e-07, "loss": 2.0301, "step": 27711 }, { "epoch": 0.92, "grad_norm": 0.7743144035339355, "learning_rate": 3.0901901608214936e-07, "loss": 2.1081, "step": 27712 }, { "epoch": 0.92, "grad_norm": 0.730049192905426, "learning_rate": 3.087568810193775e-07, "loss": 2.0741, "step": 27713 }, { "epoch": 0.92, "grad_norm": 0.7464324831962585, "learning_rate": 3.084948554425915e-07, "loss": 2.0473, "step": 27714 }, { "epoch": 0.92, "grad_norm": 0.7562131881713867, "learning_rate": 3.082329393547523e-07, "loss": 2.0204, "step": 27715 }, { "epoch": 0.92, "grad_norm": 0.717248260974884, "learning_rate": 3.079711327588153e-07, "loss": 1.9518, "step": 27716 }, { "epoch": 0.92, "grad_norm": 0.7551710605621338, "learning_rate": 3.077094356577415e-07, "loss": 2.0335, "step": 27717 }, { "epoch": 0.92, "grad_norm": 0.7462586164474487, "learning_rate": 3.074478480544851e-07, "loss": 2.037, "step": 27718 }, { "epoch": 0.92, "grad_norm": 0.7253049612045288, "learning_rate": 3.071863699520039e-07, "loss": 2.0298, "step": 27719 }, { "epoch": 0.92, "grad_norm": 0.7469426989555359, "learning_rate": 3.069250013532499e-07, "loss": 2.0993, "step": 27720 }, { "epoch": 0.92, "grad_norm": 0.748832106590271, "learning_rate": 3.0666374226117625e-07, "loss": 1.9962, "step": 27721 }, { "epoch": 0.92, "grad_norm": 0.7599135041236877, "learning_rate": 3.06402592678734e-07, "loss": 2.0126, "step": 27722 }, { "epoch": 0.92, "grad_norm": 0.7544862031936646, "learning_rate": 3.0614155260887733e-07, "loss": 2.0214, "step": 27723 }, { "epoch": 0.92, "grad_norm": 0.7334742546081543, "learning_rate": 3.058806220545496e-07, "loss": 1.9852, "step": 27724 }, { "epoch": 0.92, "grad_norm": 0.7521848082542419, "learning_rate": 3.056198010187006e-07, "loss": 2.0237, "step": 27725 }, { "epoch": 0.92, "grad_norm": 0.7528825998306274, "learning_rate": 3.0535908950428017e-07, "loss": 2.0781, "step": 27726 }, { "epoch": 0.92, "grad_norm": 0.7192471027374268, "learning_rate": 3.050984875142293e-07, "loss": 2.0439, "step": 27727 }, { "epoch": 0.92, "grad_norm": 0.7353876233100891, "learning_rate": 3.048379950514946e-07, "loss": 2.0315, "step": 27728 }, { "epoch": 0.92, "grad_norm": 0.7753949165344238, "learning_rate": 3.0457761211901804e-07, "loss": 2.0264, "step": 27729 }, { "epoch": 0.92, "grad_norm": 0.7380372285842896, "learning_rate": 3.043173387197418e-07, "loss": 2.0752, "step": 27730 }, { "epoch": 0.92, "grad_norm": 0.7567800283432007, "learning_rate": 3.040571748566057e-07, "loss": 2.0796, "step": 27731 }, { "epoch": 0.92, "grad_norm": 0.7507311701774597, "learning_rate": 3.037971205325496e-07, "loss": 2.0991, "step": 27732 }, { "epoch": 0.92, "grad_norm": 0.7370536923408508, "learning_rate": 3.035371757505112e-07, "loss": 2.0206, "step": 27733 }, { "epoch": 0.92, "grad_norm": 0.7381748557090759, "learning_rate": 3.0327734051342705e-07, "loss": 2.0055, "step": 27734 }, { "epoch": 0.92, "grad_norm": 0.7792350649833679, "learning_rate": 3.030176148242314e-07, "loss": 2.0978, "step": 27735 }, { "epoch": 0.92, "grad_norm": 0.7473456859588623, "learning_rate": 3.02757998685862e-07, "loss": 2.0001, "step": 27736 }, { "epoch": 0.92, "grad_norm": 0.7511739134788513, "learning_rate": 3.024984921012486e-07, "loss": 2.028, "step": 27737 }, { "epoch": 0.92, "grad_norm": 0.7346223592758179, "learning_rate": 3.022390950733234e-07, "loss": 2.0577, "step": 27738 }, { "epoch": 0.92, "grad_norm": 0.7358810305595398, "learning_rate": 3.019798076050184e-07, "loss": 2.0626, "step": 27739 }, { "epoch": 0.92, "grad_norm": 0.7527163028717041, "learning_rate": 3.017206296992603e-07, "loss": 1.976, "step": 27740 }, { "epoch": 0.92, "grad_norm": 0.7601765394210815, "learning_rate": 3.014615613589811e-07, "loss": 2.0888, "step": 27741 }, { "epoch": 0.92, "grad_norm": 0.7566134333610535, "learning_rate": 3.01202602587104e-07, "loss": 2.0367, "step": 27742 }, { "epoch": 0.92, "grad_norm": 0.7677571177482605, "learning_rate": 3.0094375338655556e-07, "loss": 2.0658, "step": 27743 }, { "epoch": 0.92, "grad_norm": 0.7175692915916443, "learning_rate": 3.006850137602602e-07, "loss": 2.0886, "step": 27744 }, { "epoch": 0.92, "grad_norm": 0.7648827433586121, "learning_rate": 3.004263837111421e-07, "loss": 2.0492, "step": 27745 }, { "epoch": 0.92, "grad_norm": 0.742368757724762, "learning_rate": 3.001678632421223e-07, "loss": 2.1145, "step": 27746 }, { "epoch": 0.92, "grad_norm": 0.7374505400657654, "learning_rate": 2.999094523561208e-07, "loss": 2.0367, "step": 27747 }, { "epoch": 0.92, "grad_norm": 0.734950065612793, "learning_rate": 2.9965115105605733e-07, "loss": 1.9964, "step": 27748 }, { "epoch": 0.92, "grad_norm": 0.734057605266571, "learning_rate": 2.993929593448497e-07, "loss": 2.0903, "step": 27749 }, { "epoch": 0.92, "grad_norm": 0.7659988403320312, "learning_rate": 2.9913487722541655e-07, "loss": 2.0108, "step": 27750 }, { "epoch": 0.92, "grad_norm": 0.7558735013008118, "learning_rate": 2.988769047006712e-07, "loss": 2.0666, "step": 27751 }, { "epoch": 0.92, "grad_norm": 0.7403257489204407, "learning_rate": 2.986190417735302e-07, "loss": 2.0736, "step": 27752 }, { "epoch": 0.92, "grad_norm": 0.7615982294082642, "learning_rate": 2.983612884469045e-07, "loss": 2.0408, "step": 27753 }, { "epoch": 0.92, "grad_norm": 0.7448079586029053, "learning_rate": 2.9810364472370843e-07, "loss": 2.0411, "step": 27754 }, { "epoch": 0.92, "grad_norm": 0.7467453479766846, "learning_rate": 2.97846110606852e-07, "loss": 2.0334, "step": 27755 }, { "epoch": 0.92, "grad_norm": 0.7232568860054016, "learning_rate": 2.975886860992427e-07, "loss": 2.0549, "step": 27756 }, { "epoch": 0.92, "grad_norm": 0.7580637335777283, "learning_rate": 2.9733137120379065e-07, "loss": 2.0521, "step": 27757 }, { "epoch": 0.92, "grad_norm": 0.7344949245452881, "learning_rate": 2.970741659234022e-07, "loss": 2.0583, "step": 27758 }, { "epoch": 0.92, "grad_norm": 0.7170581817626953, "learning_rate": 2.9681707026098514e-07, "loss": 1.9781, "step": 27759 }, { "epoch": 0.92, "grad_norm": 0.7625787854194641, "learning_rate": 2.965600842194394e-07, "loss": 1.9749, "step": 27760 }, { "epoch": 0.92, "grad_norm": 0.7346869111061096, "learning_rate": 2.9630320780167366e-07, "loss": 2.0312, "step": 27761 }, { "epoch": 0.92, "grad_norm": 0.7336499094963074, "learning_rate": 2.9604644101058565e-07, "loss": 1.9909, "step": 27762 }, { "epoch": 0.92, "grad_norm": 0.7484045028686523, "learning_rate": 2.957897838490786e-07, "loss": 2.1084, "step": 27763 }, { "epoch": 0.92, "grad_norm": 0.7607395052909851, "learning_rate": 2.955332363200514e-07, "loss": 2.0512, "step": 27764 }, { "epoch": 0.92, "grad_norm": 0.7536801099777222, "learning_rate": 2.9527679842640153e-07, "loss": 2.0568, "step": 27765 }, { "epoch": 0.92, "grad_norm": 0.774182140827179, "learning_rate": 2.9502047017102687e-07, "loss": 1.9764, "step": 27766 }, { "epoch": 0.92, "grad_norm": 0.7581747174263, "learning_rate": 2.947642515568239e-07, "loss": 2.0213, "step": 27767 }, { "epoch": 0.92, "grad_norm": 0.7543849945068359, "learning_rate": 2.9450814258668693e-07, "loss": 2.0757, "step": 27768 }, { "epoch": 0.92, "grad_norm": 0.7381640672683716, "learning_rate": 2.9425214326350816e-07, "loss": 1.9918, "step": 27769 }, { "epoch": 0.92, "grad_norm": 0.7476697564125061, "learning_rate": 2.9399625359018193e-07, "loss": 2.0163, "step": 27770 }, { "epoch": 0.92, "grad_norm": 0.7519593834877014, "learning_rate": 2.937404735695959e-07, "loss": 2.057, "step": 27771 }, { "epoch": 0.92, "grad_norm": 0.7299101948738098, "learning_rate": 2.934848032046422e-07, "loss": 2.0564, "step": 27772 }, { "epoch": 0.92, "grad_norm": 0.7523294687271118, "learning_rate": 2.932292424982086e-07, "loss": 2.0115, "step": 27773 }, { "epoch": 0.92, "grad_norm": 0.736680805683136, "learning_rate": 2.9297379145318274e-07, "loss": 2.0916, "step": 27774 }, { "epoch": 0.92, "grad_norm": 0.7316254377365112, "learning_rate": 2.9271845007245004e-07, "loss": 2.0764, "step": 27775 }, { "epoch": 0.92, "grad_norm": 0.7437477707862854, "learning_rate": 2.924632183588949e-07, "loss": 2.0698, "step": 27776 }, { "epoch": 0.92, "grad_norm": 0.7351751923561096, "learning_rate": 2.922080963154017e-07, "loss": 1.9997, "step": 27777 }, { "epoch": 0.92, "grad_norm": 0.7282069325447083, "learning_rate": 2.9195308394485147e-07, "loss": 2.037, "step": 27778 }, { "epoch": 0.92, "grad_norm": 0.7197161912918091, "learning_rate": 2.916981812501252e-07, "loss": 2.0145, "step": 27779 }, { "epoch": 0.92, "grad_norm": 0.7297230362892151, "learning_rate": 2.91443388234105e-07, "loss": 2.0015, "step": 27780 }, { "epoch": 0.92, "grad_norm": 0.7370126247406006, "learning_rate": 2.9118870489966753e-07, "loss": 1.9739, "step": 27781 }, { "epoch": 0.92, "grad_norm": 0.7700838446617126, "learning_rate": 2.909341312496883e-07, "loss": 2.0291, "step": 27782 }, { "epoch": 0.92, "grad_norm": 0.7294010519981384, "learning_rate": 2.906796672870471e-07, "loss": 1.9787, "step": 27783 }, { "epoch": 0.92, "grad_norm": 0.7314468026161194, "learning_rate": 2.9042531301461506e-07, "loss": 2.0457, "step": 27784 }, { "epoch": 0.92, "grad_norm": 0.754212498664856, "learning_rate": 2.9017106843526876e-07, "loss": 2.0528, "step": 27785 }, { "epoch": 0.92, "grad_norm": 0.755840539932251, "learning_rate": 2.899169335518792e-07, "loss": 2.0765, "step": 27786 }, { "epoch": 0.92, "grad_norm": 0.7168874144554138, "learning_rate": 2.8966290836731745e-07, "loss": 2.0318, "step": 27787 }, { "epoch": 0.92, "grad_norm": 0.7285027503967285, "learning_rate": 2.8940899288445237e-07, "loss": 1.992, "step": 27788 }, { "epoch": 0.92, "grad_norm": 0.7419102787971497, "learning_rate": 2.8915518710615376e-07, "loss": 2.0239, "step": 27789 }, { "epoch": 0.92, "grad_norm": 0.732861340045929, "learning_rate": 2.889014910352905e-07, "loss": 2.0129, "step": 27790 }, { "epoch": 0.92, "grad_norm": 0.7722498774528503, "learning_rate": 2.886479046747248e-07, "loss": 2.0812, "step": 27791 }, { "epoch": 0.92, "grad_norm": 0.7624475359916687, "learning_rate": 2.883944280273243e-07, "loss": 2.0483, "step": 27792 }, { "epoch": 0.92, "grad_norm": 0.7525282502174377, "learning_rate": 2.881410610959523e-07, "loss": 2.0804, "step": 27793 }, { "epoch": 0.92, "grad_norm": 0.7676408886909485, "learning_rate": 2.878878038834709e-07, "loss": 2.0513, "step": 27794 }, { "epoch": 0.92, "grad_norm": 0.7185543179512024, "learning_rate": 2.876346563927401e-07, "loss": 2.0377, "step": 27795 }, { "epoch": 0.92, "grad_norm": 0.7555459141731262, "learning_rate": 2.8738161862662207e-07, "loss": 1.9662, "step": 27796 }, { "epoch": 0.92, "grad_norm": 0.7270405292510986, "learning_rate": 2.8712869058797445e-07, "loss": 2.1201, "step": 27797 }, { "epoch": 0.92, "grad_norm": 0.7435339093208313, "learning_rate": 2.8687587227965385e-07, "loss": 2.0281, "step": 27798 }, { "epoch": 0.92, "grad_norm": 0.7333428859710693, "learning_rate": 2.8662316370451806e-07, "loss": 2.0368, "step": 27799 }, { "epoch": 0.92, "grad_norm": 0.742886483669281, "learning_rate": 2.8637056486542026e-07, "loss": 1.9998, "step": 27800 }, { "epoch": 0.92, "grad_norm": 0.7383636236190796, "learning_rate": 2.861180757652149e-07, "loss": 1.9869, "step": 27801 }, { "epoch": 0.92, "grad_norm": 0.7411794662475586, "learning_rate": 2.858656964067563e-07, "loss": 2.0622, "step": 27802 }, { "epoch": 0.93, "grad_norm": 0.7370967864990234, "learning_rate": 2.8561342679289337e-07, "loss": 2.1043, "step": 27803 }, { "epoch": 0.93, "grad_norm": 0.7297102212905884, "learning_rate": 2.8536126692647606e-07, "loss": 2.0786, "step": 27804 }, { "epoch": 0.93, "grad_norm": 0.745228111743927, "learning_rate": 2.851092168103542e-07, "loss": 1.9948, "step": 27805 }, { "epoch": 0.93, "grad_norm": 0.7371108531951904, "learning_rate": 2.8485727644737447e-07, "loss": 2.0174, "step": 27806 }, { "epoch": 0.93, "grad_norm": 0.7554144263267517, "learning_rate": 2.846054458403835e-07, "loss": 2.1258, "step": 27807 }, { "epoch": 0.93, "grad_norm": 0.7192895412445068, "learning_rate": 2.8435372499222793e-07, "loss": 2.1065, "step": 27808 }, { "epoch": 0.93, "grad_norm": 0.7098931074142456, "learning_rate": 2.841021139057487e-07, "loss": 1.9536, "step": 27809 }, { "epoch": 0.93, "grad_norm": 0.757360577583313, "learning_rate": 2.8385061258378923e-07, "loss": 2.1026, "step": 27810 }, { "epoch": 0.93, "grad_norm": 0.7316729426383972, "learning_rate": 2.835992210291927e-07, "loss": 2.0026, "step": 27811 }, { "epoch": 0.93, "grad_norm": 0.751430332660675, "learning_rate": 2.8334793924479797e-07, "loss": 2.0582, "step": 27812 }, { "epoch": 0.93, "grad_norm": 0.7631667256355286, "learning_rate": 2.830967672334428e-07, "loss": 2.0803, "step": 27813 }, { "epoch": 0.93, "grad_norm": 0.7661742568016052, "learning_rate": 2.8284570499796496e-07, "loss": 2.097, "step": 27814 }, { "epoch": 0.93, "grad_norm": 0.7558079361915588, "learning_rate": 2.8259475254120315e-07, "loss": 1.9494, "step": 27815 }, { "epoch": 0.93, "grad_norm": 0.7462192177772522, "learning_rate": 2.823439098659908e-07, "loss": 2.0685, "step": 27816 }, { "epoch": 0.93, "grad_norm": 0.7554885149002075, "learning_rate": 2.8209317697516e-07, "loss": 2.0609, "step": 27817 }, { "epoch": 0.93, "grad_norm": 0.7765205502510071, "learning_rate": 2.8184255387154744e-07, "loss": 2.1027, "step": 27818 }, { "epoch": 0.93, "grad_norm": 0.7259227633476257, "learning_rate": 2.8159204055798085e-07, "loss": 1.9972, "step": 27819 }, { "epoch": 0.93, "grad_norm": 0.7740525603294373, "learning_rate": 2.813416370372912e-07, "loss": 2.0156, "step": 27820 }, { "epoch": 0.93, "grad_norm": 0.7714994549751282, "learning_rate": 2.8109134331231083e-07, "loss": 2.0266, "step": 27821 }, { "epoch": 0.93, "grad_norm": 0.7852103114128113, "learning_rate": 2.8084115938586177e-07, "loss": 1.9885, "step": 27822 }, { "epoch": 0.93, "grad_norm": 0.7389509081840515, "learning_rate": 2.8059108526077404e-07, "loss": 2.0001, "step": 27823 }, { "epoch": 0.93, "grad_norm": 0.7501384615898132, "learning_rate": 2.803411209398732e-07, "loss": 2.0133, "step": 27824 }, { "epoch": 0.93, "grad_norm": 0.7325080633163452, "learning_rate": 2.800912664259825e-07, "loss": 2.0607, "step": 27825 }, { "epoch": 0.93, "grad_norm": 0.7479891180992126, "learning_rate": 2.798415217219219e-07, "loss": 1.9714, "step": 27826 }, { "epoch": 0.93, "grad_norm": 0.7832179665565491, "learning_rate": 2.7959188683051696e-07, "loss": 2.0281, "step": 27827 }, { "epoch": 0.93, "grad_norm": 0.7498183250427246, "learning_rate": 2.793423617545854e-07, "loss": 2.0773, "step": 27828 }, { "epoch": 0.93, "grad_norm": 0.7317245006561279, "learning_rate": 2.7909294649694606e-07, "loss": 2.01, "step": 27829 }, { "epoch": 0.93, "grad_norm": 0.7390229105949402, "learning_rate": 2.788436410604201e-07, "loss": 2.0559, "step": 27830 }, { "epoch": 0.93, "grad_norm": 0.7410488724708557, "learning_rate": 2.7859444544782064e-07, "loss": 2.0427, "step": 27831 }, { "epoch": 0.93, "grad_norm": 0.7306001782417297, "learning_rate": 2.783453596619623e-07, "loss": 2.0805, "step": 27832 }, { "epoch": 0.93, "grad_norm": 0.7568457126617432, "learning_rate": 2.780963837056627e-07, "loss": 2.0906, "step": 27833 }, { "epoch": 0.93, "grad_norm": 0.7483450770378113, "learning_rate": 2.778475175817319e-07, "loss": 2.0536, "step": 27834 }, { "epoch": 0.93, "grad_norm": 0.7529596090316772, "learning_rate": 2.775987612929809e-07, "loss": 2.011, "step": 27835 }, { "epoch": 0.93, "grad_norm": 0.7707741856575012, "learning_rate": 2.77350114842222e-07, "loss": 2.1078, "step": 27836 }, { "epoch": 0.93, "grad_norm": 0.7418769001960754, "learning_rate": 2.771015782322639e-07, "loss": 1.9772, "step": 27837 }, { "epoch": 0.93, "grad_norm": 0.7614316940307617, "learning_rate": 2.7685315146591343e-07, "loss": 2.0641, "step": 27838 }, { "epoch": 0.93, "grad_norm": 0.7126253247261047, "learning_rate": 2.766048345459782e-07, "loss": 2.0214, "step": 27839 }, { "epoch": 0.93, "grad_norm": 0.7546053528785706, "learning_rate": 2.763566274752638e-07, "loss": 2.0079, "step": 27840 }, { "epoch": 0.93, "grad_norm": 0.7334279417991638, "learning_rate": 2.761085302565714e-07, "loss": 2.0217, "step": 27841 }, { "epoch": 0.93, "grad_norm": 0.7316117882728577, "learning_rate": 2.758605428927075e-07, "loss": 2.0624, "step": 27842 }, { "epoch": 0.93, "grad_norm": 0.7327911257743835, "learning_rate": 2.7561266538647323e-07, "loss": 2.0557, "step": 27843 }, { "epoch": 0.93, "grad_norm": 0.7581915855407715, "learning_rate": 2.7536489774066644e-07, "loss": 1.9988, "step": 27844 }, { "epoch": 0.93, "grad_norm": 0.7346964478492737, "learning_rate": 2.7511723995808705e-07, "loss": 2.0695, "step": 27845 }, { "epoch": 0.93, "grad_norm": 0.7348883152008057, "learning_rate": 2.7486969204153613e-07, "loss": 2.0784, "step": 27846 }, { "epoch": 0.93, "grad_norm": 0.7330226302146912, "learning_rate": 2.7462225399380705e-07, "loss": 2.0985, "step": 27847 }, { "epoch": 0.93, "grad_norm": 0.7600095868110657, "learning_rate": 2.7437492581769534e-07, "loss": 2.0555, "step": 27848 }, { "epoch": 0.93, "grad_norm": 0.7869946956634521, "learning_rate": 2.741277075159965e-07, "loss": 2.032, "step": 27849 }, { "epoch": 0.93, "grad_norm": 0.7264682650566101, "learning_rate": 2.7388059909150276e-07, "loss": 2.0737, "step": 27850 }, { "epoch": 0.93, "grad_norm": 0.7407927513122559, "learning_rate": 2.736336005470053e-07, "loss": 2.0223, "step": 27851 }, { "epoch": 0.93, "grad_norm": 0.7393671274185181, "learning_rate": 2.733867118852962e-07, "loss": 2.0038, "step": 27852 }, { "epoch": 0.93, "grad_norm": 0.7185119390487671, "learning_rate": 2.7313993310916443e-07, "loss": 2.0448, "step": 27853 }, { "epoch": 0.93, "grad_norm": 0.75821453332901, "learning_rate": 2.728932642213955e-07, "loss": 2.0136, "step": 27854 }, { "epoch": 0.93, "grad_norm": 0.7126148343086243, "learning_rate": 2.726467052247794e-07, "loss": 2.0198, "step": 27855 }, { "epoch": 0.93, "grad_norm": 0.7342941164970398, "learning_rate": 2.7240025612209954e-07, "loss": 2.1063, "step": 27856 }, { "epoch": 0.93, "grad_norm": 0.7413205504417419, "learning_rate": 2.721539169161391e-07, "loss": 2.0389, "step": 27857 }, { "epoch": 0.93, "grad_norm": 0.7501566410064697, "learning_rate": 2.7190768760968376e-07, "loss": 2.0966, "step": 27858 }, { "epoch": 0.93, "grad_norm": 0.7557350397109985, "learning_rate": 2.716615682055146e-07, "loss": 2.1455, "step": 27859 }, { "epoch": 0.93, "grad_norm": 0.7484878301620483, "learning_rate": 2.7141555870641045e-07, "loss": 2.0656, "step": 27860 }, { "epoch": 0.93, "grad_norm": 0.7559391856193542, "learning_rate": 2.711696591151536e-07, "loss": 2.0703, "step": 27861 }, { "epoch": 0.93, "grad_norm": 0.7430881857872009, "learning_rate": 2.7092386943451954e-07, "loss": 1.9874, "step": 27862 }, { "epoch": 0.93, "grad_norm": 0.7316837310791016, "learning_rate": 2.7067818966728497e-07, "loss": 2.0028, "step": 27863 }, { "epoch": 0.93, "grad_norm": 0.7509291172027588, "learning_rate": 2.704326198162266e-07, "loss": 2.051, "step": 27864 }, { "epoch": 0.93, "grad_norm": 0.7525030374526978, "learning_rate": 2.7018715988411994e-07, "loss": 2.0591, "step": 27865 }, { "epoch": 0.93, "grad_norm": 0.728617250919342, "learning_rate": 2.6994180987373496e-07, "loss": 2.0216, "step": 27866 }, { "epoch": 0.93, "grad_norm": 0.7736281156539917, "learning_rate": 2.6969656978784396e-07, "loss": 1.9672, "step": 27867 }, { "epoch": 0.93, "grad_norm": 0.760162353515625, "learning_rate": 2.694514396292203e-07, "loss": 2.0191, "step": 27868 }, { "epoch": 0.93, "grad_norm": 0.7245417237281799, "learning_rate": 2.6920641940063276e-07, "loss": 2.1005, "step": 27869 }, { "epoch": 0.93, "grad_norm": 0.7642731666564941, "learning_rate": 2.6896150910484586e-07, "loss": 2.0365, "step": 27870 }, { "epoch": 0.93, "grad_norm": 0.7467430830001831, "learning_rate": 2.6871670874462964e-07, "loss": 2.0797, "step": 27871 }, { "epoch": 0.93, "grad_norm": 0.7355383038520813, "learning_rate": 2.684720183227496e-07, "loss": 2.0858, "step": 27872 }, { "epoch": 0.93, "grad_norm": 0.766723096370697, "learning_rate": 2.6822743784196804e-07, "loss": 1.9927, "step": 27873 }, { "epoch": 0.93, "grad_norm": 0.7569175362586975, "learning_rate": 2.6798296730505046e-07, "loss": 2.0404, "step": 27874 }, { "epoch": 0.93, "grad_norm": 0.748996376991272, "learning_rate": 2.6773860671475913e-07, "loss": 1.9798, "step": 27875 }, { "epoch": 0.93, "grad_norm": 0.7404541373252869, "learning_rate": 2.674943560738508e-07, "loss": 2.0395, "step": 27876 }, { "epoch": 0.93, "grad_norm": 0.7506717443466187, "learning_rate": 2.6725021538508977e-07, "loss": 1.9652, "step": 27877 }, { "epoch": 0.93, "grad_norm": 0.745093047618866, "learning_rate": 2.670061846512306e-07, "loss": 2.0599, "step": 27878 }, { "epoch": 0.93, "grad_norm": 0.7612241506576538, "learning_rate": 2.6676226387503114e-07, "loss": 2.0642, "step": 27879 }, { "epoch": 0.93, "grad_norm": 0.7584373950958252, "learning_rate": 2.66518453059248e-07, "loss": 2.0412, "step": 27880 }, { "epoch": 0.93, "grad_norm": 0.7781530022621155, "learning_rate": 2.662747522066345e-07, "loss": 2.0075, "step": 27881 }, { "epoch": 0.93, "grad_norm": 0.7199565768241882, "learning_rate": 2.66031161319944e-07, "loss": 1.9915, "step": 27882 }, { "epoch": 0.93, "grad_norm": 0.7701200842857361, "learning_rate": 2.6578768040192995e-07, "loss": 2.0701, "step": 27883 }, { "epoch": 0.93, "grad_norm": 0.714232325553894, "learning_rate": 2.6554430945534225e-07, "loss": 2.0728, "step": 27884 }, { "epoch": 0.93, "grad_norm": 0.7406719326972961, "learning_rate": 2.653010484829288e-07, "loss": 2.0573, "step": 27885 }, { "epoch": 0.93, "grad_norm": 0.7422420978546143, "learning_rate": 2.6505789748743846e-07, "loss": 1.9974, "step": 27886 }, { "epoch": 0.93, "grad_norm": 0.764012336730957, "learning_rate": 2.648148564716213e-07, "loss": 2.0003, "step": 27887 }, { "epoch": 0.93, "grad_norm": 0.7540760040283203, "learning_rate": 2.645719254382184e-07, "loss": 2.0248, "step": 27888 }, { "epoch": 0.93, "grad_norm": 0.7420525550842285, "learning_rate": 2.643291043899765e-07, "loss": 1.9695, "step": 27889 }, { "epoch": 0.93, "grad_norm": 0.7442198991775513, "learning_rate": 2.6408639332963893e-07, "loss": 1.9752, "step": 27890 }, { "epoch": 0.93, "grad_norm": 0.7698887586593628, "learning_rate": 2.6384379225994684e-07, "loss": 2.075, "step": 27891 }, { "epoch": 0.93, "grad_norm": 0.765974760055542, "learning_rate": 2.636013011836436e-07, "loss": 2.0281, "step": 27892 }, { "epoch": 0.93, "grad_norm": 0.7182168960571289, "learning_rate": 2.6335892010346587e-07, "loss": 1.9914, "step": 27893 }, { "epoch": 0.93, "grad_norm": 0.7500603795051575, "learning_rate": 2.631166490221515e-07, "loss": 2.0028, "step": 27894 }, { "epoch": 0.93, "grad_norm": 0.7582493424415588, "learning_rate": 2.628744879424394e-07, "loss": 2.0824, "step": 27895 }, { "epoch": 0.93, "grad_norm": 0.7277804613113403, "learning_rate": 2.626324368670652e-07, "loss": 2.046, "step": 27896 }, { "epoch": 0.93, "grad_norm": 0.7764238715171814, "learning_rate": 2.623904957987644e-07, "loss": 2.07, "step": 27897 }, { "epoch": 0.93, "grad_norm": 0.7843112945556641, "learning_rate": 2.621486647402671e-07, "loss": 2.089, "step": 27898 }, { "epoch": 0.93, "grad_norm": 0.7297757267951965, "learning_rate": 2.619069436943078e-07, "loss": 1.9168, "step": 27899 }, { "epoch": 0.93, "grad_norm": 0.7402142286300659, "learning_rate": 2.616653326636176e-07, "loss": 2.0293, "step": 27900 }, { "epoch": 0.93, "grad_norm": 0.7449386715888977, "learning_rate": 2.6142383165092433e-07, "loss": 2.1063, "step": 27901 }, { "epoch": 0.93, "grad_norm": 0.7214683294296265, "learning_rate": 2.6118244065895693e-07, "loss": 2.055, "step": 27902 }, { "epoch": 0.93, "grad_norm": 0.748918890953064, "learning_rate": 2.609411596904432e-07, "loss": 2.0357, "step": 27903 }, { "epoch": 0.93, "grad_norm": 0.7619758248329163, "learning_rate": 2.6069998874810766e-07, "loss": 2.0926, "step": 27904 }, { "epoch": 0.93, "grad_norm": 0.7452168464660645, "learning_rate": 2.604589278346781e-07, "loss": 2.0076, "step": 27905 }, { "epoch": 0.93, "grad_norm": 0.7235488295555115, "learning_rate": 2.6021797695287453e-07, "loss": 2.025, "step": 27906 }, { "epoch": 0.93, "grad_norm": 0.7461463809013367, "learning_rate": 2.599771361054193e-07, "loss": 2.0504, "step": 27907 }, { "epoch": 0.93, "grad_norm": 0.7565953731536865, "learning_rate": 2.5973640529503466e-07, "loss": 2.1018, "step": 27908 }, { "epoch": 0.93, "grad_norm": 0.7196838855743408, "learning_rate": 2.594957845244417e-07, "loss": 1.9591, "step": 27909 }, { "epoch": 0.93, "grad_norm": 0.7476866245269775, "learning_rate": 2.59255273796355e-07, "loss": 2.108, "step": 27910 }, { "epoch": 0.93, "grad_norm": 0.7530679106712341, "learning_rate": 2.590148731134923e-07, "loss": 1.9993, "step": 27911 }, { "epoch": 0.93, "grad_norm": 0.7391665577888489, "learning_rate": 2.587745824785726e-07, "loss": 2.0114, "step": 27912 }, { "epoch": 0.93, "grad_norm": 0.7437525987625122, "learning_rate": 2.5853440189430814e-07, "loss": 2.0954, "step": 27913 }, { "epoch": 0.93, "grad_norm": 0.7517217397689819, "learning_rate": 2.582943313634134e-07, "loss": 2.0752, "step": 27914 }, { "epoch": 0.93, "grad_norm": 0.7529842257499695, "learning_rate": 2.5805437088859964e-07, "loss": 2.0175, "step": 27915 }, { "epoch": 0.93, "grad_norm": 0.7393817901611328, "learning_rate": 2.5781452047257905e-07, "loss": 1.97, "step": 27916 }, { "epoch": 0.93, "grad_norm": 0.7171664237976074, "learning_rate": 2.575747801180595e-07, "loss": 2.0042, "step": 27917 }, { "epoch": 0.93, "grad_norm": 0.7397173047065735, "learning_rate": 2.573351498277521e-07, "loss": 2.0198, "step": 27918 }, { "epoch": 0.93, "grad_norm": 0.7386016845703125, "learning_rate": 2.570956296043614e-07, "loss": 2.0639, "step": 27919 }, { "epoch": 0.93, "grad_norm": 0.7404997944831848, "learning_rate": 2.5685621945059414e-07, "loss": 2.0004, "step": 27920 }, { "epoch": 0.93, "grad_norm": 0.7680494785308838, "learning_rate": 2.5661691936915477e-07, "loss": 2.0211, "step": 27921 }, { "epoch": 0.93, "grad_norm": 0.7619282603263855, "learning_rate": 2.5637772936274783e-07, "loss": 2.0734, "step": 27922 }, { "epoch": 0.93, "grad_norm": 0.7816585898399353, "learning_rate": 2.561386494340756e-07, "loss": 2.0264, "step": 27923 }, { "epoch": 0.93, "grad_norm": 0.7177309989929199, "learning_rate": 2.5589967958583706e-07, "loss": 2.0349, "step": 27924 }, { "epoch": 0.93, "grad_norm": 0.7667866945266724, "learning_rate": 2.5566081982073443e-07, "loss": 1.9945, "step": 27925 }, { "epoch": 0.93, "grad_norm": 0.7431163787841797, "learning_rate": 2.554220701414645e-07, "loss": 2.1019, "step": 27926 }, { "epoch": 0.93, "grad_norm": 0.7544727325439453, "learning_rate": 2.5518343055072615e-07, "loss": 1.9863, "step": 27927 }, { "epoch": 0.93, "grad_norm": 0.7579176425933838, "learning_rate": 2.5494490105121396e-07, "loss": 2.0565, "step": 27928 }, { "epoch": 0.93, "grad_norm": 0.7550086379051208, "learning_rate": 2.547064816456224e-07, "loss": 2.0313, "step": 27929 }, { "epoch": 0.93, "grad_norm": 0.7622935175895691, "learning_rate": 2.54468172336646e-07, "loss": 2.0841, "step": 27930 }, { "epoch": 0.93, "grad_norm": 0.7593290209770203, "learning_rate": 2.5422997312697704e-07, "loss": 2.1454, "step": 27931 }, { "epoch": 0.93, "grad_norm": 0.7562953233718872, "learning_rate": 2.5399188401930676e-07, "loss": 2.0461, "step": 27932 }, { "epoch": 0.93, "grad_norm": 0.7268831729888916, "learning_rate": 2.537539050163229e-07, "loss": 2.0605, "step": 27933 }, { "epoch": 0.93, "grad_norm": 0.7707730531692505, "learning_rate": 2.5351603612071784e-07, "loss": 2.0689, "step": 27934 }, { "epoch": 0.93, "grad_norm": 0.7599323391914368, "learning_rate": 2.5327827733517385e-07, "loss": 2.058, "step": 27935 }, { "epoch": 0.93, "grad_norm": 0.7391889691352844, "learning_rate": 2.530406286623821e-07, "loss": 2.0067, "step": 27936 }, { "epoch": 0.93, "grad_norm": 0.7436203956604004, "learning_rate": 2.528030901050238e-07, "loss": 2.038, "step": 27937 }, { "epoch": 0.93, "grad_norm": 0.7612622380256653, "learning_rate": 2.5256566166578455e-07, "loss": 2.0749, "step": 27938 }, { "epoch": 0.93, "grad_norm": 0.7785054445266724, "learning_rate": 2.5232834334734445e-07, "loss": 2.0955, "step": 27939 }, { "epoch": 0.93, "grad_norm": 0.7840487957000732, "learning_rate": 2.52091135152388e-07, "loss": 2.0645, "step": 27940 }, { "epoch": 0.93, "grad_norm": 0.7244338393211365, "learning_rate": 2.5185403708359204e-07, "loss": 2.0585, "step": 27941 }, { "epoch": 0.93, "grad_norm": 0.7714110612869263, "learning_rate": 2.5161704914363536e-07, "loss": 1.9534, "step": 27942 }, { "epoch": 0.93, "grad_norm": 0.718863844871521, "learning_rate": 2.513801713351971e-07, "loss": 2.0082, "step": 27943 }, { "epoch": 0.93, "grad_norm": 0.7214365601539612, "learning_rate": 2.511434036609528e-07, "loss": 1.9924, "step": 27944 }, { "epoch": 0.93, "grad_norm": 0.7528209090232849, "learning_rate": 2.5090674612357704e-07, "loss": 2.0595, "step": 27945 }, { "epoch": 0.93, "grad_norm": 0.7849499583244324, "learning_rate": 2.5067019872574205e-07, "loss": 2.1153, "step": 27946 }, { "epoch": 0.93, "grad_norm": 0.7654549479484558, "learning_rate": 2.5043376147012243e-07, "loss": 2.058, "step": 27947 }, { "epoch": 0.93, "grad_norm": 0.7268943190574646, "learning_rate": 2.5019743435938715e-07, "loss": 2.0381, "step": 27948 }, { "epoch": 0.93, "grad_norm": 0.7318461537361145, "learning_rate": 2.499612173962096e-07, "loss": 2.0569, "step": 27949 }, { "epoch": 0.93, "grad_norm": 0.7483956813812256, "learning_rate": 2.4972511058325434e-07, "loss": 2.0772, "step": 27950 }, { "epoch": 0.93, "grad_norm": 0.7712288498878479, "learning_rate": 2.4948911392319143e-07, "loss": 2.028, "step": 27951 }, { "epoch": 0.93, "grad_norm": 0.7424534559249878, "learning_rate": 2.492532274186843e-07, "loss": 2.0384, "step": 27952 }, { "epoch": 0.93, "grad_norm": 0.7072364091873169, "learning_rate": 2.4901745107240195e-07, "loss": 1.9763, "step": 27953 }, { "epoch": 0.93, "grad_norm": 0.7210165858268738, "learning_rate": 2.4878178488700554e-07, "loss": 1.9684, "step": 27954 }, { "epoch": 0.93, "grad_norm": 0.7977557182312012, "learning_rate": 2.4854622886515634e-07, "loss": 2.0878, "step": 27955 }, { "epoch": 0.93, "grad_norm": 0.748574435710907, "learning_rate": 2.483107830095188e-07, "loss": 2.0812, "step": 27956 }, { "epoch": 0.93, "grad_norm": 0.7427569627761841, "learning_rate": 2.480754473227498e-07, "loss": 2.0108, "step": 27957 }, { "epoch": 0.93, "grad_norm": 0.7094568610191345, "learning_rate": 2.478402218075093e-07, "loss": 2.016, "step": 27958 }, { "epoch": 0.93, "grad_norm": 0.7647199630737305, "learning_rate": 2.476051064664542e-07, "loss": 2.0081, "step": 27959 }, { "epoch": 0.93, "grad_norm": 0.7378902435302734, "learning_rate": 2.473701013022423e-07, "loss": 2.1271, "step": 27960 }, { "epoch": 0.93, "grad_norm": 0.7373604774475098, "learning_rate": 2.4713520631752587e-07, "loss": 2.0092, "step": 27961 }, { "epoch": 0.93, "grad_norm": 0.7290225028991699, "learning_rate": 2.4690042151496174e-07, "loss": 2.0065, "step": 27962 }, { "epoch": 0.93, "grad_norm": 0.734314501285553, "learning_rate": 2.466657468972e-07, "loss": 1.9777, "step": 27963 }, { "epoch": 0.93, "grad_norm": 0.7562270164489746, "learning_rate": 2.46431182466893e-07, "loss": 2.0512, "step": 27964 }, { "epoch": 0.93, "grad_norm": 0.7201805710792542, "learning_rate": 2.4619672822668974e-07, "loss": 2.0894, "step": 27965 }, { "epoch": 0.93, "grad_norm": 0.7354416251182556, "learning_rate": 2.4596238417924025e-07, "loss": 2.0006, "step": 27966 }, { "epoch": 0.93, "grad_norm": 0.7386675477027893, "learning_rate": 2.4572815032719133e-07, "loss": 2.0823, "step": 27967 }, { "epoch": 0.93, "grad_norm": 0.742964506149292, "learning_rate": 2.4549402667318754e-07, "loss": 2.0778, "step": 27968 }, { "epoch": 0.93, "grad_norm": 0.7420713901519775, "learning_rate": 2.452600132198779e-07, "loss": 1.9728, "step": 27969 }, { "epoch": 0.93, "grad_norm": 0.7666468620300293, "learning_rate": 2.450261099699014e-07, "loss": 2.0495, "step": 27970 }, { "epoch": 0.93, "grad_norm": 0.7206969857215881, "learning_rate": 2.447923169259048e-07, "loss": 2.0392, "step": 27971 }, { "epoch": 0.93, "grad_norm": 0.7331469058990479, "learning_rate": 2.4455863409052816e-07, "loss": 1.9956, "step": 27972 }, { "epoch": 0.93, "grad_norm": 0.7426043748855591, "learning_rate": 2.443250614664083e-07, "loss": 2.1411, "step": 27973 }, { "epoch": 0.93, "grad_norm": 0.7391018867492676, "learning_rate": 2.440915990561876e-07, "loss": 2.0752, "step": 27974 }, { "epoch": 0.93, "grad_norm": 0.7588388919830322, "learning_rate": 2.438582468625028e-07, "loss": 2.0083, "step": 27975 }, { "epoch": 0.93, "grad_norm": 0.7827141284942627, "learning_rate": 2.436250048879907e-07, "loss": 2.0645, "step": 27976 }, { "epoch": 0.93, "grad_norm": 0.7294793725013733, "learning_rate": 2.433918731352836e-07, "loss": 1.9831, "step": 27977 }, { "epoch": 0.93, "grad_norm": 0.7249341607093811, "learning_rate": 2.4315885160701936e-07, "loss": 2.0523, "step": 27978 }, { "epoch": 0.93, "grad_norm": 0.7558111548423767, "learning_rate": 2.4292594030582597e-07, "loss": 2.0793, "step": 27979 }, { "epoch": 0.93, "grad_norm": 0.7405421733856201, "learning_rate": 2.4269313923433904e-07, "loss": 2.044, "step": 27980 }, { "epoch": 0.93, "grad_norm": 0.7508049011230469, "learning_rate": 2.4246044839518534e-07, "loss": 2.016, "step": 27981 }, { "epoch": 0.93, "grad_norm": 0.7585733532905579, "learning_rate": 2.4222786779099617e-07, "loss": 2.0529, "step": 27982 }, { "epoch": 0.93, "grad_norm": 0.7552363276481628, "learning_rate": 2.41995397424396e-07, "loss": 1.9681, "step": 27983 }, { "epoch": 0.93, "grad_norm": 0.7727384567260742, "learning_rate": 2.417630372980151e-07, "loss": 2.0242, "step": 27984 }, { "epoch": 0.93, "grad_norm": 0.7331233024597168, "learning_rate": 2.415307874144768e-07, "loss": 2.0114, "step": 27985 }, { "epoch": 0.93, "grad_norm": 0.728410542011261, "learning_rate": 2.4129864777640235e-07, "loss": 2.0077, "step": 27986 }, { "epoch": 0.93, "grad_norm": 0.7551946043968201, "learning_rate": 2.410666183864174e-07, "loss": 1.9697, "step": 27987 }, { "epoch": 0.93, "grad_norm": 0.7394986748695374, "learning_rate": 2.4083469924714443e-07, "loss": 1.9924, "step": 27988 }, { "epoch": 0.93, "grad_norm": 0.7358717918395996, "learning_rate": 2.406028903612001e-07, "loss": 2.0273, "step": 27989 }, { "epoch": 0.93, "grad_norm": 0.7398594617843628, "learning_rate": 2.403711917312046e-07, "loss": 2.0204, "step": 27990 }, { "epoch": 0.93, "grad_norm": 0.7531290054321289, "learning_rate": 2.4013960335977584e-07, "loss": 2.0382, "step": 27991 }, { "epoch": 0.93, "grad_norm": 0.7616372108459473, "learning_rate": 2.3990812524952946e-07, "loss": 2.0877, "step": 27992 }, { "epoch": 0.93, "grad_norm": 0.7439168691635132, "learning_rate": 2.3967675740308226e-07, "loss": 2.0348, "step": 27993 }, { "epoch": 0.93, "grad_norm": 0.739776611328125, "learning_rate": 2.3944549982304664e-07, "loss": 2.0625, "step": 27994 }, { "epoch": 0.93, "grad_norm": 0.7496808767318726, "learning_rate": 2.3921435251203496e-07, "loss": 1.993, "step": 27995 }, { "epoch": 0.93, "grad_norm": 0.7755863666534424, "learning_rate": 2.389833154726595e-07, "loss": 2.0431, "step": 27996 }, { "epoch": 0.93, "grad_norm": 0.7898247838020325, "learning_rate": 2.3875238870753046e-07, "loss": 2.1109, "step": 27997 }, { "epoch": 0.93, "grad_norm": 0.7207484245300293, "learning_rate": 2.385215722192558e-07, "loss": 2.0052, "step": 27998 }, { "epoch": 0.93, "grad_norm": 0.7522338628768921, "learning_rate": 2.3829086601044327e-07, "loss": 1.943, "step": 27999 }, { "epoch": 0.93, "grad_norm": 0.744543731212616, "learning_rate": 2.380602700837009e-07, "loss": 2.1063, "step": 28000 }, { "epoch": 0.93, "grad_norm": 0.7459607720375061, "learning_rate": 2.3782978444163108e-07, "loss": 2.0936, "step": 28001 }, { "epoch": 0.93, "grad_norm": 0.7656667232513428, "learning_rate": 2.3759940908683942e-07, "loss": 2.0531, "step": 28002 }, { "epoch": 0.93, "grad_norm": 0.760022759437561, "learning_rate": 2.3736914402192834e-07, "loss": 1.9323, "step": 28003 }, { "epoch": 0.93, "grad_norm": 0.7677549719810486, "learning_rate": 2.3713898924950018e-07, "loss": 2.0752, "step": 28004 }, { "epoch": 0.93, "grad_norm": 0.7869532704353333, "learning_rate": 2.3690894477215288e-07, "loss": 2.0282, "step": 28005 }, { "epoch": 0.93, "grad_norm": 0.725181519985199, "learning_rate": 2.3667901059248656e-07, "loss": 2.0404, "step": 28006 }, { "epoch": 0.93, "grad_norm": 0.7499891519546509, "learning_rate": 2.3644918671310024e-07, "loss": 1.9886, "step": 28007 }, { "epoch": 0.93, "grad_norm": 0.7205195426940918, "learning_rate": 2.3621947313658856e-07, "loss": 1.9988, "step": 28008 }, { "epoch": 0.93, "grad_norm": 0.7316867113113403, "learning_rate": 2.3598986986554606e-07, "loss": 2.0647, "step": 28009 }, { "epoch": 0.93, "grad_norm": 0.7315390706062317, "learning_rate": 2.3576037690256848e-07, "loss": 1.9771, "step": 28010 }, { "epoch": 0.93, "grad_norm": 0.7673647999763489, "learning_rate": 2.3553099425024818e-07, "loss": 2.0435, "step": 28011 }, { "epoch": 0.93, "grad_norm": 0.7407047748565674, "learning_rate": 2.3530172191117528e-07, "loss": 1.9932, "step": 28012 }, { "epoch": 0.93, "grad_norm": 0.732793390750885, "learning_rate": 2.350725598879422e-07, "loss": 2.0413, "step": 28013 }, { "epoch": 0.93, "grad_norm": 0.771995484828949, "learning_rate": 2.348435081831346e-07, "loss": 2.0519, "step": 28014 }, { "epoch": 0.93, "grad_norm": 0.7245350480079651, "learning_rate": 2.346145667993427e-07, "loss": 2.0432, "step": 28015 }, { "epoch": 0.93, "grad_norm": 0.7007312774658203, "learning_rate": 2.3438573573915436e-07, "loss": 1.9817, "step": 28016 }, { "epoch": 0.93, "grad_norm": 0.7540026903152466, "learning_rate": 2.3415701500515086e-07, "loss": 2.0353, "step": 28017 }, { "epoch": 0.93, "grad_norm": 0.7522410154342651, "learning_rate": 2.3392840459991795e-07, "loss": 2.077, "step": 28018 }, { "epoch": 0.93, "grad_norm": 0.7347499132156372, "learning_rate": 2.3369990452603907e-07, "loss": 2.0153, "step": 28019 }, { "epoch": 0.93, "grad_norm": 0.735512375831604, "learning_rate": 2.3347151478609554e-07, "loss": 2.0494, "step": 28020 }, { "epoch": 0.93, "grad_norm": 0.802232027053833, "learning_rate": 2.3324323538266524e-07, "loss": 2.09, "step": 28021 }, { "epoch": 0.93, "grad_norm": 0.7380008101463318, "learning_rate": 2.330150663183295e-07, "loss": 2.0469, "step": 28022 }, { "epoch": 0.93, "grad_norm": 0.7257122993469238, "learning_rate": 2.327870075956673e-07, "loss": 2.0794, "step": 28023 }, { "epoch": 0.93, "grad_norm": 0.7509109377861023, "learning_rate": 2.3255905921725219e-07, "loss": 2.1458, "step": 28024 }, { "epoch": 0.93, "grad_norm": 0.7736875414848328, "learning_rate": 2.3233122118565988e-07, "loss": 1.9821, "step": 28025 }, { "epoch": 0.93, "grad_norm": 0.7294339537620544, "learning_rate": 2.3210349350346607e-07, "loss": 2.044, "step": 28026 }, { "epoch": 0.93, "grad_norm": 0.7399587631225586, "learning_rate": 2.3187587617324204e-07, "loss": 2.0529, "step": 28027 }, { "epoch": 0.93, "grad_norm": 0.7400123476982117, "learning_rate": 2.3164836919755905e-07, "loss": 2.0704, "step": 28028 }, { "epoch": 0.93, "grad_norm": 0.7414452433586121, "learning_rate": 2.3142097257898955e-07, "loss": 2.0573, "step": 28029 }, { "epoch": 0.93, "grad_norm": 0.734362781047821, "learning_rate": 2.3119368632010031e-07, "loss": 2.0112, "step": 28030 }, { "epoch": 0.93, "grad_norm": 0.7472421526908875, "learning_rate": 2.309665104234582e-07, "loss": 2.0708, "step": 28031 }, { "epoch": 0.93, "grad_norm": 0.7583260536193848, "learning_rate": 2.3073944489163337e-07, "loss": 2.0596, "step": 28032 }, { "epoch": 0.93, "grad_norm": 0.7616268396377563, "learning_rate": 2.305124897271882e-07, "loss": 2.0421, "step": 28033 }, { "epoch": 0.93, "grad_norm": 0.7394059896469116, "learning_rate": 2.3028564493268733e-07, "loss": 2.007, "step": 28034 }, { "epoch": 0.93, "grad_norm": 0.7329918146133423, "learning_rate": 2.300589105106943e-07, "loss": 2.0464, "step": 28035 }, { "epoch": 0.93, "grad_norm": 0.734288215637207, "learning_rate": 2.2983228646376808e-07, "loss": 1.9856, "step": 28036 }, { "epoch": 0.93, "grad_norm": 0.7182545065879822, "learning_rate": 2.2960577279447116e-07, "loss": 2.0219, "step": 28037 }, { "epoch": 0.93, "grad_norm": 0.7408545613288879, "learning_rate": 2.2937936950536365e-07, "loss": 2.0557, "step": 28038 }, { "epoch": 0.93, "grad_norm": 0.7295119762420654, "learning_rate": 2.2915307659900243e-07, "loss": 2.0209, "step": 28039 }, { "epoch": 0.93, "grad_norm": 0.7122246026992798, "learning_rate": 2.28926894077941e-07, "loss": 1.9999, "step": 28040 }, { "epoch": 0.93, "grad_norm": 0.737155556678772, "learning_rate": 2.2870082194473954e-07, "loss": 2.0253, "step": 28041 }, { "epoch": 0.93, "grad_norm": 0.7549123167991638, "learning_rate": 2.284748602019482e-07, "loss": 2.0351, "step": 28042 }, { "epoch": 0.93, "grad_norm": 0.7305973172187805, "learning_rate": 2.2824900885212165e-07, "loss": 1.9916, "step": 28043 }, { "epoch": 0.93, "grad_norm": 0.7417933940887451, "learning_rate": 2.2802326789781005e-07, "loss": 2.0495, "step": 28044 }, { "epoch": 0.93, "grad_norm": 0.7741519212722778, "learning_rate": 2.277976373415658e-07, "loss": 2.0991, "step": 28045 }, { "epoch": 0.93, "grad_norm": 0.7422214150428772, "learning_rate": 2.2757211718593686e-07, "loss": 2.0584, "step": 28046 }, { "epoch": 0.93, "grad_norm": 0.7591766119003296, "learning_rate": 2.273467074334701e-07, "loss": 2.0612, "step": 28047 }, { "epoch": 0.93, "grad_norm": 0.7199655771255493, "learning_rate": 2.2712140808671345e-07, "loss": 2.1208, "step": 28048 }, { "epoch": 0.93, "grad_norm": 0.7292852997779846, "learning_rate": 2.2689621914821157e-07, "loss": 2.0082, "step": 28049 }, { "epoch": 0.93, "grad_norm": 0.7759328484535217, "learning_rate": 2.26671140620508e-07, "loss": 1.97, "step": 28050 }, { "epoch": 0.93, "grad_norm": 0.7437945008277893, "learning_rate": 2.2644617250614732e-07, "loss": 2.0394, "step": 28051 }, { "epoch": 0.93, "grad_norm": 0.7486053705215454, "learning_rate": 2.2622131480766974e-07, "loss": 2.0467, "step": 28052 }, { "epoch": 0.93, "grad_norm": 0.734088659286499, "learning_rate": 2.2599656752761433e-07, "loss": 2.0934, "step": 28053 }, { "epoch": 0.93, "grad_norm": 0.7579296827316284, "learning_rate": 2.2577193066852242e-07, "loss": 2.0909, "step": 28054 }, { "epoch": 0.93, "grad_norm": 0.7074944972991943, "learning_rate": 2.2554740423293198e-07, "loss": 2.0442, "step": 28055 }, { "epoch": 0.93, "grad_norm": 0.7578952312469482, "learning_rate": 2.2532298822337762e-07, "loss": 2.0137, "step": 28056 }, { "epoch": 0.93, "grad_norm": 0.7697674632072449, "learning_rate": 2.250986826423962e-07, "loss": 2.0474, "step": 28057 }, { "epoch": 0.93, "grad_norm": 0.7605390548706055, "learning_rate": 2.2487448749252017e-07, "loss": 2.0712, "step": 28058 }, { "epoch": 0.93, "grad_norm": 0.7327914237976074, "learning_rate": 2.2465040277628303e-07, "loss": 2.0572, "step": 28059 }, { "epoch": 0.93, "grad_norm": 0.7244095206260681, "learning_rate": 2.244264284962183e-07, "loss": 2.0356, "step": 28060 }, { "epoch": 0.93, "grad_norm": 0.7222782969474792, "learning_rate": 2.2420256465485403e-07, "loss": 2.0454, "step": 28061 }, { "epoch": 0.93, "grad_norm": 0.7518909573554993, "learning_rate": 2.2397881125471922e-07, "loss": 2.0339, "step": 28062 }, { "epoch": 0.93, "grad_norm": 0.7530210614204407, "learning_rate": 2.237551682983441e-07, "loss": 1.9957, "step": 28063 }, { "epoch": 0.93, "grad_norm": 0.7931407690048218, "learning_rate": 2.2353163578825333e-07, "loss": 2.0776, "step": 28064 }, { "epoch": 0.93, "grad_norm": 0.7547805309295654, "learning_rate": 2.2330821372697154e-07, "loss": 2.0355, "step": 28065 }, { "epoch": 0.93, "grad_norm": 0.761118471622467, "learning_rate": 2.2308490211702338e-07, "loss": 2.0813, "step": 28066 }, { "epoch": 0.93, "grad_norm": 0.750560462474823, "learning_rate": 2.2286170096093352e-07, "loss": 2.0455, "step": 28067 }, { "epoch": 0.93, "grad_norm": 0.7362675070762634, "learning_rate": 2.2263861026122213e-07, "loss": 2.0452, "step": 28068 }, { "epoch": 0.93, "grad_norm": 0.738545835018158, "learning_rate": 2.2241563002040945e-07, "loss": 2.0323, "step": 28069 }, { "epoch": 0.93, "grad_norm": 0.7388875484466553, "learning_rate": 2.2219276024101456e-07, "loss": 1.9414, "step": 28070 }, { "epoch": 0.93, "grad_norm": 0.7838796973228455, "learning_rate": 2.2197000092555544e-07, "loss": 2.1119, "step": 28071 }, { "epoch": 0.93, "grad_norm": 0.7481625080108643, "learning_rate": 2.2174735207654895e-07, "loss": 2.0112, "step": 28072 }, { "epoch": 0.93, "grad_norm": 0.7383284568786621, "learning_rate": 2.215248136965109e-07, "loss": 2.0938, "step": 28073 }, { "epoch": 0.93, "grad_norm": 0.7471939921379089, "learning_rate": 2.2130238578795372e-07, "loss": 2.0967, "step": 28074 }, { "epoch": 0.93, "grad_norm": 0.7329567670822144, "learning_rate": 2.210800683533909e-07, "loss": 1.9997, "step": 28075 }, { "epoch": 0.93, "grad_norm": 0.7704600691795349, "learning_rate": 2.2085786139533606e-07, "loss": 2.0782, "step": 28076 }, { "epoch": 0.93, "grad_norm": 0.7359515428543091, "learning_rate": 2.2063576491629712e-07, "loss": 2.0162, "step": 28077 }, { "epoch": 0.93, "grad_norm": 0.7156592011451721, "learning_rate": 2.2041377891878436e-07, "loss": 1.9805, "step": 28078 }, { "epoch": 0.93, "grad_norm": 0.7603126168251038, "learning_rate": 2.201919034053046e-07, "loss": 2.0579, "step": 28079 }, { "epoch": 0.93, "grad_norm": 0.7617754340171814, "learning_rate": 2.1997013837836589e-07, "loss": 2.0822, "step": 28080 }, { "epoch": 0.93, "grad_norm": 0.7407201528549194, "learning_rate": 2.1974848384047177e-07, "loss": 1.9292, "step": 28081 }, { "epoch": 0.93, "grad_norm": 0.7522522807121277, "learning_rate": 2.1952693979412798e-07, "loss": 2.049, "step": 28082 }, { "epoch": 0.93, "grad_norm": 0.7404746413230896, "learning_rate": 2.193055062418381e-07, "loss": 2.0118, "step": 28083 }, { "epoch": 0.93, "grad_norm": 0.7674007415771484, "learning_rate": 2.1908418318610125e-07, "loss": 2.0128, "step": 28084 }, { "epoch": 0.93, "grad_norm": 0.76450115442276, "learning_rate": 2.1886297062941985e-07, "loss": 2.1129, "step": 28085 }, { "epoch": 0.93, "grad_norm": 0.7293853759765625, "learning_rate": 2.1864186857429303e-07, "loss": 2.0713, "step": 28086 }, { "epoch": 0.93, "grad_norm": 0.7815470099449158, "learning_rate": 2.1842087702321545e-07, "loss": 2.0846, "step": 28087 }, { "epoch": 0.93, "grad_norm": 0.7278974652290344, "learning_rate": 2.1819999597868735e-07, "loss": 2.1167, "step": 28088 }, { "epoch": 0.93, "grad_norm": 0.7420892119407654, "learning_rate": 2.179792254432045e-07, "loss": 2.006, "step": 28089 }, { "epoch": 0.93, "grad_norm": 0.7969798445701599, "learning_rate": 2.177585654192571e-07, "loss": 2.0815, "step": 28090 }, { "epoch": 0.93, "grad_norm": 0.755756676197052, "learning_rate": 2.175380159093421e-07, "loss": 1.9916, "step": 28091 }, { "epoch": 0.93, "grad_norm": 0.7682430148124695, "learning_rate": 2.1731757691594968e-07, "loss": 2.0873, "step": 28092 }, { "epoch": 0.93, "grad_norm": 0.7515072226524353, "learning_rate": 2.17097248441569e-07, "loss": 2.0835, "step": 28093 }, { "epoch": 0.93, "grad_norm": 0.7316335439682007, "learning_rate": 2.1687703048869025e-07, "loss": 2.0479, "step": 28094 }, { "epoch": 0.93, "grad_norm": 0.7294155359268188, "learning_rate": 2.166569230598037e-07, "loss": 2.0195, "step": 28095 }, { "epoch": 0.93, "grad_norm": 0.7303521633148193, "learning_rate": 2.1643692615739176e-07, "loss": 2.0483, "step": 28096 }, { "epoch": 0.93, "grad_norm": 0.7538743615150452, "learning_rate": 2.1621703978394137e-07, "loss": 2.1011, "step": 28097 }, { "epoch": 0.93, "grad_norm": 0.7781128883361816, "learning_rate": 2.1599726394193722e-07, "loss": 2.0983, "step": 28098 }, { "epoch": 0.93, "grad_norm": 0.7532880306243896, "learning_rate": 2.157775986338617e-07, "loss": 2.0182, "step": 28099 }, { "epoch": 0.93, "grad_norm": 0.7647575736045837, "learning_rate": 2.1555804386219735e-07, "loss": 2.0347, "step": 28100 }, { "epoch": 0.93, "grad_norm": 0.7569502592086792, "learning_rate": 2.1533859962942438e-07, "loss": 2.0564, "step": 28101 }, { "epoch": 0.93, "grad_norm": 0.7714378833770752, "learning_rate": 2.151192659380208e-07, "loss": 2.1549, "step": 28102 }, { "epoch": 0.93, "grad_norm": 0.7836398482322693, "learning_rate": 2.149000427904646e-07, "loss": 2.0741, "step": 28103 }, { "epoch": 0.94, "grad_norm": 0.7313203811645508, "learning_rate": 2.1468093018923497e-07, "loss": 1.9401, "step": 28104 }, { "epoch": 0.94, "grad_norm": 0.7311463356018066, "learning_rate": 2.1446192813680433e-07, "loss": 2.0176, "step": 28105 }, { "epoch": 0.94, "grad_norm": 0.7424915432929993, "learning_rate": 2.1424303663564737e-07, "loss": 2.0127, "step": 28106 }, { "epoch": 0.94, "grad_norm": 0.7267068028450012, "learning_rate": 2.140242556882377e-07, "loss": 2.0301, "step": 28107 }, { "epoch": 0.94, "grad_norm": 0.7418103218078613, "learning_rate": 2.1380558529704888e-07, "loss": 1.996, "step": 28108 }, { "epoch": 0.94, "grad_norm": 0.8219940662384033, "learning_rate": 2.1358702546454779e-07, "loss": 2.0073, "step": 28109 }, { "epoch": 0.94, "grad_norm": 0.732789933681488, "learning_rate": 2.1336857619320362e-07, "loss": 2.1028, "step": 28110 }, { "epoch": 0.94, "grad_norm": 0.748769998550415, "learning_rate": 2.131502374854877e-07, "loss": 2.0909, "step": 28111 }, { "epoch": 0.94, "grad_norm": 0.7236852645874023, "learning_rate": 2.129320093438636e-07, "loss": 1.9705, "step": 28112 }, { "epoch": 0.94, "grad_norm": 0.7357243895530701, "learning_rate": 2.1271389177079938e-07, "loss": 2.0998, "step": 28113 }, { "epoch": 0.94, "grad_norm": 0.743503212928772, "learning_rate": 2.124958847687575e-07, "loss": 2.0692, "step": 28114 }, { "epoch": 0.94, "grad_norm": 0.7469790577888489, "learning_rate": 2.122779883401993e-07, "loss": 2.1115, "step": 28115 }, { "epoch": 0.94, "grad_norm": 0.7310565710067749, "learning_rate": 2.120602024875895e-07, "loss": 2.0823, "step": 28116 }, { "epoch": 0.94, "grad_norm": 0.7582325339317322, "learning_rate": 2.1184252721338838e-07, "loss": 2.0617, "step": 28117 }, { "epoch": 0.94, "grad_norm": 0.7520031929016113, "learning_rate": 2.1162496252005172e-07, "loss": 2.0341, "step": 28118 }, { "epoch": 0.94, "grad_norm": 0.7410632967948914, "learning_rate": 2.1140750841003975e-07, "loss": 2.0661, "step": 28119 }, { "epoch": 0.94, "grad_norm": 0.7370291352272034, "learning_rate": 2.1119016488581058e-07, "loss": 1.9691, "step": 28120 }, { "epoch": 0.94, "grad_norm": 0.713188111782074, "learning_rate": 2.1097293194981662e-07, "loss": 2.0151, "step": 28121 }, { "epoch": 0.94, "grad_norm": 0.7436332106590271, "learning_rate": 2.107558096045148e-07, "loss": 2.0221, "step": 28122 }, { "epoch": 0.94, "grad_norm": 0.7481253147125244, "learning_rate": 2.1053879785235653e-07, "loss": 2.0088, "step": 28123 }, { "epoch": 0.94, "grad_norm": 0.7673843502998352, "learning_rate": 2.1032189669579317e-07, "loss": 2.0916, "step": 28124 }, { "epoch": 0.94, "grad_norm": 0.7568921446800232, "learning_rate": 2.101051061372761e-07, "loss": 2.0285, "step": 28125 }, { "epoch": 0.94, "grad_norm": 0.7405672073364258, "learning_rate": 2.0988842617925442e-07, "loss": 1.9999, "step": 28126 }, { "epoch": 0.94, "grad_norm": 0.7252838611602783, "learning_rate": 2.0967185682417625e-07, "loss": 1.9852, "step": 28127 }, { "epoch": 0.94, "grad_norm": 0.7522289156913757, "learning_rate": 2.0945539807448623e-07, "loss": 2.1143, "step": 28128 }, { "epoch": 0.94, "grad_norm": 0.7249211668968201, "learning_rate": 2.0923904993263132e-07, "loss": 2.0499, "step": 28129 }, { "epoch": 0.94, "grad_norm": 0.755709707736969, "learning_rate": 2.0902281240105627e-07, "loss": 2.08, "step": 28130 }, { "epoch": 0.94, "grad_norm": 0.737132728099823, "learning_rate": 2.0880668548220463e-07, "loss": 2.056, "step": 28131 }, { "epoch": 0.94, "grad_norm": 0.7643870711326599, "learning_rate": 2.0859066917851445e-07, "loss": 2.0048, "step": 28132 }, { "epoch": 0.94, "grad_norm": 0.739196240901947, "learning_rate": 2.0837476349243046e-07, "loss": 2.0394, "step": 28133 }, { "epoch": 0.94, "grad_norm": 0.7299288511276245, "learning_rate": 2.081589684263885e-07, "loss": 1.9996, "step": 28134 }, { "epoch": 0.94, "grad_norm": 0.7216627597808838, "learning_rate": 2.0794328398282992e-07, "loss": 2.0902, "step": 28135 }, { "epoch": 0.94, "grad_norm": 0.7541171312332153, "learning_rate": 2.0772771016418836e-07, "loss": 2.0682, "step": 28136 }, { "epoch": 0.94, "grad_norm": 0.741381049156189, "learning_rate": 2.075122469728996e-07, "loss": 1.9696, "step": 28137 }, { "epoch": 0.94, "grad_norm": 0.7600972652435303, "learning_rate": 2.0729689441139844e-07, "loss": 2.0442, "step": 28138 }, { "epoch": 0.94, "grad_norm": 0.766302227973938, "learning_rate": 2.0708165248211843e-07, "loss": 2.0074, "step": 28139 }, { "epoch": 0.94, "grad_norm": 0.8163720965385437, "learning_rate": 2.0686652118749207e-07, "loss": 2.056, "step": 28140 }, { "epoch": 0.94, "grad_norm": 0.744296669960022, "learning_rate": 2.0665150052994632e-07, "loss": 2.0521, "step": 28141 }, { "epoch": 0.94, "grad_norm": 0.7360948920249939, "learning_rate": 2.0643659051191366e-07, "loss": 2.0863, "step": 28142 }, { "epoch": 0.94, "grad_norm": 0.7409694194793701, "learning_rate": 2.0622179113581997e-07, "loss": 1.9769, "step": 28143 }, { "epoch": 0.94, "grad_norm": 0.7717030048370361, "learning_rate": 2.0600710240409327e-07, "loss": 2.0539, "step": 28144 }, { "epoch": 0.94, "grad_norm": 0.7367424368858337, "learning_rate": 2.057925243191583e-07, "loss": 2.0567, "step": 28145 }, { "epoch": 0.94, "grad_norm": 0.7140920162200928, "learning_rate": 2.0557805688343978e-07, "loss": 1.9633, "step": 28146 }, { "epoch": 0.94, "grad_norm": 0.7554545998573303, "learning_rate": 2.0536370009935914e-07, "loss": 2.0843, "step": 28147 }, { "epoch": 0.94, "grad_norm": 0.7350105047225952, "learning_rate": 2.0514945396933993e-07, "loss": 2.007, "step": 28148 }, { "epoch": 0.94, "grad_norm": 0.7714079022407532, "learning_rate": 2.049353184958025e-07, "loss": 2.0787, "step": 28149 }, { "epoch": 0.94, "grad_norm": 0.7355878353118896, "learning_rate": 2.047212936811649e-07, "loss": 2.0647, "step": 28150 }, { "epoch": 0.94, "grad_norm": 0.7433454394340515, "learning_rate": 2.0450737952784517e-07, "loss": 2.0064, "step": 28151 }, { "epoch": 0.94, "grad_norm": 0.7268030643463135, "learning_rate": 2.0429357603826028e-07, "loss": 2.0534, "step": 28152 }, { "epoch": 0.94, "grad_norm": 0.7110949158668518, "learning_rate": 2.0407988321482718e-07, "loss": 2.0149, "step": 28153 }, { "epoch": 0.94, "grad_norm": 0.7422033548355103, "learning_rate": 2.0386630105995618e-07, "loss": 2.0284, "step": 28154 }, { "epoch": 0.94, "grad_norm": 0.7450889945030212, "learning_rate": 2.0365282957606424e-07, "loss": 2.0824, "step": 28155 }, { "epoch": 0.94, "grad_norm": 0.7447482347488403, "learning_rate": 2.0343946876556164e-07, "loss": 2.0809, "step": 28156 }, { "epoch": 0.94, "grad_norm": 0.753074586391449, "learning_rate": 2.0322621863085756e-07, "loss": 2.0503, "step": 28157 }, { "epoch": 0.94, "grad_norm": 0.7411597371101379, "learning_rate": 2.0301307917436341e-07, "loss": 2.0666, "step": 28158 }, { "epoch": 0.94, "grad_norm": 0.7490658760070801, "learning_rate": 2.0280005039848505e-07, "loss": 2.0828, "step": 28159 }, { "epoch": 0.94, "grad_norm": 0.7272852659225464, "learning_rate": 2.0258713230562943e-07, "loss": 2.039, "step": 28160 }, { "epoch": 0.94, "grad_norm": 0.7387398481369019, "learning_rate": 2.0237432489820352e-07, "loss": 2.0435, "step": 28161 }, { "epoch": 0.94, "grad_norm": 0.7438418865203857, "learning_rate": 2.0216162817860985e-07, "loss": 2.0999, "step": 28162 }, { "epoch": 0.94, "grad_norm": 0.7278345823287964, "learning_rate": 2.0194904214925205e-07, "loss": 2.0894, "step": 28163 }, { "epoch": 0.94, "grad_norm": 0.7639041543006897, "learning_rate": 2.0173656681253262e-07, "loss": 2.0911, "step": 28164 }, { "epoch": 0.94, "grad_norm": 0.7376741766929626, "learning_rate": 2.0152420217084966e-07, "loss": 2.0474, "step": 28165 }, { "epoch": 0.94, "grad_norm": 0.7775146961212158, "learning_rate": 2.013119482266057e-07, "loss": 2.0563, "step": 28166 }, { "epoch": 0.94, "grad_norm": 0.7368758320808411, "learning_rate": 2.010998049821955e-07, "loss": 2.0242, "step": 28167 }, { "epoch": 0.94, "grad_norm": 0.7319049835205078, "learning_rate": 2.008877724400171e-07, "loss": 2.0981, "step": 28168 }, { "epoch": 0.94, "grad_norm": 0.7465494275093079, "learning_rate": 2.0067585060246531e-07, "loss": 2.0516, "step": 28169 }, { "epoch": 0.94, "grad_norm": 0.7248865365982056, "learning_rate": 2.0046403947193594e-07, "loss": 2.0978, "step": 28170 }, { "epoch": 0.94, "grad_norm": 0.7468136548995972, "learning_rate": 2.002523390508204e-07, "loss": 2.0433, "step": 28171 }, { "epoch": 0.94, "grad_norm": 0.7477654814720154, "learning_rate": 2.0004074934151019e-07, "loss": 2.0436, "step": 28172 }, { "epoch": 0.94, "grad_norm": 0.7288998365402222, "learning_rate": 1.9982927034639665e-07, "loss": 2.0386, "step": 28173 }, { "epoch": 0.94, "grad_norm": 0.7658536434173584, "learning_rate": 1.9961790206786901e-07, "loss": 2.1652, "step": 28174 }, { "epoch": 0.94, "grad_norm": 0.7352390885353088, "learning_rate": 1.9940664450831425e-07, "loss": 2.043, "step": 28175 }, { "epoch": 0.94, "grad_norm": 0.7358065247535706, "learning_rate": 1.9919549767011938e-07, "loss": 2.0835, "step": 28176 }, { "epoch": 0.94, "grad_norm": 0.7275101542472839, "learning_rate": 1.989844615556702e-07, "loss": 2.0853, "step": 28177 }, { "epoch": 0.94, "grad_norm": 0.802442193031311, "learning_rate": 1.987735361673493e-07, "loss": 2.0906, "step": 28178 }, { "epoch": 0.94, "grad_norm": 0.7385225296020508, "learning_rate": 1.985627215075425e-07, "loss": 2.0197, "step": 28179 }, { "epoch": 0.94, "grad_norm": 0.789284348487854, "learning_rate": 1.983520175786302e-07, "loss": 1.9989, "step": 28180 }, { "epoch": 0.94, "grad_norm": 0.7460114359855652, "learning_rate": 1.9814142438299156e-07, "loss": 2.0775, "step": 28181 }, { "epoch": 0.94, "grad_norm": 0.7618851661682129, "learning_rate": 1.9793094192300577e-07, "loss": 2.0995, "step": 28182 }, { "epoch": 0.94, "grad_norm": 0.7256976366043091, "learning_rate": 1.9772057020105317e-07, "loss": 2.0404, "step": 28183 }, { "epoch": 0.94, "grad_norm": 0.7586345076560974, "learning_rate": 1.9751030921950854e-07, "loss": 2.0076, "step": 28184 }, { "epoch": 0.94, "grad_norm": 0.7642638683319092, "learning_rate": 1.9730015898074662e-07, "loss": 2.0582, "step": 28185 }, { "epoch": 0.94, "grad_norm": 0.7528810501098633, "learning_rate": 1.970901194871444e-07, "loss": 2.0401, "step": 28186 }, { "epoch": 0.94, "grad_norm": 0.7620170712471008, "learning_rate": 1.968801907410711e-07, "loss": 2.0628, "step": 28187 }, { "epoch": 0.94, "grad_norm": 0.7333848476409912, "learning_rate": 1.9667037274490153e-07, "loss": 2.0514, "step": 28188 }, { "epoch": 0.94, "grad_norm": 0.7701336145401001, "learning_rate": 1.9646066550100374e-07, "loss": 2.0952, "step": 28189 }, { "epoch": 0.94, "grad_norm": 0.7643424868583679, "learning_rate": 1.9625106901174918e-07, "loss": 1.9422, "step": 28190 }, { "epoch": 0.94, "grad_norm": 0.7378793954849243, "learning_rate": 1.9604158327950263e-07, "loss": 2.0828, "step": 28191 }, { "epoch": 0.94, "grad_norm": 0.7595254182815552, "learning_rate": 1.9583220830663441e-07, "loss": 2.0135, "step": 28192 }, { "epoch": 0.94, "grad_norm": 0.7632750868797302, "learning_rate": 1.9562294409550708e-07, "loss": 2.0722, "step": 28193 }, { "epoch": 0.94, "grad_norm": 0.7199559807777405, "learning_rate": 1.9541379064848542e-07, "loss": 1.9962, "step": 28194 }, { "epoch": 0.94, "grad_norm": 0.7338096499443054, "learning_rate": 1.952047479679331e-07, "loss": 2.0311, "step": 28195 }, { "epoch": 0.94, "grad_norm": 0.756389319896698, "learning_rate": 1.9499581605621266e-07, "loss": 2.0938, "step": 28196 }, { "epoch": 0.94, "grad_norm": 0.757422149181366, "learning_rate": 1.947869949156822e-07, "loss": 2.021, "step": 28197 }, { "epoch": 0.94, "grad_norm": 0.752947986125946, "learning_rate": 1.94578284548701e-07, "loss": 2.0125, "step": 28198 }, { "epoch": 0.94, "grad_norm": 0.7873149514198303, "learning_rate": 1.943696849576293e-07, "loss": 2.0101, "step": 28199 }, { "epoch": 0.94, "grad_norm": 0.726495623588562, "learning_rate": 1.9416119614482089e-07, "loss": 2.0275, "step": 28200 }, { "epoch": 0.94, "grad_norm": 0.7671146392822266, "learning_rate": 1.9395281811263377e-07, "loss": 2.0783, "step": 28201 }, { "epoch": 0.94, "grad_norm": 0.7347133755683899, "learning_rate": 1.937445508634206e-07, "loss": 1.9786, "step": 28202 }, { "epoch": 0.94, "grad_norm": 0.7001929879188538, "learning_rate": 1.9353639439953387e-07, "loss": 2.0373, "step": 28203 }, { "epoch": 0.94, "grad_norm": 0.7235764265060425, "learning_rate": 1.9332834872332507e-07, "loss": 2.0821, "step": 28204 }, { "epoch": 0.94, "grad_norm": 0.7280557751655579, "learning_rate": 1.9312041383714674e-07, "loss": 2.0605, "step": 28205 }, { "epoch": 0.94, "grad_norm": 0.7409875392913818, "learning_rate": 1.9291258974334592e-07, "loss": 1.9524, "step": 28206 }, { "epoch": 0.94, "grad_norm": 0.7390321493148804, "learning_rate": 1.9270487644427072e-07, "loss": 1.9937, "step": 28207 }, { "epoch": 0.94, "grad_norm": 0.7340453863143921, "learning_rate": 1.9249727394226925e-07, "loss": 2.0786, "step": 28208 }, { "epoch": 0.94, "grad_norm": 0.7230122089385986, "learning_rate": 1.922897822396852e-07, "loss": 1.9847, "step": 28209 }, { "epoch": 0.94, "grad_norm": 0.7584959268569946, "learning_rate": 1.9208240133886335e-07, "loss": 1.9893, "step": 28210 }, { "epoch": 0.94, "grad_norm": 0.7738015651702881, "learning_rate": 1.918751312421463e-07, "loss": 2.0216, "step": 28211 }, { "epoch": 0.94, "grad_norm": 0.7722808718681335, "learning_rate": 1.916679719518766e-07, "loss": 2.0461, "step": 28212 }, { "epoch": 0.94, "grad_norm": 0.7201647162437439, "learning_rate": 1.9146092347039346e-07, "loss": 2.0471, "step": 28213 }, { "epoch": 0.94, "grad_norm": 0.7485525012016296, "learning_rate": 1.9125398580003617e-07, "loss": 2.0726, "step": 28214 }, { "epoch": 0.94, "grad_norm": 0.740333080291748, "learning_rate": 1.9104715894314397e-07, "loss": 2.0727, "step": 28215 }, { "epoch": 0.94, "grad_norm": 0.7636852264404297, "learning_rate": 1.9084044290205162e-07, "loss": 1.9932, "step": 28216 }, { "epoch": 0.94, "grad_norm": 0.7347451448440552, "learning_rate": 1.9063383767909392e-07, "loss": 1.9765, "step": 28217 }, { "epoch": 0.94, "grad_norm": 0.7465278506278992, "learning_rate": 1.9042734327660794e-07, "loss": 2.0626, "step": 28218 }, { "epoch": 0.94, "grad_norm": 0.7537314891815186, "learning_rate": 1.902209596969251e-07, "loss": 2.0191, "step": 28219 }, { "epoch": 0.94, "grad_norm": 0.7364344000816345, "learning_rate": 1.900146869423758e-07, "loss": 2.0537, "step": 28220 }, { "epoch": 0.94, "grad_norm": 0.7271739840507507, "learning_rate": 1.8980852501529146e-07, "loss": 2.0777, "step": 28221 }, { "epoch": 0.94, "grad_norm": 0.7679404020309448, "learning_rate": 1.8960247391800134e-07, "loss": 2.0637, "step": 28222 }, { "epoch": 0.94, "grad_norm": 0.7484942078590393, "learning_rate": 1.893965336528336e-07, "loss": 2.0338, "step": 28223 }, { "epoch": 0.94, "grad_norm": 0.7485313415527344, "learning_rate": 1.8919070422211306e-07, "loss": 2.042, "step": 28224 }, { "epoch": 0.94, "grad_norm": 0.7216722965240479, "learning_rate": 1.889849856281667e-07, "loss": 2.069, "step": 28225 }, { "epoch": 0.94, "grad_norm": 0.7566137909889221, "learning_rate": 1.8877937787331714e-07, "loss": 2.0587, "step": 28226 }, { "epoch": 0.94, "grad_norm": 0.7391642332077026, "learning_rate": 1.8857388095989026e-07, "loss": 2.1168, "step": 28227 }, { "epoch": 0.94, "grad_norm": 0.7059034705162048, "learning_rate": 1.8836849489020537e-07, "loss": 2.0032, "step": 28228 }, { "epoch": 0.94, "grad_norm": 0.7885773777961731, "learning_rate": 1.881632196665817e-07, "loss": 2.0099, "step": 28229 }, { "epoch": 0.94, "grad_norm": 0.7154566049575806, "learning_rate": 1.8795805529133959e-07, "loss": 1.9764, "step": 28230 }, { "epoch": 0.94, "grad_norm": 0.7505607604980469, "learning_rate": 1.8775300176679834e-07, "loss": 2.1414, "step": 28231 }, { "epoch": 0.94, "grad_norm": 0.7214109301567078, "learning_rate": 1.8754805909527274e-07, "loss": 2.0525, "step": 28232 }, { "epoch": 0.94, "grad_norm": 0.7589523792266846, "learning_rate": 1.873432272790776e-07, "loss": 2.0263, "step": 28233 }, { "epoch": 0.94, "grad_norm": 0.7556894421577454, "learning_rate": 1.871385063205289e-07, "loss": 2.004, "step": 28234 }, { "epoch": 0.94, "grad_norm": 0.7544842958450317, "learning_rate": 1.8693389622193692e-07, "loss": 2.0317, "step": 28235 }, { "epoch": 0.94, "grad_norm": 0.736167848110199, "learning_rate": 1.8672939698561544e-07, "loss": 1.9811, "step": 28236 }, { "epoch": 0.94, "grad_norm": 0.743106484413147, "learning_rate": 1.865250086138759e-07, "loss": 2.0331, "step": 28237 }, { "epoch": 0.94, "grad_norm": 0.77440345287323, "learning_rate": 1.8632073110902428e-07, "loss": 2.0395, "step": 28238 }, { "epoch": 0.94, "grad_norm": 0.7486289739608765, "learning_rate": 1.8611656447336868e-07, "loss": 2.1089, "step": 28239 }, { "epoch": 0.94, "grad_norm": 0.7164742350578308, "learning_rate": 1.859125087092184e-07, "loss": 1.9477, "step": 28240 }, { "epoch": 0.94, "grad_norm": 0.7724472284317017, "learning_rate": 1.8570856381887603e-07, "loss": 2.0479, "step": 28241 }, { "epoch": 0.94, "grad_norm": 0.7318061590194702, "learning_rate": 1.855047298046464e-07, "loss": 1.9245, "step": 28242 }, { "epoch": 0.94, "grad_norm": 0.7490097284317017, "learning_rate": 1.8530100666883322e-07, "loss": 2.0387, "step": 28243 }, { "epoch": 0.94, "grad_norm": 0.7235598564147949, "learning_rate": 1.8509739441373576e-07, "loss": 2.0979, "step": 28244 }, { "epoch": 0.94, "grad_norm": 0.8222164511680603, "learning_rate": 1.848938930416566e-07, "loss": 1.9763, "step": 28245 }, { "epoch": 0.94, "grad_norm": 0.7476462721824646, "learning_rate": 1.8469050255489506e-07, "loss": 2.0123, "step": 28246 }, { "epoch": 0.94, "grad_norm": 0.7721613049507141, "learning_rate": 1.8448722295574705e-07, "loss": 2.0084, "step": 28247 }, { "epoch": 0.94, "grad_norm": 0.7403656840324402, "learning_rate": 1.8428405424650964e-07, "loss": 1.9892, "step": 28248 }, { "epoch": 0.94, "grad_norm": 0.7229350805282593, "learning_rate": 1.8408099642947874e-07, "loss": 1.9968, "step": 28249 }, { "epoch": 0.94, "grad_norm": 0.7326645255088806, "learning_rate": 1.838780495069492e-07, "loss": 2.0676, "step": 28250 }, { "epoch": 0.94, "grad_norm": 0.7832779288291931, "learning_rate": 1.836752134812103e-07, "loss": 2.1203, "step": 28251 }, { "epoch": 0.94, "grad_norm": 0.7860056757926941, "learning_rate": 1.834724883545569e-07, "loss": 1.9958, "step": 28252 }, { "epoch": 0.94, "grad_norm": 0.7495105862617493, "learning_rate": 1.8326987412927932e-07, "loss": 2.0715, "step": 28253 }, { "epoch": 0.94, "grad_norm": 0.731623113155365, "learning_rate": 1.8306737080766468e-07, "loss": 2.077, "step": 28254 }, { "epoch": 0.94, "grad_norm": 0.7362052202224731, "learning_rate": 1.8286497839200112e-07, "loss": 2.0763, "step": 28255 }, { "epoch": 0.94, "grad_norm": 0.7488023638725281, "learning_rate": 1.8266269688457682e-07, "loss": 2.0069, "step": 28256 }, { "epoch": 0.94, "grad_norm": 0.7696954607963562, "learning_rate": 1.8246052628767442e-07, "loss": 2.0637, "step": 28257 }, { "epoch": 0.94, "grad_norm": 0.7478713393211365, "learning_rate": 1.822584666035798e-07, "loss": 2.0495, "step": 28258 }, { "epoch": 0.94, "grad_norm": 0.7723537087440491, "learning_rate": 1.820565178345768e-07, "loss": 2.0586, "step": 28259 }, { "epoch": 0.94, "grad_norm": 0.7291403412818909, "learning_rate": 1.8185467998294347e-07, "loss": 2.093, "step": 28260 }, { "epoch": 0.94, "grad_norm": 0.7738631367683411, "learning_rate": 1.8165295305096254e-07, "loss": 2.0762, "step": 28261 }, { "epoch": 0.94, "grad_norm": 0.7681684494018555, "learning_rate": 1.8145133704091323e-07, "loss": 2.0175, "step": 28262 }, { "epoch": 0.94, "grad_norm": 0.7546780705451965, "learning_rate": 1.812498319550715e-07, "loss": 2.0127, "step": 28263 }, { "epoch": 0.94, "grad_norm": 0.7464657425880432, "learning_rate": 1.8104843779571447e-07, "loss": 2.0764, "step": 28264 }, { "epoch": 0.94, "grad_norm": 0.7276400923728943, "learning_rate": 1.8084715456511913e-07, "loss": 2.0589, "step": 28265 }, { "epoch": 0.94, "grad_norm": 0.7347224354743958, "learning_rate": 1.8064598226555706e-07, "loss": 2.0418, "step": 28266 }, { "epoch": 0.94, "grad_norm": 0.7595275640487671, "learning_rate": 1.804449208993009e-07, "loss": 2.0885, "step": 28267 }, { "epoch": 0.94, "grad_norm": 0.7277171611785889, "learning_rate": 1.802439704686254e-07, "loss": 2.0294, "step": 28268 }, { "epoch": 0.94, "grad_norm": 0.7411624193191528, "learning_rate": 1.8004313097579772e-07, "loss": 2.0507, "step": 28269 }, { "epoch": 0.94, "grad_norm": 0.7438887357711792, "learning_rate": 1.7984240242308714e-07, "loss": 2.0352, "step": 28270 }, { "epoch": 0.94, "grad_norm": 0.7290762066841125, "learning_rate": 1.7964178481276295e-07, "loss": 2.0387, "step": 28271 }, { "epoch": 0.94, "grad_norm": 0.7570380568504333, "learning_rate": 1.7944127814709e-07, "loss": 1.9369, "step": 28272 }, { "epoch": 0.94, "grad_norm": 0.7962685823440552, "learning_rate": 1.7924088242833427e-07, "loss": 2.0095, "step": 28273 }, { "epoch": 0.94, "grad_norm": 0.7719094753265381, "learning_rate": 1.7904059765875837e-07, "loss": 2.046, "step": 28274 }, { "epoch": 0.94, "grad_norm": 0.7585697174072266, "learning_rate": 1.7884042384062827e-07, "loss": 2.0811, "step": 28275 }, { "epoch": 0.94, "grad_norm": 0.7371112108230591, "learning_rate": 1.7864036097620108e-07, "loss": 2.0271, "step": 28276 }, { "epoch": 0.94, "grad_norm": 0.7562046647071838, "learning_rate": 1.784404090677405e-07, "loss": 2.0337, "step": 28277 }, { "epoch": 0.94, "grad_norm": 0.7499122023582458, "learning_rate": 1.782405681175048e-07, "loss": 1.93, "step": 28278 }, { "epoch": 0.94, "grad_norm": 0.7518165111541748, "learning_rate": 1.7804083812774985e-07, "loss": 2.0007, "step": 28279 }, { "epoch": 0.94, "grad_norm": 0.7126772403717041, "learning_rate": 1.7784121910073282e-07, "loss": 1.9807, "step": 28280 }, { "epoch": 0.94, "grad_norm": 0.7511412501335144, "learning_rate": 1.7764171103871187e-07, "loss": 2.0675, "step": 28281 }, { "epoch": 0.94, "grad_norm": 0.7182841300964355, "learning_rate": 1.774423139439363e-07, "loss": 2.0088, "step": 28282 }, { "epoch": 0.94, "grad_norm": 0.7198595404624939, "learning_rate": 1.7724302781866098e-07, "loss": 2.0408, "step": 28283 }, { "epoch": 0.94, "grad_norm": 0.7396387457847595, "learning_rate": 1.7704385266513747e-07, "loss": 1.9792, "step": 28284 }, { "epoch": 0.94, "grad_norm": 0.7296948432922363, "learning_rate": 1.7684478848561616e-07, "loss": 2.032, "step": 28285 }, { "epoch": 0.94, "grad_norm": 0.7366349101066589, "learning_rate": 1.7664583528234413e-07, "loss": 1.966, "step": 28286 }, { "epoch": 0.94, "grad_norm": 0.7235068678855896, "learning_rate": 1.7644699305757184e-07, "loss": 2.0961, "step": 28287 }, { "epoch": 0.94, "grad_norm": 0.7307411432266235, "learning_rate": 1.7624826181354415e-07, "loss": 2.11, "step": 28288 }, { "epoch": 0.94, "grad_norm": 0.7531940937042236, "learning_rate": 1.760496415525048e-07, "loss": 1.9982, "step": 28289 }, { "epoch": 0.94, "grad_norm": 0.7397749423980713, "learning_rate": 1.7585113227670092e-07, "loss": 2.0457, "step": 28290 }, { "epoch": 0.94, "grad_norm": 0.792334794998169, "learning_rate": 1.756527339883729e-07, "loss": 2.1144, "step": 28291 }, { "epoch": 0.94, "grad_norm": 0.7362340688705444, "learning_rate": 1.754544466897623e-07, "loss": 2.0472, "step": 28292 }, { "epoch": 0.94, "grad_norm": 0.746554434299469, "learning_rate": 1.7525627038310955e-07, "loss": 2.0498, "step": 28293 }, { "epoch": 0.94, "grad_norm": 0.7348185181617737, "learning_rate": 1.7505820507065508e-07, "loss": 2.0312, "step": 28294 }, { "epoch": 0.94, "grad_norm": 0.7473952174186707, "learning_rate": 1.7486025075463375e-07, "loss": 2.1029, "step": 28295 }, { "epoch": 0.94, "grad_norm": 0.7585883140563965, "learning_rate": 1.746624074372827e-07, "loss": 2.0093, "step": 28296 }, { "epoch": 0.94, "grad_norm": 0.736268162727356, "learning_rate": 1.7446467512083897e-07, "loss": 2.0541, "step": 28297 }, { "epoch": 0.94, "grad_norm": 0.7622162103652954, "learning_rate": 1.7426705380753306e-07, "loss": 2.0535, "step": 28298 }, { "epoch": 0.94, "grad_norm": 0.7357087731361389, "learning_rate": 1.7406954349960203e-07, "loss": 1.9852, "step": 28299 }, { "epoch": 0.94, "grad_norm": 0.7691460251808167, "learning_rate": 1.73872144199273e-07, "loss": 2.0408, "step": 28300 }, { "epoch": 0.94, "grad_norm": 0.7713519930839539, "learning_rate": 1.7367485590877863e-07, "loss": 1.9845, "step": 28301 }, { "epoch": 0.94, "grad_norm": 0.7451587915420532, "learning_rate": 1.7347767863034605e-07, "loss": 2.0524, "step": 28302 }, { "epoch": 0.94, "grad_norm": 0.7565116882324219, "learning_rate": 1.7328061236620564e-07, "loss": 2.0838, "step": 28303 }, { "epoch": 0.94, "grad_norm": 0.7465039491653442, "learning_rate": 1.73083657118579e-07, "loss": 2.0999, "step": 28304 }, { "epoch": 0.94, "grad_norm": 0.7290777564048767, "learning_rate": 1.7288681288969544e-07, "loss": 1.9937, "step": 28305 }, { "epoch": 0.94, "grad_norm": 0.7499036192893982, "learning_rate": 1.7269007968177765e-07, "loss": 1.9387, "step": 28306 }, { "epoch": 0.94, "grad_norm": 0.7434399724006653, "learning_rate": 1.7249345749704828e-07, "loss": 2.0215, "step": 28307 }, { "epoch": 0.94, "grad_norm": 0.7299054265022278, "learning_rate": 1.7229694633772775e-07, "loss": 2.0832, "step": 28308 }, { "epoch": 0.94, "grad_norm": 0.7239153385162354, "learning_rate": 1.7210054620603656e-07, "loss": 2.0073, "step": 28309 }, { "epoch": 0.94, "grad_norm": 0.7587687373161316, "learning_rate": 1.71904257104194e-07, "loss": 2.0789, "step": 28310 }, { "epoch": 0.94, "grad_norm": 0.7295830249786377, "learning_rate": 1.717080790344172e-07, "loss": 2.0189, "step": 28311 }, { "epoch": 0.94, "grad_norm": 0.7542331218719482, "learning_rate": 1.7151201199892331e-07, "loss": 2.0574, "step": 28312 }, { "epoch": 0.94, "grad_norm": 0.7858145833015442, "learning_rate": 1.7131605599992606e-07, "loss": 2.0653, "step": 28313 }, { "epoch": 0.94, "grad_norm": 0.7913341522216797, "learning_rate": 1.7112021103963927e-07, "loss": 2.0558, "step": 28314 }, { "epoch": 0.94, "grad_norm": 0.7490506172180176, "learning_rate": 1.709244771202767e-07, "loss": 2.0219, "step": 28315 }, { "epoch": 0.94, "grad_norm": 0.7350800633430481, "learning_rate": 1.7072885424404996e-07, "loss": 2.0739, "step": 28316 }, { "epoch": 0.94, "grad_norm": 0.7498719096183777, "learning_rate": 1.705333424131661e-07, "loss": 2.0198, "step": 28317 }, { "epoch": 0.94, "grad_norm": 0.7415547370910645, "learning_rate": 1.7033794162983676e-07, "loss": 1.9879, "step": 28318 }, { "epoch": 0.94, "grad_norm": 0.7545487284660339, "learning_rate": 1.70142651896269e-07, "loss": 1.9818, "step": 28319 }, { "epoch": 0.94, "grad_norm": 0.7751821279525757, "learning_rate": 1.6994747321466777e-07, "loss": 2.0927, "step": 28320 }, { "epoch": 0.94, "grad_norm": 0.7195343375205994, "learning_rate": 1.6975240558724014e-07, "loss": 2.0057, "step": 28321 }, { "epoch": 0.94, "grad_norm": 0.756786584854126, "learning_rate": 1.6955744901618887e-07, "loss": 2.0354, "step": 28322 }, { "epoch": 0.94, "grad_norm": 0.7642935514450073, "learning_rate": 1.6936260350371437e-07, "loss": 2.0977, "step": 28323 }, { "epoch": 0.94, "grad_norm": 0.7660335898399353, "learning_rate": 1.6916786905202153e-07, "loss": 2.0779, "step": 28324 }, { "epoch": 0.94, "grad_norm": 0.745856523513794, "learning_rate": 1.6897324566330864e-07, "loss": 2.0278, "step": 28325 }, { "epoch": 0.94, "grad_norm": 0.742396354675293, "learning_rate": 1.6877873333977392e-07, "loss": 2.0131, "step": 28326 }, { "epoch": 0.94, "grad_norm": 0.735607922077179, "learning_rate": 1.6858433208361558e-07, "loss": 2.0453, "step": 28327 }, { "epoch": 0.94, "grad_norm": 0.7740545272827148, "learning_rate": 1.6839004189702968e-07, "loss": 2.065, "step": 28328 }, { "epoch": 0.94, "grad_norm": 0.7439558506011963, "learning_rate": 1.681958627822111e-07, "loss": 2.0043, "step": 28329 }, { "epoch": 0.94, "grad_norm": 0.7579848170280457, "learning_rate": 1.680017947413537e-07, "loss": 2.0214, "step": 28330 }, { "epoch": 0.94, "grad_norm": 0.7669976353645325, "learning_rate": 1.678078377766501e-07, "loss": 2.026, "step": 28331 }, { "epoch": 0.94, "grad_norm": 0.7325143814086914, "learning_rate": 1.6761399189029082e-07, "loss": 2.0469, "step": 28332 }, { "epoch": 0.94, "grad_norm": 0.7471593022346497, "learning_rate": 1.674202570844663e-07, "loss": 1.9697, "step": 28333 }, { "epoch": 0.94, "grad_norm": 0.7568303942680359, "learning_rate": 1.6722663336136703e-07, "loss": 2.0784, "step": 28334 }, { "epoch": 0.94, "grad_norm": 0.7640582323074341, "learning_rate": 1.670331207231768e-07, "loss": 2.1056, "step": 28335 }, { "epoch": 0.94, "grad_norm": 0.7403440475463867, "learning_rate": 1.6683971917208387e-07, "loss": 2.0621, "step": 28336 }, { "epoch": 0.94, "grad_norm": 0.7237331867218018, "learning_rate": 1.6664642871027314e-07, "loss": 2.0473, "step": 28337 }, { "epoch": 0.94, "grad_norm": 0.7541647553443909, "learning_rate": 1.6645324933992845e-07, "loss": 1.9948, "step": 28338 }, { "epoch": 0.94, "grad_norm": 0.725741446018219, "learning_rate": 1.662601810632325e-07, "loss": 1.9948, "step": 28339 }, { "epoch": 0.94, "grad_norm": 0.7446455955505371, "learning_rate": 1.660672238823646e-07, "loss": 2.0939, "step": 28340 }, { "epoch": 0.94, "grad_norm": 0.7481258511543274, "learning_rate": 1.6587437779950755e-07, "loss": 2.0493, "step": 28341 }, { "epoch": 0.94, "grad_norm": 0.7438316345214844, "learning_rate": 1.6568164281683618e-07, "loss": 2.0447, "step": 28342 }, { "epoch": 0.94, "grad_norm": 0.7250549793243408, "learning_rate": 1.6548901893653214e-07, "loss": 2.0318, "step": 28343 }, { "epoch": 0.94, "grad_norm": 0.778458297252655, "learning_rate": 1.652965061607692e-07, "loss": 2.0801, "step": 28344 }, { "epoch": 0.94, "grad_norm": 0.71019446849823, "learning_rate": 1.651041044917212e-07, "loss": 1.9876, "step": 28345 }, { "epoch": 0.94, "grad_norm": 0.7694583535194397, "learning_rate": 1.6491181393156419e-07, "loss": 2.0679, "step": 28346 }, { "epoch": 0.94, "grad_norm": 0.7302212119102478, "learning_rate": 1.6471963448246976e-07, "loss": 2.0167, "step": 28347 }, { "epoch": 0.94, "grad_norm": 0.7205811142921448, "learning_rate": 1.6452756614660948e-07, "loss": 2.0953, "step": 28348 }, { "epoch": 0.94, "grad_norm": 0.7533875703811646, "learning_rate": 1.6433560892615163e-07, "loss": 2.0678, "step": 28349 }, { "epoch": 0.94, "grad_norm": 0.7474888563156128, "learning_rate": 1.641437628232667e-07, "loss": 2.0946, "step": 28350 }, { "epoch": 0.94, "grad_norm": 0.7399880290031433, "learning_rate": 1.6395202784011965e-07, "loss": 2.0474, "step": 28351 }, { "epoch": 0.94, "grad_norm": 0.7406282424926758, "learning_rate": 1.6376040397887872e-07, "loss": 2.0079, "step": 28352 }, { "epoch": 0.94, "grad_norm": 0.7734681963920593, "learning_rate": 1.6356889124170882e-07, "loss": 2.0926, "step": 28353 }, { "epoch": 0.94, "grad_norm": 0.756147027015686, "learning_rate": 1.6337748963077273e-07, "loss": 2.0926, "step": 28354 }, { "epoch": 0.94, "grad_norm": 0.7375460267066956, "learning_rate": 1.6318619914823198e-07, "loss": 2.1086, "step": 28355 }, { "epoch": 0.94, "grad_norm": 0.7504284977912903, "learning_rate": 1.6299501979624933e-07, "loss": 1.9235, "step": 28356 }, { "epoch": 0.94, "grad_norm": 0.7604022026062012, "learning_rate": 1.6280395157698414e-07, "loss": 2.0145, "step": 28357 }, { "epoch": 0.94, "grad_norm": 0.7696889042854309, "learning_rate": 1.6261299449259472e-07, "loss": 2.0328, "step": 28358 }, { "epoch": 0.94, "grad_norm": 0.7469714879989624, "learning_rate": 1.6242214854523707e-07, "loss": 2.1047, "step": 28359 }, { "epoch": 0.94, "grad_norm": 0.7709844708442688, "learning_rate": 1.6223141373707063e-07, "loss": 2.09, "step": 28360 }, { "epoch": 0.94, "grad_norm": 0.746536374092102, "learning_rate": 1.6204079007024698e-07, "loss": 2.0776, "step": 28361 }, { "epoch": 0.94, "grad_norm": 0.7545673251152039, "learning_rate": 1.6185027754692106e-07, "loss": 2.0927, "step": 28362 }, { "epoch": 0.94, "grad_norm": 0.7516542077064514, "learning_rate": 1.616598761692456e-07, "loss": 2.0697, "step": 28363 }, { "epoch": 0.94, "grad_norm": 0.7715277075767517, "learning_rate": 1.6146958593937e-07, "loss": 2.0379, "step": 28364 }, { "epoch": 0.94, "grad_norm": 0.7570193409919739, "learning_rate": 1.6127940685944589e-07, "loss": 2.0523, "step": 28365 }, { "epoch": 0.94, "grad_norm": 0.7394590377807617, "learning_rate": 1.6108933893162039e-07, "loss": 2.0138, "step": 28366 }, { "epoch": 0.94, "grad_norm": 0.7429561614990234, "learning_rate": 1.608993821580418e-07, "loss": 2.0452, "step": 28367 }, { "epoch": 0.94, "grad_norm": 0.7527019381523132, "learning_rate": 1.6070953654085507e-07, "loss": 2.1095, "step": 28368 }, { "epoch": 0.94, "grad_norm": 0.7455555200576782, "learning_rate": 1.605198020822063e-07, "loss": 2.024, "step": 28369 }, { "epoch": 0.94, "grad_norm": 0.7376919984817505, "learning_rate": 1.6033017878423818e-07, "loss": 2.0334, "step": 28370 }, { "epoch": 0.94, "grad_norm": 0.7549187541007996, "learning_rate": 1.6014066664909234e-07, "loss": 2.0674, "step": 28371 }, { "epoch": 0.94, "grad_norm": 0.7485802173614502, "learning_rate": 1.599512656789115e-07, "loss": 1.9673, "step": 28372 }, { "epoch": 0.94, "grad_norm": 0.7388875484466553, "learning_rate": 1.5976197587583399e-07, "loss": 1.9912, "step": 28373 }, { "epoch": 0.94, "grad_norm": 0.7337551116943359, "learning_rate": 1.5957279724199914e-07, "loss": 2.063, "step": 28374 }, { "epoch": 0.94, "grad_norm": 0.7287161946296692, "learning_rate": 1.5938372977954307e-07, "loss": 2.0948, "step": 28375 }, { "epoch": 0.94, "grad_norm": 0.7483761310577393, "learning_rate": 1.5919477349060298e-07, "loss": 2.0845, "step": 28376 }, { "epoch": 0.94, "grad_norm": 0.7411022186279297, "learning_rate": 1.5900592837731264e-07, "loss": 2.0523, "step": 28377 }, { "epoch": 0.94, "grad_norm": 0.7285013198852539, "learning_rate": 1.5881719444180598e-07, "loss": 1.9989, "step": 28378 }, { "epoch": 0.94, "grad_norm": 0.7327479124069214, "learning_rate": 1.586285716862157e-07, "loss": 2.0528, "step": 28379 }, { "epoch": 0.94, "grad_norm": 0.7655050754547119, "learning_rate": 1.5844006011267122e-07, "loss": 2.0148, "step": 28380 }, { "epoch": 0.94, "grad_norm": 0.7391484379768372, "learning_rate": 1.5825165972330413e-07, "loss": 2.0778, "step": 28381 }, { "epoch": 0.94, "grad_norm": 0.7341198921203613, "learning_rate": 1.5806337052024168e-07, "loss": 2.0031, "step": 28382 }, { "epoch": 0.94, "grad_norm": 0.7228718400001526, "learning_rate": 1.578751925056121e-07, "loss": 2.0209, "step": 28383 }, { "epoch": 0.94, "grad_norm": 0.7728800177574158, "learning_rate": 1.576871256815393e-07, "loss": 2.0324, "step": 28384 }, { "epoch": 0.94, "grad_norm": 0.7297536730766296, "learning_rate": 1.5749917005014936e-07, "loss": 2.0626, "step": 28385 }, { "epoch": 0.94, "grad_norm": 0.7636086344718933, "learning_rate": 1.573113256135661e-07, "loss": 2.0235, "step": 28386 }, { "epoch": 0.94, "grad_norm": 0.7305300235748291, "learning_rate": 1.5712359237391118e-07, "loss": 2.0663, "step": 28387 }, { "epoch": 0.94, "grad_norm": 0.7554391026496887, "learning_rate": 1.569359703333051e-07, "loss": 2.0619, "step": 28388 }, { "epoch": 0.94, "grad_norm": 0.7666745185852051, "learning_rate": 1.567484594938673e-07, "loss": 2.0191, "step": 28389 }, { "epoch": 0.94, "grad_norm": 0.7664971351623535, "learning_rate": 1.5656105985771718e-07, "loss": 2.0224, "step": 28390 }, { "epoch": 0.94, "grad_norm": 0.7475983500480652, "learning_rate": 1.5637377142697196e-07, "loss": 2.0678, "step": 28391 }, { "epoch": 0.94, "grad_norm": 0.7408192753791809, "learning_rate": 1.561865942037466e-07, "loss": 2.025, "step": 28392 }, { "epoch": 0.94, "grad_norm": 0.7450444102287292, "learning_rate": 1.5599952819015496e-07, "loss": 1.9791, "step": 28393 }, { "epoch": 0.94, "grad_norm": 0.7386817932128906, "learning_rate": 1.5581257338831313e-07, "loss": 2.0332, "step": 28394 }, { "epoch": 0.94, "grad_norm": 0.732686460018158, "learning_rate": 1.5562572980033053e-07, "loss": 2.0748, "step": 28395 }, { "epoch": 0.94, "grad_norm": 0.7775156497955322, "learning_rate": 1.5543899742831993e-07, "loss": 1.987, "step": 28396 }, { "epoch": 0.94, "grad_norm": 0.7728347182273865, "learning_rate": 1.552523762743885e-07, "loss": 2.0586, "step": 28397 }, { "epoch": 0.94, "grad_norm": 0.7374674081802368, "learning_rate": 1.5506586634064792e-07, "loss": 2.0743, "step": 28398 }, { "epoch": 0.94, "grad_norm": 0.7305232882499695, "learning_rate": 1.5487946762920203e-07, "loss": 1.9663, "step": 28399 }, { "epoch": 0.94, "grad_norm": 0.7428101301193237, "learning_rate": 1.5469318014215917e-07, "loss": 2.045, "step": 28400 }, { "epoch": 0.94, "grad_norm": 0.7395274043083191, "learning_rate": 1.545070038816232e-07, "loss": 2.045, "step": 28401 }, { "epoch": 0.94, "grad_norm": 0.7485905885696411, "learning_rate": 1.543209388496958e-07, "loss": 2.0359, "step": 28402 }, { "epoch": 0.94, "grad_norm": 0.7275430560112, "learning_rate": 1.541349850484797e-07, "loss": 2.0249, "step": 28403 }, { "epoch": 0.95, "grad_norm": 0.7517231702804565, "learning_rate": 1.5394914248007875e-07, "loss": 2.1011, "step": 28404 }, { "epoch": 0.95, "grad_norm": 0.7438262701034546, "learning_rate": 1.537634111465891e-07, "loss": 2.0846, "step": 28405 }, { "epoch": 0.95, "grad_norm": 0.7387683987617493, "learning_rate": 1.5357779105010905e-07, "loss": 2.0926, "step": 28406 }, { "epoch": 0.95, "grad_norm": 0.7335829138755798, "learning_rate": 1.5339228219273694e-07, "loss": 1.9862, "step": 28407 }, { "epoch": 0.95, "grad_norm": 0.7312666773796082, "learning_rate": 1.5320688457656774e-07, "loss": 2.0478, "step": 28408 }, { "epoch": 0.95, "grad_norm": 0.7414546012878418, "learning_rate": 1.5302159820369756e-07, "loss": 2.0636, "step": 28409 }, { "epoch": 0.95, "grad_norm": 0.7388017773628235, "learning_rate": 1.5283642307621693e-07, "loss": 2.0177, "step": 28410 }, { "epoch": 0.95, "grad_norm": 0.7487220764160156, "learning_rate": 1.5265135919622086e-07, "loss": 1.9636, "step": 28411 }, { "epoch": 0.95, "grad_norm": 0.7452407479286194, "learning_rate": 1.524664065657977e-07, "loss": 2.0826, "step": 28412 }, { "epoch": 0.95, "grad_norm": 0.7256044745445251, "learning_rate": 1.5228156518703906e-07, "loss": 2.0104, "step": 28413 }, { "epoch": 0.95, "grad_norm": 0.7575449347496033, "learning_rate": 1.520968350620311e-07, "loss": 2.1031, "step": 28414 }, { "epoch": 0.95, "grad_norm": 0.7580878734588623, "learning_rate": 1.5191221619286102e-07, "loss": 2.0408, "step": 28415 }, { "epoch": 0.95, "grad_norm": 0.7436800599098206, "learning_rate": 1.5172770858161601e-07, "loss": 2.0373, "step": 28416 }, { "epoch": 0.95, "grad_norm": 0.726304292678833, "learning_rate": 1.5154331223037887e-07, "loss": 2.0146, "step": 28417 }, { "epoch": 0.95, "grad_norm": 0.7285186648368835, "learning_rate": 1.5135902714123464e-07, "loss": 2.0306, "step": 28418 }, { "epoch": 0.95, "grad_norm": 0.7661466002464294, "learning_rate": 1.511748533162638e-07, "loss": 2.0944, "step": 28419 }, { "epoch": 0.95, "grad_norm": 0.7457267045974731, "learning_rate": 1.5099079075754808e-07, "loss": 1.967, "step": 28420 }, { "epoch": 0.95, "grad_norm": 0.7371566295623779, "learning_rate": 1.5080683946716578e-07, "loss": 2.0362, "step": 28421 }, { "epoch": 0.95, "grad_norm": 0.7245676517486572, "learning_rate": 1.5062299944719526e-07, "loss": 2.0361, "step": 28422 }, { "epoch": 0.95, "grad_norm": 0.7544835805892944, "learning_rate": 1.5043927069971487e-07, "loss": 2.0405, "step": 28423 }, { "epoch": 0.95, "grad_norm": 0.7272149920463562, "learning_rate": 1.5025565322679848e-07, "loss": 2.0123, "step": 28424 }, { "epoch": 0.95, "grad_norm": 0.7294792532920837, "learning_rate": 1.5007214703051997e-07, "loss": 2.0913, "step": 28425 }, { "epoch": 0.95, "grad_norm": 0.7548956274986267, "learning_rate": 1.498887521129555e-07, "loss": 2.0717, "step": 28426 }, { "epoch": 0.95, "grad_norm": 0.7533094882965088, "learning_rate": 1.4970546847617562e-07, "loss": 2.0662, "step": 28427 }, { "epoch": 0.95, "grad_norm": 0.7679675817489624, "learning_rate": 1.4952229612224866e-07, "loss": 2.0905, "step": 28428 }, { "epoch": 0.95, "grad_norm": 0.7357742190361023, "learning_rate": 1.4933923505324745e-07, "loss": 2.0294, "step": 28429 }, { "epoch": 0.95, "grad_norm": 0.7486264705657959, "learning_rate": 1.4915628527123693e-07, "loss": 2.0882, "step": 28430 }, { "epoch": 0.95, "grad_norm": 0.7551736235618591, "learning_rate": 1.489734467782866e-07, "loss": 2.126, "step": 28431 }, { "epoch": 0.95, "grad_norm": 0.7354605197906494, "learning_rate": 1.487907195764604e-07, "loss": 2.0008, "step": 28432 }, { "epoch": 0.95, "grad_norm": 0.7396884560585022, "learning_rate": 1.4860810366782442e-07, "loss": 2.1194, "step": 28433 }, { "epoch": 0.95, "grad_norm": 0.7500935792922974, "learning_rate": 1.4842559905443921e-07, "loss": 2.0154, "step": 28434 }, { "epoch": 0.95, "grad_norm": 0.7391665577888489, "learning_rate": 1.482432057383687e-07, "loss": 1.9955, "step": 28435 }, { "epoch": 0.95, "grad_norm": 0.7395474314689636, "learning_rate": 1.4806092372167348e-07, "loss": 2.0286, "step": 28436 }, { "epoch": 0.95, "grad_norm": 0.7288504242897034, "learning_rate": 1.4787875300641074e-07, "loss": 2.092, "step": 28437 }, { "epoch": 0.95, "grad_norm": 0.7440198063850403, "learning_rate": 1.4769669359464e-07, "loss": 2.0051, "step": 28438 }, { "epoch": 0.95, "grad_norm": 0.7375121116638184, "learning_rate": 1.4751474548841848e-07, "loss": 2.0279, "step": 28439 }, { "epoch": 0.95, "grad_norm": 0.7525367736816406, "learning_rate": 1.4733290868980232e-07, "loss": 2.0412, "step": 28440 }, { "epoch": 0.95, "grad_norm": 0.7290826439857483, "learning_rate": 1.471511832008432e-07, "loss": 2.0019, "step": 28441 }, { "epoch": 0.95, "grad_norm": 0.7247813940048218, "learning_rate": 1.4696956902359616e-07, "loss": 2.0253, "step": 28442 }, { "epoch": 0.95, "grad_norm": 0.7464855909347534, "learning_rate": 1.4678806616011287e-07, "loss": 2.025, "step": 28443 }, { "epoch": 0.95, "grad_norm": 0.7288612723350525, "learning_rate": 1.466066746124417e-07, "loss": 2.0106, "step": 28444 }, { "epoch": 0.95, "grad_norm": 0.7284706234931946, "learning_rate": 1.464253943826366e-07, "loss": 1.9789, "step": 28445 }, { "epoch": 0.95, "grad_norm": 0.7453920841217041, "learning_rate": 1.462442254727403e-07, "loss": 2.0966, "step": 28446 }, { "epoch": 0.95, "grad_norm": 0.7240303754806519, "learning_rate": 1.4606316788480236e-07, "loss": 2.058, "step": 28447 }, { "epoch": 0.95, "grad_norm": 0.735941469669342, "learning_rate": 1.4588222162086884e-07, "loss": 2.0197, "step": 28448 }, { "epoch": 0.95, "grad_norm": 0.7546452879905701, "learning_rate": 1.457013866829826e-07, "loss": 2.1018, "step": 28449 }, { "epoch": 0.95, "grad_norm": 0.8033734560012817, "learning_rate": 1.4552066307318536e-07, "loss": 2.0865, "step": 28450 }, { "epoch": 0.95, "grad_norm": 0.7451728582382202, "learning_rate": 1.453400507935221e-07, "loss": 2.0198, "step": 28451 }, { "epoch": 0.95, "grad_norm": 0.7348548173904419, "learning_rate": 1.451595498460312e-07, "loss": 2.0615, "step": 28452 }, { "epoch": 0.95, "grad_norm": 0.7506760358810425, "learning_rate": 1.4497916023275104e-07, "loss": 2.0471, "step": 28453 }, { "epoch": 0.95, "grad_norm": 0.7752646803855896, "learning_rate": 1.4479888195572222e-07, "loss": 2.0916, "step": 28454 }, { "epoch": 0.95, "grad_norm": 0.729526937007904, "learning_rate": 1.4461871501697976e-07, "loss": 1.9575, "step": 28455 }, { "epoch": 0.95, "grad_norm": 0.7493242621421814, "learning_rate": 1.4443865941855762e-07, "loss": 2.0057, "step": 28456 }, { "epoch": 0.95, "grad_norm": 0.7409965991973877, "learning_rate": 1.4425871516249302e-07, "loss": 2.0127, "step": 28457 }, { "epoch": 0.95, "grad_norm": 0.7261967062950134, "learning_rate": 1.4407888225081768e-07, "loss": 2.0974, "step": 28458 }, { "epoch": 0.95, "grad_norm": 0.7618956565856934, "learning_rate": 1.438991606855622e-07, "loss": 2.0177, "step": 28459 }, { "epoch": 0.95, "grad_norm": 0.7207528948783875, "learning_rate": 1.437195504687572e-07, "loss": 2.0309, "step": 28460 }, { "epoch": 0.95, "grad_norm": 0.7475081086158752, "learning_rate": 1.4354005160243434e-07, "loss": 1.9605, "step": 28461 }, { "epoch": 0.95, "grad_norm": 0.7617717385292053, "learning_rate": 1.433606640886187e-07, "loss": 2.0666, "step": 28462 }, { "epoch": 0.95, "grad_norm": 0.7372684478759766, "learning_rate": 1.4318138792933646e-07, "loss": 2.0078, "step": 28463 }, { "epoch": 0.95, "grad_norm": 0.7516733407974243, "learning_rate": 1.4300222312661593e-07, "loss": 2.0079, "step": 28464 }, { "epoch": 0.95, "grad_norm": 0.7308643460273743, "learning_rate": 1.4282316968247778e-07, "loss": 2.0602, "step": 28465 }, { "epoch": 0.95, "grad_norm": 0.731998085975647, "learning_rate": 1.4264422759894702e-07, "loss": 2.1157, "step": 28466 }, { "epoch": 0.95, "grad_norm": 0.7614666819572449, "learning_rate": 1.424653968780465e-07, "loss": 2.09, "step": 28467 }, { "epoch": 0.95, "grad_norm": 0.759365975856781, "learning_rate": 1.4228667752179347e-07, "loss": 2.0183, "step": 28468 }, { "epoch": 0.95, "grad_norm": 0.7782214283943176, "learning_rate": 1.4210806953220857e-07, "loss": 2.0965, "step": 28469 }, { "epoch": 0.95, "grad_norm": 0.7407270669937134, "learning_rate": 1.4192957291130905e-07, "loss": 2.0706, "step": 28470 }, { "epoch": 0.95, "grad_norm": 0.7834709882736206, "learning_rate": 1.417511876611133e-07, "loss": 2.0149, "step": 28471 }, { "epoch": 0.95, "grad_norm": 0.738029956817627, "learning_rate": 1.4157291378363303e-07, "loss": 2.0759, "step": 28472 }, { "epoch": 0.95, "grad_norm": 0.7465745210647583, "learning_rate": 1.4139475128088552e-07, "loss": 2.1144, "step": 28473 }, { "epoch": 0.95, "grad_norm": 0.7632477879524231, "learning_rate": 1.4121670015488253e-07, "loss": 2.0558, "step": 28474 }, { "epoch": 0.95, "grad_norm": 0.7353018522262573, "learning_rate": 1.4103876040763466e-07, "loss": 2.0965, "step": 28475 }, { "epoch": 0.95, "grad_norm": 0.7343853116035461, "learning_rate": 1.408609320411536e-07, "loss": 2.0103, "step": 28476 }, { "epoch": 0.95, "grad_norm": 0.7195243835449219, "learning_rate": 1.4068321505744776e-07, "loss": 1.9915, "step": 28477 }, { "epoch": 0.95, "grad_norm": 0.727735698223114, "learning_rate": 1.4050560945852444e-07, "loss": 1.9919, "step": 28478 }, { "epoch": 0.95, "grad_norm": 0.7325900793075562, "learning_rate": 1.403281152463909e-07, "loss": 2.0129, "step": 28479 }, { "epoch": 0.95, "grad_norm": 0.7481728196144104, "learning_rate": 1.4015073242305222e-07, "loss": 2.1057, "step": 28480 }, { "epoch": 0.95, "grad_norm": 0.7408105134963989, "learning_rate": 1.3997346099051123e-07, "loss": 2.0275, "step": 28481 }, { "epoch": 0.95, "grad_norm": 0.7865252494812012, "learning_rate": 1.397963009507719e-07, "loss": 2.0462, "step": 28482 }, { "epoch": 0.95, "grad_norm": 0.7346115708351135, "learning_rate": 1.3961925230583484e-07, "loss": 2.0036, "step": 28483 }, { "epoch": 0.95, "grad_norm": 0.798490047454834, "learning_rate": 1.3944231505770178e-07, "loss": 1.9704, "step": 28484 }, { "epoch": 0.95, "grad_norm": 0.715848445892334, "learning_rate": 1.3926548920837002e-07, "loss": 2.0676, "step": 28485 }, { "epoch": 0.95, "grad_norm": 0.7405949234962463, "learning_rate": 1.3908877475983796e-07, "loss": 2.0536, "step": 28486 }, { "epoch": 0.95, "grad_norm": 0.7531590461730957, "learning_rate": 1.3891217171410177e-07, "loss": 2.0468, "step": 28487 }, { "epoch": 0.95, "grad_norm": 0.7611472010612488, "learning_rate": 1.3873568007315652e-07, "loss": 2.0526, "step": 28488 }, { "epoch": 0.95, "grad_norm": 0.7280851602554321, "learning_rate": 1.385592998389973e-07, "loss": 2.0062, "step": 28489 }, { "epoch": 0.95, "grad_norm": 0.7876123189926147, "learning_rate": 1.3838303101361471e-07, "loss": 2.0608, "step": 28490 }, { "epoch": 0.95, "grad_norm": 0.7556420564651489, "learning_rate": 1.3820687359900053e-07, "loss": 2.0484, "step": 28491 }, { "epoch": 0.95, "grad_norm": 0.7811589241027832, "learning_rate": 1.3803082759714648e-07, "loss": 2.0772, "step": 28492 }, { "epoch": 0.95, "grad_norm": 0.7394065260887146, "learning_rate": 1.3785489301004096e-07, "loss": 2.0052, "step": 28493 }, { "epoch": 0.95, "grad_norm": 0.7497679591178894, "learning_rate": 1.3767906983967018e-07, "loss": 2.0939, "step": 28494 }, { "epoch": 0.95, "grad_norm": 0.7506534457206726, "learning_rate": 1.375033580880214e-07, "loss": 2.0704, "step": 28495 }, { "epoch": 0.95, "grad_norm": 0.7549369931221008, "learning_rate": 1.3732775775707973e-07, "loss": 2.0543, "step": 28496 }, { "epoch": 0.95, "grad_norm": 0.7826048135757446, "learning_rate": 1.3715226884882916e-07, "loss": 2.0624, "step": 28497 }, { "epoch": 0.95, "grad_norm": 0.7772423624992371, "learning_rate": 1.369768913652525e-07, "loss": 2.0787, "step": 28498 }, { "epoch": 0.95, "grad_norm": 0.7651323080062866, "learning_rate": 1.3680162530833042e-07, "loss": 2.0885, "step": 28499 }, { "epoch": 0.95, "grad_norm": 0.7521963715553284, "learning_rate": 1.3662647068004242e-07, "loss": 2.0431, "step": 28500 }, { "epoch": 0.95, "grad_norm": 0.7563682794570923, "learning_rate": 1.3645142748236805e-07, "loss": 1.9652, "step": 28501 }, { "epoch": 0.95, "grad_norm": 0.7326636910438538, "learning_rate": 1.3627649571728573e-07, "loss": 2.016, "step": 28502 }, { "epoch": 0.95, "grad_norm": 0.727057695388794, "learning_rate": 1.3610167538677054e-07, "loss": 2.0277, "step": 28503 }, { "epoch": 0.95, "grad_norm": 0.7421227097511292, "learning_rate": 1.3592696649279758e-07, "loss": 1.9988, "step": 28504 }, { "epoch": 0.95, "grad_norm": 0.7919015884399414, "learning_rate": 1.3575236903734078e-07, "loss": 2.0871, "step": 28505 }, { "epoch": 0.95, "grad_norm": 0.758823573589325, "learning_rate": 1.3557788302237308e-07, "loss": 2.0222, "step": 28506 }, { "epoch": 0.95, "grad_norm": 0.7408049702644348, "learning_rate": 1.3540350844986505e-07, "loss": 2.0455, "step": 28507 }, { "epoch": 0.95, "grad_norm": 0.7485356330871582, "learning_rate": 1.3522924532178738e-07, "loss": 2.0465, "step": 28508 }, { "epoch": 0.95, "grad_norm": 0.7242406010627747, "learning_rate": 1.350550936401085e-07, "loss": 2.0553, "step": 28509 }, { "epoch": 0.95, "grad_norm": 0.7367553114891052, "learning_rate": 1.3488105340679457e-07, "loss": 2.0412, "step": 28510 }, { "epoch": 0.95, "grad_norm": 0.7277079224586487, "learning_rate": 1.3470712462381408e-07, "loss": 1.9896, "step": 28511 }, { "epoch": 0.95, "grad_norm": 0.7252501249313354, "learning_rate": 1.3453330729313098e-07, "loss": 2.0043, "step": 28512 }, { "epoch": 0.95, "grad_norm": 0.7573903799057007, "learning_rate": 1.3435960141670813e-07, "loss": 2.1117, "step": 28513 }, { "epoch": 0.95, "grad_norm": 0.7289398312568665, "learning_rate": 1.341860069965084e-07, "loss": 2.0753, "step": 28514 }, { "epoch": 0.95, "grad_norm": 0.7309209108352661, "learning_rate": 1.3401252403449472e-07, "loss": 2.0573, "step": 28515 }, { "epoch": 0.95, "grad_norm": 0.7334218621253967, "learning_rate": 1.3383915253262436e-07, "loss": 2.0946, "step": 28516 }, { "epoch": 0.95, "grad_norm": 0.7478011846542358, "learning_rate": 1.3366589249285687e-07, "loss": 1.9797, "step": 28517 }, { "epoch": 0.95, "grad_norm": 0.7408818006515503, "learning_rate": 1.3349274391715072e-07, "loss": 2.0055, "step": 28518 }, { "epoch": 0.95, "grad_norm": 0.7568060159683228, "learning_rate": 1.3331970680745986e-07, "loss": 2.0176, "step": 28519 }, { "epoch": 0.95, "grad_norm": 0.719158947467804, "learning_rate": 1.3314678116574166e-07, "loss": 2.0505, "step": 28520 }, { "epoch": 0.95, "grad_norm": 0.736534059047699, "learning_rate": 1.3297396699394893e-07, "loss": 2.0351, "step": 28521 }, { "epoch": 0.95, "grad_norm": 0.7723605632781982, "learning_rate": 1.328012642940324e-07, "loss": 2.0639, "step": 28522 }, { "epoch": 0.95, "grad_norm": 0.7540204524993896, "learning_rate": 1.326286730679449e-07, "loss": 2.0851, "step": 28523 }, { "epoch": 0.95, "grad_norm": 0.7801617980003357, "learning_rate": 1.3245619331763716e-07, "loss": 2.1001, "step": 28524 }, { "epoch": 0.95, "grad_norm": 0.7578858733177185, "learning_rate": 1.3228382504505532e-07, "loss": 2.0432, "step": 28525 }, { "epoch": 0.95, "grad_norm": 0.7669897675514221, "learning_rate": 1.3211156825214676e-07, "loss": 2.0629, "step": 28526 }, { "epoch": 0.95, "grad_norm": 0.7698136568069458, "learning_rate": 1.3193942294085992e-07, "loss": 2.007, "step": 28527 }, { "epoch": 0.95, "grad_norm": 0.729936957359314, "learning_rate": 1.317673891131377e-07, "loss": 2.0371, "step": 28528 }, { "epoch": 0.95, "grad_norm": 0.7488539814949036, "learning_rate": 1.3159546677092516e-07, "loss": 2.0207, "step": 28529 }, { "epoch": 0.95, "grad_norm": 0.7614069581031799, "learning_rate": 1.3142365591616303e-07, "loss": 2.0641, "step": 28530 }, { "epoch": 0.95, "grad_norm": 0.7549223899841309, "learning_rate": 1.3125195655079192e-07, "loss": 2.0375, "step": 28531 }, { "epoch": 0.95, "grad_norm": 0.7765169739723206, "learning_rate": 1.3108036867675256e-07, "loss": 2.0224, "step": 28532 }, { "epoch": 0.95, "grad_norm": 0.735680103302002, "learning_rate": 1.3090889229598447e-07, "loss": 2.0758, "step": 28533 }, { "epoch": 0.95, "grad_norm": 0.7738703489303589, "learning_rate": 1.3073752741042388e-07, "loss": 2.0431, "step": 28534 }, { "epoch": 0.95, "grad_norm": 0.7168803811073303, "learning_rate": 1.3056627402200594e-07, "loss": 1.9887, "step": 28535 }, { "epoch": 0.95, "grad_norm": 0.7260355353355408, "learning_rate": 1.3039513213266685e-07, "loss": 2.0265, "step": 28536 }, { "epoch": 0.95, "grad_norm": 0.7297505140304565, "learning_rate": 1.3022410174433842e-07, "loss": 2.0558, "step": 28537 }, { "epoch": 0.95, "grad_norm": 0.7644875645637512, "learning_rate": 1.3005318285895574e-07, "loss": 2.088, "step": 28538 }, { "epoch": 0.95, "grad_norm": 0.7596598863601685, "learning_rate": 1.2988237547844617e-07, "loss": 1.9719, "step": 28539 }, { "epoch": 0.95, "grad_norm": 0.7348309755325317, "learning_rate": 1.2971167960474152e-07, "loss": 2.0688, "step": 28540 }, { "epoch": 0.95, "grad_norm": 0.7433788776397705, "learning_rate": 1.295410952397691e-07, "loss": 2.062, "step": 28541 }, { "epoch": 0.95, "grad_norm": 0.7096604108810425, "learning_rate": 1.293706223854585e-07, "loss": 2.0596, "step": 28542 }, { "epoch": 0.95, "grad_norm": 0.7426185011863708, "learning_rate": 1.2920026104373261e-07, "loss": 2.0389, "step": 28543 }, { "epoch": 0.95, "grad_norm": 0.7203823924064636, "learning_rate": 1.290300112165177e-07, "loss": 2.0641, "step": 28544 }, { "epoch": 0.95, "grad_norm": 0.7482665777206421, "learning_rate": 1.2885987290573555e-07, "loss": 2.035, "step": 28545 }, { "epoch": 0.95, "grad_norm": 0.7347431778907776, "learning_rate": 1.2868984611331124e-07, "loss": 2.0475, "step": 28546 }, { "epoch": 0.95, "grad_norm": 0.727058470249176, "learning_rate": 1.2851993084116333e-07, "loss": 2.0721, "step": 28547 }, { "epoch": 0.95, "grad_norm": 0.7440776824951172, "learning_rate": 1.283501270912113e-07, "loss": 2.0266, "step": 28548 }, { "epoch": 0.95, "grad_norm": 0.7505230903625488, "learning_rate": 1.281804348653748e-07, "loss": 2.0586, "step": 28549 }, { "epoch": 0.95, "grad_norm": 0.741380512714386, "learning_rate": 1.2801085416557003e-07, "loss": 2.0498, "step": 28550 }, { "epoch": 0.95, "grad_norm": 0.752052903175354, "learning_rate": 1.2784138499371325e-07, "loss": 2.0557, "step": 28551 }, { "epoch": 0.95, "grad_norm": 0.7592599987983704, "learning_rate": 1.2767202735171958e-07, "loss": 2.0873, "step": 28552 }, { "epoch": 0.95, "grad_norm": 0.7741233706474304, "learning_rate": 1.2750278124149973e-07, "loss": 2.0656, "step": 28553 }, { "epoch": 0.95, "grad_norm": 0.7629435658454895, "learning_rate": 1.2733364666496883e-07, "loss": 2.058, "step": 28554 }, { "epoch": 0.95, "grad_norm": 0.7637726068496704, "learning_rate": 1.2716462362403649e-07, "loss": 2.0836, "step": 28555 }, { "epoch": 0.95, "grad_norm": 0.7383171916007996, "learning_rate": 1.2699571212061225e-07, "loss": 2.0243, "step": 28556 }, { "epoch": 0.95, "grad_norm": 0.7305157780647278, "learning_rate": 1.2682691215660347e-07, "loss": 1.9554, "step": 28557 }, { "epoch": 0.95, "grad_norm": 0.7961694002151489, "learning_rate": 1.2665822373391978e-07, "loss": 2.0745, "step": 28558 }, { "epoch": 0.95, "grad_norm": 0.7379926443099976, "learning_rate": 1.2648964685446296e-07, "loss": 1.9991, "step": 28559 }, { "epoch": 0.95, "grad_norm": 0.7493047118186951, "learning_rate": 1.263211815201415e-07, "loss": 2.0541, "step": 28560 }, { "epoch": 0.95, "grad_norm": 0.7484104633331299, "learning_rate": 1.2615282773285498e-07, "loss": 2.0519, "step": 28561 }, { "epoch": 0.95, "grad_norm": 0.7362362146377563, "learning_rate": 1.2598458549450853e-07, "loss": 2.0116, "step": 28562 }, { "epoch": 0.95, "grad_norm": 0.7650010585784912, "learning_rate": 1.2581645480700066e-07, "loss": 2.0445, "step": 28563 }, { "epoch": 0.95, "grad_norm": 0.7376635670661926, "learning_rate": 1.2564843567223317e-07, "loss": 2.0892, "step": 28564 }, { "epoch": 0.95, "grad_norm": 0.764620840549469, "learning_rate": 1.254805280921012e-07, "loss": 2.083, "step": 28565 }, { "epoch": 0.95, "grad_norm": 0.7527552843093872, "learning_rate": 1.2531273206850325e-07, "loss": 2.0129, "step": 28566 }, { "epoch": 0.95, "grad_norm": 0.7629365921020508, "learning_rate": 1.2514504760333557e-07, "loss": 2.0759, "step": 28567 }, { "epoch": 0.95, "grad_norm": 0.7253546118736267, "learning_rate": 1.2497747469849108e-07, "loss": 2.0146, "step": 28568 }, { "epoch": 0.95, "grad_norm": 0.8023061752319336, "learning_rate": 1.2481001335586497e-07, "loss": 2.0228, "step": 28569 }, { "epoch": 0.95, "grad_norm": 0.7748093605041504, "learning_rate": 1.2464266357734677e-07, "loss": 2.0914, "step": 28570 }, { "epoch": 0.95, "grad_norm": 0.7617517709732056, "learning_rate": 1.244754253648295e-07, "loss": 2.0394, "step": 28571 }, { "epoch": 0.95, "grad_norm": 0.7129969596862793, "learning_rate": 1.2430829872019934e-07, "loss": 2.0703, "step": 28572 }, { "epoch": 0.95, "grad_norm": 0.7633289694786072, "learning_rate": 1.2414128364534818e-07, "loss": 2.0278, "step": 28573 }, { "epoch": 0.95, "grad_norm": 0.7370293736457825, "learning_rate": 1.2397438014216e-07, "loss": 1.968, "step": 28574 }, { "epoch": 0.95, "grad_norm": 0.7366570830345154, "learning_rate": 1.2380758821252116e-07, "loss": 2.0212, "step": 28575 }, { "epoch": 0.95, "grad_norm": 0.7286252379417419, "learning_rate": 1.2364090785831672e-07, "loss": 1.9655, "step": 28576 }, { "epoch": 0.95, "grad_norm": 0.7130202054977417, "learning_rate": 1.2347433908142858e-07, "loss": 1.9784, "step": 28577 }, { "epoch": 0.95, "grad_norm": 0.7651662230491638, "learning_rate": 1.2330788188373965e-07, "loss": 2.0205, "step": 28578 }, { "epoch": 0.95, "grad_norm": 0.7573035359382629, "learning_rate": 1.2314153626712954e-07, "loss": 2.0897, "step": 28579 }, { "epoch": 0.95, "grad_norm": 0.7515241503715515, "learning_rate": 1.2297530223347898e-07, "loss": 2.016, "step": 28580 }, { "epoch": 0.95, "grad_norm": 0.7582022547721863, "learning_rate": 1.2280917978466423e-07, "loss": 2.0049, "step": 28581 }, { "epoch": 0.95, "grad_norm": 0.7236115336418152, "learning_rate": 1.2264316892256266e-07, "loss": 2.0393, "step": 28582 }, { "epoch": 0.95, "grad_norm": 0.7457336187362671, "learning_rate": 1.224772696490495e-07, "loss": 2.0485, "step": 28583 }, { "epoch": 0.95, "grad_norm": 0.7515358328819275, "learning_rate": 1.2231148196600096e-07, "loss": 2.094, "step": 28584 }, { "epoch": 0.95, "grad_norm": 0.752422571182251, "learning_rate": 1.2214580587528668e-07, "loss": 2.0487, "step": 28585 }, { "epoch": 0.95, "grad_norm": 0.7412647008895874, "learning_rate": 1.2198024137878073e-07, "loss": 2.0612, "step": 28586 }, { "epoch": 0.95, "grad_norm": 0.7286475300788879, "learning_rate": 1.218147884783527e-07, "loss": 2.0701, "step": 28587 }, { "epoch": 0.95, "grad_norm": 0.7538265585899353, "learning_rate": 1.2164944717587224e-07, "loss": 2.0521, "step": 28588 }, { "epoch": 0.95, "grad_norm": 0.7535181045532227, "learning_rate": 1.214842174732067e-07, "loss": 2.0615, "step": 28589 }, { "epoch": 0.95, "grad_norm": 0.7363185882568359, "learning_rate": 1.2131909937222354e-07, "loss": 2.0222, "step": 28590 }, { "epoch": 0.95, "grad_norm": 0.7342584133148193, "learning_rate": 1.2115409287478785e-07, "loss": 2.0144, "step": 28591 }, { "epoch": 0.95, "grad_norm": 0.7449294328689575, "learning_rate": 1.2098919798276376e-07, "loss": 2.041, "step": 28592 }, { "epoch": 0.95, "grad_norm": 0.7489109039306641, "learning_rate": 1.2082441469801308e-07, "loss": 2.0568, "step": 28593 }, { "epoch": 0.95, "grad_norm": 0.7555636167526245, "learning_rate": 1.206597430223988e-07, "loss": 2.0544, "step": 28594 }, { "epoch": 0.95, "grad_norm": 0.7566273808479309, "learning_rate": 1.2049518295778162e-07, "loss": 2.0331, "step": 28595 }, { "epoch": 0.95, "grad_norm": 0.7314335107803345, "learning_rate": 1.2033073450602006e-07, "loss": 2.0515, "step": 28596 }, { "epoch": 0.95, "grad_norm": 0.7581839561462402, "learning_rate": 1.2016639766897043e-07, "loss": 2.0605, "step": 28597 }, { "epoch": 0.95, "grad_norm": 0.7689090371131897, "learning_rate": 1.2000217244849122e-07, "loss": 2.0592, "step": 28598 }, { "epoch": 0.95, "grad_norm": 0.7555123567581177, "learning_rate": 1.1983805884643763e-07, "loss": 2.1348, "step": 28599 }, { "epoch": 0.95, "grad_norm": 0.7491744756698608, "learning_rate": 1.196740568646626e-07, "loss": 2.0613, "step": 28600 }, { "epoch": 0.95, "grad_norm": 0.7228789329528809, "learning_rate": 1.1951016650502024e-07, "loss": 1.9965, "step": 28601 }, { "epoch": 0.95, "grad_norm": 0.7371302247047424, "learning_rate": 1.1934638776936015e-07, "loss": 2.0976, "step": 28602 }, { "epoch": 0.95, "grad_norm": 0.7275402545928955, "learning_rate": 1.191827206595364e-07, "loss": 2.0328, "step": 28603 }, { "epoch": 0.95, "grad_norm": 0.726569414138794, "learning_rate": 1.1901916517739421e-07, "loss": 2.0449, "step": 28604 }, { "epoch": 0.95, "grad_norm": 0.7519346475601196, "learning_rate": 1.1885572132478207e-07, "loss": 1.9365, "step": 28605 }, { "epoch": 0.95, "grad_norm": 0.740821361541748, "learning_rate": 1.1869238910354852e-07, "loss": 2.031, "step": 28606 }, { "epoch": 0.95, "grad_norm": 0.7059916853904724, "learning_rate": 1.1852916851553542e-07, "loss": 2.0358, "step": 28607 }, { "epoch": 0.95, "grad_norm": 0.7687329053878784, "learning_rate": 1.1836605956259017e-07, "loss": 1.9872, "step": 28608 }, { "epoch": 0.95, "grad_norm": 0.7150636911392212, "learning_rate": 1.1820306224655353e-07, "loss": 2.0439, "step": 28609 }, { "epoch": 0.95, "grad_norm": 0.7502188682556152, "learning_rate": 1.1804017656926736e-07, "loss": 2.1067, "step": 28610 }, { "epoch": 0.95, "grad_norm": 0.7261785268783569, "learning_rate": 1.178774025325713e-07, "loss": 2.0485, "step": 28611 }, { "epoch": 0.95, "grad_norm": 0.7605353593826294, "learning_rate": 1.1771474013830608e-07, "loss": 2.0691, "step": 28612 }, { "epoch": 0.95, "grad_norm": 0.7837786674499512, "learning_rate": 1.1755218938830803e-07, "loss": 1.962, "step": 28613 }, { "epoch": 0.95, "grad_norm": 0.747161328792572, "learning_rate": 1.1738975028441236e-07, "loss": 2.0765, "step": 28614 }, { "epoch": 0.95, "grad_norm": 0.7233255505561829, "learning_rate": 1.1722742282845756e-07, "loss": 2.0796, "step": 28615 }, { "epoch": 0.95, "grad_norm": 0.7441611886024475, "learning_rate": 1.170652070222733e-07, "loss": 2.0386, "step": 28616 }, { "epoch": 0.95, "grad_norm": 0.7729007005691528, "learning_rate": 1.169031028676959e-07, "loss": 1.9767, "step": 28617 }, { "epoch": 0.95, "grad_norm": 0.7489969730377197, "learning_rate": 1.1674111036655389e-07, "loss": 2.0437, "step": 28618 }, { "epoch": 0.95, "grad_norm": 0.752183735370636, "learning_rate": 1.1657922952068024e-07, "loss": 2.0438, "step": 28619 }, { "epoch": 0.95, "grad_norm": 0.75187087059021, "learning_rate": 1.1641746033190015e-07, "loss": 2.0541, "step": 28620 }, { "epoch": 0.95, "grad_norm": 0.7474924921989441, "learning_rate": 1.1625580280204552e-07, "loss": 2.0781, "step": 28621 }, { "epoch": 0.95, "grad_norm": 0.7701267600059509, "learning_rate": 1.160942569329393e-07, "loss": 2.078, "step": 28622 }, { "epoch": 0.95, "grad_norm": 0.7274881601333618, "learning_rate": 1.1593282272640671e-07, "loss": 2.0423, "step": 28623 }, { "epoch": 0.95, "grad_norm": 0.8035042881965637, "learning_rate": 1.1577150018427297e-07, "loss": 2.05, "step": 28624 }, { "epoch": 0.95, "grad_norm": 0.7427250742912292, "learning_rate": 1.1561028930836105e-07, "loss": 2.0443, "step": 28625 }, { "epoch": 0.95, "grad_norm": 0.7830579280853271, "learning_rate": 1.154491901004906e-07, "loss": 2.1077, "step": 28626 }, { "epoch": 0.95, "grad_norm": 0.73859703540802, "learning_rate": 1.152882025624824e-07, "loss": 2.0516, "step": 28627 }, { "epoch": 0.95, "grad_norm": 0.780911386013031, "learning_rate": 1.15127326696155e-07, "loss": 2.1168, "step": 28628 }, { "epoch": 0.95, "grad_norm": 0.7387827038764954, "learning_rate": 1.1496656250332582e-07, "loss": 1.9209, "step": 28629 }, { "epoch": 0.95, "grad_norm": 0.737053394317627, "learning_rate": 1.148059099858112e-07, "loss": 2.0994, "step": 28630 }, { "epoch": 0.95, "grad_norm": 0.7374988794326782, "learning_rate": 1.1464536914542745e-07, "loss": 2.0541, "step": 28631 }, { "epoch": 0.95, "grad_norm": 0.741296648979187, "learning_rate": 1.1448493998398535e-07, "loss": 2.0486, "step": 28632 }, { "epoch": 0.95, "grad_norm": 0.7400513887405396, "learning_rate": 1.1432462250329901e-07, "loss": 1.9812, "step": 28633 }, { "epoch": 0.95, "grad_norm": 0.7461794018745422, "learning_rate": 1.141644167051803e-07, "loss": 2.0548, "step": 28634 }, { "epoch": 0.95, "grad_norm": 0.7359539866447449, "learning_rate": 1.1400432259143779e-07, "loss": 1.9906, "step": 28635 }, { "epoch": 0.95, "grad_norm": 0.7408196330070496, "learning_rate": 1.1384434016388113e-07, "loss": 2.0845, "step": 28636 }, { "epoch": 0.95, "grad_norm": 0.7656357884407043, "learning_rate": 1.1368446942431666e-07, "loss": 2.0003, "step": 28637 }, { "epoch": 0.95, "grad_norm": 0.7472095489501953, "learning_rate": 1.135247103745507e-07, "loss": 2.0622, "step": 28638 }, { "epoch": 0.95, "grad_norm": 0.7568441033363342, "learning_rate": 1.1336506301638961e-07, "loss": 2.0046, "step": 28639 }, { "epoch": 0.95, "grad_norm": 0.7535372376441956, "learning_rate": 1.1320552735163525e-07, "loss": 2.0237, "step": 28640 }, { "epoch": 0.95, "grad_norm": 0.7311646342277527, "learning_rate": 1.1304610338209177e-07, "loss": 2.0495, "step": 28641 }, { "epoch": 0.95, "grad_norm": 0.7639433145523071, "learning_rate": 1.128867911095577e-07, "loss": 2.0569, "step": 28642 }, { "epoch": 0.95, "grad_norm": 0.7473360896110535, "learning_rate": 1.1272759053583493e-07, "loss": 2.0265, "step": 28643 }, { "epoch": 0.95, "grad_norm": 0.7467397451400757, "learning_rate": 1.1256850166272093e-07, "loss": 2.0596, "step": 28644 }, { "epoch": 0.95, "grad_norm": 0.760664701461792, "learning_rate": 1.1240952449201315e-07, "loss": 2.0811, "step": 28645 }, { "epoch": 0.95, "grad_norm": 0.7418599128723145, "learning_rate": 1.1225065902550792e-07, "loss": 2.0668, "step": 28646 }, { "epoch": 0.95, "grad_norm": 0.7384566068649292, "learning_rate": 1.1209190526500047e-07, "loss": 2.0226, "step": 28647 }, { "epoch": 0.95, "grad_norm": 0.7502782344818115, "learning_rate": 1.1193326321228492e-07, "loss": 2.0867, "step": 28648 }, { "epoch": 0.95, "grad_norm": 0.7258604764938354, "learning_rate": 1.1177473286914986e-07, "loss": 2.0061, "step": 28649 }, { "epoch": 0.95, "grad_norm": 0.7358530163764954, "learning_rate": 1.116163142373905e-07, "loss": 1.9964, "step": 28650 }, { "epoch": 0.95, "grad_norm": 0.7093873023986816, "learning_rate": 1.114580073187943e-07, "loss": 2.0404, "step": 28651 }, { "epoch": 0.95, "grad_norm": 0.7596768140792847, "learning_rate": 1.1129981211514984e-07, "loss": 2.0179, "step": 28652 }, { "epoch": 0.95, "grad_norm": 0.7384152412414551, "learning_rate": 1.111417286282468e-07, "loss": 2.0554, "step": 28653 }, { "epoch": 0.95, "grad_norm": 0.7256574034690857, "learning_rate": 1.1098375685986707e-07, "loss": 1.978, "step": 28654 }, { "epoch": 0.95, "grad_norm": 0.7446480393409729, "learning_rate": 1.1082589681179701e-07, "loss": 2.0874, "step": 28655 }, { "epoch": 0.95, "grad_norm": 0.7562741637229919, "learning_rate": 1.1066814848582186e-07, "loss": 2.0084, "step": 28656 }, { "epoch": 0.95, "grad_norm": 0.7268078923225403, "learning_rate": 1.1051051188372131e-07, "loss": 2.0528, "step": 28657 }, { "epoch": 0.95, "grad_norm": 0.7587331533432007, "learning_rate": 1.1035298700727726e-07, "loss": 2.0468, "step": 28658 }, { "epoch": 0.95, "grad_norm": 0.7448142766952515, "learning_rate": 1.101955738582694e-07, "loss": 2.0249, "step": 28659 }, { "epoch": 0.95, "grad_norm": 0.7738903760910034, "learning_rate": 1.1003827243847631e-07, "loss": 2.0469, "step": 28660 }, { "epoch": 0.95, "grad_norm": 0.7613240480422974, "learning_rate": 1.0988108274967435e-07, "loss": 1.9857, "step": 28661 }, { "epoch": 0.95, "grad_norm": 0.7377136945724487, "learning_rate": 1.0972400479363987e-07, "loss": 2.0533, "step": 28662 }, { "epoch": 0.95, "grad_norm": 0.7845824956893921, "learning_rate": 1.0956703857214701e-07, "loss": 2.1349, "step": 28663 }, { "epoch": 0.95, "grad_norm": 0.7358260154724121, "learning_rate": 1.094101840869699e-07, "loss": 2.0449, "step": 28664 }, { "epoch": 0.95, "grad_norm": 0.767694354057312, "learning_rate": 1.0925344133987936e-07, "loss": 2.0288, "step": 28665 }, { "epoch": 0.95, "grad_norm": 0.7372912764549255, "learning_rate": 1.0909681033264841e-07, "loss": 2.0174, "step": 28666 }, { "epoch": 0.95, "grad_norm": 0.7376200556755066, "learning_rate": 1.089402910670434e-07, "loss": 2.0725, "step": 28667 }, { "epoch": 0.95, "grad_norm": 0.7449764013290405, "learning_rate": 1.0878388354483404e-07, "loss": 2.0373, "step": 28668 }, { "epoch": 0.95, "grad_norm": 0.7363483309745789, "learning_rate": 1.0862758776778892e-07, "loss": 2.0344, "step": 28669 }, { "epoch": 0.95, "grad_norm": 0.760418176651001, "learning_rate": 1.0847140373767218e-07, "loss": 2.079, "step": 28670 }, { "epoch": 0.95, "grad_norm": 0.757100522518158, "learning_rate": 1.0831533145624795e-07, "loss": 2.0561, "step": 28671 }, { "epoch": 0.95, "grad_norm": 0.7479868531227112, "learning_rate": 1.081593709252804e-07, "loss": 2.0161, "step": 28672 }, { "epoch": 0.95, "grad_norm": 0.7149615287780762, "learning_rate": 1.0800352214653031e-07, "loss": 2.0088, "step": 28673 }, { "epoch": 0.95, "grad_norm": 0.7360186576843262, "learning_rate": 1.0784778512175964e-07, "loss": 2.0431, "step": 28674 }, { "epoch": 0.95, "grad_norm": 0.755361795425415, "learning_rate": 1.0769215985272807e-07, "loss": 2.0418, "step": 28675 }, { "epoch": 0.95, "grad_norm": 0.734107255935669, "learning_rate": 1.0753664634119198e-07, "loss": 2.0343, "step": 28676 }, { "epoch": 0.95, "grad_norm": 0.7883127927780151, "learning_rate": 1.0738124458890887e-07, "loss": 2.0019, "step": 28677 }, { "epoch": 0.95, "grad_norm": 0.7369222640991211, "learning_rate": 1.0722595459763618e-07, "loss": 1.9578, "step": 28678 }, { "epoch": 0.95, "grad_norm": 0.7316192388534546, "learning_rate": 1.07070776369127e-07, "loss": 2.0608, "step": 28679 }, { "epoch": 0.95, "grad_norm": 0.7118890285491943, "learning_rate": 1.0691570990513322e-07, "loss": 2.0766, "step": 28680 }, { "epoch": 0.95, "grad_norm": 0.7194395661354065, "learning_rate": 1.0676075520740791e-07, "loss": 1.9792, "step": 28681 }, { "epoch": 0.95, "grad_norm": 0.7654778361320496, "learning_rate": 1.0660591227770189e-07, "loss": 2.1259, "step": 28682 }, { "epoch": 0.95, "grad_norm": 0.741857647895813, "learning_rate": 1.0645118111776376e-07, "loss": 1.9914, "step": 28683 }, { "epoch": 0.95, "grad_norm": 0.7469571828842163, "learning_rate": 1.0629656172934322e-07, "loss": 2.063, "step": 28684 }, { "epoch": 0.95, "grad_norm": 0.7291365265846252, "learning_rate": 1.0614205411418554e-07, "loss": 1.9764, "step": 28685 }, { "epoch": 0.95, "grad_norm": 0.7237410545349121, "learning_rate": 1.0598765827403602e-07, "loss": 2.0447, "step": 28686 }, { "epoch": 0.95, "grad_norm": 0.7562398314476013, "learning_rate": 1.0583337421063878e-07, "loss": 2.0727, "step": 28687 }, { "epoch": 0.95, "grad_norm": 0.7528773546218872, "learning_rate": 1.0567920192573911e-07, "loss": 2.0335, "step": 28688 }, { "epoch": 0.95, "grad_norm": 0.7367376089096069, "learning_rate": 1.0552514142107672e-07, "loss": 2.0688, "step": 28689 }, { "epoch": 0.95, "grad_norm": 0.7565695643424988, "learning_rate": 1.0537119269839135e-07, "loss": 2.1415, "step": 28690 }, { "epoch": 0.95, "grad_norm": 0.7651036381721497, "learning_rate": 1.0521735575942494e-07, "loss": 1.9834, "step": 28691 }, { "epoch": 0.95, "grad_norm": 0.7305153608322144, "learning_rate": 1.0506363060591385e-07, "loss": 1.955, "step": 28692 }, { "epoch": 0.95, "grad_norm": 0.7481963634490967, "learning_rate": 1.0491001723959338e-07, "loss": 2.0373, "step": 28693 }, { "epoch": 0.95, "grad_norm": 0.7346555590629578, "learning_rate": 1.0475651566220213e-07, "loss": 2.0794, "step": 28694 }, { "epoch": 0.95, "grad_norm": 0.7491049766540527, "learning_rate": 1.0460312587547094e-07, "loss": 2.0807, "step": 28695 }, { "epoch": 0.95, "grad_norm": 0.7417234182357788, "learning_rate": 1.044498478811351e-07, "loss": 2.0217, "step": 28696 }, { "epoch": 0.95, "grad_norm": 0.743399977684021, "learning_rate": 1.0429668168092655e-07, "loss": 1.9982, "step": 28697 }, { "epoch": 0.95, "grad_norm": 0.7455649971961975, "learning_rate": 1.041436272765739e-07, "loss": 2.1149, "step": 28698 }, { "epoch": 0.95, "grad_norm": 0.7458542585372925, "learning_rate": 1.0399068466980688e-07, "loss": 2.0888, "step": 28699 }, { "epoch": 0.95, "grad_norm": 0.7343627214431763, "learning_rate": 1.0383785386235301e-07, "loss": 2.0342, "step": 28700 }, { "epoch": 0.95, "grad_norm": 0.7347861528396606, "learning_rate": 1.0368513485594089e-07, "loss": 1.9451, "step": 28701 }, { "epoch": 0.95, "grad_norm": 0.7316446900367737, "learning_rate": 1.0353252765229249e-07, "loss": 2.0682, "step": 28702 }, { "epoch": 0.95, "grad_norm": 0.7549335956573486, "learning_rate": 1.033800322531342e-07, "loss": 2.0115, "step": 28703 }, { "epoch": 0.95, "grad_norm": 0.7718681693077087, "learning_rate": 1.0322764866018908e-07, "loss": 2.0675, "step": 28704 }, { "epoch": 0.96, "grad_norm": 0.7223976254463196, "learning_rate": 1.0307537687517688e-07, "loss": 2.0379, "step": 28705 }, { "epoch": 0.96, "grad_norm": 0.7304015755653381, "learning_rate": 1.0292321689981954e-07, "loss": 2.0108, "step": 28706 }, { "epoch": 0.96, "grad_norm": 0.7386050820350647, "learning_rate": 1.0277116873583571e-07, "loss": 2.1091, "step": 28707 }, { "epoch": 0.96, "grad_norm": 0.7167690396308899, "learning_rate": 1.0261923238494175e-07, "loss": 2.0387, "step": 28708 }, { "epoch": 0.96, "grad_norm": 0.7573454976081848, "learning_rate": 1.0246740784885634e-07, "loss": 2.041, "step": 28709 }, { "epoch": 0.96, "grad_norm": 0.7409628033638, "learning_rate": 1.0231569512929363e-07, "loss": 2.0235, "step": 28710 }, { "epoch": 0.96, "grad_norm": 0.7428113222122192, "learning_rate": 1.0216409422796669e-07, "loss": 2.0777, "step": 28711 }, { "epoch": 0.96, "grad_norm": 0.7730421423912048, "learning_rate": 1.0201260514658973e-07, "loss": 2.0588, "step": 28712 }, { "epoch": 0.96, "grad_norm": 0.7434766888618469, "learning_rate": 1.0186122788687358e-07, "loss": 2.0169, "step": 28713 }, { "epoch": 0.96, "grad_norm": 0.7657095193862915, "learning_rate": 1.01709962450528e-07, "loss": 2.0709, "step": 28714 }, { "epoch": 0.96, "grad_norm": 0.7505764961242676, "learning_rate": 1.0155880883926272e-07, "loss": 2.0719, "step": 28715 }, { "epoch": 0.96, "grad_norm": 0.748176097869873, "learning_rate": 1.0140776705478528e-07, "loss": 2.0719, "step": 28716 }, { "epoch": 0.96, "grad_norm": 0.7549925446510315, "learning_rate": 1.0125683709880096e-07, "loss": 2.074, "step": 28717 }, { "epoch": 0.96, "grad_norm": 0.7779072523117065, "learning_rate": 1.011060189730162e-07, "loss": 2.0837, "step": 28718 }, { "epoch": 0.96, "grad_norm": 0.7540832161903381, "learning_rate": 1.0095531267913405e-07, "loss": 2.121, "step": 28719 }, { "epoch": 0.96, "grad_norm": 0.7389533519744873, "learning_rate": 1.0080471821885762e-07, "loss": 2.0618, "step": 28720 }, { "epoch": 0.96, "grad_norm": 0.7536616921424866, "learning_rate": 1.0065423559388776e-07, "loss": 2.0346, "step": 28721 }, { "epoch": 0.96, "grad_norm": 0.7630150318145752, "learning_rate": 1.0050386480592533e-07, "loss": 2.1062, "step": 28722 }, { "epoch": 0.96, "grad_norm": 0.74809330701828, "learning_rate": 1.0035360585666786e-07, "loss": 2.0845, "step": 28723 }, { "epoch": 0.96, "grad_norm": 0.8102424740791321, "learning_rate": 1.00203458747814e-07, "loss": 2.1354, "step": 28724 }, { "epoch": 0.96, "grad_norm": 0.7356367707252502, "learning_rate": 1.0005342348105906e-07, "loss": 2.0408, "step": 28725 }, { "epoch": 0.96, "grad_norm": 0.7479951977729797, "learning_rate": 9.990350005809835e-08, "loss": 2.0782, "step": 28726 }, { "epoch": 0.96, "grad_norm": 0.7814403772354126, "learning_rate": 9.975368848062606e-08, "loss": 2.0527, "step": 28727 }, { "epoch": 0.96, "grad_norm": 0.7389352321624756, "learning_rate": 9.960398875033527e-08, "loss": 2.0229, "step": 28728 }, { "epoch": 0.96, "grad_norm": 0.7411439418792725, "learning_rate": 9.945440086891688e-08, "loss": 2.041, "step": 28729 }, { "epoch": 0.96, "grad_norm": 0.7428656816482544, "learning_rate": 9.930492483805843e-08, "loss": 2.0514, "step": 28730 }, { "epoch": 0.96, "grad_norm": 0.7575491070747375, "learning_rate": 9.915556065945186e-08, "loss": 2.0328, "step": 28731 }, { "epoch": 0.96, "grad_norm": 0.7489508390426636, "learning_rate": 9.900630833478364e-08, "loss": 2.0193, "step": 28732 }, { "epoch": 0.96, "grad_norm": 0.7498624324798584, "learning_rate": 9.885716786573907e-08, "loss": 2.0789, "step": 28733 }, { "epoch": 0.96, "grad_norm": 0.7056251764297485, "learning_rate": 9.870813925400346e-08, "loss": 2.028, "step": 28734 }, { "epoch": 0.96, "grad_norm": 0.7531627416610718, "learning_rate": 9.855922250126105e-08, "loss": 2.0822, "step": 28735 }, { "epoch": 0.96, "grad_norm": 0.7574704885482788, "learning_rate": 9.841041760919268e-08, "loss": 2.0352, "step": 28736 }, { "epoch": 0.96, "grad_norm": 0.7346788644790649, "learning_rate": 9.826172457948147e-08, "loss": 2.0344, "step": 28737 }, { "epoch": 0.96, "grad_norm": 0.7184901237487793, "learning_rate": 9.811314341380606e-08, "loss": 2.0839, "step": 28738 }, { "epoch": 0.96, "grad_norm": 0.7373314499855042, "learning_rate": 9.796467411384513e-08, "loss": 2.0052, "step": 28739 }, { "epoch": 0.96, "grad_norm": 0.7102420330047607, "learning_rate": 9.781631668127622e-08, "loss": 2.0373, "step": 28740 }, { "epoch": 0.96, "grad_norm": 0.7478048205375671, "learning_rate": 9.766807111777465e-08, "loss": 2.0628, "step": 28741 }, { "epoch": 0.96, "grad_norm": 0.7541193962097168, "learning_rate": 9.751993742501686e-08, "loss": 2.0405, "step": 28742 }, { "epoch": 0.96, "grad_norm": 0.7814239263534546, "learning_rate": 9.737191560467485e-08, "loss": 2.1096, "step": 28743 }, { "epoch": 0.96, "grad_norm": 0.7488712072372437, "learning_rate": 9.722400565842283e-08, "loss": 2.0539, "step": 28744 }, { "epoch": 0.96, "grad_norm": 0.76631760597229, "learning_rate": 9.707620758792835e-08, "loss": 2.0806, "step": 28745 }, { "epoch": 0.96, "grad_norm": 0.7401441335678101, "learning_rate": 9.692852139486453e-08, "loss": 2.0578, "step": 28746 }, { "epoch": 0.96, "grad_norm": 0.7271931171417236, "learning_rate": 9.67809470808978e-08, "loss": 1.997, "step": 28747 }, { "epoch": 0.96, "grad_norm": 0.731926441192627, "learning_rate": 9.663348464769684e-08, "loss": 2.0187, "step": 28748 }, { "epoch": 0.96, "grad_norm": 0.7533583641052246, "learning_rate": 9.648613409692587e-08, "loss": 2.0563, "step": 28749 }, { "epoch": 0.96, "grad_norm": 0.7400550842285156, "learning_rate": 9.633889543025133e-08, "loss": 2.0761, "step": 28750 }, { "epoch": 0.96, "grad_norm": 0.739736020565033, "learning_rate": 9.619176864933522e-08, "loss": 1.9838, "step": 28751 }, { "epoch": 0.96, "grad_norm": 0.737621545791626, "learning_rate": 9.604475375584065e-08, "loss": 2.0275, "step": 28752 }, { "epoch": 0.96, "grad_norm": 0.7857711911201477, "learning_rate": 9.589785075142744e-08, "loss": 1.9635, "step": 28753 }, { "epoch": 0.96, "grad_norm": 0.7478498816490173, "learning_rate": 9.575105963775755e-08, "loss": 2.0277, "step": 28754 }, { "epoch": 0.96, "grad_norm": 0.7466282248497009, "learning_rate": 9.560438041648634e-08, "loss": 2.0083, "step": 28755 }, { "epoch": 0.96, "grad_norm": 0.7439160943031311, "learning_rate": 9.545781308927249e-08, "loss": 2.0729, "step": 28756 }, { "epoch": 0.96, "grad_norm": 0.7442426085472107, "learning_rate": 9.531135765777245e-08, "loss": 2.0081, "step": 28757 }, { "epoch": 0.96, "grad_norm": 0.7315365672111511, "learning_rate": 9.516501412363821e-08, "loss": 2.0607, "step": 28758 }, { "epoch": 0.96, "grad_norm": 0.7446399331092834, "learning_rate": 9.501878248852625e-08, "loss": 1.9919, "step": 28759 }, { "epoch": 0.96, "grad_norm": 0.7262716889381409, "learning_rate": 9.487266275408746e-08, "loss": 2.0614, "step": 28760 }, { "epoch": 0.96, "grad_norm": 0.7658587694168091, "learning_rate": 9.472665492197163e-08, "loss": 1.9969, "step": 28761 }, { "epoch": 0.96, "grad_norm": 0.7941670417785645, "learning_rate": 9.458075899382858e-08, "loss": 2.0145, "step": 28762 }, { "epoch": 0.96, "grad_norm": 0.7706116437911987, "learning_rate": 9.443497497130804e-08, "loss": 2.0386, "step": 28763 }, { "epoch": 0.96, "grad_norm": 0.7372916340827942, "learning_rate": 9.428930285605654e-08, "loss": 2.0413, "step": 28764 }, { "epoch": 0.96, "grad_norm": 0.7452129125595093, "learning_rate": 9.414374264971715e-08, "loss": 1.9996, "step": 28765 }, { "epoch": 0.96, "grad_norm": 0.7453997731208801, "learning_rate": 9.399829435393748e-08, "loss": 2.0657, "step": 28766 }, { "epoch": 0.96, "grad_norm": 0.7542822957038879, "learning_rate": 9.385295797035953e-08, "loss": 2.0721, "step": 28767 }, { "epoch": 0.96, "grad_norm": 0.7773736119270325, "learning_rate": 9.370773350062534e-08, "loss": 2.0066, "step": 28768 }, { "epoch": 0.96, "grad_norm": 0.7260607481002808, "learning_rate": 9.356262094637469e-08, "loss": 2.084, "step": 28769 }, { "epoch": 0.96, "grad_norm": 0.7489703297615051, "learning_rate": 9.341762030924962e-08, "loss": 2.0275, "step": 28770 }, { "epoch": 0.96, "grad_norm": 0.780580997467041, "learning_rate": 9.327273159088434e-08, "loss": 2.1097, "step": 28771 }, { "epoch": 0.96, "grad_norm": 0.7455532550811768, "learning_rate": 9.31279547929198e-08, "loss": 2.0526, "step": 28772 }, { "epoch": 0.96, "grad_norm": 0.7439217567443848, "learning_rate": 9.298328991698802e-08, "loss": 2.1277, "step": 28773 }, { "epoch": 0.96, "grad_norm": 0.7304056882858276, "learning_rate": 9.283873696472545e-08, "loss": 2.1337, "step": 28774 }, { "epoch": 0.96, "grad_norm": 0.7803228497505188, "learning_rate": 9.269429593776413e-08, "loss": 2.0867, "step": 28775 }, { "epoch": 0.96, "grad_norm": 0.7476922869682312, "learning_rate": 9.254996683773831e-08, "loss": 2.0926, "step": 28776 }, { "epoch": 0.96, "grad_norm": 0.745601236820221, "learning_rate": 9.240574966627558e-08, "loss": 2.0397, "step": 28777 }, { "epoch": 0.96, "grad_norm": 0.7883411645889282, "learning_rate": 9.226164442500573e-08, "loss": 2.0313, "step": 28778 }, { "epoch": 0.96, "grad_norm": 0.7247946858406067, "learning_rate": 9.211765111555748e-08, "loss": 2.0898, "step": 28779 }, { "epoch": 0.96, "grad_norm": 0.7257028818130493, "learning_rate": 9.19737697395573e-08, "loss": 2.0647, "step": 28780 }, { "epoch": 0.96, "grad_norm": 0.7526800036430359, "learning_rate": 9.183000029863165e-08, "loss": 2.1105, "step": 28781 }, { "epoch": 0.96, "grad_norm": 0.7623597979545593, "learning_rate": 9.16863427944037e-08, "loss": 2.0071, "step": 28782 }, { "epoch": 0.96, "grad_norm": 0.7737778425216675, "learning_rate": 9.154279722849546e-08, "loss": 2.0657, "step": 28783 }, { "epoch": 0.96, "grad_norm": 0.7557483315467834, "learning_rate": 9.13993636025301e-08, "loss": 2.0217, "step": 28784 }, { "epoch": 0.96, "grad_norm": 0.764198362827301, "learning_rate": 9.125604191812854e-08, "loss": 2.0632, "step": 28785 }, { "epoch": 0.96, "grad_norm": 0.8041329383850098, "learning_rate": 9.111283217690948e-08, "loss": 2.0307, "step": 28786 }, { "epoch": 0.96, "grad_norm": 0.7491889595985413, "learning_rate": 9.09697343804894e-08, "loss": 1.9997, "step": 28787 }, { "epoch": 0.96, "grad_norm": 0.7481538653373718, "learning_rate": 9.082674853048589e-08, "loss": 2.0222, "step": 28788 }, { "epoch": 0.96, "grad_norm": 0.7227540612220764, "learning_rate": 9.068387462851435e-08, "loss": 2.1069, "step": 28789 }, { "epoch": 0.96, "grad_norm": 0.7346522212028503, "learning_rate": 9.054111267619015e-08, "loss": 2.0064, "step": 28790 }, { "epoch": 0.96, "grad_norm": 0.7587025165557861, "learning_rate": 9.03984626751242e-08, "loss": 2.0379, "step": 28791 }, { "epoch": 0.96, "grad_norm": 0.7325794100761414, "learning_rate": 9.025592462692856e-08, "loss": 2.0525, "step": 28792 }, { "epoch": 0.96, "grad_norm": 0.7329788208007812, "learning_rate": 9.011349853321416e-08, "loss": 2.0313, "step": 28793 }, { "epoch": 0.96, "grad_norm": 0.7759432196617126, "learning_rate": 8.997118439558972e-08, "loss": 2.0632, "step": 28794 }, { "epoch": 0.96, "grad_norm": 0.7502753734588623, "learning_rate": 8.982898221566394e-08, "loss": 2.1322, "step": 28795 }, { "epoch": 0.96, "grad_norm": 0.7386521697044373, "learning_rate": 8.96868919950411e-08, "loss": 2.0316, "step": 28796 }, { "epoch": 0.96, "grad_norm": 0.7299783229827881, "learning_rate": 8.95449137353277e-08, "loss": 2.0106, "step": 28797 }, { "epoch": 0.96, "grad_norm": 0.7445585131645203, "learning_rate": 8.9403047438128e-08, "loss": 2.0608, "step": 28798 }, { "epoch": 0.96, "grad_norm": 0.7425210475921631, "learning_rate": 8.926129310504516e-08, "loss": 2.0346, "step": 28799 }, { "epoch": 0.96, "grad_norm": 0.7514474987983704, "learning_rate": 8.911965073767903e-08, "loss": 2.0221, "step": 28800 }, { "epoch": 0.96, "grad_norm": 0.7600212097167969, "learning_rate": 8.897812033763165e-08, "loss": 2.044, "step": 28801 }, { "epoch": 0.96, "grad_norm": 0.7252838611602783, "learning_rate": 8.883670190649951e-08, "loss": 2.034, "step": 28802 }, { "epoch": 0.96, "grad_norm": 0.7614043354988098, "learning_rate": 8.869539544588357e-08, "loss": 1.9935, "step": 28803 }, { "epoch": 0.96, "grad_norm": 0.7334284782409668, "learning_rate": 8.855420095737699e-08, "loss": 2.0823, "step": 28804 }, { "epoch": 0.96, "grad_norm": 0.7646724581718445, "learning_rate": 8.841311844257738e-08, "loss": 2.0408, "step": 28805 }, { "epoch": 0.96, "grad_norm": 0.7628721594810486, "learning_rate": 8.82721479030768e-08, "loss": 2.0496, "step": 28806 }, { "epoch": 0.96, "grad_norm": 0.7408182621002197, "learning_rate": 8.813128934046955e-08, "loss": 2.0724, "step": 28807 }, { "epoch": 0.96, "grad_norm": 0.7715884447097778, "learning_rate": 8.799054275634545e-08, "loss": 2.0907, "step": 28808 }, { "epoch": 0.96, "grad_norm": 0.7652683854103088, "learning_rate": 8.784990815229543e-08, "loss": 2.0904, "step": 28809 }, { "epoch": 0.96, "grad_norm": 0.7156738042831421, "learning_rate": 8.770938552990826e-08, "loss": 2.0998, "step": 28810 }, { "epoch": 0.96, "grad_norm": 0.7185529470443726, "learning_rate": 8.756897489077043e-08, "loss": 2.0151, "step": 28811 }, { "epoch": 0.96, "grad_norm": 0.765351414680481, "learning_rate": 8.742867623647067e-08, "loss": 2.0301, "step": 28812 }, { "epoch": 0.96, "grad_norm": 0.7472516298294067, "learning_rate": 8.728848956859104e-08, "loss": 2.0616, "step": 28813 }, { "epoch": 0.96, "grad_norm": 0.7201507687568665, "learning_rate": 8.714841488871695e-08, "loss": 2.0104, "step": 28814 }, { "epoch": 0.96, "grad_norm": 0.7690840363502502, "learning_rate": 8.700845219843046e-08, "loss": 2.077, "step": 28815 }, { "epoch": 0.96, "grad_norm": 0.7646406292915344, "learning_rate": 8.686860149931253e-08, "loss": 2.0638, "step": 28816 }, { "epoch": 0.96, "grad_norm": 0.7455672025680542, "learning_rate": 8.672886279294523e-08, "loss": 2.0354, "step": 28817 }, { "epoch": 0.96, "grad_norm": 0.7456795573234558, "learning_rate": 8.658923608090397e-08, "loss": 2.0927, "step": 28818 }, { "epoch": 0.96, "grad_norm": 0.7295423746109009, "learning_rate": 8.644972136476748e-08, "loss": 2.0254, "step": 28819 }, { "epoch": 0.96, "grad_norm": 0.7496533989906311, "learning_rate": 8.631031864611228e-08, "loss": 2.1402, "step": 28820 }, { "epoch": 0.96, "grad_norm": 0.7263340950012207, "learning_rate": 8.617102792651377e-08, "loss": 2.0235, "step": 28821 }, { "epoch": 0.96, "grad_norm": 0.7462524175643921, "learning_rate": 8.603184920754404e-08, "loss": 2.058, "step": 28822 }, { "epoch": 0.96, "grad_norm": 0.7780177593231201, "learning_rate": 8.589278249077737e-08, "loss": 2.1657, "step": 28823 }, { "epoch": 0.96, "grad_norm": 0.7397934198379517, "learning_rate": 8.575382777778252e-08, "loss": 2.0823, "step": 28824 }, { "epoch": 0.96, "grad_norm": 0.7458871603012085, "learning_rate": 8.561498507013266e-08, "loss": 2.085, "step": 28825 }, { "epoch": 0.96, "grad_norm": 0.7458474636077881, "learning_rate": 8.547625436939211e-08, "loss": 2.1151, "step": 28826 }, { "epoch": 0.96, "grad_norm": 0.7505542039871216, "learning_rate": 8.533763567713183e-08, "loss": 2.0463, "step": 28827 }, { "epoch": 0.96, "grad_norm": 0.7505803108215332, "learning_rate": 8.519912899491501e-08, "loss": 2.0369, "step": 28828 }, { "epoch": 0.96, "grad_norm": 0.7204444408416748, "learning_rate": 8.506073432430929e-08, "loss": 2.085, "step": 28829 }, { "epoch": 0.96, "grad_norm": 0.7758601903915405, "learning_rate": 8.492245166687562e-08, "loss": 2.0631, "step": 28830 }, { "epoch": 0.96, "grad_norm": 0.7837344408035278, "learning_rate": 8.478428102417725e-08, "loss": 2.0946, "step": 28831 }, { "epoch": 0.96, "grad_norm": 0.7752772569656372, "learning_rate": 8.46462223977762e-08, "loss": 2.0706, "step": 28832 }, { "epoch": 0.96, "grad_norm": 0.7393060326576233, "learning_rate": 8.450827578923016e-08, "loss": 2.0737, "step": 28833 }, { "epoch": 0.96, "grad_norm": 0.7521793246269226, "learning_rate": 8.437044120009897e-08, "loss": 2.0223, "step": 28834 }, { "epoch": 0.96, "grad_norm": 0.7673332691192627, "learning_rate": 8.423271863193915e-08, "loss": 2.1169, "step": 28835 }, { "epoch": 0.96, "grad_norm": 0.7373728156089783, "learning_rate": 8.409510808630728e-08, "loss": 2.0116, "step": 28836 }, { "epoch": 0.96, "grad_norm": 0.7135067582130432, "learning_rate": 8.395760956475763e-08, "loss": 2.038, "step": 28837 }, { "epoch": 0.96, "grad_norm": 0.7294144034385681, "learning_rate": 8.38202230688434e-08, "loss": 2.0488, "step": 28838 }, { "epoch": 0.96, "grad_norm": 0.7496734261512756, "learning_rate": 8.368294860011672e-08, "loss": 2.0726, "step": 28839 }, { "epoch": 0.96, "grad_norm": 0.7476152777671814, "learning_rate": 8.354578616012854e-08, "loss": 2.0577, "step": 28840 }, { "epoch": 0.96, "grad_norm": 0.7320188283920288, "learning_rate": 8.340873575042874e-08, "loss": 2.0108, "step": 28841 }, { "epoch": 0.96, "grad_norm": 0.7118979692459106, "learning_rate": 8.327179737256496e-08, "loss": 2.0068, "step": 28842 }, { "epoch": 0.96, "grad_norm": 0.7497416138648987, "learning_rate": 8.313497102808487e-08, "loss": 2.0222, "step": 28843 }, { "epoch": 0.96, "grad_norm": 0.7564935684204102, "learning_rate": 8.29982567185339e-08, "loss": 2.0486, "step": 28844 }, { "epoch": 0.96, "grad_norm": 0.7485154867172241, "learning_rate": 8.286165444545635e-08, "loss": 2.0586, "step": 28845 }, { "epoch": 0.96, "grad_norm": 0.7184957265853882, "learning_rate": 8.272516421039656e-08, "loss": 1.9907, "step": 28846 }, { "epoch": 0.96, "grad_norm": 0.7209492325782776, "learning_rate": 8.25887860148955e-08, "loss": 2.0628, "step": 28847 }, { "epoch": 0.96, "grad_norm": 0.7558860182762146, "learning_rate": 8.245251986049307e-08, "loss": 2.0364, "step": 28848 }, { "epoch": 0.96, "grad_norm": 0.7647889852523804, "learning_rate": 8.231636574873137e-08, "loss": 2.013, "step": 28849 }, { "epoch": 0.96, "grad_norm": 0.7553786635398865, "learning_rate": 8.218032368114692e-08, "loss": 2.083, "step": 28850 }, { "epoch": 0.96, "grad_norm": 0.7671339511871338, "learning_rate": 8.20443936592763e-08, "loss": 2.1382, "step": 28851 }, { "epoch": 0.96, "grad_norm": 0.7357126474380493, "learning_rate": 8.190857568465605e-08, "loss": 2.0789, "step": 28852 }, { "epoch": 0.96, "grad_norm": 0.7759916186332703, "learning_rate": 8.177286975881938e-08, "loss": 2.0316, "step": 28853 }, { "epoch": 0.96, "grad_norm": 0.7308028340339661, "learning_rate": 8.163727588329951e-08, "loss": 2.0366, "step": 28854 }, { "epoch": 0.96, "grad_norm": 0.7526029348373413, "learning_rate": 8.150179405963077e-08, "loss": 2.0154, "step": 28855 }, { "epoch": 0.96, "grad_norm": 0.7430981397628784, "learning_rate": 8.136642428934083e-08, "loss": 1.9436, "step": 28856 }, { "epoch": 0.96, "grad_norm": 0.7524028420448303, "learning_rate": 8.123116657396068e-08, "loss": 2.043, "step": 28857 }, { "epoch": 0.96, "grad_norm": 0.7466210722923279, "learning_rate": 8.1096020915018e-08, "loss": 2.0193, "step": 28858 }, { "epoch": 0.96, "grad_norm": 0.7538317441940308, "learning_rate": 8.096098731403823e-08, "loss": 1.9666, "step": 28859 }, { "epoch": 0.96, "grad_norm": 0.7528130412101746, "learning_rate": 8.082606577254904e-08, "loss": 2.0744, "step": 28860 }, { "epoch": 0.96, "grad_norm": 0.7222666144371033, "learning_rate": 8.069125629207475e-08, "loss": 1.976, "step": 28861 }, { "epoch": 0.96, "grad_norm": 0.7432643175125122, "learning_rate": 8.055655887413527e-08, "loss": 2.0001, "step": 28862 }, { "epoch": 0.96, "grad_norm": 0.7185947299003601, "learning_rate": 8.042197352025494e-08, "loss": 2.0092, "step": 28863 }, { "epoch": 0.96, "grad_norm": 0.7216563820838928, "learning_rate": 8.028750023195475e-08, "loss": 2.017, "step": 28864 }, { "epoch": 0.96, "grad_norm": 0.7504189610481262, "learning_rate": 8.015313901075239e-08, "loss": 2.042, "step": 28865 }, { "epoch": 0.96, "grad_norm": 0.756300151348114, "learning_rate": 8.001888985816553e-08, "loss": 2.0928, "step": 28866 }, { "epoch": 0.96, "grad_norm": 0.7267733216285706, "learning_rate": 7.988475277571295e-08, "loss": 2.067, "step": 28867 }, { "epoch": 0.96, "grad_norm": 0.7821259498596191, "learning_rate": 7.975072776490789e-08, "loss": 2.0662, "step": 28868 }, { "epoch": 0.96, "grad_norm": 0.7699817419052124, "learning_rate": 7.96168148272658e-08, "loss": 2.1177, "step": 28869 }, { "epoch": 0.96, "grad_norm": 0.7503629326820374, "learning_rate": 7.948301396429769e-08, "loss": 2.0629, "step": 28870 }, { "epoch": 0.96, "grad_norm": 0.7410330176353455, "learning_rate": 7.934932517751793e-08, "loss": 2.0228, "step": 28871 }, { "epoch": 0.96, "grad_norm": 0.7521970868110657, "learning_rate": 7.921574846843417e-08, "loss": 2.0508, "step": 28872 }, { "epoch": 0.96, "grad_norm": 0.7348074913024902, "learning_rate": 7.908228383855743e-08, "loss": 2.0049, "step": 28873 }, { "epoch": 0.96, "grad_norm": 0.7255772352218628, "learning_rate": 7.89489312893954e-08, "loss": 1.954, "step": 28874 }, { "epoch": 0.96, "grad_norm": 0.7951341867446899, "learning_rate": 7.881569082245244e-08, "loss": 2.0055, "step": 28875 }, { "epoch": 0.96, "grad_norm": 0.7480822801589966, "learning_rate": 7.868256243923622e-08, "loss": 2.0816, "step": 28876 }, { "epoch": 0.96, "grad_norm": 0.749695897102356, "learning_rate": 7.85495461412511e-08, "loss": 2.0095, "step": 28877 }, { "epoch": 0.96, "grad_norm": 0.7443879842758179, "learning_rate": 7.841664192999809e-08, "loss": 2.0369, "step": 28878 }, { "epoch": 0.96, "grad_norm": 0.774549126625061, "learning_rate": 7.828384980697822e-08, "loss": 2.0776, "step": 28879 }, { "epoch": 0.96, "grad_norm": 0.7527526617050171, "learning_rate": 7.815116977369363e-08, "loss": 2.0417, "step": 28880 }, { "epoch": 0.96, "grad_norm": 0.7556657195091248, "learning_rate": 7.8018601831642e-08, "loss": 2.083, "step": 28881 }, { "epoch": 0.96, "grad_norm": 0.7575960159301758, "learning_rate": 7.788614598232103e-08, "loss": 2.064, "step": 28882 }, { "epoch": 0.96, "grad_norm": 0.7440369129180908, "learning_rate": 7.775380222722839e-08, "loss": 2.0583, "step": 28883 }, { "epoch": 0.96, "grad_norm": 0.7180289030075073, "learning_rate": 7.762157056785735e-08, "loss": 2.0367, "step": 28884 }, { "epoch": 0.96, "grad_norm": 0.7400633096694946, "learning_rate": 7.748945100570226e-08, "loss": 2.039, "step": 28885 }, { "epoch": 0.96, "grad_norm": 0.7503394484519958, "learning_rate": 7.73574435422575e-08, "loss": 2.0293, "step": 28886 }, { "epoch": 0.96, "grad_norm": 0.7502999305725098, "learning_rate": 7.722554817901296e-08, "loss": 2.0681, "step": 28887 }, { "epoch": 0.96, "grad_norm": 0.7551541328430176, "learning_rate": 7.709376491745857e-08, "loss": 2.0625, "step": 28888 }, { "epoch": 0.96, "grad_norm": 0.7333679795265198, "learning_rate": 7.696209375908315e-08, "loss": 2.0788, "step": 28889 }, { "epoch": 0.96, "grad_norm": 0.780125081539154, "learning_rate": 7.683053470537439e-08, "loss": 1.9942, "step": 28890 }, { "epoch": 0.96, "grad_norm": 0.741783857345581, "learning_rate": 7.669908775781887e-08, "loss": 2.0695, "step": 28891 }, { "epoch": 0.96, "grad_norm": 0.7597998976707458, "learning_rate": 7.656775291790208e-08, "loss": 1.9876, "step": 28892 }, { "epoch": 0.96, "grad_norm": 0.7643751502037048, "learning_rate": 7.643653018710729e-08, "loss": 2.0536, "step": 28893 }, { "epoch": 0.96, "grad_norm": 0.7884009480476379, "learning_rate": 7.63054195669155e-08, "loss": 2.0477, "step": 28894 }, { "epoch": 0.96, "grad_norm": 0.7447836995124817, "learning_rate": 7.617442105881001e-08, "loss": 2.1086, "step": 28895 }, { "epoch": 0.96, "grad_norm": 0.7266417741775513, "learning_rate": 7.604353466427072e-08, "loss": 2.0646, "step": 28896 }, { "epoch": 0.96, "grad_norm": 0.7657048106193542, "learning_rate": 7.591276038477425e-08, "loss": 2.0485, "step": 28897 }, { "epoch": 0.96, "grad_norm": 0.7509279251098633, "learning_rate": 7.578209822179938e-08, "loss": 2.0076, "step": 28898 }, { "epoch": 0.96, "grad_norm": 0.7228952050209045, "learning_rate": 7.565154817682274e-08, "loss": 2.0287, "step": 28899 }, { "epoch": 0.96, "grad_norm": 0.7269327640533447, "learning_rate": 7.552111025131869e-08, "loss": 2.0283, "step": 28900 }, { "epoch": 0.96, "grad_norm": 0.7704408764839172, "learning_rate": 7.539078444676051e-08, "loss": 1.9756, "step": 28901 }, { "epoch": 0.96, "grad_norm": 0.7412946224212646, "learning_rate": 7.526057076462145e-08, "loss": 1.9986, "step": 28902 }, { "epoch": 0.96, "grad_norm": 0.7515783905982971, "learning_rate": 7.513046920637146e-08, "loss": 2.0649, "step": 28903 }, { "epoch": 0.96, "grad_norm": 0.7565885186195374, "learning_rate": 7.500047977348046e-08, "loss": 1.9703, "step": 28904 }, { "epoch": 0.96, "grad_norm": 0.7550889253616333, "learning_rate": 7.48706024674184e-08, "loss": 2.0268, "step": 28905 }, { "epoch": 0.96, "grad_norm": 0.7521265149116516, "learning_rate": 7.474083728965076e-08, "loss": 2.04, "step": 28906 }, { "epoch": 0.96, "grad_norm": 0.7509969472885132, "learning_rate": 7.461118424164415e-08, "loss": 2.0367, "step": 28907 }, { "epoch": 0.96, "grad_norm": 0.7306939363479614, "learning_rate": 7.448164332486519e-08, "loss": 2.0887, "step": 28908 }, { "epoch": 0.96, "grad_norm": 0.7440593838691711, "learning_rate": 7.435221454077491e-08, "loss": 2.0922, "step": 28909 }, { "epoch": 0.96, "grad_norm": 0.704559862613678, "learning_rate": 7.422289789083548e-08, "loss": 2.0396, "step": 28910 }, { "epoch": 0.96, "grad_norm": 0.7528597712516785, "learning_rate": 7.409369337650907e-08, "loss": 2.0287, "step": 28911 }, { "epoch": 0.96, "grad_norm": 0.7378642559051514, "learning_rate": 7.396460099925451e-08, "loss": 2.0287, "step": 28912 }, { "epoch": 0.96, "grad_norm": 0.8209337592124939, "learning_rate": 7.383562076053174e-08, "loss": 2.0111, "step": 28913 }, { "epoch": 0.96, "grad_norm": 0.7397079467773438, "learning_rate": 7.370675266179628e-08, "loss": 2.0299, "step": 28914 }, { "epoch": 0.96, "grad_norm": 0.7886514067649841, "learning_rate": 7.357799670450472e-08, "loss": 2.0332, "step": 28915 }, { "epoch": 0.96, "grad_norm": 0.7646514773368835, "learning_rate": 7.344935289011146e-08, "loss": 2.0312, "step": 28916 }, { "epoch": 0.96, "grad_norm": 0.7521873116493225, "learning_rate": 7.332082122006979e-08, "loss": 2.0332, "step": 28917 }, { "epoch": 0.96, "grad_norm": 0.7282657027244568, "learning_rate": 7.319240169583186e-08, "loss": 2.1085, "step": 28918 }, { "epoch": 0.96, "grad_norm": 0.7328762412071228, "learning_rate": 7.306409431884875e-08, "loss": 2.0806, "step": 28919 }, { "epoch": 0.96, "grad_norm": 0.7582875490188599, "learning_rate": 7.29358990905693e-08, "loss": 2.1055, "step": 28920 }, { "epoch": 0.96, "grad_norm": 0.7659229636192322, "learning_rate": 7.280781601244235e-08, "loss": 2.0365, "step": 28921 }, { "epoch": 0.96, "grad_norm": 0.74201899766922, "learning_rate": 7.267984508591452e-08, "loss": 2.0379, "step": 28922 }, { "epoch": 0.96, "grad_norm": 0.7303563952445984, "learning_rate": 7.255198631243243e-08, "loss": 2.0675, "step": 28923 }, { "epoch": 0.96, "grad_norm": 0.7446170449256897, "learning_rate": 7.242423969344048e-08, "loss": 2.0954, "step": 28924 }, { "epoch": 0.96, "grad_norm": 0.7393648624420166, "learning_rate": 7.229660523037974e-08, "loss": 2.0608, "step": 28925 }, { "epoch": 0.96, "grad_norm": 0.7304803133010864, "learning_rate": 7.21690829246946e-08, "loss": 1.953, "step": 28926 }, { "epoch": 0.96, "grad_norm": 0.7664514183998108, "learning_rate": 7.204167277782503e-08, "loss": 2.0564, "step": 28927 }, { "epoch": 0.96, "grad_norm": 0.7689447402954102, "learning_rate": 7.191437479120989e-08, "loss": 2.0857, "step": 28928 }, { "epoch": 0.96, "grad_norm": 0.7483336925506592, "learning_rate": 7.1787188966288e-08, "loss": 2.0231, "step": 28929 }, { "epoch": 0.96, "grad_norm": 0.7294921875, "learning_rate": 7.166011530449601e-08, "loss": 2.0525, "step": 28930 }, { "epoch": 0.96, "grad_norm": 0.753108561038971, "learning_rate": 7.153315380726944e-08, "loss": 2.024, "step": 28931 }, { "epoch": 0.96, "grad_norm": 0.7425176501274109, "learning_rate": 7.140630447604268e-08, "loss": 2.0762, "step": 28932 }, { "epoch": 0.96, "grad_norm": 0.7400618195533752, "learning_rate": 7.127956731224794e-08, "loss": 1.9791, "step": 28933 }, { "epoch": 0.96, "grad_norm": 0.7528064250946045, "learning_rate": 7.11529423173185e-08, "loss": 2.0539, "step": 28934 }, { "epoch": 0.96, "grad_norm": 0.7512118816375732, "learning_rate": 7.102642949268435e-08, "loss": 2.0406, "step": 28935 }, { "epoch": 0.96, "grad_norm": 0.7711220383644104, "learning_rate": 7.090002883977431e-08, "loss": 2.0427, "step": 28936 }, { "epoch": 0.96, "grad_norm": 0.7332038879394531, "learning_rate": 7.077374036001728e-08, "loss": 1.9904, "step": 28937 }, { "epoch": 0.96, "grad_norm": 0.7384692430496216, "learning_rate": 7.064756405483986e-08, "loss": 2.028, "step": 28938 }, { "epoch": 0.96, "grad_norm": 0.722040593624115, "learning_rate": 7.052149992566537e-08, "loss": 2.0499, "step": 28939 }, { "epoch": 0.96, "grad_norm": 0.7493895292282104, "learning_rate": 7.039554797392157e-08, "loss": 2.0283, "step": 28940 }, { "epoch": 0.96, "grad_norm": 0.7240380048751831, "learning_rate": 7.026970820102841e-08, "loss": 1.9974, "step": 28941 }, { "epoch": 0.96, "grad_norm": 0.736992597579956, "learning_rate": 7.014398060840921e-08, "loss": 1.9494, "step": 28942 }, { "epoch": 0.96, "grad_norm": 0.7390832901000977, "learning_rate": 7.001836519748285e-08, "loss": 2.0885, "step": 28943 }, { "epoch": 0.96, "grad_norm": 0.7628918290138245, "learning_rate": 6.989286196967038e-08, "loss": 2.0416, "step": 28944 }, { "epoch": 0.96, "grad_norm": 0.737324595451355, "learning_rate": 6.976747092638848e-08, "loss": 2.0399, "step": 28945 }, { "epoch": 0.96, "grad_norm": 0.7331181764602661, "learning_rate": 6.964219206905487e-08, "loss": 2.0881, "step": 28946 }, { "epoch": 0.96, "grad_norm": 0.7519974112510681, "learning_rate": 6.951702539908289e-08, "loss": 1.9593, "step": 28947 }, { "epoch": 0.96, "grad_norm": 0.7352447509765625, "learning_rate": 6.939197091788807e-08, "loss": 2.1293, "step": 28948 }, { "epoch": 0.96, "grad_norm": 0.7299389243125916, "learning_rate": 6.926702862688262e-08, "loss": 2.0609, "step": 28949 }, { "epoch": 0.96, "grad_norm": 0.7379013895988464, "learning_rate": 6.914219852747872e-08, "loss": 2.0223, "step": 28950 }, { "epoch": 0.96, "grad_norm": 0.7552069425582886, "learning_rate": 6.901748062108638e-08, "loss": 2.0489, "step": 28951 }, { "epoch": 0.96, "grad_norm": 0.7454066872596741, "learning_rate": 6.889287490911445e-08, "loss": 2.0947, "step": 28952 }, { "epoch": 0.96, "grad_norm": 0.8056999444961548, "learning_rate": 6.87683813929696e-08, "loss": 2.0257, "step": 28953 }, { "epoch": 0.96, "grad_norm": 0.7525133490562439, "learning_rate": 6.864400007405957e-08, "loss": 2.0127, "step": 28954 }, { "epoch": 0.96, "grad_norm": 0.729185938835144, "learning_rate": 6.851973095378994e-08, "loss": 2.0215, "step": 28955 }, { "epoch": 0.96, "grad_norm": 0.732745885848999, "learning_rate": 6.8395574033564e-08, "loss": 2.0966, "step": 28956 }, { "epoch": 0.96, "grad_norm": 0.7138034105300903, "learning_rate": 6.827152931478398e-08, "loss": 2.0603, "step": 28957 }, { "epoch": 0.96, "grad_norm": 0.7614477872848511, "learning_rate": 6.814759679885207e-08, "loss": 2.0164, "step": 28958 }, { "epoch": 0.96, "grad_norm": 0.7697935104370117, "learning_rate": 6.802377648716718e-08, "loss": 1.9745, "step": 28959 }, { "epoch": 0.96, "grad_norm": 0.7562699913978577, "learning_rate": 6.790006838112928e-08, "loss": 1.9992, "step": 28960 }, { "epoch": 0.96, "grad_norm": 0.7690843939781189, "learning_rate": 6.777647248213503e-08, "loss": 2.0162, "step": 28961 }, { "epoch": 0.96, "grad_norm": 0.7489324808120728, "learning_rate": 6.765298879158332e-08, "loss": 2.0266, "step": 28962 }, { "epoch": 0.96, "grad_norm": 0.761817991733551, "learning_rate": 6.752961731086527e-08, "loss": 2.0728, "step": 28963 }, { "epoch": 0.96, "grad_norm": 0.7664352655410767, "learning_rate": 6.740635804137752e-08, "loss": 2.0059, "step": 28964 }, { "epoch": 0.96, "grad_norm": 0.7590832114219666, "learning_rate": 6.728321098451119e-08, "loss": 2.0217, "step": 28965 }, { "epoch": 0.96, "grad_norm": 0.7573423385620117, "learning_rate": 6.716017614165848e-08, "loss": 2.0474, "step": 28966 }, { "epoch": 0.96, "grad_norm": 0.7522915601730347, "learning_rate": 6.703725351420832e-08, "loss": 2.1411, "step": 28967 }, { "epoch": 0.96, "grad_norm": 0.7393189072608948, "learning_rate": 6.691444310355067e-08, "loss": 2.0408, "step": 28968 }, { "epoch": 0.96, "grad_norm": 0.754711389541626, "learning_rate": 6.67917449110722e-08, "loss": 2.0062, "step": 28969 }, { "epoch": 0.96, "grad_norm": 0.7508113384246826, "learning_rate": 6.66691589381585e-08, "loss": 2.0786, "step": 28970 }, { "epoch": 0.96, "grad_norm": 0.741977870464325, "learning_rate": 6.654668518619623e-08, "loss": 2.0619, "step": 28971 }, { "epoch": 0.96, "grad_norm": 0.7559650540351868, "learning_rate": 6.642432365656648e-08, "loss": 2.0401, "step": 28972 }, { "epoch": 0.96, "grad_norm": 0.7487648725509644, "learning_rate": 6.630207435065372e-08, "loss": 2.1132, "step": 28973 }, { "epoch": 0.96, "grad_norm": 0.765039324760437, "learning_rate": 6.617993726983907e-08, "loss": 2.054, "step": 28974 }, { "epoch": 0.96, "grad_norm": 0.7337497472763062, "learning_rate": 6.605791241550142e-08, "loss": 2.0492, "step": 28975 }, { "epoch": 0.96, "grad_norm": 0.7413203120231628, "learning_rate": 6.593599978901855e-08, "loss": 2.0437, "step": 28976 }, { "epoch": 0.96, "grad_norm": 0.7204717397689819, "learning_rate": 6.581419939176936e-08, "loss": 2.0033, "step": 28977 }, { "epoch": 0.96, "grad_norm": 0.7481163740158081, "learning_rate": 6.569251122513054e-08, "loss": 2.0207, "step": 28978 }, { "epoch": 0.96, "grad_norm": 0.739377498626709, "learning_rate": 6.557093529047432e-08, "loss": 2.0259, "step": 28979 }, { "epoch": 0.96, "grad_norm": 0.7433655858039856, "learning_rate": 6.544947158917625e-08, "loss": 2.0141, "step": 28980 }, { "epoch": 0.96, "grad_norm": 0.737528920173645, "learning_rate": 6.532812012260748e-08, "loss": 2.0004, "step": 28981 }, { "epoch": 0.96, "grad_norm": 0.7599485516548157, "learning_rate": 6.520688089213912e-08, "loss": 2.0238, "step": 28982 }, { "epoch": 0.96, "grad_norm": 0.7532410025596619, "learning_rate": 6.508575389914118e-08, "loss": 2.0821, "step": 28983 }, { "epoch": 0.96, "grad_norm": 0.7481398582458496, "learning_rate": 6.496473914498147e-08, "loss": 1.9912, "step": 28984 }, { "epoch": 0.96, "grad_norm": 0.7430540323257446, "learning_rate": 6.484383663102889e-08, "loss": 2.0657, "step": 28985 }, { "epoch": 0.96, "grad_norm": 0.7415584325790405, "learning_rate": 6.472304635864678e-08, "loss": 1.9872, "step": 28986 }, { "epoch": 0.96, "grad_norm": 0.7189276218414307, "learning_rate": 6.460236832920185e-08, "loss": 2.009, "step": 28987 }, { "epoch": 0.96, "grad_norm": 0.743125319480896, "learning_rate": 6.448180254405633e-08, "loss": 2.0796, "step": 28988 }, { "epoch": 0.96, "grad_norm": 0.7273780703544617, "learning_rate": 6.436134900457358e-08, "loss": 2.0124, "step": 28989 }, { "epoch": 0.96, "grad_norm": 0.7614495754241943, "learning_rate": 6.424100771211251e-08, "loss": 2.0022, "step": 28990 }, { "epoch": 0.96, "grad_norm": 0.7750645279884338, "learning_rate": 6.412077866803312e-08, "loss": 2.0623, "step": 28991 }, { "epoch": 0.96, "grad_norm": 0.7312312722206116, "learning_rate": 6.400066187369547e-08, "loss": 1.9976, "step": 28992 }, { "epoch": 0.96, "grad_norm": 0.7096292972564697, "learning_rate": 6.388065733045401e-08, "loss": 2.0562, "step": 28993 }, { "epoch": 0.96, "grad_norm": 0.8079985976219177, "learning_rate": 6.376076503966655e-08, "loss": 2.1109, "step": 28994 }, { "epoch": 0.96, "grad_norm": 0.7363803386688232, "learning_rate": 6.364098500268645e-08, "loss": 2.0643, "step": 28995 }, { "epoch": 0.96, "grad_norm": 0.7387759685516357, "learning_rate": 6.352131722086707e-08, "loss": 1.9433, "step": 28996 }, { "epoch": 0.96, "grad_norm": 0.735599160194397, "learning_rate": 6.340176169556067e-08, "loss": 1.9956, "step": 28997 }, { "epoch": 0.96, "grad_norm": 0.7228538990020752, "learning_rate": 6.328231842811839e-08, "loss": 2.0413, "step": 28998 }, { "epoch": 0.96, "grad_norm": 0.7223252654075623, "learning_rate": 6.316298741988802e-08, "loss": 1.9661, "step": 28999 }, { "epoch": 0.96, "grad_norm": 0.7391944527626038, "learning_rate": 6.304376867221962e-08, "loss": 2.0823, "step": 29000 }, { "epoch": 0.96, "grad_norm": 0.7537028193473816, "learning_rate": 6.292466218645765e-08, "loss": 2.0685, "step": 29001 }, { "epoch": 0.96, "grad_norm": 0.7065121531486511, "learning_rate": 6.280566796395105e-08, "loss": 1.9963, "step": 29002 }, { "epoch": 0.96, "grad_norm": 0.7442588210105896, "learning_rate": 6.268678600604095e-08, "loss": 2.0353, "step": 29003 }, { "epoch": 0.96, "grad_norm": 0.7650353908538818, "learning_rate": 6.256801631407184e-08, "loss": 2.1055, "step": 29004 }, { "epoch": 0.96, "grad_norm": 0.7484534382820129, "learning_rate": 6.244935888938485e-08, "loss": 2.0808, "step": 29005 }, { "epoch": 0.97, "grad_norm": 0.7225353121757507, "learning_rate": 6.233081373332228e-08, "loss": 2.0327, "step": 29006 }, { "epoch": 0.97, "grad_norm": 0.7634073495864868, "learning_rate": 6.221238084722192e-08, "loss": 2.1246, "step": 29007 }, { "epoch": 0.97, "grad_norm": 0.7813124060630798, "learning_rate": 6.209406023242049e-08, "loss": 2.0828, "step": 29008 }, { "epoch": 0.97, "grad_norm": 0.748350977897644, "learning_rate": 6.1975851890258e-08, "loss": 2.0005, "step": 29009 }, { "epoch": 0.97, "grad_norm": 0.7416631579399109, "learning_rate": 6.185775582206677e-08, "loss": 2.0538, "step": 29010 }, { "epoch": 0.97, "grad_norm": 0.7401524782180786, "learning_rate": 6.173977202918346e-08, "loss": 2.0076, "step": 29011 }, { "epoch": 0.97, "grad_norm": 0.7560703754425049, "learning_rate": 6.162190051293925e-08, "loss": 2.066, "step": 29012 }, { "epoch": 0.97, "grad_norm": 0.7423843741416931, "learning_rate": 6.150414127466642e-08, "loss": 2.1156, "step": 29013 }, { "epoch": 0.97, "grad_norm": 0.7280965447425842, "learning_rate": 6.138649431569499e-08, "loss": 2.0208, "step": 29014 }, { "epoch": 0.97, "grad_norm": 0.7192726135253906, "learning_rate": 6.126895963735391e-08, "loss": 2.0353, "step": 29015 }, { "epoch": 0.97, "grad_norm": 0.7671297192573547, "learning_rate": 6.115153724097323e-08, "loss": 2.0466, "step": 29016 }, { "epoch": 0.97, "grad_norm": 0.7226524353027344, "learning_rate": 6.103422712787632e-08, "loss": 1.9613, "step": 29017 }, { "epoch": 0.97, "grad_norm": 0.730130672454834, "learning_rate": 6.091702929938992e-08, "loss": 2.0591, "step": 29018 }, { "epoch": 0.97, "grad_norm": 0.7481353878974915, "learning_rate": 6.07999437568385e-08, "loss": 2.0096, "step": 29019 }, { "epoch": 0.97, "grad_norm": 0.7259511351585388, "learning_rate": 6.068297050154437e-08, "loss": 2.0838, "step": 29020 }, { "epoch": 0.97, "grad_norm": 0.7114219069480896, "learning_rate": 6.056610953482866e-08, "loss": 1.9516, "step": 29021 }, { "epoch": 0.97, "grad_norm": 0.7733140587806702, "learning_rate": 6.044936085801146e-08, "loss": 2.0729, "step": 29022 }, { "epoch": 0.97, "grad_norm": 0.7676507830619812, "learning_rate": 6.033272447241278e-08, "loss": 2.1271, "step": 29023 }, { "epoch": 0.97, "grad_norm": 0.7615839838981628, "learning_rate": 6.02162003793505e-08, "loss": 2.0445, "step": 29024 }, { "epoch": 0.97, "grad_norm": 0.7395052313804626, "learning_rate": 6.009978858014021e-08, "loss": 2.0405, "step": 29025 }, { "epoch": 0.97, "grad_norm": 0.7613799571990967, "learning_rate": 5.998348907609641e-08, "loss": 2.0583, "step": 29026 }, { "epoch": 0.97, "grad_norm": 0.737933337688446, "learning_rate": 5.986730186853362e-08, "loss": 1.9918, "step": 29027 }, { "epoch": 0.97, "grad_norm": 0.7355883717536926, "learning_rate": 5.975122695876522e-08, "loss": 2.0326, "step": 29028 }, { "epoch": 0.97, "grad_norm": 0.7704764604568481, "learning_rate": 5.96352643481013e-08, "loss": 2.0672, "step": 29029 }, { "epoch": 0.97, "grad_norm": 0.725749135017395, "learning_rate": 5.9519414037852994e-08, "loss": 2.0473, "step": 29030 }, { "epoch": 0.97, "grad_norm": 0.7410781383514404, "learning_rate": 5.9403676029328175e-08, "loss": 2.0831, "step": 29031 }, { "epoch": 0.97, "grad_norm": 0.7277485728263855, "learning_rate": 5.928805032383467e-08, "loss": 2.0151, "step": 29032 }, { "epoch": 0.97, "grad_norm": 0.7296704649925232, "learning_rate": 5.917253692268032e-08, "loss": 2.0932, "step": 29033 }, { "epoch": 0.97, "grad_norm": 0.7552341222763062, "learning_rate": 5.9057135827167435e-08, "loss": 2.0987, "step": 29034 }, { "epoch": 0.97, "grad_norm": 0.7646024227142334, "learning_rate": 5.894184703860162e-08, "loss": 2.0154, "step": 29035 }, { "epoch": 0.97, "grad_norm": 0.737494707107544, "learning_rate": 5.882667055828406e-08, "loss": 2.0324, "step": 29036 }, { "epoch": 0.97, "grad_norm": 0.7273925542831421, "learning_rate": 5.871160638751816e-08, "loss": 2.0419, "step": 29037 }, { "epoch": 0.97, "grad_norm": 0.7386282682418823, "learning_rate": 5.859665452760177e-08, "loss": 2.0346, "step": 29038 }, { "epoch": 0.97, "grad_norm": 0.7568864226341248, "learning_rate": 5.848181497983274e-08, "loss": 2.0759, "step": 29039 }, { "epoch": 0.97, "grad_norm": 0.7352434396743774, "learning_rate": 5.836708774551114e-08, "loss": 2.0934, "step": 29040 }, { "epoch": 0.97, "grad_norm": 0.7033225297927856, "learning_rate": 5.825247282593149e-08, "loss": 2.084, "step": 29041 }, { "epoch": 0.97, "grad_norm": 0.7662063241004944, "learning_rate": 5.813797022238943e-08, "loss": 2.0607, "step": 29042 }, { "epoch": 0.97, "grad_norm": 0.7338163256645203, "learning_rate": 5.802357993617835e-08, "loss": 2.0115, "step": 29043 }, { "epoch": 0.97, "grad_norm": 0.7215204834938049, "learning_rate": 5.790930196858946e-08, "loss": 2.053, "step": 29044 }, { "epoch": 0.97, "grad_norm": 0.7399293184280396, "learning_rate": 5.779513632091505e-08, "loss": 2.04, "step": 29045 }, { "epoch": 0.97, "grad_norm": 0.7710161209106445, "learning_rate": 5.768108299444408e-08, "loss": 2.0538, "step": 29046 }, { "epoch": 0.97, "grad_norm": 0.7531158328056335, "learning_rate": 5.756714199046553e-08, "loss": 2.0446, "step": 29047 }, { "epoch": 0.97, "grad_norm": 0.7178133726119995, "learning_rate": 5.7453313310267264e-08, "loss": 2.0535, "step": 29048 }, { "epoch": 0.97, "grad_norm": 0.7536664009094238, "learning_rate": 5.73395969551338e-08, "loss": 2.0522, "step": 29049 }, { "epoch": 0.97, "grad_norm": 0.7335782051086426, "learning_rate": 5.722599292635078e-08, "loss": 2.0491, "step": 29050 }, { "epoch": 0.97, "grad_norm": 0.7559759616851807, "learning_rate": 5.711250122520162e-08, "loss": 2.0539, "step": 29051 }, { "epoch": 0.97, "grad_norm": 0.7440823316574097, "learning_rate": 5.699912185296752e-08, "loss": 2.031, "step": 29052 }, { "epoch": 0.97, "grad_norm": 0.7325991988182068, "learning_rate": 5.68858548109319e-08, "loss": 1.9825, "step": 29053 }, { "epoch": 0.97, "grad_norm": 0.7515924572944641, "learning_rate": 5.6772700100371504e-08, "loss": 2.0492, "step": 29054 }, { "epoch": 0.97, "grad_norm": 0.7323790788650513, "learning_rate": 5.665965772256532e-08, "loss": 1.9529, "step": 29055 }, { "epoch": 0.97, "grad_norm": 0.7622612118721008, "learning_rate": 5.6546727678792324e-08, "loss": 2.0658, "step": 29056 }, { "epoch": 0.97, "grad_norm": 0.7377007007598877, "learning_rate": 5.643390997032594e-08, "loss": 2.0349, "step": 29057 }, { "epoch": 0.97, "grad_norm": 0.7276656031608582, "learning_rate": 5.632120459844181e-08, "loss": 2.037, "step": 29058 }, { "epoch": 0.97, "grad_norm": 0.7768324017524719, "learning_rate": 5.620861156441337e-08, "loss": 2.0006, "step": 29059 }, { "epoch": 0.97, "grad_norm": 0.7395504117012024, "learning_rate": 5.6096130869512934e-08, "loss": 2.0122, "step": 29060 }, { "epoch": 0.97, "grad_norm": 0.7336729168891907, "learning_rate": 5.5983762515009476e-08, "loss": 2.0195, "step": 29061 }, { "epoch": 0.97, "grad_norm": 0.7562929391860962, "learning_rate": 5.587150650217421e-08, "loss": 2.0333, "step": 29062 }, { "epoch": 0.97, "grad_norm": 0.7388936877250671, "learning_rate": 5.575936283227501e-08, "loss": 2.048, "step": 29063 }, { "epoch": 0.97, "grad_norm": 0.717592179775238, "learning_rate": 5.564733150657975e-08, "loss": 2.0604, "step": 29064 }, { "epoch": 0.97, "grad_norm": 0.7575987577438354, "learning_rate": 5.553541252635075e-08, "loss": 2.0288, "step": 29065 }, { "epoch": 0.97, "grad_norm": 0.7681446075439453, "learning_rate": 5.542360589285589e-08, "loss": 2.1356, "step": 29066 }, { "epoch": 0.97, "grad_norm": 0.7275213003158569, "learning_rate": 5.531191160735749e-08, "loss": 2.0062, "step": 29067 }, { "epoch": 0.97, "grad_norm": 0.7394939064979553, "learning_rate": 5.520032967111566e-08, "loss": 2.0982, "step": 29068 }, { "epoch": 0.97, "grad_norm": 0.766218364238739, "learning_rate": 5.5088860085393825e-08, "loss": 2.0696, "step": 29069 }, { "epoch": 0.97, "grad_norm": 0.7595894932746887, "learning_rate": 5.497750285144876e-08, "loss": 2.041, "step": 29070 }, { "epoch": 0.97, "grad_norm": 0.7612637281417847, "learning_rate": 5.486625797053946e-08, "loss": 2.1198, "step": 29071 }, { "epoch": 0.97, "grad_norm": 0.7550984025001526, "learning_rate": 5.4755125443922695e-08, "loss": 2.0885, "step": 29072 }, { "epoch": 0.97, "grad_norm": 0.7625553607940674, "learning_rate": 5.464410527285524e-08, "loss": 1.9847, "step": 29073 }, { "epoch": 0.97, "grad_norm": 0.7424097061157227, "learning_rate": 5.453319745858832e-08, "loss": 2.073, "step": 29074 }, { "epoch": 0.97, "grad_norm": 0.7774603962898254, "learning_rate": 5.442240200237759e-08, "loss": 1.9919, "step": 29075 }, { "epoch": 0.97, "grad_norm": 0.7656825184822083, "learning_rate": 5.431171890547426e-08, "loss": 2.0802, "step": 29076 }, { "epoch": 0.97, "grad_norm": 0.7224841117858887, "learning_rate": 5.420114816912847e-08, "loss": 2.0013, "step": 29077 }, { "epoch": 0.97, "grad_norm": 0.7326154708862305, "learning_rate": 5.409068979458809e-08, "loss": 2.0082, "step": 29078 }, { "epoch": 0.97, "grad_norm": 0.7507390379905701, "learning_rate": 5.398034378310324e-08, "loss": 2.1636, "step": 29079 }, { "epoch": 0.97, "grad_norm": 0.7503821849822998, "learning_rate": 5.3870110135919586e-08, "loss": 1.9807, "step": 29080 }, { "epoch": 0.97, "grad_norm": 0.7579681873321533, "learning_rate": 5.3759988854281686e-08, "loss": 2.0914, "step": 29081 }, { "epoch": 0.97, "grad_norm": 0.7389354705810547, "learning_rate": 5.364997993943521e-08, "loss": 2.0364, "step": 29082 }, { "epoch": 0.97, "grad_norm": 0.7581992149353027, "learning_rate": 5.35400833926214e-08, "loss": 2.1103, "step": 29083 }, { "epoch": 0.97, "grad_norm": 0.7444687485694885, "learning_rate": 5.343029921508258e-08, "loss": 2.0253, "step": 29084 }, { "epoch": 0.97, "grad_norm": 0.7197502255439758, "learning_rate": 5.332062740805888e-08, "loss": 2.0658, "step": 29085 }, { "epoch": 0.97, "grad_norm": 0.7424801588058472, "learning_rate": 5.321106797279041e-08, "loss": 2.0488, "step": 29086 }, { "epoch": 0.97, "grad_norm": 0.7322023510932922, "learning_rate": 5.310162091051285e-08, "loss": 2.0491, "step": 29087 }, { "epoch": 0.97, "grad_norm": 0.7882124781608582, "learning_rate": 5.2992286222464105e-08, "loss": 2.0903, "step": 29088 }, { "epoch": 0.97, "grad_norm": 0.7491951584815979, "learning_rate": 5.2883063909878743e-08, "loss": 2.0401, "step": 29089 }, { "epoch": 0.97, "grad_norm": 0.7461510300636292, "learning_rate": 5.277395397399132e-08, "loss": 2.0899, "step": 29090 }, { "epoch": 0.97, "grad_norm": 0.7436741590499878, "learning_rate": 5.2664956416034196e-08, "loss": 2.0568, "step": 29091 }, { "epoch": 0.97, "grad_norm": 0.7614251971244812, "learning_rate": 5.2556071237238606e-08, "loss": 2.0445, "step": 29092 }, { "epoch": 0.97, "grad_norm": 0.73028963804245, "learning_rate": 5.244729843883467e-08, "loss": 2.0654, "step": 29093 }, { "epoch": 0.97, "grad_norm": 0.7380455732345581, "learning_rate": 5.233863802205141e-08, "loss": 2.0821, "step": 29094 }, { "epoch": 0.97, "grad_norm": 0.761981725692749, "learning_rate": 5.223008998811674e-08, "loss": 2.0558, "step": 29095 }, { "epoch": 0.97, "grad_norm": 0.7487305998802185, "learning_rate": 5.2121654338255226e-08, "loss": 2.0613, "step": 29096 }, { "epoch": 0.97, "grad_norm": 0.7516537308692932, "learning_rate": 5.201333107369366e-08, "loss": 2.0357, "step": 29097 }, { "epoch": 0.97, "grad_norm": 0.7404289841651917, "learning_rate": 5.1905120195655524e-08, "loss": 2.0063, "step": 29098 }, { "epoch": 0.97, "grad_norm": 0.7483970522880554, "learning_rate": 5.179702170536316e-08, "loss": 2.0285, "step": 29099 }, { "epoch": 0.97, "grad_norm": 0.7592679858207703, "learning_rate": 5.168903560403893e-08, "loss": 2.045, "step": 29100 }, { "epoch": 0.97, "grad_norm": 0.7569833397865295, "learning_rate": 5.1581161892900746e-08, "loss": 2.045, "step": 29101 }, { "epoch": 0.97, "grad_norm": 0.7518364191055298, "learning_rate": 5.147340057316763e-08, "loss": 2.0314, "step": 29102 }, { "epoch": 0.97, "grad_norm": 0.7628983855247498, "learning_rate": 5.136575164605861e-08, "loss": 2.1692, "step": 29103 }, { "epoch": 0.97, "grad_norm": 0.7567183971405029, "learning_rate": 5.1258215112789386e-08, "loss": 2.0166, "step": 29104 }, { "epoch": 0.97, "grad_norm": 0.7611680030822754, "learning_rate": 5.115079097457343e-08, "loss": 2.0312, "step": 29105 }, { "epoch": 0.97, "grad_norm": 0.7365122437477112, "learning_rate": 5.1043479232624205e-08, "loss": 2.0798, "step": 29106 }, { "epoch": 0.97, "grad_norm": 0.7223207354545593, "learning_rate": 5.0936279888156304e-08, "loss": 2.0103, "step": 29107 }, { "epoch": 0.97, "grad_norm": 0.7277834415435791, "learning_rate": 5.0829192942379866e-08, "loss": 2.0596, "step": 29108 }, { "epoch": 0.97, "grad_norm": 0.749782383441925, "learning_rate": 5.072221839650393e-08, "loss": 2.0028, "step": 29109 }, { "epoch": 0.97, "grad_norm": 0.7451756596565247, "learning_rate": 5.0615356251737525e-08, "loss": 2.0019, "step": 29110 }, { "epoch": 0.97, "grad_norm": 0.7164727449417114, "learning_rate": 5.050860650928857e-08, "loss": 1.9867, "step": 29111 }, { "epoch": 0.97, "grad_norm": 0.7343751192092896, "learning_rate": 5.040196917036166e-08, "loss": 2.0904, "step": 29112 }, { "epoch": 0.97, "grad_norm": 0.7372276186943054, "learning_rate": 5.0295444236162504e-08, "loss": 1.9819, "step": 29113 }, { "epoch": 0.97, "grad_norm": 0.7556163668632507, "learning_rate": 5.018903170789458e-08, "loss": 2.0453, "step": 29114 }, { "epoch": 0.97, "grad_norm": 0.7306556701660156, "learning_rate": 5.0082731586759134e-08, "loss": 2.1084, "step": 29115 }, { "epoch": 0.97, "grad_norm": 0.7674879431724548, "learning_rate": 4.997654387395745e-08, "loss": 2.0874, "step": 29116 }, { "epoch": 0.97, "grad_norm": 0.7262237071990967, "learning_rate": 4.987046857069078e-08, "loss": 2.0369, "step": 29117 }, { "epoch": 0.97, "grad_norm": 0.7901185750961304, "learning_rate": 4.976450567815483e-08, "loss": 2.0059, "step": 29118 }, { "epoch": 0.97, "grad_norm": 0.7210655212402344, "learning_rate": 4.9658655197548644e-08, "loss": 2.0305, "step": 29119 }, { "epoch": 0.97, "grad_norm": 0.7304787039756775, "learning_rate": 4.9552917130067935e-08, "loss": 2.0612, "step": 29120 }, { "epoch": 0.97, "grad_norm": 0.7489160299301147, "learning_rate": 4.944729147690619e-08, "loss": 2.0492, "step": 29121 }, { "epoch": 0.97, "grad_norm": 0.7471380233764648, "learning_rate": 4.934177823925801e-08, "loss": 1.9651, "step": 29122 }, { "epoch": 0.97, "grad_norm": 0.7341728210449219, "learning_rate": 4.923637741831466e-08, "loss": 2.0456, "step": 29123 }, { "epoch": 0.97, "grad_norm": 0.7330076694488525, "learning_rate": 4.913108901526742e-08, "loss": 2.0299, "step": 29124 }, { "epoch": 0.97, "grad_norm": 0.7185376286506653, "learning_rate": 4.902591303130422e-08, "loss": 2.019, "step": 29125 }, { "epoch": 0.97, "grad_norm": 0.7315093874931335, "learning_rate": 4.892084946761522e-08, "loss": 2.0662, "step": 29126 }, { "epoch": 0.97, "grad_norm": 0.7521397471427917, "learning_rate": 4.881589832538614e-08, "loss": 2.0278, "step": 29127 }, { "epoch": 0.97, "grad_norm": 0.7190423607826233, "learning_rate": 4.871105960580269e-08, "loss": 1.9851, "step": 29128 }, { "epoch": 0.97, "grad_norm": 0.7722873687744141, "learning_rate": 4.86063333100506e-08, "loss": 2.0967, "step": 29129 }, { "epoch": 0.97, "grad_norm": 0.7487995028495789, "learning_rate": 4.850171943931114e-08, "loss": 2.0232, "step": 29130 }, { "epoch": 0.97, "grad_norm": 0.7543607354164124, "learning_rate": 4.839721799476893e-08, "loss": 2.0848, "step": 29131 }, { "epoch": 0.97, "grad_norm": 0.7456114888191223, "learning_rate": 4.829282897760079e-08, "loss": 2.034, "step": 29132 }, { "epoch": 0.97, "grad_norm": 0.7335970997810364, "learning_rate": 4.818855238898912e-08, "loss": 2.0948, "step": 29133 }, { "epoch": 0.97, "grad_norm": 0.7695701122283936, "learning_rate": 4.8084388230109636e-08, "loss": 2.0132, "step": 29134 }, { "epoch": 0.97, "grad_norm": 0.7518985867500305, "learning_rate": 4.79803365021414e-08, "loss": 2.0489, "step": 29135 }, { "epoch": 0.97, "grad_norm": 0.7235926389694214, "learning_rate": 4.787639720625903e-08, "loss": 2.0223, "step": 29136 }, { "epoch": 0.97, "grad_norm": 0.7602631449699402, "learning_rate": 4.777257034363603e-08, "loss": 2.0498, "step": 29137 }, { "epoch": 0.97, "grad_norm": 0.7562738656997681, "learning_rate": 4.76688559154459e-08, "loss": 2.0638, "step": 29138 }, { "epoch": 0.97, "grad_norm": 0.7670345306396484, "learning_rate": 4.7565253922861045e-08, "loss": 2.1211, "step": 29139 }, { "epoch": 0.97, "grad_norm": 0.7302688360214233, "learning_rate": 4.746176436705052e-08, "loss": 1.9879, "step": 29140 }, { "epoch": 0.97, "grad_norm": 0.7528406381607056, "learning_rate": 4.73583872491834e-08, "loss": 2.0743, "step": 29141 }, { "epoch": 0.97, "grad_norm": 0.7400205731391907, "learning_rate": 4.725512257042986e-08, "loss": 2.0831, "step": 29142 }, { "epoch": 0.97, "grad_norm": 0.7529747486114502, "learning_rate": 4.715197033195451e-08, "loss": 2.0314, "step": 29143 }, { "epoch": 0.97, "grad_norm": 0.7333756685256958, "learning_rate": 4.70489305349231e-08, "loss": 2.0468, "step": 29144 }, { "epoch": 0.97, "grad_norm": 0.728471040725708, "learning_rate": 4.6946003180500246e-08, "loss": 2.0505, "step": 29145 }, { "epoch": 0.97, "grad_norm": 0.7578626871109009, "learning_rate": 4.684318826984835e-08, "loss": 2.0641, "step": 29146 }, { "epoch": 0.97, "grad_norm": 0.7417796850204468, "learning_rate": 4.674048580412871e-08, "loss": 2.0896, "step": 29147 }, { "epoch": 0.97, "grad_norm": 0.7563989758491516, "learning_rate": 4.663789578450262e-08, "loss": 2.1458, "step": 29148 }, { "epoch": 0.97, "grad_norm": 0.7407302260398865, "learning_rate": 4.653541821212803e-08, "loss": 2.0158, "step": 29149 }, { "epoch": 0.97, "grad_norm": 0.7220346331596375, "learning_rate": 4.6433053088162925e-08, "loss": 2.0577, "step": 29150 }, { "epoch": 0.97, "grad_norm": 0.7730715870857239, "learning_rate": 4.633080041376303e-08, "loss": 2.0311, "step": 29151 }, { "epoch": 0.97, "grad_norm": 0.7569090723991394, "learning_rate": 4.6228660190085206e-08, "loss": 2.1349, "step": 29152 }, { "epoch": 0.97, "grad_norm": 0.7082535028457642, "learning_rate": 4.612663241828186e-08, "loss": 2.0348, "step": 29153 }, { "epoch": 0.97, "grad_norm": 0.7277864217758179, "learning_rate": 4.602471709950762e-08, "loss": 1.9863, "step": 29154 }, { "epoch": 0.97, "grad_norm": 0.7526946067810059, "learning_rate": 4.592291423491158e-08, "loss": 2.0505, "step": 29155 }, { "epoch": 0.97, "grad_norm": 0.7481178641319275, "learning_rate": 4.5821223825643915e-08, "loss": 2.0493, "step": 29156 }, { "epoch": 0.97, "grad_norm": 0.7619435787200928, "learning_rate": 4.5719645872855936e-08, "loss": 2.0455, "step": 29157 }, { "epoch": 0.97, "grad_norm": 0.7525490522384644, "learning_rate": 4.561818037769228e-08, "loss": 2.0562, "step": 29158 }, { "epoch": 0.97, "grad_norm": 0.7606086134910583, "learning_rate": 4.551682734130203e-08, "loss": 2.0165, "step": 29159 }, { "epoch": 0.97, "grad_norm": 0.7671846151351929, "learning_rate": 4.54155867648276e-08, "loss": 2.0304, "step": 29160 }, { "epoch": 0.97, "grad_norm": 0.719883382320404, "learning_rate": 4.531445864941475e-08, "loss": 2.0112, "step": 29161 }, { "epoch": 0.97, "grad_norm": 0.7404399514198303, "learning_rate": 4.521344299620367e-08, "loss": 1.9952, "step": 29162 }, { "epoch": 0.97, "grad_norm": 0.7131633162498474, "learning_rate": 4.5112539806337895e-08, "loss": 2.0892, "step": 29163 }, { "epoch": 0.97, "grad_norm": 0.7469248175621033, "learning_rate": 4.5011749080957625e-08, "loss": 2.0159, "step": 29164 }, { "epoch": 0.97, "grad_norm": 0.7516930103302002, "learning_rate": 4.491107082119861e-08, "loss": 2.1159, "step": 29165 }, { "epoch": 0.97, "grad_norm": 0.721413254737854, "learning_rate": 4.4810505028201056e-08, "loss": 2.141, "step": 29166 }, { "epoch": 0.97, "grad_norm": 0.75139981508255, "learning_rate": 4.471005170310072e-08, "loss": 2.0632, "step": 29167 }, { "epoch": 0.97, "grad_norm": 0.7599073648452759, "learning_rate": 4.460971084703003e-08, "loss": 2.0917, "step": 29168 }, { "epoch": 0.97, "grad_norm": 0.7397643327713013, "learning_rate": 4.4509482461125855e-08, "loss": 2.0029, "step": 29169 }, { "epoch": 0.97, "grad_norm": 0.7287266254425049, "learning_rate": 4.44093665465184e-08, "loss": 2.0486, "step": 29170 }, { "epoch": 0.97, "grad_norm": 0.7567946910858154, "learning_rate": 4.430936310434009e-08, "loss": 2.0379, "step": 29171 }, { "epoch": 0.97, "grad_norm": 0.7328543663024902, "learning_rate": 4.420947213571891e-08, "loss": 1.9658, "step": 29172 }, { "epoch": 0.97, "grad_norm": 0.7548821568489075, "learning_rate": 4.410969364178508e-08, "loss": 2.0554, "step": 29173 }, { "epoch": 0.97, "grad_norm": 0.7098091840744019, "learning_rate": 4.401002762366546e-08, "loss": 2.02, "step": 29174 }, { "epoch": 0.97, "grad_norm": 0.7328534126281738, "learning_rate": 4.391047408248472e-08, "loss": 2.0009, "step": 29175 }, { "epoch": 0.97, "grad_norm": 0.7250224947929382, "learning_rate": 4.381103301936973e-08, "loss": 2.0653, "step": 29176 }, { "epoch": 0.97, "grad_norm": 0.7702951431274414, "learning_rate": 4.371170443544182e-08, "loss": 2.0688, "step": 29177 }, { "epoch": 0.97, "grad_norm": 0.7229671478271484, "learning_rate": 4.361248833182452e-08, "loss": 2.0792, "step": 29178 }, { "epoch": 0.97, "grad_norm": 0.7605637907981873, "learning_rate": 4.351338470963917e-08, "loss": 2.0085, "step": 29179 }, { "epoch": 0.97, "grad_norm": 0.7301909923553467, "learning_rate": 4.3414393570003765e-08, "loss": 1.9944, "step": 29180 }, { "epoch": 0.97, "grad_norm": 0.7481768131256104, "learning_rate": 4.33155149140374e-08, "loss": 2.0395, "step": 29181 }, { "epoch": 0.97, "grad_norm": 0.772861659526825, "learning_rate": 4.3216748742856975e-08, "loss": 2.1005, "step": 29182 }, { "epoch": 0.97, "grad_norm": 0.7764679789543152, "learning_rate": 4.311809505757936e-08, "loss": 2.0957, "step": 29183 }, { "epoch": 0.97, "grad_norm": 0.7575426697731018, "learning_rate": 4.3019553859317e-08, "loss": 2.0345, "step": 29184 }, { "epoch": 0.97, "grad_norm": 0.7302182912826538, "learning_rate": 4.292112514918456e-08, "loss": 1.9896, "step": 29185 }, { "epoch": 0.97, "grad_norm": 0.7696461081504822, "learning_rate": 4.282280892829449e-08, "loss": 2.1197, "step": 29186 }, { "epoch": 0.97, "grad_norm": 0.7428693175315857, "learning_rate": 4.272460519775701e-08, "loss": 2.0323, "step": 29187 }, { "epoch": 0.97, "grad_norm": 0.7418534159660339, "learning_rate": 4.262651395868123e-08, "loss": 2.0548, "step": 29188 }, { "epoch": 0.97, "grad_norm": 0.7256563305854797, "learning_rate": 4.2528535212175146e-08, "loss": 2.0756, "step": 29189 }, { "epoch": 0.97, "grad_norm": 0.7481099963188171, "learning_rate": 4.243066895934567e-08, "loss": 2.0571, "step": 29190 }, { "epoch": 0.97, "grad_norm": 0.7209005355834961, "learning_rate": 4.2332915201298566e-08, "loss": 2.0789, "step": 29191 }, { "epoch": 0.97, "grad_norm": 0.7448647618293762, "learning_rate": 4.223527393913962e-08, "loss": 2.0672, "step": 29192 }, { "epoch": 0.97, "grad_norm": 0.7893803119659424, "learning_rate": 4.213774517397018e-08, "loss": 2.0441, "step": 29193 }, { "epoch": 0.97, "grad_norm": 0.712611734867096, "learning_rate": 4.204032890689269e-08, "loss": 2.0759, "step": 29194 }, { "epoch": 0.97, "grad_norm": 0.7360569834709167, "learning_rate": 4.194302513900739e-08, "loss": 2.0224, "step": 29195 }, { "epoch": 0.97, "grad_norm": 0.7265943884849548, "learning_rate": 4.184583387141339e-08, "loss": 2.0331, "step": 29196 }, { "epoch": 0.97, "grad_norm": 0.720267117023468, "learning_rate": 4.1748755105209814e-08, "loss": 2.0143, "step": 29197 }, { "epoch": 0.97, "grad_norm": 0.7371760010719299, "learning_rate": 4.165178884149246e-08, "loss": 2.0593, "step": 29198 }, { "epoch": 0.97, "grad_norm": 0.7384008765220642, "learning_rate": 4.1554935081357104e-08, "loss": 1.9241, "step": 29199 }, { "epoch": 0.97, "grad_norm": 0.7674313187599182, "learning_rate": 4.145819382589844e-08, "loss": 2.06, "step": 29200 }, { "epoch": 0.97, "grad_norm": 0.757977306842804, "learning_rate": 4.136156507620781e-08, "loss": 2.0493, "step": 29201 }, { "epoch": 0.97, "grad_norm": 0.7460862398147583, "learning_rate": 4.1265048833378784e-08, "loss": 2.0446, "step": 29202 }, { "epoch": 0.97, "grad_norm": 0.7289160490036011, "learning_rate": 4.1168645098500494e-08, "loss": 2.0314, "step": 29203 }, { "epoch": 0.97, "grad_norm": 0.7429171800613403, "learning_rate": 4.107235387266206e-08, "loss": 2.0248, "step": 29204 }, { "epoch": 0.97, "grad_norm": 0.7336742281913757, "learning_rate": 4.0976175156951515e-08, "loss": 2.0912, "step": 29205 }, { "epoch": 0.97, "grad_norm": 0.7690092325210571, "learning_rate": 4.0880108952456866e-08, "loss": 1.9986, "step": 29206 }, { "epoch": 0.97, "grad_norm": 0.7633804082870483, "learning_rate": 4.0784155260260584e-08, "loss": 2.0707, "step": 29207 }, { "epoch": 0.97, "grad_norm": 0.7094035744667053, "learning_rate": 4.068831408144958e-08, "loss": 2.0894, "step": 29208 }, { "epoch": 0.97, "grad_norm": 0.7410601377487183, "learning_rate": 4.059258541710409e-08, "loss": 2.065, "step": 29209 }, { "epoch": 0.97, "grad_norm": 0.7327796816825867, "learning_rate": 4.049696926830771e-08, "loss": 2.0453, "step": 29210 }, { "epoch": 0.97, "grad_norm": 0.735737144947052, "learning_rate": 4.0401465636139556e-08, "loss": 2.0193, "step": 29211 }, { "epoch": 0.97, "grad_norm": 0.7469883561134338, "learning_rate": 4.0306074521677673e-08, "loss": 1.9847, "step": 29212 }, { "epoch": 0.97, "grad_norm": 0.7568383812904358, "learning_rate": 4.0210795926001186e-08, "loss": 2.0591, "step": 29213 }, { "epoch": 0.97, "grad_norm": 0.7379813194274902, "learning_rate": 4.0115629850187024e-08, "loss": 2.0099, "step": 29214 }, { "epoch": 0.97, "grad_norm": 0.7758004665374756, "learning_rate": 4.0020576295308755e-08, "loss": 2.0011, "step": 29215 }, { "epoch": 0.97, "grad_norm": 0.7378414273262024, "learning_rate": 3.9925635262441084e-08, "loss": 2.048, "step": 29216 }, { "epoch": 0.97, "grad_norm": 0.7421635985374451, "learning_rate": 3.9830806752656495e-08, "loss": 2.0593, "step": 29217 }, { "epoch": 0.97, "grad_norm": 0.7714683413505554, "learning_rate": 3.973609076702634e-08, "loss": 1.9935, "step": 29218 }, { "epoch": 0.97, "grad_norm": 0.735077977180481, "learning_rate": 3.964148730661976e-08, "loss": 2.0893, "step": 29219 }, { "epoch": 0.97, "grad_norm": 0.7345150113105774, "learning_rate": 3.9546996372507027e-08, "loss": 2.0077, "step": 29220 }, { "epoch": 0.97, "grad_norm": 0.7440850138664246, "learning_rate": 3.945261796575506e-08, "loss": 2.0053, "step": 29221 }, { "epoch": 0.97, "grad_norm": 0.7897260785102844, "learning_rate": 3.935835208742966e-08, "loss": 2.0654, "step": 29222 }, { "epoch": 0.97, "grad_norm": 0.7450955510139465, "learning_rate": 3.926419873859666e-08, "loss": 1.9985, "step": 29223 }, { "epoch": 0.97, "grad_norm": 0.7278220057487488, "learning_rate": 3.917015792031853e-08, "loss": 2.0343, "step": 29224 }, { "epoch": 0.97, "grad_norm": 0.7684190273284912, "learning_rate": 3.907622963365776e-08, "loss": 2.0466, "step": 29225 }, { "epoch": 0.97, "grad_norm": 0.7469903826713562, "learning_rate": 3.898241387967683e-08, "loss": 2.0339, "step": 29226 }, { "epoch": 0.97, "grad_norm": 0.7290650606155396, "learning_rate": 3.8888710659434894e-08, "loss": 1.9912, "step": 29227 }, { "epoch": 0.97, "grad_norm": 0.7257793545722961, "learning_rate": 3.87951199739911e-08, "loss": 2.0193, "step": 29228 }, { "epoch": 0.97, "grad_norm": 0.7449692487716675, "learning_rate": 3.870164182440128e-08, "loss": 2.029, "step": 29229 }, { "epoch": 0.97, "grad_norm": 0.7385895252227783, "learning_rate": 3.860827621172236e-08, "loss": 2.0783, "step": 29230 }, { "epoch": 0.97, "grad_norm": 0.7360743880271912, "learning_rate": 3.851502313700906e-08, "loss": 2.0436, "step": 29231 }, { "epoch": 0.97, "grad_norm": 0.7701732516288757, "learning_rate": 3.8421882601316075e-08, "loss": 2.0097, "step": 29232 }, { "epoch": 0.97, "grad_norm": 0.7386481761932373, "learning_rate": 3.832885460569369e-08, "loss": 2.0959, "step": 29233 }, { "epoch": 0.97, "grad_norm": 0.730781078338623, "learning_rate": 3.823593915119439e-08, "loss": 2.0204, "step": 29234 }, { "epoch": 0.97, "grad_norm": 0.7177600860595703, "learning_rate": 3.814313623886623e-08, "loss": 2.1408, "step": 29235 }, { "epoch": 0.97, "grad_norm": 0.7370340824127197, "learning_rate": 3.8050445869759476e-08, "loss": 1.9934, "step": 29236 }, { "epoch": 0.97, "grad_norm": 0.7403169274330139, "learning_rate": 3.7957868044921075e-08, "loss": 2.0657, "step": 29237 }, { "epoch": 0.97, "grad_norm": 0.7843307256698608, "learning_rate": 3.7865402765395744e-08, "loss": 2.0287, "step": 29238 }, { "epoch": 0.97, "grad_norm": 0.7840523719787598, "learning_rate": 3.77730500322282e-08, "loss": 2.0693, "step": 29239 }, { "epoch": 0.97, "grad_norm": 0.7744840383529663, "learning_rate": 3.768080984646316e-08, "loss": 2.0889, "step": 29240 }, { "epoch": 0.97, "grad_norm": 0.7646792531013489, "learning_rate": 3.7588682209140916e-08, "loss": 1.99, "step": 29241 }, { "epoch": 0.97, "grad_norm": 0.7263439297676086, "learning_rate": 3.749666712130395e-08, "loss": 2.0678, "step": 29242 }, { "epoch": 0.97, "grad_norm": 0.7870316505432129, "learning_rate": 3.7404764583991445e-08, "loss": 2.0835, "step": 29243 }, { "epoch": 0.97, "grad_norm": 0.7363491058349609, "learning_rate": 3.731297459824035e-08, "loss": 2.0081, "step": 29244 }, { "epoch": 0.97, "grad_norm": 0.7411637306213379, "learning_rate": 3.722129716508871e-08, "loss": 2.0667, "step": 29245 }, { "epoch": 0.97, "grad_norm": 0.7221792340278625, "learning_rate": 3.712973228557237e-08, "loss": 1.9776, "step": 29246 }, { "epoch": 0.97, "grad_norm": 0.7152597904205322, "learning_rate": 3.7038279960724955e-08, "loss": 1.9721, "step": 29247 }, { "epoch": 0.97, "grad_norm": 0.7625518441200256, "learning_rate": 3.694694019158007e-08, "loss": 2.0733, "step": 29248 }, { "epoch": 0.97, "grad_norm": 0.7547231316566467, "learning_rate": 3.685571297917134e-08, "loss": 2.0748, "step": 29249 }, { "epoch": 0.97, "grad_norm": 0.7913203835487366, "learning_rate": 3.676459832452683e-08, "loss": 2.0479, "step": 29250 }, { "epoch": 0.97, "grad_norm": 0.7813690900802612, "learning_rate": 3.667359622867572e-08, "loss": 2.1247, "step": 29251 }, { "epoch": 0.97, "grad_norm": 0.7526161670684814, "learning_rate": 3.6582706692649403e-08, "loss": 2.1116, "step": 29252 }, { "epoch": 0.97, "grad_norm": 0.7368454337120056, "learning_rate": 3.64919297174704e-08, "loss": 2.0796, "step": 29253 }, { "epoch": 0.97, "grad_norm": 0.720645546913147, "learning_rate": 3.6401265304167875e-08, "loss": 1.9912, "step": 29254 }, { "epoch": 0.97, "grad_norm": 0.7790018916130066, "learning_rate": 3.6310713453764356e-08, "loss": 2.056, "step": 29255 }, { "epoch": 0.97, "grad_norm": 0.763236403465271, "learning_rate": 3.6220274167282354e-08, "loss": 2.0924, "step": 29256 }, { "epoch": 0.97, "grad_norm": 0.7481763958930969, "learning_rate": 3.6129947445744385e-08, "loss": 2.0863, "step": 29257 }, { "epoch": 0.97, "grad_norm": 0.7485073804855347, "learning_rate": 3.603973329017185e-08, "loss": 1.9945, "step": 29258 }, { "epoch": 0.97, "grad_norm": 0.7226691842079163, "learning_rate": 3.594963170158283e-08, "loss": 1.9495, "step": 29259 }, { "epoch": 0.97, "grad_norm": 0.7686389684677124, "learning_rate": 3.585964268099429e-08, "loss": 2.0025, "step": 29260 }, { "epoch": 0.97, "grad_norm": 0.7592135071754456, "learning_rate": 3.5769766229425405e-08, "loss": 2.0164, "step": 29261 }, { "epoch": 0.97, "grad_norm": 0.7605711817741394, "learning_rate": 3.5680002347888707e-08, "loss": 2.0188, "step": 29262 }, { "epoch": 0.97, "grad_norm": 0.7534777522087097, "learning_rate": 3.559035103740005e-08, "loss": 2.0598, "step": 29263 }, { "epoch": 0.97, "grad_norm": 0.7408876419067383, "learning_rate": 3.550081229897195e-08, "loss": 2.095, "step": 29264 }, { "epoch": 0.97, "grad_norm": 0.7327544093132019, "learning_rate": 3.541138613361694e-08, "loss": 2.0563, "step": 29265 }, { "epoch": 0.97, "grad_norm": 0.7415438890457153, "learning_rate": 3.53220725423431e-08, "loss": 2.0332, "step": 29266 }, { "epoch": 0.97, "grad_norm": 0.7405075430870056, "learning_rate": 3.523287152616073e-08, "loss": 2.0358, "step": 29267 }, { "epoch": 0.97, "grad_norm": 0.725394070148468, "learning_rate": 3.514378308607791e-08, "loss": 1.988, "step": 29268 }, { "epoch": 0.97, "grad_norm": 0.7278767824172974, "learning_rate": 3.5054807223100504e-08, "loss": 2.0266, "step": 29269 }, { "epoch": 0.97, "grad_norm": 0.7820540070533752, "learning_rate": 3.496594393823327e-08, "loss": 1.9844, "step": 29270 }, { "epoch": 0.97, "grad_norm": 0.7289568185806274, "learning_rate": 3.487719323248096e-08, "loss": 2.0419, "step": 29271 }, { "epoch": 0.97, "grad_norm": 0.7435545325279236, "learning_rate": 3.47885551068472e-08, "loss": 1.9952, "step": 29272 }, { "epoch": 0.97, "grad_norm": 0.7308414578437805, "learning_rate": 3.47000295623301e-08, "loss": 2.0122, "step": 29273 }, { "epoch": 0.97, "grad_norm": 0.7619144320487976, "learning_rate": 3.461161659993328e-08, "loss": 2.0502, "step": 29274 }, { "epoch": 0.97, "grad_norm": 0.7386350631713867, "learning_rate": 3.452331622065375e-08, "loss": 2.0249, "step": 29275 }, { "epoch": 0.97, "grad_norm": 0.7373751997947693, "learning_rate": 3.4435128425489575e-08, "loss": 2.0295, "step": 29276 }, { "epoch": 0.97, "grad_norm": 0.7175050973892212, "learning_rate": 3.434705321543663e-08, "loss": 2.0148, "step": 29277 }, { "epoch": 0.97, "grad_norm": 0.7567881345748901, "learning_rate": 3.4259090591490795e-08, "loss": 2.0143, "step": 29278 }, { "epoch": 0.97, "grad_norm": 0.7517544031143188, "learning_rate": 3.4171240554644604e-08, "loss": 2.0228, "step": 29279 }, { "epoch": 0.97, "grad_norm": 0.735106885433197, "learning_rate": 3.4083503105891705e-08, "loss": 2.0337, "step": 29280 }, { "epoch": 0.97, "grad_norm": 0.7352213859558105, "learning_rate": 3.3995878246222416e-08, "loss": 2.0319, "step": 29281 }, { "epoch": 0.97, "grad_norm": 0.7510189414024353, "learning_rate": 3.390836597662706e-08, "loss": 2.1108, "step": 29282 }, { "epoch": 0.97, "grad_norm": 0.745866060256958, "learning_rate": 3.382096629809373e-08, "loss": 1.9574, "step": 29283 }, { "epoch": 0.97, "grad_norm": 0.7446914315223694, "learning_rate": 3.373367921161164e-08, "loss": 2.0823, "step": 29284 }, { "epoch": 0.97, "grad_norm": 0.7464628219604492, "learning_rate": 3.364650471816444e-08, "loss": 2.0415, "step": 29285 }, { "epoch": 0.97, "grad_norm": 0.7358343601226807, "learning_rate": 3.355944281873913e-08, "loss": 2.0232, "step": 29286 }, { "epoch": 0.97, "grad_norm": 0.7484214901924133, "learning_rate": 3.347249351431714e-08, "loss": 2.1048, "step": 29287 }, { "epoch": 0.97, "grad_norm": 0.7285729050636292, "learning_rate": 3.3385656805883235e-08, "loss": 2.0389, "step": 29288 }, { "epoch": 0.97, "grad_norm": 0.7495244741439819, "learning_rate": 3.329893269441664e-08, "loss": 2.0387, "step": 29289 }, { "epoch": 0.97, "grad_norm": 0.7547352910041809, "learning_rate": 3.3212321180897676e-08, "loss": 1.9868, "step": 29290 }, { "epoch": 0.97, "grad_norm": 0.7827876210212708, "learning_rate": 3.312582226630445e-08, "loss": 2.0996, "step": 29291 }, { "epoch": 0.97, "grad_norm": 0.7101939916610718, "learning_rate": 3.303943595161507e-08, "loss": 2.0477, "step": 29292 }, { "epoch": 0.97, "grad_norm": 0.7370198369026184, "learning_rate": 3.295316223780432e-08, "loss": 2.019, "step": 29293 }, { "epoch": 0.97, "grad_norm": 0.7574487924575806, "learning_rate": 3.286700112584806e-08, "loss": 2.0207, "step": 29294 }, { "epoch": 0.97, "grad_norm": 0.7522162199020386, "learning_rate": 3.278095261671888e-08, "loss": 2.0353, "step": 29295 }, { "epoch": 0.97, "grad_norm": 0.7233257293701172, "learning_rate": 3.2695016711389304e-08, "loss": 2.0286, "step": 29296 }, { "epoch": 0.97, "grad_norm": 0.7328599691390991, "learning_rate": 3.260919341082969e-08, "loss": 2.0278, "step": 29297 }, { "epoch": 0.97, "grad_norm": 0.744659423828125, "learning_rate": 3.252348271601036e-08, "loss": 2.0223, "step": 29298 }, { "epoch": 0.97, "grad_norm": 0.7363348603248596, "learning_rate": 3.243788462789943e-08, "loss": 2.0274, "step": 29299 }, { "epoch": 0.97, "grad_norm": 0.7408971190452576, "learning_rate": 3.23523991474628e-08, "loss": 2.1354, "step": 29300 }, { "epoch": 0.97, "grad_norm": 0.747258722782135, "learning_rate": 3.226702627566747e-08, "loss": 2.0502, "step": 29301 }, { "epoch": 0.97, "grad_norm": 0.745252788066864, "learning_rate": 3.218176601347822e-08, "loss": 2.0378, "step": 29302 }, { "epoch": 0.97, "grad_norm": 0.7400323748588562, "learning_rate": 3.209661836185762e-08, "loss": 2.1078, "step": 29303 }, { "epoch": 0.97, "grad_norm": 0.7377060651779175, "learning_rate": 3.201158332176712e-08, "loss": 2.0405, "step": 29304 }, { "epoch": 0.97, "grad_norm": 0.7518259882926941, "learning_rate": 3.192666089416707e-08, "loss": 2.0257, "step": 29305 }, { "epoch": 0.98, "grad_norm": 0.7264400720596313, "learning_rate": 3.1841851080018915e-08, "loss": 1.9768, "step": 29306 }, { "epoch": 0.98, "grad_norm": 0.7252213358879089, "learning_rate": 3.175715388027967e-08, "loss": 2.0355, "step": 29307 }, { "epoch": 0.98, "grad_norm": 0.7488684058189392, "learning_rate": 3.167256929590634e-08, "loss": 1.9702, "step": 29308 }, { "epoch": 0.98, "grad_norm": 0.7420268654823303, "learning_rate": 3.158809732785373e-08, "loss": 2.032, "step": 29309 }, { "epoch": 0.98, "grad_norm": 0.7812830805778503, "learning_rate": 3.150373797707662e-08, "loss": 2.0282, "step": 29310 }, { "epoch": 0.98, "grad_norm": 0.7494191527366638, "learning_rate": 3.14194912445287e-08, "loss": 2.0459, "step": 29311 }, { "epoch": 0.98, "grad_norm": 0.7648168802261353, "learning_rate": 3.1335357131161424e-08, "loss": 2.1919, "step": 29312 }, { "epoch": 0.98, "grad_norm": 0.779164731502533, "learning_rate": 3.125133563792404e-08, "loss": 2.0307, "step": 29313 }, { "epoch": 0.98, "grad_norm": 0.7374753355979919, "learning_rate": 3.116742676576689e-08, "loss": 2.0529, "step": 29314 }, { "epoch": 0.98, "grad_norm": 0.7372316718101501, "learning_rate": 3.108363051563812e-08, "loss": 2.0014, "step": 29315 }, { "epoch": 0.98, "grad_norm": 0.7344589233398438, "learning_rate": 3.099994688848473e-08, "loss": 1.9612, "step": 29316 }, { "epoch": 0.98, "grad_norm": 0.7689493894577026, "learning_rate": 3.091637588525154e-08, "loss": 2.1096, "step": 29317 }, { "epoch": 0.98, "grad_norm": 0.776805579662323, "learning_rate": 3.083291750688222e-08, "loss": 2.0552, "step": 29318 }, { "epoch": 0.98, "grad_norm": 0.726927638053894, "learning_rate": 3.074957175431936e-08, "loss": 2.0349, "step": 29319 }, { "epoch": 0.98, "grad_norm": 0.8585315942764282, "learning_rate": 3.066633862850665e-08, "loss": 2.142, "step": 29320 }, { "epoch": 0.98, "grad_norm": 0.7576346397399902, "learning_rate": 3.058321813038223e-08, "loss": 2.0176, "step": 29321 }, { "epoch": 0.98, "grad_norm": 0.7717317938804626, "learning_rate": 3.0500210260886455e-08, "loss": 2.0725, "step": 29322 }, { "epoch": 0.98, "grad_norm": 0.7225966453552246, "learning_rate": 3.041731502095635e-08, "loss": 2.0448, "step": 29323 }, { "epoch": 0.98, "grad_norm": 0.7220733761787415, "learning_rate": 3.033453241152784e-08, "loss": 2.0348, "step": 29324 }, { "epoch": 0.98, "grad_norm": 0.7422890067100525, "learning_rate": 3.0251862433537946e-08, "loss": 2.0319, "step": 29325 }, { "epoch": 0.98, "grad_norm": 0.745943009853363, "learning_rate": 3.016930508792038e-08, "loss": 2.0937, "step": 29326 }, { "epoch": 0.98, "grad_norm": 0.7437798380851746, "learning_rate": 3.008686037560549e-08, "loss": 2.045, "step": 29327 }, { "epoch": 0.98, "grad_norm": 0.7862579226493835, "learning_rate": 3.000452829752698e-08, "loss": 2.0465, "step": 29328 }, { "epoch": 0.98, "grad_norm": 0.7073240876197815, "learning_rate": 2.992230885461411e-08, "loss": 2.0288, "step": 29329 }, { "epoch": 0.98, "grad_norm": 0.7497251033782959, "learning_rate": 2.984020204779725e-08, "loss": 2.0893, "step": 29330 }, { "epoch": 0.98, "grad_norm": 0.736870288848877, "learning_rate": 2.975820787800121e-08, "loss": 2.0346, "step": 29331 }, { "epoch": 0.98, "grad_norm": 0.7323058843612671, "learning_rate": 2.9676326346154137e-08, "loss": 2.0351, "step": 29332 }, { "epoch": 0.98, "grad_norm": 0.7193198204040527, "learning_rate": 2.959455745318085e-08, "loss": 2.0809, "step": 29333 }, { "epoch": 0.98, "grad_norm": 0.7655064463615417, "learning_rate": 2.9512901200005052e-08, "loss": 2.0029, "step": 29334 }, { "epoch": 0.98, "grad_norm": 0.7439627051353455, "learning_rate": 2.9431357587549336e-08, "loss": 2.0991, "step": 29335 }, { "epoch": 0.98, "grad_norm": 0.7420775294303894, "learning_rate": 2.934992661673408e-08, "loss": 2.0818, "step": 29336 }, { "epoch": 0.98, "grad_norm": 0.7294792532920837, "learning_rate": 2.926860828848077e-08, "loss": 2.0354, "step": 29337 }, { "epoch": 0.98, "grad_norm": 0.7574129700660706, "learning_rate": 2.918740260370756e-08, "loss": 2.0776, "step": 29338 }, { "epoch": 0.98, "grad_norm": 0.7367841005325317, "learning_rate": 2.9106309563331493e-08, "loss": 2.0132, "step": 29339 }, { "epoch": 0.98, "grad_norm": 0.7288132309913635, "learning_rate": 2.90253291682685e-08, "loss": 1.9739, "step": 29340 }, { "epoch": 0.98, "grad_norm": 0.7488398551940918, "learning_rate": 2.8944461419433413e-08, "loss": 2.0527, "step": 29341 }, { "epoch": 0.98, "grad_norm": 0.7898589372634888, "learning_rate": 2.8863706317739937e-08, "loss": 2.1155, "step": 29342 }, { "epoch": 0.98, "grad_norm": 0.7256209850311279, "learning_rate": 2.878306386410179e-08, "loss": 2.1007, "step": 29343 }, { "epoch": 0.98, "grad_norm": 0.7454590201377869, "learning_rate": 2.8702534059428245e-08, "loss": 1.9412, "step": 29344 }, { "epoch": 0.98, "grad_norm": 0.7770012617111206, "learning_rate": 2.8622116904629683e-08, "loss": 2.112, "step": 29345 }, { "epoch": 0.98, "grad_norm": 0.7592570185661316, "learning_rate": 2.8541812400615378e-08, "loss": 2.0265, "step": 29346 }, { "epoch": 0.98, "grad_norm": 0.7534446716308594, "learning_rate": 2.8461620548291268e-08, "loss": 2.0923, "step": 29347 }, { "epoch": 0.98, "grad_norm": 0.7456063628196716, "learning_rate": 2.83815413485633e-08, "loss": 2.0968, "step": 29348 }, { "epoch": 0.98, "grad_norm": 0.7405781149864197, "learning_rate": 2.8301574802337418e-08, "loss": 2.0849, "step": 29349 }, { "epoch": 0.98, "grad_norm": 0.7542093992233276, "learning_rate": 2.8221720910516227e-08, "loss": 2.0954, "step": 29350 }, { "epoch": 0.98, "grad_norm": 0.7183520793914795, "learning_rate": 2.814197967400234e-08, "loss": 2.0267, "step": 29351 }, { "epoch": 0.98, "grad_norm": 0.7541064620018005, "learning_rate": 2.806235109369504e-08, "loss": 2.0654, "step": 29352 }, { "epoch": 0.98, "grad_norm": 0.773010790348053, "learning_rate": 2.7982835170496936e-08, "loss": 2.0448, "step": 29353 }, { "epoch": 0.98, "grad_norm": 0.7693642377853394, "learning_rate": 2.790343190530287e-08, "loss": 2.0506, "step": 29354 }, { "epoch": 0.98, "grad_norm": 0.8021641373634338, "learning_rate": 2.782414129901323e-08, "loss": 2.0334, "step": 29355 }, { "epoch": 0.98, "grad_norm": 0.7557674050331116, "learning_rate": 2.7744963352521748e-08, "loss": 2.0602, "step": 29356 }, { "epoch": 0.98, "grad_norm": 0.7517495155334473, "learning_rate": 2.7665898066722153e-08, "loss": 1.9862, "step": 29357 }, { "epoch": 0.98, "grad_norm": 0.7144278883934021, "learning_rate": 2.758694544250928e-08, "loss": 2.0314, "step": 29358 }, { "epoch": 0.98, "grad_norm": 0.7326173186302185, "learning_rate": 2.750810548077576e-08, "loss": 1.9813, "step": 29359 }, { "epoch": 0.98, "grad_norm": 0.7637680768966675, "learning_rate": 2.742937818241087e-08, "loss": 2.0561, "step": 29360 }, { "epoch": 0.98, "grad_norm": 0.74089515209198, "learning_rate": 2.7350763548303904e-08, "loss": 2.0966, "step": 29361 }, { "epoch": 0.98, "grad_norm": 0.7455511689186096, "learning_rate": 2.727226157934304e-08, "loss": 2.0368, "step": 29362 }, { "epoch": 0.98, "grad_norm": 0.7451121211051941, "learning_rate": 2.7193872276416455e-08, "loss": 2.1327, "step": 29363 }, { "epoch": 0.98, "grad_norm": 0.7322178483009338, "learning_rate": 2.7115595640408997e-08, "loss": 2.0899, "step": 29364 }, { "epoch": 0.98, "grad_norm": 0.7536479830741882, "learning_rate": 2.7037431672204405e-08, "loss": 2.0056, "step": 29365 }, { "epoch": 0.98, "grad_norm": 0.7790051102638245, "learning_rate": 2.6959380372686416e-08, "loss": 2.0319, "step": 29366 }, { "epoch": 0.98, "grad_norm": 0.7282761335372925, "learning_rate": 2.6881441742735437e-08, "loss": 2.0594, "step": 29367 }, { "epoch": 0.98, "grad_norm": 0.7305344343185425, "learning_rate": 2.6803615783234094e-08, "loss": 1.9841, "step": 29368 }, { "epoch": 0.98, "grad_norm": 0.7390607595443726, "learning_rate": 2.6725902495060575e-08, "loss": 2.0526, "step": 29369 }, { "epoch": 0.98, "grad_norm": 0.7290567755699158, "learning_rate": 2.6648301879093063e-08, "loss": 2.0031, "step": 29370 }, { "epoch": 0.98, "grad_norm": 0.7620627284049988, "learning_rate": 2.657081393620753e-08, "loss": 2.0376, "step": 29371 }, { "epoch": 0.98, "grad_norm": 0.7454214096069336, "learning_rate": 2.6493438667281047e-08, "loss": 2.0847, "step": 29372 }, { "epoch": 0.98, "grad_norm": 0.7452623844146729, "learning_rate": 2.6416176073185142e-08, "loss": 2.0482, "step": 29373 }, { "epoch": 0.98, "grad_norm": 0.7562936544418335, "learning_rate": 2.6339026154795777e-08, "loss": 2.045, "step": 29374 }, { "epoch": 0.98, "grad_norm": 0.7706596255302429, "learning_rate": 2.626198891298226e-08, "loss": 2.0529, "step": 29375 }, { "epoch": 0.98, "grad_norm": 0.7458763122558594, "learning_rate": 2.6185064348615008e-08, "loss": 1.9987, "step": 29376 }, { "epoch": 0.98, "grad_norm": 0.7423750758171082, "learning_rate": 2.6108252462564433e-08, "loss": 2.0116, "step": 29377 }, { "epoch": 0.98, "grad_norm": 0.747506856918335, "learning_rate": 2.603155325569762e-08, "loss": 2.0382, "step": 29378 }, { "epoch": 0.98, "grad_norm": 0.7569146752357483, "learning_rate": 2.595496672888054e-08, "loss": 2.1183, "step": 29379 }, { "epoch": 0.98, "grad_norm": 0.7375648021697998, "learning_rate": 2.5878492882978057e-08, "loss": 1.9968, "step": 29380 }, { "epoch": 0.98, "grad_norm": 0.7698425054550171, "learning_rate": 2.580213171885615e-08, "loss": 2.0333, "step": 29381 }, { "epoch": 0.98, "grad_norm": 0.748769223690033, "learning_rate": 2.572588323737635e-08, "loss": 2.0137, "step": 29382 }, { "epoch": 0.98, "grad_norm": 0.7573413848876953, "learning_rate": 2.5649747439399076e-08, "loss": 1.9936, "step": 29383 }, { "epoch": 0.98, "grad_norm": 0.7400423288345337, "learning_rate": 2.5573724325785864e-08, "loss": 2.0659, "step": 29384 }, { "epoch": 0.98, "grad_norm": 0.7357686161994934, "learning_rate": 2.549781389739603e-08, "loss": 2.0694, "step": 29385 }, { "epoch": 0.98, "grad_norm": 0.7538254261016846, "learning_rate": 2.5422016155085548e-08, "loss": 2.0528, "step": 29386 }, { "epoch": 0.98, "grad_norm": 0.7565253376960754, "learning_rate": 2.5346331099711518e-08, "loss": 2.0526, "step": 29387 }, { "epoch": 0.98, "grad_norm": 0.7397695183753967, "learning_rate": 2.527075873212881e-08, "loss": 2.1293, "step": 29388 }, { "epoch": 0.98, "grad_norm": 0.7330043911933899, "learning_rate": 2.519529905319118e-08, "loss": 2.1158, "step": 29389 }, { "epoch": 0.98, "grad_norm": 0.7262603640556335, "learning_rate": 2.5119952063751283e-08, "loss": 1.9945, "step": 29390 }, { "epoch": 0.98, "grad_norm": 0.7386509776115417, "learning_rate": 2.5044717764660663e-08, "loss": 2.0188, "step": 29391 }, { "epoch": 0.98, "grad_norm": 0.7373019456863403, "learning_rate": 2.4969596156768638e-08, "loss": 2.0441, "step": 29392 }, { "epoch": 0.98, "grad_norm": 0.7476633191108704, "learning_rate": 2.4894587240923417e-08, "loss": 2.003, "step": 29393 }, { "epoch": 0.98, "grad_norm": 0.7162723541259766, "learning_rate": 2.4819691017974324e-08, "loss": 2.0466, "step": 29394 }, { "epoch": 0.98, "grad_norm": 0.7529038786888123, "learning_rate": 2.4744907488766235e-08, "loss": 2.009, "step": 29395 }, { "epoch": 0.98, "grad_norm": 0.7432703375816345, "learning_rate": 2.467023665414403e-08, "loss": 1.9579, "step": 29396 }, { "epoch": 0.98, "grad_norm": 0.7757458090782166, "learning_rate": 2.459567851495148e-08, "loss": 2.0704, "step": 29397 }, { "epoch": 0.98, "grad_norm": 0.760806679725647, "learning_rate": 2.4521233072030136e-08, "loss": 2.0925, "step": 29398 }, { "epoch": 0.98, "grad_norm": 0.7359606623649597, "learning_rate": 2.4446900326221546e-08, "loss": 2.0926, "step": 29399 }, { "epoch": 0.98, "grad_norm": 0.7385281920433044, "learning_rate": 2.4372680278366145e-08, "loss": 2.0881, "step": 29400 }, { "epoch": 0.98, "grad_norm": 0.7395015358924866, "learning_rate": 2.4298572929302154e-08, "loss": 2.0404, "step": 29401 }, { "epoch": 0.98, "grad_norm": 0.7336463332176208, "learning_rate": 2.4224578279866685e-08, "loss": 2.0498, "step": 29402 }, { "epoch": 0.98, "grad_norm": 0.7778065800666809, "learning_rate": 2.415069633089462e-08, "loss": 2.0837, "step": 29403 }, { "epoch": 0.98, "grad_norm": 0.7437312006950378, "learning_rate": 2.4076927083221958e-08, "loss": 2.0579, "step": 29404 }, { "epoch": 0.98, "grad_norm": 0.7457354664802551, "learning_rate": 2.4003270537682476e-08, "loss": 2.0958, "step": 29405 }, { "epoch": 0.98, "grad_norm": 0.7468243837356567, "learning_rate": 2.3929726695106625e-08, "loss": 2.0544, "step": 29406 }, { "epoch": 0.98, "grad_norm": 0.7298638224601746, "learning_rate": 2.385629555632707e-08, "loss": 1.9889, "step": 29407 }, { "epoch": 0.98, "grad_norm": 0.7524846196174622, "learning_rate": 2.3782977122170925e-08, "loss": 1.9358, "step": 29408 }, { "epoch": 0.98, "grad_norm": 0.7457708120346069, "learning_rate": 2.370977139346975e-08, "loss": 2.049, "step": 29409 }, { "epoch": 0.98, "grad_norm": 0.7436990141868591, "learning_rate": 2.363667837104844e-08, "loss": 2.0708, "step": 29410 }, { "epoch": 0.98, "grad_norm": 0.8649788498878479, "learning_rate": 2.3563698055732996e-08, "loss": 2.0458, "step": 29411 }, { "epoch": 0.98, "grad_norm": 0.7574883103370667, "learning_rate": 2.3490830448347212e-08, "loss": 2.035, "step": 29412 }, { "epoch": 0.98, "grad_norm": 0.7400567531585693, "learning_rate": 2.341807554971598e-08, "loss": 2.0675, "step": 29413 }, { "epoch": 0.98, "grad_norm": 0.7336089611053467, "learning_rate": 2.3345433360660862e-08, "loss": 2.0558, "step": 29414 }, { "epoch": 0.98, "grad_norm": 0.7311110496520996, "learning_rate": 2.3272903882002317e-08, "loss": 1.9729, "step": 29415 }, { "epoch": 0.98, "grad_norm": 0.7742444276809692, "learning_rate": 2.3200487114558578e-08, "loss": 1.9891, "step": 29416 }, { "epoch": 0.98, "grad_norm": 0.7852016687393188, "learning_rate": 2.31281830591501e-08, "loss": 1.9626, "step": 29417 }, { "epoch": 0.98, "grad_norm": 0.749605655670166, "learning_rate": 2.3055991716590676e-08, "loss": 2.0609, "step": 29418 }, { "epoch": 0.98, "grad_norm": 0.7289119362831116, "learning_rate": 2.298391308769965e-08, "loss": 2.0068, "step": 29419 }, { "epoch": 0.98, "grad_norm": 0.7433663010597229, "learning_rate": 2.2911947173287486e-08, "loss": 2.0569, "step": 29420 }, { "epoch": 0.98, "grad_norm": 0.750966489315033, "learning_rate": 2.2840093974169087e-08, "loss": 2.0608, "step": 29421 }, { "epoch": 0.98, "grad_norm": 0.7541066408157349, "learning_rate": 2.2768353491157136e-08, "loss": 2.0828, "step": 29422 }, { "epoch": 0.98, "grad_norm": 0.7495532035827637, "learning_rate": 2.2696725725060987e-08, "loss": 2.0623, "step": 29423 }, { "epoch": 0.98, "grad_norm": 0.7380034923553467, "learning_rate": 2.262521067668888e-08, "loss": 2.0021, "step": 29424 }, { "epoch": 0.98, "grad_norm": 0.8025142550468445, "learning_rate": 2.255380834685128e-08, "loss": 1.9405, "step": 29425 }, { "epoch": 0.98, "grad_norm": 0.7490605711936951, "learning_rate": 2.2482518736351988e-08, "loss": 2.0364, "step": 29426 }, { "epoch": 0.98, "grad_norm": 0.794539213180542, "learning_rate": 2.2411341845999247e-08, "loss": 2.084, "step": 29427 }, { "epoch": 0.98, "grad_norm": 0.7571232318878174, "learning_rate": 2.2340277676594634e-08, "loss": 2.0391, "step": 29428 }, { "epoch": 0.98, "grad_norm": 0.7575921416282654, "learning_rate": 2.2269326228941955e-08, "loss": 2.0379, "step": 29429 }, { "epoch": 0.98, "grad_norm": 0.729982852935791, "learning_rate": 2.21984875038439e-08, "loss": 2.0477, "step": 29430 }, { "epoch": 0.98, "grad_norm": 0.7419064044952393, "learning_rate": 2.2127761502098722e-08, "loss": 2.057, "step": 29431 }, { "epoch": 0.98, "grad_norm": 0.7254217267036438, "learning_rate": 2.2057148224507996e-08, "loss": 2.0505, "step": 29432 }, { "epoch": 0.98, "grad_norm": 0.7280661463737488, "learning_rate": 2.1986647671866647e-08, "loss": 2.0061, "step": 29433 }, { "epoch": 0.98, "grad_norm": 0.7255533337593079, "learning_rate": 2.191625984497181e-08, "loss": 2.0489, "step": 29434 }, { "epoch": 0.98, "grad_norm": 0.7380592823028564, "learning_rate": 2.184598474462063e-08, "loss": 2.0897, "step": 29435 }, { "epoch": 0.98, "grad_norm": 0.7484970688819885, "learning_rate": 2.1775822371605803e-08, "loss": 2.0594, "step": 29436 }, { "epoch": 0.98, "grad_norm": 0.7230631113052368, "learning_rate": 2.170577272671892e-08, "loss": 2.0514, "step": 29437 }, { "epoch": 0.98, "grad_norm": 0.7763804197311401, "learning_rate": 2.163583581075379e-08, "loss": 2.0119, "step": 29438 }, { "epoch": 0.98, "grad_norm": 0.7494169473648071, "learning_rate": 2.1566011624497562e-08, "loss": 2.0406, "step": 29439 }, { "epoch": 0.98, "grad_norm": 0.7479962706565857, "learning_rate": 2.1496300168741823e-08, "loss": 2.085, "step": 29440 }, { "epoch": 0.98, "grad_norm": 0.7252236008644104, "learning_rate": 2.1426701444272614e-08, "loss": 1.983, "step": 29441 }, { "epoch": 0.98, "grad_norm": 0.7312276363372803, "learning_rate": 2.1357215451875967e-08, "loss": 2.083, "step": 29442 }, { "epoch": 0.98, "grad_norm": 0.7349491119384766, "learning_rate": 2.1287842192337926e-08, "loss": 2.0301, "step": 29443 }, { "epoch": 0.98, "grad_norm": 0.752582848072052, "learning_rate": 2.1218581666441197e-08, "loss": 1.9869, "step": 29444 }, { "epoch": 0.98, "grad_norm": 0.7455941438674927, "learning_rate": 2.1149433874969595e-08, "loss": 2.0186, "step": 29445 }, { "epoch": 0.98, "grad_norm": 0.7596490383148193, "learning_rate": 2.1080398818702496e-08, "loss": 2.0746, "step": 29446 }, { "epoch": 0.98, "grad_norm": 0.7599145770072937, "learning_rate": 2.10114764984215e-08, "loss": 2.0093, "step": 29447 }, { "epoch": 0.98, "grad_norm": 0.7501655220985413, "learning_rate": 2.0942666914904874e-08, "loss": 2.0748, "step": 29448 }, { "epoch": 0.98, "grad_norm": 0.7493268847465515, "learning_rate": 2.0873970068928662e-08, "loss": 2.0496, "step": 29449 }, { "epoch": 0.98, "grad_norm": 0.7420281767845154, "learning_rate": 2.080538596127113e-08, "loss": 2.0445, "step": 29450 }, { "epoch": 0.98, "grad_norm": 0.7468722462654114, "learning_rate": 2.0736914592704993e-08, "loss": 2.0641, "step": 29451 }, { "epoch": 0.98, "grad_norm": 0.7560680508613586, "learning_rate": 2.0668555964005188e-08, "loss": 2.0356, "step": 29452 }, { "epoch": 0.98, "grad_norm": 0.7382835745811462, "learning_rate": 2.060031007594443e-08, "loss": 1.9545, "step": 29453 }, { "epoch": 0.98, "grad_norm": 0.7260236144065857, "learning_rate": 2.0532176929292103e-08, "loss": 1.9785, "step": 29454 }, { "epoch": 0.98, "grad_norm": 0.7342872619628906, "learning_rate": 2.04641565248187e-08, "loss": 2.0301, "step": 29455 }, { "epoch": 0.98, "grad_norm": 0.7518648505210876, "learning_rate": 2.03962488632925e-08, "loss": 2.0148, "step": 29456 }, { "epoch": 0.98, "grad_norm": 0.7374304533004761, "learning_rate": 2.0328453945480662e-08, "loss": 2.0239, "step": 29457 }, { "epoch": 0.98, "grad_norm": 0.7572942972183228, "learning_rate": 2.0260771772150356e-08, "loss": 2.0707, "step": 29458 }, { "epoch": 0.98, "grad_norm": 0.7302369475364685, "learning_rate": 2.01932023440643e-08, "loss": 2.0184, "step": 29459 }, { "epoch": 0.98, "grad_norm": 0.746562659740448, "learning_rate": 2.012574566198633e-08, "loss": 1.9963, "step": 29460 }, { "epoch": 0.98, "grad_norm": 0.7334021329879761, "learning_rate": 2.0058401726679166e-08, "loss": 2.0358, "step": 29461 }, { "epoch": 0.98, "grad_norm": 0.7479128241539001, "learning_rate": 1.9991170538904425e-08, "loss": 2.0132, "step": 29462 }, { "epoch": 0.98, "grad_norm": 0.7861015200614929, "learning_rate": 1.9924052099419278e-08, "loss": 2.0161, "step": 29463 }, { "epoch": 0.98, "grad_norm": 0.7479234933853149, "learning_rate": 1.9857046408984226e-08, "loss": 2.0371, "step": 29464 }, { "epoch": 0.98, "grad_norm": 0.7484697699546814, "learning_rate": 1.9790153468354222e-08, "loss": 2.0438, "step": 29465 }, { "epoch": 0.98, "grad_norm": 0.7572760581970215, "learning_rate": 1.972337327828755e-08, "loss": 2.0907, "step": 29466 }, { "epoch": 0.98, "grad_norm": 0.7560367584228516, "learning_rate": 1.965670583953583e-08, "loss": 2.0687, "step": 29467 }, { "epoch": 0.98, "grad_norm": 0.74394291639328, "learning_rate": 1.9590151152854008e-08, "loss": 2.0257, "step": 29468 }, { "epoch": 0.98, "grad_norm": 0.7362436652183533, "learning_rate": 1.9523709218993715e-08, "loss": 2.1362, "step": 29469 }, { "epoch": 0.98, "grad_norm": 0.7707787752151489, "learning_rate": 1.9457380038705454e-08, "loss": 2.1186, "step": 29470 }, { "epoch": 0.98, "grad_norm": 0.7447287440299988, "learning_rate": 1.939116361273863e-08, "loss": 2.0038, "step": 29471 }, { "epoch": 0.98, "grad_norm": 0.7444043159484863, "learning_rate": 1.9325059941840417e-08, "loss": 1.9613, "step": 29472 }, { "epoch": 0.98, "grad_norm": 0.7259814739227295, "learning_rate": 1.9259069026760225e-08, "loss": 2.0658, "step": 29473 }, { "epoch": 0.98, "grad_norm": 0.7391754388809204, "learning_rate": 1.9193190868239674e-08, "loss": 2.0075, "step": 29474 }, { "epoch": 0.98, "grad_norm": 0.7758517861366272, "learning_rate": 1.9127425467025952e-08, "loss": 2.0041, "step": 29475 }, { "epoch": 0.98, "grad_norm": 0.7531094551086426, "learning_rate": 1.906177282386179e-08, "loss": 2.0881, "step": 29476 }, { "epoch": 0.98, "grad_norm": 0.7357012629508972, "learning_rate": 1.8996232939487713e-08, "loss": 2.0042, "step": 29477 }, { "epoch": 0.98, "grad_norm": 0.746687114238739, "learning_rate": 1.893080581464535e-08, "loss": 2.0724, "step": 29478 }, { "epoch": 0.98, "grad_norm": 0.7484965324401855, "learning_rate": 1.8865491450072993e-08, "loss": 2.0544, "step": 29479 }, { "epoch": 0.98, "grad_norm": 0.7309772968292236, "learning_rate": 1.8800289846508947e-08, "loss": 2.0621, "step": 29480 }, { "epoch": 0.98, "grad_norm": 0.7399543523788452, "learning_rate": 1.8735201004689287e-08, "loss": 2.0533, "step": 29481 }, { "epoch": 0.98, "grad_norm": 0.7407483458518982, "learning_rate": 1.8670224925348978e-08, "loss": 2.0458, "step": 29482 }, { "epoch": 0.98, "grad_norm": 0.7825150489807129, "learning_rate": 1.8605361609224106e-08, "loss": 2.0457, "step": 29483 }, { "epoch": 0.98, "grad_norm": 0.7484698295593262, "learning_rate": 1.854061105704519e-08, "loss": 2.0085, "step": 29484 }, { "epoch": 0.98, "grad_norm": 0.7498574256896973, "learning_rate": 1.8475973269546088e-08, "loss": 2.0966, "step": 29485 }, { "epoch": 0.98, "grad_norm": 0.7407478094100952, "learning_rate": 1.841144824745511e-08, "loss": 2.0304, "step": 29486 }, { "epoch": 0.98, "grad_norm": 0.7292353510856628, "learning_rate": 1.834703599150056e-08, "loss": 2.0005, "step": 29487 }, { "epoch": 0.98, "grad_norm": 0.7498602271080017, "learning_rate": 1.8282736502412968e-08, "loss": 2.0751, "step": 29488 }, { "epoch": 0.98, "grad_norm": 0.7229365706443787, "learning_rate": 1.8218549780917305e-08, "loss": 2.0404, "step": 29489 }, { "epoch": 0.98, "grad_norm": 0.7302898168563843, "learning_rate": 1.8154475827737438e-08, "loss": 2.0191, "step": 29490 }, { "epoch": 0.98, "grad_norm": 0.7422252297401428, "learning_rate": 1.8090514643598345e-08, "loss": 2.0666, "step": 29491 }, { "epoch": 0.98, "grad_norm": 0.7347898483276367, "learning_rate": 1.8026666229222778e-08, "loss": 2.0146, "step": 29492 }, { "epoch": 0.98, "grad_norm": 0.7506165504455566, "learning_rate": 1.796293058533238e-08, "loss": 2.0602, "step": 29493 }, { "epoch": 0.98, "grad_norm": 0.7565147876739502, "learning_rate": 1.7899307712646587e-08, "loss": 2.0605, "step": 29494 }, { "epoch": 0.98, "grad_norm": 0.740929126739502, "learning_rate": 1.783579761188481e-08, "loss": 2.0986, "step": 29495 }, { "epoch": 0.98, "grad_norm": 0.8062602281570435, "learning_rate": 1.7772400283763148e-08, "loss": 2.0244, "step": 29496 }, { "epoch": 0.98, "grad_norm": 0.7222066521644592, "learning_rate": 1.7709115728999915e-08, "loss": 2.0052, "step": 29497 }, { "epoch": 0.98, "grad_norm": 0.7307189702987671, "learning_rate": 1.7645943948307875e-08, "loss": 2.0226, "step": 29498 }, { "epoch": 0.98, "grad_norm": 0.7577928304672241, "learning_rate": 1.7582884942402013e-08, "loss": 2.0333, "step": 29499 }, { "epoch": 0.98, "grad_norm": 0.7576102018356323, "learning_rate": 1.751993871199398e-08, "loss": 2.0079, "step": 29500 }, { "epoch": 0.98, "grad_norm": 0.7542493343353271, "learning_rate": 1.7457105257796535e-08, "loss": 2.0701, "step": 29501 }, { "epoch": 0.98, "grad_norm": 0.7300220131874084, "learning_rate": 1.7394384580518008e-08, "loss": 2.0531, "step": 29502 }, { "epoch": 0.98, "grad_norm": 0.7652482986450195, "learning_rate": 1.733177668086672e-08, "loss": 2.0727, "step": 29503 }, { "epoch": 0.98, "grad_norm": 0.7316184639930725, "learning_rate": 1.7269281559550988e-08, "loss": 2.0036, "step": 29504 }, { "epoch": 0.98, "grad_norm": 0.7651429772377014, "learning_rate": 1.7206899217275807e-08, "loss": 2.0291, "step": 29505 }, { "epoch": 0.98, "grad_norm": 0.7546770572662354, "learning_rate": 1.714462965474728e-08, "loss": 2.0266, "step": 29506 }, { "epoch": 0.98, "grad_norm": 0.7330334782600403, "learning_rate": 1.7082472872668176e-08, "loss": 2.0851, "step": 29507 }, { "epoch": 0.98, "grad_norm": 0.7222148180007935, "learning_rate": 1.702042887174127e-08, "loss": 2.0723, "step": 29508 }, { "epoch": 0.98, "grad_norm": 0.7068103551864624, "learning_rate": 1.6958497652665996e-08, "loss": 2.0503, "step": 29509 }, { "epoch": 0.98, "grad_norm": 0.7782090306282043, "learning_rate": 1.689667921614291e-08, "loss": 2.0464, "step": 29510 }, { "epoch": 0.98, "grad_norm": 0.7475563883781433, "learning_rate": 1.683497356287145e-08, "loss": 2.0938, "step": 29511 }, { "epoch": 0.98, "grad_norm": 0.7553049325942993, "learning_rate": 1.6773380693546616e-08, "loss": 2.118, "step": 29512 }, { "epoch": 0.98, "grad_norm": 0.7494302988052368, "learning_rate": 1.6711900608865628e-08, "loss": 2.0732, "step": 29513 }, { "epoch": 0.98, "grad_norm": 0.7336648106575012, "learning_rate": 1.6650533309523486e-08, "loss": 2.025, "step": 29514 }, { "epoch": 0.98, "grad_norm": 0.749219536781311, "learning_rate": 1.6589278796212972e-08, "loss": 1.981, "step": 29515 }, { "epoch": 0.98, "grad_norm": 0.7438738346099854, "learning_rate": 1.652813706962464e-08, "loss": 2.1071, "step": 29516 }, { "epoch": 0.98, "grad_norm": 0.7481508255004883, "learning_rate": 1.646710813045127e-08, "loss": 1.9306, "step": 29517 }, { "epoch": 0.98, "grad_norm": 0.7123644351959229, "learning_rate": 1.6406191979382315e-08, "loss": 2.0349, "step": 29518 }, { "epoch": 0.98, "grad_norm": 0.7726489901542664, "learning_rate": 1.6345388617103885e-08, "loss": 2.0378, "step": 29519 }, { "epoch": 0.98, "grad_norm": 0.7686692476272583, "learning_rate": 1.6284698044305434e-08, "loss": 1.9719, "step": 29520 }, { "epoch": 0.98, "grad_norm": 0.7811552286148071, "learning_rate": 1.6224120261670863e-08, "loss": 2.0849, "step": 29521 }, { "epoch": 0.98, "grad_norm": 0.7561351656913757, "learning_rate": 1.616365526988517e-08, "loss": 2.1096, "step": 29522 }, { "epoch": 0.98, "grad_norm": 0.7295064330101013, "learning_rate": 1.610330306963115e-08, "loss": 1.9981, "step": 29523 }, { "epoch": 0.98, "grad_norm": 0.7390469312667847, "learning_rate": 1.6043063661591584e-08, "loss": 2.0026, "step": 29524 }, { "epoch": 0.98, "grad_norm": 0.7628945112228394, "learning_rate": 1.5982937046444823e-08, "loss": 2.0456, "step": 29525 }, { "epoch": 0.98, "grad_norm": 0.7747546434402466, "learning_rate": 1.5922923224872545e-08, "loss": 2.0321, "step": 29526 }, { "epoch": 0.98, "grad_norm": 0.739540696144104, "learning_rate": 1.586302219755198e-08, "loss": 2.0125, "step": 29527 }, { "epoch": 0.98, "grad_norm": 0.7367304563522339, "learning_rate": 1.580323396515926e-08, "loss": 2.0717, "step": 29528 }, { "epoch": 0.98, "grad_norm": 0.7799481749534607, "learning_rate": 1.5743558528371615e-08, "loss": 2.1441, "step": 29529 }, { "epoch": 0.98, "grad_norm": 0.7562848329544067, "learning_rate": 1.5683995887860738e-08, "loss": 2.005, "step": 29530 }, { "epoch": 0.98, "grad_norm": 0.7527329921722412, "learning_rate": 1.5624546044300525e-08, "loss": 2.0239, "step": 29531 }, { "epoch": 0.98, "grad_norm": 0.7479802370071411, "learning_rate": 1.556520899836378e-08, "loss": 2.032, "step": 29532 }, { "epoch": 0.98, "grad_norm": 0.7515600323677063, "learning_rate": 1.550598475071885e-08, "loss": 2.0218, "step": 29533 }, { "epoch": 0.98, "grad_norm": 0.7401430010795593, "learning_rate": 1.5446873302036314e-08, "loss": 2.0931, "step": 29534 }, { "epoch": 0.98, "grad_norm": 0.7397017478942871, "learning_rate": 1.5387874652983415e-08, "loss": 2.0383, "step": 29535 }, { "epoch": 0.98, "grad_norm": 0.7486613988876343, "learning_rate": 1.5328988804226285e-08, "loss": 2.0738, "step": 29536 }, { "epoch": 0.98, "grad_norm": 0.7537353038787842, "learning_rate": 1.527021575643106e-08, "loss": 2.0808, "step": 29537 }, { "epoch": 0.98, "grad_norm": 0.7955949902534485, "learning_rate": 1.5211555510261655e-08, "loss": 2.0531, "step": 29538 }, { "epoch": 0.98, "grad_norm": 0.7563859820365906, "learning_rate": 1.515300806637976e-08, "loss": 2.0007, "step": 29539 }, { "epoch": 0.98, "grad_norm": 0.7405341863632202, "learning_rate": 1.509457342544818e-08, "loss": 2.0667, "step": 29540 }, { "epoch": 0.98, "grad_norm": 0.7750539183616638, "learning_rate": 1.5036251588125274e-08, "loss": 2.1104, "step": 29541 }, { "epoch": 0.98, "grad_norm": 0.7347989678382874, "learning_rate": 1.4978042555072734e-08, "loss": 2.0539, "step": 29542 }, { "epoch": 0.98, "grad_norm": 0.7428281903266907, "learning_rate": 1.49199463269456e-08, "loss": 2.0465, "step": 29543 }, { "epoch": 0.98, "grad_norm": 0.7458774447441101, "learning_rate": 1.4861962904401118e-08, "loss": 2.1313, "step": 29544 }, { "epoch": 0.98, "grad_norm": 0.7424444556236267, "learning_rate": 1.4804092288093208e-08, "loss": 2.0041, "step": 29545 }, { "epoch": 0.98, "grad_norm": 0.7710654139518738, "learning_rate": 1.4746334478678014e-08, "loss": 2.0339, "step": 29546 }, { "epoch": 0.98, "grad_norm": 0.7892676591873169, "learning_rate": 1.4688689476807239e-08, "loss": 2.006, "step": 29547 }, { "epoch": 0.98, "grad_norm": 0.7307862043380737, "learning_rate": 1.4631157283130359e-08, "loss": 2.0476, "step": 29548 }, { "epoch": 0.98, "grad_norm": 0.742354154586792, "learning_rate": 1.4573737898299078e-08, "loss": 2.0345, "step": 29549 }, { "epoch": 0.98, "grad_norm": 0.7802332639694214, "learning_rate": 1.4516431322961765e-08, "loss": 2.0298, "step": 29550 }, { "epoch": 0.98, "grad_norm": 0.7733453512191772, "learning_rate": 1.4459237557766792e-08, "loss": 2.0846, "step": 29551 }, { "epoch": 0.98, "grad_norm": 0.7710857391357422, "learning_rate": 1.4402156603358086e-08, "loss": 2.0037, "step": 29552 }, { "epoch": 0.98, "grad_norm": 0.7386457324028015, "learning_rate": 1.4345188460381798e-08, "loss": 2.0342, "step": 29553 }, { "epoch": 0.98, "grad_norm": 0.7242453098297119, "learning_rate": 1.4288333129481857e-08, "loss": 2.0362, "step": 29554 }, { "epoch": 0.98, "grad_norm": 0.7186710834503174, "learning_rate": 1.4231590611299972e-08, "loss": 1.9885, "step": 29555 }, { "epoch": 0.98, "grad_norm": 0.7374231219291687, "learning_rate": 1.4174960906476742e-08, "loss": 2.0945, "step": 29556 }, { "epoch": 0.98, "grad_norm": 0.7453835010528564, "learning_rate": 1.4118444015652766e-08, "loss": 2.0179, "step": 29557 }, { "epoch": 0.98, "grad_norm": 0.8274710178375244, "learning_rate": 1.4062039939466421e-08, "loss": 2.0576, "step": 29558 }, { "epoch": 0.98, "grad_norm": 0.7202733755111694, "learning_rate": 1.4005748678554976e-08, "loss": 1.9881, "step": 29559 }, { "epoch": 0.98, "grad_norm": 0.7747653126716614, "learning_rate": 1.3949570233553478e-08, "loss": 2.0566, "step": 29560 }, { "epoch": 0.98, "grad_norm": 0.7432642579078674, "learning_rate": 1.3893504605098086e-08, "loss": 2.0375, "step": 29561 }, { "epoch": 0.98, "grad_norm": 0.7311941385269165, "learning_rate": 1.3837551793821625e-08, "loss": 2.0504, "step": 29562 }, { "epoch": 0.98, "grad_norm": 0.7494640350341797, "learning_rate": 1.3781711800355813e-08, "loss": 2.0431, "step": 29563 }, { "epoch": 0.98, "grad_norm": 0.7465858459472656, "learning_rate": 1.3725984625332367e-08, "loss": 2.0663, "step": 29564 }, { "epoch": 0.98, "grad_norm": 0.7584214210510254, "learning_rate": 1.3670370269378563e-08, "loss": 1.9609, "step": 29565 }, { "epoch": 0.98, "grad_norm": 0.7568184733390808, "learning_rate": 1.3614868733125008e-08, "loss": 2.0191, "step": 29566 }, { "epoch": 0.98, "grad_norm": 0.7417550086975098, "learning_rate": 1.3559480017198978e-08, "loss": 2.0459, "step": 29567 }, { "epoch": 0.98, "grad_norm": 0.7618486285209656, "learning_rate": 1.3504204122224418e-08, "loss": 2.1306, "step": 29568 }, { "epoch": 0.98, "grad_norm": 0.7364776730537415, "learning_rate": 1.3449041048827494e-08, "loss": 2.0277, "step": 29569 }, { "epoch": 0.98, "grad_norm": 0.7382826209068298, "learning_rate": 1.339399079762993e-08, "loss": 2.0747, "step": 29570 }, { "epoch": 0.98, "grad_norm": 0.7807106971740723, "learning_rate": 1.3339053369254562e-08, "loss": 2.0417, "step": 29571 }, { "epoch": 0.98, "grad_norm": 0.725170910358429, "learning_rate": 1.3284228764320895e-08, "loss": 2.1039, "step": 29572 }, { "epoch": 0.98, "grad_norm": 0.7597686052322388, "learning_rate": 1.3229516983450652e-08, "loss": 2.0164, "step": 29573 }, { "epoch": 0.98, "grad_norm": 0.739107608795166, "learning_rate": 1.317491802726001e-08, "loss": 2.0248, "step": 29574 }, { "epoch": 0.98, "grad_norm": 0.7512032985687256, "learning_rate": 1.3120431896366248e-08, "loss": 1.9587, "step": 29575 }, { "epoch": 0.98, "grad_norm": 0.7588019371032715, "learning_rate": 1.3066058591384434e-08, "loss": 2.1234, "step": 29576 }, { "epoch": 0.98, "grad_norm": 0.7493727803230286, "learning_rate": 1.301179811293074e-08, "loss": 2.0889, "step": 29577 }, { "epoch": 0.98, "grad_norm": 0.7338868975639343, "learning_rate": 1.295765046161579e-08, "loss": 1.9625, "step": 29578 }, { "epoch": 0.98, "grad_norm": 0.7567895650863647, "learning_rate": 1.2903615638051315e-08, "loss": 2.0153, "step": 29579 }, { "epoch": 0.98, "grad_norm": 0.7556126117706299, "learning_rate": 1.2849693642850159e-08, "loss": 2.0444, "step": 29580 }, { "epoch": 0.98, "grad_norm": 0.7612341046333313, "learning_rate": 1.2795884476619613e-08, "loss": 2.0805, "step": 29581 }, { "epoch": 0.98, "grad_norm": 0.7357383966445923, "learning_rate": 1.2742188139969191e-08, "loss": 2.0411, "step": 29582 }, { "epoch": 0.98, "grad_norm": 0.7807154059410095, "learning_rate": 1.2688604633503965e-08, "loss": 2.0683, "step": 29583 }, { "epoch": 0.98, "grad_norm": 0.7407559156417847, "learning_rate": 1.2635133957829004e-08, "loss": 2.0471, "step": 29584 }, { "epoch": 0.98, "grad_norm": 0.7420535683631897, "learning_rate": 1.2581776113549382e-08, "loss": 2.0779, "step": 29585 }, { "epoch": 0.98, "grad_norm": 0.7476972937583923, "learning_rate": 1.2528531101267949e-08, "loss": 2.0873, "step": 29586 }, { "epoch": 0.98, "grad_norm": 0.7247859835624695, "learning_rate": 1.2475398921585335e-08, "loss": 2.0219, "step": 29587 }, { "epoch": 0.98, "grad_norm": 0.7406803965568542, "learning_rate": 1.2422379575102172e-08, "loss": 1.9931, "step": 29588 }, { "epoch": 0.98, "grad_norm": 0.7296877503395081, "learning_rate": 1.236947306241909e-08, "loss": 2.0133, "step": 29589 }, { "epoch": 0.98, "grad_norm": 0.7596883177757263, "learning_rate": 1.231667938413117e-08, "loss": 1.9814, "step": 29590 }, { "epoch": 0.98, "grad_norm": 0.7269588708877563, "learning_rate": 1.226399854083682e-08, "loss": 2.0812, "step": 29591 }, { "epoch": 0.98, "grad_norm": 0.7719945311546326, "learning_rate": 1.2211430533130009e-08, "loss": 2.0562, "step": 29592 }, { "epoch": 0.98, "grad_norm": 0.758465051651001, "learning_rate": 1.2158975361604708e-08, "loss": 2.0015, "step": 29593 }, { "epoch": 0.98, "grad_norm": 0.7640414237976074, "learning_rate": 1.2106633026854886e-08, "loss": 2.0533, "step": 29594 }, { "epoch": 0.98, "grad_norm": 0.7794142961502075, "learning_rate": 1.205440352947007e-08, "loss": 2.0934, "step": 29595 }, { "epoch": 0.98, "grad_norm": 0.7244154810905457, "learning_rate": 1.2002286870040902e-08, "loss": 2.0317, "step": 29596 }, { "epoch": 0.98, "grad_norm": 0.7503039240837097, "learning_rate": 1.1950283049156907e-08, "loss": 2.0845, "step": 29597 }, { "epoch": 0.98, "grad_norm": 0.7408627271652222, "learning_rate": 1.1898392067404286e-08, "loss": 2.1072, "step": 29598 }, { "epoch": 0.98, "grad_norm": 0.7340129613876343, "learning_rate": 1.1846613925370342e-08, "loss": 1.9957, "step": 29599 }, { "epoch": 0.98, "grad_norm": 0.7448450922966003, "learning_rate": 1.1794948623640168e-08, "loss": 2.0431, "step": 29600 }, { "epoch": 0.98, "grad_norm": 0.7366681694984436, "learning_rate": 1.1743396162796627e-08, "loss": 1.9557, "step": 29601 }, { "epoch": 0.98, "grad_norm": 0.7510009407997131, "learning_rate": 1.1691956543422589e-08, "loss": 2.0743, "step": 29602 }, { "epoch": 0.98, "grad_norm": 0.7619598507881165, "learning_rate": 1.1640629766099808e-08, "loss": 2.0006, "step": 29603 }, { "epoch": 0.98, "grad_norm": 0.7773876190185547, "learning_rate": 1.1589415831406713e-08, "loss": 2.014, "step": 29604 }, { "epoch": 0.98, "grad_norm": 0.784982442855835, "learning_rate": 1.153831473992284e-08, "loss": 2.0201, "step": 29605 }, { "epoch": 0.98, "grad_norm": 0.7617422342300415, "learning_rate": 1.1487326492224393e-08, "loss": 2.0168, "step": 29606 }, { "epoch": 0.99, "grad_norm": 0.7350152730941772, "learning_rate": 1.14364510888898e-08, "loss": 2.0355, "step": 29607 }, { "epoch": 0.99, "grad_norm": 0.7190080285072327, "learning_rate": 1.1385688530490824e-08, "loss": 1.937, "step": 29608 }, { "epoch": 0.99, "grad_norm": 0.7455457448959351, "learning_rate": 1.1335038817603672e-08, "loss": 2.0857, "step": 29609 }, { "epoch": 0.99, "grad_norm": 0.7399818897247314, "learning_rate": 1.1284501950798998e-08, "loss": 2.0474, "step": 29610 }, { "epoch": 0.99, "grad_norm": 0.7759206891059875, "learning_rate": 1.1234077930647458e-08, "loss": 2.0103, "step": 29611 }, { "epoch": 0.99, "grad_norm": 0.7352486252784729, "learning_rate": 1.1183766757719705e-08, "loss": 2.0795, "step": 29612 }, { "epoch": 0.99, "grad_norm": 0.7422617673873901, "learning_rate": 1.1133568432584175e-08, "loss": 2.0318, "step": 29613 }, { "epoch": 0.99, "grad_norm": 0.7562263011932373, "learning_rate": 1.1083482955805969e-08, "loss": 2.0602, "step": 29614 }, { "epoch": 0.99, "grad_norm": 0.7274857759475708, "learning_rate": 1.1033510327954632e-08, "loss": 2.0917, "step": 29615 }, { "epoch": 0.99, "grad_norm": 0.7442124485969543, "learning_rate": 1.098365054959083e-08, "loss": 2.0546, "step": 29616 }, { "epoch": 0.99, "grad_norm": 0.7492870092391968, "learning_rate": 1.0933903621280773e-08, "loss": 2.0097, "step": 29617 }, { "epoch": 0.99, "grad_norm": 0.7204904556274414, "learning_rate": 1.0884269543585124e-08, "loss": 1.9993, "step": 29618 }, { "epoch": 0.99, "grad_norm": 0.7400587201118469, "learning_rate": 1.0834748317064547e-08, "loss": 2.1099, "step": 29619 }, { "epoch": 0.99, "grad_norm": 0.7545045614242554, "learning_rate": 1.0785339942278594e-08, "loss": 2.0918, "step": 29620 }, { "epoch": 0.99, "grad_norm": 0.7184118032455444, "learning_rate": 1.0736044419785707e-08, "loss": 2.0128, "step": 29621 }, { "epoch": 0.99, "grad_norm": 0.7397406697273254, "learning_rate": 1.0686861750142108e-08, "loss": 2.0585, "step": 29622 }, { "epoch": 0.99, "grad_norm": 0.7237058281898499, "learning_rate": 1.0637791933905128e-08, "loss": 2.0502, "step": 29623 }, { "epoch": 0.99, "grad_norm": 0.7550670504570007, "learning_rate": 1.0588834971627659e-08, "loss": 2.0742, "step": 29624 }, { "epoch": 0.99, "grad_norm": 0.7255280017852783, "learning_rate": 1.0539990863862592e-08, "loss": 2.0403, "step": 29625 }, { "epoch": 0.99, "grad_norm": 0.7502086162567139, "learning_rate": 1.0491259611162818e-08, "loss": 2.0854, "step": 29626 }, { "epoch": 0.99, "grad_norm": 0.7208864688873291, "learning_rate": 1.0442641214079007e-08, "loss": 1.9534, "step": 29627 }, { "epoch": 0.99, "grad_norm": 0.7577435374259949, "learning_rate": 1.0394135673158501e-08, "loss": 2.0205, "step": 29628 }, { "epoch": 0.99, "grad_norm": 0.7699874043464661, "learning_rate": 1.0345742988951969e-08, "loss": 2.0415, "step": 29629 }, { "epoch": 0.99, "grad_norm": 0.7290918231010437, "learning_rate": 1.0297463162003419e-08, "loss": 2.1093, "step": 29630 }, { "epoch": 0.99, "grad_norm": 0.7549092769622803, "learning_rate": 1.0249296192860192e-08, "loss": 2.0528, "step": 29631 }, { "epoch": 0.99, "grad_norm": 0.784666895866394, "learning_rate": 1.0201242082066298e-08, "loss": 2.0729, "step": 29632 }, { "epoch": 0.99, "grad_norm": 0.7704221606254578, "learning_rate": 1.0153300830163526e-08, "loss": 2.0253, "step": 29633 }, { "epoch": 0.99, "grad_norm": 0.7227026224136353, "learning_rate": 1.0105472437693664e-08, "loss": 2.04, "step": 29634 }, { "epoch": 0.99, "grad_norm": 0.7599170804023743, "learning_rate": 1.0057756905198501e-08, "loss": 1.9933, "step": 29635 }, { "epoch": 0.99, "grad_norm": 0.7635394334793091, "learning_rate": 1.0010154233215386e-08, "loss": 2.0861, "step": 29636 }, { "epoch": 0.99, "grad_norm": 0.7088033556938171, "learning_rate": 9.962664422283885e-09, "loss": 2.0271, "step": 29637 }, { "epoch": 0.99, "grad_norm": 0.7389460802078247, "learning_rate": 9.915287472938017e-09, "loss": 1.9971, "step": 29638 }, { "epoch": 0.99, "grad_norm": 0.7813568115234375, "learning_rate": 9.86802338571624e-09, "loss": 2.0412, "step": 29639 }, { "epoch": 0.99, "grad_norm": 0.7510318160057068, "learning_rate": 9.820872161149242e-09, "loss": 2.0311, "step": 29640 }, { "epoch": 0.99, "grad_norm": 0.7509331703186035, "learning_rate": 9.773833799772148e-09, "loss": 2.0148, "step": 29641 }, { "epoch": 0.99, "grad_norm": 0.7266464233398438, "learning_rate": 9.726908302114536e-09, "loss": 2.0278, "step": 29642 }, { "epoch": 0.99, "grad_norm": 0.738103985786438, "learning_rate": 9.680095668708201e-09, "loss": 2.0379, "step": 29643 }, { "epoch": 0.99, "grad_norm": 0.718728244304657, "learning_rate": 9.6333959000805e-09, "loss": 2.0861, "step": 29644 }, { "epoch": 0.99, "grad_norm": 0.7732469439506531, "learning_rate": 9.5868089967599e-09, "loss": 2.0609, "step": 29645 }, { "epoch": 0.99, "grad_norm": 0.7301507592201233, "learning_rate": 9.540334959273757e-09, "loss": 2.0737, "step": 29646 }, { "epoch": 0.99, "grad_norm": 0.7596578598022461, "learning_rate": 9.493973788144984e-09, "loss": 2.051, "step": 29647 }, { "epoch": 0.99, "grad_norm": 0.7596074342727661, "learning_rate": 9.447725483897607e-09, "loss": 2.1161, "step": 29648 }, { "epoch": 0.99, "grad_norm": 0.7364417910575867, "learning_rate": 9.401590047055654e-09, "loss": 2.0431, "step": 29649 }, { "epoch": 0.99, "grad_norm": 0.7456708550453186, "learning_rate": 9.355567478139816e-09, "loss": 2.0127, "step": 29650 }, { "epoch": 0.99, "grad_norm": 0.7360016703605652, "learning_rate": 9.309657777668567e-09, "loss": 2.0113, "step": 29651 }, { "epoch": 0.99, "grad_norm": 0.7269750833511353, "learning_rate": 9.263860946162606e-09, "loss": 2.0153, "step": 29652 }, { "epoch": 0.99, "grad_norm": 0.7294180393218994, "learning_rate": 9.218176984138183e-09, "loss": 2.0944, "step": 29653 }, { "epoch": 0.99, "grad_norm": 0.7614117860794067, "learning_rate": 9.172605892111553e-09, "loss": 1.993, "step": 29654 }, { "epoch": 0.99, "grad_norm": 0.7454225420951843, "learning_rate": 9.127147670597857e-09, "loss": 2.0419, "step": 29655 }, { "epoch": 0.99, "grad_norm": 0.7067463994026184, "learning_rate": 9.081802320110023e-09, "loss": 2.0469, "step": 29656 }, { "epoch": 0.99, "grad_norm": 0.7330135107040405, "learning_rate": 9.036569841160969e-09, "loss": 1.9889, "step": 29657 }, { "epoch": 0.99, "grad_norm": 0.7079134583473206, "learning_rate": 8.991450234261401e-09, "loss": 2.0369, "step": 29658 }, { "epoch": 0.99, "grad_norm": 0.7251440286636353, "learning_rate": 8.946443499920909e-09, "loss": 1.9958, "step": 29659 }, { "epoch": 0.99, "grad_norm": 0.7601599097251892, "learning_rate": 8.901549638649087e-09, "loss": 2.0143, "step": 29660 }, { "epoch": 0.99, "grad_norm": 0.7717041373252869, "learning_rate": 8.856768650951087e-09, "loss": 2.0537, "step": 29661 }, { "epoch": 0.99, "grad_norm": 0.7553925514221191, "learning_rate": 8.812100537334278e-09, "loss": 2.0493, "step": 29662 }, { "epoch": 0.99, "grad_norm": 0.7448487877845764, "learning_rate": 8.767545298302705e-09, "loss": 2.091, "step": 29663 }, { "epoch": 0.99, "grad_norm": 0.771101713180542, "learning_rate": 8.723102934360405e-09, "loss": 2.0883, "step": 29664 }, { "epoch": 0.99, "grad_norm": 0.7331533432006836, "learning_rate": 8.678773446009204e-09, "loss": 2.0631, "step": 29665 }, { "epoch": 0.99, "grad_norm": 0.7572809457778931, "learning_rate": 8.634556833749808e-09, "loss": 2.028, "step": 29666 }, { "epoch": 0.99, "grad_norm": 0.7649959325790405, "learning_rate": 8.59045309808071e-09, "loss": 2.0588, "step": 29667 }, { "epoch": 0.99, "grad_norm": 0.7431898713111877, "learning_rate": 8.546462239501507e-09, "loss": 2.0215, "step": 29668 }, { "epoch": 0.99, "grad_norm": 0.7499906420707703, "learning_rate": 8.502584258509583e-09, "loss": 2.0637, "step": 29669 }, { "epoch": 0.99, "grad_norm": 0.7345190644264221, "learning_rate": 8.458819155598985e-09, "loss": 2.0044, "step": 29670 }, { "epoch": 0.99, "grad_norm": 0.7668816447257996, "learning_rate": 8.415166931264873e-09, "loss": 2.0766, "step": 29671 }, { "epoch": 0.99, "grad_norm": 0.724146842956543, "learning_rate": 8.371627586001296e-09, "loss": 2.0674, "step": 29672 }, { "epoch": 0.99, "grad_norm": 0.7534029483795166, "learning_rate": 8.328201120298974e-09, "loss": 2.053, "step": 29673 }, { "epoch": 0.99, "grad_norm": 0.7470707297325134, "learning_rate": 8.284887534649732e-09, "loss": 2.0973, "step": 29674 }, { "epoch": 0.99, "grad_norm": 0.7218387126922607, "learning_rate": 8.241686829540962e-09, "loss": 2.0382, "step": 29675 }, { "epoch": 0.99, "grad_norm": 0.743836522102356, "learning_rate": 8.198599005462271e-09, "loss": 2.0343, "step": 29676 }, { "epoch": 0.99, "grad_norm": 0.7407042980194092, "learning_rate": 8.155624062899937e-09, "loss": 1.9875, "step": 29677 }, { "epoch": 0.99, "grad_norm": 0.7585245966911316, "learning_rate": 8.112762002340236e-09, "loss": 2.0656, "step": 29678 }, { "epoch": 0.99, "grad_norm": 0.7230626940727234, "learning_rate": 8.070012824266115e-09, "loss": 2.037, "step": 29679 }, { "epoch": 0.99, "grad_norm": 0.7681514024734497, "learning_rate": 8.027376529161634e-09, "loss": 2.0645, "step": 29680 }, { "epoch": 0.99, "grad_norm": 0.7748247981071472, "learning_rate": 7.984853117507519e-09, "loss": 1.9916, "step": 29681 }, { "epoch": 0.99, "grad_norm": 0.7601355910301208, "learning_rate": 7.942442589784493e-09, "loss": 2.0704, "step": 29682 }, { "epoch": 0.99, "grad_norm": 0.7219997644424438, "learning_rate": 7.900144946472178e-09, "loss": 2.0289, "step": 29683 }, { "epoch": 0.99, "grad_norm": 0.7585458159446716, "learning_rate": 7.857960188047964e-09, "loss": 2.0793, "step": 29684 }, { "epoch": 0.99, "grad_norm": 0.7438634037971497, "learning_rate": 7.81588831498814e-09, "loss": 2.1122, "step": 29685 }, { "epoch": 0.99, "grad_norm": 0.7371315956115723, "learning_rate": 7.773929327768992e-09, "loss": 2.0037, "step": 29686 }, { "epoch": 0.99, "grad_norm": 0.7867748737335205, "learning_rate": 7.732083226864584e-09, "loss": 2.0982, "step": 29687 }, { "epoch": 0.99, "grad_norm": 0.7369576096534729, "learning_rate": 7.690350012745651e-09, "loss": 2.0421, "step": 29688 }, { "epoch": 0.99, "grad_norm": 0.7357144951820374, "learning_rate": 7.648729685886258e-09, "loss": 2.0333, "step": 29689 }, { "epoch": 0.99, "grad_norm": 0.7184031009674072, "learning_rate": 7.60722224675492e-09, "loss": 2.0271, "step": 29690 }, { "epoch": 0.99, "grad_norm": 0.7642199993133545, "learning_rate": 7.56582769582015e-09, "loss": 2.0271, "step": 29691 }, { "epoch": 0.99, "grad_norm": 0.7612342238426208, "learning_rate": 7.524546033551573e-09, "loss": 2.1027, "step": 29692 }, { "epoch": 0.99, "grad_norm": 0.7427971959114075, "learning_rate": 7.483377260414371e-09, "loss": 2.0451, "step": 29693 }, { "epoch": 0.99, "grad_norm": 0.7498494386672974, "learning_rate": 7.442321376873729e-09, "loss": 2.0547, "step": 29694 }, { "epoch": 0.99, "grad_norm": 0.7374542951583862, "learning_rate": 7.40137838339372e-09, "loss": 2.0807, "step": 29695 }, { "epoch": 0.99, "grad_norm": 0.7531236410140991, "learning_rate": 7.360548280437307e-09, "loss": 2.0744, "step": 29696 }, { "epoch": 0.99, "grad_norm": 0.7319228649139404, "learning_rate": 7.3198310684641224e-09, "loss": 2.0814, "step": 29697 }, { "epoch": 0.99, "grad_norm": 0.7377665042877197, "learning_rate": 7.2792267479349085e-09, "loss": 2.0492, "step": 29698 }, { "epoch": 0.99, "grad_norm": 0.7268849611282349, "learning_rate": 7.2387353193092976e-09, "loss": 2.0281, "step": 29699 }, { "epoch": 0.99, "grad_norm": 0.7611731290817261, "learning_rate": 7.198356783044702e-09, "loss": 2.056, "step": 29700 }, { "epoch": 0.99, "grad_norm": 0.7511168122291565, "learning_rate": 7.158091139595202e-09, "loss": 2.0307, "step": 29701 }, { "epoch": 0.99, "grad_norm": 0.7155812382698059, "learning_rate": 7.117938389418211e-09, "loss": 2.0492, "step": 29702 }, { "epoch": 0.99, "grad_norm": 0.7363794445991516, "learning_rate": 7.077898532966698e-09, "loss": 2.0766, "step": 29703 }, { "epoch": 0.99, "grad_norm": 0.745597243309021, "learning_rate": 7.037971570692526e-09, "loss": 2.0093, "step": 29704 }, { "epoch": 0.99, "grad_norm": 0.7444412708282471, "learning_rate": 6.998157503046443e-09, "loss": 2.0103, "step": 29705 }, { "epoch": 0.99, "grad_norm": 0.7229227423667908, "learning_rate": 6.958456330478092e-09, "loss": 2.0341, "step": 29706 }, { "epoch": 0.99, "grad_norm": 0.7718050479888916, "learning_rate": 6.918868053437111e-09, "loss": 2.0436, "step": 29707 }, { "epoch": 0.99, "grad_norm": 0.72885662317276, "learning_rate": 6.879392672370921e-09, "loss": 1.9681, "step": 29708 }, { "epoch": 0.99, "grad_norm": 0.7438578605651855, "learning_rate": 6.8400301877247225e-09, "loss": 2.0896, "step": 29709 }, { "epoch": 0.99, "grad_norm": 0.7355804443359375, "learning_rate": 6.800780599942602e-09, "loss": 2.0462, "step": 29710 }, { "epoch": 0.99, "grad_norm": 0.733769416809082, "learning_rate": 6.7616439094686515e-09, "loss": 2.0807, "step": 29711 }, { "epoch": 0.99, "grad_norm": 0.7588351964950562, "learning_rate": 6.7226201167458485e-09, "loss": 2.0371, "step": 29712 }, { "epoch": 0.99, "grad_norm": 0.7769057750701904, "learning_rate": 6.6837092222138415e-09, "loss": 1.9914, "step": 29713 }, { "epoch": 0.99, "grad_norm": 0.7687263488769531, "learning_rate": 6.6449112263122786e-09, "loss": 2.0593, "step": 29714 }, { "epoch": 0.99, "grad_norm": 0.7562466263771057, "learning_rate": 6.606226129479698e-09, "loss": 2.0619, "step": 29715 }, { "epoch": 0.99, "grad_norm": 0.7476504445075989, "learning_rate": 6.567653932154638e-09, "loss": 2.0715, "step": 29716 }, { "epoch": 0.99, "grad_norm": 0.7379528880119324, "learning_rate": 6.529194634770086e-09, "loss": 2.0215, "step": 29717 }, { "epoch": 0.99, "grad_norm": 0.7403675317764282, "learning_rate": 6.49084823776236e-09, "loss": 2.0343, "step": 29718 }, { "epoch": 0.99, "grad_norm": 0.7936694622039795, "learning_rate": 6.452614741564445e-09, "loss": 2.0803, "step": 29719 }, { "epoch": 0.99, "grad_norm": 0.7684759497642517, "learning_rate": 6.414494146608219e-09, "loss": 2.1022, "step": 29720 }, { "epoch": 0.99, "grad_norm": 0.7687221765518188, "learning_rate": 6.376486453323339e-09, "loss": 2.0353, "step": 29721 }, { "epoch": 0.99, "grad_norm": 0.7182919383049011, "learning_rate": 6.33859166214057e-09, "loss": 2.0384, "step": 29722 }, { "epoch": 0.99, "grad_norm": 0.7458496689796448, "learning_rate": 6.300809773488459e-09, "loss": 2.0143, "step": 29723 }, { "epoch": 0.99, "grad_norm": 0.7239437103271484, "learning_rate": 6.263140787791111e-09, "loss": 2.0606, "step": 29724 }, { "epoch": 0.99, "grad_norm": 0.7361354231834412, "learning_rate": 6.225584705477073e-09, "loss": 2.0772, "step": 29725 }, { "epoch": 0.99, "grad_norm": 0.75112384557724, "learning_rate": 6.188141526969338e-09, "loss": 2.1305, "step": 29726 }, { "epoch": 0.99, "grad_norm": 0.7456130385398865, "learning_rate": 6.150811252690902e-09, "loss": 2.0345, "step": 29727 }, { "epoch": 0.99, "grad_norm": 0.7336434721946716, "learning_rate": 6.113593883063651e-09, "loss": 2.0786, "step": 29728 }, { "epoch": 0.99, "grad_norm": 0.7628545165061951, "learning_rate": 6.076489418508358e-09, "loss": 2.065, "step": 29729 }, { "epoch": 0.99, "grad_norm": 0.7467114925384521, "learning_rate": 6.039497859443577e-09, "loss": 2.1038, "step": 29730 }, { "epoch": 0.99, "grad_norm": 0.7414436340332031, "learning_rate": 6.002619206287863e-09, "loss": 2.0947, "step": 29731 }, { "epoch": 0.99, "grad_norm": 0.7386296391487122, "learning_rate": 5.965853459456439e-09, "loss": 1.9619, "step": 29732 }, { "epoch": 0.99, "grad_norm": 0.7349427938461304, "learning_rate": 5.929200619366748e-09, "loss": 2.0399, "step": 29733 }, { "epoch": 0.99, "grad_norm": 0.7421636581420898, "learning_rate": 5.892660686431795e-09, "loss": 2.0488, "step": 29734 }, { "epoch": 0.99, "grad_norm": 0.741570770740509, "learning_rate": 5.856233661063471e-09, "loss": 2.0301, "step": 29735 }, { "epoch": 0.99, "grad_norm": 0.7685263156890869, "learning_rate": 5.819919543674779e-09, "loss": 2.059, "step": 29736 }, { "epoch": 0.99, "grad_norm": 0.7527056932449341, "learning_rate": 5.7837183346765026e-09, "loss": 2.0603, "step": 29737 }, { "epoch": 0.99, "grad_norm": 0.730739176273346, "learning_rate": 5.747630034474983e-09, "loss": 2.0735, "step": 29738 }, { "epoch": 0.99, "grad_norm": 0.7473884224891663, "learning_rate": 5.711654643481001e-09, "loss": 2.0478, "step": 29739 }, { "epoch": 0.99, "grad_norm": 0.7674278616905212, "learning_rate": 5.675792162098681e-09, "loss": 2.0496, "step": 29740 }, { "epoch": 0.99, "grad_norm": 0.7264618873596191, "learning_rate": 5.640042590734363e-09, "loss": 2.0238, "step": 29741 }, { "epoch": 0.99, "grad_norm": 0.7265802025794983, "learning_rate": 5.604405929791057e-09, "loss": 1.9846, "step": 29742 }, { "epoch": 0.99, "grad_norm": 0.7242604494094849, "learning_rate": 5.568882179671775e-09, "loss": 2.0438, "step": 29743 }, { "epoch": 0.99, "grad_norm": 0.7134151458740234, "learning_rate": 5.533471340778418e-09, "loss": 1.9752, "step": 29744 }, { "epoch": 0.99, "grad_norm": 0.7598056197166443, "learning_rate": 5.498173413510666e-09, "loss": 2.0222, "step": 29745 }, { "epoch": 0.99, "grad_norm": 0.7902846932411194, "learning_rate": 5.462988398267088e-09, "loss": 2.0956, "step": 29746 }, { "epoch": 0.99, "grad_norm": 0.7384594082832336, "learning_rate": 5.427916295445146e-09, "loss": 2.0823, "step": 29747 }, { "epoch": 0.99, "grad_norm": 0.7374424934387207, "learning_rate": 5.392957105441188e-09, "loss": 2.0575, "step": 29748 }, { "epoch": 0.99, "grad_norm": 0.7300637364387512, "learning_rate": 5.358110828650453e-09, "loss": 2.0639, "step": 29749 }, { "epoch": 0.99, "grad_norm": 0.7577400803565979, "learning_rate": 5.3233774654670726e-09, "loss": 1.9891, "step": 29750 }, { "epoch": 0.99, "grad_norm": 0.7438801527023315, "learning_rate": 5.288757016280732e-09, "loss": 2.0265, "step": 29751 }, { "epoch": 0.99, "grad_norm": 0.7334945797920227, "learning_rate": 5.2542494814855625e-09, "loss": 2.0082, "step": 29752 }, { "epoch": 0.99, "grad_norm": 0.7470763921737671, "learning_rate": 5.219854861471252e-09, "loss": 2.0531, "step": 29753 }, { "epoch": 0.99, "grad_norm": 0.7655604481697083, "learning_rate": 5.185573156624157e-09, "loss": 2.0685, "step": 29754 }, { "epoch": 0.99, "grad_norm": 0.7120406627655029, "learning_rate": 5.1514043673339676e-09, "loss": 2.0826, "step": 29755 }, { "epoch": 0.99, "grad_norm": 0.7336919903755188, "learning_rate": 5.117348493984819e-09, "loss": 2.0048, "step": 29756 }, { "epoch": 0.99, "grad_norm": 0.7167341709136963, "learning_rate": 5.083405536963071e-09, "loss": 2.0906, "step": 29757 }, { "epoch": 0.99, "grad_norm": 0.719492495059967, "learning_rate": 5.049575496650638e-09, "loss": 2.0015, "step": 29758 }, { "epoch": 0.99, "grad_norm": 0.7418146729469299, "learning_rate": 5.015858373430549e-09, "loss": 2.0247, "step": 29759 }, { "epoch": 0.99, "grad_norm": 0.7380086779594421, "learning_rate": 4.982254167684719e-09, "loss": 1.989, "step": 29760 }, { "epoch": 0.99, "grad_norm": 0.7662522196769714, "learning_rate": 4.948762879790625e-09, "loss": 2.0487, "step": 29761 }, { "epoch": 0.99, "grad_norm": 0.7406746745109558, "learning_rate": 4.915384510127963e-09, "loss": 2.0882, "step": 29762 }, { "epoch": 0.99, "grad_norm": 0.7317564487457275, "learning_rate": 4.882119059074209e-09, "loss": 2.0608, "step": 29763 }, { "epoch": 0.99, "grad_norm": 0.750950038433075, "learning_rate": 4.8489665270035065e-09, "loss": 2.0467, "step": 29764 }, { "epoch": 0.99, "grad_norm": 0.7487965226173401, "learning_rate": 4.815926914292224e-09, "loss": 1.9789, "step": 29765 }, { "epoch": 0.99, "grad_norm": 0.7353529930114746, "learning_rate": 4.783000221312284e-09, "loss": 2.0593, "step": 29766 }, { "epoch": 0.99, "grad_norm": 0.764518678188324, "learning_rate": 4.750186448436722e-09, "loss": 2.0306, "step": 29767 }, { "epoch": 0.99, "grad_norm": 0.7514039874076843, "learning_rate": 4.7174855960352425e-09, "loss": 2.1046, "step": 29768 }, { "epoch": 0.99, "grad_norm": 0.7701081037521362, "learning_rate": 4.68489766447866e-09, "loss": 2.0319, "step": 29769 }, { "epoch": 0.99, "grad_norm": 0.7539198994636536, "learning_rate": 4.652422654133349e-09, "loss": 2.0326, "step": 29770 }, { "epoch": 0.99, "grad_norm": 0.7633764743804932, "learning_rate": 4.620060565367901e-09, "loss": 2.0207, "step": 29771 }, { "epoch": 0.99, "grad_norm": 0.7434589862823486, "learning_rate": 4.587811398546471e-09, "loss": 2.0601, "step": 29772 }, { "epoch": 0.99, "grad_norm": 0.7705041170120239, "learning_rate": 4.555675154034322e-09, "loss": 2.0045, "step": 29773 }, { "epoch": 0.99, "grad_norm": 0.7179151177406311, "learning_rate": 4.523651832194498e-09, "loss": 1.9183, "step": 29774 }, { "epoch": 0.99, "grad_norm": 0.7549268007278442, "learning_rate": 4.49174143338893e-09, "loss": 2.1068, "step": 29775 }, { "epoch": 0.99, "grad_norm": 0.7398304343223572, "learning_rate": 4.459943957976221e-09, "loss": 2.033, "step": 29776 }, { "epoch": 0.99, "grad_norm": 0.7293802499771118, "learning_rate": 4.4282594063183025e-09, "loss": 2.0089, "step": 29777 }, { "epoch": 0.99, "grad_norm": 0.7181735038757324, "learning_rate": 4.396687778771558e-09, "loss": 2.0224, "step": 29778 }, { "epoch": 0.99, "grad_norm": 0.7577188014984131, "learning_rate": 4.365229075693478e-09, "loss": 2.1095, "step": 29779 }, { "epoch": 0.99, "grad_norm": 0.7307273745536804, "learning_rate": 4.333883297438224e-09, "loss": 2.0738, "step": 29780 }, { "epoch": 0.99, "grad_norm": 0.7482345104217529, "learning_rate": 4.3026504443610675e-09, "loss": 2.0877, "step": 29781 }, { "epoch": 0.99, "grad_norm": 0.7417686581611633, "learning_rate": 4.271530516815059e-09, "loss": 2.0988, "step": 29782 }, { "epoch": 0.99, "grad_norm": 0.7734464406967163, "learning_rate": 4.24052351515103e-09, "loss": 2.0461, "step": 29783 }, { "epoch": 0.99, "grad_norm": 0.7529640197753906, "learning_rate": 4.209629439718699e-09, "loss": 2.0654, "step": 29784 }, { "epoch": 0.99, "grad_norm": 0.7443147897720337, "learning_rate": 4.178848290868897e-09, "loss": 2.0289, "step": 29785 }, { "epoch": 0.99, "grad_norm": 0.7400572896003723, "learning_rate": 4.148180068946906e-09, "loss": 1.9984, "step": 29786 }, { "epoch": 0.99, "grad_norm": 0.7442275285720825, "learning_rate": 4.117624774302442e-09, "loss": 2.0802, "step": 29787 }, { "epoch": 0.99, "grad_norm": 0.7369182705879211, "learning_rate": 4.087182407277457e-09, "loss": 1.9597, "step": 29788 }, { "epoch": 0.99, "grad_norm": 0.7372701168060303, "learning_rate": 4.0568529682183385e-09, "loss": 2.0218, "step": 29789 }, { "epoch": 0.99, "grad_norm": 0.7393997311592102, "learning_rate": 4.0266364574659264e-09, "loss": 2.0666, "step": 29790 }, { "epoch": 0.99, "grad_norm": 0.7620957493782043, "learning_rate": 3.996532875362169e-09, "loss": 2.0768, "step": 29791 }, { "epoch": 0.99, "grad_norm": 0.7546300888061523, "learning_rate": 3.966542222247904e-09, "loss": 2.1012, "step": 29792 }, { "epoch": 0.99, "grad_norm": 0.7544488906860352, "learning_rate": 3.936664498461751e-09, "loss": 1.9908, "step": 29793 }, { "epoch": 0.99, "grad_norm": 0.7754558324813843, "learning_rate": 3.906899704340106e-09, "loss": 2.0459, "step": 29794 }, { "epoch": 0.99, "grad_norm": 0.7280136346817017, "learning_rate": 3.877247840220477e-09, "loss": 1.9975, "step": 29795 }, { "epoch": 0.99, "grad_norm": 0.7703222036361694, "learning_rate": 3.847708906437042e-09, "loss": 1.9957, "step": 29796 }, { "epoch": 0.99, "grad_norm": 0.8217587471008301, "learning_rate": 3.818282903323978e-09, "loss": 2.0223, "step": 29797 }, { "epoch": 0.99, "grad_norm": 0.7617921829223633, "learning_rate": 3.788969831214351e-09, "loss": 2.0872, "step": 29798 }, { "epoch": 0.99, "grad_norm": 0.7546753287315369, "learning_rate": 3.759769690437898e-09, "loss": 2.0639, "step": 29799 }, { "epoch": 0.99, "grad_norm": 0.734090268611908, "learning_rate": 3.730682481325465e-09, "loss": 2.1197, "step": 29800 }, { "epoch": 0.99, "grad_norm": 0.7444719076156616, "learning_rate": 3.701708204204568e-09, "loss": 2.0151, "step": 29801 }, { "epoch": 0.99, "grad_norm": 0.759628415107727, "learning_rate": 3.6728468594038334e-09, "loss": 2.0119, "step": 29802 }, { "epoch": 0.99, "grad_norm": 0.746938169002533, "learning_rate": 3.6440984472496664e-09, "loss": 2.0614, "step": 29803 }, { "epoch": 0.99, "grad_norm": 0.7536671161651611, "learning_rate": 3.615462968065142e-09, "loss": 1.9821, "step": 29804 }, { "epoch": 0.99, "grad_norm": 0.7402051687240601, "learning_rate": 3.5869404221755556e-09, "loss": 1.9341, "step": 29805 }, { "epoch": 0.99, "grad_norm": 0.7187182307243347, "learning_rate": 3.558530809900651e-09, "loss": 2.0193, "step": 29806 }, { "epoch": 0.99, "grad_norm": 0.7408974766731262, "learning_rate": 3.530234131564614e-09, "loss": 2.048, "step": 29807 }, { "epoch": 0.99, "grad_norm": 0.7606223821640015, "learning_rate": 3.5020503874849675e-09, "loss": 2.0572, "step": 29808 }, { "epoch": 0.99, "grad_norm": 0.76725834608078, "learning_rate": 3.4739795779803465e-09, "loss": 2.0642, "step": 29809 }, { "epoch": 0.99, "grad_norm": 0.7686064839363098, "learning_rate": 3.4460217033682743e-09, "loss": 2.0617, "step": 29810 }, { "epoch": 0.99, "grad_norm": 0.7634626626968384, "learning_rate": 3.418176763964054e-09, "loss": 2.0822, "step": 29811 }, { "epoch": 0.99, "grad_norm": 0.7247858643531799, "learning_rate": 3.3904447600829892e-09, "loss": 2.0412, "step": 29812 }, { "epoch": 0.99, "grad_norm": 0.7405315041542053, "learning_rate": 3.362825692038163e-09, "loss": 1.9816, "step": 29813 }, { "epoch": 0.99, "grad_norm": 0.7717000246047974, "learning_rate": 3.3353195601415477e-09, "loss": 2.078, "step": 29814 }, { "epoch": 0.99, "grad_norm": 0.7307572364807129, "learning_rate": 3.3079263647040063e-09, "loss": 2.0309, "step": 29815 }, { "epoch": 0.99, "grad_norm": 0.7386662364006042, "learning_rate": 3.28064610603529e-09, "loss": 1.998, "step": 29816 }, { "epoch": 0.99, "grad_norm": 0.763184666633606, "learning_rate": 3.2534787844429316e-09, "loss": 2.0431, "step": 29817 }, { "epoch": 0.99, "grad_norm": 0.7335972785949707, "learning_rate": 3.226424400233352e-09, "loss": 2.0647, "step": 29818 }, { "epoch": 0.99, "grad_norm": 0.7244024872779846, "learning_rate": 3.199482953714084e-09, "loss": 2.0443, "step": 29819 }, { "epoch": 0.99, "grad_norm": 0.7186897397041321, "learning_rate": 3.1726544451871067e-09, "loss": 2.0021, "step": 29820 }, { "epoch": 0.99, "grad_norm": 0.712941586971283, "learning_rate": 3.1459388749577324e-09, "loss": 2.0007, "step": 29821 }, { "epoch": 0.99, "grad_norm": 0.7409473061561584, "learning_rate": 3.1193362433257213e-09, "loss": 2.04, "step": 29822 }, { "epoch": 0.99, "grad_norm": 0.7436716556549072, "learning_rate": 3.092846550594164e-09, "loss": 1.996, "step": 29823 }, { "epoch": 0.99, "grad_norm": 0.7529311776161194, "learning_rate": 3.0664697970594904e-09, "loss": 2.1097, "step": 29824 }, { "epoch": 0.99, "grad_norm": 0.7243223786354065, "learning_rate": 3.0402059830225706e-09, "loss": 2.0224, "step": 29825 }, { "epoch": 0.99, "grad_norm": 0.7718964219093323, "learning_rate": 3.0140551087776138e-09, "loss": 2.0808, "step": 29826 }, { "epoch": 0.99, "grad_norm": 0.7570213079452515, "learning_rate": 2.9880171746210497e-09, "loss": 2.0302, "step": 29827 }, { "epoch": 0.99, "grad_norm": 0.7510157823562622, "learning_rate": 2.962092180847087e-09, "loss": 2.0225, "step": 29828 }, { "epoch": 0.99, "grad_norm": 0.7550667524337769, "learning_rate": 2.936280127748825e-09, "loss": 2.0902, "step": 29829 }, { "epoch": 0.99, "grad_norm": 0.7680709958076477, "learning_rate": 2.9105810156171423e-09, "loss": 2.0541, "step": 29830 }, { "epoch": 0.99, "grad_norm": 0.7224922776222229, "learning_rate": 2.8849948447429168e-09, "loss": 2.0499, "step": 29831 }, { "epoch": 0.99, "grad_norm": 0.7553815245628357, "learning_rate": 2.8595216154159167e-09, "loss": 1.9698, "step": 29832 }, { "epoch": 0.99, "grad_norm": 0.7676466703414917, "learning_rate": 2.8341613279225797e-09, "loss": 2.0111, "step": 29833 }, { "epoch": 0.99, "grad_norm": 0.7479047179222107, "learning_rate": 2.808913982550454e-09, "loss": 2.0212, "step": 29834 }, { "epoch": 0.99, "grad_norm": 0.8118809461593628, "learning_rate": 2.7837795795837564e-09, "loss": 2.0195, "step": 29835 }, { "epoch": 0.99, "grad_norm": 0.7683497667312622, "learning_rate": 2.7587581193067035e-09, "loss": 2.0374, "step": 29836 }, { "epoch": 0.99, "grad_norm": 0.7969748377799988, "learning_rate": 2.733849602002403e-09, "loss": 2.0504, "step": 29837 }, { "epoch": 0.99, "grad_norm": 0.7508156895637512, "learning_rate": 2.709054027952851e-09, "loss": 1.9562, "step": 29838 }, { "epoch": 0.99, "grad_norm": 0.7445412278175354, "learning_rate": 2.6843713974367136e-09, "loss": 2.0852, "step": 29839 }, { "epoch": 0.99, "grad_norm": 0.7348653674125671, "learning_rate": 2.659801710733767e-09, "loss": 2.1057, "step": 29840 }, { "epoch": 0.99, "grad_norm": 0.7156270742416382, "learning_rate": 2.6353449681204568e-09, "loss": 1.9938, "step": 29841 }, { "epoch": 0.99, "grad_norm": 0.7586727738380432, "learning_rate": 2.6110011698754487e-09, "loss": 2.0305, "step": 29842 }, { "epoch": 0.99, "grad_norm": 0.7782207727432251, "learning_rate": 2.5867703162718584e-09, "loss": 2.0649, "step": 29843 }, { "epoch": 0.99, "grad_norm": 0.7509174346923828, "learning_rate": 2.5626524075828e-09, "loss": 2.0734, "step": 29844 }, { "epoch": 0.99, "grad_norm": 0.7275986075401306, "learning_rate": 2.5386474440836086e-09, "loss": 1.9885, "step": 29845 }, { "epoch": 0.99, "grad_norm": 0.7723815441131592, "learning_rate": 2.5147554260418484e-09, "loss": 2.066, "step": 29846 }, { "epoch": 0.99, "grad_norm": 0.7597500085830688, "learning_rate": 2.4909763537306343e-09, "loss": 2.0793, "step": 29847 }, { "epoch": 0.99, "grad_norm": 0.7478899359703064, "learning_rate": 2.4673102274164195e-09, "loss": 2.0993, "step": 29848 }, { "epoch": 0.99, "grad_norm": 0.7401597499847412, "learning_rate": 2.4437570473678783e-09, "loss": 2.0208, "step": 29849 }, { "epoch": 0.99, "grad_norm": 0.7216199040412903, "learning_rate": 2.4203168138514644e-09, "loss": 2.0327, "step": 29850 }, { "epoch": 0.99, "grad_norm": 0.7516917586326599, "learning_rate": 2.3969895271303e-09, "loss": 2.0059, "step": 29851 }, { "epoch": 0.99, "grad_norm": 0.7287757396697998, "learning_rate": 2.373775187468619e-09, "loss": 2.0258, "step": 29852 }, { "epoch": 0.99, "grad_norm": 0.7130069136619568, "learning_rate": 2.350673795128433e-09, "loss": 1.9762, "step": 29853 }, { "epoch": 0.99, "grad_norm": 0.7495232820510864, "learning_rate": 2.3276853503717557e-09, "loss": 2.0254, "step": 29854 }, { "epoch": 0.99, "grad_norm": 0.7562741041183472, "learning_rate": 2.3048098534572684e-09, "loss": 1.9963, "step": 29855 }, { "epoch": 0.99, "grad_norm": 0.757908046245575, "learning_rate": 2.2820473046447634e-09, "loss": 1.9716, "step": 29856 }, { "epoch": 0.99, "grad_norm": 0.7227439880371094, "learning_rate": 2.259397704189592e-09, "loss": 2.0302, "step": 29857 }, { "epoch": 0.99, "grad_norm": 0.7256495952606201, "learning_rate": 2.236861052348216e-09, "loss": 2.0523, "step": 29858 }, { "epoch": 0.99, "grad_norm": 0.7868555188179016, "learning_rate": 2.214437349375986e-09, "loss": 2.1287, "step": 29859 }, { "epoch": 0.99, "grad_norm": 0.7411066889762878, "learning_rate": 2.192126595526034e-09, "loss": 1.9836, "step": 29860 }, { "epoch": 0.99, "grad_norm": 0.7502613663673401, "learning_rate": 2.1699287910503795e-09, "loss": 2.0437, "step": 29861 }, { "epoch": 0.99, "grad_norm": 0.7456634044647217, "learning_rate": 2.1478439361988235e-09, "loss": 2.0257, "step": 29862 }, { "epoch": 0.99, "grad_norm": 0.7811187505722046, "learning_rate": 2.125872031222276e-09, "loss": 1.9934, "step": 29863 }, { "epoch": 0.99, "grad_norm": 0.7530847787857056, "learning_rate": 2.104013076367206e-09, "loss": 2.0055, "step": 29864 }, { "epoch": 0.99, "grad_norm": 0.7474566698074341, "learning_rate": 2.0822670718823046e-09, "loss": 2.065, "step": 29865 }, { "epoch": 0.99, "grad_norm": 0.7490169405937195, "learning_rate": 2.0606340180129305e-09, "loss": 2.0545, "step": 29866 }, { "epoch": 0.99, "grad_norm": 0.792894184589386, "learning_rate": 2.0391139150033325e-09, "loss": 2.0598, "step": 29867 }, { "epoch": 0.99, "grad_norm": 0.7788861393928528, "learning_rate": 2.0177067630955395e-09, "loss": 2.0175, "step": 29868 }, { "epoch": 0.99, "grad_norm": 0.7672091722488403, "learning_rate": 1.99641256253269e-09, "loss": 2.029, "step": 29869 }, { "epoch": 0.99, "grad_norm": 0.7585312724113464, "learning_rate": 1.975231313555703e-09, "loss": 2.0056, "step": 29870 }, { "epoch": 0.99, "grad_norm": 0.7147022485733032, "learning_rate": 1.9541630164021664e-09, "loss": 1.9611, "step": 29871 }, { "epoch": 0.99, "grad_norm": 0.761692225933075, "learning_rate": 1.9332076713107772e-09, "loss": 2.0929, "step": 29872 }, { "epoch": 0.99, "grad_norm": 0.7389810085296631, "learning_rate": 1.912365278519124e-09, "loss": 2.0018, "step": 29873 }, { "epoch": 0.99, "grad_norm": 0.7708852291107178, "learning_rate": 1.8916358382625733e-09, "loss": 2.0456, "step": 29874 }, { "epoch": 0.99, "grad_norm": 0.7427096366882324, "learning_rate": 1.8710193507742723e-09, "loss": 1.9986, "step": 29875 }, { "epoch": 0.99, "grad_norm": 0.7353299260139465, "learning_rate": 1.8505158162873683e-09, "loss": 2.0679, "step": 29876 }, { "epoch": 0.99, "grad_norm": 0.7231204509735107, "learning_rate": 1.8301252350350075e-09, "loss": 2.0221, "step": 29877 }, { "epoch": 0.99, "grad_norm": 0.7684375047683716, "learning_rate": 1.809847607245896e-09, "loss": 2.0617, "step": 29878 }, { "epoch": 0.99, "grad_norm": 0.7398586273193359, "learning_rate": 1.7896829331487397e-09, "loss": 1.9826, "step": 29879 }, { "epoch": 0.99, "grad_norm": 0.74696946144104, "learning_rate": 1.7696312129733551e-09, "loss": 2.0215, "step": 29880 }, { "epoch": 0.99, "grad_norm": 0.7459486722946167, "learning_rate": 1.7496924469440068e-09, "loss": 2.0193, "step": 29881 }, { "epoch": 0.99, "grad_norm": 0.7631803750991821, "learning_rate": 1.729866635288291e-09, "loss": 2.0135, "step": 29882 }, { "epoch": 0.99, "grad_norm": 0.7474335432052612, "learning_rate": 1.7101537782282517e-09, "loss": 1.9952, "step": 29883 }, { "epoch": 0.99, "grad_norm": 0.7493554949760437, "learning_rate": 1.6905538759881546e-09, "loss": 1.9999, "step": 29884 }, { "epoch": 0.99, "grad_norm": 0.7568851709365845, "learning_rate": 1.6710669287878233e-09, "loss": 2.0524, "step": 29885 }, { "epoch": 0.99, "grad_norm": 0.7399751543998718, "learning_rate": 1.6516929368481927e-09, "loss": 1.9676, "step": 29886 }, { "epoch": 0.99, "grad_norm": 0.7268072962760925, "learning_rate": 1.6324319003879764e-09, "loss": 1.9913, "step": 29887 }, { "epoch": 0.99, "grad_norm": 0.7300494909286499, "learning_rate": 1.613283819624778e-09, "loss": 2.0023, "step": 29888 }, { "epoch": 0.99, "grad_norm": 0.7397010326385498, "learning_rate": 1.5942486947762016e-09, "loss": 2.0924, "step": 29889 }, { "epoch": 0.99, "grad_norm": 0.7560091018676758, "learning_rate": 1.5753265260554096e-09, "loss": 2.078, "step": 29890 }, { "epoch": 0.99, "grad_norm": 0.7547938823699951, "learning_rate": 1.5565173136766753e-09, "loss": 2.0011, "step": 29891 }, { "epoch": 0.99, "grad_norm": 0.7331838607788086, "learning_rate": 1.5378210578531616e-09, "loss": 2.0382, "step": 29892 }, { "epoch": 0.99, "grad_norm": 0.7528529167175293, "learning_rate": 1.5192377587958107e-09, "loss": 2.0209, "step": 29893 }, { "epoch": 0.99, "grad_norm": 0.7621193528175354, "learning_rate": 1.5007674167133445e-09, "loss": 2.0497, "step": 29894 }, { "epoch": 0.99, "grad_norm": 0.703238308429718, "learning_rate": 1.4824100318167055e-09, "loss": 2.0252, "step": 29895 }, { "epoch": 0.99, "grad_norm": 0.7415446639060974, "learning_rate": 1.4641656043112852e-09, "loss": 2.0524, "step": 29896 }, { "epoch": 0.99, "grad_norm": 0.7276495695114136, "learning_rate": 1.4460341344046946e-09, "loss": 2.0805, "step": 29897 }, { "epoch": 0.99, "grad_norm": 0.7539460062980652, "learning_rate": 1.4280156223012154e-09, "loss": 2.0759, "step": 29898 }, { "epoch": 0.99, "grad_norm": 0.7755263447761536, "learning_rate": 1.4101100682040181e-09, "loss": 2.0013, "step": 29899 }, { "epoch": 0.99, "grad_norm": 0.7303889393806458, "learning_rate": 1.3923174723151633e-09, "loss": 2.0302, "step": 29900 }, { "epoch": 0.99, "grad_norm": 0.73729008436203, "learning_rate": 1.3746378348367117e-09, "loss": 2.0715, "step": 29901 }, { "epoch": 0.99, "grad_norm": 0.7594935297966003, "learning_rate": 1.3570711559673933e-09, "loss": 1.9935, "step": 29902 }, { "epoch": 0.99, "grad_norm": 0.7298699617385864, "learning_rate": 1.3396174359059377e-09, "loss": 1.9887, "step": 29903 }, { "epoch": 0.99, "grad_norm": 0.7334200143814087, "learning_rate": 1.3222766748510751e-09, "loss": 2.0957, "step": 29904 }, { "epoch": 0.99, "grad_norm": 0.7237154245376587, "learning_rate": 1.3050488729959842e-09, "loss": 2.112, "step": 29905 }, { "epoch": 0.99, "grad_norm": 0.7643848657608032, "learning_rate": 1.2879340305371745e-09, "loss": 2.078, "step": 29906 }, { "epoch": 1.0, "grad_norm": 0.7261351346969604, "learning_rate": 1.2709321476667147e-09, "loss": 2.0147, "step": 29907 }, { "epoch": 1.0, "grad_norm": 0.7658722400665283, "learning_rate": 1.2540432245777834e-09, "loss": 1.9805, "step": 29908 }, { "epoch": 1.0, "grad_norm": 0.7499039173126221, "learning_rate": 1.237267261461339e-09, "loss": 2.0319, "step": 29909 }, { "epoch": 1.0, "grad_norm": 0.7775822281837463, "learning_rate": 1.2206042585061195e-09, "loss": 2.0564, "step": 29910 }, { "epoch": 1.0, "grad_norm": 0.7540507316589355, "learning_rate": 1.2040542159008628e-09, "loss": 2.0354, "step": 29911 }, { "epoch": 1.0, "grad_norm": 0.7205124497413635, "learning_rate": 1.1876171338320864e-09, "loss": 2.0899, "step": 29912 }, { "epoch": 1.0, "grad_norm": 0.7431591749191284, "learning_rate": 1.1712930124851973e-09, "loss": 2.007, "step": 29913 }, { "epoch": 1.0, "grad_norm": 0.7286253571510315, "learning_rate": 1.1550818520467133e-09, "loss": 2.0256, "step": 29914 }, { "epoch": 1.0, "grad_norm": 0.7481906414031982, "learning_rate": 1.1389836526964904e-09, "loss": 1.9846, "step": 29915 }, { "epoch": 1.0, "grad_norm": 0.7880908250808716, "learning_rate": 1.1229984146188255e-09, "loss": 2.0561, "step": 29916 }, { "epoch": 1.0, "grad_norm": 0.7736382484436035, "learning_rate": 1.1071261379935748e-09, "loss": 2.0835, "step": 29917 }, { "epoch": 1.0, "grad_norm": 0.7612571120262146, "learning_rate": 1.0913668230005948e-09, "loss": 2.0654, "step": 29918 }, { "epoch": 1.0, "grad_norm": 0.7665936946868896, "learning_rate": 1.0757204698164103e-09, "loss": 2.1786, "step": 29919 }, { "epoch": 1.0, "grad_norm": 0.7419058084487915, "learning_rate": 1.0601870786197677e-09, "loss": 2.0098, "step": 29920 }, { "epoch": 1.0, "grad_norm": 0.7315691709518433, "learning_rate": 1.0447666495849717e-09, "loss": 2.0495, "step": 29921 }, { "epoch": 1.0, "grad_norm": 0.7426202297210693, "learning_rate": 1.0294591828863276e-09, "loss": 2.0219, "step": 29922 }, { "epoch": 1.0, "grad_norm": 0.7323428392410278, "learning_rate": 1.01426467869592e-09, "loss": 2.0538, "step": 29923 }, { "epoch": 1.0, "grad_norm": 0.7710581421852112, "learning_rate": 9.991831371880535e-10, "loss": 1.9981, "step": 29924 }, { "epoch": 1.0, "grad_norm": 0.7526863217353821, "learning_rate": 9.842145585303719e-10, "loss": 2.0371, "step": 29925 }, { "epoch": 1.0, "grad_norm": 0.7504081726074219, "learning_rate": 9.693589428927396e-10, "loss": 2.0718, "step": 29926 }, { "epoch": 1.0, "grad_norm": 0.7425186038017273, "learning_rate": 9.546162904439104e-10, "loss": 2.0842, "step": 29927 }, { "epoch": 1.0, "grad_norm": 0.7677820324897766, "learning_rate": 9.399866013504178e-10, "loss": 2.0427, "step": 29928 }, { "epoch": 1.0, "grad_norm": 0.7290091514587402, "learning_rate": 9.254698757754643e-10, "loss": 2.0533, "step": 29929 }, { "epoch": 1.0, "grad_norm": 0.7729735970497131, "learning_rate": 9.110661138855836e-10, "loss": 2.0557, "step": 29930 }, { "epoch": 1.0, "grad_norm": 0.7551309466362, "learning_rate": 8.96775315841758e-10, "loss": 2.0087, "step": 29931 }, { "epoch": 1.0, "grad_norm": 0.7739366292953491, "learning_rate": 8.825974818071903e-10, "loss": 2.0437, "step": 29932 }, { "epoch": 1.0, "grad_norm": 0.7256814241409302, "learning_rate": 8.685326119395321e-10, "loss": 2.0321, "step": 29933 }, { "epoch": 1.0, "grad_norm": 0.7723352909088135, "learning_rate": 8.545807063997658e-10, "loss": 2.1138, "step": 29934 }, { "epoch": 1.0, "grad_norm": 0.7476518750190735, "learning_rate": 8.40741765345543e-10, "loss": 2.0165, "step": 29935 }, { "epoch": 1.0, "grad_norm": 0.7521471977233887, "learning_rate": 8.27015788932295e-10, "loss": 2.0426, "step": 29936 }, { "epoch": 1.0, "grad_norm": 0.7272869944572449, "learning_rate": 8.134027773143427e-10, "loss": 2.0323, "step": 29937 }, { "epoch": 1.0, "grad_norm": 0.7486531734466553, "learning_rate": 7.999027306471174e-10, "loss": 1.9711, "step": 29938 }, { "epoch": 1.0, "grad_norm": 0.7193907499313354, "learning_rate": 7.865156490827197e-10, "loss": 2.0457, "step": 29939 }, { "epoch": 1.0, "grad_norm": 0.7560902833938599, "learning_rate": 7.732415327721399e-10, "loss": 2.0842, "step": 29940 }, { "epoch": 1.0, "grad_norm": 0.7276819348335266, "learning_rate": 7.60080381865258e-10, "loss": 2.0696, "step": 29941 }, { "epoch": 1.0, "grad_norm": 0.7517814040184021, "learning_rate": 7.470321965108441e-10, "loss": 2.0371, "step": 29942 }, { "epoch": 1.0, "grad_norm": 0.771757960319519, "learning_rate": 7.340969768554473e-10, "loss": 2.0861, "step": 29943 }, { "epoch": 1.0, "grad_norm": 0.7464506030082703, "learning_rate": 7.212747230467276e-10, "loss": 2.0, "step": 29944 }, { "epoch": 1.0, "grad_norm": 0.7161481380462646, "learning_rate": 7.085654352290138e-10, "loss": 2.0726, "step": 29945 }, { "epoch": 1.0, "grad_norm": 0.7373945116996765, "learning_rate": 6.959691135466351e-10, "loss": 2.0002, "step": 29946 }, { "epoch": 1.0, "grad_norm": 0.7244176268577576, "learning_rate": 6.834857581394793e-10, "loss": 2.0628, "step": 29947 }, { "epoch": 1.0, "grad_norm": 0.7278868556022644, "learning_rate": 6.711153691507655e-10, "loss": 2.0937, "step": 29948 }, { "epoch": 1.0, "grad_norm": 0.7371912598609924, "learning_rate": 6.588579467203815e-10, "loss": 2.0254, "step": 29949 }, { "epoch": 1.0, "grad_norm": 0.7127786874771118, "learning_rate": 6.467134909848849e-10, "loss": 1.9971, "step": 29950 }, { "epoch": 1.0, "grad_norm": 0.7275497317314148, "learning_rate": 6.346820020830535e-10, "loss": 1.9863, "step": 29951 }, { "epoch": 1.0, "grad_norm": 0.7169925570487976, "learning_rate": 6.227634801503346e-10, "loss": 2.0047, "step": 29952 }, { "epoch": 1.0, "grad_norm": 0.7752792239189148, "learning_rate": 6.109579253210651e-10, "loss": 2.0266, "step": 29953 }, { "epoch": 1.0, "grad_norm": 0.7461509704589844, "learning_rate": 5.99265337729582e-10, "loss": 2.1087, "step": 29954 }, { "epoch": 1.0, "grad_norm": 0.7324398756027222, "learning_rate": 5.876857175068917e-10, "loss": 2.0306, "step": 29955 }, { "epoch": 1.0, "grad_norm": 0.7414193153381348, "learning_rate": 5.762190647851107e-10, "loss": 2.0772, "step": 29956 }, { "epoch": 1.0, "grad_norm": 0.7633237242698669, "learning_rate": 5.648653796919145e-10, "loss": 2.0775, "step": 29957 }, { "epoch": 1.0, "grad_norm": 0.735564112663269, "learning_rate": 5.536246623571995e-10, "loss": 2.0399, "step": 29958 }, { "epoch": 1.0, "grad_norm": 0.764510452747345, "learning_rate": 5.424969129075308e-10, "loss": 1.9942, "step": 29959 }, { "epoch": 1.0, "grad_norm": 0.7274280786514282, "learning_rate": 5.314821314683638e-10, "loss": 2.0554, "step": 29960 }, { "epoch": 1.0, "grad_norm": 0.7093517184257507, "learning_rate": 5.205803181640434e-10, "loss": 1.9897, "step": 29961 }, { "epoch": 1.0, "grad_norm": 0.7587512731552124, "learning_rate": 5.097914731178044e-10, "loss": 2.0273, "step": 29962 }, { "epoch": 1.0, "grad_norm": 0.755617618560791, "learning_rate": 4.991155964517713e-10, "loss": 2.067, "step": 29963 }, { "epoch": 1.0, "grad_norm": 0.7443338632583618, "learning_rate": 4.885526882869585e-10, "loss": 2.0564, "step": 29964 }, { "epoch": 1.0, "grad_norm": 0.7335346937179565, "learning_rate": 4.781027487421597e-10, "loss": 2.0159, "step": 29965 }, { "epoch": 1.0, "grad_norm": 0.725265383720398, "learning_rate": 4.677657779350586e-10, "loss": 1.9666, "step": 29966 }, { "epoch": 1.0, "grad_norm": 0.7502939701080322, "learning_rate": 4.575417759822287e-10, "loss": 2.0238, "step": 29967 }, { "epoch": 1.0, "grad_norm": 0.7693683505058289, "learning_rate": 4.4743074300024334e-10, "loss": 2.0421, "step": 29968 }, { "epoch": 1.0, "grad_norm": 0.7351319789886475, "learning_rate": 4.3743267910345554e-10, "loss": 2.0349, "step": 29969 }, { "epoch": 1.0, "grad_norm": 0.7460674047470093, "learning_rate": 4.275475844039978e-10, "loss": 1.9725, "step": 29970 }, { "epoch": 1.0, "grad_norm": 0.7762727737426758, "learning_rate": 4.1777545901400264e-10, "loss": 2.0384, "step": 29971 }, { "epoch": 1.0, "grad_norm": 0.7705658078193665, "learning_rate": 4.0811630304338213e-10, "loss": 2.0174, "step": 29972 }, { "epoch": 1.0, "grad_norm": 0.7528339624404907, "learning_rate": 3.985701166009381e-10, "loss": 2.0718, "step": 29973 }, { "epoch": 1.0, "grad_norm": 0.7343324422836304, "learning_rate": 3.891368997954725e-10, "loss": 2.0095, "step": 29974 }, { "epoch": 1.0, "grad_norm": 0.7579321265220642, "learning_rate": 3.798166527335667e-10, "loss": 2.0645, "step": 29975 }, { "epoch": 1.0, "grad_norm": 0.7555983662605286, "learning_rate": 3.7060937551958164e-10, "loss": 2.0777, "step": 29976 }, { "epoch": 1.0, "grad_norm": 0.7653681039810181, "learning_rate": 3.615150682589885e-10, "loss": 1.9922, "step": 29977 }, { "epoch": 1.0, "grad_norm": 0.7409499287605286, "learning_rate": 3.5253373105281764e-10, "loss": 2.1126, "step": 29978 }, { "epoch": 1.0, "grad_norm": 0.7467514872550964, "learning_rate": 3.436653640032095e-10, "loss": 2.0713, "step": 29979 }, { "epoch": 1.0, "grad_norm": 0.7554823756217957, "learning_rate": 3.349099672111944e-10, "loss": 2.0426, "step": 29980 }, { "epoch": 1.0, "grad_norm": 0.747926652431488, "learning_rate": 3.26267540774472e-10, "loss": 2.1105, "step": 29981 }, { "epoch": 1.0, "grad_norm": 0.7688230276107788, "learning_rate": 3.1773808479074184e-10, "loss": 2.0657, "step": 29982 }, { "epoch": 1.0, "grad_norm": 0.7157496809959412, "learning_rate": 3.093215993577037e-10, "loss": 2.0882, "step": 29983 }, { "epoch": 1.0, "grad_norm": 0.7708505988121033, "learning_rate": 3.0101808456972635e-10, "loss": 2.0793, "step": 29984 }, { "epoch": 1.0, "grad_norm": 0.7430346012115479, "learning_rate": 2.928275405200687e-10, "loss": 2.0664, "step": 29985 }, { "epoch": 1.0, "grad_norm": 0.7533227801322937, "learning_rate": 2.847499673008791e-10, "loss": 2.0388, "step": 29986 }, { "epoch": 1.0, "grad_norm": 0.7363753318786621, "learning_rate": 2.767853650054164e-10, "loss": 2.1146, "step": 29987 }, { "epoch": 1.0, "grad_norm": 0.7249892950057983, "learning_rate": 2.689337337213882e-10, "loss": 2.1128, "step": 29988 }, { "epoch": 1.0, "grad_norm": 0.726254403591156, "learning_rate": 2.6119507353983273e-10, "loss": 2.0108, "step": 29989 }, { "epoch": 1.0, "grad_norm": 0.7713172435760498, "learning_rate": 2.5356938454623724e-10, "loss": 2.0801, "step": 29990 }, { "epoch": 1.0, "grad_norm": 0.7783042788505554, "learning_rate": 2.4605666682719907e-10, "loss": 2.1466, "step": 29991 }, { "epoch": 1.0, "grad_norm": 0.7423444986343384, "learning_rate": 2.386569204670952e-10, "loss": 2.067, "step": 29992 }, { "epoch": 1.0, "grad_norm": 0.7339439392089844, "learning_rate": 2.3137014555141279e-10, "loss": 2.0256, "step": 29993 }, { "epoch": 1.0, "grad_norm": 0.7885274887084961, "learning_rate": 2.241963421600879e-10, "loss": 2.0554, "step": 29994 }, { "epoch": 1.0, "grad_norm": 0.7579604983329773, "learning_rate": 2.1713551037638726e-10, "loss": 2.0346, "step": 29995 }, { "epoch": 1.0, "grad_norm": 0.76185142993927, "learning_rate": 2.1018765027913667e-10, "loss": 2.0471, "step": 29996 }, { "epoch": 1.0, "grad_norm": 0.7575711011886597, "learning_rate": 2.033527619460518e-10, "loss": 2.093, "step": 29997 }, { "epoch": 1.0, "grad_norm": 0.7392294406890869, "learning_rate": 1.9663084545484823e-10, "loss": 2.015, "step": 29998 }, { "epoch": 1.0, "grad_norm": 0.7899391055107117, "learning_rate": 1.9002190088213135e-10, "loss": 2.0268, "step": 29999 }, { "epoch": 1.0, "grad_norm": 0.7452924847602844, "learning_rate": 1.835259283022861e-10, "loss": 2.0789, "step": 30000 }, { "epoch": 1.0, "grad_norm": 0.7229639887809753, "learning_rate": 1.7714292778858722e-10, "loss": 2.0081, "step": 30001 }, { "epoch": 1.0, "grad_norm": 0.7499478459358215, "learning_rate": 1.7087289941208895e-10, "loss": 2.0555, "step": 30002 }, { "epoch": 1.0, "grad_norm": 0.7569377422332764, "learning_rate": 1.6471584324495582e-10, "loss": 2.1134, "step": 30003 }, { "epoch": 1.0, "grad_norm": 0.7243926525115967, "learning_rate": 1.5867175935713186e-10, "loss": 2.054, "step": 30004 }, { "epoch": 1.0, "grad_norm": 0.7314569354057312, "learning_rate": 1.5274064781523046e-10, "loss": 2.0387, "step": 30005 }, { "epoch": 1.0, "grad_norm": 0.7608683705329895, "learning_rate": 1.4692250868808543e-10, "loss": 2.0155, "step": 30006 }, { "epoch": 1.0, "grad_norm": 0.7373477816581726, "learning_rate": 1.4121734204008976e-10, "loss": 2.0881, "step": 30007 }, { "epoch": 1.0, "grad_norm": 0.7580901980400085, "learning_rate": 1.356251479356363e-10, "loss": 2.0622, "step": 30008 }, { "epoch": 1.0, "grad_norm": 0.7529125213623047, "learning_rate": 1.3014592643911805e-10, "loss": 2.073, "step": 30009 }, { "epoch": 1.0, "grad_norm": 0.7689856290817261, "learning_rate": 1.2477967761159726e-10, "loss": 2.0341, "step": 30010 }, { "epoch": 1.0, "grad_norm": 0.7512357234954834, "learning_rate": 1.1952640151302598e-10, "loss": 2.0299, "step": 30011 }, { "epoch": 1.0, "grad_norm": 0.7306007146835327, "learning_rate": 1.1438609820446645e-10, "loss": 1.9798, "step": 30012 }, { "epoch": 1.0, "grad_norm": 0.7438822388648987, "learning_rate": 1.0935876774254006e-10, "loss": 2.0404, "step": 30013 }, { "epoch": 1.0, "grad_norm": 0.7729618549346924, "learning_rate": 1.0444441018497842e-10, "loss": 2.0789, "step": 30014 }, { "epoch": 1.0, "grad_norm": 0.7457106113433838, "learning_rate": 9.964302558729266e-11, "loss": 2.0646, "step": 30015 }, { "epoch": 1.0, "grad_norm": 0.7459310293197632, "learning_rate": 9.49546140027735e-11, "loss": 2.0073, "step": 30016 }, { "epoch": 1.0, "grad_norm": 0.7553513050079346, "learning_rate": 9.037917548471164e-11, "loss": 1.9777, "step": 30017 }, { "epoch": 1.0, "grad_norm": 0.7469952702522278, "learning_rate": 8.591671008528757e-11, "loss": 2.0279, "step": 30018 }, { "epoch": 1.0, "grad_norm": 0.7217015027999878, "learning_rate": 8.156721785446131e-11, "loss": 2.0209, "step": 30019 }, { "epoch": 1.0, "grad_norm": 0.7337492108345032, "learning_rate": 7.733069884108268e-11, "loss": 2.055, "step": 30020 }, { "epoch": 1.0, "grad_norm": 0.7287655472755432, "learning_rate": 7.32071530940015e-11, "loss": 2.0554, "step": 30021 }, { "epoch": 1.0, "grad_norm": 0.7633906006813049, "learning_rate": 6.919658065984714e-11, "loss": 2.072, "step": 30022 }, { "epoch": 1.0, "grad_norm": 0.7760948538780212, "learning_rate": 6.529898158302849e-11, "loss": 2.0192, "step": 30023 }, { "epoch": 1.0, "grad_norm": 0.7492859363555908, "learning_rate": 6.151435590795452e-11, "loss": 2.0125, "step": 30024 }, { "epoch": 1.0, "grad_norm": 0.7688169479370117, "learning_rate": 5.784270367681366e-11, "loss": 2.038, "step": 30025 }, { "epoch": 1.0, "grad_norm": 0.7396836280822754, "learning_rate": 5.428402493179441e-11, "loss": 2.0208, "step": 30026 }, { "epoch": 1.0, "grad_norm": 0.7266297936439514, "learning_rate": 5.083831971286479e-11, "loss": 1.9321, "step": 30027 }, { "epoch": 1.0, "grad_norm": 0.7384500503540039, "learning_rate": 4.7505588059992836e-11, "loss": 1.9722, "step": 30028 }, { "epoch": 1.0, "grad_norm": 0.7619094848632812, "learning_rate": 4.4285830008705675e-11, "loss": 2.0746, "step": 30029 }, { "epoch": 1.0, "grad_norm": 0.7471997141838074, "learning_rate": 4.11790455967509e-11, "loss": 1.9753, "step": 30030 }, { "epoch": 1.0, "grad_norm": 0.737399160861969, "learning_rate": 3.818523485965564e-11, "loss": 2.05, "step": 30031 }, { "epoch": 1.0, "grad_norm": 0.7499684691429138, "learning_rate": 3.5304397829616364e-11, "loss": 1.9798, "step": 30032 }, { "epoch": 1.0, "grad_norm": 0.7324346899986267, "learning_rate": 3.253653454104999e-11, "loss": 1.9936, "step": 30033 }, { "epoch": 1.0, "grad_norm": 0.7423503398895264, "learning_rate": 2.9881645023932536e-11, "loss": 1.9536, "step": 30034 }, { "epoch": 1.0, "grad_norm": 0.7474167943000793, "learning_rate": 2.7339729308240026e-11, "loss": 2.1112, "step": 30035 }, { "epoch": 1.0, "grad_norm": 0.7588308453559875, "learning_rate": 2.4910787423948478e-11, "loss": 2.0964, "step": 30036 }, { "epoch": 1.0, "grad_norm": 0.7837567925453186, "learning_rate": 2.259481939770325e-11, "loss": 2.0522, "step": 30037 }, { "epoch": 1.0, "grad_norm": 0.7425159811973572, "learning_rate": 2.0391825255039464e-11, "loss": 2.0676, "step": 30038 }, { "epoch": 1.0, "grad_norm": 0.741051197052002, "learning_rate": 1.8301805021492258e-11, "loss": 2.0925, "step": 30039 }, { "epoch": 1.0, "grad_norm": 0.7427074909210205, "learning_rate": 1.632475872037631e-11, "loss": 2.0548, "step": 30040 }, { "epoch": 1.0, "grad_norm": 0.738802433013916, "learning_rate": 1.4460686375006305e-11, "loss": 1.9549, "step": 30041 }, { "epoch": 1.0, "grad_norm": 0.7539389133453369, "learning_rate": 1.2709588005366259e-11, "loss": 2.0847, "step": 30042 }, { "epoch": 1.0, "grad_norm": 0.7748835682868958, "learning_rate": 1.1071463631440182e-11, "loss": 2.0514, "step": 30043 }, { "epoch": 1.0, "grad_norm": 0.7527546882629395, "learning_rate": 9.546313270991647e-12, "loss": 2.0212, "step": 30044 }, { "epoch": 1.0, "grad_norm": 0.7306848168373108, "learning_rate": 8.134136944004667e-12, "loss": 2.0182, "step": 30045 }, { "epoch": 1.0, "grad_norm": 0.7830349802970886, "learning_rate": 6.834934662691695e-12, "loss": 2.0485, "step": 30046 }, { "epoch": 1.0, "grad_norm": 0.7780806422233582, "learning_rate": 5.648706444816299e-12, "loss": 2.0785, "step": 30047 }, { "epoch": 1.0, "grad_norm": 0.7345147728919983, "learning_rate": 4.575452302590933e-12, "loss": 2.069, "step": 30048 }, { "epoch": 1.0, "grad_norm": 0.7313547134399414, "learning_rate": 3.6151722471178264e-12, "loss": 2.0154, "step": 30049 }, { "epoch": 1.0, "grad_norm": 0.7374529242515564, "learning_rate": 2.767866290609433e-12, "loss": 2.039, "step": 30050 }, { "epoch": 1.0, "grad_norm": 0.7542058229446411, "learning_rate": 2.033534443057761e-12, "loss": 2.0975, "step": 30051 }, { "epoch": 1.0, "grad_norm": 0.7186015248298645, "learning_rate": 1.4121767111241469e-12, "loss": 2.0925, "step": 30052 }, { "epoch": 1.0, "grad_norm": 0.7711829543113708, "learning_rate": 9.037931025801527e-13, "loss": 2.14, "step": 30053 }, { "epoch": 1.0, "grad_norm": 0.7336012125015259, "learning_rate": 5.083836240871165e-13, "loss": 2.0082, "step": 30054 }, { "epoch": 1.0, "grad_norm": 0.7710098028182983, "learning_rate": 2.2594827786548423e-13, "loss": 2.0744, "step": 30055 }, { "epoch": 1.0, "grad_norm": 0.747883141040802, "learning_rate": 5.6487069466371057e-14, "loss": 2.0049, "step": 30056 }, { "epoch": 1.0, "grad_norm": 1.5368523597717285, "learning_rate": 0.0, "loss": 2.0168, "step": 30057 }, { "epoch": 1.0, "step": 30057, "total_flos": 5.53843527867433e+16, "train_loss": 2.1406584009897505, "train_runtime": 4517.6027, "train_samples_per_second": 425.801, "train_steps_per_second": 6.653 } ], "logging_steps": 1.0, "max_steps": 30057, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "total_flos": 5.53843527867433e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }