{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.24999189937074312, "eval_steps": 2411, "global_step": 2411, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010368805448807264, "grad_norm": 3.078125, "learning_rate": 2e-05, "loss": 11.9266, "step": 1 }, { "epoch": 0.00010368805448807264, "eval_loss": 11.923038482666016, "eval_runtime": 0.4444, "eval_samples_per_second": 335.305, "eval_steps_per_second": 15.753, "step": 1 }, { "epoch": 0.00020737610897614527, "grad_norm": 3.046875, "learning_rate": 4e-05, "loss": 11.9246, "step": 2 }, { "epoch": 0.0003110641634642179, "grad_norm": 3.171875, "learning_rate": 6e-05, "loss": 11.8897, "step": 3 }, { "epoch": 0.00041475221795229055, "grad_norm": 2.890625, "learning_rate": 8e-05, "loss": 11.7637, "step": 4 }, { "epoch": 0.0005184402724403631, "grad_norm": 2.6875, "learning_rate": 0.0001, "loss": 11.6167, "step": 5 }, { "epoch": 0.0006221283269284358, "grad_norm": 2.5625, "learning_rate": 0.00012, "loss": 11.3739, "step": 6 }, { "epoch": 0.0007258163814165084, "grad_norm": 2.53125, "learning_rate": 0.00014, "loss": 11.1311, "step": 7 }, { "epoch": 0.0008295044359045811, "grad_norm": 2.359375, "learning_rate": 0.00016, "loss": 10.8909, "step": 8 }, { "epoch": 0.0009331924903926537, "grad_norm": 2.21875, "learning_rate": 0.00018, "loss": 10.6657, "step": 9 }, { "epoch": 0.0010368805448807263, "grad_norm": 2.125, "learning_rate": 0.0002, "loss": 10.4763, "step": 10 }, { "epoch": 0.001140568599368799, "grad_norm": 2.078125, "learning_rate": 0.00019999999941005286, "loss": 10.2975, "step": 11 }, { "epoch": 0.0012442566538568716, "grad_norm": 2.015625, "learning_rate": 0.00019999999764021143, "loss": 10.1245, "step": 12 }, { "epoch": 0.0013479447083449443, "grad_norm": 2.0, "learning_rate": 0.00019999999469047573, "loss": 9.9439, "step": 13 }, { "epoch": 0.0014516327628330168, "grad_norm": 2.015625, "learning_rate": 0.0001999999905608458, "loss": 9.7362, "step": 14 }, { "epoch": 0.0015553208173210895, "grad_norm": 1.9609375, "learning_rate": 0.00019999998525132166, "loss": 9.5586, "step": 15 }, { "epoch": 0.0016590088718091622, "grad_norm": 1.875, "learning_rate": 0.00019999997876190344, "loss": 9.4056, "step": 16 }, { "epoch": 0.0017626969262972347, "grad_norm": 1.8515625, "learning_rate": 0.00019999997109259115, "loss": 9.2022, "step": 17 }, { "epoch": 0.0018663849807853074, "grad_norm": 1.7890625, "learning_rate": 0.00019999996224338487, "loss": 9.0599, "step": 18 }, { "epoch": 0.00197007303527338, "grad_norm": 1.734375, "learning_rate": 0.0001999999522142848, "loss": 8.9008, "step": 19 }, { "epoch": 0.0020737610897614525, "grad_norm": 1.6171875, "learning_rate": 0.000199999941005291, "loss": 8.8079, "step": 20 }, { "epoch": 0.0021774491442495252, "grad_norm": 1.5625, "learning_rate": 0.00019999992861640355, "loss": 8.654, "step": 21 }, { "epoch": 0.002281137198737598, "grad_norm": 1.46875, "learning_rate": 0.0001999999150476227, "loss": 8.5387, "step": 22 }, { "epoch": 0.0023848252532256706, "grad_norm": 1.390625, "learning_rate": 0.0001999999002989485, "loss": 8.4015, "step": 23 }, { "epoch": 0.0024885133077137433, "grad_norm": 1.2890625, "learning_rate": 0.00019999988437038123, "loss": 8.3069, "step": 24 }, { "epoch": 0.002592201362201816, "grad_norm": 1.2109375, "learning_rate": 0.00019999986726192102, "loss": 8.1671, "step": 25 }, { "epoch": 0.0026958894166898887, "grad_norm": 1.0546875, "learning_rate": 0.00019999984897356806, "loss": 8.1281, "step": 26 }, { "epoch": 0.002799577471177961, "grad_norm": 0.921875, "learning_rate": 0.0001999998295053226, "loss": 8.0346, "step": 27 }, { "epoch": 0.0029032655256660336, "grad_norm": 0.7890625, "learning_rate": 0.00019999980885718487, "loss": 7.9803, "step": 28 }, { "epoch": 0.0030069535801541063, "grad_norm": 0.67578125, "learning_rate": 0.00019999978702915508, "loss": 7.9035, "step": 29 }, { "epoch": 0.003110641634642179, "grad_norm": 0.58984375, "learning_rate": 0.0001999997640212335, "loss": 7.8359, "step": 30 }, { "epoch": 0.0032143296891302517, "grad_norm": 0.470703125, "learning_rate": 0.00019999973983342043, "loss": 7.8463, "step": 31 }, { "epoch": 0.0033180177436183244, "grad_norm": 0.4140625, "learning_rate": 0.0001999997144657161, "loss": 7.7655, "step": 32 }, { "epoch": 0.003421705798106397, "grad_norm": 0.33984375, "learning_rate": 0.0001999996879181209, "loss": 7.7699, "step": 33 }, { "epoch": 0.0035253938525944693, "grad_norm": 0.296875, "learning_rate": 0.00019999966019063506, "loss": 7.7232, "step": 34 }, { "epoch": 0.003629081907082542, "grad_norm": 0.28515625, "learning_rate": 0.00019999963128325892, "loss": 7.658, "step": 35 }, { "epoch": 0.0037327699615706147, "grad_norm": 0.263671875, "learning_rate": 0.00019999960119599283, "loss": 7.6972, "step": 36 }, { "epoch": 0.0038364580160586874, "grad_norm": 0.232421875, "learning_rate": 0.00019999956992883716, "loss": 7.6441, "step": 37 }, { "epoch": 0.00394014607054676, "grad_norm": 0.296875, "learning_rate": 0.00019999953748179228, "loss": 7.5872, "step": 38 }, { "epoch": 0.004043834125034833, "grad_norm": 0.37890625, "learning_rate": 0.00019999950385485855, "loss": 7.5796, "step": 39 }, { "epoch": 0.004147522179522905, "grad_norm": 0.26171875, "learning_rate": 0.00019999946904803638, "loss": 7.5814, "step": 40 }, { "epoch": 0.004251210234010978, "grad_norm": 0.255859375, "learning_rate": 0.00019999943306132621, "loss": 7.5596, "step": 41 }, { "epoch": 0.0043548982884990504, "grad_norm": 0.275390625, "learning_rate": 0.00019999939589472837, "loss": 7.5181, "step": 42 }, { "epoch": 0.004458586342987124, "grad_norm": 0.2392578125, "learning_rate": 0.00019999935754824342, "loss": 7.4972, "step": 43 }, { "epoch": 0.004562274397475196, "grad_norm": 0.2451171875, "learning_rate": 0.00019999931802187172, "loss": 7.4948, "step": 44 }, { "epoch": 0.004665962451963268, "grad_norm": 0.2578125, "learning_rate": 0.0001999992773156138, "loss": 7.4437, "step": 45 }, { "epoch": 0.004769650506451341, "grad_norm": 0.240234375, "learning_rate": 0.0001999992354294701, "loss": 7.4184, "step": 46 }, { "epoch": 0.0048733385609394135, "grad_norm": 0.2421875, "learning_rate": 0.00019999919236344114, "loss": 7.3949, "step": 47 }, { "epoch": 0.004977026615427487, "grad_norm": 0.2451171875, "learning_rate": 0.00019999914811752738, "loss": 7.3632, "step": 48 }, { "epoch": 0.005080714669915559, "grad_norm": 0.28125, "learning_rate": 0.00019999910269172938, "loss": 7.3576, "step": 49 }, { "epoch": 0.005184402724403632, "grad_norm": 0.232421875, "learning_rate": 0.0001999990560860477, "loss": 7.3231, "step": 50 }, { "epoch": 0.005288090778891704, "grad_norm": 0.263671875, "learning_rate": 0.00019999900830048283, "loss": 7.3035, "step": 51 }, { "epoch": 0.005391778833379777, "grad_norm": 0.306640625, "learning_rate": 0.0001999989593350354, "loss": 7.2858, "step": 52 }, { "epoch": 0.00549546688786785, "grad_norm": 0.265625, "learning_rate": 0.00019999890918970592, "loss": 7.2276, "step": 53 }, { "epoch": 0.005599154942355922, "grad_norm": 0.2314453125, "learning_rate": 0.00019999885786449505, "loss": 7.2519, "step": 54 }, { "epoch": 0.005702842996843995, "grad_norm": 0.302734375, "learning_rate": 0.00019999880535940333, "loss": 7.2018, "step": 55 }, { "epoch": 0.005806531051332067, "grad_norm": 0.2392578125, "learning_rate": 0.00019999875167443142, "loss": 7.1873, "step": 56 }, { "epoch": 0.00591021910582014, "grad_norm": 0.2470703125, "learning_rate": 0.00019999869680957993, "loss": 7.1665, "step": 57 }, { "epoch": 0.006013907160308213, "grad_norm": 0.310546875, "learning_rate": 0.00019999864076484955, "loss": 7.1518, "step": 58 }, { "epoch": 0.006117595214796286, "grad_norm": 0.376953125, "learning_rate": 0.0001999985835402409, "loss": 7.1188, "step": 59 }, { "epoch": 0.006221283269284358, "grad_norm": 0.4375, "learning_rate": 0.00019999852513575466, "loss": 7.0633, "step": 60 }, { "epoch": 0.00632497132377243, "grad_norm": 0.376953125, "learning_rate": 0.00019999846555139152, "loss": 7.0708, "step": 61 }, { "epoch": 0.006428659378260503, "grad_norm": 0.296875, "learning_rate": 0.0001999984047871522, "loss": 7.0265, "step": 62 }, { "epoch": 0.006532347432748576, "grad_norm": 0.40625, "learning_rate": 0.0001999983428430374, "loss": 6.976, "step": 63 }, { "epoch": 0.006636035487236649, "grad_norm": 0.47265625, "learning_rate": 0.00019999827971904787, "loss": 6.9527, "step": 64 }, { "epoch": 0.006739723541724721, "grad_norm": 0.357421875, "learning_rate": 0.00019999821541518437, "loss": 6.9225, "step": 65 }, { "epoch": 0.006843411596212794, "grad_norm": 0.28125, "learning_rate": 0.00019999814993144755, "loss": 6.9846, "step": 66 }, { "epoch": 0.0069470996507008664, "grad_norm": 0.310546875, "learning_rate": 0.00019999808326783835, "loss": 6.9026, "step": 67 }, { "epoch": 0.007050787705188939, "grad_norm": 0.298828125, "learning_rate": 0.00019999801542435743, "loss": 6.8856, "step": 68 }, { "epoch": 0.007154475759677012, "grad_norm": 0.32421875, "learning_rate": 0.00019999794640100562, "loss": 6.8605, "step": 69 }, { "epoch": 0.007258163814165084, "grad_norm": 0.251953125, "learning_rate": 0.00019999787619778375, "loss": 6.8081, "step": 70 }, { "epoch": 0.007361851868653157, "grad_norm": 0.380859375, "learning_rate": 0.00019999780481469266, "loss": 6.8199, "step": 71 }, { "epoch": 0.0074655399231412295, "grad_norm": 0.48828125, "learning_rate": 0.00019999773225173314, "loss": 6.8462, "step": 72 }, { "epoch": 0.007569227977629303, "grad_norm": 0.60546875, "learning_rate": 0.00019999765850890614, "loss": 6.7978, "step": 73 }, { "epoch": 0.007672916032117375, "grad_norm": 0.5625, "learning_rate": 0.0001999975835862124, "loss": 6.7541, "step": 74 }, { "epoch": 0.007776604086605447, "grad_norm": 0.4296875, "learning_rate": 0.00019999750748365294, "loss": 6.7211, "step": 75 }, { "epoch": 0.00788029214109352, "grad_norm": 0.453125, "learning_rate": 0.00019999743020122855, "loss": 6.7321, "step": 76 }, { "epoch": 0.007983980195581592, "grad_norm": 0.48828125, "learning_rate": 0.0001999973517389402, "loss": 6.7423, "step": 77 }, { "epoch": 0.008087668250069666, "grad_norm": 0.515625, "learning_rate": 0.00019999727209678883, "loss": 6.6788, "step": 78 }, { "epoch": 0.008191356304557739, "grad_norm": 0.5625, "learning_rate": 0.0001999971912747753, "loss": 6.689, "step": 79 }, { "epoch": 0.00829504435904581, "grad_norm": 0.416015625, "learning_rate": 0.00019999710927290064, "loss": 6.6457, "step": 80 }, { "epoch": 0.008398732413533883, "grad_norm": 0.453125, "learning_rate": 0.00019999702609116578, "loss": 6.6439, "step": 81 }, { "epoch": 0.008502420468021956, "grad_norm": 0.56640625, "learning_rate": 0.00019999694172957174, "loss": 6.6209, "step": 82 }, { "epoch": 0.008606108522510028, "grad_norm": 0.578125, "learning_rate": 0.00019999685618811948, "loss": 6.5961, "step": 83 }, { "epoch": 0.008709796576998101, "grad_norm": 0.5546875, "learning_rate": 0.00019999676946681, "loss": 6.6656, "step": 84 }, { "epoch": 0.008813484631486174, "grad_norm": 0.412109375, "learning_rate": 0.00019999668156564436, "loss": 6.5868, "step": 85 }, { "epoch": 0.008917172685974247, "grad_norm": 0.443359375, "learning_rate": 0.00019999659248462357, "loss": 6.6077, "step": 86 }, { "epoch": 0.009020860740462319, "grad_norm": 0.38671875, "learning_rate": 0.0001999965022237487, "loss": 6.5545, "step": 87 }, { "epoch": 0.009124548794950392, "grad_norm": 0.318359375, "learning_rate": 0.00019999641078302077, "loss": 6.5389, "step": 88 }, { "epoch": 0.009228236849438465, "grad_norm": 0.36328125, "learning_rate": 0.00019999631816244095, "loss": 6.5265, "step": 89 }, { "epoch": 0.009331924903926536, "grad_norm": 0.5078125, "learning_rate": 0.00019999622436201025, "loss": 6.5339, "step": 90 }, { "epoch": 0.00943561295841461, "grad_norm": 0.578125, "learning_rate": 0.0001999961293817298, "loss": 6.5092, "step": 91 }, { "epoch": 0.009539301012902682, "grad_norm": 0.447265625, "learning_rate": 0.0001999960332216007, "loss": 6.4901, "step": 92 }, { "epoch": 0.009642989067390756, "grad_norm": 0.6015625, "learning_rate": 0.00019999593588162414, "loss": 6.5021, "step": 93 }, { "epoch": 0.009746677121878827, "grad_norm": 0.59765625, "learning_rate": 0.00019999583736180122, "loss": 6.4337, "step": 94 }, { "epoch": 0.0098503651763669, "grad_norm": 0.4296875, "learning_rate": 0.00019999573766213313, "loss": 6.4328, "step": 95 }, { "epoch": 0.009954053230854973, "grad_norm": 0.640625, "learning_rate": 0.00019999563678262106, "loss": 6.4444, "step": 96 }, { "epoch": 0.010057741285343045, "grad_norm": 0.71484375, "learning_rate": 0.00019999553472326614, "loss": 6.4165, "step": 97 }, { "epoch": 0.010161429339831118, "grad_norm": 1.4140625, "learning_rate": 0.0001999954314840696, "loss": 6.4282, "step": 98 }, { "epoch": 0.01026511739431919, "grad_norm": 1.1875, "learning_rate": 0.0001999953270650327, "loss": 6.4431, "step": 99 }, { "epoch": 0.010368805448807264, "grad_norm": 0.8515625, "learning_rate": 0.00019999522146615662, "loss": 6.3568, "step": 100 }, { "epoch": 0.010472493503295335, "grad_norm": 0.69921875, "learning_rate": 0.00019999511468744263, "loss": 6.3853, "step": 101 }, { "epoch": 0.010576181557783408, "grad_norm": 0.94921875, "learning_rate": 0.000199995006728892, "loss": 6.4344, "step": 102 }, { "epoch": 0.010679869612271482, "grad_norm": 0.74609375, "learning_rate": 0.000199994897590506, "loss": 6.3957, "step": 103 }, { "epoch": 0.010783557666759555, "grad_norm": 0.60546875, "learning_rate": 0.00019999478727228588, "loss": 6.3347, "step": 104 }, { "epoch": 0.010887245721247626, "grad_norm": 0.5546875, "learning_rate": 0.00019999467577423296, "loss": 6.3382, "step": 105 }, { "epoch": 0.0109909337757357, "grad_norm": 0.62890625, "learning_rate": 0.0001999945630963486, "loss": 6.3785, "step": 106 }, { "epoch": 0.011094621830223772, "grad_norm": 0.439453125, "learning_rate": 0.00019999444923863405, "loss": 6.3325, "step": 107 }, { "epoch": 0.011198309884711844, "grad_norm": 0.482421875, "learning_rate": 0.00019999433420109073, "loss": 6.3663, "step": 108 }, { "epoch": 0.011301997939199917, "grad_norm": 0.41796875, "learning_rate": 0.00019999421798371997, "loss": 6.2954, "step": 109 }, { "epoch": 0.01140568599368799, "grad_norm": 0.474609375, "learning_rate": 0.00019999410058652313, "loss": 6.2503, "step": 110 }, { "epoch": 0.011509374048176063, "grad_norm": 0.396484375, "learning_rate": 0.00019999398200950158, "loss": 6.2885, "step": 111 }, { "epoch": 0.011613062102664135, "grad_norm": 0.462890625, "learning_rate": 0.00019999386225265676, "loss": 6.3214, "step": 112 }, { "epoch": 0.011716750157152208, "grad_norm": 0.314453125, "learning_rate": 0.00019999374131599007, "loss": 6.2841, "step": 113 }, { "epoch": 0.01182043821164028, "grad_norm": 0.39453125, "learning_rate": 0.00019999361919950293, "loss": 6.29, "step": 114 }, { "epoch": 0.011924126266128352, "grad_norm": 0.376953125, "learning_rate": 0.00019999349590319677, "loss": 6.2106, "step": 115 }, { "epoch": 0.012027814320616425, "grad_norm": 0.4296875, "learning_rate": 0.00019999337142707305, "loss": 6.219, "step": 116 }, { "epoch": 0.012131502375104498, "grad_norm": 0.482421875, "learning_rate": 0.00019999324577113324, "loss": 6.2419, "step": 117 }, { "epoch": 0.012235190429592572, "grad_norm": 0.76953125, "learning_rate": 0.00019999311893537883, "loss": 6.2168, "step": 118 }, { "epoch": 0.012338878484080643, "grad_norm": 1.515625, "learning_rate": 0.00019999299091981134, "loss": 6.2602, "step": 119 }, { "epoch": 0.012442566538568716, "grad_norm": 1.15625, "learning_rate": 0.00019999286172443223, "loss": 6.2084, "step": 120 }, { "epoch": 0.01254625459305679, "grad_norm": 0.890625, "learning_rate": 0.00019999273134924307, "loss": 6.2672, "step": 121 }, { "epoch": 0.01264994264754486, "grad_norm": 1.1796875, "learning_rate": 0.00019999259979424535, "loss": 6.2597, "step": 122 }, { "epoch": 0.012753630702032934, "grad_norm": 0.76171875, "learning_rate": 0.00019999246705944068, "loss": 6.1437, "step": 123 }, { "epoch": 0.012857318756521007, "grad_norm": 0.97265625, "learning_rate": 0.00019999233314483056, "loss": 6.216, "step": 124 }, { "epoch": 0.01296100681100908, "grad_norm": 0.87109375, "learning_rate": 0.00019999219805041663, "loss": 6.1778, "step": 125 }, { "epoch": 0.013064694865497151, "grad_norm": 0.8203125, "learning_rate": 0.00019999206177620047, "loss": 6.1466, "step": 126 }, { "epoch": 0.013168382919985224, "grad_norm": 0.74609375, "learning_rate": 0.00019999192432218363, "loss": 6.1517, "step": 127 }, { "epoch": 0.013272070974473298, "grad_norm": 0.67578125, "learning_rate": 0.00019999178568836783, "loss": 6.1833, "step": 128 }, { "epoch": 0.013375759028961369, "grad_norm": 0.6640625, "learning_rate": 0.00019999164587475464, "loss": 6.177, "step": 129 }, { "epoch": 0.013479447083449442, "grad_norm": 0.57421875, "learning_rate": 0.0001999915048813457, "loss": 6.19, "step": 130 }, { "epoch": 0.013583135137937515, "grad_norm": 0.6875, "learning_rate": 0.0001999913627081427, "loss": 6.1394, "step": 131 }, { "epoch": 0.013686823192425588, "grad_norm": 0.578125, "learning_rate": 0.00019999121935514736, "loss": 6.1704, "step": 132 }, { "epoch": 0.01379051124691366, "grad_norm": 0.57421875, "learning_rate": 0.00019999107482236128, "loss": 6.1321, "step": 133 }, { "epoch": 0.013894199301401733, "grad_norm": 0.55078125, "learning_rate": 0.00019999092910978625, "loss": 6.1726, "step": 134 }, { "epoch": 0.013997887355889806, "grad_norm": 0.5703125, "learning_rate": 0.00019999078221742393, "loss": 6.1438, "step": 135 }, { "epoch": 0.014101575410377877, "grad_norm": 0.5703125, "learning_rate": 0.00019999063414527607, "loss": 6.1395, "step": 136 }, { "epoch": 0.01420526346486595, "grad_norm": 0.6875, "learning_rate": 0.00019999048489334443, "loss": 6.0961, "step": 137 }, { "epoch": 0.014308951519354024, "grad_norm": 0.80859375, "learning_rate": 0.00019999033446163077, "loss": 6.1165, "step": 138 }, { "epoch": 0.014412639573842097, "grad_norm": 1.171875, "learning_rate": 0.00019999018285013685, "loss": 6.1087, "step": 139 }, { "epoch": 0.014516327628330168, "grad_norm": 1.421875, "learning_rate": 0.00019999003005886446, "loss": 6.0861, "step": 140 }, { "epoch": 0.014620015682818241, "grad_norm": 0.47265625, "learning_rate": 0.00019998987608781544, "loss": 6.1238, "step": 141 }, { "epoch": 0.014723703737306314, "grad_norm": 1.734375, "learning_rate": 0.00019998972093699153, "loss": 6.0859, "step": 142 }, { "epoch": 0.014827391791794386, "grad_norm": 0.97265625, "learning_rate": 0.00019998956460639465, "loss": 6.1074, "step": 143 }, { "epoch": 0.014931079846282459, "grad_norm": 0.8046875, "learning_rate": 0.00019998940709602657, "loss": 6.1206, "step": 144 }, { "epoch": 0.015034767900770532, "grad_norm": 1.265625, "learning_rate": 0.00019998924840588917, "loss": 6.0488, "step": 145 }, { "epoch": 0.015138455955258605, "grad_norm": 0.8984375, "learning_rate": 0.00019998908853598434, "loss": 6.058, "step": 146 }, { "epoch": 0.015242144009746677, "grad_norm": 0.65625, "learning_rate": 0.000199988927486314, "loss": 6.0474, "step": 147 }, { "epoch": 0.01534583206423475, "grad_norm": 0.6328125, "learning_rate": 0.00019998876525687998, "loss": 6.0019, "step": 148 }, { "epoch": 0.015449520118722823, "grad_norm": 0.7578125, "learning_rate": 0.0001999886018476842, "loss": 6.047, "step": 149 }, { "epoch": 0.015553208173210894, "grad_norm": 0.80859375, "learning_rate": 0.0001999884372587286, "loss": 6.0433, "step": 150 }, { "epoch": 0.015656896227698967, "grad_norm": 0.66796875, "learning_rate": 0.0001999882714900152, "loss": 6.0162, "step": 151 }, { "epoch": 0.01576058428218704, "grad_norm": 0.50390625, "learning_rate": 0.00019998810454154584, "loss": 6.0574, "step": 152 }, { "epoch": 0.015864272336675114, "grad_norm": 0.69140625, "learning_rate": 0.00019998793641332256, "loss": 6.0328, "step": 153 }, { "epoch": 0.015967960391163185, "grad_norm": 0.515625, "learning_rate": 0.0001999877671053473, "loss": 5.9934, "step": 154 }, { "epoch": 0.01607164844565126, "grad_norm": 0.45703125, "learning_rate": 0.0001999875966176221, "loss": 6.0257, "step": 155 }, { "epoch": 0.01617533650013933, "grad_norm": 0.49609375, "learning_rate": 0.00019998742495014896, "loss": 5.9716, "step": 156 }, { "epoch": 0.016279024554627403, "grad_norm": 0.57421875, "learning_rate": 0.0001999872521029299, "loss": 6.011, "step": 157 }, { "epoch": 0.016382712609115477, "grad_norm": 0.373046875, "learning_rate": 0.0001999870780759669, "loss": 5.986, "step": 158 }, { "epoch": 0.01648640066360355, "grad_norm": 0.4375, "learning_rate": 0.0001999869028692621, "loss": 5.9172, "step": 159 }, { "epoch": 0.01659008871809162, "grad_norm": 0.65625, "learning_rate": 0.00019998672648281757, "loss": 5.9745, "step": 160 }, { "epoch": 0.016693776772579695, "grad_norm": 0.81640625, "learning_rate": 0.0001999865489166353, "loss": 5.9591, "step": 161 }, { "epoch": 0.016797464827067767, "grad_norm": 1.1171875, "learning_rate": 0.00019998637017071752, "loss": 5.982, "step": 162 }, { "epoch": 0.016901152881555838, "grad_norm": 1.578125, "learning_rate": 0.0001999861902450662, "loss": 5.9767, "step": 163 }, { "epoch": 0.017004840936043913, "grad_norm": 0.87890625, "learning_rate": 0.00019998600913968356, "loss": 6.0064, "step": 164 }, { "epoch": 0.017108528990531984, "grad_norm": 0.84375, "learning_rate": 0.00019998582685457165, "loss": 5.9651, "step": 165 }, { "epoch": 0.017212217045020056, "grad_norm": 0.9921875, "learning_rate": 0.00019998564338973273, "loss": 5.9246, "step": 166 }, { "epoch": 0.01731590509950813, "grad_norm": 1.015625, "learning_rate": 0.00019998545874516888, "loss": 5.907, "step": 167 }, { "epoch": 0.017419593153996202, "grad_norm": 1.1015625, "learning_rate": 0.00019998527292088228, "loss": 5.9681, "step": 168 }, { "epoch": 0.017523281208484277, "grad_norm": 0.96875, "learning_rate": 0.00019998508591687522, "loss": 5.962, "step": 169 }, { "epoch": 0.017626969262972348, "grad_norm": 0.9609375, "learning_rate": 0.00019998489773314976, "loss": 5.8911, "step": 170 }, { "epoch": 0.01773065731746042, "grad_norm": 0.8359375, "learning_rate": 0.00019998470836970827, "loss": 5.924, "step": 171 }, { "epoch": 0.017834345371948494, "grad_norm": 0.78125, "learning_rate": 0.00019998451782655282, "loss": 5.9342, "step": 172 }, { "epoch": 0.017938033426436566, "grad_norm": 0.921875, "learning_rate": 0.00019998432610368583, "loss": 5.9439, "step": 173 }, { "epoch": 0.018041721480924637, "grad_norm": 0.7734375, "learning_rate": 0.00019998413320110943, "loss": 5.9079, "step": 174 }, { "epoch": 0.018145409535412712, "grad_norm": 0.62890625, "learning_rate": 0.00019998393911882598, "loss": 5.9049, "step": 175 }, { "epoch": 0.018249097589900783, "grad_norm": 0.59765625, "learning_rate": 0.0001999837438568377, "loss": 5.8602, "step": 176 }, { "epoch": 0.018352785644388855, "grad_norm": 0.51953125, "learning_rate": 0.00019998354741514694, "loss": 5.9309, "step": 177 }, { "epoch": 0.01845647369887693, "grad_norm": 0.5625, "learning_rate": 0.00019998334979375604, "loss": 5.9288, "step": 178 }, { "epoch": 0.018560161753365, "grad_norm": 0.5, "learning_rate": 0.00019998315099266728, "loss": 5.8735, "step": 179 }, { "epoch": 0.018663849807853072, "grad_norm": 0.3671875, "learning_rate": 0.000199982951011883, "loss": 5.8988, "step": 180 }, { "epoch": 0.018767537862341147, "grad_norm": 0.50390625, "learning_rate": 0.0001999827498514056, "loss": 5.8772, "step": 181 }, { "epoch": 0.01887122591682922, "grad_norm": 0.69921875, "learning_rate": 0.00019998254751123746, "loss": 5.8569, "step": 182 }, { "epoch": 0.018974913971317293, "grad_norm": 0.63671875, "learning_rate": 0.00019998234399138092, "loss": 5.8623, "step": 183 }, { "epoch": 0.019078602025805365, "grad_norm": 0.5, "learning_rate": 0.00019998213929183842, "loss": 5.832, "step": 184 }, { "epoch": 0.019182290080293436, "grad_norm": 0.453125, "learning_rate": 0.00019998193341261238, "loss": 5.8703, "step": 185 }, { "epoch": 0.01928597813478151, "grad_norm": 0.52734375, "learning_rate": 0.00019998172635370516, "loss": 5.8666, "step": 186 }, { "epoch": 0.019389666189269582, "grad_norm": 0.3671875, "learning_rate": 0.00019998151811511928, "loss": 5.8473, "step": 187 }, { "epoch": 0.019493354243757654, "grad_norm": 0.498046875, "learning_rate": 0.00019998130869685717, "loss": 5.8832, "step": 188 }, { "epoch": 0.01959704229824573, "grad_norm": 0.60546875, "learning_rate": 0.00019998109809892133, "loss": 5.8562, "step": 189 }, { "epoch": 0.0197007303527338, "grad_norm": 0.474609375, "learning_rate": 0.00019998088632131419, "loss": 5.8587, "step": 190 }, { "epoch": 0.01980441840722187, "grad_norm": 0.53515625, "learning_rate": 0.00019998067336403827, "loss": 5.8556, "step": 191 }, { "epoch": 0.019908106461709946, "grad_norm": 0.76171875, "learning_rate": 0.0001999804592270961, "loss": 5.8503, "step": 192 }, { "epoch": 0.020011794516198018, "grad_norm": 1.0390625, "learning_rate": 0.0001999802439104902, "loss": 5.8315, "step": 193 }, { "epoch": 0.02011548257068609, "grad_norm": 1.671875, "learning_rate": 0.0001999800274142231, "loss": 5.7988, "step": 194 }, { "epoch": 0.020219170625174164, "grad_norm": 0.453125, "learning_rate": 0.00019997980973829736, "loss": 5.7978, "step": 195 }, { "epoch": 0.020322858679662235, "grad_norm": 2.46875, "learning_rate": 0.00019997959088271554, "loss": 5.8234, "step": 196 }, { "epoch": 0.02042654673415031, "grad_norm": 0.9375, "learning_rate": 0.00019997937084748025, "loss": 5.8187, "step": 197 }, { "epoch": 0.02053023478863838, "grad_norm": 3.75, "learning_rate": 0.00019997914963259405, "loss": 5.8793, "step": 198 }, { "epoch": 0.020633922843126453, "grad_norm": 3.390625, "learning_rate": 0.00019997892723805957, "loss": 5.8885, "step": 199 }, { "epoch": 0.020737610897614528, "grad_norm": 1.4765625, "learning_rate": 0.00019997870366387943, "loss": 5.8362, "step": 200 }, { "epoch": 0.0208412989521026, "grad_norm": 3.171875, "learning_rate": 0.00019997847891005627, "loss": 5.8813, "step": 201 }, { "epoch": 0.02094498700659067, "grad_norm": 2.4375, "learning_rate": 0.00019997825297659273, "loss": 5.9057, "step": 202 }, { "epoch": 0.021048675061078746, "grad_norm": 2.03125, "learning_rate": 0.0001999780258634915, "loss": 5.8437, "step": 203 }, { "epoch": 0.021152363115566817, "grad_norm": 1.1171875, "learning_rate": 0.00019997779757075526, "loss": 5.8491, "step": 204 }, { "epoch": 0.02125605117005489, "grad_norm": 1.765625, "learning_rate": 0.0001999775680983867, "loss": 5.8136, "step": 205 }, { "epoch": 0.021359739224542963, "grad_norm": 1.0703125, "learning_rate": 0.00019997733744638846, "loss": 5.8221, "step": 206 }, { "epoch": 0.021463427279031035, "grad_norm": 1.6171875, "learning_rate": 0.00019997710561476335, "loss": 5.8324, "step": 207 }, { "epoch": 0.02156711533351911, "grad_norm": 1.1171875, "learning_rate": 0.0001999768726035141, "loss": 5.8158, "step": 208 }, { "epoch": 0.02167080338800718, "grad_norm": 1.2578125, "learning_rate": 0.00019997663841264337, "loss": 5.8085, "step": 209 }, { "epoch": 0.021774491442495252, "grad_norm": 0.89453125, "learning_rate": 0.00019997640304215402, "loss": 5.807, "step": 210 }, { "epoch": 0.021878179496983327, "grad_norm": 1.0703125, "learning_rate": 0.0001999761664920488, "loss": 5.8282, "step": 211 }, { "epoch": 0.0219818675514714, "grad_norm": 0.7578125, "learning_rate": 0.0001999759287623305, "loss": 5.7731, "step": 212 }, { "epoch": 0.02208555560595947, "grad_norm": 0.8515625, "learning_rate": 0.0001999756898530019, "loss": 5.7734, "step": 213 }, { "epoch": 0.022189243660447545, "grad_norm": 0.6953125, "learning_rate": 0.00019997544976406588, "loss": 5.7714, "step": 214 }, { "epoch": 0.022292931714935616, "grad_norm": 0.7109375, "learning_rate": 0.00019997520849552517, "loss": 5.7947, "step": 215 }, { "epoch": 0.022396619769423688, "grad_norm": 0.6875, "learning_rate": 0.00019997496604738272, "loss": 5.8244, "step": 216 }, { "epoch": 0.022500307823911762, "grad_norm": 0.6796875, "learning_rate": 0.00019997472241964134, "loss": 5.7605, "step": 217 }, { "epoch": 0.022603995878399834, "grad_norm": 0.65234375, "learning_rate": 0.00019997447761230393, "loss": 5.7564, "step": 218 }, { "epoch": 0.022707683932887905, "grad_norm": 0.57421875, "learning_rate": 0.00019997423162537335, "loss": 5.7313, "step": 219 }, { "epoch": 0.02281137198737598, "grad_norm": 0.60546875, "learning_rate": 0.00019997398445885248, "loss": 5.7635, "step": 220 }, { "epoch": 0.02291506004186405, "grad_norm": 0.5234375, "learning_rate": 0.00019997373611274432, "loss": 5.7636, "step": 221 }, { "epoch": 0.023018748096352126, "grad_norm": 0.58203125, "learning_rate": 0.00019997348658705173, "loss": 5.7049, "step": 222 }, { "epoch": 0.023122436150840198, "grad_norm": 0.5390625, "learning_rate": 0.00019997323588177767, "loss": 5.7628, "step": 223 }, { "epoch": 0.02322612420532827, "grad_norm": 0.478515625, "learning_rate": 0.0001999729839969251, "loss": 5.7761, "step": 224 }, { "epoch": 0.023329812259816344, "grad_norm": 0.490234375, "learning_rate": 0.000199972730932497, "loss": 5.772, "step": 225 }, { "epoch": 0.023433500314304415, "grad_norm": 0.45703125, "learning_rate": 0.00019997247668849638, "loss": 5.7357, "step": 226 }, { "epoch": 0.023537188368792487, "grad_norm": 0.54296875, "learning_rate": 0.00019997222126492617, "loss": 5.75, "step": 227 }, { "epoch": 0.02364087642328056, "grad_norm": 0.4453125, "learning_rate": 0.00019997196466178943, "loss": 5.7343, "step": 228 }, { "epoch": 0.023744564477768633, "grad_norm": 0.408203125, "learning_rate": 0.00019997170687908919, "loss": 5.7152, "step": 229 }, { "epoch": 0.023848252532256704, "grad_norm": 0.4296875, "learning_rate": 0.00019997144791682848, "loss": 5.734, "step": 230 }, { "epoch": 0.02395194058674478, "grad_norm": 0.392578125, "learning_rate": 0.00019997118777501037, "loss": 5.7307, "step": 231 }, { "epoch": 0.02405562864123285, "grad_norm": 0.45703125, "learning_rate": 0.0001999709264536379, "loss": 5.7429, "step": 232 }, { "epoch": 0.024159316695720922, "grad_norm": 0.458984375, "learning_rate": 0.00019997066395271418, "loss": 5.7384, "step": 233 }, { "epoch": 0.024263004750208997, "grad_norm": 0.330078125, "learning_rate": 0.00019997040027224232, "loss": 5.7171, "step": 234 }, { "epoch": 0.024366692804697068, "grad_norm": 0.400390625, "learning_rate": 0.00019997013541222538, "loss": 5.7003, "step": 235 }, { "epoch": 0.024470380859185143, "grad_norm": 0.33203125, "learning_rate": 0.00019996986937266653, "loss": 5.6908, "step": 236 }, { "epoch": 0.024574068913673214, "grad_norm": 0.33203125, "learning_rate": 0.0001999696021535689, "loss": 5.6583, "step": 237 }, { "epoch": 0.024677756968161286, "grad_norm": 0.34375, "learning_rate": 0.00019996933375493562, "loss": 5.724, "step": 238 }, { "epoch": 0.02478144502264936, "grad_norm": 0.333984375, "learning_rate": 0.0001999690641767699, "loss": 5.6948, "step": 239 }, { "epoch": 0.024885133077137432, "grad_norm": 0.341796875, "learning_rate": 0.00019996879341907487, "loss": 5.7105, "step": 240 }, { "epoch": 0.024988821131625504, "grad_norm": 0.41015625, "learning_rate": 0.00019996852148185373, "loss": 5.6769, "step": 241 }, { "epoch": 0.02509250918611358, "grad_norm": 0.294921875, "learning_rate": 0.00019996824836510975, "loss": 5.671, "step": 242 }, { "epoch": 0.02519619724060165, "grad_norm": 0.365234375, "learning_rate": 0.0001999679740688461, "loss": 5.6549, "step": 243 }, { "epoch": 0.02529988529508972, "grad_norm": 0.447265625, "learning_rate": 0.000199967698593066, "loss": 5.6671, "step": 244 }, { "epoch": 0.025403573349577796, "grad_norm": 0.392578125, "learning_rate": 0.00019996742193777273, "loss": 5.6164, "step": 245 }, { "epoch": 0.025507261404065867, "grad_norm": 0.5, "learning_rate": 0.00019996714410296958, "loss": 5.647, "step": 246 }, { "epoch": 0.02561094945855394, "grad_norm": 0.498046875, "learning_rate": 0.0001999668650886598, "loss": 5.64, "step": 247 }, { "epoch": 0.025714637513042014, "grad_norm": 0.69140625, "learning_rate": 0.00019996658489484666, "loss": 5.6899, "step": 248 }, { "epoch": 0.025818325567530085, "grad_norm": 0.921875, "learning_rate": 0.00019996630352153353, "loss": 5.6688, "step": 249 }, { "epoch": 0.02592201362201816, "grad_norm": 1.78125, "learning_rate": 0.0001999660209687236, "loss": 5.6829, "step": 250 }, { "epoch": 0.02602570167650623, "grad_norm": 0.6484375, "learning_rate": 0.00019996573723642035, "loss": 5.6252, "step": 251 }, { "epoch": 0.026129389730994303, "grad_norm": 0.6015625, "learning_rate": 0.00019996545232462708, "loss": 5.6089, "step": 252 }, { "epoch": 0.026233077785482378, "grad_norm": 1.4765625, "learning_rate": 0.00019996516623334713, "loss": 5.672, "step": 253 }, { "epoch": 0.02633676583997045, "grad_norm": 1.046875, "learning_rate": 0.00019996487896258388, "loss": 5.6516, "step": 254 }, { "epoch": 0.02644045389445852, "grad_norm": 1.1484375, "learning_rate": 0.0001999645905123407, "loss": 5.608, "step": 255 }, { "epoch": 0.026544141948946595, "grad_norm": 0.87890625, "learning_rate": 0.00019996430088262108, "loss": 5.6102, "step": 256 }, { "epoch": 0.026647830003434667, "grad_norm": 0.54296875, "learning_rate": 0.00019996401007342832, "loss": 5.6053, "step": 257 }, { "epoch": 0.026751518057922738, "grad_norm": 0.65625, "learning_rate": 0.00019996371808476596, "loss": 5.6318, "step": 258 }, { "epoch": 0.026855206112410813, "grad_norm": 0.796875, "learning_rate": 0.00019996342491663733, "loss": 5.612, "step": 259 }, { "epoch": 0.026958894166898884, "grad_norm": 0.6171875, "learning_rate": 0.000199963130569046, "loss": 5.6529, "step": 260 }, { "epoch": 0.02706258222138696, "grad_norm": 0.5390625, "learning_rate": 0.00019996283504199538, "loss": 5.5798, "step": 261 }, { "epoch": 0.02716627027587503, "grad_norm": 0.62109375, "learning_rate": 0.00019996253833548896, "loss": 5.6042, "step": 262 }, { "epoch": 0.027269958330363102, "grad_norm": 0.59375, "learning_rate": 0.00019996224044953028, "loss": 5.6064, "step": 263 }, { "epoch": 0.027373646384851177, "grad_norm": 0.447265625, "learning_rate": 0.0001999619413841228, "loss": 5.6279, "step": 264 }, { "epoch": 0.027477334439339248, "grad_norm": 0.51953125, "learning_rate": 0.00019996164113927008, "loss": 5.595, "step": 265 }, { "epoch": 0.02758102249382732, "grad_norm": 0.46875, "learning_rate": 0.00019996133971497568, "loss": 5.6144, "step": 266 }, { "epoch": 0.027684710548315394, "grad_norm": 0.48828125, "learning_rate": 0.00019996103711124313, "loss": 5.6075, "step": 267 }, { "epoch": 0.027788398602803466, "grad_norm": 0.55078125, "learning_rate": 0.000199960733328076, "loss": 5.5957, "step": 268 }, { "epoch": 0.027892086657291537, "grad_norm": 0.392578125, "learning_rate": 0.00019996042836547786, "loss": 5.5676, "step": 269 }, { "epoch": 0.027995774711779612, "grad_norm": 0.435546875, "learning_rate": 0.00019996012222345236, "loss": 5.606, "step": 270 }, { "epoch": 0.028099462766267683, "grad_norm": 0.5, "learning_rate": 0.00019995981490200304, "loss": 5.5437, "step": 271 }, { "epoch": 0.028203150820755755, "grad_norm": 0.41796875, "learning_rate": 0.0001999595064011336, "loss": 5.5388, "step": 272 }, { "epoch": 0.02830683887524383, "grad_norm": 0.5703125, "learning_rate": 0.00019995919672084763, "loss": 5.5548, "step": 273 }, { "epoch": 0.0284105269297319, "grad_norm": 0.70703125, "learning_rate": 0.0001999588858611488, "loss": 5.5774, "step": 274 }, { "epoch": 0.028514214984219976, "grad_norm": 0.63671875, "learning_rate": 0.00019995857382204083, "loss": 5.5795, "step": 275 }, { "epoch": 0.028617903038708047, "grad_norm": 0.57421875, "learning_rate": 0.00019995826060352728, "loss": 5.5806, "step": 276 }, { "epoch": 0.02872159109319612, "grad_norm": 0.70703125, "learning_rate": 0.00019995794620561195, "loss": 5.5304, "step": 277 }, { "epoch": 0.028825279147684194, "grad_norm": 0.7421875, "learning_rate": 0.0001999576306282985, "loss": 5.594, "step": 278 }, { "epoch": 0.028928967202172265, "grad_norm": 0.79296875, "learning_rate": 0.00019995731387159067, "loss": 5.5603, "step": 279 }, { "epoch": 0.029032655256660336, "grad_norm": 1.0390625, "learning_rate": 0.0001999569959354922, "loss": 5.5512, "step": 280 }, { "epoch": 0.02913634331114841, "grad_norm": 1.3125, "learning_rate": 0.00019995667682000683, "loss": 5.5618, "step": 281 }, { "epoch": 0.029240031365636483, "grad_norm": 0.69921875, "learning_rate": 0.00019995635652513835, "loss": 5.5426, "step": 282 }, { "epoch": 0.029343719420124554, "grad_norm": 0.69921875, "learning_rate": 0.0001999560350508905, "loss": 5.5263, "step": 283 }, { "epoch": 0.02944740747461263, "grad_norm": 1.0859375, "learning_rate": 0.0001999557123972671, "loss": 5.5721, "step": 284 }, { "epoch": 0.0295510955291007, "grad_norm": 1.53125, "learning_rate": 0.00019995538856427196, "loss": 5.5413, "step": 285 }, { "epoch": 0.02965478358358877, "grad_norm": 0.546875, "learning_rate": 0.00019995506355190889, "loss": 5.5277, "step": 286 }, { "epoch": 0.029758471638076846, "grad_norm": 1.34375, "learning_rate": 0.00019995473736018172, "loss": 5.5505, "step": 287 }, { "epoch": 0.029862159692564918, "grad_norm": 1.0078125, "learning_rate": 0.00019995440998909431, "loss": 5.5775, "step": 288 }, { "epoch": 0.029965847747052993, "grad_norm": 1.140625, "learning_rate": 0.00019995408143865052, "loss": 5.5016, "step": 289 }, { "epoch": 0.030069535801541064, "grad_norm": 1.265625, "learning_rate": 0.00019995375170885424, "loss": 5.5683, "step": 290 }, { "epoch": 0.030173223856029135, "grad_norm": 0.671875, "learning_rate": 0.00019995342079970932, "loss": 5.5217, "step": 291 }, { "epoch": 0.03027691191051721, "grad_norm": 0.73828125, "learning_rate": 0.00019995308871121971, "loss": 5.562, "step": 292 }, { "epoch": 0.03038059996500528, "grad_norm": 0.7109375, "learning_rate": 0.00019995275544338928, "loss": 5.5251, "step": 293 }, { "epoch": 0.030484288019493353, "grad_norm": 0.875, "learning_rate": 0.00019995242099622203, "loss": 5.5147, "step": 294 }, { "epoch": 0.030587976073981428, "grad_norm": 1.15625, "learning_rate": 0.00019995208536972183, "loss": 5.4956, "step": 295 }, { "epoch": 0.0306916641284695, "grad_norm": 0.703125, "learning_rate": 0.0001999517485638927, "loss": 5.4974, "step": 296 }, { "epoch": 0.03079535218295757, "grad_norm": 0.77734375, "learning_rate": 0.00019995141057873857, "loss": 5.4782, "step": 297 }, { "epoch": 0.030899040237445646, "grad_norm": 0.9609375, "learning_rate": 0.00019995107141426347, "loss": 5.5044, "step": 298 }, { "epoch": 0.031002728291933717, "grad_norm": 1.1328125, "learning_rate": 0.00019995073107047134, "loss": 5.5123, "step": 299 }, { "epoch": 0.03110641634642179, "grad_norm": 0.82421875, "learning_rate": 0.0001999503895473663, "loss": 5.5219, "step": 300 }, { "epoch": 0.031210104400909863, "grad_norm": 0.953125, "learning_rate": 0.00019995004684495227, "loss": 5.5151, "step": 301 }, { "epoch": 0.031313792455397935, "grad_norm": 1.15625, "learning_rate": 0.00019994970296323335, "loss": 5.5151, "step": 302 }, { "epoch": 0.031417480509886006, "grad_norm": 0.90234375, "learning_rate": 0.00019994935790221358, "loss": 5.5093, "step": 303 }, { "epoch": 0.03152116856437408, "grad_norm": 0.83984375, "learning_rate": 0.00019994901166189708, "loss": 5.5052, "step": 304 }, { "epoch": 0.031624856618862156, "grad_norm": 1.09375, "learning_rate": 0.00019994866424228783, "loss": 5.4662, "step": 305 }, { "epoch": 0.03172854467335023, "grad_norm": 0.8671875, "learning_rate": 0.00019994831564339004, "loss": 5.4841, "step": 306 }, { "epoch": 0.0318322327278383, "grad_norm": 0.79296875, "learning_rate": 0.00019994796586520773, "loss": 5.4731, "step": 307 }, { "epoch": 0.03193592078232637, "grad_norm": 0.85546875, "learning_rate": 0.00019994761490774513, "loss": 5.4515, "step": 308 }, { "epoch": 0.03203960883681444, "grad_norm": 0.66015625, "learning_rate": 0.00019994726277100628, "loss": 5.519, "step": 309 }, { "epoch": 0.03214329689130252, "grad_norm": 0.62109375, "learning_rate": 0.0001999469094549954, "loss": 5.5203, "step": 310 }, { "epoch": 0.03224698494579059, "grad_norm": 0.78515625, "learning_rate": 0.0001999465549597166, "loss": 5.5374, "step": 311 }, { "epoch": 0.03235067300027866, "grad_norm": 0.62890625, "learning_rate": 0.00019994619928517416, "loss": 5.508, "step": 312 }, { "epoch": 0.032454361054766734, "grad_norm": 0.6953125, "learning_rate": 0.00019994584243137218, "loss": 5.4988, "step": 313 }, { "epoch": 0.032558049109254805, "grad_norm": 0.68359375, "learning_rate": 0.00019994548439831487, "loss": 5.4792, "step": 314 }, { "epoch": 0.03266173716374288, "grad_norm": 0.462890625, "learning_rate": 0.00019994512518600654, "loss": 5.4493, "step": 315 }, { "epoch": 0.032765425218230955, "grad_norm": 0.69921875, "learning_rate": 0.0001999447647944514, "loss": 5.4875, "step": 316 }, { "epoch": 0.032869113272719026, "grad_norm": 0.55078125, "learning_rate": 0.00019994440322365363, "loss": 5.5292, "step": 317 }, { "epoch": 0.0329728013272071, "grad_norm": 0.55859375, "learning_rate": 0.00019994404047361756, "loss": 5.4906, "step": 318 }, { "epoch": 0.03307648938169517, "grad_norm": 0.404296875, "learning_rate": 0.00019994367654434746, "loss": 5.461, "step": 319 }, { "epoch": 0.03318017743618324, "grad_norm": 0.470703125, "learning_rate": 0.00019994331143584763, "loss": 5.439, "step": 320 }, { "epoch": 0.03328386549067131, "grad_norm": 0.515625, "learning_rate": 0.00019994294514812238, "loss": 5.451, "step": 321 }, { "epoch": 0.03338755354515939, "grad_norm": 0.5703125, "learning_rate": 0.00019994257768117602, "loss": 5.4436, "step": 322 }, { "epoch": 0.03349124159964746, "grad_norm": 0.609375, "learning_rate": 0.0001999422090350129, "loss": 5.4283, "step": 323 }, { "epoch": 0.03359492965413553, "grad_norm": 0.6875, "learning_rate": 0.0001999418392096373, "loss": 5.4542, "step": 324 }, { "epoch": 0.033698617708623604, "grad_norm": 0.5625, "learning_rate": 0.0001999414682050537, "loss": 5.4371, "step": 325 }, { "epoch": 0.033802305763111676, "grad_norm": 0.431640625, "learning_rate": 0.00019994109602126638, "loss": 5.4293, "step": 326 }, { "epoch": 0.033905993817599754, "grad_norm": 0.59765625, "learning_rate": 0.00019994072265827977, "loss": 5.4883, "step": 327 }, { "epoch": 0.034009681872087826, "grad_norm": 0.66015625, "learning_rate": 0.0001999403481160983, "loss": 5.4215, "step": 328 }, { "epoch": 0.0341133699265759, "grad_norm": 0.80859375, "learning_rate": 0.00019993997239472634, "loss": 5.4048, "step": 329 }, { "epoch": 0.03421705798106397, "grad_norm": 1.0703125, "learning_rate": 0.00019993959549416835, "loss": 5.4454, "step": 330 }, { "epoch": 0.03432074603555204, "grad_norm": 1.4921875, "learning_rate": 0.00019993921741442877, "loss": 5.4441, "step": 331 }, { "epoch": 0.03442443409004011, "grad_norm": 0.56640625, "learning_rate": 0.0001999388381555121, "loss": 5.4147, "step": 332 }, { "epoch": 0.03452812214452819, "grad_norm": 0.92578125, "learning_rate": 0.00019993845771742276, "loss": 5.4387, "step": 333 }, { "epoch": 0.03463181019901626, "grad_norm": 1.921875, "learning_rate": 0.00019993807610016524, "loss": 5.3842, "step": 334 }, { "epoch": 0.03473549825350433, "grad_norm": 0.921875, "learning_rate": 0.00019993769330374408, "loss": 5.4189, "step": 335 }, { "epoch": 0.034839186307992404, "grad_norm": 5.53125, "learning_rate": 0.00019993730932816377, "loss": 5.5256, "step": 336 }, { "epoch": 0.034942874362480475, "grad_norm": 5.0, "learning_rate": 0.00019993692417342884, "loss": 5.5175, "step": 337 }, { "epoch": 0.03504656241696855, "grad_norm": 1.28125, "learning_rate": 0.00019993653783954388, "loss": 5.4119, "step": 338 }, { "epoch": 0.035150250471456625, "grad_norm": 3.4375, "learning_rate": 0.00019993615032651337, "loss": 5.5242, "step": 339 }, { "epoch": 0.035253938525944696, "grad_norm": 3.15625, "learning_rate": 0.00019993576163434193, "loss": 5.53, "step": 340 }, { "epoch": 0.03535762658043277, "grad_norm": 1.5546875, "learning_rate": 0.00019993537176303416, "loss": 5.4046, "step": 341 }, { "epoch": 0.03546131463492084, "grad_norm": 2.59375, "learning_rate": 0.00019993498071259463, "loss": 5.4244, "step": 342 }, { "epoch": 0.03556500268940891, "grad_norm": 2.28125, "learning_rate": 0.00019993458848302796, "loss": 5.4798, "step": 343 }, { "epoch": 0.03566869074389699, "grad_norm": 1.484375, "learning_rate": 0.00019993419507433876, "loss": 5.4492, "step": 344 }, { "epoch": 0.03577237879838506, "grad_norm": 1.2109375, "learning_rate": 0.00019993380048653175, "loss": 5.4628, "step": 345 }, { "epoch": 0.03587606685287313, "grad_norm": 1.34375, "learning_rate": 0.0001999334047196115, "loss": 5.4155, "step": 346 }, { "epoch": 0.0359797549073612, "grad_norm": 1.2578125, "learning_rate": 0.00019993300777358268, "loss": 5.4204, "step": 347 }, { "epoch": 0.036083442961849274, "grad_norm": 0.93359375, "learning_rate": 0.00019993260964845, "loss": 5.448, "step": 348 }, { "epoch": 0.03618713101633735, "grad_norm": 1.046875, "learning_rate": 0.0001999322103442182, "loss": 5.4505, "step": 349 }, { "epoch": 0.036290819070825424, "grad_norm": 1.109375, "learning_rate": 0.00019993180986089192, "loss": 5.4001, "step": 350 }, { "epoch": 0.036394507125313495, "grad_norm": 0.96875, "learning_rate": 0.00019993140819847595, "loss": 5.3998, "step": 351 }, { "epoch": 0.03649819517980157, "grad_norm": 0.97265625, "learning_rate": 0.00019993100535697496, "loss": 5.4476, "step": 352 }, { "epoch": 0.03660188323428964, "grad_norm": 0.99609375, "learning_rate": 0.00019993060133639376, "loss": 5.4657, "step": 353 }, { "epoch": 0.03670557128877771, "grad_norm": 0.99609375, "learning_rate": 0.00019993019613673708, "loss": 5.4182, "step": 354 }, { "epoch": 0.03680925934326579, "grad_norm": 0.7578125, "learning_rate": 0.00019992978975800972, "loss": 5.4388, "step": 355 }, { "epoch": 0.03691294739775386, "grad_norm": 0.76171875, "learning_rate": 0.0001999293822002165, "loss": 5.4333, "step": 356 }, { "epoch": 0.03701663545224193, "grad_norm": 0.75, "learning_rate": 0.00019992897346336218, "loss": 5.4078, "step": 357 }, { "epoch": 0.03712032350673, "grad_norm": 0.81640625, "learning_rate": 0.00019992856354745158, "loss": 5.3843, "step": 358 }, { "epoch": 0.03722401156121807, "grad_norm": 0.6640625, "learning_rate": 0.00019992815245248958, "loss": 5.4116, "step": 359 }, { "epoch": 0.037327699615706145, "grad_norm": 0.62109375, "learning_rate": 0.000199927740178481, "loss": 5.3862, "step": 360 }, { "epoch": 0.03743138767019422, "grad_norm": 0.52734375, "learning_rate": 0.00019992732672543073, "loss": 5.3852, "step": 361 }, { "epoch": 0.037535075724682294, "grad_norm": 0.6640625, "learning_rate": 0.00019992691209334362, "loss": 5.3804, "step": 362 }, { "epoch": 0.037638763779170366, "grad_norm": 0.494140625, "learning_rate": 0.0001999264962822246, "loss": 5.3745, "step": 363 }, { "epoch": 0.03774245183365844, "grad_norm": 0.58203125, "learning_rate": 0.00019992607929207853, "loss": 5.3948, "step": 364 }, { "epoch": 0.03784613988814651, "grad_norm": 0.48828125, "learning_rate": 0.00019992566112291034, "loss": 5.384, "step": 365 }, { "epoch": 0.03794982794263459, "grad_norm": 0.51953125, "learning_rate": 0.000199925241774725, "loss": 5.3606, "step": 366 }, { "epoch": 0.03805351599712266, "grad_norm": 0.5703125, "learning_rate": 0.0001999248212475274, "loss": 5.4083, "step": 367 }, { "epoch": 0.03815720405161073, "grad_norm": 0.439453125, "learning_rate": 0.00019992439954132256, "loss": 5.3636, "step": 368 }, { "epoch": 0.0382608921060988, "grad_norm": 0.55078125, "learning_rate": 0.00019992397665611543, "loss": 5.3986, "step": 369 }, { "epoch": 0.03836458016058687, "grad_norm": 0.46484375, "learning_rate": 0.00019992355259191097, "loss": 5.4029, "step": 370 }, { "epoch": 0.038468268215074944, "grad_norm": 0.51171875, "learning_rate": 0.00019992312734871425, "loss": 5.405, "step": 371 }, { "epoch": 0.03857195626956302, "grad_norm": 0.484375, "learning_rate": 0.00019992270092653022, "loss": 5.3392, "step": 372 }, { "epoch": 0.038675644324051094, "grad_norm": 0.48828125, "learning_rate": 0.00019992227332536397, "loss": 5.3901, "step": 373 }, { "epoch": 0.038779332378539165, "grad_norm": 0.439453125, "learning_rate": 0.00019992184454522053, "loss": 5.3694, "step": 374 }, { "epoch": 0.038883020433027236, "grad_norm": 0.5078125, "learning_rate": 0.0001999214145861049, "loss": 5.359, "step": 375 }, { "epoch": 0.03898670848751531, "grad_norm": 0.4609375, "learning_rate": 0.00019992098344802223, "loss": 5.3545, "step": 376 }, { "epoch": 0.039090396542003386, "grad_norm": 0.41796875, "learning_rate": 0.00019992055113097755, "loss": 5.3735, "step": 377 }, { "epoch": 0.03919408459649146, "grad_norm": 0.46484375, "learning_rate": 0.000199920117634976, "loss": 5.3751, "step": 378 }, { "epoch": 0.03929777265097953, "grad_norm": 0.447265625, "learning_rate": 0.0001999196829600227, "loss": 5.2899, "step": 379 }, { "epoch": 0.0394014607054676, "grad_norm": 0.478515625, "learning_rate": 0.00019991924710612276, "loss": 5.3484, "step": 380 }, { "epoch": 0.03950514875995567, "grad_norm": 0.39453125, "learning_rate": 0.00019991881007328131, "loss": 5.3171, "step": 381 }, { "epoch": 0.03960883681444374, "grad_norm": 0.494140625, "learning_rate": 0.0001999183718615035, "loss": 5.3399, "step": 382 }, { "epoch": 0.03971252486893182, "grad_norm": 0.345703125, "learning_rate": 0.00019991793247079457, "loss": 5.3487, "step": 383 }, { "epoch": 0.03981621292341989, "grad_norm": 0.416015625, "learning_rate": 0.00019991749190115962, "loss": 5.335, "step": 384 }, { "epoch": 0.039919900977907964, "grad_norm": 0.435546875, "learning_rate": 0.0001999170501526039, "loss": 5.333, "step": 385 }, { "epoch": 0.040023589032396036, "grad_norm": 0.439453125, "learning_rate": 0.00019991660722513258, "loss": 5.3132, "step": 386 }, { "epoch": 0.04012727708688411, "grad_norm": 0.365234375, "learning_rate": 0.00019991616311875092, "loss": 5.3834, "step": 387 }, { "epoch": 0.04023096514137218, "grad_norm": 0.443359375, "learning_rate": 0.00019991571783346416, "loss": 5.3623, "step": 388 }, { "epoch": 0.04033465319586026, "grad_norm": 0.50390625, "learning_rate": 0.00019991527136927753, "loss": 5.3597, "step": 389 }, { "epoch": 0.04043834125034833, "grad_norm": 0.439453125, "learning_rate": 0.00019991482372619634, "loss": 5.3423, "step": 390 }, { "epoch": 0.0405420293048364, "grad_norm": 0.421875, "learning_rate": 0.0001999143749042258, "loss": 5.3152, "step": 391 }, { "epoch": 0.04064571735932447, "grad_norm": 0.57421875, "learning_rate": 0.00019991392490337128, "loss": 5.3344, "step": 392 }, { "epoch": 0.04074940541381254, "grad_norm": 0.55859375, "learning_rate": 0.00019991347372363806, "loss": 5.3016, "step": 393 }, { "epoch": 0.04085309346830062, "grad_norm": 0.451171875, "learning_rate": 0.00019991302136503148, "loss": 5.3684, "step": 394 }, { "epoch": 0.04095678152278869, "grad_norm": 0.447265625, "learning_rate": 0.00019991256782755684, "loss": 5.3299, "step": 395 }, { "epoch": 0.04106046957727676, "grad_norm": 0.56640625, "learning_rate": 0.0001999121131112195, "loss": 5.2799, "step": 396 }, { "epoch": 0.041164157631764835, "grad_norm": 0.486328125, "learning_rate": 0.00019991165721602484, "loss": 5.3184, "step": 397 }, { "epoch": 0.041267845686252906, "grad_norm": 0.5546875, "learning_rate": 0.00019991120014197828, "loss": 5.3399, "step": 398 }, { "epoch": 0.04137153374074098, "grad_norm": 0.46875, "learning_rate": 0.00019991074188908513, "loss": 5.2717, "step": 399 }, { "epoch": 0.041475221795229056, "grad_norm": 0.546875, "learning_rate": 0.00019991028245735083, "loss": 5.344, "step": 400 }, { "epoch": 0.04157890984971713, "grad_norm": 0.5703125, "learning_rate": 0.00019990982184678086, "loss": 5.3392, "step": 401 }, { "epoch": 0.0416825979042052, "grad_norm": 0.48046875, "learning_rate": 0.00019990936005738052, "loss": 5.262, "step": 402 }, { "epoch": 0.04178628595869327, "grad_norm": 0.69140625, "learning_rate": 0.0001999088970891554, "loss": 5.3249, "step": 403 }, { "epoch": 0.04188997401318134, "grad_norm": 0.53515625, "learning_rate": 0.00019990843294211087, "loss": 5.3172, "step": 404 }, { "epoch": 0.04199366206766942, "grad_norm": 0.578125, "learning_rate": 0.00019990796761625246, "loss": 5.3205, "step": 405 }, { "epoch": 0.04209735012215749, "grad_norm": 0.5859375, "learning_rate": 0.00019990750111158564, "loss": 5.3259, "step": 406 }, { "epoch": 0.04220103817664556, "grad_norm": 0.5625, "learning_rate": 0.0001999070334281159, "loss": 5.3271, "step": 407 }, { "epoch": 0.042304726231133634, "grad_norm": 0.478515625, "learning_rate": 0.00019990656456584876, "loss": 5.3035, "step": 408 }, { "epoch": 0.042408414285621705, "grad_norm": 0.51171875, "learning_rate": 0.0001999060945247898, "loss": 5.2843, "step": 409 }, { "epoch": 0.04251210234010978, "grad_norm": 0.57421875, "learning_rate": 0.0001999056233049445, "loss": 5.2869, "step": 410 }, { "epoch": 0.042615790394597855, "grad_norm": 0.5703125, "learning_rate": 0.00019990515090631848, "loss": 5.2991, "step": 411 }, { "epoch": 0.042719478449085926, "grad_norm": 0.70703125, "learning_rate": 0.00019990467732891725, "loss": 5.2575, "step": 412 }, { "epoch": 0.042823166503574, "grad_norm": 0.6484375, "learning_rate": 0.00019990420257274643, "loss": 5.3164, "step": 413 }, { "epoch": 0.04292685455806207, "grad_norm": 0.6015625, "learning_rate": 0.00019990372663781166, "loss": 5.2825, "step": 414 }, { "epoch": 0.04303054261255014, "grad_norm": 0.625, "learning_rate": 0.00019990324952411846, "loss": 5.2654, "step": 415 }, { "epoch": 0.04313423066703822, "grad_norm": 0.53515625, "learning_rate": 0.00019990277123167258, "loss": 5.2965, "step": 416 }, { "epoch": 0.04323791872152629, "grad_norm": 0.62109375, "learning_rate": 0.00019990229176047958, "loss": 5.2539, "step": 417 }, { "epoch": 0.04334160677601436, "grad_norm": 0.71484375, "learning_rate": 0.0001999018111105451, "loss": 5.2908, "step": 418 }, { "epoch": 0.04344529483050243, "grad_norm": 0.828125, "learning_rate": 0.0001999013292818749, "loss": 5.2979, "step": 419 }, { "epoch": 0.043548982884990504, "grad_norm": 0.7578125, "learning_rate": 0.0001999008462744746, "loss": 5.2747, "step": 420 }, { "epoch": 0.043652670939478576, "grad_norm": 0.4765625, "learning_rate": 0.00019990036208834992, "loss": 5.3147, "step": 421 }, { "epoch": 0.043756358993966654, "grad_norm": 0.486328125, "learning_rate": 0.00019989987672350656, "loss": 5.2847, "step": 422 }, { "epoch": 0.043860047048454726, "grad_norm": 0.67578125, "learning_rate": 0.00019989939017995024, "loss": 5.2892, "step": 423 }, { "epoch": 0.0439637351029428, "grad_norm": 0.73046875, "learning_rate": 0.00019989890245768673, "loss": 5.2348, "step": 424 }, { "epoch": 0.04406742315743087, "grad_norm": 0.70703125, "learning_rate": 0.00019989841355672178, "loss": 5.2523, "step": 425 }, { "epoch": 0.04417111121191894, "grad_norm": 0.58984375, "learning_rate": 0.00019989792347706114, "loss": 5.2648, "step": 426 }, { "epoch": 0.04427479926640701, "grad_norm": 0.51953125, "learning_rate": 0.00019989743221871057, "loss": 5.2998, "step": 427 }, { "epoch": 0.04437848732089509, "grad_norm": 0.66015625, "learning_rate": 0.00019989693978167595, "loss": 5.1773, "step": 428 }, { "epoch": 0.04448217537538316, "grad_norm": 0.58203125, "learning_rate": 0.00019989644616596298, "loss": 5.2602, "step": 429 }, { "epoch": 0.04458586342987123, "grad_norm": 0.5625, "learning_rate": 0.00019989595137157758, "loss": 5.2278, "step": 430 }, { "epoch": 0.044689551484359304, "grad_norm": 0.6484375, "learning_rate": 0.0001998954553985255, "loss": 5.2847, "step": 431 }, { "epoch": 0.044793239538847375, "grad_norm": 0.8828125, "learning_rate": 0.0001998949582468127, "loss": 5.2977, "step": 432 }, { "epoch": 0.04489692759333545, "grad_norm": 1.0625, "learning_rate": 0.00019989445991644496, "loss": 5.2542, "step": 433 }, { "epoch": 0.045000615647823525, "grad_norm": 1.421875, "learning_rate": 0.00019989396040742818, "loss": 5.2308, "step": 434 }, { "epoch": 0.045104303702311596, "grad_norm": 0.81640625, "learning_rate": 0.00019989345971976828, "loss": 5.2806, "step": 435 }, { "epoch": 0.04520799175679967, "grad_norm": 0.9140625, "learning_rate": 0.00019989295785347112, "loss": 5.2863, "step": 436 }, { "epoch": 0.04531167981128774, "grad_norm": 1.21875, "learning_rate": 0.00019989245480854265, "loss": 5.1997, "step": 437 }, { "epoch": 0.04541536786577581, "grad_norm": 0.88671875, "learning_rate": 0.00019989195058498882, "loss": 5.264, "step": 438 }, { "epoch": 0.04551905592026389, "grad_norm": 0.89453125, "learning_rate": 0.00019989144518281558, "loss": 5.2664, "step": 439 }, { "epoch": 0.04562274397475196, "grad_norm": 0.84375, "learning_rate": 0.00019989093860202885, "loss": 5.2437, "step": 440 }, { "epoch": 0.04572643202924003, "grad_norm": 0.63671875, "learning_rate": 0.00019989043084263464, "loss": 5.2767, "step": 441 }, { "epoch": 0.0458301200837281, "grad_norm": 0.79296875, "learning_rate": 0.00019988992190463894, "loss": 5.2763, "step": 442 }, { "epoch": 0.045933808138216174, "grad_norm": 0.9375, "learning_rate": 0.00019988941178804775, "loss": 5.24, "step": 443 }, { "epoch": 0.04603749619270425, "grad_norm": 1.0625, "learning_rate": 0.00019988890049286705, "loss": 5.2116, "step": 444 }, { "epoch": 0.046141184247192324, "grad_norm": 1.34375, "learning_rate": 0.00019988838801910297, "loss": 5.269, "step": 445 }, { "epoch": 0.046244872301680395, "grad_norm": 0.63671875, "learning_rate": 0.00019988787436676147, "loss": 5.2098, "step": 446 }, { "epoch": 0.04634856035616847, "grad_norm": 0.765625, "learning_rate": 0.00019988735953584862, "loss": 5.2502, "step": 447 }, { "epoch": 0.04645224841065654, "grad_norm": 1.203125, "learning_rate": 0.00019988684352637056, "loss": 5.2842, "step": 448 }, { "epoch": 0.04655593646514461, "grad_norm": 1.015625, "learning_rate": 0.0001998863263383333, "loss": 5.2292, "step": 449 }, { "epoch": 0.04665962451963269, "grad_norm": 1.1953125, "learning_rate": 0.00019988580797174297, "loss": 5.2584, "step": 450 }, { "epoch": 0.04676331257412076, "grad_norm": 0.69140625, "learning_rate": 0.0001998852884266057, "loss": 5.1916, "step": 451 }, { "epoch": 0.04686700062860883, "grad_norm": 0.79296875, "learning_rate": 0.00019988476770292762, "loss": 5.2374, "step": 452 }, { "epoch": 0.0469706886830969, "grad_norm": 1.21875, "learning_rate": 0.00019988424580071485, "loss": 5.2478, "step": 453 }, { "epoch": 0.04707437673758497, "grad_norm": 0.75, "learning_rate": 0.00019988372271997356, "loss": 5.2032, "step": 454 }, { "epoch": 0.04717806479207305, "grad_norm": 0.7265625, "learning_rate": 0.0001998831984607099, "loss": 5.2208, "step": 455 }, { "epoch": 0.04728175284656112, "grad_norm": 0.71484375, "learning_rate": 0.00019988267302293013, "loss": 5.2389, "step": 456 }, { "epoch": 0.047385440901049194, "grad_norm": 0.703125, "learning_rate": 0.00019988214640664036, "loss": 5.2707, "step": 457 }, { "epoch": 0.047489128955537266, "grad_norm": 0.796875, "learning_rate": 0.00019988161861184687, "loss": 5.221, "step": 458 }, { "epoch": 0.04759281701002534, "grad_norm": 0.515625, "learning_rate": 0.00019988108963855586, "loss": 5.1917, "step": 459 }, { "epoch": 0.04769650506451341, "grad_norm": 0.61328125, "learning_rate": 0.00019988055948677355, "loss": 5.2225, "step": 460 }, { "epoch": 0.04780019311900149, "grad_norm": 0.57421875, "learning_rate": 0.00019988002815650622, "loss": 5.1813, "step": 461 }, { "epoch": 0.04790388117348956, "grad_norm": 0.72265625, "learning_rate": 0.00019987949564776014, "loss": 5.2486, "step": 462 }, { "epoch": 0.04800756922797763, "grad_norm": 0.83984375, "learning_rate": 0.0001998789619605416, "loss": 5.2211, "step": 463 }, { "epoch": 0.0481112572824657, "grad_norm": 0.81640625, "learning_rate": 0.00019987842709485686, "loss": 5.2175, "step": 464 }, { "epoch": 0.04821494533695377, "grad_norm": 0.6796875, "learning_rate": 0.0001998778910507123, "loss": 5.2004, "step": 465 }, { "epoch": 0.048318633391441844, "grad_norm": 0.51953125, "learning_rate": 0.00019987735382811416, "loss": 5.2169, "step": 466 }, { "epoch": 0.04842232144592992, "grad_norm": 0.45703125, "learning_rate": 0.0001998768154270688, "loss": 5.165, "step": 467 }, { "epoch": 0.048526009500417994, "grad_norm": 0.53515625, "learning_rate": 0.00019987627584758263, "loss": 5.2305, "step": 468 }, { "epoch": 0.048629697554906065, "grad_norm": 0.439453125, "learning_rate": 0.00019987573508966199, "loss": 5.2377, "step": 469 }, { "epoch": 0.048733385609394136, "grad_norm": 0.5234375, "learning_rate": 0.00019987519315331324, "loss": 5.2023, "step": 470 }, { "epoch": 0.04883707366388221, "grad_norm": 0.49609375, "learning_rate": 0.00019987465003854275, "loss": 5.2201, "step": 471 }, { "epoch": 0.048940761718370286, "grad_norm": 0.59765625, "learning_rate": 0.000199874105745357, "loss": 5.1884, "step": 472 }, { "epoch": 0.04904444977285836, "grad_norm": 0.4609375, "learning_rate": 0.00019987356027376238, "loss": 5.1698, "step": 473 }, { "epoch": 0.04914813782734643, "grad_norm": 0.46875, "learning_rate": 0.0001998730136237653, "loss": 5.2097, "step": 474 }, { "epoch": 0.0492518258818345, "grad_norm": 0.609375, "learning_rate": 0.00019987246579537222, "loss": 5.2154, "step": 475 }, { "epoch": 0.04935551393632257, "grad_norm": 0.6328125, "learning_rate": 0.00019987191678858964, "loss": 5.1997, "step": 476 }, { "epoch": 0.04945920199081064, "grad_norm": 0.62890625, "learning_rate": 0.00019987136660342398, "loss": 5.2161, "step": 477 }, { "epoch": 0.04956289004529872, "grad_norm": 0.6875, "learning_rate": 0.00019987081523988178, "loss": 5.2113, "step": 478 }, { "epoch": 0.04966657809978679, "grad_norm": 0.68359375, "learning_rate": 0.00019987026269796952, "loss": 5.1944, "step": 479 }, { "epoch": 0.049770266154274864, "grad_norm": 0.57421875, "learning_rate": 0.00019986970897769375, "loss": 5.1795, "step": 480 }, { "epoch": 0.049873954208762936, "grad_norm": 0.64453125, "learning_rate": 0.00019986915407906096, "loss": 5.2348, "step": 481 }, { "epoch": 0.04997764226325101, "grad_norm": 0.83203125, "learning_rate": 0.00019986859800207772, "loss": 5.1926, "step": 482 }, { "epoch": 0.050081330317739085, "grad_norm": 0.87890625, "learning_rate": 0.00019986804074675058, "loss": 5.2065, "step": 483 }, { "epoch": 0.05018501837222716, "grad_norm": 0.76953125, "learning_rate": 0.00019986748231308615, "loss": 5.1956, "step": 484 }, { "epoch": 0.05028870642671523, "grad_norm": 0.66796875, "learning_rate": 0.00019986692270109098, "loss": 5.1497, "step": 485 }, { "epoch": 0.0503923944812033, "grad_norm": 0.6796875, "learning_rate": 0.00019986636191077168, "loss": 5.1756, "step": 486 }, { "epoch": 0.05049608253569137, "grad_norm": 0.76953125, "learning_rate": 0.00019986579994213486, "loss": 5.2124, "step": 487 }, { "epoch": 0.05059977059017944, "grad_norm": 0.9375, "learning_rate": 0.00019986523679518722, "loss": 5.1792, "step": 488 }, { "epoch": 0.05070345864466752, "grad_norm": 1.09375, "learning_rate": 0.00019986467246993527, "loss": 5.1996, "step": 489 }, { "epoch": 0.05080714669915559, "grad_norm": 0.99609375, "learning_rate": 0.0001998641069663858, "loss": 5.2116, "step": 490 }, { "epoch": 0.05091083475364366, "grad_norm": 1.1875, "learning_rate": 0.00019986354028454542, "loss": 5.1607, "step": 491 }, { "epoch": 0.051014522808131735, "grad_norm": 0.8828125, "learning_rate": 0.0001998629724244208, "loss": 5.1851, "step": 492 }, { "epoch": 0.051118210862619806, "grad_norm": 0.890625, "learning_rate": 0.00019986240338601869, "loss": 5.196, "step": 493 }, { "epoch": 0.05122189891710788, "grad_norm": 1.1953125, "learning_rate": 0.00019986183316934576, "loss": 5.1985, "step": 494 }, { "epoch": 0.051325586971595956, "grad_norm": 0.96484375, "learning_rate": 0.0001998612617744088, "loss": 5.1647, "step": 495 }, { "epoch": 0.05142927502608403, "grad_norm": 0.97265625, "learning_rate": 0.0001998606892012145, "loss": 5.205, "step": 496 }, { "epoch": 0.0515329630805721, "grad_norm": 1.1171875, "learning_rate": 0.00019986011544976956, "loss": 5.2331, "step": 497 }, { "epoch": 0.05163665113506017, "grad_norm": 0.87890625, "learning_rate": 0.00019985954052008085, "loss": 5.1625, "step": 498 }, { "epoch": 0.05174033918954824, "grad_norm": 0.7578125, "learning_rate": 0.00019985896441215514, "loss": 5.1805, "step": 499 }, { "epoch": 0.05184402724403632, "grad_norm": 0.7421875, "learning_rate": 0.0001998583871259992, "loss": 5.1823, "step": 500 }, { "epoch": 0.05194771529852439, "grad_norm": 0.71484375, "learning_rate": 0.00019985780866161985, "loss": 5.1178, "step": 501 }, { "epoch": 0.05205140335301246, "grad_norm": 0.76953125, "learning_rate": 0.00019985722901902389, "loss": 5.1614, "step": 502 }, { "epoch": 0.052155091407500534, "grad_norm": 0.78515625, "learning_rate": 0.0001998566481982182, "loss": 5.178, "step": 503 }, { "epoch": 0.052258779461988605, "grad_norm": 0.80078125, "learning_rate": 0.0001998560661992096, "loss": 5.1482, "step": 504 }, { "epoch": 0.05236246751647668, "grad_norm": 0.74609375, "learning_rate": 0.00019985548302200497, "loss": 5.1702, "step": 505 }, { "epoch": 0.052466155570964755, "grad_norm": 0.78515625, "learning_rate": 0.0001998548986666112, "loss": 5.165, "step": 506 }, { "epoch": 0.052569843625452826, "grad_norm": 0.7421875, "learning_rate": 0.00019985431313303517, "loss": 5.1466, "step": 507 }, { "epoch": 0.0526735316799409, "grad_norm": 0.5546875, "learning_rate": 0.00019985372642128383, "loss": 5.183, "step": 508 }, { "epoch": 0.05277721973442897, "grad_norm": 0.6328125, "learning_rate": 0.00019985313853136403, "loss": 5.1417, "step": 509 }, { "epoch": 0.05288090778891704, "grad_norm": 0.69140625, "learning_rate": 0.00019985254946328274, "loss": 5.1574, "step": 510 }, { "epoch": 0.05298459584340512, "grad_norm": 0.625, "learning_rate": 0.00019985195921704696, "loss": 5.1722, "step": 511 }, { "epoch": 0.05308828389789319, "grad_norm": 0.58203125, "learning_rate": 0.0001998513677926636, "loss": 5.163, "step": 512 }, { "epoch": 0.05319197195238126, "grad_norm": 0.69140625, "learning_rate": 0.0001998507751901396, "loss": 5.1497, "step": 513 }, { "epoch": 0.05329566000686933, "grad_norm": 0.69140625, "learning_rate": 0.000199850181409482, "loss": 5.1216, "step": 514 }, { "epoch": 0.053399348061357405, "grad_norm": 0.8359375, "learning_rate": 0.00019984958645069786, "loss": 5.1793, "step": 515 }, { "epoch": 0.053503036115845476, "grad_norm": 0.9453125, "learning_rate": 0.0001998489903137941, "loss": 5.1748, "step": 516 }, { "epoch": 0.053606724170333554, "grad_norm": 1.1484375, "learning_rate": 0.0001998483929987778, "loss": 5.1428, "step": 517 }, { "epoch": 0.053710412224821626, "grad_norm": 0.875, "learning_rate": 0.00019984779450565605, "loss": 5.1818, "step": 518 }, { "epoch": 0.0538141002793097, "grad_norm": 0.77734375, "learning_rate": 0.00019984719483443587, "loss": 5.1619, "step": 519 }, { "epoch": 0.05391778833379777, "grad_norm": 0.796875, "learning_rate": 0.0001998465939851243, "loss": 5.1613, "step": 520 }, { "epoch": 0.05402147638828584, "grad_norm": 0.82421875, "learning_rate": 0.00019984599195772845, "loss": 5.1229, "step": 521 }, { "epoch": 0.05412516444277392, "grad_norm": 0.9453125, "learning_rate": 0.00019984538875225547, "loss": 5.1587, "step": 522 }, { "epoch": 0.05422885249726199, "grad_norm": 1.21875, "learning_rate": 0.00019984478436871244, "loss": 5.163, "step": 523 }, { "epoch": 0.05433254055175006, "grad_norm": 0.86328125, "learning_rate": 0.00019984417880710646, "loss": 5.1997, "step": 524 }, { "epoch": 0.05443622860623813, "grad_norm": 0.85546875, "learning_rate": 0.00019984357206744474, "loss": 5.1593, "step": 525 }, { "epoch": 0.054539916660726204, "grad_norm": 0.81640625, "learning_rate": 0.0001998429641497344, "loss": 5.1307, "step": 526 }, { "epoch": 0.054643604715214275, "grad_norm": 0.6015625, "learning_rate": 0.00019984235505398262, "loss": 5.0949, "step": 527 }, { "epoch": 0.05474729276970235, "grad_norm": 0.63671875, "learning_rate": 0.0001998417447801966, "loss": 5.1336, "step": 528 }, { "epoch": 0.054850980824190425, "grad_norm": 0.63671875, "learning_rate": 0.00019984113332838352, "loss": 5.1222, "step": 529 }, { "epoch": 0.054954668878678496, "grad_norm": 0.7578125, "learning_rate": 0.0001998405206985506, "loss": 5.1132, "step": 530 }, { "epoch": 0.05505835693316657, "grad_norm": 0.77734375, "learning_rate": 0.00019983990689070508, "loss": 5.1254, "step": 531 }, { "epoch": 0.05516204498765464, "grad_norm": 0.671875, "learning_rate": 0.00019983929190485423, "loss": 5.1434, "step": 532 }, { "epoch": 0.05526573304214271, "grad_norm": 0.67578125, "learning_rate": 0.0001998386757410052, "loss": 5.0891, "step": 533 }, { "epoch": 0.05536942109663079, "grad_norm": 0.69921875, "learning_rate": 0.0001998380583991654, "loss": 5.144, "step": 534 }, { "epoch": 0.05547310915111886, "grad_norm": 0.71484375, "learning_rate": 0.00019983743987934198, "loss": 5.1256, "step": 535 }, { "epoch": 0.05557679720560693, "grad_norm": 0.546875, "learning_rate": 0.00019983682018154234, "loss": 5.1397, "step": 536 }, { "epoch": 0.055680485260095, "grad_norm": 0.58984375, "learning_rate": 0.00019983619930577374, "loss": 5.1786, "step": 537 }, { "epoch": 0.055784173314583074, "grad_norm": 0.478515625, "learning_rate": 0.00019983557725204352, "loss": 5.1161, "step": 538 }, { "epoch": 0.05588786136907115, "grad_norm": 0.54296875, "learning_rate": 0.00019983495402035902, "loss": 5.1185, "step": 539 }, { "epoch": 0.055991549423559224, "grad_norm": 0.546875, "learning_rate": 0.0001998343296107276, "loss": 5.1027, "step": 540 }, { "epoch": 0.056095237478047295, "grad_norm": 0.50390625, "learning_rate": 0.0001998337040231566, "loss": 5.1329, "step": 541 }, { "epoch": 0.05619892553253537, "grad_norm": 0.49609375, "learning_rate": 0.00019983307725765346, "loss": 5.15, "step": 542 }, { "epoch": 0.05630261358702344, "grad_norm": 0.625, "learning_rate": 0.0001998324493142255, "loss": 5.1271, "step": 543 }, { "epoch": 0.05640630164151151, "grad_norm": 0.703125, "learning_rate": 0.00019983182019288017, "loss": 5.1476, "step": 544 }, { "epoch": 0.05650998969599959, "grad_norm": 0.734375, "learning_rate": 0.0001998311898936249, "loss": 5.0857, "step": 545 }, { "epoch": 0.05661367775048766, "grad_norm": 0.578125, "learning_rate": 0.0001998305584164671, "loss": 5.1089, "step": 546 }, { "epoch": 0.05671736580497573, "grad_norm": 0.482421875, "learning_rate": 0.00019982992576141425, "loss": 5.1258, "step": 547 }, { "epoch": 0.0568210538594638, "grad_norm": 0.59765625, "learning_rate": 0.0001998292919284738, "loss": 5.1405, "step": 548 }, { "epoch": 0.05692474191395187, "grad_norm": 0.59765625, "learning_rate": 0.00019982865691765323, "loss": 5.1548, "step": 549 }, { "epoch": 0.05702842996843995, "grad_norm": 0.71875, "learning_rate": 0.00019982802072896004, "loss": 5.0671, "step": 550 }, { "epoch": 0.05713211802292802, "grad_norm": 0.66796875, "learning_rate": 0.00019982738336240172, "loss": 5.1215, "step": 551 }, { "epoch": 0.057235806077416095, "grad_norm": 0.55859375, "learning_rate": 0.0001998267448179858, "loss": 5.0941, "step": 552 }, { "epoch": 0.057339494131904166, "grad_norm": 0.6015625, "learning_rate": 0.00019982610509571979, "loss": 5.1049, "step": 553 }, { "epoch": 0.05744318218639224, "grad_norm": 0.58203125, "learning_rate": 0.0001998254641956113, "loss": 5.1052, "step": 554 }, { "epoch": 0.05754687024088031, "grad_norm": 0.67578125, "learning_rate": 0.0001998248221176678, "loss": 5.0879, "step": 555 }, { "epoch": 0.05765055829536839, "grad_norm": 0.6796875, "learning_rate": 0.00019982417886189698, "loss": 5.1172, "step": 556 }, { "epoch": 0.05775424634985646, "grad_norm": 0.5859375, "learning_rate": 0.00019982353442830634, "loss": 5.0876, "step": 557 }, { "epoch": 0.05785793440434453, "grad_norm": 0.515625, "learning_rate": 0.00019982288881690349, "loss": 5.1051, "step": 558 }, { "epoch": 0.0579616224588326, "grad_norm": 0.59765625, "learning_rate": 0.00019982224202769611, "loss": 5.0873, "step": 559 }, { "epoch": 0.05806531051332067, "grad_norm": 0.66015625, "learning_rate": 0.00019982159406069176, "loss": 5.0969, "step": 560 }, { "epoch": 0.058168998567808744, "grad_norm": 0.6328125, "learning_rate": 0.00019982094491589813, "loss": 5.1099, "step": 561 }, { "epoch": 0.05827268662229682, "grad_norm": 0.6328125, "learning_rate": 0.00019982029459332287, "loss": 5.0659, "step": 562 }, { "epoch": 0.058376374676784894, "grad_norm": 0.5625, "learning_rate": 0.00019981964309297363, "loss": 5.0939, "step": 563 }, { "epoch": 0.058480062731272965, "grad_norm": 0.5859375, "learning_rate": 0.00019981899041485813, "loss": 5.1099, "step": 564 }, { "epoch": 0.058583750785761037, "grad_norm": 0.6484375, "learning_rate": 0.00019981833655898404, "loss": 5.1289, "step": 565 }, { "epoch": 0.05868743884024911, "grad_norm": 0.56640625, "learning_rate": 0.00019981768152535913, "loss": 5.0745, "step": 566 }, { "epoch": 0.058791126894737186, "grad_norm": 0.59765625, "learning_rate": 0.00019981702531399106, "loss": 5.1127, "step": 567 }, { "epoch": 0.05889481494922526, "grad_norm": 0.671875, "learning_rate": 0.0001998163679248876, "loss": 5.0662, "step": 568 }, { "epoch": 0.05899850300371333, "grad_norm": 0.73046875, "learning_rate": 0.0001998157093580565, "loss": 5.109, "step": 569 }, { "epoch": 0.0591021910582014, "grad_norm": 0.95703125, "learning_rate": 0.00019981504961350558, "loss": 5.0697, "step": 570 }, { "epoch": 0.05920587911268947, "grad_norm": 1.1484375, "learning_rate": 0.00019981438869124256, "loss": 5.0217, "step": 571 }, { "epoch": 0.05930956716717754, "grad_norm": 0.765625, "learning_rate": 0.00019981372659127523, "loss": 5.0847, "step": 572 }, { "epoch": 0.05941325522166562, "grad_norm": 0.5078125, "learning_rate": 0.00019981306331361148, "loss": 5.0614, "step": 573 }, { "epoch": 0.05951694327615369, "grad_norm": 0.74609375, "learning_rate": 0.00019981239885825906, "loss": 5.0795, "step": 574 }, { "epoch": 0.059620631330641764, "grad_norm": 0.7734375, "learning_rate": 0.00019981173322522586, "loss": 5.0716, "step": 575 }, { "epoch": 0.059724319385129836, "grad_norm": 0.62109375, "learning_rate": 0.00019981106641451973, "loss": 5.0803, "step": 576 }, { "epoch": 0.05982800743961791, "grad_norm": 0.59765625, "learning_rate": 0.0001998103984261485, "loss": 5.0795, "step": 577 }, { "epoch": 0.059931695494105985, "grad_norm": 0.62109375, "learning_rate": 0.00019980972926012005, "loss": 5.0748, "step": 578 }, { "epoch": 0.06003538354859406, "grad_norm": 0.6328125, "learning_rate": 0.0001998090589164423, "loss": 5.0885, "step": 579 }, { "epoch": 0.06013907160308213, "grad_norm": 0.671875, "learning_rate": 0.0001998083873951232, "loss": 5.0466, "step": 580 }, { "epoch": 0.0602427596575702, "grad_norm": 0.65625, "learning_rate": 0.00019980771469617058, "loss": 5.0745, "step": 581 }, { "epoch": 0.06034644771205827, "grad_norm": 0.625, "learning_rate": 0.00019980704081959248, "loss": 5.0909, "step": 582 }, { "epoch": 0.06045013576654634, "grad_norm": 0.640625, "learning_rate": 0.00019980636576539678, "loss": 5.017, "step": 583 }, { "epoch": 0.06055382382103442, "grad_norm": 0.63671875, "learning_rate": 0.00019980568953359144, "loss": 5.1101, "step": 584 }, { "epoch": 0.06065751187552249, "grad_norm": 0.859375, "learning_rate": 0.00019980501212418447, "loss": 5.0365, "step": 585 }, { "epoch": 0.06076119993001056, "grad_norm": 1.3515625, "learning_rate": 0.0001998043335371839, "loss": 5.07, "step": 586 }, { "epoch": 0.060864887984498635, "grad_norm": 0.7734375, "learning_rate": 0.00019980365377259763, "loss": 5.0933, "step": 587 }, { "epoch": 0.060968576038986706, "grad_norm": 0.921875, "learning_rate": 0.00019980297283043379, "loss": 5.1316, "step": 588 }, { "epoch": 0.061072264093474785, "grad_norm": 1.3359375, "learning_rate": 0.00019980229071070037, "loss": 5.0325, "step": 589 }, { "epoch": 0.061175952147962856, "grad_norm": 0.9375, "learning_rate": 0.00019980160741340537, "loss": 5.1035, "step": 590 }, { "epoch": 0.06127964020245093, "grad_norm": 1.6328125, "learning_rate": 0.0001998009229385569, "loss": 5.0554, "step": 591 }, { "epoch": 0.061383328256939, "grad_norm": 0.6484375, "learning_rate": 0.00019980023728616305, "loss": 5.0741, "step": 592 }, { "epoch": 0.06148701631142707, "grad_norm": 2.171875, "learning_rate": 0.0001997995504562319, "loss": 5.083, "step": 593 }, { "epoch": 0.06159070436591514, "grad_norm": 1.296875, "learning_rate": 0.00019979886244877158, "loss": 5.0724, "step": 594 }, { "epoch": 0.06169439242040322, "grad_norm": 3.359375, "learning_rate": 0.00019979817326379012, "loss": 5.1534, "step": 595 }, { "epoch": 0.06179808047489129, "grad_norm": 3.109375, "learning_rate": 0.00019979748290129573, "loss": 5.1388, "step": 596 }, { "epoch": 0.06190176852937936, "grad_norm": 1.3984375, "learning_rate": 0.00019979679136129653, "loss": 5.0704, "step": 597 }, { "epoch": 0.062005456583867434, "grad_norm": 2.40625, "learning_rate": 0.00019979609864380067, "loss": 5.1138, "step": 598 }, { "epoch": 0.062109144638355505, "grad_norm": 2.265625, "learning_rate": 0.00019979540474881634, "loss": 5.1004, "step": 599 }, { "epoch": 0.06221283269284358, "grad_norm": 1.2890625, "learning_rate": 0.00019979470967635172, "loss": 5.0521, "step": 600 }, { "epoch": 0.062316520747331655, "grad_norm": 1.796875, "learning_rate": 0.00019979401342641503, "loss": 5.0985, "step": 601 }, { "epoch": 0.062420208801819727, "grad_norm": 1.5, "learning_rate": 0.00019979331599901445, "loss": 5.0998, "step": 602 }, { "epoch": 0.0625238968563078, "grad_norm": 1.078125, "learning_rate": 0.00019979261739415825, "loss": 5.1008, "step": 603 }, { "epoch": 0.06262758491079587, "grad_norm": 2.265625, "learning_rate": 0.00019979191761185466, "loss": 5.088, "step": 604 }, { "epoch": 0.06273127296528394, "grad_norm": 1.5625, "learning_rate": 0.00019979121665211186, "loss": 5.1075, "step": 605 }, { "epoch": 0.06283496101977201, "grad_norm": 2.25, "learning_rate": 0.00019979051451493826, "loss": 5.1131, "step": 606 }, { "epoch": 0.06293864907426008, "grad_norm": 1.375, "learning_rate": 0.00019978981120034203, "loss": 5.0863, "step": 607 }, { "epoch": 0.06304233712874815, "grad_norm": 2.875, "learning_rate": 0.0001997891067083315, "loss": 5.126, "step": 608 }, { "epoch": 0.06314602518323624, "grad_norm": 2.453125, "learning_rate": 0.00019978840103891505, "loss": 5.1418, "step": 609 }, { "epoch": 0.06324971323772431, "grad_norm": 1.484375, "learning_rate": 0.0001997876941921009, "loss": 5.0454, "step": 610 }, { "epoch": 0.06335340129221238, "grad_norm": 1.421875, "learning_rate": 0.00019978698616789745, "loss": 5.0991, "step": 611 }, { "epoch": 0.06345708934670045, "grad_norm": 1.4296875, "learning_rate": 0.00019978627696631306, "loss": 5.0266, "step": 612 }, { "epoch": 0.06356077740118853, "grad_norm": 1.3203125, "learning_rate": 0.00019978556658735606, "loss": 5.096, "step": 613 }, { "epoch": 0.0636644654556766, "grad_norm": 1.140625, "learning_rate": 0.00019978485503103485, "loss": 5.0584, "step": 614 }, { "epoch": 0.06376815351016467, "grad_norm": 1.3125, "learning_rate": 0.0001997841422973578, "loss": 5.092, "step": 615 }, { "epoch": 0.06387184156465274, "grad_norm": 1.1484375, "learning_rate": 0.00019978342838633344, "loss": 5.1038, "step": 616 }, { "epoch": 0.06397552961914081, "grad_norm": 1.1015625, "learning_rate": 0.00019978271329797003, "loss": 5.0346, "step": 617 }, { "epoch": 0.06407921767362888, "grad_norm": 0.9921875, "learning_rate": 0.00019978199703227608, "loss": 5.0628, "step": 618 }, { "epoch": 0.06418290572811695, "grad_norm": 0.96484375, "learning_rate": 0.00019978127958926006, "loss": 5.1023, "step": 619 }, { "epoch": 0.06428659378260504, "grad_norm": 0.89453125, "learning_rate": 0.00019978056096893042, "loss": 5.0591, "step": 620 }, { "epoch": 0.06439028183709311, "grad_norm": 0.98046875, "learning_rate": 0.0001997798411712956, "loss": 5.0737, "step": 621 }, { "epoch": 0.06449396989158118, "grad_norm": 0.84765625, "learning_rate": 0.00019977912019636415, "loss": 5.0839, "step": 622 }, { "epoch": 0.06459765794606925, "grad_norm": 0.7421875, "learning_rate": 0.00019977839804414456, "loss": 5.0516, "step": 623 }, { "epoch": 0.06470134600055732, "grad_norm": 0.76171875, "learning_rate": 0.00019977767471464531, "loss": 5.0687, "step": 624 }, { "epoch": 0.0648050340550454, "grad_norm": 0.671875, "learning_rate": 0.00019977695020787498, "loss": 5.0761, "step": 625 }, { "epoch": 0.06490872210953347, "grad_norm": 0.76171875, "learning_rate": 0.00019977622452384212, "loss": 5.0706, "step": 626 }, { "epoch": 0.06501241016402154, "grad_norm": 0.609375, "learning_rate": 0.00019977549766255528, "loss": 5.0099, "step": 627 }, { "epoch": 0.06511609821850961, "grad_norm": 0.66015625, "learning_rate": 0.00019977476962402304, "loss": 5.0794, "step": 628 }, { "epoch": 0.06521978627299768, "grad_norm": 0.69140625, "learning_rate": 0.00019977404040825395, "loss": 5.0676, "step": 629 }, { "epoch": 0.06532347432748575, "grad_norm": 0.7265625, "learning_rate": 0.0001997733100152567, "loss": 5.0754, "step": 630 }, { "epoch": 0.06542716238197384, "grad_norm": 0.66015625, "learning_rate": 0.0001997725784450398, "loss": 5.0106, "step": 631 }, { "epoch": 0.06553085043646191, "grad_norm": 0.474609375, "learning_rate": 0.000199771845697612, "loss": 5.0163, "step": 632 }, { "epoch": 0.06563453849094998, "grad_norm": 0.64453125, "learning_rate": 0.00019977111177298183, "loss": 5.0573, "step": 633 }, { "epoch": 0.06573822654543805, "grad_norm": 0.53125, "learning_rate": 0.00019977037667115802, "loss": 5.0315, "step": 634 }, { "epoch": 0.06584191459992612, "grad_norm": 0.5078125, "learning_rate": 0.00019976964039214923, "loss": 5.0168, "step": 635 }, { "epoch": 0.0659456026544142, "grad_norm": 0.5078125, "learning_rate": 0.00019976890293596416, "loss": 5.0454, "step": 636 }, { "epoch": 0.06604929070890227, "grad_norm": 0.5546875, "learning_rate": 0.00019976816430261146, "loss": 5.0578, "step": 637 }, { "epoch": 0.06615297876339034, "grad_norm": 0.498046875, "learning_rate": 0.00019976742449209992, "loss": 5.0527, "step": 638 }, { "epoch": 0.06625666681787841, "grad_norm": 0.46484375, "learning_rate": 0.0001997666835044382, "loss": 5.083, "step": 639 }, { "epoch": 0.06636035487236648, "grad_norm": 0.6171875, "learning_rate": 0.00019976594133963512, "loss": 5.0622, "step": 640 }, { "epoch": 0.06646404292685455, "grad_norm": 0.46484375, "learning_rate": 0.00019976519799769931, "loss": 5.0684, "step": 641 }, { "epoch": 0.06656773098134262, "grad_norm": 0.5703125, "learning_rate": 0.00019976445347863968, "loss": 5.0087, "step": 642 }, { "epoch": 0.06667141903583071, "grad_norm": 0.546875, "learning_rate": 0.00019976370778246495, "loss": 5.0554, "step": 643 }, { "epoch": 0.06677510709031878, "grad_norm": 0.44140625, "learning_rate": 0.0001997629609091839, "loss": 5.0249, "step": 644 }, { "epoch": 0.06687879514480685, "grad_norm": 0.470703125, "learning_rate": 0.0001997622128588054, "loss": 5.0804, "step": 645 }, { "epoch": 0.06698248319929492, "grad_norm": 0.47265625, "learning_rate": 0.0001997614636313382, "loss": 5.0207, "step": 646 }, { "epoch": 0.067086171253783, "grad_norm": 0.52734375, "learning_rate": 0.0001997607132267912, "loss": 5.0279, "step": 647 }, { "epoch": 0.06718985930827107, "grad_norm": 0.4453125, "learning_rate": 0.00019975996164517325, "loss": 5.062, "step": 648 }, { "epoch": 0.06729354736275914, "grad_norm": 0.50390625, "learning_rate": 0.00019975920888649318, "loss": 4.9891, "step": 649 }, { "epoch": 0.06739723541724721, "grad_norm": 0.443359375, "learning_rate": 0.00019975845495075992, "loss": 5.0609, "step": 650 }, { "epoch": 0.06750092347173528, "grad_norm": 0.5078125, "learning_rate": 0.0001997576998379823, "loss": 5.0621, "step": 651 }, { "epoch": 0.06760461152622335, "grad_norm": 0.419921875, "learning_rate": 0.0001997569435481693, "loss": 4.9655, "step": 652 }, { "epoch": 0.06770829958071142, "grad_norm": 0.466796875, "learning_rate": 0.00019975618608132983, "loss": 5.0336, "step": 653 }, { "epoch": 0.06781198763519951, "grad_norm": 0.5, "learning_rate": 0.0001997554274374728, "loss": 5.0162, "step": 654 }, { "epoch": 0.06791567568968758, "grad_norm": 0.490234375, "learning_rate": 0.00019975466761660714, "loss": 4.9897, "step": 655 }, { "epoch": 0.06801936374417565, "grad_norm": 0.6484375, "learning_rate": 0.00019975390661874188, "loss": 4.9694, "step": 656 }, { "epoch": 0.06812305179866372, "grad_norm": 0.6640625, "learning_rate": 0.00019975314444388597, "loss": 5.0425, "step": 657 }, { "epoch": 0.0682267398531518, "grad_norm": 0.5234375, "learning_rate": 0.00019975238109204836, "loss": 5.0513, "step": 658 }, { "epoch": 0.06833042790763987, "grad_norm": 0.50390625, "learning_rate": 0.00019975161656323812, "loss": 5.0141, "step": 659 }, { "epoch": 0.06843411596212794, "grad_norm": 0.5625, "learning_rate": 0.00019975085085746427, "loss": 5.0405, "step": 660 }, { "epoch": 0.06853780401661601, "grad_norm": 0.51171875, "learning_rate": 0.00019975008397473578, "loss": 5.0309, "step": 661 }, { "epoch": 0.06864149207110408, "grad_norm": 0.44921875, "learning_rate": 0.00019974931591506176, "loss": 5.0045, "step": 662 }, { "epoch": 0.06874518012559215, "grad_norm": 0.58203125, "learning_rate": 0.00019974854667845126, "loss": 5.0045, "step": 663 }, { "epoch": 0.06884886818008022, "grad_norm": 0.55859375, "learning_rate": 0.00019974777626491334, "loss": 5.0137, "step": 664 }, { "epoch": 0.06895255623456831, "grad_norm": 0.439453125, "learning_rate": 0.0001997470046744571, "loss": 5.0363, "step": 665 }, { "epoch": 0.06905624428905638, "grad_norm": 0.55078125, "learning_rate": 0.00019974623190709164, "loss": 5.052, "step": 666 }, { "epoch": 0.06915993234354445, "grad_norm": 0.6015625, "learning_rate": 0.00019974545796282606, "loss": 5.0489, "step": 667 }, { "epoch": 0.06926362039803252, "grad_norm": 0.51171875, "learning_rate": 0.00019974468284166954, "loss": 5.0173, "step": 668 }, { "epoch": 0.06936730845252059, "grad_norm": 0.5234375, "learning_rate": 0.0001997439065436312, "loss": 4.9908, "step": 669 }, { "epoch": 0.06947099650700866, "grad_norm": 0.68359375, "learning_rate": 0.00019974312906872018, "loss": 5.024, "step": 670 }, { "epoch": 0.06957468456149674, "grad_norm": 0.66796875, "learning_rate": 0.00019974235041694566, "loss": 5.0214, "step": 671 }, { "epoch": 0.06967837261598481, "grad_norm": 0.5625, "learning_rate": 0.00019974157058831685, "loss": 5.0328, "step": 672 }, { "epoch": 0.06978206067047288, "grad_norm": 0.51171875, "learning_rate": 0.00019974078958284294, "loss": 4.9868, "step": 673 }, { "epoch": 0.06988574872496095, "grad_norm": 0.67578125, "learning_rate": 0.00019974000740053316, "loss": 4.9927, "step": 674 }, { "epoch": 0.06998943677944902, "grad_norm": 0.6328125, "learning_rate": 0.0001997392240413967, "loss": 5.0442, "step": 675 }, { "epoch": 0.0700931248339371, "grad_norm": 0.53515625, "learning_rate": 0.0001997384395054428, "loss": 5.0179, "step": 676 }, { "epoch": 0.07019681288842518, "grad_norm": 0.58203125, "learning_rate": 0.00019973765379268082, "loss": 5.0022, "step": 677 }, { "epoch": 0.07030050094291325, "grad_norm": 0.7109375, "learning_rate": 0.00019973686690311987, "loss": 5.0517, "step": 678 }, { "epoch": 0.07040418899740132, "grad_norm": 0.458984375, "learning_rate": 0.00019973607883676936, "loss": 4.9883, "step": 679 }, { "epoch": 0.07050787705188939, "grad_norm": 0.546875, "learning_rate": 0.00019973528959363855, "loss": 4.9936, "step": 680 }, { "epoch": 0.07061156510637746, "grad_norm": 0.62890625, "learning_rate": 0.00019973449917373674, "loss": 5.0083, "step": 681 }, { "epoch": 0.07071525316086553, "grad_norm": 0.53515625, "learning_rate": 0.00019973370757707325, "loss": 5.0072, "step": 682 }, { "epoch": 0.0708189412153536, "grad_norm": 0.546875, "learning_rate": 0.00019973291480365743, "loss": 5.0331, "step": 683 }, { "epoch": 0.07092262926984168, "grad_norm": 0.60546875, "learning_rate": 0.00019973212085349867, "loss": 4.9926, "step": 684 }, { "epoch": 0.07102631732432975, "grad_norm": 0.67578125, "learning_rate": 0.00019973132572660628, "loss": 4.9922, "step": 685 }, { "epoch": 0.07113000537881782, "grad_norm": 0.6015625, "learning_rate": 0.00019973052942298967, "loss": 5.0278, "step": 686 }, { "epoch": 0.0712336934333059, "grad_norm": 0.6171875, "learning_rate": 0.00019972973194265823, "loss": 5.0312, "step": 687 }, { "epoch": 0.07133738148779398, "grad_norm": 0.6796875, "learning_rate": 0.00019972893328562137, "loss": 4.9927, "step": 688 }, { "epoch": 0.07144106954228205, "grad_norm": 0.48046875, "learning_rate": 0.00019972813345188852, "loss": 5.0116, "step": 689 }, { "epoch": 0.07154475759677012, "grad_norm": 0.494140625, "learning_rate": 0.00019972733244146912, "loss": 4.9578, "step": 690 }, { "epoch": 0.07164844565125819, "grad_norm": 0.58984375, "learning_rate": 0.00019972653025437261, "loss": 4.9803, "step": 691 }, { "epoch": 0.07175213370574626, "grad_norm": 0.58984375, "learning_rate": 0.00019972572689060846, "loss": 4.9453, "step": 692 }, { "epoch": 0.07185582176023433, "grad_norm": 0.474609375, "learning_rate": 0.00019972492235018616, "loss": 4.9737, "step": 693 }, { "epoch": 0.0719595098147224, "grad_norm": 0.58203125, "learning_rate": 0.00019972411663311517, "loss": 5.0081, "step": 694 }, { "epoch": 0.07206319786921048, "grad_norm": 0.7421875, "learning_rate": 0.00019972330973940503, "loss": 4.9832, "step": 695 }, { "epoch": 0.07216688592369855, "grad_norm": 0.6875, "learning_rate": 0.00019972250166906523, "loss": 5.0042, "step": 696 }, { "epoch": 0.07227057397818662, "grad_norm": 0.640625, "learning_rate": 0.0001997216924221053, "loss": 5.0134, "step": 697 }, { "epoch": 0.0723742620326747, "grad_norm": 0.6015625, "learning_rate": 0.00019972088199853488, "loss": 4.9701, "step": 698 }, { "epoch": 0.07247795008716278, "grad_norm": 0.58984375, "learning_rate": 0.0001997200703983634, "loss": 5.0043, "step": 699 }, { "epoch": 0.07258163814165085, "grad_norm": 0.5234375, "learning_rate": 0.00019971925762160054, "loss": 5.0103, "step": 700 }, { "epoch": 0.07268532619613892, "grad_norm": 0.55078125, "learning_rate": 0.0001997184436682558, "loss": 4.9916, "step": 701 }, { "epoch": 0.07278901425062699, "grad_norm": 0.59375, "learning_rate": 0.00019971762853833886, "loss": 4.9837, "step": 702 }, { "epoch": 0.07289270230511506, "grad_norm": 0.625, "learning_rate": 0.0001997168122318593, "loss": 4.9554, "step": 703 }, { "epoch": 0.07299639035960313, "grad_norm": 0.609375, "learning_rate": 0.0001997159947488268, "loss": 5.0302, "step": 704 }, { "epoch": 0.0731000784140912, "grad_norm": 0.58984375, "learning_rate": 0.00019971517608925092, "loss": 5.0219, "step": 705 }, { "epoch": 0.07320376646857928, "grad_norm": 0.63671875, "learning_rate": 0.00019971435625314139, "loss": 4.9801, "step": 706 }, { "epoch": 0.07330745452306735, "grad_norm": 0.5859375, "learning_rate": 0.00019971353524050783, "loss": 5.0094, "step": 707 }, { "epoch": 0.07341114257755542, "grad_norm": 0.6796875, "learning_rate": 0.00019971271305135998, "loss": 4.9897, "step": 708 }, { "epoch": 0.07351483063204349, "grad_norm": 0.8828125, "learning_rate": 0.00019971188968570752, "loss": 4.9852, "step": 709 }, { "epoch": 0.07361851868653158, "grad_norm": 1.2265625, "learning_rate": 0.00019971106514356018, "loss": 5.0171, "step": 710 }, { "epoch": 0.07372220674101965, "grad_norm": 1.0703125, "learning_rate": 0.00019971023942492763, "loss": 5.022, "step": 711 }, { "epoch": 0.07382589479550772, "grad_norm": 0.8828125, "learning_rate": 0.00019970941252981964, "loss": 4.9833, "step": 712 }, { "epoch": 0.07392958284999579, "grad_norm": 0.6171875, "learning_rate": 0.00019970858445824603, "loss": 5.025, "step": 713 }, { "epoch": 0.07403327090448386, "grad_norm": 0.59375, "learning_rate": 0.0001997077552102165, "loss": 4.9984, "step": 714 }, { "epoch": 0.07413695895897193, "grad_norm": 0.89453125, "learning_rate": 0.00019970692478574084, "loss": 4.9837, "step": 715 }, { "epoch": 0.07424064701346, "grad_norm": 0.98046875, "learning_rate": 0.00019970609318482887, "loss": 5.0337, "step": 716 }, { "epoch": 0.07434433506794808, "grad_norm": 1.0859375, "learning_rate": 0.0001997052604074904, "loss": 5.0083, "step": 717 }, { "epoch": 0.07444802312243615, "grad_norm": 0.9375, "learning_rate": 0.00019970442645373526, "loss": 4.99, "step": 718 }, { "epoch": 0.07455171117692422, "grad_norm": 1.0078125, "learning_rate": 0.00019970359132357327, "loss": 4.9583, "step": 719 }, { "epoch": 0.07465539923141229, "grad_norm": 1.1796875, "learning_rate": 0.0001997027550170143, "loss": 4.9729, "step": 720 }, { "epoch": 0.07475908728590037, "grad_norm": 0.6875, "learning_rate": 0.0001997019175340682, "loss": 5.0005, "step": 721 }, { "epoch": 0.07486277534038845, "grad_norm": 0.7578125, "learning_rate": 0.00019970107887474486, "loss": 4.9606, "step": 722 }, { "epoch": 0.07496646339487652, "grad_norm": 1.1796875, "learning_rate": 0.0001997002390390542, "loss": 4.9606, "step": 723 }, { "epoch": 0.07507015144936459, "grad_norm": 1.0703125, "learning_rate": 0.00019969939802700606, "loss": 4.9956, "step": 724 }, { "epoch": 0.07517383950385266, "grad_norm": 0.875, "learning_rate": 0.00019969855583861046, "loss": 5.0123, "step": 725 }, { "epoch": 0.07527752755834073, "grad_norm": 0.8671875, "learning_rate": 0.00019969771247387724, "loss": 5.0062, "step": 726 }, { "epoch": 0.0753812156128288, "grad_norm": 1.0625, "learning_rate": 0.00019969686793281643, "loss": 4.9654, "step": 727 }, { "epoch": 0.07548490366731687, "grad_norm": 1.0625, "learning_rate": 0.00019969602221543798, "loss": 5.0011, "step": 728 }, { "epoch": 0.07558859172180495, "grad_norm": 0.765625, "learning_rate": 0.00019969517532175183, "loss": 4.9748, "step": 729 }, { "epoch": 0.07569227977629302, "grad_norm": 0.8828125, "learning_rate": 0.000199694327251768, "loss": 4.9883, "step": 730 }, { "epoch": 0.07579596783078109, "grad_norm": 1.140625, "learning_rate": 0.00019969347800549646, "loss": 4.9907, "step": 731 }, { "epoch": 0.07589965588526917, "grad_norm": 0.8984375, "learning_rate": 0.0001996926275829473, "loss": 4.9906, "step": 732 }, { "epoch": 0.07600334393975725, "grad_norm": 0.6875, "learning_rate": 0.0001996917759841305, "loss": 4.9916, "step": 733 }, { "epoch": 0.07610703199424532, "grad_norm": 0.82421875, "learning_rate": 0.0001996909232090561, "loss": 4.9873, "step": 734 }, { "epoch": 0.07621072004873339, "grad_norm": 0.83984375, "learning_rate": 0.0001996900692577342, "loss": 5.0099, "step": 735 }, { "epoch": 0.07631440810322146, "grad_norm": 0.9140625, "learning_rate": 0.00019968921413017487, "loss": 5.0039, "step": 736 }, { "epoch": 0.07641809615770953, "grad_norm": 0.75, "learning_rate": 0.0001996883578263882, "loss": 4.9653, "step": 737 }, { "epoch": 0.0765217842121976, "grad_norm": 0.734375, "learning_rate": 0.00019968750034638427, "loss": 4.967, "step": 738 }, { "epoch": 0.07662547226668567, "grad_norm": 0.7109375, "learning_rate": 0.0001996866416901732, "loss": 4.9534, "step": 739 }, { "epoch": 0.07672916032117374, "grad_norm": 0.6953125, "learning_rate": 0.00019968578185776515, "loss": 4.9588, "step": 740 }, { "epoch": 0.07683284837566182, "grad_norm": 0.828125, "learning_rate": 0.0001996849208491702, "loss": 4.9746, "step": 741 }, { "epoch": 0.07693653643014989, "grad_norm": 0.859375, "learning_rate": 0.0001996840586643986, "loss": 4.9714, "step": 742 }, { "epoch": 0.07704022448463797, "grad_norm": 0.7265625, "learning_rate": 0.00019968319530346048, "loss": 4.9626, "step": 743 }, { "epoch": 0.07714391253912604, "grad_norm": 0.80078125, "learning_rate": 0.000199682330766366, "loss": 4.9415, "step": 744 }, { "epoch": 0.07724760059361412, "grad_norm": 0.84375, "learning_rate": 0.0001996814650531254, "loss": 4.9414, "step": 745 }, { "epoch": 0.07735128864810219, "grad_norm": 1.15625, "learning_rate": 0.00019968059816374888, "loss": 4.9692, "step": 746 }, { "epoch": 0.07745497670259026, "grad_norm": 0.79296875, "learning_rate": 0.00019967973009824664, "loss": 4.9797, "step": 747 }, { "epoch": 0.07755866475707833, "grad_norm": 0.734375, "learning_rate": 0.000199678860856629, "loss": 4.9844, "step": 748 }, { "epoch": 0.0776623528115664, "grad_norm": 0.90625, "learning_rate": 0.00019967799043890615, "loss": 4.982, "step": 749 }, { "epoch": 0.07776604086605447, "grad_norm": 0.82421875, "learning_rate": 0.00019967711884508839, "loss": 4.9981, "step": 750 }, { "epoch": 0.07786972892054254, "grad_norm": 0.87109375, "learning_rate": 0.00019967624607518595, "loss": 4.9928, "step": 751 }, { "epoch": 0.07797341697503062, "grad_norm": 1.078125, "learning_rate": 0.0001996753721292092, "loss": 4.9345, "step": 752 }, { "epoch": 0.07807710502951869, "grad_norm": 0.765625, "learning_rate": 0.0001996744970071684, "loss": 4.9615, "step": 753 }, { "epoch": 0.07818079308400677, "grad_norm": 0.84375, "learning_rate": 0.0001996736207090739, "loss": 5.0009, "step": 754 }, { "epoch": 0.07828448113849484, "grad_norm": 0.85546875, "learning_rate": 0.00019967274323493605, "loss": 4.9854, "step": 755 }, { "epoch": 0.07838816919298291, "grad_norm": 0.61328125, "learning_rate": 0.0001996718645847652, "loss": 4.9598, "step": 756 }, { "epoch": 0.07849185724747099, "grad_norm": 0.7890625, "learning_rate": 0.0001996709847585717, "loss": 4.9534, "step": 757 }, { "epoch": 0.07859554530195906, "grad_norm": 0.95703125, "learning_rate": 0.0001996701037563659, "loss": 4.9808, "step": 758 }, { "epoch": 0.07869923335644713, "grad_norm": 1.0703125, "learning_rate": 0.00019966922157815825, "loss": 4.9569, "step": 759 }, { "epoch": 0.0788029214109352, "grad_norm": 0.94140625, "learning_rate": 0.00019966833822395916, "loss": 4.9297, "step": 760 }, { "epoch": 0.07890660946542327, "grad_norm": 0.91015625, "learning_rate": 0.000199667453693779, "loss": 4.9103, "step": 761 }, { "epoch": 0.07901029751991134, "grad_norm": 0.7109375, "learning_rate": 0.00019966656798762827, "loss": 4.9669, "step": 762 }, { "epoch": 0.07911398557439941, "grad_norm": 0.60546875, "learning_rate": 0.00019966568110551736, "loss": 4.9627, "step": 763 }, { "epoch": 0.07921767362888749, "grad_norm": 0.60546875, "learning_rate": 0.0001996647930474568, "loss": 4.9544, "step": 764 }, { "epoch": 0.07932136168337557, "grad_norm": 0.7734375, "learning_rate": 0.000199663903813457, "loss": 4.9443, "step": 765 }, { "epoch": 0.07942504973786364, "grad_norm": 0.6796875, "learning_rate": 0.00019966301340352852, "loss": 4.9739, "step": 766 }, { "epoch": 0.07952873779235171, "grad_norm": 0.578125, "learning_rate": 0.00019966212181768178, "loss": 4.9593, "step": 767 }, { "epoch": 0.07963242584683979, "grad_norm": 0.77734375, "learning_rate": 0.0001996612290559274, "loss": 4.9728, "step": 768 }, { "epoch": 0.07973611390132786, "grad_norm": 0.65234375, "learning_rate": 0.00019966033511827584, "loss": 4.968, "step": 769 }, { "epoch": 0.07983980195581593, "grad_norm": 0.58203125, "learning_rate": 0.00019965944000473768, "loss": 4.9428, "step": 770 }, { "epoch": 0.079943490010304, "grad_norm": 0.6796875, "learning_rate": 0.00019965854371532346, "loss": 4.9784, "step": 771 }, { "epoch": 0.08004717806479207, "grad_norm": 0.62890625, "learning_rate": 0.00019965764625004377, "loss": 4.9521, "step": 772 }, { "epoch": 0.08015086611928014, "grad_norm": 0.5234375, "learning_rate": 0.0001996567476089092, "loss": 4.9318, "step": 773 }, { "epoch": 0.08025455417376821, "grad_norm": 0.55859375, "learning_rate": 0.00019965584779193035, "loss": 4.9945, "step": 774 }, { "epoch": 0.08035824222825629, "grad_norm": 0.55859375, "learning_rate": 0.00019965494679911782, "loss": 4.9496, "step": 775 }, { "epoch": 0.08046193028274436, "grad_norm": 0.427734375, "learning_rate": 0.0001996540446304823, "loss": 4.9248, "step": 776 }, { "epoch": 0.08056561833723244, "grad_norm": 0.625, "learning_rate": 0.00019965314128603435, "loss": 4.9327, "step": 777 }, { "epoch": 0.08066930639172051, "grad_norm": 0.5546875, "learning_rate": 0.00019965223676578472, "loss": 4.9228, "step": 778 }, { "epoch": 0.08077299444620858, "grad_norm": 0.546875, "learning_rate": 0.00019965133106974396, "loss": 4.9292, "step": 779 }, { "epoch": 0.08087668250069666, "grad_norm": 0.54296875, "learning_rate": 0.00019965042419792288, "loss": 4.944, "step": 780 }, { "epoch": 0.08098037055518473, "grad_norm": 0.51171875, "learning_rate": 0.00019964951615033215, "loss": 4.9587, "step": 781 }, { "epoch": 0.0810840586096728, "grad_norm": 0.6328125, "learning_rate": 0.0001996486069269824, "loss": 4.9611, "step": 782 }, { "epoch": 0.08118774666416087, "grad_norm": 0.61328125, "learning_rate": 0.00019964769652788448, "loss": 4.9236, "step": 783 }, { "epoch": 0.08129143471864894, "grad_norm": 0.63671875, "learning_rate": 0.00019964678495304906, "loss": 4.9575, "step": 784 }, { "epoch": 0.08139512277313701, "grad_norm": 0.55859375, "learning_rate": 0.00019964587220248686, "loss": 4.9343, "step": 785 }, { "epoch": 0.08149881082762508, "grad_norm": 0.5390625, "learning_rate": 0.00019964495827620875, "loss": 4.9315, "step": 786 }, { "epoch": 0.08160249888211316, "grad_norm": 0.640625, "learning_rate": 0.0001996440431742254, "loss": 4.9526, "step": 787 }, { "epoch": 0.08170618693660124, "grad_norm": 0.56640625, "learning_rate": 0.00019964312689654777, "loss": 4.9687, "step": 788 }, { "epoch": 0.08180987499108931, "grad_norm": 0.5390625, "learning_rate": 0.0001996422094431865, "loss": 4.9541, "step": 789 }, { "epoch": 0.08191356304557738, "grad_norm": 0.515625, "learning_rate": 0.0001996412908141525, "loss": 5.007, "step": 790 }, { "epoch": 0.08201725110006546, "grad_norm": 0.65625, "learning_rate": 0.0001996403710094566, "loss": 4.8851, "step": 791 }, { "epoch": 0.08212093915455353, "grad_norm": 0.6484375, "learning_rate": 0.00019963945002910964, "loss": 4.8957, "step": 792 }, { "epoch": 0.0822246272090416, "grad_norm": 0.6015625, "learning_rate": 0.0001996385278731225, "loss": 4.9079, "step": 793 }, { "epoch": 0.08232831526352967, "grad_norm": 0.546875, "learning_rate": 0.00019963760454150603, "loss": 4.9504, "step": 794 }, { "epoch": 0.08243200331801774, "grad_norm": 0.6640625, "learning_rate": 0.0001996366800342712, "loss": 4.9559, "step": 795 }, { "epoch": 0.08253569137250581, "grad_norm": 0.6328125, "learning_rate": 0.0001996357543514288, "loss": 4.9396, "step": 796 }, { "epoch": 0.08263937942699388, "grad_norm": 0.51171875, "learning_rate": 0.00019963482749298984, "loss": 4.9287, "step": 797 }, { "epoch": 0.08274306748148196, "grad_norm": 0.71484375, "learning_rate": 0.00019963389945896527, "loss": 4.922, "step": 798 }, { "epoch": 0.08284675553597004, "grad_norm": 0.73046875, "learning_rate": 0.00019963297024936595, "loss": 4.9211, "step": 799 }, { "epoch": 0.08295044359045811, "grad_norm": 0.87109375, "learning_rate": 0.00019963203986420296, "loss": 4.9202, "step": 800 }, { "epoch": 0.08305413164494618, "grad_norm": 0.98828125, "learning_rate": 0.00019963110830348714, "loss": 4.9276, "step": 801 }, { "epoch": 0.08315781969943425, "grad_norm": 1.2109375, "learning_rate": 0.00019963017556722963, "loss": 4.8891, "step": 802 }, { "epoch": 0.08326150775392233, "grad_norm": 0.9375, "learning_rate": 0.0001996292416554413, "loss": 4.9741, "step": 803 }, { "epoch": 0.0833651958084104, "grad_norm": 0.78515625, "learning_rate": 0.0001996283065681333, "loss": 4.9151, "step": 804 }, { "epoch": 0.08346888386289847, "grad_norm": 0.6796875, "learning_rate": 0.00019962737030531654, "loss": 4.9354, "step": 805 }, { "epoch": 0.08357257191738654, "grad_norm": 0.80859375, "learning_rate": 0.00019962643286700215, "loss": 4.9499, "step": 806 }, { "epoch": 0.08367625997187461, "grad_norm": 0.79296875, "learning_rate": 0.00019962549425320112, "loss": 4.9544, "step": 807 }, { "epoch": 0.08377994802636268, "grad_norm": 0.72265625, "learning_rate": 0.00019962455446392461, "loss": 4.9092, "step": 808 }, { "epoch": 0.08388363608085075, "grad_norm": 0.74609375, "learning_rate": 0.00019962361349918365, "loss": 4.9512, "step": 809 }, { "epoch": 0.08398732413533884, "grad_norm": 0.6171875, "learning_rate": 0.00019962267135898936, "loss": 4.9327, "step": 810 }, { "epoch": 0.08409101218982691, "grad_norm": 0.7109375, "learning_rate": 0.00019962172804335285, "loss": 4.944, "step": 811 }, { "epoch": 0.08419470024431498, "grad_norm": 0.796875, "learning_rate": 0.00019962078355228525, "loss": 4.8733, "step": 812 }, { "epoch": 0.08429838829880305, "grad_norm": 0.765625, "learning_rate": 0.0001996198378857977, "loss": 4.901, "step": 813 }, { "epoch": 0.08440207635329113, "grad_norm": 0.65625, "learning_rate": 0.00019961889104390138, "loss": 4.9424, "step": 814 }, { "epoch": 0.0845057644077792, "grad_norm": 0.62890625, "learning_rate": 0.00019961794302660746, "loss": 4.9486, "step": 815 }, { "epoch": 0.08460945246226727, "grad_norm": 0.73046875, "learning_rate": 0.00019961699383392708, "loss": 4.9393, "step": 816 }, { "epoch": 0.08471314051675534, "grad_norm": 0.6953125, "learning_rate": 0.0001996160434658715, "loss": 4.9351, "step": 817 }, { "epoch": 0.08481682857124341, "grad_norm": 0.62890625, "learning_rate": 0.0001996150919224519, "loss": 4.9562, "step": 818 }, { "epoch": 0.08492051662573148, "grad_norm": 0.61328125, "learning_rate": 0.00019961413920367948, "loss": 4.9268, "step": 819 }, { "epoch": 0.08502420468021955, "grad_norm": 0.73046875, "learning_rate": 0.00019961318530956556, "loss": 4.9037, "step": 820 }, { "epoch": 0.08512789273470764, "grad_norm": 0.625, "learning_rate": 0.00019961223024012132, "loss": 4.9261, "step": 821 }, { "epoch": 0.08523158078919571, "grad_norm": 0.640625, "learning_rate": 0.0001996112739953581, "loss": 4.8719, "step": 822 }, { "epoch": 0.08533526884368378, "grad_norm": 0.7109375, "learning_rate": 0.00019961031657528708, "loss": 4.8848, "step": 823 }, { "epoch": 0.08543895689817185, "grad_norm": 0.63671875, "learning_rate": 0.00019960935797991967, "loss": 4.9255, "step": 824 }, { "epoch": 0.08554264495265992, "grad_norm": 0.65234375, "learning_rate": 0.0001996083982092671, "loss": 4.8943, "step": 825 }, { "epoch": 0.085646333007148, "grad_norm": 0.65234375, "learning_rate": 0.00019960743726334072, "loss": 4.8836, "step": 826 }, { "epoch": 0.08575002106163607, "grad_norm": 0.62109375, "learning_rate": 0.0001996064751421519, "loss": 4.9091, "step": 827 }, { "epoch": 0.08585370911612414, "grad_norm": 0.5234375, "learning_rate": 0.00019960551184571192, "loss": 4.8901, "step": 828 }, { "epoch": 0.08595739717061221, "grad_norm": 0.52734375, "learning_rate": 0.00019960454737403223, "loss": 4.9074, "step": 829 }, { "epoch": 0.08606108522510028, "grad_norm": 0.59765625, "learning_rate": 0.00019960358172712412, "loss": 4.9147, "step": 830 }, { "epoch": 0.08616477327958835, "grad_norm": 0.470703125, "learning_rate": 0.00019960261490499907, "loss": 4.8972, "step": 831 }, { "epoch": 0.08626846133407644, "grad_norm": 0.5234375, "learning_rate": 0.00019960164690766843, "loss": 4.9044, "step": 832 }, { "epoch": 0.08637214938856451, "grad_norm": 0.6171875, "learning_rate": 0.00019960067773514364, "loss": 4.9212, "step": 833 }, { "epoch": 0.08647583744305258, "grad_norm": 0.5546875, "learning_rate": 0.00019959970738743613, "loss": 4.9064, "step": 834 }, { "epoch": 0.08657952549754065, "grad_norm": 0.5859375, "learning_rate": 0.00019959873586455738, "loss": 4.8741, "step": 835 }, { "epoch": 0.08668321355202872, "grad_norm": 0.7578125, "learning_rate": 0.0001995977631665188, "loss": 4.9145, "step": 836 }, { "epoch": 0.0867869016065168, "grad_norm": 0.80859375, "learning_rate": 0.0001995967892933319, "loss": 4.9278, "step": 837 }, { "epoch": 0.08689058966100487, "grad_norm": 0.74609375, "learning_rate": 0.00019959581424500817, "loss": 4.9188, "step": 838 }, { "epoch": 0.08699427771549294, "grad_norm": 0.6953125, "learning_rate": 0.00019959483802155912, "loss": 4.9492, "step": 839 }, { "epoch": 0.08709796576998101, "grad_norm": 0.74609375, "learning_rate": 0.00019959386062299626, "loss": 4.9078, "step": 840 }, { "epoch": 0.08720165382446908, "grad_norm": 0.89453125, "learning_rate": 0.0001995928820493311, "loss": 4.898, "step": 841 }, { "epoch": 0.08730534187895715, "grad_norm": 1.078125, "learning_rate": 0.00019959190230057518, "loss": 4.9058, "step": 842 }, { "epoch": 0.08740902993344522, "grad_norm": 1.203125, "learning_rate": 0.00019959092137674013, "loss": 4.8931, "step": 843 }, { "epoch": 0.08751271798793331, "grad_norm": 0.828125, "learning_rate": 0.0001995899392778375, "loss": 4.9151, "step": 844 }, { "epoch": 0.08761640604242138, "grad_norm": 0.703125, "learning_rate": 0.0001995889560038788, "loss": 4.9288, "step": 845 }, { "epoch": 0.08772009409690945, "grad_norm": 0.6875, "learning_rate": 0.0001995879715548757, "loss": 4.8856, "step": 846 }, { "epoch": 0.08782378215139752, "grad_norm": 1.046875, "learning_rate": 0.00019958698593083981, "loss": 4.8822, "step": 847 }, { "epoch": 0.0879274702058856, "grad_norm": 1.1640625, "learning_rate": 0.00019958599913178277, "loss": 4.9602, "step": 848 }, { "epoch": 0.08803115826037367, "grad_norm": 0.859375, "learning_rate": 0.00019958501115771622, "loss": 4.9163, "step": 849 }, { "epoch": 0.08813484631486174, "grad_norm": 0.83203125, "learning_rate": 0.00019958402200865178, "loss": 4.918, "step": 850 }, { "epoch": 0.08823853436934981, "grad_norm": 0.77734375, "learning_rate": 0.00019958303168460115, "loss": 4.8722, "step": 851 }, { "epoch": 0.08834222242383788, "grad_norm": 0.81640625, "learning_rate": 0.000199582040185576, "loss": 4.8877, "step": 852 }, { "epoch": 0.08844591047832595, "grad_norm": 0.85546875, "learning_rate": 0.00019958104751158806, "loss": 4.905, "step": 853 }, { "epoch": 0.08854959853281402, "grad_norm": 0.70703125, "learning_rate": 0.00019958005366264901, "loss": 4.8835, "step": 854 }, { "epoch": 0.08865328658730211, "grad_norm": 0.6171875, "learning_rate": 0.0001995790586387706, "loss": 4.8851, "step": 855 }, { "epoch": 0.08875697464179018, "grad_norm": 0.57421875, "learning_rate": 0.00019957806243996453, "loss": 4.8804, "step": 856 }, { "epoch": 0.08886066269627825, "grad_norm": 0.6328125, "learning_rate": 0.0001995770650662426, "loss": 4.9273, "step": 857 }, { "epoch": 0.08896435075076632, "grad_norm": 0.7421875, "learning_rate": 0.00019957606651761656, "loss": 4.872, "step": 858 }, { "epoch": 0.0890680388052544, "grad_norm": 0.78125, "learning_rate": 0.00019957506679409818, "loss": 4.9288, "step": 859 }, { "epoch": 0.08917172685974246, "grad_norm": 0.75390625, "learning_rate": 0.00019957406589569927, "loss": 4.8692, "step": 860 }, { "epoch": 0.08927541491423054, "grad_norm": 0.76171875, "learning_rate": 0.00019957306382243167, "loss": 4.9116, "step": 861 }, { "epoch": 0.08937910296871861, "grad_norm": 0.7578125, "learning_rate": 0.00019957206057430712, "loss": 4.8783, "step": 862 }, { "epoch": 0.08948279102320668, "grad_norm": 0.8671875, "learning_rate": 0.00019957105615133754, "loss": 4.8542, "step": 863 }, { "epoch": 0.08958647907769475, "grad_norm": 0.67578125, "learning_rate": 0.00019957005055353474, "loss": 4.8961, "step": 864 }, { "epoch": 0.08969016713218282, "grad_norm": 0.50390625, "learning_rate": 0.0001995690437809106, "loss": 4.848, "step": 865 }, { "epoch": 0.0897938551866709, "grad_norm": 0.765625, "learning_rate": 0.00019956803583347696, "loss": 4.8486, "step": 866 }, { "epoch": 0.08989754324115898, "grad_norm": 0.80859375, "learning_rate": 0.0001995670267112458, "loss": 4.8902, "step": 867 }, { "epoch": 0.09000123129564705, "grad_norm": 0.65234375, "learning_rate": 0.00019956601641422892, "loss": 4.8824, "step": 868 }, { "epoch": 0.09010491935013512, "grad_norm": 0.6953125, "learning_rate": 0.00019956500494243832, "loss": 4.8438, "step": 869 }, { "epoch": 0.09020860740462319, "grad_norm": 0.59765625, "learning_rate": 0.00019956399229588588, "loss": 4.9038, "step": 870 }, { "epoch": 0.09031229545911126, "grad_norm": 0.65234375, "learning_rate": 0.0001995629784745836, "loss": 4.8895, "step": 871 }, { "epoch": 0.09041598351359934, "grad_norm": 0.6953125, "learning_rate": 0.0001995619634785434, "loss": 4.9058, "step": 872 }, { "epoch": 0.0905196715680874, "grad_norm": 0.59765625, "learning_rate": 0.0001995609473077773, "loss": 4.8509, "step": 873 }, { "epoch": 0.09062335962257548, "grad_norm": 0.625, "learning_rate": 0.00019955992996229728, "loss": 4.8988, "step": 874 }, { "epoch": 0.09072704767706355, "grad_norm": 0.65625, "learning_rate": 0.00019955891144211524, "loss": 4.9242, "step": 875 }, { "epoch": 0.09083073573155162, "grad_norm": 0.57421875, "learning_rate": 0.00019955789174724338, "loss": 4.8703, "step": 876 }, { "epoch": 0.0909344237860397, "grad_norm": 0.7890625, "learning_rate": 0.00019955687087769357, "loss": 4.8834, "step": 877 }, { "epoch": 0.09103811184052778, "grad_norm": 0.92578125, "learning_rate": 0.00019955584883347792, "loss": 4.9105, "step": 878 }, { "epoch": 0.09114179989501585, "grad_norm": 0.859375, "learning_rate": 0.00019955482561460853, "loss": 4.9056, "step": 879 }, { "epoch": 0.09124548794950392, "grad_norm": 0.94921875, "learning_rate": 0.00019955380122109738, "loss": 4.8805, "step": 880 }, { "epoch": 0.09134917600399199, "grad_norm": 0.95703125, "learning_rate": 0.00019955277565295666, "loss": 4.8183, "step": 881 }, { "epoch": 0.09145286405848006, "grad_norm": 0.90234375, "learning_rate": 0.00019955174891019838, "loss": 4.8363, "step": 882 }, { "epoch": 0.09155655211296813, "grad_norm": 1.0390625, "learning_rate": 0.00019955072099283472, "loss": 4.8429, "step": 883 }, { "epoch": 0.0916602401674562, "grad_norm": 1.0703125, "learning_rate": 0.00019954969190087777, "loss": 4.8869, "step": 884 }, { "epoch": 0.09176392822194428, "grad_norm": 0.875, "learning_rate": 0.0001995486616343397, "loss": 4.9302, "step": 885 }, { "epoch": 0.09186761627643235, "grad_norm": 1.015625, "learning_rate": 0.00019954763019323265, "loss": 4.8797, "step": 886 }, { "epoch": 0.09197130433092042, "grad_norm": 1.21875, "learning_rate": 0.00019954659757756877, "loss": 4.8558, "step": 887 }, { "epoch": 0.0920749923854085, "grad_norm": 0.5859375, "learning_rate": 0.00019954556378736028, "loss": 4.9109, "step": 888 }, { "epoch": 0.09217868043989658, "grad_norm": 0.8515625, "learning_rate": 0.00019954452882261933, "loss": 4.8751, "step": 889 }, { "epoch": 0.09228236849438465, "grad_norm": 1.3125, "learning_rate": 0.0001995434926833582, "loss": 4.8356, "step": 890 }, { "epoch": 0.09238605654887272, "grad_norm": 0.66796875, "learning_rate": 0.00019954245536958908, "loss": 4.8736, "step": 891 }, { "epoch": 0.09248974460336079, "grad_norm": 0.96875, "learning_rate": 0.00019954141688132419, "loss": 4.922, "step": 892 }, { "epoch": 0.09259343265784886, "grad_norm": 1.1875, "learning_rate": 0.0001995403772185758, "loss": 4.8539, "step": 893 }, { "epoch": 0.09269712071233693, "grad_norm": 0.80859375, "learning_rate": 0.00019953933638135616, "loss": 4.9001, "step": 894 }, { "epoch": 0.092800808766825, "grad_norm": 1.078125, "learning_rate": 0.00019953829436967759, "loss": 4.8676, "step": 895 }, { "epoch": 0.09290449682131308, "grad_norm": 0.94140625, "learning_rate": 0.00019953725118355235, "loss": 4.8616, "step": 896 }, { "epoch": 0.09300818487580115, "grad_norm": 0.84765625, "learning_rate": 0.00019953620682299278, "loss": 4.8614, "step": 897 }, { "epoch": 0.09311187293028922, "grad_norm": 0.9921875, "learning_rate": 0.0001995351612880112, "loss": 4.8647, "step": 898 }, { "epoch": 0.0932155609847773, "grad_norm": 1.0625, "learning_rate": 0.0001995341145786199, "loss": 4.8408, "step": 899 }, { "epoch": 0.09331924903926538, "grad_norm": 1.09375, "learning_rate": 0.00019953306669483127, "loss": 4.9216, "step": 900 }, { "epoch": 0.09342293709375345, "grad_norm": 0.859375, "learning_rate": 0.00019953201763665766, "loss": 4.8706, "step": 901 }, { "epoch": 0.09352662514824152, "grad_norm": 0.96484375, "learning_rate": 0.00019953096740411144, "loss": 4.8723, "step": 902 }, { "epoch": 0.09363031320272959, "grad_norm": 0.96484375, "learning_rate": 0.00019952991599720503, "loss": 4.8612, "step": 903 }, { "epoch": 0.09373400125721766, "grad_norm": 0.8125, "learning_rate": 0.0001995288634159508, "loss": 4.9012, "step": 904 }, { "epoch": 0.09383768931170573, "grad_norm": 1.125, "learning_rate": 0.00019952780966036123, "loss": 4.8422, "step": 905 }, { "epoch": 0.0939413773661938, "grad_norm": 0.64453125, "learning_rate": 0.00019952675473044868, "loss": 4.8762, "step": 906 }, { "epoch": 0.09404506542068188, "grad_norm": 1.0078125, "learning_rate": 0.00019952569862622562, "loss": 4.8799, "step": 907 }, { "epoch": 0.09414875347516995, "grad_norm": 0.87890625, "learning_rate": 0.00019952464134770454, "loss": 4.8731, "step": 908 }, { "epoch": 0.09425244152965802, "grad_norm": 0.9453125, "learning_rate": 0.0001995235828948979, "loss": 4.8628, "step": 909 }, { "epoch": 0.0943561295841461, "grad_norm": 0.96484375, "learning_rate": 0.00019952252326781815, "loss": 4.8959, "step": 910 }, { "epoch": 0.09445981763863417, "grad_norm": 0.9296875, "learning_rate": 0.00019952146246647785, "loss": 4.8658, "step": 911 }, { "epoch": 0.09456350569312225, "grad_norm": 0.8828125, "learning_rate": 0.0001995204004908895, "loss": 4.8525, "step": 912 }, { "epoch": 0.09466719374761032, "grad_norm": 0.68359375, "learning_rate": 0.0001995193373410656, "loss": 4.8731, "step": 913 }, { "epoch": 0.09477088180209839, "grad_norm": 0.765625, "learning_rate": 0.00019951827301701872, "loss": 4.862, "step": 914 }, { "epoch": 0.09487456985658646, "grad_norm": 0.77734375, "learning_rate": 0.00019951720751876142, "loss": 4.8848, "step": 915 }, { "epoch": 0.09497825791107453, "grad_norm": 0.66796875, "learning_rate": 0.00019951614084630625, "loss": 4.8816, "step": 916 }, { "epoch": 0.0950819459655626, "grad_norm": 0.7890625, "learning_rate": 0.00019951507299966585, "loss": 4.8559, "step": 917 }, { "epoch": 0.09518563402005067, "grad_norm": 0.63671875, "learning_rate": 0.00019951400397885273, "loss": 4.8733, "step": 918 }, { "epoch": 0.09528932207453875, "grad_norm": 0.8359375, "learning_rate": 0.00019951293378387962, "loss": 4.7999, "step": 919 }, { "epoch": 0.09539301012902682, "grad_norm": 0.8203125, "learning_rate": 0.000199511862414759, "loss": 4.829, "step": 920 }, { "epoch": 0.09549669818351489, "grad_norm": 0.62890625, "learning_rate": 0.00019951078987150365, "loss": 4.893, "step": 921 }, { "epoch": 0.09560038623800297, "grad_norm": 0.78515625, "learning_rate": 0.00019950971615412616, "loss": 4.853, "step": 922 }, { "epoch": 0.09570407429249105, "grad_norm": 0.7578125, "learning_rate": 0.00019950864126263917, "loss": 4.8412, "step": 923 }, { "epoch": 0.09580776234697912, "grad_norm": 0.76171875, "learning_rate": 0.00019950756519705544, "loss": 4.8648, "step": 924 }, { "epoch": 0.09591145040146719, "grad_norm": 0.6953125, "learning_rate": 0.0001995064879573876, "loss": 4.8454, "step": 925 }, { "epoch": 0.09601513845595526, "grad_norm": 0.68359375, "learning_rate": 0.0001995054095436484, "loss": 4.8425, "step": 926 }, { "epoch": 0.09611882651044333, "grad_norm": 0.6015625, "learning_rate": 0.00019950432995585054, "loss": 4.8581, "step": 927 }, { "epoch": 0.0962225145649314, "grad_norm": 0.73046875, "learning_rate": 0.00019950324919400676, "loss": 4.8878, "step": 928 }, { "epoch": 0.09632620261941947, "grad_norm": 0.79296875, "learning_rate": 0.00019950216725812982, "loss": 4.7899, "step": 929 }, { "epoch": 0.09642989067390755, "grad_norm": 0.5625, "learning_rate": 0.0001995010841482325, "loss": 4.8928, "step": 930 }, { "epoch": 0.09653357872839562, "grad_norm": 0.6328125, "learning_rate": 0.00019949999986432757, "loss": 4.9347, "step": 931 }, { "epoch": 0.09663726678288369, "grad_norm": 0.67578125, "learning_rate": 0.0001994989144064278, "loss": 4.8777, "step": 932 }, { "epoch": 0.09674095483737177, "grad_norm": 0.6328125, "learning_rate": 0.00019949782777454602, "loss": 4.9026, "step": 933 }, { "epoch": 0.09684464289185984, "grad_norm": 0.71484375, "learning_rate": 0.00019949673996869506, "loss": 4.8867, "step": 934 }, { "epoch": 0.09694833094634792, "grad_norm": 0.59375, "learning_rate": 0.00019949565098888771, "loss": 4.847, "step": 935 }, { "epoch": 0.09705201900083599, "grad_norm": 0.60546875, "learning_rate": 0.00019949456083513686, "loss": 4.8488, "step": 936 }, { "epoch": 0.09715570705532406, "grad_norm": 0.578125, "learning_rate": 0.00019949346950745537, "loss": 4.8131, "step": 937 }, { "epoch": 0.09725939510981213, "grad_norm": 0.51171875, "learning_rate": 0.0001994923770058561, "loss": 4.8802, "step": 938 }, { "epoch": 0.0973630831643002, "grad_norm": 0.5546875, "learning_rate": 0.00019949128333035198, "loss": 4.8309, "step": 939 }, { "epoch": 0.09746677121878827, "grad_norm": 0.6640625, "learning_rate": 0.00019949018848095586, "loss": 4.8319, "step": 940 }, { "epoch": 0.09757045927327634, "grad_norm": 0.6015625, "learning_rate": 0.00019948909245768066, "loss": 4.8677, "step": 941 }, { "epoch": 0.09767414732776442, "grad_norm": 0.625, "learning_rate": 0.00019948799526053938, "loss": 4.8561, "step": 942 }, { "epoch": 0.09777783538225249, "grad_norm": 0.7265625, "learning_rate": 0.00019948689688954489, "loss": 4.8883, "step": 943 }, { "epoch": 0.09788152343674057, "grad_norm": 0.62890625, "learning_rate": 0.00019948579734471017, "loss": 4.826, "step": 944 }, { "epoch": 0.09798521149122864, "grad_norm": 0.5625, "learning_rate": 0.00019948469662604823, "loss": 4.7702, "step": 945 }, { "epoch": 0.09808889954571672, "grad_norm": 0.55859375, "learning_rate": 0.00019948359473357202, "loss": 4.8859, "step": 946 }, { "epoch": 0.09819258760020479, "grad_norm": 0.70703125, "learning_rate": 0.00019948249166729454, "loss": 4.8573, "step": 947 }, { "epoch": 0.09829627565469286, "grad_norm": 0.51171875, "learning_rate": 0.0001994813874272288, "loss": 4.8528, "step": 948 }, { "epoch": 0.09839996370918093, "grad_norm": 0.6171875, "learning_rate": 0.00019948028201338788, "loss": 4.9022, "step": 949 }, { "epoch": 0.098503651763669, "grad_norm": 0.578125, "learning_rate": 0.00019947917542578478, "loss": 4.8382, "step": 950 }, { "epoch": 0.09860733981815707, "grad_norm": 0.640625, "learning_rate": 0.00019947806766443255, "loss": 4.8507, "step": 951 }, { "epoch": 0.09871102787264514, "grad_norm": 0.65234375, "learning_rate": 0.0001994769587293443, "loss": 4.7939, "step": 952 }, { "epoch": 0.09881471592713321, "grad_norm": 0.5546875, "learning_rate": 0.00019947584862053307, "loss": 4.8567, "step": 953 }, { "epoch": 0.09891840398162129, "grad_norm": 0.640625, "learning_rate": 0.00019947473733801196, "loss": 4.8055, "step": 954 }, { "epoch": 0.09902209203610937, "grad_norm": 0.76953125, "learning_rate": 0.00019947362488179413, "loss": 4.8541, "step": 955 }, { "epoch": 0.09912578009059744, "grad_norm": 0.89453125, "learning_rate": 0.00019947251125189264, "loss": 4.8555, "step": 956 }, { "epoch": 0.09922946814508551, "grad_norm": 0.9296875, "learning_rate": 0.0001994713964483207, "loss": 4.8593, "step": 957 }, { "epoch": 0.09933315619957359, "grad_norm": 0.796875, "learning_rate": 0.00019947028047109143, "loss": 4.8419, "step": 958 }, { "epoch": 0.09943684425406166, "grad_norm": 0.81640625, "learning_rate": 0.00019946916332021797, "loss": 4.8051, "step": 959 }, { "epoch": 0.09954053230854973, "grad_norm": 0.98046875, "learning_rate": 0.00019946804499571354, "loss": 4.8403, "step": 960 }, { "epoch": 0.0996442203630378, "grad_norm": 1.15625, "learning_rate": 0.00019946692549759133, "loss": 4.8334, "step": 961 }, { "epoch": 0.09974790841752587, "grad_norm": 0.96484375, "learning_rate": 0.00019946580482586452, "loss": 4.8314, "step": 962 }, { "epoch": 0.09985159647201394, "grad_norm": 1.0546875, "learning_rate": 0.00019946468298054636, "loss": 4.8393, "step": 963 }, { "epoch": 0.09995528452650201, "grad_norm": 1.046875, "learning_rate": 0.00019946355996165006, "loss": 4.8274, "step": 964 }, { "epoch": 0.10005897258099009, "grad_norm": 1.0, "learning_rate": 0.00019946243576918893, "loss": 4.844, "step": 965 }, { "epoch": 0.10016266063547817, "grad_norm": 0.90625, "learning_rate": 0.00019946131040317618, "loss": 4.8438, "step": 966 }, { "epoch": 0.10026634868996624, "grad_norm": 0.76171875, "learning_rate": 0.00019946018386362508, "loss": 4.8389, "step": 967 }, { "epoch": 0.10037003674445431, "grad_norm": 0.7734375, "learning_rate": 0.00019945905615054898, "loss": 4.822, "step": 968 }, { "epoch": 0.10047372479894238, "grad_norm": 0.8203125, "learning_rate": 0.00019945792726396114, "loss": 4.8085, "step": 969 }, { "epoch": 0.10057741285343046, "grad_norm": 0.890625, "learning_rate": 0.00019945679720387486, "loss": 4.8656, "step": 970 }, { "epoch": 0.10068110090791853, "grad_norm": 1.03125, "learning_rate": 0.00019945566597030353, "loss": 4.8728, "step": 971 }, { "epoch": 0.1007847889624066, "grad_norm": 0.9765625, "learning_rate": 0.00019945453356326045, "loss": 4.8481, "step": 972 }, { "epoch": 0.10088847701689467, "grad_norm": 0.80859375, "learning_rate": 0.00019945339998275903, "loss": 4.8336, "step": 973 }, { "epoch": 0.10099216507138274, "grad_norm": 0.71875, "learning_rate": 0.0001994522652288126, "loss": 4.8323, "step": 974 }, { "epoch": 0.10109585312587081, "grad_norm": 0.81640625, "learning_rate": 0.00019945112930143456, "loss": 4.8381, "step": 975 }, { "epoch": 0.10119954118035888, "grad_norm": 0.88671875, "learning_rate": 0.00019944999220063834, "loss": 4.8525, "step": 976 }, { "epoch": 0.10130322923484697, "grad_norm": 0.87109375, "learning_rate": 0.00019944885392643734, "loss": 4.8475, "step": 977 }, { "epoch": 0.10140691728933504, "grad_norm": 0.890625, "learning_rate": 0.00019944771447884496, "loss": 4.8167, "step": 978 }, { "epoch": 0.10151060534382311, "grad_norm": 0.94921875, "learning_rate": 0.00019944657385787467, "loss": 4.8381, "step": 979 }, { "epoch": 0.10161429339831118, "grad_norm": 0.93359375, "learning_rate": 0.00019944543206353995, "loss": 4.8668, "step": 980 }, { "epoch": 0.10171798145279926, "grad_norm": 0.80078125, "learning_rate": 0.00019944428909585423, "loss": 4.8108, "step": 981 }, { "epoch": 0.10182166950728733, "grad_norm": 0.8515625, "learning_rate": 0.00019944314495483104, "loss": 4.8041, "step": 982 }, { "epoch": 0.1019253575617754, "grad_norm": 1.015625, "learning_rate": 0.0001994419996404838, "loss": 4.7916, "step": 983 }, { "epoch": 0.10202904561626347, "grad_norm": 1.0546875, "learning_rate": 0.00019944085315282614, "loss": 4.8384, "step": 984 }, { "epoch": 0.10213273367075154, "grad_norm": 1.09375, "learning_rate": 0.0001994397054918715, "loss": 4.8389, "step": 985 }, { "epoch": 0.10223642172523961, "grad_norm": 0.7734375, "learning_rate": 0.00019943855665763345, "loss": 4.8711, "step": 986 }, { "epoch": 0.10234010977972768, "grad_norm": 0.66796875, "learning_rate": 0.00019943740665012553, "loss": 4.8159, "step": 987 }, { "epoch": 0.10244379783421576, "grad_norm": 1.0234375, "learning_rate": 0.00019943625546936134, "loss": 4.7974, "step": 988 }, { "epoch": 0.10254748588870384, "grad_norm": 0.9609375, "learning_rate": 0.00019943510311535445, "loss": 4.8683, "step": 989 }, { "epoch": 0.10265117394319191, "grad_norm": 0.734375, "learning_rate": 0.00019943394958811842, "loss": 4.8304, "step": 990 }, { "epoch": 0.10275486199767998, "grad_norm": 0.62109375, "learning_rate": 0.00019943279488766693, "loss": 4.8283, "step": 991 }, { "epoch": 0.10285855005216805, "grad_norm": 0.80859375, "learning_rate": 0.00019943163901401355, "loss": 4.8337, "step": 992 }, { "epoch": 0.10296223810665613, "grad_norm": 0.79296875, "learning_rate": 0.0001994304819671719, "loss": 4.8154, "step": 993 }, { "epoch": 0.1030659261611442, "grad_norm": 0.57421875, "learning_rate": 0.0001994293237471557, "loss": 4.8173, "step": 994 }, { "epoch": 0.10316961421563227, "grad_norm": 0.73046875, "learning_rate": 0.0001994281643539786, "loss": 4.8074, "step": 995 }, { "epoch": 0.10327330227012034, "grad_norm": 0.671875, "learning_rate": 0.00019942700378765423, "loss": 4.8224, "step": 996 }, { "epoch": 0.10337699032460841, "grad_norm": 0.58203125, "learning_rate": 0.00019942584204819632, "loss": 4.8471, "step": 997 }, { "epoch": 0.10348067837909648, "grad_norm": 0.64453125, "learning_rate": 0.00019942467913561859, "loss": 4.776, "step": 998 }, { "epoch": 0.10358436643358455, "grad_norm": 0.68359375, "learning_rate": 0.0001994235150499347, "loss": 4.8177, "step": 999 }, { "epoch": 0.10368805448807264, "grad_norm": 0.8515625, "learning_rate": 0.00019942234979115848, "loss": 4.8716, "step": 1000 }, { "epoch": 0.10379174254256071, "grad_norm": 0.70703125, "learning_rate": 0.0001994211833593036, "loss": 4.8509, "step": 1001 }, { "epoch": 0.10389543059704878, "grad_norm": 0.5, "learning_rate": 0.00019942001575438384, "loss": 4.7899, "step": 1002 }, { "epoch": 0.10399911865153685, "grad_norm": 0.65625, "learning_rate": 0.00019941884697641298, "loss": 4.8424, "step": 1003 }, { "epoch": 0.10410280670602493, "grad_norm": 0.63671875, "learning_rate": 0.00019941767702540483, "loss": 4.8242, "step": 1004 }, { "epoch": 0.104206494760513, "grad_norm": 0.69921875, "learning_rate": 0.0001994165059013732, "loss": 4.8327, "step": 1005 }, { "epoch": 0.10431018281500107, "grad_norm": 0.640625, "learning_rate": 0.00019941533360433184, "loss": 4.8073, "step": 1006 }, { "epoch": 0.10441387086948914, "grad_norm": 0.56640625, "learning_rate": 0.00019941416013429468, "loss": 4.82, "step": 1007 }, { "epoch": 0.10451755892397721, "grad_norm": 0.8046875, "learning_rate": 0.0001994129854912755, "loss": 4.8072, "step": 1008 }, { "epoch": 0.10462124697846528, "grad_norm": 0.6640625, "learning_rate": 0.00019941180967528817, "loss": 4.8173, "step": 1009 }, { "epoch": 0.10472493503295335, "grad_norm": 0.609375, "learning_rate": 0.00019941063268634655, "loss": 4.7941, "step": 1010 }, { "epoch": 0.10482862308744144, "grad_norm": 0.859375, "learning_rate": 0.00019940945452446456, "loss": 4.8366, "step": 1011 }, { "epoch": 0.10493231114192951, "grad_norm": 0.7890625, "learning_rate": 0.0001994082751896561, "loss": 4.832, "step": 1012 }, { "epoch": 0.10503599919641758, "grad_norm": 0.68359375, "learning_rate": 0.00019940709468193509, "loss": 4.8127, "step": 1013 }, { "epoch": 0.10513968725090565, "grad_norm": 0.734375, "learning_rate": 0.00019940591300131538, "loss": 4.8416, "step": 1014 }, { "epoch": 0.10524337530539372, "grad_norm": 0.68359375, "learning_rate": 0.00019940473014781103, "loss": 4.8164, "step": 1015 }, { "epoch": 0.1053470633598818, "grad_norm": 0.84765625, "learning_rate": 0.0001994035461214359, "loss": 4.8007, "step": 1016 }, { "epoch": 0.10545075141436987, "grad_norm": 0.8125, "learning_rate": 0.00019940236092220404, "loss": 4.7715, "step": 1017 }, { "epoch": 0.10555443946885794, "grad_norm": 0.77734375, "learning_rate": 0.00019940117455012935, "loss": 4.8128, "step": 1018 }, { "epoch": 0.10565812752334601, "grad_norm": 0.58203125, "learning_rate": 0.00019939998700522587, "loss": 4.8322, "step": 1019 }, { "epoch": 0.10576181557783408, "grad_norm": 0.79296875, "learning_rate": 0.00019939879828750768, "loss": 4.7856, "step": 1020 }, { "epoch": 0.10586550363232215, "grad_norm": 0.6875, "learning_rate": 0.00019939760839698866, "loss": 4.7796, "step": 1021 }, { "epoch": 0.10596919168681024, "grad_norm": 0.65625, "learning_rate": 0.00019939641733368298, "loss": 4.812, "step": 1022 }, { "epoch": 0.10607287974129831, "grad_norm": 0.796875, "learning_rate": 0.00019939522509760462, "loss": 4.8104, "step": 1023 }, { "epoch": 0.10617656779578638, "grad_norm": 0.8125, "learning_rate": 0.0001993940316887677, "loss": 4.8058, "step": 1024 }, { "epoch": 0.10628025585027445, "grad_norm": 0.8359375, "learning_rate": 0.00019939283710718623, "loss": 4.8323, "step": 1025 }, { "epoch": 0.10638394390476252, "grad_norm": 0.92578125, "learning_rate": 0.00019939164135287433, "loss": 4.8172, "step": 1026 }, { "epoch": 0.1064876319592506, "grad_norm": 0.7109375, "learning_rate": 0.00019939044442584617, "loss": 4.8447, "step": 1027 }, { "epoch": 0.10659132001373867, "grad_norm": 0.703125, "learning_rate": 0.00019938924632611582, "loss": 4.8457, "step": 1028 }, { "epoch": 0.10669500806822674, "grad_norm": 0.69921875, "learning_rate": 0.00019938804705369741, "loss": 4.8359, "step": 1029 }, { "epoch": 0.10679869612271481, "grad_norm": 0.67578125, "learning_rate": 0.00019938684660860513, "loss": 4.8462, "step": 1030 }, { "epoch": 0.10690238417720288, "grad_norm": 0.69921875, "learning_rate": 0.00019938564499085305, "loss": 4.853, "step": 1031 }, { "epoch": 0.10700607223169095, "grad_norm": 0.7421875, "learning_rate": 0.00019938444220045547, "loss": 4.7956, "step": 1032 }, { "epoch": 0.10710976028617904, "grad_norm": 0.59765625, "learning_rate": 0.0001993832382374265, "loss": 4.8215, "step": 1033 }, { "epoch": 0.10721344834066711, "grad_norm": 0.77734375, "learning_rate": 0.0001993820331017804, "loss": 4.8004, "step": 1034 }, { "epoch": 0.10731713639515518, "grad_norm": 0.796875, "learning_rate": 0.00019938082679353132, "loss": 4.8516, "step": 1035 }, { "epoch": 0.10742082444964325, "grad_norm": 0.80859375, "learning_rate": 0.00019937961931269357, "loss": 4.7898, "step": 1036 }, { "epoch": 0.10752451250413132, "grad_norm": 0.625, "learning_rate": 0.00019937841065928135, "loss": 4.8164, "step": 1037 }, { "epoch": 0.1076282005586194, "grad_norm": 0.6015625, "learning_rate": 0.00019937720083330893, "loss": 4.8158, "step": 1038 }, { "epoch": 0.10773188861310747, "grad_norm": 0.65234375, "learning_rate": 0.00019937598983479058, "loss": 4.8225, "step": 1039 }, { "epoch": 0.10783557666759554, "grad_norm": 0.703125, "learning_rate": 0.0001993747776637406, "loss": 4.7566, "step": 1040 }, { "epoch": 0.10793926472208361, "grad_norm": 0.62890625, "learning_rate": 0.00019937356432017328, "loss": 4.8108, "step": 1041 }, { "epoch": 0.10804295277657168, "grad_norm": 0.640625, "learning_rate": 0.00019937234980410296, "loss": 4.7864, "step": 1042 }, { "epoch": 0.10814664083105975, "grad_norm": 0.7109375, "learning_rate": 0.00019937113411554395, "loss": 4.8389, "step": 1043 }, { "epoch": 0.10825032888554784, "grad_norm": 0.6953125, "learning_rate": 0.00019936991725451057, "loss": 4.8236, "step": 1044 }, { "epoch": 0.10835401694003591, "grad_norm": 0.69921875, "learning_rate": 0.00019936869922101727, "loss": 4.8259, "step": 1045 }, { "epoch": 0.10845770499452398, "grad_norm": 0.69921875, "learning_rate": 0.0001993674800150783, "loss": 4.7816, "step": 1046 }, { "epoch": 0.10856139304901205, "grad_norm": 0.66796875, "learning_rate": 0.00019936625963670813, "loss": 4.7689, "step": 1047 }, { "epoch": 0.10866508110350012, "grad_norm": 0.7734375, "learning_rate": 0.0001993650380859211, "loss": 4.8243, "step": 1048 }, { "epoch": 0.1087687691579882, "grad_norm": 0.828125, "learning_rate": 0.00019936381536273166, "loss": 4.828, "step": 1049 }, { "epoch": 0.10887245721247626, "grad_norm": 0.87109375, "learning_rate": 0.00019936259146715425, "loss": 4.8372, "step": 1050 }, { "epoch": 0.10897614526696434, "grad_norm": 0.89453125, "learning_rate": 0.0001993613663992033, "loss": 4.7794, "step": 1051 }, { "epoch": 0.10907983332145241, "grad_norm": 0.7890625, "learning_rate": 0.00019936014015889321, "loss": 4.7735, "step": 1052 }, { "epoch": 0.10918352137594048, "grad_norm": 0.78125, "learning_rate": 0.00019935891274623852, "loss": 4.7367, "step": 1053 }, { "epoch": 0.10928720943042855, "grad_norm": 0.76953125, "learning_rate": 0.00019935768416125371, "loss": 4.8041, "step": 1054 }, { "epoch": 0.10939089748491662, "grad_norm": 0.875, "learning_rate": 0.00019935645440395322, "loss": 4.8382, "step": 1055 }, { "epoch": 0.1094945855394047, "grad_norm": 0.93359375, "learning_rate": 0.0001993552234743516, "loss": 4.7804, "step": 1056 }, { "epoch": 0.10959827359389278, "grad_norm": 0.77734375, "learning_rate": 0.00019935399137246338, "loss": 4.8278, "step": 1057 }, { "epoch": 0.10970196164838085, "grad_norm": 0.68359375, "learning_rate": 0.00019935275809830307, "loss": 4.7946, "step": 1058 }, { "epoch": 0.10980564970286892, "grad_norm": 0.7265625, "learning_rate": 0.00019935152365188526, "loss": 4.8324, "step": 1059 }, { "epoch": 0.10990933775735699, "grad_norm": 0.65234375, "learning_rate": 0.00019935028803322444, "loss": 4.7871, "step": 1060 }, { "epoch": 0.11001302581184506, "grad_norm": 0.671875, "learning_rate": 0.00019934905124233528, "loss": 4.7859, "step": 1061 }, { "epoch": 0.11011671386633314, "grad_norm": 0.75, "learning_rate": 0.00019934781327923232, "loss": 4.7975, "step": 1062 }, { "epoch": 0.1102204019208212, "grad_norm": 0.921875, "learning_rate": 0.00019934657414393016, "loss": 4.785, "step": 1063 }, { "epoch": 0.11032408997530928, "grad_norm": 0.890625, "learning_rate": 0.00019934533383644348, "loss": 4.8142, "step": 1064 }, { "epoch": 0.11042777802979735, "grad_norm": 0.99609375, "learning_rate": 0.00019934409235678683, "loss": 4.8027, "step": 1065 }, { "epoch": 0.11053146608428542, "grad_norm": 1.046875, "learning_rate": 0.00019934284970497492, "loss": 4.8281, "step": 1066 }, { "epoch": 0.1106351541387735, "grad_norm": 0.80078125, "learning_rate": 0.00019934160588102242, "loss": 4.8019, "step": 1067 }, { "epoch": 0.11073884219326158, "grad_norm": 0.9609375, "learning_rate": 0.00019934036088494394, "loss": 4.7709, "step": 1068 }, { "epoch": 0.11084253024774965, "grad_norm": 1.0, "learning_rate": 0.00019933911471675423, "loss": 4.7505, "step": 1069 }, { "epoch": 0.11094621830223772, "grad_norm": 0.9375, "learning_rate": 0.00019933786737646797, "loss": 4.8155, "step": 1070 }, { "epoch": 0.11104990635672579, "grad_norm": 1.203125, "learning_rate": 0.00019933661886409988, "loss": 4.8044, "step": 1071 }, { "epoch": 0.11115359441121386, "grad_norm": 0.9140625, "learning_rate": 0.00019933536917966468, "loss": 4.8024, "step": 1072 }, { "epoch": 0.11125728246570193, "grad_norm": 0.94140625, "learning_rate": 0.00019933411832317712, "loss": 4.7923, "step": 1073 }, { "epoch": 0.11136097052019, "grad_norm": 0.8828125, "learning_rate": 0.000199332866294652, "loss": 4.7929, "step": 1074 }, { "epoch": 0.11146465857467808, "grad_norm": 0.9296875, "learning_rate": 0.00019933161309410402, "loss": 4.8082, "step": 1075 }, { "epoch": 0.11156834662916615, "grad_norm": 1.0703125, "learning_rate": 0.00019933035872154802, "loss": 4.7484, "step": 1076 }, { "epoch": 0.11167203468365422, "grad_norm": 1.046875, "learning_rate": 0.00019932910317699878, "loss": 4.7837, "step": 1077 }, { "epoch": 0.1117757227381423, "grad_norm": 0.96484375, "learning_rate": 0.00019932784646047112, "loss": 4.8484, "step": 1078 }, { "epoch": 0.11187941079263038, "grad_norm": 0.89453125, "learning_rate": 0.00019932658857197986, "loss": 4.8276, "step": 1079 }, { "epoch": 0.11198309884711845, "grad_norm": 0.765625, "learning_rate": 0.00019932532951153986, "loss": 4.7676, "step": 1080 }, { "epoch": 0.11208678690160652, "grad_norm": 0.8203125, "learning_rate": 0.00019932406927916595, "loss": 4.8098, "step": 1081 }, { "epoch": 0.11219047495609459, "grad_norm": 0.82421875, "learning_rate": 0.000199322807874873, "loss": 4.8138, "step": 1082 }, { "epoch": 0.11229416301058266, "grad_norm": 0.75390625, "learning_rate": 0.00019932154529867595, "loss": 4.8027, "step": 1083 }, { "epoch": 0.11239785106507073, "grad_norm": 0.828125, "learning_rate": 0.00019932028155058963, "loss": 4.8091, "step": 1084 }, { "epoch": 0.1125015391195588, "grad_norm": 0.95703125, "learning_rate": 0.00019931901663062894, "loss": 4.787, "step": 1085 }, { "epoch": 0.11260522717404688, "grad_norm": 0.8125, "learning_rate": 0.00019931775053880888, "loss": 4.7998, "step": 1086 }, { "epoch": 0.11270891522853495, "grad_norm": 0.86328125, "learning_rate": 0.00019931648327514435, "loss": 4.819, "step": 1087 }, { "epoch": 0.11281260328302302, "grad_norm": 0.9375, "learning_rate": 0.0001993152148396503, "loss": 4.7823, "step": 1088 }, { "epoch": 0.1129162913375111, "grad_norm": 0.77734375, "learning_rate": 0.00019931394523234165, "loss": 4.804, "step": 1089 }, { "epoch": 0.11301997939199918, "grad_norm": 0.65625, "learning_rate": 0.00019931267445323346, "loss": 4.8188, "step": 1090 }, { "epoch": 0.11312366744648725, "grad_norm": 0.85546875, "learning_rate": 0.00019931140250234068, "loss": 4.8397, "step": 1091 }, { "epoch": 0.11322735550097532, "grad_norm": 0.7578125, "learning_rate": 0.00019931012937967834, "loss": 4.7646, "step": 1092 }, { "epoch": 0.11333104355546339, "grad_norm": 0.75390625, "learning_rate": 0.00019930885508526145, "loss": 4.8125, "step": 1093 }, { "epoch": 0.11343473160995146, "grad_norm": 0.81640625, "learning_rate": 0.000199307579619105, "loss": 4.7948, "step": 1094 }, { "epoch": 0.11353841966443953, "grad_norm": 0.66015625, "learning_rate": 0.00019930630298122415, "loss": 4.7597, "step": 1095 }, { "epoch": 0.1136421077189276, "grad_norm": 0.66796875, "learning_rate": 0.00019930502517163386, "loss": 4.7938, "step": 1096 }, { "epoch": 0.11374579577341568, "grad_norm": 0.83984375, "learning_rate": 0.00019930374619034927, "loss": 4.8352, "step": 1097 }, { "epoch": 0.11384948382790375, "grad_norm": 0.81640625, "learning_rate": 0.0001993024660373854, "loss": 4.8116, "step": 1098 }, { "epoch": 0.11395317188239182, "grad_norm": 0.734375, "learning_rate": 0.00019930118471275744, "loss": 4.7617, "step": 1099 }, { "epoch": 0.1140568599368799, "grad_norm": 0.77734375, "learning_rate": 0.00019929990221648043, "loss": 4.7716, "step": 1100 }, { "epoch": 0.11416054799136797, "grad_norm": 0.76171875, "learning_rate": 0.00019929861854856956, "loss": 4.8149, "step": 1101 }, { "epoch": 0.11426423604585605, "grad_norm": 0.73828125, "learning_rate": 0.00019929733370903995, "loss": 4.7617, "step": 1102 }, { "epoch": 0.11436792410034412, "grad_norm": 0.6484375, "learning_rate": 0.00019929604769790675, "loss": 4.7973, "step": 1103 }, { "epoch": 0.11447161215483219, "grad_norm": 0.640625, "learning_rate": 0.00019929476051518518, "loss": 4.8084, "step": 1104 }, { "epoch": 0.11457530020932026, "grad_norm": 0.734375, "learning_rate": 0.00019929347216089037, "loss": 4.8131, "step": 1105 }, { "epoch": 0.11467898826380833, "grad_norm": 0.8203125, "learning_rate": 0.00019929218263503752, "loss": 4.7745, "step": 1106 }, { "epoch": 0.1147826763182964, "grad_norm": 0.90625, "learning_rate": 0.00019929089193764193, "loss": 4.7861, "step": 1107 }, { "epoch": 0.11488636437278447, "grad_norm": 0.9296875, "learning_rate": 0.00019928960006871874, "loss": 4.7646, "step": 1108 }, { "epoch": 0.11499005242727255, "grad_norm": 0.6953125, "learning_rate": 0.00019928830702828322, "loss": 4.8098, "step": 1109 }, { "epoch": 0.11509374048176062, "grad_norm": 0.62109375, "learning_rate": 0.00019928701281635066, "loss": 4.7654, "step": 1110 }, { "epoch": 0.1151974285362487, "grad_norm": 0.84375, "learning_rate": 0.00019928571743293625, "loss": 4.7427, "step": 1111 }, { "epoch": 0.11530111659073677, "grad_norm": 1.046875, "learning_rate": 0.00019928442087805537, "loss": 4.8149, "step": 1112 }, { "epoch": 0.11540480464522485, "grad_norm": 0.75, "learning_rate": 0.00019928312315172324, "loss": 4.8035, "step": 1113 }, { "epoch": 0.11550849269971292, "grad_norm": 0.640625, "learning_rate": 0.0001992818242539552, "loss": 4.7691, "step": 1114 }, { "epoch": 0.11561218075420099, "grad_norm": 0.734375, "learning_rate": 0.0001992805241847666, "loss": 4.7827, "step": 1115 }, { "epoch": 0.11571586880868906, "grad_norm": 0.91796875, "learning_rate": 0.00019927922294417277, "loss": 4.7606, "step": 1116 }, { "epoch": 0.11581955686317713, "grad_norm": 0.921875, "learning_rate": 0.00019927792053218903, "loss": 4.7606, "step": 1117 }, { "epoch": 0.1159232449176652, "grad_norm": 0.828125, "learning_rate": 0.00019927661694883075, "loss": 4.7667, "step": 1118 }, { "epoch": 0.11602693297215327, "grad_norm": 0.84375, "learning_rate": 0.00019927531219411337, "loss": 4.7663, "step": 1119 }, { "epoch": 0.11613062102664135, "grad_norm": 0.65234375, "learning_rate": 0.00019927400626805223, "loss": 4.7812, "step": 1120 }, { "epoch": 0.11623430908112942, "grad_norm": 0.76953125, "learning_rate": 0.00019927269917066273, "loss": 4.7531, "step": 1121 }, { "epoch": 0.11633799713561749, "grad_norm": 0.74609375, "learning_rate": 0.00019927139090196035, "loss": 4.7844, "step": 1122 }, { "epoch": 0.11644168519010557, "grad_norm": 0.69921875, "learning_rate": 0.00019927008146196048, "loss": 4.7869, "step": 1123 }, { "epoch": 0.11654537324459364, "grad_norm": 0.80859375, "learning_rate": 0.0001992687708506786, "loss": 4.78, "step": 1124 }, { "epoch": 0.11664906129908172, "grad_norm": 0.6953125, "learning_rate": 0.00019926745906813012, "loss": 4.7729, "step": 1125 }, { "epoch": 0.11675274935356979, "grad_norm": 0.69140625, "learning_rate": 0.00019926614611433057, "loss": 4.8032, "step": 1126 }, { "epoch": 0.11685643740805786, "grad_norm": 0.66796875, "learning_rate": 0.0001992648319892954, "loss": 4.7693, "step": 1127 }, { "epoch": 0.11696012546254593, "grad_norm": 0.6953125, "learning_rate": 0.00019926351669304017, "loss": 4.7452, "step": 1128 }, { "epoch": 0.117063813517034, "grad_norm": 0.6640625, "learning_rate": 0.00019926220022558036, "loss": 4.7493, "step": 1129 }, { "epoch": 0.11716750157152207, "grad_norm": 0.62890625, "learning_rate": 0.00019926088258693153, "loss": 4.7705, "step": 1130 }, { "epoch": 0.11727118962601014, "grad_norm": 0.703125, "learning_rate": 0.0001992595637771092, "loss": 4.7883, "step": 1131 }, { "epoch": 0.11737487768049822, "grad_norm": 0.71484375, "learning_rate": 0.0001992582437961289, "loss": 4.767, "step": 1132 }, { "epoch": 0.11747856573498629, "grad_norm": 0.67578125, "learning_rate": 0.00019925692264400629, "loss": 4.7713, "step": 1133 }, { "epoch": 0.11758225378947437, "grad_norm": 0.68359375, "learning_rate": 0.0001992556003207569, "loss": 4.7332, "step": 1134 }, { "epoch": 0.11768594184396244, "grad_norm": 0.77734375, "learning_rate": 0.00019925427682639636, "loss": 4.7582, "step": 1135 }, { "epoch": 0.11778962989845052, "grad_norm": 0.72265625, "learning_rate": 0.00019925295216094023, "loss": 4.8117, "step": 1136 }, { "epoch": 0.11789331795293859, "grad_norm": 0.609375, "learning_rate": 0.00019925162632440419, "loss": 4.7745, "step": 1137 }, { "epoch": 0.11799700600742666, "grad_norm": 0.6015625, "learning_rate": 0.0001992502993168039, "loss": 4.7928, "step": 1138 }, { "epoch": 0.11810069406191473, "grad_norm": 0.671875, "learning_rate": 0.00019924897113815496, "loss": 4.7954, "step": 1139 }, { "epoch": 0.1182043821164028, "grad_norm": 0.65234375, "learning_rate": 0.0001992476417884731, "loss": 4.7575, "step": 1140 }, { "epoch": 0.11830807017089087, "grad_norm": 0.62109375, "learning_rate": 0.00019924631126777396, "loss": 4.7496, "step": 1141 }, { "epoch": 0.11841175822537894, "grad_norm": 0.65625, "learning_rate": 0.00019924497957607326, "loss": 4.7866, "step": 1142 }, { "epoch": 0.11851544627986701, "grad_norm": 0.84375, "learning_rate": 0.00019924364671338672, "loss": 4.773, "step": 1143 }, { "epoch": 0.11861913433435509, "grad_norm": 0.6953125, "learning_rate": 0.00019924231267973004, "loss": 4.7703, "step": 1144 }, { "epoch": 0.11872282238884317, "grad_norm": 0.6875, "learning_rate": 0.00019924097747511896, "loss": 4.7405, "step": 1145 }, { "epoch": 0.11882651044333124, "grad_norm": 0.79296875, "learning_rate": 0.00019923964109956925, "loss": 4.7476, "step": 1146 }, { "epoch": 0.11893019849781931, "grad_norm": 0.828125, "learning_rate": 0.00019923830355309669, "loss": 4.7496, "step": 1147 }, { "epoch": 0.11903388655230739, "grad_norm": 0.91015625, "learning_rate": 0.00019923696483571703, "loss": 4.7838, "step": 1148 }, { "epoch": 0.11913757460679546, "grad_norm": 0.8828125, "learning_rate": 0.00019923562494744611, "loss": 4.7803, "step": 1149 }, { "epoch": 0.11924126266128353, "grad_norm": 0.59375, "learning_rate": 0.0001992342838882997, "loss": 4.7226, "step": 1150 }, { "epoch": 0.1193449507157716, "grad_norm": 0.68359375, "learning_rate": 0.00019923294165829364, "loss": 4.7638, "step": 1151 }, { "epoch": 0.11944863877025967, "grad_norm": 0.7890625, "learning_rate": 0.00019923159825744376, "loss": 4.7377, "step": 1152 }, { "epoch": 0.11955232682474774, "grad_norm": 0.71484375, "learning_rate": 0.0001992302536857659, "loss": 4.7789, "step": 1153 }, { "epoch": 0.11965601487923581, "grad_norm": 0.59375, "learning_rate": 0.00019922890794327595, "loss": 4.7689, "step": 1154 }, { "epoch": 0.11975970293372389, "grad_norm": 0.7265625, "learning_rate": 0.00019922756102998975, "loss": 4.7883, "step": 1155 }, { "epoch": 0.11986339098821197, "grad_norm": 0.65234375, "learning_rate": 0.00019922621294592326, "loss": 4.7345, "step": 1156 }, { "epoch": 0.11996707904270004, "grad_norm": 0.72265625, "learning_rate": 0.00019922486369109232, "loss": 4.7087, "step": 1157 }, { "epoch": 0.12007076709718811, "grad_norm": 0.875, "learning_rate": 0.00019922351326551288, "loss": 4.764, "step": 1158 }, { "epoch": 0.12017445515167618, "grad_norm": 0.84765625, "learning_rate": 0.00019922216166920088, "loss": 4.751, "step": 1159 }, { "epoch": 0.12027814320616426, "grad_norm": 0.6875, "learning_rate": 0.00019922080890217222, "loss": 4.7546, "step": 1160 }, { "epoch": 0.12038183126065233, "grad_norm": 0.61328125, "learning_rate": 0.00019921945496444293, "loss": 4.7621, "step": 1161 }, { "epoch": 0.1204855193151404, "grad_norm": 0.70703125, "learning_rate": 0.00019921809985602894, "loss": 4.729, "step": 1162 }, { "epoch": 0.12058920736962847, "grad_norm": 0.71875, "learning_rate": 0.00019921674357694624, "loss": 4.8148, "step": 1163 }, { "epoch": 0.12069289542411654, "grad_norm": 0.60546875, "learning_rate": 0.00019921538612721084, "loss": 4.7172, "step": 1164 }, { "epoch": 0.12079658347860461, "grad_norm": 0.83984375, "learning_rate": 0.0001992140275068388, "loss": 4.7725, "step": 1165 }, { "epoch": 0.12090027153309268, "grad_norm": 0.7578125, "learning_rate": 0.00019921266771584604, "loss": 4.7636, "step": 1166 }, { "epoch": 0.12100395958758077, "grad_norm": 0.609375, "learning_rate": 0.0001992113067542487, "loss": 4.7482, "step": 1167 }, { "epoch": 0.12110764764206884, "grad_norm": 0.72265625, "learning_rate": 0.00019920994462206283, "loss": 4.7445, "step": 1168 }, { "epoch": 0.12121133569655691, "grad_norm": 0.76171875, "learning_rate": 0.0001992085813193045, "loss": 4.8108, "step": 1169 }, { "epoch": 0.12131502375104498, "grad_norm": 0.6875, "learning_rate": 0.00019920721684598975, "loss": 4.7589, "step": 1170 }, { "epoch": 0.12141871180553306, "grad_norm": 0.5703125, "learning_rate": 0.0001992058512021347, "loss": 4.7789, "step": 1171 }, { "epoch": 0.12152239986002113, "grad_norm": 0.6171875, "learning_rate": 0.0001992044843877555, "loss": 4.757, "step": 1172 }, { "epoch": 0.1216260879145092, "grad_norm": 0.7421875, "learning_rate": 0.00019920311640286823, "loss": 4.7454, "step": 1173 }, { "epoch": 0.12172977596899727, "grad_norm": 0.66796875, "learning_rate": 0.00019920174724748902, "loss": 4.7181, "step": 1174 }, { "epoch": 0.12183346402348534, "grad_norm": 0.55859375, "learning_rate": 0.00019920037692163409, "loss": 4.7726, "step": 1175 }, { "epoch": 0.12193715207797341, "grad_norm": 0.609375, "learning_rate": 0.00019919900542531956, "loss": 4.7689, "step": 1176 }, { "epoch": 0.12204084013246148, "grad_norm": 0.66015625, "learning_rate": 0.00019919763275856164, "loss": 4.7677, "step": 1177 }, { "epoch": 0.12214452818694957, "grad_norm": 0.671875, "learning_rate": 0.0001991962589213765, "loss": 4.77, "step": 1178 }, { "epoch": 0.12224821624143764, "grad_norm": 0.55078125, "learning_rate": 0.00019919488391378034, "loss": 4.7638, "step": 1179 }, { "epoch": 0.12235190429592571, "grad_norm": 0.6171875, "learning_rate": 0.0001991935077357894, "loss": 4.7877, "step": 1180 }, { "epoch": 0.12245559235041378, "grad_norm": 0.609375, "learning_rate": 0.00019919213038741996, "loss": 4.7555, "step": 1181 }, { "epoch": 0.12255928040490185, "grad_norm": 0.71484375, "learning_rate": 0.00019919075186868824, "loss": 4.7536, "step": 1182 }, { "epoch": 0.12266296845938993, "grad_norm": 0.703125, "learning_rate": 0.00019918937217961043, "loss": 4.7577, "step": 1183 }, { "epoch": 0.122766656513878, "grad_norm": 0.578125, "learning_rate": 0.0001991879913202029, "loss": 4.7453, "step": 1184 }, { "epoch": 0.12287034456836607, "grad_norm": 0.5625, "learning_rate": 0.00019918660929048196, "loss": 4.8053, "step": 1185 }, { "epoch": 0.12297403262285414, "grad_norm": 0.61328125, "learning_rate": 0.00019918522609046387, "loss": 4.7143, "step": 1186 }, { "epoch": 0.12307772067734221, "grad_norm": 0.609375, "learning_rate": 0.00019918384172016494, "loss": 4.7592, "step": 1187 }, { "epoch": 0.12318140873183028, "grad_norm": 0.6484375, "learning_rate": 0.0001991824561796015, "loss": 4.7616, "step": 1188 }, { "epoch": 0.12328509678631835, "grad_norm": 0.65625, "learning_rate": 0.00019918106946878995, "loss": 4.7261, "step": 1189 }, { "epoch": 0.12338878484080644, "grad_norm": 0.68359375, "learning_rate": 0.00019917968158774657, "loss": 4.7459, "step": 1190 }, { "epoch": 0.12349247289529451, "grad_norm": 0.71484375, "learning_rate": 0.00019917829253648784, "loss": 4.7121, "step": 1191 }, { "epoch": 0.12359616094978258, "grad_norm": 0.66796875, "learning_rate": 0.00019917690231503006, "loss": 4.7695, "step": 1192 }, { "epoch": 0.12369984900427065, "grad_norm": 0.5703125, "learning_rate": 0.00019917551092338967, "loss": 4.768, "step": 1193 }, { "epoch": 0.12380353705875873, "grad_norm": 0.61328125, "learning_rate": 0.00019917411836158308, "loss": 4.7858, "step": 1194 }, { "epoch": 0.1239072251132468, "grad_norm": 0.69140625, "learning_rate": 0.00019917272462962674, "loss": 4.776, "step": 1195 }, { "epoch": 0.12401091316773487, "grad_norm": 0.63671875, "learning_rate": 0.0001991713297275371, "loss": 4.77, "step": 1196 }, { "epoch": 0.12411460122222294, "grad_norm": 0.6640625, "learning_rate": 0.00019916993365533056, "loss": 4.7629, "step": 1197 }, { "epoch": 0.12421828927671101, "grad_norm": 0.76953125, "learning_rate": 0.00019916853641302365, "loss": 4.7412, "step": 1198 }, { "epoch": 0.12432197733119908, "grad_norm": 0.8671875, "learning_rate": 0.0001991671380006328, "loss": 4.7251, "step": 1199 }, { "epoch": 0.12442566538568715, "grad_norm": 0.75, "learning_rate": 0.0001991657384181746, "loss": 4.7247, "step": 1200 }, { "epoch": 0.12452935344017524, "grad_norm": 0.79296875, "learning_rate": 0.00019916433766566547, "loss": 4.744, "step": 1201 }, { "epoch": 0.12463304149466331, "grad_norm": 0.73046875, "learning_rate": 0.000199162935743122, "loss": 4.7852, "step": 1202 }, { "epoch": 0.12473672954915138, "grad_norm": 0.80859375, "learning_rate": 0.0001991615326505607, "loss": 4.76, "step": 1203 }, { "epoch": 0.12484041760363945, "grad_norm": 0.95703125, "learning_rate": 0.00019916012838799813, "loss": 4.7716, "step": 1204 }, { "epoch": 0.12494410565812752, "grad_norm": 1.0546875, "learning_rate": 0.00019915872295545086, "loss": 4.7749, "step": 1205 }, { "epoch": 0.1250477937126156, "grad_norm": 1.0, "learning_rate": 0.00019915731635293548, "loss": 4.7514, "step": 1206 }, { "epoch": 0.12515148176710367, "grad_norm": 1.1484375, "learning_rate": 0.00019915590858046858, "loss": 4.765, "step": 1207 }, { "epoch": 0.12525516982159174, "grad_norm": 0.90625, "learning_rate": 0.00019915449963806676, "loss": 4.7111, "step": 1208 }, { "epoch": 0.1253588578760798, "grad_norm": 1.4140625, "learning_rate": 0.0001991530895257467, "loss": 4.806, "step": 1209 }, { "epoch": 0.12546254593056788, "grad_norm": 0.734375, "learning_rate": 0.00019915167824352493, "loss": 4.7864, "step": 1210 }, { "epoch": 0.12556623398505595, "grad_norm": 1.28125, "learning_rate": 0.0001991502657914182, "loss": 4.7845, "step": 1211 }, { "epoch": 0.12566992203954402, "grad_norm": 1.078125, "learning_rate": 0.00019914885216944312, "loss": 4.7299, "step": 1212 }, { "epoch": 0.1257736100940321, "grad_norm": 1.25, "learning_rate": 0.0001991474373776164, "loss": 4.7712, "step": 1213 }, { "epoch": 0.12587729814852017, "grad_norm": 0.74609375, "learning_rate": 0.00019914602141595476, "loss": 4.7366, "step": 1214 }, { "epoch": 0.12598098620300824, "grad_norm": 1.40625, "learning_rate": 0.0001991446042844748, "loss": 4.7595, "step": 1215 }, { "epoch": 0.1260846742574963, "grad_norm": 0.828125, "learning_rate": 0.00019914318598319338, "loss": 4.7051, "step": 1216 }, { "epoch": 0.12618836231198438, "grad_norm": 1.609375, "learning_rate": 0.0001991417665121271, "loss": 4.7561, "step": 1217 }, { "epoch": 0.12629205036647248, "grad_norm": 1.09375, "learning_rate": 0.0001991403458712928, "loss": 4.7613, "step": 1218 }, { "epoch": 0.12639573842096055, "grad_norm": 2.3125, "learning_rate": 0.00019913892406070723, "loss": 4.7776, "step": 1219 }, { "epoch": 0.12649942647544862, "grad_norm": 2.0625, "learning_rate": 0.00019913750108038715, "loss": 4.7397, "step": 1220 }, { "epoch": 0.1266031145299367, "grad_norm": 1.4140625, "learning_rate": 0.00019913607693034934, "loss": 4.7404, "step": 1221 }, { "epoch": 0.12670680258442477, "grad_norm": 1.4921875, "learning_rate": 0.0001991346516106106, "loss": 4.7672, "step": 1222 }, { "epoch": 0.12681049063891284, "grad_norm": 1.2734375, "learning_rate": 0.00019913322512118774, "loss": 4.7471, "step": 1223 }, { "epoch": 0.1269141786934009, "grad_norm": 1.578125, "learning_rate": 0.00019913179746209765, "loss": 4.8062, "step": 1224 }, { "epoch": 0.12701786674788898, "grad_norm": 1.2890625, "learning_rate": 0.00019913036863335713, "loss": 4.776, "step": 1225 }, { "epoch": 0.12712155480237705, "grad_norm": 1.5390625, "learning_rate": 0.00019912893863498305, "loss": 4.7452, "step": 1226 }, { "epoch": 0.12722524285686512, "grad_norm": 1.3125, "learning_rate": 0.00019912750746699226, "loss": 4.7816, "step": 1227 }, { "epoch": 0.1273289309113532, "grad_norm": 1.234375, "learning_rate": 0.0001991260751294017, "loss": 4.719, "step": 1228 }, { "epoch": 0.12743261896584127, "grad_norm": 1.21875, "learning_rate": 0.00019912464162222818, "loss": 4.7678, "step": 1229 }, { "epoch": 0.12753630702032934, "grad_norm": 1.140625, "learning_rate": 0.0001991232069454887, "loss": 4.7393, "step": 1230 }, { "epoch": 0.1276399950748174, "grad_norm": 0.9453125, "learning_rate": 0.00019912177109920016, "loss": 4.7238, "step": 1231 }, { "epoch": 0.12774368312930548, "grad_norm": 1.1328125, "learning_rate": 0.0001991203340833795, "loss": 4.7208, "step": 1232 }, { "epoch": 0.12784737118379355, "grad_norm": 1.0546875, "learning_rate": 0.00019911889589804366, "loss": 4.7451, "step": 1233 }, { "epoch": 0.12795105923828162, "grad_norm": 0.90234375, "learning_rate": 0.00019911745654320963, "loss": 4.7744, "step": 1234 }, { "epoch": 0.1280547472927697, "grad_norm": 1.1015625, "learning_rate": 0.00019911601601889438, "loss": 4.7203, "step": 1235 }, { "epoch": 0.12815843534725777, "grad_norm": 0.78515625, "learning_rate": 0.0001991145743251149, "loss": 4.7109, "step": 1236 }, { "epoch": 0.12826212340174584, "grad_norm": 0.984375, "learning_rate": 0.00019911313146188823, "loss": 4.7083, "step": 1237 }, { "epoch": 0.1283658114562339, "grad_norm": 1.1875, "learning_rate": 0.00019911168742923138, "loss": 4.711, "step": 1238 }, { "epoch": 0.12846949951072198, "grad_norm": 0.82421875, "learning_rate": 0.00019911024222716138, "loss": 4.754, "step": 1239 }, { "epoch": 0.12857318756521008, "grad_norm": 0.83984375, "learning_rate": 0.0001991087958556953, "loss": 4.7559, "step": 1240 }, { "epoch": 0.12867687561969815, "grad_norm": 0.75, "learning_rate": 0.00019910734831485015, "loss": 4.7452, "step": 1241 }, { "epoch": 0.12878056367418622, "grad_norm": 0.69921875, "learning_rate": 0.00019910589960464304, "loss": 4.7699, "step": 1242 }, { "epoch": 0.1288842517286743, "grad_norm": 0.80078125, "learning_rate": 0.00019910444972509112, "loss": 4.7216, "step": 1243 }, { "epoch": 0.12898793978316236, "grad_norm": 0.67578125, "learning_rate": 0.00019910299867621146, "loss": 4.7461, "step": 1244 }, { "epoch": 0.12909162783765044, "grad_norm": 0.70703125, "learning_rate": 0.00019910154645802112, "loss": 4.7232, "step": 1245 }, { "epoch": 0.1291953158921385, "grad_norm": 0.82421875, "learning_rate": 0.00019910009307053735, "loss": 4.7075, "step": 1246 }, { "epoch": 0.12929900394662658, "grad_norm": 0.71484375, "learning_rate": 0.00019909863851377718, "loss": 4.7002, "step": 1247 }, { "epoch": 0.12940269200111465, "grad_norm": 0.73828125, "learning_rate": 0.00019909718278775785, "loss": 4.734, "step": 1248 }, { "epoch": 0.12950638005560272, "grad_norm": 0.6015625, "learning_rate": 0.0001990957258924965, "loss": 4.731, "step": 1249 }, { "epoch": 0.1296100681100908, "grad_norm": 0.640625, "learning_rate": 0.00019909426782801037, "loss": 4.7217, "step": 1250 }, { "epoch": 0.12971375616457886, "grad_norm": 0.67578125, "learning_rate": 0.00019909280859431658, "loss": 4.723, "step": 1251 }, { "epoch": 0.12981744421906694, "grad_norm": 0.59765625, "learning_rate": 0.00019909134819143243, "loss": 4.7431, "step": 1252 }, { "epoch": 0.129921132273555, "grad_norm": 0.6328125, "learning_rate": 0.00019908988661937512, "loss": 4.7613, "step": 1253 }, { "epoch": 0.13002482032804308, "grad_norm": 0.66796875, "learning_rate": 0.00019908842387816183, "loss": 4.7186, "step": 1254 }, { "epoch": 0.13012850838253115, "grad_norm": 0.578125, "learning_rate": 0.00019908695996780993, "loss": 4.7484, "step": 1255 }, { "epoch": 0.13023219643701922, "grad_norm": 0.56640625, "learning_rate": 0.00019908549488833663, "loss": 4.7378, "step": 1256 }, { "epoch": 0.1303358844915073, "grad_norm": 0.6640625, "learning_rate": 0.00019908402863975925, "loss": 4.7338, "step": 1257 }, { "epoch": 0.13043957254599536, "grad_norm": 0.515625, "learning_rate": 0.00019908256122209506, "loss": 4.7363, "step": 1258 }, { "epoch": 0.13054326060048344, "grad_norm": 0.64453125, "learning_rate": 0.00019908109263536134, "loss": 4.7391, "step": 1259 }, { "epoch": 0.1306469486549715, "grad_norm": 0.6015625, "learning_rate": 0.00019907962287957548, "loss": 4.7361, "step": 1260 }, { "epoch": 0.13075063670945958, "grad_norm": 0.6328125, "learning_rate": 0.00019907815195475484, "loss": 4.7187, "step": 1261 }, { "epoch": 0.13085432476394768, "grad_norm": 0.5390625, "learning_rate": 0.0001990766798609167, "loss": 4.7298, "step": 1262 }, { "epoch": 0.13095801281843575, "grad_norm": 0.61328125, "learning_rate": 0.0001990752065980785, "loss": 4.7505, "step": 1263 }, { "epoch": 0.13106170087292382, "grad_norm": 0.57421875, "learning_rate": 0.00019907373216625755, "loss": 4.6917, "step": 1264 }, { "epoch": 0.1311653889274119, "grad_norm": 0.466796875, "learning_rate": 0.0001990722565654713, "loss": 4.7232, "step": 1265 }, { "epoch": 0.13126907698189996, "grad_norm": 0.578125, "learning_rate": 0.00019907077979573713, "loss": 4.722, "step": 1266 }, { "epoch": 0.13137276503638803, "grad_norm": 0.54296875, "learning_rate": 0.0001990693018570725, "loss": 4.734, "step": 1267 }, { "epoch": 0.1314764530908761, "grad_norm": 0.51953125, "learning_rate": 0.00019906782274949482, "loss": 4.7565, "step": 1268 }, { "epoch": 0.13158014114536418, "grad_norm": 0.58984375, "learning_rate": 0.0001990663424730216, "loss": 4.7149, "step": 1269 }, { "epoch": 0.13168382919985225, "grad_norm": 0.5390625, "learning_rate": 0.0001990648610276702, "loss": 4.7146, "step": 1270 }, { "epoch": 0.13178751725434032, "grad_norm": 0.546875, "learning_rate": 0.00019906337841345818, "loss": 4.7258, "step": 1271 }, { "epoch": 0.1318912053088284, "grad_norm": 0.498046875, "learning_rate": 0.00019906189463040299, "loss": 4.7316, "step": 1272 }, { "epoch": 0.13199489336331646, "grad_norm": 0.5859375, "learning_rate": 0.00019906040967852215, "loss": 4.7284, "step": 1273 }, { "epoch": 0.13209858141780453, "grad_norm": 0.59375, "learning_rate": 0.0001990589235578332, "loss": 4.7609, "step": 1274 }, { "epoch": 0.1322022694722926, "grad_norm": 0.63671875, "learning_rate": 0.00019905743626835368, "loss": 4.7654, "step": 1275 }, { "epoch": 0.13230595752678068, "grad_norm": 0.59765625, "learning_rate": 0.0001990559478101011, "loss": 4.7341, "step": 1276 }, { "epoch": 0.13240964558126875, "grad_norm": 0.6640625, "learning_rate": 0.00019905445818309305, "loss": 4.7512, "step": 1277 }, { "epoch": 0.13251333363575682, "grad_norm": 0.57421875, "learning_rate": 0.00019905296738734709, "loss": 4.6994, "step": 1278 }, { "epoch": 0.1326170216902449, "grad_norm": 0.69921875, "learning_rate": 0.00019905147542288086, "loss": 4.7447, "step": 1279 }, { "epoch": 0.13272070974473296, "grad_norm": 0.8046875, "learning_rate": 0.0001990499822897119, "loss": 4.7525, "step": 1280 }, { "epoch": 0.13282439779922103, "grad_norm": 0.73046875, "learning_rate": 0.00019904848798785781, "loss": 4.7446, "step": 1281 }, { "epoch": 0.1329280858537091, "grad_norm": 0.671875, "learning_rate": 0.00019904699251733628, "loss": 4.7214, "step": 1282 }, { "epoch": 0.13303177390819718, "grad_norm": 0.6484375, "learning_rate": 0.00019904549587816494, "loss": 4.7364, "step": 1283 }, { "epoch": 0.13313546196268525, "grad_norm": 0.7265625, "learning_rate": 0.00019904399807036145, "loss": 4.7707, "step": 1284 }, { "epoch": 0.13323915001717335, "grad_norm": 0.73046875, "learning_rate": 0.00019904249909394347, "loss": 4.7137, "step": 1285 }, { "epoch": 0.13334283807166142, "grad_norm": 0.671875, "learning_rate": 0.0001990409989489287, "loss": 4.7243, "step": 1286 }, { "epoch": 0.1334465261261495, "grad_norm": 0.625, "learning_rate": 0.00019903949763533483, "loss": 4.7646, "step": 1287 }, { "epoch": 0.13355021418063756, "grad_norm": 0.6015625, "learning_rate": 0.00019903799515317956, "loss": 4.6683, "step": 1288 }, { "epoch": 0.13365390223512563, "grad_norm": 0.55859375, "learning_rate": 0.00019903649150248068, "loss": 4.7398, "step": 1289 }, { "epoch": 0.1337575902896137, "grad_norm": 0.62109375, "learning_rate": 0.00019903498668325583, "loss": 4.7181, "step": 1290 }, { "epoch": 0.13386127834410178, "grad_norm": 0.50390625, "learning_rate": 0.00019903348069552285, "loss": 4.7208, "step": 1291 }, { "epoch": 0.13396496639858985, "grad_norm": 0.5703125, "learning_rate": 0.0001990319735392995, "loss": 4.7317, "step": 1292 }, { "epoch": 0.13406865445307792, "grad_norm": 0.67578125, "learning_rate": 0.00019903046521460352, "loss": 4.7149, "step": 1293 }, { "epoch": 0.134172342507566, "grad_norm": 0.5234375, "learning_rate": 0.00019902895572145274, "loss": 4.6968, "step": 1294 }, { "epoch": 0.13427603056205406, "grad_norm": 0.55078125, "learning_rate": 0.00019902744505986494, "loss": 4.7335, "step": 1295 }, { "epoch": 0.13437971861654213, "grad_norm": 0.60546875, "learning_rate": 0.00019902593322985797, "loss": 4.6848, "step": 1296 }, { "epoch": 0.1344834066710302, "grad_norm": 0.57421875, "learning_rate": 0.0001990244202314497, "loss": 4.7019, "step": 1297 }, { "epoch": 0.13458709472551827, "grad_norm": 0.6328125, "learning_rate": 0.0001990229060646579, "loss": 4.7149, "step": 1298 }, { "epoch": 0.13469078278000635, "grad_norm": 0.75390625, "learning_rate": 0.0001990213907295005, "loss": 4.7474, "step": 1299 }, { "epoch": 0.13479447083449442, "grad_norm": 0.6796875, "learning_rate": 0.00019901987422599535, "loss": 4.694, "step": 1300 }, { "epoch": 0.1348981588889825, "grad_norm": 0.6015625, "learning_rate": 0.00019901835655416038, "loss": 4.677, "step": 1301 }, { "epoch": 0.13500184694347056, "grad_norm": 0.63671875, "learning_rate": 0.00019901683771401344, "loss": 4.7183, "step": 1302 }, { "epoch": 0.13510553499795863, "grad_norm": 0.62890625, "learning_rate": 0.0001990153177055725, "loss": 4.7433, "step": 1303 }, { "epoch": 0.1352092230524467, "grad_norm": 0.65234375, "learning_rate": 0.00019901379652885543, "loss": 4.7238, "step": 1304 }, { "epoch": 0.13531291110693477, "grad_norm": 0.69921875, "learning_rate": 0.00019901227418388028, "loss": 4.7238, "step": 1305 }, { "epoch": 0.13541659916142285, "grad_norm": 0.53125, "learning_rate": 0.00019901075067066493, "loss": 4.7488, "step": 1306 }, { "epoch": 0.13552028721591095, "grad_norm": 0.6953125, "learning_rate": 0.00019900922598922738, "loss": 4.7578, "step": 1307 }, { "epoch": 0.13562397527039902, "grad_norm": 0.74609375, "learning_rate": 0.00019900770013958562, "loss": 4.7248, "step": 1308 }, { "epoch": 0.1357276633248871, "grad_norm": 0.6015625, "learning_rate": 0.00019900617312175768, "loss": 4.6949, "step": 1309 }, { "epoch": 0.13583135137937516, "grad_norm": 0.5625, "learning_rate": 0.00019900464493576153, "loss": 4.7147, "step": 1310 }, { "epoch": 0.13593503943386323, "grad_norm": 0.63671875, "learning_rate": 0.00019900311558161522, "loss": 4.7156, "step": 1311 }, { "epoch": 0.1360387274883513, "grad_norm": 0.64453125, "learning_rate": 0.00019900158505933678, "loss": 4.7342, "step": 1312 }, { "epoch": 0.13614241554283937, "grad_norm": 0.55078125, "learning_rate": 0.0001990000533689443, "loss": 4.7456, "step": 1313 }, { "epoch": 0.13624610359732744, "grad_norm": 0.6796875, "learning_rate": 0.00019899852051045587, "loss": 4.7326, "step": 1314 }, { "epoch": 0.13634979165181552, "grad_norm": 0.71484375, "learning_rate": 0.00019899698648388948, "loss": 4.7254, "step": 1315 }, { "epoch": 0.1364534797063036, "grad_norm": 0.5703125, "learning_rate": 0.00019899545128926333, "loss": 4.7298, "step": 1316 }, { "epoch": 0.13655716776079166, "grad_norm": 0.5625, "learning_rate": 0.00019899391492659551, "loss": 4.7243, "step": 1317 }, { "epoch": 0.13666085581527973, "grad_norm": 0.59375, "learning_rate": 0.00019899237739590413, "loss": 4.723, "step": 1318 }, { "epoch": 0.1367645438697678, "grad_norm": 0.54296875, "learning_rate": 0.00019899083869720735, "loss": 4.7191, "step": 1319 }, { "epoch": 0.13686823192425587, "grad_norm": 0.546875, "learning_rate": 0.00019898929883052326, "loss": 4.6991, "step": 1320 }, { "epoch": 0.13697191997874394, "grad_norm": 0.71875, "learning_rate": 0.0001989877577958701, "loss": 4.6526, "step": 1321 }, { "epoch": 0.13707560803323202, "grad_norm": 0.5625, "learning_rate": 0.00019898621559326607, "loss": 4.6876, "step": 1322 }, { "epoch": 0.1371792960877201, "grad_norm": 0.51953125, "learning_rate": 0.00019898467222272928, "loss": 4.6841, "step": 1323 }, { "epoch": 0.13728298414220816, "grad_norm": 0.80078125, "learning_rate": 0.00019898312768427802, "loss": 4.677, "step": 1324 }, { "epoch": 0.13738667219669623, "grad_norm": 0.83203125, "learning_rate": 0.00019898158197793046, "loss": 4.7136, "step": 1325 }, { "epoch": 0.1374903602511843, "grad_norm": 0.6484375, "learning_rate": 0.00019898003510370488, "loss": 4.6611, "step": 1326 }, { "epoch": 0.13759404830567237, "grad_norm": 0.7109375, "learning_rate": 0.0001989784870616195, "loss": 4.6735, "step": 1327 }, { "epoch": 0.13769773636016044, "grad_norm": 0.8515625, "learning_rate": 0.00019897693785169261, "loss": 4.7221, "step": 1328 }, { "epoch": 0.13780142441464854, "grad_norm": 0.79296875, "learning_rate": 0.00019897538747394247, "loss": 4.6936, "step": 1329 }, { "epoch": 0.13790511246913661, "grad_norm": 0.64453125, "learning_rate": 0.00019897383592838738, "loss": 4.7377, "step": 1330 }, { "epoch": 0.1380088005236247, "grad_norm": 0.6796875, "learning_rate": 0.00019897228321504563, "loss": 4.7499, "step": 1331 }, { "epoch": 0.13811248857811276, "grad_norm": 0.9609375, "learning_rate": 0.00019897072933393559, "loss": 4.705, "step": 1332 }, { "epoch": 0.13821617663260083, "grad_norm": 1.0390625, "learning_rate": 0.00019896917428507553, "loss": 4.719, "step": 1333 }, { "epoch": 0.1383198646870889, "grad_norm": 1.0546875, "learning_rate": 0.00019896761806848385, "loss": 4.7109, "step": 1334 }, { "epoch": 0.13842355274157697, "grad_norm": 1.109375, "learning_rate": 0.00019896606068417887, "loss": 4.7314, "step": 1335 }, { "epoch": 0.13852724079606504, "grad_norm": 1.140625, "learning_rate": 0.000198964502132179, "loss": 4.7162, "step": 1336 }, { "epoch": 0.13863092885055311, "grad_norm": 0.77734375, "learning_rate": 0.00019896294241250262, "loss": 4.7474, "step": 1337 }, { "epoch": 0.13873461690504119, "grad_norm": 0.67578125, "learning_rate": 0.00019896138152516812, "loss": 4.6801, "step": 1338 }, { "epoch": 0.13883830495952926, "grad_norm": 0.86328125, "learning_rate": 0.00019895981947019392, "loss": 4.6727, "step": 1339 }, { "epoch": 0.13894199301401733, "grad_norm": 1.09375, "learning_rate": 0.00019895825624759845, "loss": 4.7344, "step": 1340 }, { "epoch": 0.1390456810685054, "grad_norm": 0.9453125, "learning_rate": 0.00019895669185740017, "loss": 4.737, "step": 1341 }, { "epoch": 0.13914936912299347, "grad_norm": 1.015625, "learning_rate": 0.00019895512629961753, "loss": 4.6886, "step": 1342 }, { "epoch": 0.13925305717748154, "grad_norm": 0.88671875, "learning_rate": 0.000198953559574269, "loss": 4.658, "step": 1343 }, { "epoch": 0.13935674523196961, "grad_norm": 0.57421875, "learning_rate": 0.00019895199168137306, "loss": 4.6986, "step": 1344 }, { "epoch": 0.13946043328645769, "grad_norm": 0.78515625, "learning_rate": 0.0001989504226209482, "loss": 4.7303, "step": 1345 }, { "epoch": 0.13956412134094576, "grad_norm": 1.0859375, "learning_rate": 0.00019894885239301298, "loss": 4.6842, "step": 1346 }, { "epoch": 0.13966780939543383, "grad_norm": 0.78125, "learning_rate": 0.0001989472809975859, "loss": 4.6811, "step": 1347 }, { "epoch": 0.1397714974499219, "grad_norm": 0.72265625, "learning_rate": 0.00019894570843468544, "loss": 4.713, "step": 1348 }, { "epoch": 0.13987518550440997, "grad_norm": 0.98828125, "learning_rate": 0.00019894413470433026, "loss": 4.6777, "step": 1349 }, { "epoch": 0.13997887355889804, "grad_norm": 0.875, "learning_rate": 0.00019894255980653887, "loss": 4.7175, "step": 1350 }, { "epoch": 0.14008256161338611, "grad_norm": 0.953125, "learning_rate": 0.00019894098374132987, "loss": 4.7008, "step": 1351 }, { "epoch": 0.1401862496678742, "grad_norm": 1.359375, "learning_rate": 0.0001989394065087218, "loss": 4.76, "step": 1352 }, { "epoch": 0.14028993772236228, "grad_norm": 0.74609375, "learning_rate": 0.00019893782810873338, "loss": 4.7276, "step": 1353 }, { "epoch": 0.14039362577685036, "grad_norm": 1.4296875, "learning_rate": 0.00019893624854138312, "loss": 4.7048, "step": 1354 }, { "epoch": 0.14049731383133843, "grad_norm": 0.8046875, "learning_rate": 0.00019893466780668972, "loss": 4.6713, "step": 1355 }, { "epoch": 0.1406010018858265, "grad_norm": 1.4609375, "learning_rate": 0.00019893308590467185, "loss": 4.7421, "step": 1356 }, { "epoch": 0.14070468994031457, "grad_norm": 0.88671875, "learning_rate": 0.0001989315028353481, "loss": 4.7306, "step": 1357 }, { "epoch": 0.14080837799480264, "grad_norm": 1.7109375, "learning_rate": 0.00019892991859873723, "loss": 4.7135, "step": 1358 }, { "epoch": 0.1409120660492907, "grad_norm": 1.3125, "learning_rate": 0.00019892833319485787, "loss": 4.7376, "step": 1359 }, { "epoch": 0.14101575410377878, "grad_norm": 2.25, "learning_rate": 0.00019892674662372876, "loss": 4.7339, "step": 1360 }, { "epoch": 0.14111944215826686, "grad_norm": 2.078125, "learning_rate": 0.0001989251588853686, "loss": 4.717, "step": 1361 }, { "epoch": 0.14122313021275493, "grad_norm": 1.359375, "learning_rate": 0.00019892356997979613, "loss": 4.7236, "step": 1362 }, { "epoch": 0.141326818267243, "grad_norm": 1.59375, "learning_rate": 0.0001989219799070301, "loss": 4.6997, "step": 1363 }, { "epoch": 0.14143050632173107, "grad_norm": 1.2578125, "learning_rate": 0.00019892038866708932, "loss": 4.6979, "step": 1364 }, { "epoch": 0.14153419437621914, "grad_norm": 2.1875, "learning_rate": 0.00019891879625999245, "loss": 4.6499, "step": 1365 }, { "epoch": 0.1416378824307072, "grad_norm": 1.7890625, "learning_rate": 0.00019891720268575837, "loss": 4.7374, "step": 1366 }, { "epoch": 0.14174157048519528, "grad_norm": 2.109375, "learning_rate": 0.00019891560794440587, "loss": 4.6959, "step": 1367 }, { "epoch": 0.14184525853968336, "grad_norm": 1.3828125, "learning_rate": 0.00019891401203595374, "loss": 4.7039, "step": 1368 }, { "epoch": 0.14194894659417143, "grad_norm": 2.609375, "learning_rate": 0.00019891241496042082, "loss": 4.7391, "step": 1369 }, { "epoch": 0.1420526346486595, "grad_norm": 2.40625, "learning_rate": 0.000198910816717826, "loss": 4.7635, "step": 1370 }, { "epoch": 0.14215632270314757, "grad_norm": 1.6171875, "learning_rate": 0.00019890921730818806, "loss": 4.7121, "step": 1371 }, { "epoch": 0.14226001075763564, "grad_norm": 1.6171875, "learning_rate": 0.00019890761673152591, "loss": 4.7292, "step": 1372 }, { "epoch": 0.1423636988121237, "grad_norm": 1.453125, "learning_rate": 0.00019890601498785844, "loss": 4.71, "step": 1373 }, { "epoch": 0.1424673868666118, "grad_norm": 1.421875, "learning_rate": 0.00019890441207720454, "loss": 4.7202, "step": 1374 }, { "epoch": 0.14257107492109988, "grad_norm": 1.25, "learning_rate": 0.0001989028079995831, "loss": 4.7506, "step": 1375 }, { "epoch": 0.14267476297558795, "grad_norm": 1.5390625, "learning_rate": 0.00019890120275501308, "loss": 4.684, "step": 1376 }, { "epoch": 0.14277845103007603, "grad_norm": 1.0703125, "learning_rate": 0.00019889959634351344, "loss": 4.736, "step": 1377 }, { "epoch": 0.1428821390845641, "grad_norm": 2.40625, "learning_rate": 0.0001988979887651031, "loss": 4.7732, "step": 1378 }, { "epoch": 0.14298582713905217, "grad_norm": 2.03125, "learning_rate": 0.00019889638001980103, "loss": 4.7442, "step": 1379 }, { "epoch": 0.14308951519354024, "grad_norm": 1.7890625, "learning_rate": 0.00019889477010762618, "loss": 4.6948, "step": 1380 }, { "epoch": 0.1431932032480283, "grad_norm": 1.53125, "learning_rate": 0.00019889315902859762, "loss": 4.7068, "step": 1381 }, { "epoch": 0.14329689130251638, "grad_norm": 1.7265625, "learning_rate": 0.0001988915467827343, "loss": 4.7583, "step": 1382 }, { "epoch": 0.14340057935700445, "grad_norm": 1.3359375, "learning_rate": 0.00019888993337005526, "loss": 4.7032, "step": 1383 }, { "epoch": 0.14350426741149253, "grad_norm": 1.5859375, "learning_rate": 0.00019888831879057953, "loss": 4.7053, "step": 1384 }, { "epoch": 0.1436079554659806, "grad_norm": 1.0859375, "learning_rate": 0.00019888670304432619, "loss": 4.6836, "step": 1385 }, { "epoch": 0.14371164352046867, "grad_norm": 1.8828125, "learning_rate": 0.00019888508613131426, "loss": 4.7571, "step": 1386 }, { "epoch": 0.14381533157495674, "grad_norm": 1.3515625, "learning_rate": 0.00019888346805156283, "loss": 4.6962, "step": 1387 }, { "epoch": 0.1439190196294448, "grad_norm": 2.1875, "learning_rate": 0.00019888184880509103, "loss": 4.7161, "step": 1388 }, { "epoch": 0.14402270768393288, "grad_norm": 2.03125, "learning_rate": 0.00019888022839191792, "loss": 4.704, "step": 1389 }, { "epoch": 0.14412639573842095, "grad_norm": 1.5390625, "learning_rate": 0.00019887860681206266, "loss": 4.7299, "step": 1390 }, { "epoch": 0.14423008379290903, "grad_norm": 1.5390625, "learning_rate": 0.00019887698406554431, "loss": 4.7452, "step": 1391 }, { "epoch": 0.1443337718473971, "grad_norm": 1.28125, "learning_rate": 0.00019887536015238212, "loss": 4.7157, "step": 1392 }, { "epoch": 0.14443745990188517, "grad_norm": 1.171875, "learning_rate": 0.00019887373507259518, "loss": 4.6886, "step": 1393 }, { "epoch": 0.14454114795637324, "grad_norm": 1.2734375, "learning_rate": 0.00019887210882620266, "loss": 4.7412, "step": 1394 }, { "epoch": 0.1446448360108613, "grad_norm": 1.09375, "learning_rate": 0.00019887048141322376, "loss": 4.7263, "step": 1395 }, { "epoch": 0.1447485240653494, "grad_norm": 1.109375, "learning_rate": 0.00019886885283367772, "loss": 4.7014, "step": 1396 }, { "epoch": 0.14485221211983748, "grad_norm": 0.77734375, "learning_rate": 0.00019886722308758373, "loss": 4.6781, "step": 1397 }, { "epoch": 0.14495590017432555, "grad_norm": 1.125, "learning_rate": 0.00019886559217496098, "loss": 4.6926, "step": 1398 }, { "epoch": 0.14505958822881362, "grad_norm": 0.84375, "learning_rate": 0.00019886396009582876, "loss": 4.7069, "step": 1399 }, { "epoch": 0.1451632762833017, "grad_norm": 0.78515625, "learning_rate": 0.00019886232685020633, "loss": 4.6784, "step": 1400 }, { "epoch": 0.14526696433778977, "grad_norm": 0.87109375, "learning_rate": 0.00019886069243811293, "loss": 4.6576, "step": 1401 }, { "epoch": 0.14537065239227784, "grad_norm": 0.7421875, "learning_rate": 0.0001988590568595679, "loss": 4.6882, "step": 1402 }, { "epoch": 0.1454743404467659, "grad_norm": 0.703125, "learning_rate": 0.00019885742011459045, "loss": 4.7041, "step": 1403 }, { "epoch": 0.14557802850125398, "grad_norm": 0.70703125, "learning_rate": 0.00019885578220319995, "loss": 4.714, "step": 1404 }, { "epoch": 0.14568171655574205, "grad_norm": 0.6953125, "learning_rate": 0.00019885414312541573, "loss": 4.6943, "step": 1405 }, { "epoch": 0.14578540461023012, "grad_norm": 0.61328125, "learning_rate": 0.00019885250288125713, "loss": 4.6866, "step": 1406 }, { "epoch": 0.1458890926647182, "grad_norm": 0.71875, "learning_rate": 0.00019885086147074344, "loss": 4.6937, "step": 1407 }, { "epoch": 0.14599278071920627, "grad_norm": 0.63671875, "learning_rate": 0.0001988492188938941, "loss": 4.7627, "step": 1408 }, { "epoch": 0.14609646877369434, "grad_norm": 0.640625, "learning_rate": 0.00019884757515072844, "loss": 4.7113, "step": 1409 }, { "epoch": 0.1462001568281824, "grad_norm": 0.640625, "learning_rate": 0.00019884593024126592, "loss": 4.6828, "step": 1410 }, { "epoch": 0.14630384488267048, "grad_norm": 0.56640625, "learning_rate": 0.0001988442841655259, "loss": 4.7033, "step": 1411 }, { "epoch": 0.14640753293715855, "grad_norm": 0.68359375, "learning_rate": 0.00019884263692352777, "loss": 4.7208, "step": 1412 }, { "epoch": 0.14651122099164662, "grad_norm": 0.59765625, "learning_rate": 0.00019884098851529104, "loss": 4.7189, "step": 1413 }, { "epoch": 0.1466149090461347, "grad_norm": 0.65625, "learning_rate": 0.00019883933894083514, "loss": 4.6782, "step": 1414 }, { "epoch": 0.14671859710062277, "grad_norm": 0.67578125, "learning_rate": 0.00019883768820017948, "loss": 4.7375, "step": 1415 }, { "epoch": 0.14682228515511084, "grad_norm": 0.68359375, "learning_rate": 0.0001988360362933436, "loss": 4.7187, "step": 1416 }, { "epoch": 0.1469259732095989, "grad_norm": 0.61328125, "learning_rate": 0.00019883438322034695, "loss": 4.7011, "step": 1417 }, { "epoch": 0.14702966126408698, "grad_norm": 0.54296875, "learning_rate": 0.00019883272898120905, "loss": 4.7445, "step": 1418 }, { "epoch": 0.14713334931857508, "grad_norm": 0.65234375, "learning_rate": 0.00019883107357594943, "loss": 4.724, "step": 1419 }, { "epoch": 0.14723703737306315, "grad_norm": 0.625, "learning_rate": 0.0001988294170045876, "loss": 4.6845, "step": 1420 }, { "epoch": 0.14734072542755122, "grad_norm": 0.6015625, "learning_rate": 0.00019882775926714313, "loss": 4.7319, "step": 1421 }, { "epoch": 0.1474444134820393, "grad_norm": 0.6328125, "learning_rate": 0.00019882610036363557, "loss": 4.71, "step": 1422 }, { "epoch": 0.14754810153652737, "grad_norm": 0.828125, "learning_rate": 0.00019882444029408448, "loss": 4.7078, "step": 1423 }, { "epoch": 0.14765178959101544, "grad_norm": 0.6640625, "learning_rate": 0.00019882277905850946, "loss": 4.6876, "step": 1424 }, { "epoch": 0.1477554776455035, "grad_norm": 0.58984375, "learning_rate": 0.00019882111665693011, "loss": 4.6975, "step": 1425 }, { "epoch": 0.14785916569999158, "grad_norm": 0.77734375, "learning_rate": 0.00019881945308936603, "loss": 4.6452, "step": 1426 }, { "epoch": 0.14796285375447965, "grad_norm": 0.75390625, "learning_rate": 0.00019881778835583686, "loss": 4.7131, "step": 1427 }, { "epoch": 0.14806654180896772, "grad_norm": 0.54296875, "learning_rate": 0.00019881612245636226, "loss": 4.681, "step": 1428 }, { "epoch": 0.1481702298634558, "grad_norm": 0.66015625, "learning_rate": 0.00019881445539096185, "loss": 4.7163, "step": 1429 }, { "epoch": 0.14827391791794386, "grad_norm": 0.73828125, "learning_rate": 0.00019881278715965534, "loss": 4.7312, "step": 1430 }, { "epoch": 0.14837760597243194, "grad_norm": 0.6484375, "learning_rate": 0.00019881111776246234, "loss": 4.7217, "step": 1431 }, { "epoch": 0.14848129402692, "grad_norm": 0.74609375, "learning_rate": 0.00019880944719940263, "loss": 4.7345, "step": 1432 }, { "epoch": 0.14858498208140808, "grad_norm": 0.71484375, "learning_rate": 0.0001988077754704959, "loss": 4.6661, "step": 1433 }, { "epoch": 0.14868867013589615, "grad_norm": 0.68359375, "learning_rate": 0.0001988061025757619, "loss": 4.662, "step": 1434 }, { "epoch": 0.14879235819038422, "grad_norm": 0.94921875, "learning_rate": 0.00019880442851522029, "loss": 4.7057, "step": 1435 }, { "epoch": 0.1488960462448723, "grad_norm": 0.89453125, "learning_rate": 0.00019880275328889083, "loss": 4.6554, "step": 1436 }, { "epoch": 0.14899973429936036, "grad_norm": 0.76953125, "learning_rate": 0.00019880107689679337, "loss": 4.673, "step": 1437 }, { "epoch": 0.14910342235384844, "grad_norm": 0.6796875, "learning_rate": 0.00019879939933894762, "loss": 4.6325, "step": 1438 }, { "epoch": 0.1492071104083365, "grad_norm": 0.8515625, "learning_rate": 0.0001987977206153734, "loss": 4.7093, "step": 1439 }, { "epoch": 0.14931079846282458, "grad_norm": 0.71875, "learning_rate": 0.0001987960407260905, "loss": 4.6711, "step": 1440 }, { "epoch": 0.14941448651731268, "grad_norm": 0.6484375, "learning_rate": 0.00019879435967111876, "loss": 4.6034, "step": 1441 }, { "epoch": 0.14951817457180075, "grad_norm": 0.71875, "learning_rate": 0.000198792677450478, "loss": 4.713, "step": 1442 }, { "epoch": 0.14962186262628882, "grad_norm": 0.84375, "learning_rate": 0.00019879099406418807, "loss": 4.7042, "step": 1443 }, { "epoch": 0.1497255506807769, "grad_norm": 0.5703125, "learning_rate": 0.00019878930951226887, "loss": 4.6731, "step": 1444 }, { "epoch": 0.14982923873526496, "grad_norm": 0.69140625, "learning_rate": 0.00019878762379474022, "loss": 4.6662, "step": 1445 }, { "epoch": 0.14993292678975303, "grad_norm": 0.71484375, "learning_rate": 0.00019878593691162203, "loss": 4.6774, "step": 1446 }, { "epoch": 0.1500366148442411, "grad_norm": 0.78515625, "learning_rate": 0.00019878424886293422, "loss": 4.6983, "step": 1447 }, { "epoch": 0.15014030289872918, "grad_norm": 0.70703125, "learning_rate": 0.00019878255964869666, "loss": 4.6673, "step": 1448 }, { "epoch": 0.15024399095321725, "grad_norm": 0.62890625, "learning_rate": 0.00019878086926892934, "loss": 4.6991, "step": 1449 }, { "epoch": 0.15034767900770532, "grad_norm": 0.671875, "learning_rate": 0.00019877917772365215, "loss": 4.7173, "step": 1450 }, { "epoch": 0.1504513670621934, "grad_norm": 0.796875, "learning_rate": 0.0001987774850128851, "loss": 4.7385, "step": 1451 }, { "epoch": 0.15055505511668146, "grad_norm": 0.68359375, "learning_rate": 0.00019877579113664816, "loss": 4.7129, "step": 1452 }, { "epoch": 0.15065874317116953, "grad_norm": 0.64453125, "learning_rate": 0.00019877409609496126, "loss": 4.6592, "step": 1453 }, { "epoch": 0.1507624312256576, "grad_norm": 0.69140625, "learning_rate": 0.00019877239988784444, "loss": 4.7102, "step": 1454 }, { "epoch": 0.15086611928014568, "grad_norm": 0.640625, "learning_rate": 0.00019877070251531772, "loss": 4.7239, "step": 1455 }, { "epoch": 0.15096980733463375, "grad_norm": 0.66015625, "learning_rate": 0.0001987690039774011, "loss": 4.6529, "step": 1456 }, { "epoch": 0.15107349538912182, "grad_norm": 0.98828125, "learning_rate": 0.00019876730427411467, "loss": 4.7155, "step": 1457 }, { "epoch": 0.1511771834436099, "grad_norm": 1.109375, "learning_rate": 0.00019876560340547844, "loss": 4.7025, "step": 1458 }, { "epoch": 0.15128087149809796, "grad_norm": 0.69921875, "learning_rate": 0.00019876390137151247, "loss": 4.6933, "step": 1459 }, { "epoch": 0.15138455955258603, "grad_norm": 0.67578125, "learning_rate": 0.00019876219817223687, "loss": 4.6941, "step": 1460 }, { "epoch": 0.1514882476070741, "grad_norm": 0.8046875, "learning_rate": 0.00019876049380767173, "loss": 4.6765, "step": 1461 }, { "epoch": 0.15159193566156218, "grad_norm": 0.87109375, "learning_rate": 0.00019875878827783713, "loss": 4.6645, "step": 1462 }, { "epoch": 0.15169562371605028, "grad_norm": 0.93359375, "learning_rate": 0.00019875708158275326, "loss": 4.6656, "step": 1463 }, { "epoch": 0.15179931177053835, "grad_norm": 0.7265625, "learning_rate": 0.00019875537372244023, "loss": 4.6743, "step": 1464 }, { "epoch": 0.15190299982502642, "grad_norm": 0.5625, "learning_rate": 0.00019875366469691814, "loss": 4.7085, "step": 1465 }, { "epoch": 0.1520066878795145, "grad_norm": 0.703125, "learning_rate": 0.0001987519545062072, "loss": 4.7214, "step": 1466 }, { "epoch": 0.15211037593400256, "grad_norm": 0.73828125, "learning_rate": 0.0001987502431503276, "loss": 4.6776, "step": 1467 }, { "epoch": 0.15221406398849063, "grad_norm": 0.6015625, "learning_rate": 0.0001987485306292995, "loss": 4.7105, "step": 1468 }, { "epoch": 0.1523177520429787, "grad_norm": 0.63671875, "learning_rate": 0.0001987468169431431, "loss": 4.685, "step": 1469 }, { "epoch": 0.15242144009746678, "grad_norm": 0.59765625, "learning_rate": 0.0001987451020918787, "loss": 4.6538, "step": 1470 }, { "epoch": 0.15252512815195485, "grad_norm": 0.58984375, "learning_rate": 0.00019874338607552642, "loss": 4.6843, "step": 1471 }, { "epoch": 0.15262881620644292, "grad_norm": 0.62109375, "learning_rate": 0.00019874166889410658, "loss": 4.6666, "step": 1472 }, { "epoch": 0.152732504260931, "grad_norm": 0.546875, "learning_rate": 0.0001987399505476394, "loss": 4.6916, "step": 1473 }, { "epoch": 0.15283619231541906, "grad_norm": 0.6171875, "learning_rate": 0.0001987382310361452, "loss": 4.6385, "step": 1474 }, { "epoch": 0.15293988036990713, "grad_norm": 0.6328125, "learning_rate": 0.00019873651035964425, "loss": 4.6691, "step": 1475 }, { "epoch": 0.1530435684243952, "grad_norm": 0.482421875, "learning_rate": 0.00019873478851815683, "loss": 4.6766, "step": 1476 }, { "epoch": 0.15314725647888328, "grad_norm": 0.5546875, "learning_rate": 0.00019873306551170328, "loss": 4.6835, "step": 1477 }, { "epoch": 0.15325094453337135, "grad_norm": 0.58203125, "learning_rate": 0.00019873134134030393, "loss": 4.6331, "step": 1478 }, { "epoch": 0.15335463258785942, "grad_norm": 0.4609375, "learning_rate": 0.0001987296160039791, "loss": 4.6557, "step": 1479 }, { "epoch": 0.1534583206423475, "grad_norm": 0.5390625, "learning_rate": 0.00019872788950274918, "loss": 4.6338, "step": 1480 }, { "epoch": 0.15356200869683556, "grad_norm": 0.5234375, "learning_rate": 0.00019872616183663451, "loss": 4.6594, "step": 1481 }, { "epoch": 0.15366569675132363, "grad_norm": 0.609375, "learning_rate": 0.00019872443300565548, "loss": 4.6934, "step": 1482 }, { "epoch": 0.1537693848058117, "grad_norm": 0.55859375, "learning_rate": 0.0001987227030098325, "loss": 4.664, "step": 1483 }, { "epoch": 0.15387307286029978, "grad_norm": 0.46875, "learning_rate": 0.000198720971849186, "loss": 4.6917, "step": 1484 }, { "epoch": 0.15397676091478785, "grad_norm": 0.58984375, "learning_rate": 0.00019871923952373635, "loss": 4.6441, "step": 1485 }, { "epoch": 0.15408044896927595, "grad_norm": 0.5078125, "learning_rate": 0.00019871750603350408, "loss": 4.6871, "step": 1486 }, { "epoch": 0.15418413702376402, "grad_norm": 0.57421875, "learning_rate": 0.00019871577137850954, "loss": 4.6767, "step": 1487 }, { "epoch": 0.1542878250782521, "grad_norm": 0.61328125, "learning_rate": 0.00019871403555877327, "loss": 4.6942, "step": 1488 }, { "epoch": 0.15439151313274016, "grad_norm": 0.6015625, "learning_rate": 0.00019871229857431572, "loss": 4.6879, "step": 1489 }, { "epoch": 0.15449520118722823, "grad_norm": 0.57421875, "learning_rate": 0.0001987105604251574, "loss": 4.6799, "step": 1490 }, { "epoch": 0.1545988892417163, "grad_norm": 0.5234375, "learning_rate": 0.0001987088211113188, "loss": 4.6938, "step": 1491 }, { "epoch": 0.15470257729620437, "grad_norm": 0.6171875, "learning_rate": 0.00019870708063282044, "loss": 4.6613, "step": 1492 }, { "epoch": 0.15480626535069245, "grad_norm": 0.61328125, "learning_rate": 0.00019870533898968287, "loss": 4.6938, "step": 1493 }, { "epoch": 0.15490995340518052, "grad_norm": 0.51171875, "learning_rate": 0.00019870359618192663, "loss": 4.6946, "step": 1494 }, { "epoch": 0.1550136414596686, "grad_norm": 0.59375, "learning_rate": 0.0001987018522095723, "loss": 4.6413, "step": 1495 }, { "epoch": 0.15511732951415666, "grad_norm": 0.5625, "learning_rate": 0.00019870010707264045, "loss": 4.6721, "step": 1496 }, { "epoch": 0.15522101756864473, "grad_norm": 0.474609375, "learning_rate": 0.00019869836077115164, "loss": 4.7004, "step": 1497 }, { "epoch": 0.1553247056231328, "grad_norm": 0.52734375, "learning_rate": 0.00019869661330512654, "loss": 4.6585, "step": 1498 }, { "epoch": 0.15542839367762087, "grad_norm": 0.5390625, "learning_rate": 0.0001986948646745857, "loss": 4.6284, "step": 1499 }, { "epoch": 0.15553208173210895, "grad_norm": 0.474609375, "learning_rate": 0.0001986931148795498, "loss": 4.6635, "step": 1500 }, { "epoch": 0.15563576978659702, "grad_norm": 0.5390625, "learning_rate": 0.00019869136392003945, "loss": 4.6898, "step": 1501 }, { "epoch": 0.1557394578410851, "grad_norm": 0.51953125, "learning_rate": 0.00019868961179607536, "loss": 4.7494, "step": 1502 }, { "epoch": 0.15584314589557316, "grad_norm": 0.5, "learning_rate": 0.00019868785850767813, "loss": 4.6805, "step": 1503 }, { "epoch": 0.15594683395006123, "grad_norm": 0.478515625, "learning_rate": 0.0001986861040548685, "loss": 4.6183, "step": 1504 }, { "epoch": 0.1560505220045493, "grad_norm": 0.498046875, "learning_rate": 0.00019868434843766717, "loss": 4.6631, "step": 1505 }, { "epoch": 0.15615421005903737, "grad_norm": 0.48046875, "learning_rate": 0.00019868259165609482, "loss": 4.6455, "step": 1506 }, { "epoch": 0.15625789811352545, "grad_norm": 0.486328125, "learning_rate": 0.00019868083371017223, "loss": 4.6403, "step": 1507 }, { "epoch": 0.15636158616801354, "grad_norm": 0.5234375, "learning_rate": 0.0001986790745999201, "loss": 4.6782, "step": 1508 }, { "epoch": 0.15646527422250162, "grad_norm": 0.54296875, "learning_rate": 0.00019867731432535922, "loss": 4.6598, "step": 1509 }, { "epoch": 0.1565689622769897, "grad_norm": 0.578125, "learning_rate": 0.0001986755528865103, "loss": 4.6547, "step": 1510 }, { "epoch": 0.15667265033147776, "grad_norm": 0.53125, "learning_rate": 0.00019867379028339416, "loss": 4.6414, "step": 1511 }, { "epoch": 0.15677633838596583, "grad_norm": 0.56640625, "learning_rate": 0.0001986720265160316, "loss": 4.6567, "step": 1512 }, { "epoch": 0.1568800264404539, "grad_norm": 0.70703125, "learning_rate": 0.00019867026158444344, "loss": 4.6425, "step": 1513 }, { "epoch": 0.15698371449494197, "grad_norm": 0.66015625, "learning_rate": 0.0001986684954886505, "loss": 4.6946, "step": 1514 }, { "epoch": 0.15708740254943004, "grad_norm": 0.6328125, "learning_rate": 0.0001986667282286736, "loss": 4.693, "step": 1515 }, { "epoch": 0.15719109060391812, "grad_norm": 0.65625, "learning_rate": 0.0001986649598045336, "loss": 4.673, "step": 1516 }, { "epoch": 0.1572947786584062, "grad_norm": 0.640625, "learning_rate": 0.00019866319021625138, "loss": 4.6624, "step": 1517 }, { "epoch": 0.15739846671289426, "grad_norm": 0.75390625, "learning_rate": 0.0001986614194638478, "loss": 4.6387, "step": 1518 }, { "epoch": 0.15750215476738233, "grad_norm": 0.84765625, "learning_rate": 0.00019865964754734377, "loss": 4.6426, "step": 1519 }, { "epoch": 0.1576058428218704, "grad_norm": 0.65625, "learning_rate": 0.00019865787446676016, "loss": 4.6452, "step": 1520 }, { "epoch": 0.15770953087635847, "grad_norm": 0.65234375, "learning_rate": 0.00019865610022211795, "loss": 4.6182, "step": 1521 }, { "epoch": 0.15781321893084654, "grad_norm": 0.8359375, "learning_rate": 0.000198654324813438, "loss": 4.6999, "step": 1522 }, { "epoch": 0.15791690698533462, "grad_norm": 0.890625, "learning_rate": 0.00019865254824074136, "loss": 4.6654, "step": 1523 }, { "epoch": 0.1580205950398227, "grad_norm": 0.87890625, "learning_rate": 0.0001986507705040489, "loss": 4.7078, "step": 1524 }, { "epoch": 0.15812428309431076, "grad_norm": 0.65234375, "learning_rate": 0.0001986489916033816, "loss": 4.7079, "step": 1525 }, { "epoch": 0.15822797114879883, "grad_norm": 0.8125, "learning_rate": 0.0001986472115387605, "loss": 4.6934, "step": 1526 }, { "epoch": 0.1583316592032869, "grad_norm": 1.015625, "learning_rate": 0.00019864543031020658, "loss": 4.6808, "step": 1527 }, { "epoch": 0.15843534725777497, "grad_norm": 0.75390625, "learning_rate": 0.00019864364791774084, "loss": 4.6659, "step": 1528 }, { "epoch": 0.15853903531226304, "grad_norm": 0.6953125, "learning_rate": 0.00019864186436138433, "loss": 4.6941, "step": 1529 }, { "epoch": 0.15864272336675114, "grad_norm": 0.70703125, "learning_rate": 0.0001986400796411581, "loss": 4.682, "step": 1530 }, { "epoch": 0.15874641142123921, "grad_norm": 0.734375, "learning_rate": 0.0001986382937570832, "loss": 4.6594, "step": 1531 }, { "epoch": 0.15885009947572729, "grad_norm": 0.79296875, "learning_rate": 0.0001986365067091807, "loss": 4.681, "step": 1532 }, { "epoch": 0.15895378753021536, "grad_norm": 0.73828125, "learning_rate": 0.00019863471849747167, "loss": 4.7219, "step": 1533 }, { "epoch": 0.15905747558470343, "grad_norm": 0.828125, "learning_rate": 0.0001986329291219772, "loss": 4.6721, "step": 1534 }, { "epoch": 0.1591611636391915, "grad_norm": 0.6171875, "learning_rate": 0.00019863113858271846, "loss": 4.6567, "step": 1535 }, { "epoch": 0.15926485169367957, "grad_norm": 0.63671875, "learning_rate": 0.00019862934687971655, "loss": 4.6987, "step": 1536 }, { "epoch": 0.15936853974816764, "grad_norm": 0.78125, "learning_rate": 0.00019862755401299257, "loss": 4.6722, "step": 1537 }, { "epoch": 0.1594722278026557, "grad_norm": 0.85546875, "learning_rate": 0.00019862575998256773, "loss": 4.6555, "step": 1538 }, { "epoch": 0.15957591585714379, "grad_norm": 0.82421875, "learning_rate": 0.00019862396478846316, "loss": 4.6425, "step": 1539 }, { "epoch": 0.15967960391163186, "grad_norm": 0.796875, "learning_rate": 0.0001986221684307001, "loss": 4.6574, "step": 1540 }, { "epoch": 0.15978329196611993, "grad_norm": 0.765625, "learning_rate": 0.00019862037090929966, "loss": 4.6743, "step": 1541 }, { "epoch": 0.159886980020608, "grad_norm": 0.8046875, "learning_rate": 0.00019861857222428308, "loss": 4.6539, "step": 1542 }, { "epoch": 0.15999066807509607, "grad_norm": 0.80859375, "learning_rate": 0.00019861677237567162, "loss": 4.6581, "step": 1543 }, { "epoch": 0.16009435612958414, "grad_norm": 0.7890625, "learning_rate": 0.00019861497136348648, "loss": 4.6538, "step": 1544 }, { "epoch": 0.1601980441840722, "grad_norm": 0.77734375, "learning_rate": 0.0001986131691877489, "loss": 4.6637, "step": 1545 }, { "epoch": 0.16030173223856028, "grad_norm": 0.703125, "learning_rate": 0.00019861136584848019, "loss": 4.6621, "step": 1546 }, { "epoch": 0.16040542029304836, "grad_norm": 0.7578125, "learning_rate": 0.0001986095613457016, "loss": 4.6788, "step": 1547 }, { "epoch": 0.16050910834753643, "grad_norm": 0.84765625, "learning_rate": 0.0001986077556794344, "loss": 4.6588, "step": 1548 }, { "epoch": 0.1606127964020245, "grad_norm": 0.93359375, "learning_rate": 0.00019860594884969993, "loss": 4.6647, "step": 1549 }, { "epoch": 0.16071648445651257, "grad_norm": 1.0625, "learning_rate": 0.0001986041408565195, "loss": 4.6423, "step": 1550 }, { "epoch": 0.16082017251100064, "grad_norm": 0.890625, "learning_rate": 0.0001986023316999144, "loss": 4.6748, "step": 1551 }, { "epoch": 0.1609238605654887, "grad_norm": 0.6953125, "learning_rate": 0.00019860052137990605, "loss": 4.6763, "step": 1552 }, { "epoch": 0.1610275486199768, "grad_norm": 0.84765625, "learning_rate": 0.00019859870989651576, "loss": 4.6478, "step": 1553 }, { "epoch": 0.16113123667446488, "grad_norm": 0.96875, "learning_rate": 0.00019859689724976488, "loss": 4.6574, "step": 1554 }, { "epoch": 0.16123492472895296, "grad_norm": 0.953125, "learning_rate": 0.0001985950834396749, "loss": 4.6907, "step": 1555 }, { "epoch": 0.16133861278344103, "grad_norm": 0.91796875, "learning_rate": 0.00019859326846626712, "loss": 4.6621, "step": 1556 }, { "epoch": 0.1614423008379291, "grad_norm": 0.83984375, "learning_rate": 0.000198591452329563, "loss": 4.6589, "step": 1557 }, { "epoch": 0.16154598889241717, "grad_norm": 0.69921875, "learning_rate": 0.00019858963502958395, "loss": 4.7071, "step": 1558 }, { "epoch": 0.16164967694690524, "grad_norm": 0.71875, "learning_rate": 0.00019858781656635142, "loss": 4.6513, "step": 1559 }, { "epoch": 0.1617533650013933, "grad_norm": 0.94140625, "learning_rate": 0.00019858599693988688, "loss": 4.6913, "step": 1560 }, { "epoch": 0.16185705305588138, "grad_norm": 1.2265625, "learning_rate": 0.00019858417615021176, "loss": 4.6505, "step": 1561 }, { "epoch": 0.16196074111036945, "grad_norm": 0.765625, "learning_rate": 0.00019858235419734758, "loss": 4.6975, "step": 1562 }, { "epoch": 0.16206442916485753, "grad_norm": 1.0546875, "learning_rate": 0.00019858053108131585, "loss": 4.6967, "step": 1563 }, { "epoch": 0.1621681172193456, "grad_norm": 1.203125, "learning_rate": 0.00019857870680213804, "loss": 4.6619, "step": 1564 }, { "epoch": 0.16227180527383367, "grad_norm": 0.81640625, "learning_rate": 0.0001985768813598357, "loss": 4.6586, "step": 1565 }, { "epoch": 0.16237549332832174, "grad_norm": 0.8984375, "learning_rate": 0.00019857505475443033, "loss": 4.6451, "step": 1566 }, { "epoch": 0.1624791813828098, "grad_norm": 0.9921875, "learning_rate": 0.00019857322698594353, "loss": 4.6634, "step": 1567 }, { "epoch": 0.16258286943729788, "grad_norm": 0.87890625, "learning_rate": 0.00019857139805439688, "loss": 4.6611, "step": 1568 }, { "epoch": 0.16268655749178595, "grad_norm": 0.671875, "learning_rate": 0.0001985695679598119, "loss": 4.6922, "step": 1569 }, { "epoch": 0.16279024554627403, "grad_norm": 0.8359375, "learning_rate": 0.0001985677367022102, "loss": 4.5804, "step": 1570 }, { "epoch": 0.1628939336007621, "grad_norm": 0.9140625, "learning_rate": 0.00019856590428161342, "loss": 4.6266, "step": 1571 }, { "epoch": 0.16299762165525017, "grad_norm": 0.85546875, "learning_rate": 0.00019856407069804316, "loss": 4.6377, "step": 1572 }, { "epoch": 0.16310130970973824, "grad_norm": 0.83203125, "learning_rate": 0.00019856223595152104, "loss": 4.6668, "step": 1573 }, { "epoch": 0.1632049977642263, "grad_norm": 0.77734375, "learning_rate": 0.0001985604000420687, "loss": 4.6548, "step": 1574 }, { "epoch": 0.1633086858187144, "grad_norm": 0.73046875, "learning_rate": 0.00019855856296970784, "loss": 4.7052, "step": 1575 }, { "epoch": 0.16341237387320248, "grad_norm": 0.69140625, "learning_rate": 0.00019855672473446012, "loss": 4.6898, "step": 1576 }, { "epoch": 0.16351606192769055, "grad_norm": 0.796875, "learning_rate": 0.00019855488533634724, "loss": 4.6651, "step": 1577 }, { "epoch": 0.16361974998217862, "grad_norm": 0.79296875, "learning_rate": 0.00019855304477539085, "loss": 4.6913, "step": 1578 }, { "epoch": 0.1637234380366667, "grad_norm": 0.828125, "learning_rate": 0.00019855120305161273, "loss": 4.6939, "step": 1579 }, { "epoch": 0.16382712609115477, "grad_norm": 0.7734375, "learning_rate": 0.0001985493601650346, "loss": 4.677, "step": 1580 }, { "epoch": 0.16393081414564284, "grad_norm": 0.69921875, "learning_rate": 0.0001985475161156782, "loss": 4.6388, "step": 1581 }, { "epoch": 0.1640345022001309, "grad_norm": 0.7421875, "learning_rate": 0.00019854567090356526, "loss": 4.6185, "step": 1582 }, { "epoch": 0.16413819025461898, "grad_norm": 0.90625, "learning_rate": 0.00019854382452871757, "loss": 4.6091, "step": 1583 }, { "epoch": 0.16424187830910705, "grad_norm": 0.86328125, "learning_rate": 0.00019854197699115692, "loss": 4.683, "step": 1584 }, { "epoch": 0.16434556636359512, "grad_norm": 0.6875, "learning_rate": 0.0001985401282909051, "loss": 4.6445, "step": 1585 }, { "epoch": 0.1644492544180832, "grad_norm": 0.7890625, "learning_rate": 0.00019853827842798393, "loss": 4.6586, "step": 1586 }, { "epoch": 0.16455294247257127, "grad_norm": 0.63671875, "learning_rate": 0.00019853642740241522, "loss": 4.6508, "step": 1587 }, { "epoch": 0.16465663052705934, "grad_norm": 0.7265625, "learning_rate": 0.00019853457521422084, "loss": 4.6936, "step": 1588 }, { "epoch": 0.1647603185815474, "grad_norm": 0.6015625, "learning_rate": 0.00019853272186342262, "loss": 4.639, "step": 1589 }, { "epoch": 0.16486400663603548, "grad_norm": 0.59765625, "learning_rate": 0.00019853086735004247, "loss": 4.6583, "step": 1590 }, { "epoch": 0.16496769469052355, "grad_norm": 0.72265625, "learning_rate": 0.0001985290116741022, "loss": 4.6363, "step": 1591 }, { "epoch": 0.16507138274501162, "grad_norm": 0.7421875, "learning_rate": 0.00019852715483562375, "loss": 4.6199, "step": 1592 }, { "epoch": 0.1651750707994997, "grad_norm": 0.7578125, "learning_rate": 0.00019852529683462902, "loss": 4.6458, "step": 1593 }, { "epoch": 0.16527875885398777, "grad_norm": 0.62109375, "learning_rate": 0.00019852343767113993, "loss": 4.6503, "step": 1594 }, { "epoch": 0.16538244690847584, "grad_norm": 0.66015625, "learning_rate": 0.00019852157734517843, "loss": 4.6741, "step": 1595 }, { "epoch": 0.1654861349629639, "grad_norm": 0.6484375, "learning_rate": 0.00019851971585676644, "loss": 4.6378, "step": 1596 }, { "epoch": 0.165589823017452, "grad_norm": 0.7109375, "learning_rate": 0.00019851785320592596, "loss": 4.6136, "step": 1597 }, { "epoch": 0.16569351107194008, "grad_norm": 0.70703125, "learning_rate": 0.00019851598939267894, "loss": 4.6617, "step": 1598 }, { "epoch": 0.16579719912642815, "grad_norm": 0.62890625, "learning_rate": 0.0001985141244170474, "loss": 4.67, "step": 1599 }, { "epoch": 0.16590088718091622, "grad_norm": 0.65234375, "learning_rate": 0.0001985122582790533, "loss": 4.6461, "step": 1600 }, { "epoch": 0.1660045752354043, "grad_norm": 0.69921875, "learning_rate": 0.00019851039097871872, "loss": 4.6634, "step": 1601 }, { "epoch": 0.16610826328989237, "grad_norm": 0.62890625, "learning_rate": 0.00019850852251606562, "loss": 4.6191, "step": 1602 }, { "epoch": 0.16621195134438044, "grad_norm": 0.8125, "learning_rate": 0.0001985066528911161, "loss": 4.6047, "step": 1603 }, { "epoch": 0.1663156393988685, "grad_norm": 0.83203125, "learning_rate": 0.00019850478210389218, "loss": 4.6388, "step": 1604 }, { "epoch": 0.16641932745335658, "grad_norm": 0.7578125, "learning_rate": 0.00019850291015441598, "loss": 4.6735, "step": 1605 }, { "epoch": 0.16652301550784465, "grad_norm": 0.73828125, "learning_rate": 0.00019850103704270957, "loss": 4.6201, "step": 1606 }, { "epoch": 0.16662670356233272, "grad_norm": 0.66015625, "learning_rate": 0.000198499162768795, "loss": 4.6704, "step": 1607 }, { "epoch": 0.1667303916168208, "grad_norm": 0.8203125, "learning_rate": 0.00019849728733269446, "loss": 4.6382, "step": 1608 }, { "epoch": 0.16683407967130887, "grad_norm": 0.97265625, "learning_rate": 0.00019849541073443006, "loss": 4.6594, "step": 1609 }, { "epoch": 0.16693776772579694, "grad_norm": 0.91015625, "learning_rate": 0.00019849353297402388, "loss": 4.6754, "step": 1610 }, { "epoch": 0.167041455780285, "grad_norm": 0.9453125, "learning_rate": 0.00019849165405149818, "loss": 4.6522, "step": 1611 }, { "epoch": 0.16714514383477308, "grad_norm": 1.0703125, "learning_rate": 0.00019848977396687504, "loss": 4.636, "step": 1612 }, { "epoch": 0.16724883188926115, "grad_norm": 0.96484375, "learning_rate": 0.00019848789272017668, "loss": 4.6058, "step": 1613 }, { "epoch": 0.16735251994374922, "grad_norm": 0.78515625, "learning_rate": 0.0001984860103114253, "loss": 4.6336, "step": 1614 }, { "epoch": 0.1674562079982373, "grad_norm": 0.75, "learning_rate": 0.00019848412674064306, "loss": 4.6957, "step": 1615 }, { "epoch": 0.16755989605272537, "grad_norm": 0.640625, "learning_rate": 0.00019848224200785228, "loss": 4.6593, "step": 1616 }, { "epoch": 0.16766358410721344, "grad_norm": 0.81640625, "learning_rate": 0.00019848035611307513, "loss": 4.6497, "step": 1617 }, { "epoch": 0.1677672721617015, "grad_norm": 1.1640625, "learning_rate": 0.00019847846905633385, "loss": 4.598, "step": 1618 }, { "epoch": 0.16787096021618958, "grad_norm": 0.90234375, "learning_rate": 0.00019847658083765076, "loss": 4.6646, "step": 1619 }, { "epoch": 0.16797464827067768, "grad_norm": 0.62890625, "learning_rate": 0.0001984746914570481, "loss": 4.6261, "step": 1620 }, { "epoch": 0.16807833632516575, "grad_norm": 0.7890625, "learning_rate": 0.0001984728009145482, "loss": 4.6637, "step": 1621 }, { "epoch": 0.16818202437965382, "grad_norm": 0.9609375, "learning_rate": 0.0001984709092101733, "loss": 4.6613, "step": 1622 }, { "epoch": 0.1682857124341419, "grad_norm": 0.81640625, "learning_rate": 0.00019846901634394576, "loss": 4.6403, "step": 1623 }, { "epoch": 0.16838940048862996, "grad_norm": 0.73828125, "learning_rate": 0.00019846712231588796, "loss": 4.647, "step": 1624 }, { "epoch": 0.16849308854311804, "grad_norm": 0.69140625, "learning_rate": 0.00019846522712602216, "loss": 4.6536, "step": 1625 }, { "epoch": 0.1685967765976061, "grad_norm": 0.75, "learning_rate": 0.00019846333077437077, "loss": 4.6393, "step": 1626 }, { "epoch": 0.16870046465209418, "grad_norm": 0.80859375, "learning_rate": 0.00019846143326095615, "loss": 4.6717, "step": 1627 }, { "epoch": 0.16880415270658225, "grad_norm": 0.69921875, "learning_rate": 0.0001984595345858007, "loss": 4.6327, "step": 1628 }, { "epoch": 0.16890784076107032, "grad_norm": 0.69921875, "learning_rate": 0.00019845763474892681, "loss": 4.6776, "step": 1629 }, { "epoch": 0.1690115288155584, "grad_norm": 0.66796875, "learning_rate": 0.00019845573375035694, "loss": 4.6468, "step": 1630 }, { "epoch": 0.16911521687004646, "grad_norm": 0.78515625, "learning_rate": 0.00019845383159011347, "loss": 4.6192, "step": 1631 }, { "epoch": 0.16921890492453454, "grad_norm": 0.7421875, "learning_rate": 0.00019845192826821884, "loss": 4.6868, "step": 1632 }, { "epoch": 0.1693225929790226, "grad_norm": 0.74609375, "learning_rate": 0.00019845002378469554, "loss": 4.6603, "step": 1633 }, { "epoch": 0.16942628103351068, "grad_norm": 0.83984375, "learning_rate": 0.000198448118139566, "loss": 4.6291, "step": 1634 }, { "epoch": 0.16952996908799875, "grad_norm": 0.6796875, "learning_rate": 0.00019844621133285276, "loss": 4.5767, "step": 1635 }, { "epoch": 0.16963365714248682, "grad_norm": 0.6875, "learning_rate": 0.0001984443033645783, "loss": 4.571, "step": 1636 }, { "epoch": 0.1697373451969749, "grad_norm": 0.75390625, "learning_rate": 0.00019844239423476507, "loss": 4.6399, "step": 1637 }, { "epoch": 0.16984103325146296, "grad_norm": 0.78515625, "learning_rate": 0.00019844048394343568, "loss": 4.6592, "step": 1638 }, { "epoch": 0.16994472130595104, "grad_norm": 0.6875, "learning_rate": 0.00019843857249061264, "loss": 4.6768, "step": 1639 }, { "epoch": 0.1700484093604391, "grad_norm": 0.7265625, "learning_rate": 0.00019843665987631849, "loss": 4.671, "step": 1640 }, { "epoch": 0.17015209741492718, "grad_norm": 0.71484375, "learning_rate": 0.00019843474610057576, "loss": 4.6523, "step": 1641 }, { "epoch": 0.17025578546941528, "grad_norm": 0.6953125, "learning_rate": 0.00019843283116340713, "loss": 4.6186, "step": 1642 }, { "epoch": 0.17035947352390335, "grad_norm": 0.6875, "learning_rate": 0.00019843091506483514, "loss": 4.6413, "step": 1643 }, { "epoch": 0.17046316157839142, "grad_norm": 0.73046875, "learning_rate": 0.00019842899780488237, "loss": 4.66, "step": 1644 }, { "epoch": 0.1705668496328795, "grad_norm": 0.796875, "learning_rate": 0.0001984270793835715, "loss": 4.6728, "step": 1645 }, { "epoch": 0.17067053768736756, "grad_norm": 0.59375, "learning_rate": 0.00019842515980092514, "loss": 4.6588, "step": 1646 }, { "epoch": 0.17077422574185563, "grad_norm": 0.73046875, "learning_rate": 0.00019842323905696588, "loss": 4.6629, "step": 1647 }, { "epoch": 0.1708779137963437, "grad_norm": 0.76171875, "learning_rate": 0.0001984213171517165, "loss": 4.6732, "step": 1648 }, { "epoch": 0.17098160185083178, "grad_norm": 0.73046875, "learning_rate": 0.00019841939408519958, "loss": 4.6467, "step": 1649 }, { "epoch": 0.17108528990531985, "grad_norm": 0.62109375, "learning_rate": 0.00019841746985743786, "loss": 4.6683, "step": 1650 }, { "epoch": 0.17118897795980792, "grad_norm": 0.625, "learning_rate": 0.00019841554446845404, "loss": 4.6328, "step": 1651 }, { "epoch": 0.171292666014296, "grad_norm": 0.73828125, "learning_rate": 0.0001984136179182708, "loss": 4.6829, "step": 1652 }, { "epoch": 0.17139635406878406, "grad_norm": 0.65234375, "learning_rate": 0.0001984116902069109, "loss": 4.6603, "step": 1653 }, { "epoch": 0.17150004212327213, "grad_norm": 0.6875, "learning_rate": 0.00019840976133439706, "loss": 4.6468, "step": 1654 }, { "epoch": 0.1716037301777602, "grad_norm": 0.77734375, "learning_rate": 0.00019840783130075206, "loss": 4.6419, "step": 1655 }, { "epoch": 0.17170741823224828, "grad_norm": 0.78125, "learning_rate": 0.0001984059001059987, "loss": 4.5958, "step": 1656 }, { "epoch": 0.17181110628673635, "grad_norm": 0.7734375, "learning_rate": 0.00019840396775015976, "loss": 4.6597, "step": 1657 }, { "epoch": 0.17191479434122442, "grad_norm": 0.8046875, "learning_rate": 0.000198402034233258, "loss": 4.6056, "step": 1658 }, { "epoch": 0.1720184823957125, "grad_norm": 0.875, "learning_rate": 0.00019840009955531622, "loss": 4.6602, "step": 1659 }, { "epoch": 0.17212217045020056, "grad_norm": 1.0234375, "learning_rate": 0.0001983981637163573, "loss": 4.6517, "step": 1660 }, { "epoch": 0.17222585850468863, "grad_norm": 0.80078125, "learning_rate": 0.00019839622671640405, "loss": 4.6581, "step": 1661 }, { "epoch": 0.1723295465591767, "grad_norm": 0.9921875, "learning_rate": 0.00019839428855547935, "loss": 4.6584, "step": 1662 }, { "epoch": 0.17243323461366478, "grad_norm": 0.91015625, "learning_rate": 0.00019839234923360604, "loss": 4.6582, "step": 1663 }, { "epoch": 0.17253692266815288, "grad_norm": 0.75390625, "learning_rate": 0.00019839040875080702, "loss": 4.6181, "step": 1664 }, { "epoch": 0.17264061072264095, "grad_norm": 0.76171875, "learning_rate": 0.0001983884671071052, "loss": 4.6752, "step": 1665 }, { "epoch": 0.17274429877712902, "grad_norm": 0.80078125, "learning_rate": 0.00019838652430252346, "loss": 4.6136, "step": 1666 }, { "epoch": 0.1728479868316171, "grad_norm": 0.953125, "learning_rate": 0.0001983845803370847, "loss": 4.6409, "step": 1667 }, { "epoch": 0.17295167488610516, "grad_norm": 0.90234375, "learning_rate": 0.00019838263521081191, "loss": 4.578, "step": 1668 }, { "epoch": 0.17305536294059323, "grad_norm": 0.73046875, "learning_rate": 0.000198380688923728, "loss": 4.6316, "step": 1669 }, { "epoch": 0.1731590509950813, "grad_norm": 0.8359375, "learning_rate": 0.000198378741475856, "loss": 4.6198, "step": 1670 }, { "epoch": 0.17326273904956938, "grad_norm": 0.8671875, "learning_rate": 0.0001983767928672188, "loss": 4.6566, "step": 1671 }, { "epoch": 0.17336642710405745, "grad_norm": 0.62890625, "learning_rate": 0.00019837484309783945, "loss": 4.6642, "step": 1672 }, { "epoch": 0.17347011515854552, "grad_norm": 0.69140625, "learning_rate": 0.00019837289216774093, "loss": 4.6507, "step": 1673 }, { "epoch": 0.1735738032130336, "grad_norm": 0.83203125, "learning_rate": 0.0001983709400769463, "loss": 4.5731, "step": 1674 }, { "epoch": 0.17367749126752166, "grad_norm": 0.765625, "learning_rate": 0.00019836898682547852, "loss": 4.6674, "step": 1675 }, { "epoch": 0.17378117932200973, "grad_norm": 0.69921875, "learning_rate": 0.00019836703241336067, "loss": 4.6437, "step": 1676 }, { "epoch": 0.1738848673764978, "grad_norm": 0.703125, "learning_rate": 0.00019836507684061584, "loss": 4.6252, "step": 1677 }, { "epoch": 0.17398855543098588, "grad_norm": 0.8125, "learning_rate": 0.00019836312010726708, "loss": 4.6167, "step": 1678 }, { "epoch": 0.17409224348547395, "grad_norm": 1.0703125, "learning_rate": 0.00019836116221333747, "loss": 4.6435, "step": 1679 }, { "epoch": 0.17419593153996202, "grad_norm": 0.93359375, "learning_rate": 0.0001983592031588501, "loss": 4.6064, "step": 1680 }, { "epoch": 0.1742996195944501, "grad_norm": 0.70703125, "learning_rate": 0.00019835724294382814, "loss": 4.5754, "step": 1681 }, { "epoch": 0.17440330764893816, "grad_norm": 0.6171875, "learning_rate": 0.00019835528156829466, "loss": 4.5984, "step": 1682 }, { "epoch": 0.17450699570342623, "grad_norm": 0.76171875, "learning_rate": 0.00019835331903227284, "loss": 4.6164, "step": 1683 }, { "epoch": 0.1746106837579143, "grad_norm": 0.84375, "learning_rate": 0.0001983513553357858, "loss": 4.6206, "step": 1684 }, { "epoch": 0.17471437181240237, "grad_norm": 0.68359375, "learning_rate": 0.00019834939047885675, "loss": 4.5944, "step": 1685 }, { "epoch": 0.17481805986689045, "grad_norm": 0.78515625, "learning_rate": 0.0001983474244615088, "loss": 4.6713, "step": 1686 }, { "epoch": 0.17492174792137855, "grad_norm": 0.8359375, "learning_rate": 0.00019834545728376527, "loss": 4.646, "step": 1687 }, { "epoch": 0.17502543597586662, "grad_norm": 0.71875, "learning_rate": 0.00019834348894564924, "loss": 4.6358, "step": 1688 }, { "epoch": 0.1751291240303547, "grad_norm": 0.66015625, "learning_rate": 0.00019834151944718404, "loss": 4.6095, "step": 1689 }, { "epoch": 0.17523281208484276, "grad_norm": 0.7890625, "learning_rate": 0.00019833954878839283, "loss": 4.6115, "step": 1690 }, { "epoch": 0.17533650013933083, "grad_norm": 0.796875, "learning_rate": 0.0001983375769692989, "loss": 4.6866, "step": 1691 }, { "epoch": 0.1754401881938189, "grad_norm": 0.78125, "learning_rate": 0.00019833560398992552, "loss": 4.6569, "step": 1692 }, { "epoch": 0.17554387624830697, "grad_norm": 0.66015625, "learning_rate": 0.00019833362985029594, "loss": 4.6235, "step": 1693 }, { "epoch": 0.17564756430279505, "grad_norm": 0.81640625, "learning_rate": 0.0001983316545504335, "loss": 4.6602, "step": 1694 }, { "epoch": 0.17575125235728312, "grad_norm": 0.875, "learning_rate": 0.00019832967809036144, "loss": 4.6386, "step": 1695 }, { "epoch": 0.1758549404117712, "grad_norm": 0.76171875, "learning_rate": 0.00019832770047010316, "loss": 4.6023, "step": 1696 }, { "epoch": 0.17595862846625926, "grad_norm": 0.67578125, "learning_rate": 0.00019832572168968193, "loss": 4.6724, "step": 1697 }, { "epoch": 0.17606231652074733, "grad_norm": 0.83203125, "learning_rate": 0.00019832374174912111, "loss": 4.5923, "step": 1698 }, { "epoch": 0.1761660045752354, "grad_norm": 0.76953125, "learning_rate": 0.00019832176064844408, "loss": 4.6562, "step": 1699 }, { "epoch": 0.17626969262972347, "grad_norm": 0.6640625, "learning_rate": 0.00019831977838767422, "loss": 4.5973, "step": 1700 }, { "epoch": 0.17637338068421154, "grad_norm": 0.859375, "learning_rate": 0.0001983177949668349, "loss": 4.6508, "step": 1701 }, { "epoch": 0.17647706873869962, "grad_norm": 0.78515625, "learning_rate": 0.0001983158103859495, "loss": 4.643, "step": 1702 }, { "epoch": 0.1765807567931877, "grad_norm": 0.546875, "learning_rate": 0.00019831382464504147, "loss": 4.6174, "step": 1703 }, { "epoch": 0.17668444484767576, "grad_norm": 0.64453125, "learning_rate": 0.00019831183774413424, "loss": 4.6328, "step": 1704 }, { "epoch": 0.17678813290216383, "grad_norm": 0.6171875, "learning_rate": 0.00019830984968325122, "loss": 4.6157, "step": 1705 }, { "epoch": 0.1768918209566519, "grad_norm": 0.5703125, "learning_rate": 0.00019830786046241592, "loss": 4.6173, "step": 1706 }, { "epoch": 0.17699550901113997, "grad_norm": 0.671875, "learning_rate": 0.00019830587008165177, "loss": 4.606, "step": 1707 }, { "epoch": 0.17709919706562804, "grad_norm": 0.65234375, "learning_rate": 0.0001983038785409823, "loss": 4.5933, "step": 1708 }, { "epoch": 0.17720288512011614, "grad_norm": 0.640625, "learning_rate": 0.00019830188584043094, "loss": 4.6165, "step": 1709 }, { "epoch": 0.17730657317460422, "grad_norm": 0.68359375, "learning_rate": 0.00019829989198002124, "loss": 4.6634, "step": 1710 }, { "epoch": 0.1774102612290923, "grad_norm": 0.59375, "learning_rate": 0.00019829789695977672, "loss": 4.5776, "step": 1711 }, { "epoch": 0.17751394928358036, "grad_norm": 0.69140625, "learning_rate": 0.00019829590077972094, "loss": 4.6129, "step": 1712 }, { "epoch": 0.17761763733806843, "grad_norm": 0.8671875, "learning_rate": 0.00019829390343987743, "loss": 4.5699, "step": 1713 }, { "epoch": 0.1777213253925565, "grad_norm": 0.921875, "learning_rate": 0.00019829190494026974, "loss": 4.6348, "step": 1714 }, { "epoch": 0.17782501344704457, "grad_norm": 0.87109375, "learning_rate": 0.00019828990528092147, "loss": 4.635, "step": 1715 }, { "epoch": 0.17792870150153264, "grad_norm": 0.828125, "learning_rate": 0.00019828790446185622, "loss": 4.6516, "step": 1716 }, { "epoch": 0.17803238955602071, "grad_norm": 0.7890625, "learning_rate": 0.0001982859024830976, "loss": 4.6052, "step": 1717 }, { "epoch": 0.1781360776105088, "grad_norm": 0.6953125, "learning_rate": 0.0001982838993446692, "loss": 4.6519, "step": 1718 }, { "epoch": 0.17823976566499686, "grad_norm": 0.63671875, "learning_rate": 0.00019828189504659472, "loss": 4.6294, "step": 1719 }, { "epoch": 0.17834345371948493, "grad_norm": 0.5546875, "learning_rate": 0.00019827988958889776, "loss": 4.6044, "step": 1720 }, { "epoch": 0.178447141773973, "grad_norm": 0.640625, "learning_rate": 0.00019827788297160196, "loss": 4.6074, "step": 1721 }, { "epoch": 0.17855082982846107, "grad_norm": 0.67578125, "learning_rate": 0.00019827587519473107, "loss": 4.6163, "step": 1722 }, { "epoch": 0.17865451788294914, "grad_norm": 0.69140625, "learning_rate": 0.00019827386625830871, "loss": 4.6524, "step": 1723 }, { "epoch": 0.17875820593743721, "grad_norm": 0.66015625, "learning_rate": 0.0001982718561623586, "loss": 4.5932, "step": 1724 }, { "epoch": 0.17886189399192529, "grad_norm": 0.59375, "learning_rate": 0.00019826984490690447, "loss": 4.5631, "step": 1725 }, { "epoch": 0.17896558204641336, "grad_norm": 0.69921875, "learning_rate": 0.00019826783249197004, "loss": 4.6035, "step": 1726 }, { "epoch": 0.17906927010090143, "grad_norm": 0.7421875, "learning_rate": 0.00019826581891757908, "loss": 4.6006, "step": 1727 }, { "epoch": 0.1791729581553895, "grad_norm": 0.71875, "learning_rate": 0.00019826380418375532, "loss": 4.6126, "step": 1728 }, { "epoch": 0.17927664620987757, "grad_norm": 0.6171875, "learning_rate": 0.00019826178829052254, "loss": 4.6404, "step": 1729 }, { "epoch": 0.17938033426436564, "grad_norm": 0.68359375, "learning_rate": 0.0001982597712379045, "loss": 4.6289, "step": 1730 }, { "epoch": 0.17948402231885374, "grad_norm": 0.6328125, "learning_rate": 0.00019825775302592503, "loss": 4.6162, "step": 1731 }, { "epoch": 0.1795877103733418, "grad_norm": 0.71875, "learning_rate": 0.00019825573365460798, "loss": 4.615, "step": 1732 }, { "epoch": 0.17969139842782988, "grad_norm": 0.76171875, "learning_rate": 0.0001982537131239771, "loss": 4.6037, "step": 1733 }, { "epoch": 0.17979508648231796, "grad_norm": 0.63671875, "learning_rate": 0.00019825169143405623, "loss": 4.6038, "step": 1734 }, { "epoch": 0.17989877453680603, "grad_norm": 0.62109375, "learning_rate": 0.00019824966858486933, "loss": 4.6219, "step": 1735 }, { "epoch": 0.1800024625912941, "grad_norm": 0.7734375, "learning_rate": 0.00019824764457644016, "loss": 4.6097, "step": 1736 }, { "epoch": 0.18010615064578217, "grad_norm": 0.74609375, "learning_rate": 0.00019824561940879262, "loss": 4.5909, "step": 1737 }, { "epoch": 0.18020983870027024, "grad_norm": 0.70703125, "learning_rate": 0.00019824359308195068, "loss": 4.5772, "step": 1738 }, { "epoch": 0.1803135267547583, "grad_norm": 0.6328125, "learning_rate": 0.00019824156559593813, "loss": 4.6719, "step": 1739 }, { "epoch": 0.18041721480924638, "grad_norm": 0.65234375, "learning_rate": 0.00019823953695077896, "loss": 4.5728, "step": 1740 }, { "epoch": 0.18052090286373446, "grad_norm": 0.7109375, "learning_rate": 0.0001982375071464971, "loss": 4.6433, "step": 1741 }, { "epoch": 0.18062459091822253, "grad_norm": 0.74609375, "learning_rate": 0.00019823547618311654, "loss": 4.6155, "step": 1742 }, { "epoch": 0.1807282789727106, "grad_norm": 0.7421875, "learning_rate": 0.00019823344406066115, "loss": 4.6704, "step": 1743 }, { "epoch": 0.18083196702719867, "grad_norm": 0.88671875, "learning_rate": 0.00019823141077915496, "loss": 4.6568, "step": 1744 }, { "epoch": 0.18093565508168674, "grad_norm": 1.046875, "learning_rate": 0.00019822937633862198, "loss": 4.6189, "step": 1745 }, { "epoch": 0.1810393431361748, "grad_norm": 0.84765625, "learning_rate": 0.00019822734073908618, "loss": 4.631, "step": 1746 }, { "epoch": 0.18114303119066288, "grad_norm": 0.63671875, "learning_rate": 0.0001982253039805716, "loss": 4.5911, "step": 1747 }, { "epoch": 0.18124671924515096, "grad_norm": 0.9453125, "learning_rate": 0.00019822326606310227, "loss": 4.6091, "step": 1748 }, { "epoch": 0.18135040729963903, "grad_norm": 1.125, "learning_rate": 0.0001982212269867022, "loss": 4.5828, "step": 1749 }, { "epoch": 0.1814540953541271, "grad_norm": 0.8671875, "learning_rate": 0.00019821918675139548, "loss": 4.6346, "step": 1750 }, { "epoch": 0.18155778340861517, "grad_norm": 0.79296875, "learning_rate": 0.0001982171453572062, "loss": 4.6333, "step": 1751 }, { "epoch": 0.18166147146310324, "grad_norm": 0.86328125, "learning_rate": 0.00019821510280415837, "loss": 4.658, "step": 1752 }, { "epoch": 0.1817651595175913, "grad_norm": 0.76953125, "learning_rate": 0.00019821305909227622, "loss": 4.6648, "step": 1753 }, { "epoch": 0.1818688475720794, "grad_norm": 0.87109375, "learning_rate": 0.00019821101422158374, "loss": 4.6353, "step": 1754 }, { "epoch": 0.18197253562656748, "grad_norm": 0.94921875, "learning_rate": 0.00019820896819210514, "loss": 4.5773, "step": 1755 }, { "epoch": 0.18207622368105555, "grad_norm": 1.015625, "learning_rate": 0.0001982069210038645, "loss": 4.5819, "step": 1756 }, { "epoch": 0.18217991173554363, "grad_norm": 1.1484375, "learning_rate": 0.00019820487265688602, "loss": 4.6235, "step": 1757 }, { "epoch": 0.1822835997900317, "grad_norm": 0.79296875, "learning_rate": 0.00019820282315119382, "loss": 4.6436, "step": 1758 }, { "epoch": 0.18238728784451977, "grad_norm": 0.8359375, "learning_rate": 0.0001982007724868121, "loss": 4.6404, "step": 1759 }, { "epoch": 0.18249097589900784, "grad_norm": 0.94921875, "learning_rate": 0.00019819872066376512, "loss": 4.6505, "step": 1760 }, { "epoch": 0.1825946639534959, "grad_norm": 1.0703125, "learning_rate": 0.000198196667682077, "loss": 4.6425, "step": 1761 }, { "epoch": 0.18269835200798398, "grad_norm": 1.140625, "learning_rate": 0.00019819461354177205, "loss": 4.666, "step": 1762 }, { "epoch": 0.18280204006247205, "grad_norm": 0.890625, "learning_rate": 0.0001981925582428744, "loss": 4.5952, "step": 1763 }, { "epoch": 0.18290572811696013, "grad_norm": 0.8046875, "learning_rate": 0.0001981905017854084, "loss": 4.6084, "step": 1764 }, { "epoch": 0.1830094161714482, "grad_norm": 0.828125, "learning_rate": 0.00019818844416939822, "loss": 4.6354, "step": 1765 }, { "epoch": 0.18311310422593627, "grad_norm": 0.94921875, "learning_rate": 0.00019818638539486822, "loss": 4.6241, "step": 1766 }, { "epoch": 0.18321679228042434, "grad_norm": 1.2890625, "learning_rate": 0.00019818432546184266, "loss": 4.6408, "step": 1767 }, { "epoch": 0.1833204803349124, "grad_norm": 0.796875, "learning_rate": 0.00019818226437034583, "loss": 4.5669, "step": 1768 }, { "epoch": 0.18342416838940048, "grad_norm": 0.85546875, "learning_rate": 0.0001981802021204021, "loss": 4.6347, "step": 1769 }, { "epoch": 0.18352785644388855, "grad_norm": 1.0703125, "learning_rate": 0.00019817813871203573, "loss": 4.6193, "step": 1770 }, { "epoch": 0.18363154449837663, "grad_norm": 1.2109375, "learning_rate": 0.00019817607414527108, "loss": 4.6221, "step": 1771 }, { "epoch": 0.1837352325528647, "grad_norm": 0.7734375, "learning_rate": 0.00019817400842013258, "loss": 4.6161, "step": 1772 }, { "epoch": 0.18383892060735277, "grad_norm": 0.890625, "learning_rate": 0.00019817194153664455, "loss": 4.5937, "step": 1773 }, { "epoch": 0.18394260866184084, "grad_norm": 0.7421875, "learning_rate": 0.00019816987349483136, "loss": 4.6, "step": 1774 }, { "epoch": 0.1840462967163289, "grad_norm": 0.9375, "learning_rate": 0.00019816780429471743, "loss": 4.6343, "step": 1775 }, { "epoch": 0.184149984770817, "grad_norm": 0.83203125, "learning_rate": 0.0001981657339363272, "loss": 4.592, "step": 1776 }, { "epoch": 0.18425367282530508, "grad_norm": 0.82421875, "learning_rate": 0.00019816366241968506, "loss": 4.6029, "step": 1777 }, { "epoch": 0.18435736087979315, "grad_norm": 0.703125, "learning_rate": 0.00019816158974481548, "loss": 4.6375, "step": 1778 }, { "epoch": 0.18446104893428122, "grad_norm": 0.66796875, "learning_rate": 0.0001981595159117429, "loss": 4.6048, "step": 1779 }, { "epoch": 0.1845647369887693, "grad_norm": 0.79296875, "learning_rate": 0.0001981574409204918, "loss": 4.5995, "step": 1780 }, { "epoch": 0.18466842504325737, "grad_norm": 0.76171875, "learning_rate": 0.00019815536477108662, "loss": 4.6003, "step": 1781 }, { "epoch": 0.18477211309774544, "grad_norm": 0.6328125, "learning_rate": 0.00019815328746355192, "loss": 4.6188, "step": 1782 }, { "epoch": 0.1848758011522335, "grad_norm": 0.62109375, "learning_rate": 0.00019815120899791216, "loss": 4.6491, "step": 1783 }, { "epoch": 0.18497948920672158, "grad_norm": 0.76953125, "learning_rate": 0.0001981491293741919, "loss": 4.6405, "step": 1784 }, { "epoch": 0.18508317726120965, "grad_norm": 0.6796875, "learning_rate": 0.00019814704859241565, "loss": 4.6342, "step": 1785 }, { "epoch": 0.18518686531569772, "grad_norm": 0.640625, "learning_rate": 0.00019814496665260798, "loss": 4.6543, "step": 1786 }, { "epoch": 0.1852905533701858, "grad_norm": 0.71484375, "learning_rate": 0.00019814288355479346, "loss": 4.6173, "step": 1787 }, { "epoch": 0.18539424142467387, "grad_norm": 0.83984375, "learning_rate": 0.00019814079929899662, "loss": 4.6002, "step": 1788 }, { "epoch": 0.18549792947916194, "grad_norm": 0.58984375, "learning_rate": 0.00019813871388524215, "loss": 4.5986, "step": 1789 }, { "epoch": 0.18560161753365, "grad_norm": 0.6171875, "learning_rate": 0.00019813662731355452, "loss": 4.6107, "step": 1790 }, { "epoch": 0.18570530558813808, "grad_norm": 0.7578125, "learning_rate": 0.00019813453958395847, "loss": 4.6364, "step": 1791 }, { "epoch": 0.18580899364262615, "grad_norm": 0.75, "learning_rate": 0.00019813245069647857, "loss": 4.5867, "step": 1792 }, { "epoch": 0.18591268169711422, "grad_norm": 0.7578125, "learning_rate": 0.0001981303606511395, "loss": 4.5961, "step": 1793 }, { "epoch": 0.1860163697516023, "grad_norm": 0.62109375, "learning_rate": 0.00019812826944796586, "loss": 4.6213, "step": 1794 }, { "epoch": 0.18612005780609037, "grad_norm": 0.94921875, "learning_rate": 0.00019812617708698241, "loss": 4.5809, "step": 1795 }, { "epoch": 0.18622374586057844, "grad_norm": 1.0703125, "learning_rate": 0.00019812408356821378, "loss": 4.6406, "step": 1796 }, { "epoch": 0.1863274339150665, "grad_norm": 0.69140625, "learning_rate": 0.00019812198889168468, "loss": 4.6107, "step": 1797 }, { "epoch": 0.1864311219695546, "grad_norm": 0.6484375, "learning_rate": 0.00019811989305741984, "loss": 4.5858, "step": 1798 }, { "epoch": 0.18653481002404268, "grad_norm": 0.8046875, "learning_rate": 0.00019811779606544397, "loss": 4.6217, "step": 1799 }, { "epoch": 0.18663849807853075, "grad_norm": 0.6796875, "learning_rate": 0.00019811569791578182, "loss": 4.5735, "step": 1800 }, { "epoch": 0.18674218613301882, "grad_norm": 0.5859375, "learning_rate": 0.00019811359860845814, "loss": 4.6024, "step": 1801 }, { "epoch": 0.1868458741875069, "grad_norm": 0.640625, "learning_rate": 0.00019811149814349773, "loss": 4.5761, "step": 1802 }, { "epoch": 0.18694956224199497, "grad_norm": 0.58203125, "learning_rate": 0.00019810939652092533, "loss": 4.5857, "step": 1803 }, { "epoch": 0.18705325029648304, "grad_norm": 0.6015625, "learning_rate": 0.00019810729374076575, "loss": 4.633, "step": 1804 }, { "epoch": 0.1871569383509711, "grad_norm": 0.6953125, "learning_rate": 0.00019810518980304385, "loss": 4.628, "step": 1805 }, { "epoch": 0.18726062640545918, "grad_norm": 0.62109375, "learning_rate": 0.00019810308470778436, "loss": 4.5949, "step": 1806 }, { "epoch": 0.18736431445994725, "grad_norm": 0.71484375, "learning_rate": 0.0001981009784550122, "loss": 4.5659, "step": 1807 }, { "epoch": 0.18746800251443532, "grad_norm": 0.69921875, "learning_rate": 0.0001980988710447522, "loss": 4.5788, "step": 1808 }, { "epoch": 0.1875716905689234, "grad_norm": 0.64453125, "learning_rate": 0.0001980967624770292, "loss": 4.5945, "step": 1809 }, { "epoch": 0.18767537862341147, "grad_norm": 0.73046875, "learning_rate": 0.0001980946527518681, "loss": 4.6004, "step": 1810 }, { "epoch": 0.18777906667789954, "grad_norm": 0.6796875, "learning_rate": 0.0001980925418692938, "loss": 4.6061, "step": 1811 }, { "epoch": 0.1878827547323876, "grad_norm": 0.64453125, "learning_rate": 0.00019809042982933117, "loss": 4.6141, "step": 1812 }, { "epoch": 0.18798644278687568, "grad_norm": 0.609375, "learning_rate": 0.00019808831663200517, "loss": 4.641, "step": 1813 }, { "epoch": 0.18809013084136375, "grad_norm": 0.59765625, "learning_rate": 0.0001980862022773407, "loss": 4.542, "step": 1814 }, { "epoch": 0.18819381889585182, "grad_norm": 0.69140625, "learning_rate": 0.00019808408676536275, "loss": 4.587, "step": 1815 }, { "epoch": 0.1882975069503399, "grad_norm": 0.5859375, "learning_rate": 0.00019808197009609624, "loss": 4.6407, "step": 1816 }, { "epoch": 0.18840119500482796, "grad_norm": 0.58203125, "learning_rate": 0.00019807985226956616, "loss": 4.6117, "step": 1817 }, { "epoch": 0.18850488305931604, "grad_norm": 0.63671875, "learning_rate": 0.0001980777332857975, "loss": 4.6044, "step": 1818 }, { "epoch": 0.1886085711138041, "grad_norm": 0.609375, "learning_rate": 0.00019807561314481525, "loss": 4.5919, "step": 1819 }, { "epoch": 0.1887122591682922, "grad_norm": 0.53515625, "learning_rate": 0.00019807349184664447, "loss": 4.5995, "step": 1820 }, { "epoch": 0.18881594722278028, "grad_norm": 0.71875, "learning_rate": 0.0001980713693913101, "loss": 4.6098, "step": 1821 }, { "epoch": 0.18891963527726835, "grad_norm": 0.67578125, "learning_rate": 0.0001980692457788373, "loss": 4.6336, "step": 1822 }, { "epoch": 0.18902332333175642, "grad_norm": 0.67578125, "learning_rate": 0.00019806712100925103, "loss": 4.5602, "step": 1823 }, { "epoch": 0.1891270113862445, "grad_norm": 0.7109375, "learning_rate": 0.00019806499508257636, "loss": 4.585, "step": 1824 }, { "epoch": 0.18923069944073256, "grad_norm": 0.66796875, "learning_rate": 0.00019806286799883846, "loss": 4.6508, "step": 1825 }, { "epoch": 0.18933438749522064, "grad_norm": 0.58203125, "learning_rate": 0.00019806073975806235, "loss": 4.5661, "step": 1826 }, { "epoch": 0.1894380755497087, "grad_norm": 0.6015625, "learning_rate": 0.00019805861036027318, "loss": 4.6343, "step": 1827 }, { "epoch": 0.18954176360419678, "grad_norm": 0.765625, "learning_rate": 0.00019805647980549606, "loss": 4.6045, "step": 1828 }, { "epoch": 0.18964545165868485, "grad_norm": 0.7578125, "learning_rate": 0.0001980543480937561, "loss": 4.6495, "step": 1829 }, { "epoch": 0.18974913971317292, "grad_norm": 0.6171875, "learning_rate": 0.0001980522152250785, "loss": 4.5932, "step": 1830 }, { "epoch": 0.189852827767661, "grad_norm": 0.5625, "learning_rate": 0.00019805008119948842, "loss": 4.6119, "step": 1831 }, { "epoch": 0.18995651582214906, "grad_norm": 0.859375, "learning_rate": 0.000198047946017011, "loss": 4.6007, "step": 1832 }, { "epoch": 0.19006020387663713, "grad_norm": 0.9609375, "learning_rate": 0.00019804580967767147, "loss": 4.5951, "step": 1833 }, { "epoch": 0.1901638919311252, "grad_norm": 0.9609375, "learning_rate": 0.00019804367218149505, "loss": 4.5742, "step": 1834 }, { "epoch": 0.19026757998561328, "grad_norm": 0.8125, "learning_rate": 0.0001980415335285069, "loss": 4.5971, "step": 1835 }, { "epoch": 0.19037126804010135, "grad_norm": 0.69140625, "learning_rate": 0.0001980393937187323, "loss": 4.6173, "step": 1836 }, { "epoch": 0.19047495609458942, "grad_norm": 0.72265625, "learning_rate": 0.00019803725275219648, "loss": 4.6155, "step": 1837 }, { "epoch": 0.1905786441490775, "grad_norm": 0.80859375, "learning_rate": 0.0001980351106289247, "loss": 4.5651, "step": 1838 }, { "epoch": 0.19068233220356556, "grad_norm": 0.62890625, "learning_rate": 0.00019803296734894227, "loss": 4.6264, "step": 1839 }, { "epoch": 0.19078602025805363, "grad_norm": 0.68359375, "learning_rate": 0.00019803082291227443, "loss": 4.599, "step": 1840 }, { "epoch": 0.1908897083125417, "grad_norm": 0.89453125, "learning_rate": 0.0001980286773189465, "loss": 4.5985, "step": 1841 }, { "epoch": 0.19099339636702978, "grad_norm": 0.84375, "learning_rate": 0.0001980265305689838, "loss": 4.5839, "step": 1842 }, { "epoch": 0.19109708442151788, "grad_norm": 0.7265625, "learning_rate": 0.0001980243826624117, "loss": 4.5921, "step": 1843 }, { "epoch": 0.19120077247600595, "grad_norm": 0.91796875, "learning_rate": 0.00019802223359925545, "loss": 4.6503, "step": 1844 }, { "epoch": 0.19130446053049402, "grad_norm": 1.0546875, "learning_rate": 0.00019802008337954047, "loss": 4.5806, "step": 1845 }, { "epoch": 0.1914081485849821, "grad_norm": 0.97265625, "learning_rate": 0.00019801793200329213, "loss": 4.5883, "step": 1846 }, { "epoch": 0.19151183663947016, "grad_norm": 0.765625, "learning_rate": 0.00019801577947053579, "loss": 4.6491, "step": 1847 }, { "epoch": 0.19161552469395823, "grad_norm": 0.66015625, "learning_rate": 0.00019801362578129685, "loss": 4.6137, "step": 1848 }, { "epoch": 0.1917192127484463, "grad_norm": 0.90234375, "learning_rate": 0.00019801147093560076, "loss": 4.6222, "step": 1849 }, { "epoch": 0.19182290080293438, "grad_norm": 0.921875, "learning_rate": 0.00019800931493347288, "loss": 4.6001, "step": 1850 }, { "epoch": 0.19192658885742245, "grad_norm": 0.79296875, "learning_rate": 0.00019800715777493872, "loss": 4.6157, "step": 1851 }, { "epoch": 0.19203027691191052, "grad_norm": 0.7578125, "learning_rate": 0.00019800499946002366, "loss": 4.581, "step": 1852 }, { "epoch": 0.1921339649663986, "grad_norm": 0.78125, "learning_rate": 0.00019800283998875324, "loss": 4.6079, "step": 1853 }, { "epoch": 0.19223765302088666, "grad_norm": 0.75390625, "learning_rate": 0.00019800067936115288, "loss": 4.577, "step": 1854 }, { "epoch": 0.19234134107537473, "grad_norm": 0.72265625, "learning_rate": 0.00019799851757724808, "loss": 4.575, "step": 1855 }, { "epoch": 0.1924450291298628, "grad_norm": 0.58984375, "learning_rate": 0.00019799635463706438, "loss": 4.6081, "step": 1856 }, { "epoch": 0.19254871718435088, "grad_norm": 0.71484375, "learning_rate": 0.00019799419054062728, "loss": 4.6058, "step": 1857 }, { "epoch": 0.19265240523883895, "grad_norm": 0.515625, "learning_rate": 0.00019799202528796231, "loss": 4.618, "step": 1858 }, { "epoch": 0.19275609329332702, "grad_norm": 0.63671875, "learning_rate": 0.00019798985887909502, "loss": 4.6002, "step": 1859 }, { "epoch": 0.1928597813478151, "grad_norm": 0.61328125, "learning_rate": 0.00019798769131405098, "loss": 4.6159, "step": 1860 }, { "epoch": 0.19296346940230316, "grad_norm": 0.640625, "learning_rate": 0.00019798552259285579, "loss": 4.5643, "step": 1861 }, { "epoch": 0.19306715745679123, "grad_norm": 0.625, "learning_rate": 0.000197983352715535, "loss": 4.6061, "step": 1862 }, { "epoch": 0.1931708455112793, "grad_norm": 0.6640625, "learning_rate": 0.0001979811816821142, "loss": 4.5693, "step": 1863 }, { "epoch": 0.19327453356576738, "grad_norm": 0.61328125, "learning_rate": 0.000197979009492619, "loss": 4.6038, "step": 1864 }, { "epoch": 0.19337822162025547, "grad_norm": 0.8125, "learning_rate": 0.00019797683614707512, "loss": 4.5976, "step": 1865 }, { "epoch": 0.19348190967474355, "grad_norm": 0.7890625, "learning_rate": 0.0001979746616455081, "loss": 4.5936, "step": 1866 }, { "epoch": 0.19358559772923162, "grad_norm": 0.8359375, "learning_rate": 0.00019797248598794364, "loss": 4.5944, "step": 1867 }, { "epoch": 0.1936892857837197, "grad_norm": 0.9296875, "learning_rate": 0.0001979703091744074, "loss": 4.6682, "step": 1868 }, { "epoch": 0.19379297383820776, "grad_norm": 0.92578125, "learning_rate": 0.00019796813120492507, "loss": 4.562, "step": 1869 }, { "epoch": 0.19389666189269583, "grad_norm": 0.8671875, "learning_rate": 0.00019796595207952238, "loss": 4.5784, "step": 1870 }, { "epoch": 0.1940003499471839, "grad_norm": 0.78125, "learning_rate": 0.000197963771798225, "loss": 4.5825, "step": 1871 }, { "epoch": 0.19410403800167197, "grad_norm": 0.66015625, "learning_rate": 0.00019796159036105865, "loss": 4.6164, "step": 1872 }, { "epoch": 0.19420772605616005, "grad_norm": 0.74609375, "learning_rate": 0.00019795940776804906, "loss": 4.5685, "step": 1873 }, { "epoch": 0.19431141411064812, "grad_norm": 0.796875, "learning_rate": 0.00019795722401922205, "loss": 4.6074, "step": 1874 }, { "epoch": 0.1944151021651362, "grad_norm": 0.8046875, "learning_rate": 0.0001979550391146033, "loss": 4.5823, "step": 1875 }, { "epoch": 0.19451879021962426, "grad_norm": 0.73828125, "learning_rate": 0.0001979528530542187, "loss": 4.6129, "step": 1876 }, { "epoch": 0.19462247827411233, "grad_norm": 0.75390625, "learning_rate": 0.00019795066583809393, "loss": 4.5628, "step": 1877 }, { "epoch": 0.1947261663286004, "grad_norm": 0.75390625, "learning_rate": 0.00019794847746625483, "loss": 4.5917, "step": 1878 }, { "epoch": 0.19482985438308847, "grad_norm": 0.64453125, "learning_rate": 0.00019794628793872723, "loss": 4.6206, "step": 1879 }, { "epoch": 0.19493354243757655, "grad_norm": 0.64453125, "learning_rate": 0.00019794409725553699, "loss": 4.5926, "step": 1880 }, { "epoch": 0.19503723049206462, "grad_norm": 0.71484375, "learning_rate": 0.00019794190541670993, "loss": 4.6335, "step": 1881 }, { "epoch": 0.1951409185465527, "grad_norm": 0.8984375, "learning_rate": 0.0001979397124222719, "loss": 4.6138, "step": 1882 }, { "epoch": 0.19524460660104076, "grad_norm": 0.765625, "learning_rate": 0.0001979375182722488, "loss": 4.622, "step": 1883 }, { "epoch": 0.19534829465552883, "grad_norm": 0.796875, "learning_rate": 0.0001979353229666665, "loss": 4.5461, "step": 1884 }, { "epoch": 0.1954519827100169, "grad_norm": 0.78515625, "learning_rate": 0.00019793312650555093, "loss": 4.5757, "step": 1885 }, { "epoch": 0.19555567076450497, "grad_norm": 0.88671875, "learning_rate": 0.00019793092888892799, "loss": 4.6087, "step": 1886 }, { "epoch": 0.19565935881899307, "grad_norm": 0.94921875, "learning_rate": 0.00019792873011682357, "loss": 4.5903, "step": 1887 }, { "epoch": 0.19576304687348114, "grad_norm": 0.8515625, "learning_rate": 0.0001979265301892637, "loss": 4.5752, "step": 1888 }, { "epoch": 0.19586673492796922, "grad_norm": 0.83203125, "learning_rate": 0.00019792432910627425, "loss": 4.5782, "step": 1889 }, { "epoch": 0.1959704229824573, "grad_norm": 0.81640625, "learning_rate": 0.00019792212686788122, "loss": 4.5527, "step": 1890 }, { "epoch": 0.19607411103694536, "grad_norm": 0.98046875, "learning_rate": 0.00019791992347411064, "loss": 4.6206, "step": 1891 }, { "epoch": 0.19617779909143343, "grad_norm": 1.28125, "learning_rate": 0.00019791771892498843, "loss": 4.6059, "step": 1892 }, { "epoch": 0.1962814871459215, "grad_norm": 0.8125, "learning_rate": 0.00019791551322054067, "loss": 4.5715, "step": 1893 }, { "epoch": 0.19638517520040957, "grad_norm": 0.86328125, "learning_rate": 0.00019791330636079332, "loss": 4.5888, "step": 1894 }, { "epoch": 0.19648886325489764, "grad_norm": 0.921875, "learning_rate": 0.0001979110983457725, "loss": 4.5763, "step": 1895 }, { "epoch": 0.19659255130938572, "grad_norm": 0.95703125, "learning_rate": 0.0001979088891755042, "loss": 4.5754, "step": 1896 }, { "epoch": 0.1966962393638738, "grad_norm": 0.88671875, "learning_rate": 0.00019790667885001448, "loss": 4.6025, "step": 1897 }, { "epoch": 0.19679992741836186, "grad_norm": 0.8671875, "learning_rate": 0.00019790446736932946, "loss": 4.6139, "step": 1898 }, { "epoch": 0.19690361547284993, "grad_norm": 0.96875, "learning_rate": 0.0001979022547334752, "loss": 4.5304, "step": 1899 }, { "epoch": 0.197007303527338, "grad_norm": 1.015625, "learning_rate": 0.00019790004094247783, "loss": 4.6188, "step": 1900 }, { "epoch": 0.19711099158182607, "grad_norm": 0.81640625, "learning_rate": 0.00019789782599636348, "loss": 4.6019, "step": 1901 }, { "epoch": 0.19721467963631414, "grad_norm": 0.8359375, "learning_rate": 0.00019789560989515824, "loss": 4.5996, "step": 1902 }, { "epoch": 0.19731836769080222, "grad_norm": 1.1015625, "learning_rate": 0.0001978933926388883, "loss": 4.585, "step": 1903 }, { "epoch": 0.1974220557452903, "grad_norm": 0.8828125, "learning_rate": 0.0001978911742275798, "loss": 4.6249, "step": 1904 }, { "epoch": 0.19752574379977836, "grad_norm": 0.83984375, "learning_rate": 0.0001978889546612589, "loss": 4.5999, "step": 1905 }, { "epoch": 0.19762943185426643, "grad_norm": 0.83984375, "learning_rate": 0.00019788673393995182, "loss": 4.6135, "step": 1906 }, { "epoch": 0.1977331199087545, "grad_norm": 0.77734375, "learning_rate": 0.00019788451206368475, "loss": 4.6007, "step": 1907 }, { "epoch": 0.19783680796324257, "grad_norm": 0.83984375, "learning_rate": 0.00019788228903248393, "loss": 4.6105, "step": 1908 }, { "epoch": 0.19794049601773064, "grad_norm": 0.76171875, "learning_rate": 0.00019788006484637553, "loss": 4.6072, "step": 1909 }, { "epoch": 0.19804418407221874, "grad_norm": 0.76171875, "learning_rate": 0.00019787783950538587, "loss": 4.59, "step": 1910 }, { "epoch": 0.19814787212670681, "grad_norm": 0.78515625, "learning_rate": 0.0001978756130095411, "loss": 4.5976, "step": 1911 }, { "epoch": 0.19825156018119489, "grad_norm": 0.6953125, "learning_rate": 0.0001978733853588676, "loss": 4.5981, "step": 1912 }, { "epoch": 0.19835524823568296, "grad_norm": 0.6875, "learning_rate": 0.00019787115655339163, "loss": 4.6151, "step": 1913 }, { "epoch": 0.19845893629017103, "grad_norm": 0.765625, "learning_rate": 0.00019786892659313945, "loss": 4.6266, "step": 1914 }, { "epoch": 0.1985626243446591, "grad_norm": 0.70703125, "learning_rate": 0.00019786669547813737, "loss": 4.6112, "step": 1915 }, { "epoch": 0.19866631239914717, "grad_norm": 0.72265625, "learning_rate": 0.00019786446320841172, "loss": 4.5899, "step": 1916 }, { "epoch": 0.19877000045363524, "grad_norm": 0.84765625, "learning_rate": 0.00019786222978398889, "loss": 4.546, "step": 1917 }, { "epoch": 0.19887368850812331, "grad_norm": 0.734375, "learning_rate": 0.00019785999520489518, "loss": 4.537, "step": 1918 }, { "epoch": 0.19897737656261139, "grad_norm": 0.68359375, "learning_rate": 0.00019785775947115696, "loss": 4.5717, "step": 1919 }, { "epoch": 0.19908106461709946, "grad_norm": 0.8828125, "learning_rate": 0.00019785552258280064, "loss": 4.5215, "step": 1920 }, { "epoch": 0.19918475267158753, "grad_norm": 1.1171875, "learning_rate": 0.00019785328453985257, "loss": 4.6153, "step": 1921 }, { "epoch": 0.1992884407260756, "grad_norm": 0.6875, "learning_rate": 0.00019785104534233918, "loss": 4.5823, "step": 1922 }, { "epoch": 0.19939212878056367, "grad_norm": 0.73046875, "learning_rate": 0.00019784880499028692, "loss": 4.5774, "step": 1923 }, { "epoch": 0.19949581683505174, "grad_norm": 1.0078125, "learning_rate": 0.00019784656348372214, "loss": 4.6047, "step": 1924 }, { "epoch": 0.1995995048895398, "grad_norm": 0.87890625, "learning_rate": 0.00019784432082267138, "loss": 4.612, "step": 1925 }, { "epoch": 0.19970319294402789, "grad_norm": 0.83984375, "learning_rate": 0.00019784207700716103, "loss": 4.6276, "step": 1926 }, { "epoch": 0.19980688099851596, "grad_norm": 0.73828125, "learning_rate": 0.00019783983203721758, "loss": 4.5811, "step": 1927 }, { "epoch": 0.19991056905300403, "grad_norm": 0.953125, "learning_rate": 0.0001978375859128676, "loss": 4.5809, "step": 1928 }, { "epoch": 0.2000142571074921, "grad_norm": 1.0859375, "learning_rate": 0.0001978353386341375, "loss": 4.6231, "step": 1929 }, { "epoch": 0.20011794516198017, "grad_norm": 0.796875, "learning_rate": 0.00019783309020105375, "loss": 4.5642, "step": 1930 }, { "epoch": 0.20022163321646824, "grad_norm": 0.78515625, "learning_rate": 0.00019783084061364303, "loss": 4.581, "step": 1931 }, { "epoch": 0.20032532127095634, "grad_norm": 1.1875, "learning_rate": 0.00019782858987193178, "loss": 4.5868, "step": 1932 }, { "epoch": 0.2004290093254444, "grad_norm": 0.81640625, "learning_rate": 0.00019782633797594659, "loss": 4.617, "step": 1933 }, { "epoch": 0.20053269737993248, "grad_norm": 0.8046875, "learning_rate": 0.00019782408492571399, "loss": 4.6109, "step": 1934 }, { "epoch": 0.20063638543442056, "grad_norm": 1.0234375, "learning_rate": 0.00019782183072126062, "loss": 4.6252, "step": 1935 }, { "epoch": 0.20074007348890863, "grad_norm": 1.0625, "learning_rate": 0.00019781957536261303, "loss": 4.6283, "step": 1936 }, { "epoch": 0.2008437615433967, "grad_norm": 0.99609375, "learning_rate": 0.00019781731884979786, "loss": 4.606, "step": 1937 }, { "epoch": 0.20094744959788477, "grad_norm": 0.86328125, "learning_rate": 0.00019781506118284173, "loss": 4.6014, "step": 1938 }, { "epoch": 0.20105113765237284, "grad_norm": 0.7421875, "learning_rate": 0.00019781280236177127, "loss": 4.6413, "step": 1939 }, { "epoch": 0.2011548257068609, "grad_norm": 0.7734375, "learning_rate": 0.0001978105423866131, "loss": 4.5813, "step": 1940 }, { "epoch": 0.20125851376134898, "grad_norm": 0.66796875, "learning_rate": 0.00019780828125739398, "loss": 4.5541, "step": 1941 }, { "epoch": 0.20136220181583706, "grad_norm": 0.8515625, "learning_rate": 0.0001978060189741405, "loss": 4.6159, "step": 1942 }, { "epoch": 0.20146588987032513, "grad_norm": 0.97265625, "learning_rate": 0.00019780375553687937, "loss": 4.6638, "step": 1943 }, { "epoch": 0.2015695779248132, "grad_norm": 0.96484375, "learning_rate": 0.00019780149094563733, "loss": 4.6151, "step": 1944 }, { "epoch": 0.20167326597930127, "grad_norm": 0.95703125, "learning_rate": 0.00019779922520044108, "loss": 4.5835, "step": 1945 }, { "epoch": 0.20177695403378934, "grad_norm": 0.96875, "learning_rate": 0.00019779695830131732, "loss": 4.5769, "step": 1946 }, { "epoch": 0.2018806420882774, "grad_norm": 0.96875, "learning_rate": 0.00019779469024829285, "loss": 4.6143, "step": 1947 }, { "epoch": 0.20198433014276548, "grad_norm": 1.1796875, "learning_rate": 0.0001977924210413944, "loss": 4.604, "step": 1948 }, { "epoch": 0.20208801819725355, "grad_norm": 0.81640625, "learning_rate": 0.00019779015068064877, "loss": 4.5636, "step": 1949 }, { "epoch": 0.20219170625174163, "grad_norm": 0.91796875, "learning_rate": 0.00019778787916608273, "loss": 4.5581, "step": 1950 }, { "epoch": 0.2022953943062297, "grad_norm": 1.1171875, "learning_rate": 0.00019778560649772305, "loss": 4.5624, "step": 1951 }, { "epoch": 0.20239908236071777, "grad_norm": 0.77734375, "learning_rate": 0.00019778333267559658, "loss": 4.6108, "step": 1952 }, { "epoch": 0.20250277041520584, "grad_norm": 1.015625, "learning_rate": 0.00019778105769973018, "loss": 4.5904, "step": 1953 }, { "epoch": 0.20260645846969394, "grad_norm": 1.3203125, "learning_rate": 0.00019777878157015063, "loss": 4.5902, "step": 1954 }, { "epoch": 0.202710146524182, "grad_norm": 0.67578125, "learning_rate": 0.00019777650428688483, "loss": 4.6012, "step": 1955 }, { "epoch": 0.20281383457867008, "grad_norm": 1.515625, "learning_rate": 0.00019777422584995965, "loss": 4.544, "step": 1956 }, { "epoch": 0.20291752263315815, "grad_norm": 0.76171875, "learning_rate": 0.00019777194625940193, "loss": 4.6165, "step": 1957 }, { "epoch": 0.20302121068764623, "grad_norm": 1.59375, "learning_rate": 0.00019776966551523858, "loss": 4.5806, "step": 1958 }, { "epoch": 0.2031248987421343, "grad_norm": 0.9140625, "learning_rate": 0.00019776738361749655, "loss": 4.5912, "step": 1959 }, { "epoch": 0.20322858679662237, "grad_norm": 1.9375, "learning_rate": 0.00019776510056620272, "loss": 4.6098, "step": 1960 }, { "epoch": 0.20333227485111044, "grad_norm": 1.453125, "learning_rate": 0.00019776281636138407, "loss": 4.5712, "step": 1961 }, { "epoch": 0.2034359629055985, "grad_norm": 2.65625, "learning_rate": 0.0001977605310030675, "loss": 4.6034, "step": 1962 }, { "epoch": 0.20353965096008658, "grad_norm": 2.484375, "learning_rate": 0.00019775824449128003, "loss": 4.6355, "step": 1963 }, { "epoch": 0.20364333901457465, "grad_norm": 1.40625, "learning_rate": 0.0001977559568260486, "loss": 4.5732, "step": 1964 }, { "epoch": 0.20374702706906272, "grad_norm": 1.90625, "learning_rate": 0.0001977536680074002, "loss": 4.5894, "step": 1965 }, { "epoch": 0.2038507151235508, "grad_norm": 1.59375, "learning_rate": 0.00019775137803536186, "loss": 4.6234, "step": 1966 }, { "epoch": 0.20395440317803887, "grad_norm": 1.859375, "learning_rate": 0.00019774908690996056, "loss": 4.646, "step": 1967 }, { "epoch": 0.20405809123252694, "grad_norm": 1.6875, "learning_rate": 0.0001977467946312234, "loss": 4.518, "step": 1968 }, { "epoch": 0.204161779287015, "grad_norm": 1.5546875, "learning_rate": 0.00019774450119917737, "loss": 4.6341, "step": 1969 }, { "epoch": 0.20426546734150308, "grad_norm": 1.28125, "learning_rate": 0.00019774220661384956, "loss": 4.5852, "step": 1970 }, { "epoch": 0.20436915539599115, "grad_norm": 1.5, "learning_rate": 0.000197739910875267, "loss": 4.5918, "step": 1971 }, { "epoch": 0.20447284345047922, "grad_norm": 1.078125, "learning_rate": 0.00019773761398345682, "loss": 4.5398, "step": 1972 }, { "epoch": 0.2045765315049673, "grad_norm": 1.4375, "learning_rate": 0.00019773531593844613, "loss": 4.5249, "step": 1973 }, { "epoch": 0.20468021955945537, "grad_norm": 1.2421875, "learning_rate": 0.00019773301674026197, "loss": 4.5895, "step": 1974 }, { "epoch": 0.20478390761394344, "grad_norm": 1.234375, "learning_rate": 0.00019773071638893157, "loss": 4.6123, "step": 1975 }, { "epoch": 0.2048875956684315, "grad_norm": 1.2265625, "learning_rate": 0.00019772841488448198, "loss": 4.5979, "step": 1976 }, { "epoch": 0.2049912837229196, "grad_norm": 0.90234375, "learning_rate": 0.00019772611222694045, "loss": 4.5542, "step": 1977 }, { "epoch": 0.20509497177740768, "grad_norm": 1.1015625, "learning_rate": 0.00019772380841633406, "loss": 4.5651, "step": 1978 }, { "epoch": 0.20519865983189575, "grad_norm": 0.83984375, "learning_rate": 0.00019772150345269003, "loss": 4.5454, "step": 1979 }, { "epoch": 0.20530234788638382, "grad_norm": 1.0859375, "learning_rate": 0.00019771919733603557, "loss": 4.6146, "step": 1980 }, { "epoch": 0.2054060359408719, "grad_norm": 0.921875, "learning_rate": 0.00019771689006639785, "loss": 4.5667, "step": 1981 }, { "epoch": 0.20550972399535997, "grad_norm": 0.9765625, "learning_rate": 0.00019771458164380415, "loss": 4.5801, "step": 1982 }, { "epoch": 0.20561341204984804, "grad_norm": 1.234375, "learning_rate": 0.00019771227206828167, "loss": 4.6104, "step": 1983 }, { "epoch": 0.2057171001043361, "grad_norm": 0.84765625, "learning_rate": 0.00019770996133985767, "loss": 4.6062, "step": 1984 }, { "epoch": 0.20582078815882418, "grad_norm": 1.5, "learning_rate": 0.00019770764945855937, "loss": 4.5738, "step": 1985 }, { "epoch": 0.20592447621331225, "grad_norm": 0.98828125, "learning_rate": 0.00019770533642441413, "loss": 4.5966, "step": 1986 }, { "epoch": 0.20602816426780032, "grad_norm": 2.078125, "learning_rate": 0.0001977030222374492, "loss": 4.6273, "step": 1987 }, { "epoch": 0.2061318523222884, "grad_norm": 1.765625, "learning_rate": 0.00019770070689769184, "loss": 4.5446, "step": 1988 }, { "epoch": 0.20623554037677647, "grad_norm": 1.921875, "learning_rate": 0.00019769839040516946, "loss": 4.5785, "step": 1989 }, { "epoch": 0.20633922843126454, "grad_norm": 1.7578125, "learning_rate": 0.00019769607275990934, "loss": 4.5787, "step": 1990 }, { "epoch": 0.2064429164857526, "grad_norm": 1.546875, "learning_rate": 0.00019769375396193881, "loss": 4.5644, "step": 1991 }, { "epoch": 0.20654660454024068, "grad_norm": 1.421875, "learning_rate": 0.00019769143401128525, "loss": 4.5818, "step": 1992 }, { "epoch": 0.20665029259472875, "grad_norm": 1.3203125, "learning_rate": 0.00019768911290797604, "loss": 4.5749, "step": 1993 }, { "epoch": 0.20675398064921682, "grad_norm": 1.2890625, "learning_rate": 0.00019768679065203855, "loss": 4.5796, "step": 1994 }, { "epoch": 0.2068576687037049, "grad_norm": 1.03125, "learning_rate": 0.00019768446724350024, "loss": 4.5841, "step": 1995 }, { "epoch": 0.20696135675819297, "grad_norm": 1.4453125, "learning_rate": 0.00019768214268238842, "loss": 4.569, "step": 1996 }, { "epoch": 0.20706504481268104, "grad_norm": 1.046875, "learning_rate": 0.00019767981696873057, "loss": 4.6043, "step": 1997 }, { "epoch": 0.2071687328671691, "grad_norm": 1.7890625, "learning_rate": 0.00019767749010255416, "loss": 4.6222, "step": 1998 }, { "epoch": 0.2072724209216572, "grad_norm": 1.3984375, "learning_rate": 0.0001976751620838866, "loss": 4.5898, "step": 1999 }, { "epoch": 0.20737610897614528, "grad_norm": 2.09375, "learning_rate": 0.00019767283291275537, "loss": 4.5915, "step": 2000 }, { "epoch": 0.20747979703063335, "grad_norm": 1.640625, "learning_rate": 0.00019767050258918798, "loss": 4.5657, "step": 2001 }, { "epoch": 0.20758348508512142, "grad_norm": 2.0, "learning_rate": 0.00019766817111321186, "loss": 4.6221, "step": 2002 }, { "epoch": 0.2076871731396095, "grad_norm": 1.7265625, "learning_rate": 0.0001976658384848546, "loss": 4.6072, "step": 2003 }, { "epoch": 0.20779086119409756, "grad_norm": 1.796875, "learning_rate": 0.00019766350470414365, "loss": 4.576, "step": 2004 }, { "epoch": 0.20789454924858564, "grad_norm": 1.6328125, "learning_rate": 0.00019766116977110661, "loss": 4.5551, "step": 2005 }, { "epoch": 0.2079982373030737, "grad_norm": 1.6015625, "learning_rate": 0.000197658833685771, "loss": 4.5634, "step": 2006 }, { "epoch": 0.20810192535756178, "grad_norm": 1.4375, "learning_rate": 0.00019765649644816436, "loss": 4.5981, "step": 2007 }, { "epoch": 0.20820561341204985, "grad_norm": 1.6484375, "learning_rate": 0.0001976541580583143, "loss": 4.5633, "step": 2008 }, { "epoch": 0.20830930146653792, "grad_norm": 1.3828125, "learning_rate": 0.0001976518185162484, "loss": 4.6089, "step": 2009 }, { "epoch": 0.208412989521026, "grad_norm": 1.8125, "learning_rate": 0.00019764947782199426, "loss": 4.5937, "step": 2010 }, { "epoch": 0.20851667757551406, "grad_norm": 1.4140625, "learning_rate": 0.00019764713597557952, "loss": 4.5695, "step": 2011 }, { "epoch": 0.20862036563000214, "grad_norm": 2.078125, "learning_rate": 0.00019764479297703177, "loss": 4.6169, "step": 2012 }, { "epoch": 0.2087240536844902, "grad_norm": 1.8984375, "learning_rate": 0.00019764244882637867, "loss": 4.6056, "step": 2013 }, { "epoch": 0.20882774173897828, "grad_norm": 1.40625, "learning_rate": 0.00019764010352364792, "loss": 4.5951, "step": 2014 }, { "epoch": 0.20893142979346635, "grad_norm": 1.3046875, "learning_rate": 0.00019763775706886714, "loss": 4.5894, "step": 2015 }, { "epoch": 0.20903511784795442, "grad_norm": 1.375, "learning_rate": 0.00019763540946206404, "loss": 4.6161, "step": 2016 }, { "epoch": 0.2091388059024425, "grad_norm": 1.0625, "learning_rate": 0.00019763306070326632, "loss": 4.5434, "step": 2017 }, { "epoch": 0.20924249395693056, "grad_norm": 1.7734375, "learning_rate": 0.00019763071079250164, "loss": 4.5784, "step": 2018 }, { "epoch": 0.20934618201141864, "grad_norm": 1.453125, "learning_rate": 0.00019762835972979783, "loss": 4.5505, "step": 2019 }, { "epoch": 0.2094498700659067, "grad_norm": 1.671875, "learning_rate": 0.00019762600751518255, "loss": 4.5706, "step": 2020 }, { "epoch": 0.2095535581203948, "grad_norm": 1.46875, "learning_rate": 0.00019762365414868356, "loss": 4.5687, "step": 2021 }, { "epoch": 0.20965724617488288, "grad_norm": 1.78125, "learning_rate": 0.00019762129963032867, "loss": 4.5933, "step": 2022 }, { "epoch": 0.20976093422937095, "grad_norm": 1.4609375, "learning_rate": 0.0001976189439601456, "loss": 4.6114, "step": 2023 }, { "epoch": 0.20986462228385902, "grad_norm": 1.8203125, "learning_rate": 0.0001976165871381622, "loss": 4.5804, "step": 2024 }, { "epoch": 0.2099683103383471, "grad_norm": 1.5625, "learning_rate": 0.0001976142291644063, "loss": 4.6148, "step": 2025 }, { "epoch": 0.21007199839283516, "grad_norm": 1.8828125, "learning_rate": 0.00019761187003890563, "loss": 4.5602, "step": 2026 }, { "epoch": 0.21017568644732323, "grad_norm": 1.6796875, "learning_rate": 0.0001976095097616881, "loss": 4.6301, "step": 2027 }, { "epoch": 0.2102793745018113, "grad_norm": 1.5703125, "learning_rate": 0.00019760714833278148, "loss": 4.5808, "step": 2028 }, { "epoch": 0.21038306255629938, "grad_norm": 1.4453125, "learning_rate": 0.00019760478575221372, "loss": 4.5701, "step": 2029 }, { "epoch": 0.21048675061078745, "grad_norm": 1.40625, "learning_rate": 0.00019760242202001267, "loss": 4.5741, "step": 2030 }, { "epoch": 0.21059043866527552, "grad_norm": 1.1875, "learning_rate": 0.00019760005713620623, "loss": 4.5976, "step": 2031 }, { "epoch": 0.2106941267197636, "grad_norm": 1.3359375, "learning_rate": 0.00019759769110082223, "loss": 4.6137, "step": 2032 }, { "epoch": 0.21079781477425166, "grad_norm": 1.0859375, "learning_rate": 0.00019759532391388867, "loss": 4.5868, "step": 2033 }, { "epoch": 0.21090150282873973, "grad_norm": 1.8125, "learning_rate": 0.00019759295557543344, "loss": 4.5967, "step": 2034 }, { "epoch": 0.2110051908832278, "grad_norm": 1.421875, "learning_rate": 0.0001975905860854845, "loss": 4.5903, "step": 2035 }, { "epoch": 0.21110887893771588, "grad_norm": 2.015625, "learning_rate": 0.0001975882154440698, "loss": 4.6173, "step": 2036 }, { "epoch": 0.21121256699220395, "grad_norm": 1.8984375, "learning_rate": 0.00019758584365121734, "loss": 4.6118, "step": 2037 }, { "epoch": 0.21131625504669202, "grad_norm": 1.4609375, "learning_rate": 0.00019758347070695503, "loss": 4.5663, "step": 2038 }, { "epoch": 0.2114199431011801, "grad_norm": 1.4375, "learning_rate": 0.00019758109661131092, "loss": 4.5646, "step": 2039 }, { "epoch": 0.21152363115566816, "grad_norm": 1.296875, "learning_rate": 0.00019757872136431305, "loss": 4.6037, "step": 2040 }, { "epoch": 0.21162731921015623, "grad_norm": 1.1640625, "learning_rate": 0.0001975763449659894, "loss": 4.5981, "step": 2041 }, { "epoch": 0.2117310072646443, "grad_norm": 1.09375, "learning_rate": 0.00019757396741636803, "loss": 4.6128, "step": 2042 }, { "epoch": 0.21183469531913238, "grad_norm": 1.1875, "learning_rate": 0.00019757158871547699, "loss": 4.523, "step": 2043 }, { "epoch": 0.21193838337362048, "grad_norm": 0.94921875, "learning_rate": 0.00019756920886334432, "loss": 4.5688, "step": 2044 }, { "epoch": 0.21204207142810855, "grad_norm": 1.3671875, "learning_rate": 0.00019756682785999812, "loss": 4.5693, "step": 2045 }, { "epoch": 0.21214575948259662, "grad_norm": 0.96875, "learning_rate": 0.0001975644457054665, "loss": 4.5807, "step": 2046 }, { "epoch": 0.2122494475370847, "grad_norm": 1.7578125, "learning_rate": 0.00019756206239977751, "loss": 4.5522, "step": 2047 }, { "epoch": 0.21235313559157276, "grad_norm": 1.453125, "learning_rate": 0.00019755967794295938, "loss": 4.5546, "step": 2048 }, { "epoch": 0.21245682364606083, "grad_norm": 1.65625, "learning_rate": 0.00019755729233504013, "loss": 4.573, "step": 2049 }, { "epoch": 0.2125605117005489, "grad_norm": 1.390625, "learning_rate": 0.00019755490557604795, "loss": 4.5695, "step": 2050 }, { "epoch": 0.21266419975503698, "grad_norm": 1.859375, "learning_rate": 0.000197552517666011, "loss": 4.5793, "step": 2051 }, { "epoch": 0.21276788780952505, "grad_norm": 1.4375, "learning_rate": 0.00019755012860495747, "loss": 4.601, "step": 2052 }, { "epoch": 0.21287157586401312, "grad_norm": 2.109375, "learning_rate": 0.00019754773839291556, "loss": 4.6343, "step": 2053 }, { "epoch": 0.2129752639185012, "grad_norm": 1.96875, "learning_rate": 0.00019754534702991344, "loss": 4.6069, "step": 2054 }, { "epoch": 0.21307895197298926, "grad_norm": 1.25, "learning_rate": 0.00019754295451597932, "loss": 4.5174, "step": 2055 }, { "epoch": 0.21318264002747733, "grad_norm": 1.296875, "learning_rate": 0.00019754056085114144, "loss": 4.5313, "step": 2056 }, { "epoch": 0.2132863280819654, "grad_norm": 1.15625, "learning_rate": 0.0001975381660354281, "loss": 4.5743, "step": 2057 }, { "epoch": 0.21339001613645348, "grad_norm": 1.140625, "learning_rate": 0.00019753577006886744, "loss": 4.5637, "step": 2058 }, { "epoch": 0.21349370419094155, "grad_norm": 1.1171875, "learning_rate": 0.0001975333729514878, "loss": 4.5662, "step": 2059 }, { "epoch": 0.21359739224542962, "grad_norm": 0.984375, "learning_rate": 0.00019753097468331747, "loss": 4.5688, "step": 2060 }, { "epoch": 0.2137010802999177, "grad_norm": 1.15625, "learning_rate": 0.00019752857526438472, "loss": 4.5697, "step": 2061 }, { "epoch": 0.21380476835440576, "grad_norm": 0.83203125, "learning_rate": 0.0001975261746947179, "loss": 4.6017, "step": 2062 }, { "epoch": 0.21390845640889383, "grad_norm": 1.203125, "learning_rate": 0.0001975237729743453, "loss": 4.5543, "step": 2063 }, { "epoch": 0.2140121444633819, "grad_norm": 0.859375, "learning_rate": 0.00019752137010329527, "loss": 4.5657, "step": 2064 }, { "epoch": 0.21411583251786998, "grad_norm": 1.4609375, "learning_rate": 0.00019751896608159614, "loss": 4.5717, "step": 2065 }, { "epoch": 0.21421952057235807, "grad_norm": 1.1015625, "learning_rate": 0.0001975165609092763, "loss": 4.5827, "step": 2066 }, { "epoch": 0.21432320862684615, "grad_norm": 1.90625, "learning_rate": 0.00019751415458636414, "loss": 4.5571, "step": 2067 }, { "epoch": 0.21442689668133422, "grad_norm": 1.90625, "learning_rate": 0.000197511747112888, "loss": 4.6079, "step": 2068 }, { "epoch": 0.2145305847358223, "grad_norm": 0.91796875, "learning_rate": 0.00019750933848887634, "loss": 4.5673, "step": 2069 }, { "epoch": 0.21463427279031036, "grad_norm": 1.5390625, "learning_rate": 0.00019750692871435755, "loss": 4.5605, "step": 2070 }, { "epoch": 0.21473796084479843, "grad_norm": 1.1015625, "learning_rate": 0.00019750451778936007, "loss": 4.5711, "step": 2071 }, { "epoch": 0.2148416488992865, "grad_norm": 1.578125, "learning_rate": 0.00019750210571391232, "loss": 4.563, "step": 2072 }, { "epoch": 0.21494533695377457, "grad_norm": 1.5078125, "learning_rate": 0.00019749969248804283, "loss": 4.5555, "step": 2073 }, { "epoch": 0.21504902500826265, "grad_norm": 1.015625, "learning_rate": 0.00019749727811178, "loss": 4.589, "step": 2074 }, { "epoch": 0.21515271306275072, "grad_norm": 1.421875, "learning_rate": 0.00019749486258515238, "loss": 4.5742, "step": 2075 }, { "epoch": 0.2152564011172388, "grad_norm": 1.0859375, "learning_rate": 0.00019749244590818842, "loss": 4.5774, "step": 2076 }, { "epoch": 0.21536008917172686, "grad_norm": 1.7421875, "learning_rate": 0.00019749002808091667, "loss": 4.5611, "step": 2077 }, { "epoch": 0.21546377722621493, "grad_norm": 1.5234375, "learning_rate": 0.00019748760910336558, "loss": 4.551, "step": 2078 }, { "epoch": 0.215567465280703, "grad_norm": 1.53125, "learning_rate": 0.00019748518897556383, "loss": 4.6089, "step": 2079 }, { "epoch": 0.21567115333519107, "grad_norm": 1.1796875, "learning_rate": 0.00019748276769753983, "loss": 4.5747, "step": 2080 }, { "epoch": 0.21577484138967915, "grad_norm": 1.515625, "learning_rate": 0.00019748034526932226, "loss": 4.5606, "step": 2081 }, { "epoch": 0.21587852944416722, "grad_norm": 0.9375, "learning_rate": 0.00019747792169093963, "loss": 4.5935, "step": 2082 }, { "epoch": 0.2159822174986553, "grad_norm": 1.8125, "learning_rate": 0.00019747549696242059, "loss": 4.6101, "step": 2083 }, { "epoch": 0.21608590555314336, "grad_norm": 1.59375, "learning_rate": 0.0001974730710837937, "loss": 4.5796, "step": 2084 }, { "epoch": 0.21618959360763143, "grad_norm": 1.7890625, "learning_rate": 0.00019747064405508763, "loss": 4.5739, "step": 2085 }, { "epoch": 0.2162932816621195, "grad_norm": 1.5390625, "learning_rate": 0.00019746821587633099, "loss": 4.5887, "step": 2086 }, { "epoch": 0.21639696971660757, "grad_norm": 1.8359375, "learning_rate": 0.0001974657865475524, "loss": 4.544, "step": 2087 }, { "epoch": 0.21650065777109567, "grad_norm": 1.6875, "learning_rate": 0.00019746335606878054, "loss": 4.5873, "step": 2088 }, { "epoch": 0.21660434582558374, "grad_norm": 1.625, "learning_rate": 0.00019746092444004412, "loss": 4.6125, "step": 2089 }, { "epoch": 0.21670803388007182, "grad_norm": 1.453125, "learning_rate": 0.00019745849166137185, "loss": 4.5585, "step": 2090 }, { "epoch": 0.2168117219345599, "grad_norm": 1.453125, "learning_rate": 0.00019745605773279236, "loss": 4.5969, "step": 2091 }, { "epoch": 0.21691540998904796, "grad_norm": 1.203125, "learning_rate": 0.00019745362265433442, "loss": 4.5379, "step": 2092 }, { "epoch": 0.21701909804353603, "grad_norm": 1.7578125, "learning_rate": 0.00019745118642602673, "loss": 4.5126, "step": 2093 }, { "epoch": 0.2171227860980241, "grad_norm": 1.484375, "learning_rate": 0.00019744874904789806, "loss": 4.5135, "step": 2094 }, { "epoch": 0.21722647415251217, "grad_norm": 1.8359375, "learning_rate": 0.00019744631051997718, "loss": 4.5868, "step": 2095 }, { "epoch": 0.21733016220700024, "grad_norm": 1.640625, "learning_rate": 0.0001974438708422928, "loss": 4.5675, "step": 2096 }, { "epoch": 0.21743385026148832, "grad_norm": 1.453125, "learning_rate": 0.00019744143001487378, "loss": 4.5875, "step": 2097 }, { "epoch": 0.2175375383159764, "grad_norm": 1.34375, "learning_rate": 0.00019743898803774884, "loss": 4.6141, "step": 2098 }, { "epoch": 0.21764122637046446, "grad_norm": 1.4140625, "learning_rate": 0.0001974365449109469, "loss": 4.5761, "step": 2099 }, { "epoch": 0.21774491442495253, "grad_norm": 1.2734375, "learning_rate": 0.0001974341006344967, "loss": 4.5581, "step": 2100 }, { "epoch": 0.2178486024794406, "grad_norm": 1.1484375, "learning_rate": 0.0001974316552084271, "loss": 4.5847, "step": 2101 }, { "epoch": 0.21795229053392867, "grad_norm": 1.7109375, "learning_rate": 0.00019742920863276693, "loss": 4.5691, "step": 2102 }, { "epoch": 0.21805597858841674, "grad_norm": 1.234375, "learning_rate": 0.00019742676090754512, "loss": 4.5477, "step": 2103 }, { "epoch": 0.21815966664290481, "grad_norm": 2.421875, "learning_rate": 0.0001974243120327905, "loss": 4.5926, "step": 2104 }, { "epoch": 0.2182633546973929, "grad_norm": 2.1875, "learning_rate": 0.000197421862008532, "loss": 4.588, "step": 2105 }, { "epoch": 0.21836704275188096, "grad_norm": 1.5703125, "learning_rate": 0.0001974194108347985, "loss": 4.5484, "step": 2106 }, { "epoch": 0.21847073080636903, "grad_norm": 1.515625, "learning_rate": 0.00019741695851161893, "loss": 4.6028, "step": 2107 }, { "epoch": 0.2185744188608571, "grad_norm": 1.6171875, "learning_rate": 0.00019741450503902222, "loss": 4.6113, "step": 2108 }, { "epoch": 0.21867810691534517, "grad_norm": 1.1171875, "learning_rate": 0.00019741205041703733, "loss": 4.5785, "step": 2109 }, { "epoch": 0.21878179496983324, "grad_norm": 2.265625, "learning_rate": 0.0001974095946456932, "loss": 4.5964, "step": 2110 }, { "epoch": 0.21888548302432134, "grad_norm": 2.09375, "learning_rate": 0.0001974071377250188, "loss": 4.5467, "step": 2111 }, { "epoch": 0.2189891710788094, "grad_norm": 1.34375, "learning_rate": 0.0001974046796550432, "loss": 4.5953, "step": 2112 }, { "epoch": 0.21909285913329749, "grad_norm": 1.3359375, "learning_rate": 0.00019740222043579527, "loss": 4.6259, "step": 2113 }, { "epoch": 0.21919654718778556, "grad_norm": 1.421875, "learning_rate": 0.00019739976006730414, "loss": 4.5235, "step": 2114 }, { "epoch": 0.21930023524227363, "grad_norm": 1.140625, "learning_rate": 0.0001973972985495988, "loss": 4.5858, "step": 2115 }, { "epoch": 0.2194039232967617, "grad_norm": 1.84375, "learning_rate": 0.00019739483588270828, "loss": 4.5866, "step": 2116 }, { "epoch": 0.21950761135124977, "grad_norm": 1.484375, "learning_rate": 0.00019739237206666164, "loss": 4.6022, "step": 2117 }, { "epoch": 0.21961129940573784, "grad_norm": 1.8828125, "learning_rate": 0.00019738990710148796, "loss": 4.6095, "step": 2118 }, { "epoch": 0.2197149874602259, "grad_norm": 1.7265625, "learning_rate": 0.00019738744098721632, "loss": 4.5528, "step": 2119 }, { "epoch": 0.21981867551471398, "grad_norm": 1.6796875, "learning_rate": 0.00019738497372387586, "loss": 4.5543, "step": 2120 }, { "epoch": 0.21992236356920206, "grad_norm": 1.4453125, "learning_rate": 0.0001973825053114956, "loss": 4.5641, "step": 2121 }, { "epoch": 0.22002605162369013, "grad_norm": 1.75, "learning_rate": 0.00019738003575010474, "loss": 4.5842, "step": 2122 }, { "epoch": 0.2201297396781782, "grad_norm": 1.3203125, "learning_rate": 0.0001973775650397324, "loss": 4.5878, "step": 2123 }, { "epoch": 0.22023342773266627, "grad_norm": 2.21875, "learning_rate": 0.0001973750931804077, "loss": 4.5831, "step": 2124 }, { "epoch": 0.22033711578715434, "grad_norm": 1.984375, "learning_rate": 0.00019737262017215982, "loss": 4.5747, "step": 2125 }, { "epoch": 0.2204408038416424, "grad_norm": 1.546875, "learning_rate": 0.000197370146015018, "loss": 4.5351, "step": 2126 }, { "epoch": 0.22054449189613048, "grad_norm": 1.5390625, "learning_rate": 0.00019736767070901133, "loss": 4.5474, "step": 2127 }, { "epoch": 0.22064817995061856, "grad_norm": 1.34375, "learning_rate": 0.00019736519425416908, "loss": 4.6022, "step": 2128 }, { "epoch": 0.22075186800510663, "grad_norm": 1.21875, "learning_rate": 0.00019736271665052047, "loss": 4.5988, "step": 2129 }, { "epoch": 0.2208555560595947, "grad_norm": 1.5390625, "learning_rate": 0.00019736023789809472, "loss": 4.5532, "step": 2130 }, { "epoch": 0.22095924411408277, "grad_norm": 1.296875, "learning_rate": 0.0001973577579969211, "loss": 4.5863, "step": 2131 }, { "epoch": 0.22106293216857084, "grad_norm": 2.0, "learning_rate": 0.0001973552769470288, "loss": 4.5876, "step": 2132 }, { "epoch": 0.22116662022305894, "grad_norm": 1.8125, "learning_rate": 0.00019735279474844718, "loss": 4.6137, "step": 2133 }, { "epoch": 0.221270308277547, "grad_norm": 1.4375, "learning_rate": 0.00019735031140120547, "loss": 4.5873, "step": 2134 }, { "epoch": 0.22137399633203508, "grad_norm": 1.3515625, "learning_rate": 0.00019734782690533298, "loss": 4.605, "step": 2135 }, { "epoch": 0.22147768438652315, "grad_norm": 1.4921875, "learning_rate": 0.00019734534126085904, "loss": 4.603, "step": 2136 }, { "epoch": 0.22158137244101123, "grad_norm": 1.34375, "learning_rate": 0.00019734285446781297, "loss": 4.5563, "step": 2137 }, { "epoch": 0.2216850604954993, "grad_norm": 1.5078125, "learning_rate": 0.00019734036652622412, "loss": 4.5683, "step": 2138 }, { "epoch": 0.22178874854998737, "grad_norm": 1.421875, "learning_rate": 0.00019733787743612185, "loss": 4.5973, "step": 2139 }, { "epoch": 0.22189243660447544, "grad_norm": 1.40625, "learning_rate": 0.00019733538719753552, "loss": 4.6036, "step": 2140 }, { "epoch": 0.2219961246589635, "grad_norm": 1.2109375, "learning_rate": 0.00019733289581049448, "loss": 4.5687, "step": 2141 }, { "epoch": 0.22209981271345158, "grad_norm": 1.421875, "learning_rate": 0.00019733040327502815, "loss": 4.6214, "step": 2142 }, { "epoch": 0.22220350076793965, "grad_norm": 1.203125, "learning_rate": 0.000197327909591166, "loss": 4.5759, "step": 2143 }, { "epoch": 0.22230718882242773, "grad_norm": 1.5078125, "learning_rate": 0.00019732541475893733, "loss": 4.5674, "step": 2144 }, { "epoch": 0.2224108768769158, "grad_norm": 1.3125, "learning_rate": 0.00019732291877837165, "loss": 4.5504, "step": 2145 }, { "epoch": 0.22251456493140387, "grad_norm": 1.4296875, "learning_rate": 0.00019732042164949845, "loss": 4.5623, "step": 2146 }, { "epoch": 0.22261825298589194, "grad_norm": 1.1875, "learning_rate": 0.0001973179233723471, "loss": 4.5639, "step": 2147 }, { "epoch": 0.22272194104038, "grad_norm": 1.3671875, "learning_rate": 0.00019731542394694715, "loss": 4.5888, "step": 2148 }, { "epoch": 0.22282562909486808, "grad_norm": 1.171875, "learning_rate": 0.00019731292337332807, "loss": 4.5787, "step": 2149 }, { "epoch": 0.22292931714935615, "grad_norm": 1.46875, "learning_rate": 0.00019731042165151936, "loss": 4.5718, "step": 2150 }, { "epoch": 0.22303300520384423, "grad_norm": 1.21875, "learning_rate": 0.00019730791878155052, "loss": 4.5511, "step": 2151 }, { "epoch": 0.2231366932583323, "grad_norm": 1.484375, "learning_rate": 0.0001973054147634511, "loss": 4.588, "step": 2152 }, { "epoch": 0.22324038131282037, "grad_norm": 1.4140625, "learning_rate": 0.00019730290959725063, "loss": 4.5352, "step": 2153 }, { "epoch": 0.22334406936730844, "grad_norm": 1.484375, "learning_rate": 0.0001973004032829787, "loss": 4.5747, "step": 2154 }, { "epoch": 0.22344775742179654, "grad_norm": 1.3046875, "learning_rate": 0.00019729789582066486, "loss": 4.5886, "step": 2155 }, { "epoch": 0.2235514454762846, "grad_norm": 1.46875, "learning_rate": 0.0001972953872103387, "loss": 4.5761, "step": 2156 }, { "epoch": 0.22365513353077268, "grad_norm": 1.25, "learning_rate": 0.0001972928774520298, "loss": 4.5831, "step": 2157 }, { "epoch": 0.22375882158526075, "grad_norm": 1.5078125, "learning_rate": 0.0001972903665457678, "loss": 4.5881, "step": 2158 }, { "epoch": 0.22386250963974882, "grad_norm": 1.2421875, "learning_rate": 0.00019728785449158232, "loss": 4.5763, "step": 2159 }, { "epoch": 0.2239661976942369, "grad_norm": 1.546875, "learning_rate": 0.00019728534128950299, "loss": 4.5638, "step": 2160 }, { "epoch": 0.22406988574872497, "grad_norm": 1.1171875, "learning_rate": 0.00019728282693955946, "loss": 4.5464, "step": 2161 }, { "epoch": 0.22417357380321304, "grad_norm": 1.78125, "learning_rate": 0.00019728031144178142, "loss": 4.589, "step": 2162 }, { "epoch": 0.2242772618577011, "grad_norm": 1.4296875, "learning_rate": 0.00019727779479619852, "loss": 4.5857, "step": 2163 }, { "epoch": 0.22438094991218918, "grad_norm": 1.984375, "learning_rate": 0.00019727527700284046, "loss": 4.5437, "step": 2164 }, { "epoch": 0.22448463796667725, "grad_norm": 1.7890625, "learning_rate": 0.00019727275806173696, "loss": 4.573, "step": 2165 }, { "epoch": 0.22458832602116532, "grad_norm": 1.421875, "learning_rate": 0.00019727023797291778, "loss": 4.5802, "step": 2166 }, { "epoch": 0.2246920140756534, "grad_norm": 1.359375, "learning_rate": 0.00019726771673641256, "loss": 4.5805, "step": 2167 }, { "epoch": 0.22479570213014147, "grad_norm": 1.65625, "learning_rate": 0.00019726519435225113, "loss": 4.593, "step": 2168 }, { "epoch": 0.22489939018462954, "grad_norm": 1.4296875, "learning_rate": 0.0001972626708204632, "loss": 4.5521, "step": 2169 }, { "epoch": 0.2250030782391176, "grad_norm": 1.546875, "learning_rate": 0.00019726014614107856, "loss": 4.5509, "step": 2170 }, { "epoch": 0.22510676629360568, "grad_norm": 1.484375, "learning_rate": 0.00019725762031412702, "loss": 4.5501, "step": 2171 }, { "epoch": 0.22521045434809375, "grad_norm": 1.515625, "learning_rate": 0.0001972550933396384, "loss": 4.6034, "step": 2172 }, { "epoch": 0.22531414240258182, "grad_norm": 1.328125, "learning_rate": 0.0001972525652176424, "loss": 4.5989, "step": 2173 }, { "epoch": 0.2254178304570699, "grad_norm": 1.4921875, "learning_rate": 0.000197250035948169, "loss": 4.6182, "step": 2174 }, { "epoch": 0.22552151851155797, "grad_norm": 1.296875, "learning_rate": 0.00019724750553124794, "loss": 4.54, "step": 2175 }, { "epoch": 0.22562520656604604, "grad_norm": 1.5625, "learning_rate": 0.00019724497396690911, "loss": 4.6102, "step": 2176 }, { "epoch": 0.2257288946205341, "grad_norm": 1.3984375, "learning_rate": 0.0001972424412551824, "loss": 4.6087, "step": 2177 }, { "epoch": 0.2258325826750222, "grad_norm": 1.484375, "learning_rate": 0.00019723990739609765, "loss": 4.5766, "step": 2178 }, { "epoch": 0.22593627072951028, "grad_norm": 1.3046875, "learning_rate": 0.0001972373723896848, "loss": 4.5922, "step": 2179 }, { "epoch": 0.22603995878399835, "grad_norm": 1.5625, "learning_rate": 0.0001972348362359737, "loss": 4.6109, "step": 2180 }, { "epoch": 0.22614364683848642, "grad_norm": 1.296875, "learning_rate": 0.00019723229893499436, "loss": 4.5587, "step": 2181 }, { "epoch": 0.2262473348929745, "grad_norm": 1.6796875, "learning_rate": 0.00019722976048677668, "loss": 4.6016, "step": 2182 }, { "epoch": 0.22635102294746257, "grad_norm": 1.46875, "learning_rate": 0.00019722722089135058, "loss": 4.6004, "step": 2183 }, { "epoch": 0.22645471100195064, "grad_norm": 1.40625, "learning_rate": 0.00019722468014874602, "loss": 4.5439, "step": 2184 }, { "epoch": 0.2265583990564387, "grad_norm": 1.328125, "learning_rate": 0.00019722213825899306, "loss": 4.538, "step": 2185 }, { "epoch": 0.22666208711092678, "grad_norm": 1.421875, "learning_rate": 0.0001972195952221216, "loss": 4.5763, "step": 2186 }, { "epoch": 0.22676577516541485, "grad_norm": 1.2421875, "learning_rate": 0.00019721705103816167, "loss": 4.5971, "step": 2187 }, { "epoch": 0.22686946321990292, "grad_norm": 1.3671875, "learning_rate": 0.00019721450570714332, "loss": 4.5941, "step": 2188 }, { "epoch": 0.226973151274391, "grad_norm": 1.2578125, "learning_rate": 0.00019721195922909658, "loss": 4.5869, "step": 2189 }, { "epoch": 0.22707683932887907, "grad_norm": 1.515625, "learning_rate": 0.00019720941160405146, "loss": 4.5875, "step": 2190 }, { "epoch": 0.22718052738336714, "grad_norm": 1.2421875, "learning_rate": 0.00019720686283203803, "loss": 4.5523, "step": 2191 }, { "epoch": 0.2272842154378552, "grad_norm": 1.5078125, "learning_rate": 0.0001972043129130864, "loss": 4.5619, "step": 2192 }, { "epoch": 0.22738790349234328, "grad_norm": 1.2890625, "learning_rate": 0.00019720176184722662, "loss": 4.55, "step": 2193 }, { "epoch": 0.22749159154683135, "grad_norm": 1.9375, "learning_rate": 0.00019719920963448876, "loss": 4.5597, "step": 2194 }, { "epoch": 0.22759527960131942, "grad_norm": 1.609375, "learning_rate": 0.000197196656274903, "loss": 4.5254, "step": 2195 }, { "epoch": 0.2276989676558075, "grad_norm": 1.6875, "learning_rate": 0.00019719410176849943, "loss": 4.5352, "step": 2196 }, { "epoch": 0.22780265571029557, "grad_norm": 1.484375, "learning_rate": 0.0001971915461153082, "loss": 4.5494, "step": 2197 }, { "epoch": 0.22790634376478364, "grad_norm": 1.421875, "learning_rate": 0.00019718898931535948, "loss": 4.5682, "step": 2198 }, { "epoch": 0.2280100318192717, "grad_norm": 1.2734375, "learning_rate": 0.0001971864313686834, "loss": 4.562, "step": 2199 }, { "epoch": 0.2281137198737598, "grad_norm": 1.5, "learning_rate": 0.00019718387227531014, "loss": 4.5403, "step": 2200 }, { "epoch": 0.22821740792824788, "grad_norm": 1.21875, "learning_rate": 0.00019718131203526996, "loss": 4.5693, "step": 2201 }, { "epoch": 0.22832109598273595, "grad_norm": 1.4140625, "learning_rate": 0.00019717875064859298, "loss": 4.5906, "step": 2202 }, { "epoch": 0.22842478403722402, "grad_norm": 1.203125, "learning_rate": 0.0001971761881153095, "loss": 4.535, "step": 2203 }, { "epoch": 0.2285284720917121, "grad_norm": 1.6171875, "learning_rate": 0.0001971736244354497, "loss": 4.5743, "step": 2204 }, { "epoch": 0.22863216014620016, "grad_norm": 1.2421875, "learning_rate": 0.00019717105960904386, "loss": 4.5418, "step": 2205 }, { "epoch": 0.22873584820068824, "grad_norm": 1.7421875, "learning_rate": 0.00019716849363612222, "loss": 4.581, "step": 2206 }, { "epoch": 0.2288395362551763, "grad_norm": 1.375, "learning_rate": 0.00019716592651671506, "loss": 4.5945, "step": 2207 }, { "epoch": 0.22894322430966438, "grad_norm": 1.90625, "learning_rate": 0.00019716335825085269, "loss": 4.5853, "step": 2208 }, { "epoch": 0.22904691236415245, "grad_norm": 1.8359375, "learning_rate": 0.0001971607888385654, "loss": 4.574, "step": 2209 }, { "epoch": 0.22915060041864052, "grad_norm": 1.4140625, "learning_rate": 0.0001971582182798835, "loss": 4.5358, "step": 2210 }, { "epoch": 0.2292542884731286, "grad_norm": 1.34375, "learning_rate": 0.0001971556465748373, "loss": 4.5675, "step": 2211 }, { "epoch": 0.22935797652761666, "grad_norm": 1.546875, "learning_rate": 0.0001971530737234572, "loss": 4.5541, "step": 2212 }, { "epoch": 0.22946166458210474, "grad_norm": 1.328125, "learning_rate": 0.00019715049972577353, "loss": 4.5301, "step": 2213 }, { "epoch": 0.2295653526365928, "grad_norm": 1.5703125, "learning_rate": 0.00019714792458181663, "loss": 4.5643, "step": 2214 }, { "epoch": 0.22966904069108088, "grad_norm": 1.4140625, "learning_rate": 0.00019714534829161693, "loss": 4.5358, "step": 2215 }, { "epoch": 0.22977272874556895, "grad_norm": 1.4609375, "learning_rate": 0.0001971427708552048, "loss": 4.5537, "step": 2216 }, { "epoch": 0.22987641680005702, "grad_norm": 1.3046875, "learning_rate": 0.00019714019227261067, "loss": 4.593, "step": 2217 }, { "epoch": 0.2299801048545451, "grad_norm": 1.59375, "learning_rate": 0.00019713761254386495, "loss": 4.5416, "step": 2218 }, { "epoch": 0.23008379290903316, "grad_norm": 1.375, "learning_rate": 0.00019713503166899807, "loss": 4.5677, "step": 2219 }, { "epoch": 0.23018748096352123, "grad_norm": 1.5390625, "learning_rate": 0.0001971324496480405, "loss": 4.5802, "step": 2220 }, { "epoch": 0.2302911690180093, "grad_norm": 1.4609375, "learning_rate": 0.0001971298664810227, "loss": 4.516, "step": 2221 }, { "epoch": 0.2303948570724974, "grad_norm": 1.3828125, "learning_rate": 0.00019712728216797514, "loss": 4.5601, "step": 2222 }, { "epoch": 0.23049854512698548, "grad_norm": 1.1796875, "learning_rate": 0.0001971246967089283, "loss": 4.5543, "step": 2223 }, { "epoch": 0.23060223318147355, "grad_norm": 1.515625, "learning_rate": 0.00019712211010391274, "loss": 4.5348, "step": 2224 }, { "epoch": 0.23070592123596162, "grad_norm": 1.2734375, "learning_rate": 0.0001971195223529589, "loss": 4.5637, "step": 2225 }, { "epoch": 0.2308096092904497, "grad_norm": 1.90625, "learning_rate": 0.00019711693345609739, "loss": 4.608, "step": 2226 }, { "epoch": 0.23091329734493776, "grad_norm": 1.671875, "learning_rate": 0.0001971143434133587, "loss": 4.5851, "step": 2227 }, { "epoch": 0.23101698539942583, "grad_norm": 1.7109375, "learning_rate": 0.00019711175222477344, "loss": 4.5839, "step": 2228 }, { "epoch": 0.2311206734539139, "grad_norm": 1.546875, "learning_rate": 0.00019710915989037213, "loss": 4.5727, "step": 2229 }, { "epoch": 0.23122436150840198, "grad_norm": 1.390625, "learning_rate": 0.0001971065664101854, "loss": 4.5646, "step": 2230 }, { "epoch": 0.23132804956289005, "grad_norm": 1.3828125, "learning_rate": 0.00019710397178424383, "loss": 4.5607, "step": 2231 }, { "epoch": 0.23143173761737812, "grad_norm": 1.1875, "learning_rate": 0.00019710137601257804, "loss": 4.5645, "step": 2232 }, { "epoch": 0.2315354256718662, "grad_norm": 1.1015625, "learning_rate": 0.00019709877909521864, "loss": 4.5555, "step": 2233 }, { "epoch": 0.23163911372635426, "grad_norm": 1.1171875, "learning_rate": 0.0001970961810321963, "loss": 4.5427, "step": 2234 }, { "epoch": 0.23174280178084233, "grad_norm": 0.9140625, "learning_rate": 0.00019709358182354162, "loss": 4.5674, "step": 2235 }, { "epoch": 0.2318464898353304, "grad_norm": 1.171875, "learning_rate": 0.00019709098146928535, "loss": 4.53, "step": 2236 }, { "epoch": 0.23195017788981848, "grad_norm": 0.8515625, "learning_rate": 0.0001970883799694581, "loss": 4.5445, "step": 2237 }, { "epoch": 0.23205386594430655, "grad_norm": 1.375, "learning_rate": 0.00019708577732409062, "loss": 4.5639, "step": 2238 }, { "epoch": 0.23215755399879462, "grad_norm": 1.0703125, "learning_rate": 0.00019708317353321357, "loss": 4.5738, "step": 2239 }, { "epoch": 0.2322612420532827, "grad_norm": 1.609375, "learning_rate": 0.0001970805685968577, "loss": 4.5423, "step": 2240 }, { "epoch": 0.23236493010777076, "grad_norm": 1.5546875, "learning_rate": 0.00019707796251505375, "loss": 4.5786, "step": 2241 }, { "epoch": 0.23246861816225883, "grad_norm": 1.0078125, "learning_rate": 0.00019707535528783244, "loss": 4.562, "step": 2242 }, { "epoch": 0.2325723062167469, "grad_norm": 1.09375, "learning_rate": 0.00019707274691522456, "loss": 4.547, "step": 2243 }, { "epoch": 0.23267599427123498, "grad_norm": 0.99609375, "learning_rate": 0.0001970701373972609, "loss": 4.5492, "step": 2244 }, { "epoch": 0.23277968232572308, "grad_norm": 0.9296875, "learning_rate": 0.00019706752673397218, "loss": 4.5297, "step": 2245 }, { "epoch": 0.23288337038021115, "grad_norm": 0.90625, "learning_rate": 0.00019706491492538927, "loss": 4.5933, "step": 2246 }, { "epoch": 0.23298705843469922, "grad_norm": 0.8984375, "learning_rate": 0.00019706230197154298, "loss": 4.5682, "step": 2247 }, { "epoch": 0.2330907464891873, "grad_norm": 0.83984375, "learning_rate": 0.00019705968787246412, "loss": 4.5609, "step": 2248 }, { "epoch": 0.23319443454367536, "grad_norm": 0.828125, "learning_rate": 0.00019705707262818354, "loss": 4.5367, "step": 2249 }, { "epoch": 0.23329812259816343, "grad_norm": 0.80078125, "learning_rate": 0.0001970544562387321, "loss": 4.5668, "step": 2250 }, { "epoch": 0.2334018106526515, "grad_norm": 0.78515625, "learning_rate": 0.00019705183870414062, "loss": 4.5505, "step": 2251 }, { "epoch": 0.23350549870713957, "grad_norm": 0.73828125, "learning_rate": 0.00019704922002444008, "loss": 4.5626, "step": 2252 }, { "epoch": 0.23360918676162765, "grad_norm": 0.71875, "learning_rate": 0.00019704660019966133, "loss": 4.5656, "step": 2253 }, { "epoch": 0.23371287481611572, "grad_norm": 0.6953125, "learning_rate": 0.00019704397922983526, "loss": 4.5666, "step": 2254 }, { "epoch": 0.2338165628706038, "grad_norm": 0.6328125, "learning_rate": 0.00019704135711499286, "loss": 4.5267, "step": 2255 }, { "epoch": 0.23392025092509186, "grad_norm": 0.6328125, "learning_rate": 0.00019703873385516497, "loss": 4.5764, "step": 2256 }, { "epoch": 0.23402393897957993, "grad_norm": 0.6328125, "learning_rate": 0.0001970361094503826, "loss": 4.5261, "step": 2257 }, { "epoch": 0.234127627034068, "grad_norm": 0.62109375, "learning_rate": 0.00019703348390067674, "loss": 4.5832, "step": 2258 }, { "epoch": 0.23423131508855607, "grad_norm": 0.6171875, "learning_rate": 0.0001970308572060783, "loss": 4.5381, "step": 2259 }, { "epoch": 0.23433500314304415, "grad_norm": 0.62890625, "learning_rate": 0.00019702822936661836, "loss": 4.5449, "step": 2260 }, { "epoch": 0.23443869119753222, "grad_norm": 0.609375, "learning_rate": 0.00019702560038232782, "loss": 4.5032, "step": 2261 }, { "epoch": 0.2345423792520203, "grad_norm": 0.609375, "learning_rate": 0.0001970229702532378, "loss": 4.5321, "step": 2262 }, { "epoch": 0.23464606730650836, "grad_norm": 0.60546875, "learning_rate": 0.00019702033897937927, "loss": 4.5649, "step": 2263 }, { "epoch": 0.23474975536099643, "grad_norm": 0.5390625, "learning_rate": 0.0001970177065607833, "loss": 4.6028, "step": 2264 }, { "epoch": 0.2348534434154845, "grad_norm": 0.55859375, "learning_rate": 0.00019701507299748095, "loss": 4.539, "step": 2265 }, { "epoch": 0.23495713146997257, "grad_norm": 0.5390625, "learning_rate": 0.00019701243828950329, "loss": 4.5593, "step": 2266 }, { "epoch": 0.23506081952446067, "grad_norm": 0.5703125, "learning_rate": 0.0001970098024368814, "loss": 4.5882, "step": 2267 }, { "epoch": 0.23516450757894874, "grad_norm": 0.55859375, "learning_rate": 0.00019700716543964638, "loss": 4.5798, "step": 2268 }, { "epoch": 0.23526819563343682, "grad_norm": 0.56640625, "learning_rate": 0.00019700452729782934, "loss": 4.5523, "step": 2269 }, { "epoch": 0.2353718836879249, "grad_norm": 0.59375, "learning_rate": 0.0001970018880114614, "loss": 4.5734, "step": 2270 }, { "epoch": 0.23547557174241296, "grad_norm": 0.5546875, "learning_rate": 0.00019699924758057377, "loss": 4.5664, "step": 2271 }, { "epoch": 0.23557925979690103, "grad_norm": 0.5703125, "learning_rate": 0.00019699660600519753, "loss": 4.557, "step": 2272 }, { "epoch": 0.2356829478513891, "grad_norm": 0.58203125, "learning_rate": 0.00019699396328536384, "loss": 4.5388, "step": 2273 }, { "epoch": 0.23578663590587717, "grad_norm": 0.59765625, "learning_rate": 0.00019699131942110397, "loss": 4.5299, "step": 2274 }, { "epoch": 0.23589032396036524, "grad_norm": 0.6015625, "learning_rate": 0.000196988674412449, "loss": 4.5497, "step": 2275 }, { "epoch": 0.23599401201485332, "grad_norm": 0.58203125, "learning_rate": 0.0001969860282594302, "loss": 4.5566, "step": 2276 }, { "epoch": 0.2360977000693414, "grad_norm": 0.578125, "learning_rate": 0.00019698338096207883, "loss": 4.5407, "step": 2277 }, { "epoch": 0.23620138812382946, "grad_norm": 0.5625, "learning_rate": 0.00019698073252042605, "loss": 4.5606, "step": 2278 }, { "epoch": 0.23630507617831753, "grad_norm": 0.58984375, "learning_rate": 0.00019697808293450312, "loss": 4.5585, "step": 2279 }, { "epoch": 0.2364087642328056, "grad_norm": 0.482421875, "learning_rate": 0.00019697543220434133, "loss": 4.5512, "step": 2280 }, { "epoch": 0.23651245228729367, "grad_norm": 0.58203125, "learning_rate": 0.00019697278032997198, "loss": 4.5865, "step": 2281 }, { "epoch": 0.23661614034178174, "grad_norm": 0.51171875, "learning_rate": 0.0001969701273114263, "loss": 4.5634, "step": 2282 }, { "epoch": 0.23671982839626982, "grad_norm": 0.5703125, "learning_rate": 0.0001969674731487356, "loss": 4.5198, "step": 2283 }, { "epoch": 0.2368235164507579, "grad_norm": 0.53515625, "learning_rate": 0.00019696481784193127, "loss": 4.4959, "step": 2284 }, { "epoch": 0.23692720450524596, "grad_norm": 0.5234375, "learning_rate": 0.00019696216139104453, "loss": 4.5441, "step": 2285 }, { "epoch": 0.23703089255973403, "grad_norm": 0.52734375, "learning_rate": 0.00019695950379610682, "loss": 4.5769, "step": 2286 }, { "epoch": 0.2371345806142221, "grad_norm": 0.5, "learning_rate": 0.00019695684505714942, "loss": 4.5464, "step": 2287 }, { "epoch": 0.23723826866871017, "grad_norm": 0.5546875, "learning_rate": 0.00019695418517420377, "loss": 4.545, "step": 2288 }, { "epoch": 0.23734195672319827, "grad_norm": 0.5078125, "learning_rate": 0.0001969515241473012, "loss": 4.5357, "step": 2289 }, { "epoch": 0.23744564477768634, "grad_norm": 0.5, "learning_rate": 0.00019694886197647312, "loss": 4.5784, "step": 2290 }, { "epoch": 0.23754933283217441, "grad_norm": 0.482421875, "learning_rate": 0.00019694619866175098, "loss": 4.5894, "step": 2291 }, { "epoch": 0.23765302088666249, "grad_norm": 0.52734375, "learning_rate": 0.00019694353420316615, "loss": 4.5586, "step": 2292 }, { "epoch": 0.23775670894115056, "grad_norm": 0.53515625, "learning_rate": 0.0001969408686007501, "loss": 4.5119, "step": 2293 }, { "epoch": 0.23786039699563863, "grad_norm": 0.53125, "learning_rate": 0.00019693820185453427, "loss": 4.5694, "step": 2294 }, { "epoch": 0.2379640850501267, "grad_norm": 0.5078125, "learning_rate": 0.00019693553396455012, "loss": 4.5731, "step": 2295 }, { "epoch": 0.23806777310461477, "grad_norm": 0.546875, "learning_rate": 0.0001969328649308291, "loss": 4.5255, "step": 2296 }, { "epoch": 0.23817146115910284, "grad_norm": 0.5859375, "learning_rate": 0.0001969301947534028, "loss": 4.5547, "step": 2297 }, { "epoch": 0.23827514921359091, "grad_norm": 0.53515625, "learning_rate": 0.00019692752343230264, "loss": 4.5426, "step": 2298 }, { "epoch": 0.23837883726807899, "grad_norm": 0.5859375, "learning_rate": 0.00019692485096756016, "loss": 4.551, "step": 2299 }, { "epoch": 0.23848252532256706, "grad_norm": 0.59765625, "learning_rate": 0.0001969221773592069, "loss": 4.5627, "step": 2300 }, { "epoch": 0.23858621337705513, "grad_norm": 0.609375, "learning_rate": 0.00019691950260727437, "loss": 4.5379, "step": 2301 }, { "epoch": 0.2386899014315432, "grad_norm": 0.56640625, "learning_rate": 0.00019691682671179415, "loss": 4.5418, "step": 2302 }, { "epoch": 0.23879358948603127, "grad_norm": 0.5859375, "learning_rate": 0.00019691414967279786, "loss": 4.5495, "step": 2303 }, { "epoch": 0.23889727754051934, "grad_norm": 0.578125, "learning_rate": 0.00019691147149031703, "loss": 4.557, "step": 2304 }, { "epoch": 0.23900096559500741, "grad_norm": 0.61328125, "learning_rate": 0.00019690879216438325, "loss": 4.5889, "step": 2305 }, { "epoch": 0.23910465364949549, "grad_norm": 0.5859375, "learning_rate": 0.0001969061116950282, "loss": 4.539, "step": 2306 }, { "epoch": 0.23920834170398356, "grad_norm": 0.57421875, "learning_rate": 0.00019690343008228343, "loss": 4.5705, "step": 2307 }, { "epoch": 0.23931202975847163, "grad_norm": 0.63671875, "learning_rate": 0.00019690074732618066, "loss": 4.5375, "step": 2308 }, { "epoch": 0.2394157178129597, "grad_norm": 0.5703125, "learning_rate": 0.00019689806342675147, "loss": 4.5483, "step": 2309 }, { "epoch": 0.23951940586744777, "grad_norm": 0.625, "learning_rate": 0.00019689537838402758, "loss": 4.5793, "step": 2310 }, { "epoch": 0.23962309392193584, "grad_norm": 0.58203125, "learning_rate": 0.0001968926921980406, "loss": 4.5669, "step": 2311 }, { "epoch": 0.23972678197642394, "grad_norm": 0.5625, "learning_rate": 0.00019689000486882235, "loss": 4.5548, "step": 2312 }, { "epoch": 0.239830470030912, "grad_norm": 0.5703125, "learning_rate": 0.00019688731639640438, "loss": 4.5066, "step": 2313 }, { "epoch": 0.23993415808540008, "grad_norm": 0.59765625, "learning_rate": 0.00019688462678081852, "loss": 4.5549, "step": 2314 }, { "epoch": 0.24003784613988816, "grad_norm": 0.59765625, "learning_rate": 0.00019688193602209652, "loss": 4.5162, "step": 2315 }, { "epoch": 0.24014153419437623, "grad_norm": 0.54296875, "learning_rate": 0.00019687924412027004, "loss": 4.5942, "step": 2316 }, { "epoch": 0.2402452222488643, "grad_norm": 0.59765625, "learning_rate": 0.00019687655107537087, "loss": 4.5106, "step": 2317 }, { "epoch": 0.24034891030335237, "grad_norm": 0.5390625, "learning_rate": 0.00019687385688743083, "loss": 4.5898, "step": 2318 }, { "epoch": 0.24045259835784044, "grad_norm": 0.50390625, "learning_rate": 0.00019687116155648167, "loss": 4.5338, "step": 2319 }, { "epoch": 0.2405562864123285, "grad_norm": 0.53125, "learning_rate": 0.00019686846508255518, "loss": 4.5561, "step": 2320 }, { "epoch": 0.24065997446681658, "grad_norm": 0.5234375, "learning_rate": 0.00019686576746568321, "loss": 4.5322, "step": 2321 }, { "epoch": 0.24076366252130466, "grad_norm": 0.59375, "learning_rate": 0.0001968630687058976, "loss": 4.556, "step": 2322 }, { "epoch": 0.24086735057579273, "grad_norm": 0.486328125, "learning_rate": 0.00019686036880323012, "loss": 4.5641, "step": 2323 }, { "epoch": 0.2409710386302808, "grad_norm": 0.6171875, "learning_rate": 0.00019685766775771272, "loss": 4.541, "step": 2324 }, { "epoch": 0.24107472668476887, "grad_norm": 0.5, "learning_rate": 0.00019685496556937722, "loss": 4.5167, "step": 2325 }, { "epoch": 0.24117841473925694, "grad_norm": 0.56640625, "learning_rate": 0.0001968522622382555, "loss": 4.5524, "step": 2326 }, { "epoch": 0.241282102793745, "grad_norm": 0.4765625, "learning_rate": 0.00019684955776437947, "loss": 4.5259, "step": 2327 }, { "epoch": 0.24138579084823308, "grad_norm": 0.53125, "learning_rate": 0.000196846852147781, "loss": 4.5491, "step": 2328 }, { "epoch": 0.24148947890272116, "grad_norm": 0.55078125, "learning_rate": 0.00019684414538849207, "loss": 4.5132, "step": 2329 }, { "epoch": 0.24159316695720923, "grad_norm": 0.50390625, "learning_rate": 0.00019684143748654458, "loss": 4.5794, "step": 2330 }, { "epoch": 0.2416968550116973, "grad_norm": 0.671875, "learning_rate": 0.00019683872844197052, "loss": 4.5889, "step": 2331 }, { "epoch": 0.24180054306618537, "grad_norm": 0.578125, "learning_rate": 0.0001968360182548018, "loss": 4.5713, "step": 2332 }, { "epoch": 0.24190423112067344, "grad_norm": 0.5859375, "learning_rate": 0.00019683330692507042, "loss": 4.5571, "step": 2333 }, { "epoch": 0.24200791917516154, "grad_norm": 0.56640625, "learning_rate": 0.00019683059445280837, "loss": 4.5484, "step": 2334 }, { "epoch": 0.2421116072296496, "grad_norm": 0.5859375, "learning_rate": 0.00019682788083804771, "loss": 4.538, "step": 2335 }, { "epoch": 0.24221529528413768, "grad_norm": 0.6015625, "learning_rate": 0.00019682516608082037, "loss": 4.5748, "step": 2336 }, { "epoch": 0.24231898333862575, "grad_norm": 0.52734375, "learning_rate": 0.00019682245018115842, "loss": 4.5281, "step": 2337 }, { "epoch": 0.24242267139311383, "grad_norm": 0.53515625, "learning_rate": 0.0001968197331390939, "loss": 4.5424, "step": 2338 }, { "epoch": 0.2425263594476019, "grad_norm": 0.5234375, "learning_rate": 0.00019681701495465889, "loss": 4.5263, "step": 2339 }, { "epoch": 0.24263004750208997, "grad_norm": 0.5546875, "learning_rate": 0.00019681429562788542, "loss": 4.5765, "step": 2340 }, { "epoch": 0.24273373555657804, "grad_norm": 0.58984375, "learning_rate": 0.00019681157515880564, "loss": 4.4784, "step": 2341 }, { "epoch": 0.2428374236110661, "grad_norm": 0.51953125, "learning_rate": 0.00019680885354745158, "loss": 4.542, "step": 2342 }, { "epoch": 0.24294111166555418, "grad_norm": 0.52734375, "learning_rate": 0.00019680613079385537, "loss": 4.5242, "step": 2343 }, { "epoch": 0.24304479972004225, "grad_norm": 0.52734375, "learning_rate": 0.00019680340689804914, "loss": 4.5195, "step": 2344 }, { "epoch": 0.24314848777453033, "grad_norm": 0.54296875, "learning_rate": 0.00019680068186006506, "loss": 4.5474, "step": 2345 }, { "epoch": 0.2432521758290184, "grad_norm": 0.470703125, "learning_rate": 0.00019679795567993527, "loss": 4.5701, "step": 2346 }, { "epoch": 0.24335586388350647, "grad_norm": 0.52734375, "learning_rate": 0.00019679522835769188, "loss": 4.5311, "step": 2347 }, { "epoch": 0.24345955193799454, "grad_norm": 0.439453125, "learning_rate": 0.00019679249989336715, "loss": 4.5522, "step": 2348 }, { "epoch": 0.2435632399924826, "grad_norm": 0.56640625, "learning_rate": 0.00019678977028699318, "loss": 4.5616, "step": 2349 }, { "epoch": 0.24366692804697068, "grad_norm": 0.45703125, "learning_rate": 0.0001967870395386023, "loss": 4.5, "step": 2350 }, { "epoch": 0.24377061610145875, "grad_norm": 0.55859375, "learning_rate": 0.00019678430764822661, "loss": 4.5487, "step": 2351 }, { "epoch": 0.24387430415594682, "grad_norm": 0.466796875, "learning_rate": 0.00019678157461589844, "loss": 4.522, "step": 2352 }, { "epoch": 0.2439779922104349, "grad_norm": 0.609375, "learning_rate": 0.00019677884044164997, "loss": 4.5293, "step": 2353 }, { "epoch": 0.24408168026492297, "grad_norm": 0.5, "learning_rate": 0.00019677610512551348, "loss": 4.5629, "step": 2354 }, { "epoch": 0.24418536831941104, "grad_norm": 0.53125, "learning_rate": 0.00019677336866752123, "loss": 4.5529, "step": 2355 }, { "epoch": 0.24428905637389914, "grad_norm": 0.482421875, "learning_rate": 0.00019677063106770555, "loss": 4.5488, "step": 2356 }, { "epoch": 0.2443927444283872, "grad_norm": 0.52734375, "learning_rate": 0.00019676789232609868, "loss": 4.5474, "step": 2357 }, { "epoch": 0.24449643248287528, "grad_norm": 0.515625, "learning_rate": 0.000196765152442733, "loss": 4.5178, "step": 2358 }, { "epoch": 0.24460012053736335, "grad_norm": 0.5078125, "learning_rate": 0.0001967624114176408, "loss": 4.5141, "step": 2359 }, { "epoch": 0.24470380859185142, "grad_norm": 0.51953125, "learning_rate": 0.00019675966925085443, "loss": 4.4834, "step": 2360 }, { "epoch": 0.2448074966463395, "grad_norm": 0.470703125, "learning_rate": 0.00019675692594240624, "loss": 4.5927, "step": 2361 }, { "epoch": 0.24491118470082757, "grad_norm": 0.45703125, "learning_rate": 0.0001967541814923286, "loss": 4.5491, "step": 2362 }, { "epoch": 0.24501487275531564, "grad_norm": 0.490234375, "learning_rate": 0.00019675143590065387, "loss": 4.5038, "step": 2363 }, { "epoch": 0.2451185608098037, "grad_norm": 0.44921875, "learning_rate": 0.00019674868916741452, "loss": 4.5872, "step": 2364 }, { "epoch": 0.24522224886429178, "grad_norm": 0.51953125, "learning_rate": 0.00019674594129264286, "loss": 4.5441, "step": 2365 }, { "epoch": 0.24532593691877985, "grad_norm": 0.4921875, "learning_rate": 0.0001967431922763714, "loss": 4.5074, "step": 2366 }, { "epoch": 0.24542962497326792, "grad_norm": 0.546875, "learning_rate": 0.00019674044211863247, "loss": 4.4877, "step": 2367 }, { "epoch": 0.245533313027756, "grad_norm": 0.55078125, "learning_rate": 0.00019673769081945863, "loss": 4.527, "step": 2368 }, { "epoch": 0.24563700108224407, "grad_norm": 0.59375, "learning_rate": 0.00019673493837888228, "loss": 4.5584, "step": 2369 }, { "epoch": 0.24574068913673214, "grad_norm": 0.5703125, "learning_rate": 0.00019673218479693592, "loss": 4.5505, "step": 2370 }, { "epoch": 0.2458443771912202, "grad_norm": 0.5703125, "learning_rate": 0.00019672943007365202, "loss": 4.5845, "step": 2371 }, { "epoch": 0.24594806524570828, "grad_norm": 0.625, "learning_rate": 0.00019672667420906308, "loss": 4.5538, "step": 2372 }, { "epoch": 0.24605175330019635, "grad_norm": 0.5703125, "learning_rate": 0.00019672391720320165, "loss": 4.523, "step": 2373 }, { "epoch": 0.24615544135468442, "grad_norm": 0.62109375, "learning_rate": 0.00019672115905610023, "loss": 4.5312, "step": 2374 }, { "epoch": 0.2462591294091725, "grad_norm": 0.5859375, "learning_rate": 0.00019671839976779138, "loss": 4.5019, "step": 2375 }, { "epoch": 0.24636281746366057, "grad_norm": 0.57421875, "learning_rate": 0.00019671563933830767, "loss": 4.5387, "step": 2376 }, { "epoch": 0.24646650551814864, "grad_norm": 0.62890625, "learning_rate": 0.0001967128777676816, "loss": 4.5441, "step": 2377 }, { "epoch": 0.2465701935726367, "grad_norm": 0.60546875, "learning_rate": 0.00019671011505594581, "loss": 4.5023, "step": 2378 }, { "epoch": 0.2466738816271248, "grad_norm": 0.490234375, "learning_rate": 0.0001967073512031329, "loss": 4.5643, "step": 2379 }, { "epoch": 0.24677756968161288, "grad_norm": 0.5859375, "learning_rate": 0.00019670458620927548, "loss": 4.5538, "step": 2380 }, { "epoch": 0.24688125773610095, "grad_norm": 0.55078125, "learning_rate": 0.00019670182007440614, "loss": 4.583, "step": 2381 }, { "epoch": 0.24698494579058902, "grad_norm": 0.55078125, "learning_rate": 0.0001966990527985576, "loss": 4.5283, "step": 2382 }, { "epoch": 0.2470886338450771, "grad_norm": 0.58984375, "learning_rate": 0.0001966962843817624, "loss": 4.5397, "step": 2383 }, { "epoch": 0.24719232189956516, "grad_norm": 0.515625, "learning_rate": 0.00019669351482405324, "loss": 4.5366, "step": 2384 }, { "epoch": 0.24729600995405324, "grad_norm": 0.71875, "learning_rate": 0.00019669074412546284, "loss": 4.5615, "step": 2385 }, { "epoch": 0.2473996980085413, "grad_norm": 0.5078125, "learning_rate": 0.0001966879722860239, "loss": 4.5026, "step": 2386 }, { "epoch": 0.24750338606302938, "grad_norm": 0.71484375, "learning_rate": 0.00019668519930576904, "loss": 4.5545, "step": 2387 }, { "epoch": 0.24760707411751745, "grad_norm": 0.6484375, "learning_rate": 0.00019668242518473106, "loss": 4.5279, "step": 2388 }, { "epoch": 0.24771076217200552, "grad_norm": 0.56640625, "learning_rate": 0.00019667964992294264, "loss": 4.5575, "step": 2389 }, { "epoch": 0.2478144502264936, "grad_norm": 0.6015625, "learning_rate": 0.00019667687352043655, "loss": 4.5308, "step": 2390 }, { "epoch": 0.24791813828098166, "grad_norm": 0.6015625, "learning_rate": 0.00019667409597724553, "loss": 4.5585, "step": 2391 }, { "epoch": 0.24802182633546974, "grad_norm": 0.5625, "learning_rate": 0.0001966713172934024, "loss": 4.5285, "step": 2392 }, { "epoch": 0.2481255143899578, "grad_norm": 0.59765625, "learning_rate": 0.00019666853746893987, "loss": 4.4994, "step": 2393 }, { "epoch": 0.24822920244444588, "grad_norm": 0.5546875, "learning_rate": 0.00019666575650389084, "loss": 4.491, "step": 2394 }, { "epoch": 0.24833289049893395, "grad_norm": 0.53515625, "learning_rate": 0.000196662974398288, "loss": 4.4838, "step": 2395 }, { "epoch": 0.24843657855342202, "grad_norm": 0.53125, "learning_rate": 0.0001966601911521643, "loss": 4.5466, "step": 2396 }, { "epoch": 0.2485402666079101, "grad_norm": 0.59765625, "learning_rate": 0.00019665740676555246, "loss": 4.5693, "step": 2397 }, { "epoch": 0.24864395466239816, "grad_norm": 0.53515625, "learning_rate": 0.00019665462123848545, "loss": 4.5713, "step": 2398 }, { "epoch": 0.24874764271688624, "grad_norm": 0.59765625, "learning_rate": 0.00019665183457099602, "loss": 4.5254, "step": 2399 }, { "epoch": 0.2488513307713743, "grad_norm": 0.53125, "learning_rate": 0.00019664904676311716, "loss": 4.4666, "step": 2400 }, { "epoch": 0.2489550188258624, "grad_norm": 0.57421875, "learning_rate": 0.00019664625781488167, "loss": 4.4776, "step": 2401 }, { "epoch": 0.24905870688035048, "grad_norm": 0.56640625, "learning_rate": 0.00019664346772632252, "loss": 4.5156, "step": 2402 }, { "epoch": 0.24916239493483855, "grad_norm": 0.56640625, "learning_rate": 0.0001966406764974726, "loss": 4.5436, "step": 2403 }, { "epoch": 0.24926608298932662, "grad_norm": 0.54296875, "learning_rate": 0.00019663788412836483, "loss": 4.4748, "step": 2404 }, { "epoch": 0.2493697710438147, "grad_norm": 0.60546875, "learning_rate": 0.0001966350906190322, "loss": 4.5189, "step": 2405 }, { "epoch": 0.24947345909830276, "grad_norm": 0.6796875, "learning_rate": 0.00019663229596950766, "loss": 4.4902, "step": 2406 }, { "epoch": 0.24957714715279083, "grad_norm": 0.59375, "learning_rate": 0.00019662950017982416, "loss": 4.5363, "step": 2407 }, { "epoch": 0.2496808352072789, "grad_norm": 0.55078125, "learning_rate": 0.00019662670325001468, "loss": 4.5453, "step": 2408 }, { "epoch": 0.24978452326176698, "grad_norm": 0.578125, "learning_rate": 0.00019662390518011228, "loss": 4.5477, "step": 2409 }, { "epoch": 0.24988821131625505, "grad_norm": 0.49609375, "learning_rate": 0.0001966211059701499, "loss": 4.5409, "step": 2410 }, { "epoch": 0.24999189937074312, "grad_norm": 0.61328125, "learning_rate": 0.0001966183056201606, "loss": 4.525, "step": 2411 }, { "epoch": 0.24999189937074312, "eval_loss": 4.54582405090332, "eval_runtime": 0.4393, "eval_samples_per_second": 339.196, "eval_steps_per_second": 15.935, "step": 2411 } ], "logging_steps": 1, "max_steps": 28932, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 2411, "total_flos": 3.4729528980175585e+18, "train_batch_size": 3, "trial_name": null, "trial_params": null }