{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1500444342516412, "eval_steps": 500, "global_step": 2617, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.7334518246710435e-05, "grad_norm": 0.0, "learning_rate": 1.2658227848101266e-07, "loss": 15.7887, "step": 1 }, { "epoch": 0.00011466903649342087, "grad_norm": 0.0, "learning_rate": 2.5316455696202533e-07, "loss": 15.8118, "step": 2 }, { "epoch": 0.0001720035547401313, "grad_norm": 0.0, "learning_rate": 3.79746835443038e-07, "loss": 15.5044, "step": 3 }, { "epoch": 0.00022933807298684174, "grad_norm": 0.0, "learning_rate": 5.063291139240507e-07, "loss": 15.726, "step": 4 }, { "epoch": 0.00028667259123355216, "grad_norm": 0.0, "learning_rate": 6.329113924050634e-07, "loss": 15.5511, "step": 5 }, { "epoch": 0.0003440071094802626, "grad_norm": 0.0, "learning_rate": 7.59493670886076e-07, "loss": 15.6542, "step": 6 }, { "epoch": 0.00040134162772697304, "grad_norm": 0.0, "learning_rate": 8.860759493670887e-07, "loss": 15.7995, "step": 7 }, { "epoch": 0.0004586761459736835, "grad_norm": 0.0, "learning_rate": 1.0126582278481013e-06, "loss": 16.2208, "step": 8 }, { "epoch": 0.0005160106642203939, "grad_norm": 0.0, "learning_rate": 1.139240506329114e-06, "loss": 15.7266, "step": 9 }, { "epoch": 0.0005733451824671043, "grad_norm": 0.0, "learning_rate": 1.2658227848101267e-06, "loss": 16.0773, "step": 10 }, { "epoch": 0.0006306797007138148, "grad_norm": 0.0, "learning_rate": 1.3924050632911392e-06, "loss": 16.0366, "step": 11 }, { "epoch": 0.0006880142189605252, "grad_norm": 0.0, "learning_rate": 1.518987341772152e-06, "loss": 15.5359, "step": 12 }, { "epoch": 0.0007453487372072356, "grad_norm": 0.0, "learning_rate": 1.6455696202531647e-06, "loss": 15.7016, "step": 13 }, { "epoch": 0.0008026832554539461, "grad_norm": 0.0, "learning_rate": 1.7721518987341774e-06, "loss": 16.0417, "step": 14 }, { "epoch": 0.0008600177737006565, "grad_norm": 0.0, "learning_rate": 1.8987341772151901e-06, "loss": 15.7408, "step": 15 }, { "epoch": 0.000917352291947367, "grad_norm": 0.0, "learning_rate": 2.0253164556962026e-06, "loss": 16.0203, "step": 16 }, { "epoch": 0.0009746868101940773, "grad_norm": 0.0, "learning_rate": 2.1518987341772153e-06, "loss": 15.6151, "step": 17 }, { "epoch": 0.0010320213284407877, "grad_norm": 0.0, "learning_rate": 2.278481012658228e-06, "loss": 15.7387, "step": 18 }, { "epoch": 0.0010893558466874983, "grad_norm": 0.0, "learning_rate": 2.4050632911392408e-06, "loss": 15.9719, "step": 19 }, { "epoch": 0.0011466903649342086, "grad_norm": 0.0, "learning_rate": 2.5316455696202535e-06, "loss": 15.6512, "step": 20 }, { "epoch": 0.0012040248831809192, "grad_norm": 0.0, "learning_rate": 2.6582278481012658e-06, "loss": 15.6905, "step": 21 }, { "epoch": 0.0012613594014276295, "grad_norm": 0.0, "learning_rate": 2.7848101265822785e-06, "loss": 15.8747, "step": 22 }, { "epoch": 0.0013186939196743398, "grad_norm": 0.0, "learning_rate": 2.9113924050632912e-06, "loss": 15.9172, "step": 23 }, { "epoch": 0.0013760284379210504, "grad_norm": 0.0, "learning_rate": 3.037974683544304e-06, "loss": 15.4097, "step": 24 }, { "epoch": 0.0014333629561677607, "grad_norm": 0.0, "learning_rate": 3.164556962025317e-06, "loss": 15.9326, "step": 25 }, { "epoch": 0.0014906974744144713, "grad_norm": 0.0, "learning_rate": 3.2911392405063294e-06, "loss": 15.7093, "step": 26 }, { "epoch": 0.0015480319926611816, "grad_norm": 0.0, "learning_rate": 3.417721518987342e-06, "loss": 16.1732, "step": 27 }, { "epoch": 0.0016053665109078922, "grad_norm": 0.0, "learning_rate": 3.544303797468355e-06, "loss": 15.9809, "step": 28 }, { "epoch": 0.0016627010291546025, "grad_norm": 0.0, "learning_rate": 3.6708860759493675e-06, "loss": 15.6426, "step": 29 }, { "epoch": 0.001720035547401313, "grad_norm": 0.0, "learning_rate": 3.7974683544303802e-06, "loss": 15.5717, "step": 30 }, { "epoch": 0.0017773700656480234, "grad_norm": 0.0, "learning_rate": 3.924050632911393e-06, "loss": 15.6596, "step": 31 }, { "epoch": 0.001834704583894734, "grad_norm": 0.0, "learning_rate": 4.050632911392405e-06, "loss": 15.4429, "step": 32 }, { "epoch": 0.0018920391021414443, "grad_norm": 0.0, "learning_rate": 4.177215189873418e-06, "loss": 15.9317, "step": 33 }, { "epoch": 0.0019493736203881546, "grad_norm": 0.0, "learning_rate": 4.303797468354431e-06, "loss": 15.6599, "step": 34 }, { "epoch": 0.002006708138634865, "grad_norm": 0.0, "learning_rate": 4.430379746835443e-06, "loss": 15.8037, "step": 35 }, { "epoch": 0.0020640426568815755, "grad_norm": 0.0, "learning_rate": 4.556962025316456e-06, "loss": 15.976, "step": 36 }, { "epoch": 0.002121377175128286, "grad_norm": 0.0, "learning_rate": 4.683544303797468e-06, "loss": 15.8437, "step": 37 }, { "epoch": 0.0021787116933749966, "grad_norm": 0.0, "learning_rate": 4.8101265822784815e-06, "loss": 15.8187, "step": 38 }, { "epoch": 0.002236046211621707, "grad_norm": 0.0, "learning_rate": 4.936708860759495e-06, "loss": 15.8865, "step": 39 }, { "epoch": 0.0022933807298684173, "grad_norm": 0.0, "learning_rate": 5.063291139240507e-06, "loss": 15.9997, "step": 40 }, { "epoch": 0.0023507152481151276, "grad_norm": 0.0, "learning_rate": 5.189873417721519e-06, "loss": 16.2616, "step": 41 }, { "epoch": 0.0024080497663618384, "grad_norm": 0.0, "learning_rate": 5.3164556962025316e-06, "loss": 15.7347, "step": 42 }, { "epoch": 0.0024653842846085487, "grad_norm": 0.0, "learning_rate": 5.443037974683545e-06, "loss": 15.7051, "step": 43 }, { "epoch": 0.002522718802855259, "grad_norm": 0.0, "learning_rate": 5.569620253164557e-06, "loss": 15.9378, "step": 44 }, { "epoch": 0.0025800533211019694, "grad_norm": 0.0, "learning_rate": 5.69620253164557e-06, "loss": 15.7706, "step": 45 }, { "epoch": 0.0026373878393486797, "grad_norm": 0.0, "learning_rate": 5.8227848101265824e-06, "loss": 15.4018, "step": 46 }, { "epoch": 0.0026947223575953905, "grad_norm": 0.0, "learning_rate": 5.949367088607595e-06, "loss": 15.4771, "step": 47 }, { "epoch": 0.002752056875842101, "grad_norm": 0.0, "learning_rate": 6.075949367088608e-06, "loss": 15.8046, "step": 48 }, { "epoch": 0.002809391394088811, "grad_norm": 0.0, "learning_rate": 6.20253164556962e-06, "loss": 16.0405, "step": 49 }, { "epoch": 0.0028667259123355215, "grad_norm": 0.0, "learning_rate": 6.329113924050634e-06, "loss": 16.0027, "step": 50 }, { "epoch": 0.0029240604305822322, "grad_norm": 0.0, "learning_rate": 6.4556962025316464e-06, "loss": 16.0728, "step": 51 }, { "epoch": 0.0029813949488289426, "grad_norm": 0.0, "learning_rate": 6.582278481012659e-06, "loss": 15.831, "step": 52 }, { "epoch": 0.003038729467075653, "grad_norm": 0.0, "learning_rate": 6.708860759493672e-06, "loss": 15.4141, "step": 53 }, { "epoch": 0.0030960639853223632, "grad_norm": 0.0, "learning_rate": 6.835443037974684e-06, "loss": 15.6401, "step": 54 }, { "epoch": 0.0031533985035690736, "grad_norm": 0.0, "learning_rate": 6.962025316455697e-06, "loss": 15.7223, "step": 55 }, { "epoch": 0.0032107330218157843, "grad_norm": 0.0, "learning_rate": 7.08860759493671e-06, "loss": 15.805, "step": 56 }, { "epoch": 0.0032680675400624947, "grad_norm": 0.0, "learning_rate": 7.215189873417722e-06, "loss": 15.6997, "step": 57 }, { "epoch": 0.003325402058309205, "grad_norm": 0.0, "learning_rate": 7.341772151898735e-06, "loss": 15.8021, "step": 58 }, { "epoch": 0.0033827365765559153, "grad_norm": 0.0, "learning_rate": 7.468354430379747e-06, "loss": 15.6945, "step": 59 }, { "epoch": 0.003440071094802626, "grad_norm": 0.0, "learning_rate": 7.5949367088607605e-06, "loss": 15.7754, "step": 60 }, { "epoch": 0.0034974056130493364, "grad_norm": 0.0, "learning_rate": 7.721518987341773e-06, "loss": 15.6585, "step": 61 }, { "epoch": 0.0035547401312960468, "grad_norm": 0.0, "learning_rate": 7.848101265822786e-06, "loss": 16.1158, "step": 62 }, { "epoch": 0.003612074649542757, "grad_norm": 0.0, "learning_rate": 7.974683544303799e-06, "loss": 15.8004, "step": 63 }, { "epoch": 0.003669409167789468, "grad_norm": 0.0, "learning_rate": 8.10126582278481e-06, "loss": 15.3772, "step": 64 }, { "epoch": 0.003726743686036178, "grad_norm": 0.0, "learning_rate": 8.227848101265824e-06, "loss": 15.7289, "step": 65 }, { "epoch": 0.0037840782042828885, "grad_norm": 0.0, "learning_rate": 8.354430379746837e-06, "loss": 15.7923, "step": 66 }, { "epoch": 0.003841412722529599, "grad_norm": 0.0, "learning_rate": 8.481012658227848e-06, "loss": 15.5226, "step": 67 }, { "epoch": 0.003898747240776309, "grad_norm": 0.0, "learning_rate": 8.607594936708861e-06, "loss": 15.7584, "step": 68 }, { "epoch": 0.00395608175902302, "grad_norm": 0.0, "learning_rate": 8.734177215189874e-06, "loss": 15.9545, "step": 69 }, { "epoch": 0.00401341627726973, "grad_norm": 0.0, "learning_rate": 8.860759493670886e-06, "loss": 15.9191, "step": 70 }, { "epoch": 0.004070750795516441, "grad_norm": 0.0, "learning_rate": 8.987341772151899e-06, "loss": 15.7233, "step": 71 }, { "epoch": 0.004128085313763151, "grad_norm": 0.0, "learning_rate": 9.113924050632912e-06, "loss": 15.5079, "step": 72 }, { "epoch": 0.004185419832009861, "grad_norm": 0.0, "learning_rate": 9.240506329113925e-06, "loss": 15.7607, "step": 73 }, { "epoch": 0.004242754350256572, "grad_norm": 0.0, "learning_rate": 9.367088607594937e-06, "loss": 16.0051, "step": 74 }, { "epoch": 0.004300088868503283, "grad_norm": 0.0, "learning_rate": 9.49367088607595e-06, "loss": 15.9513, "step": 75 }, { "epoch": 0.004357423386749993, "grad_norm": 0.0, "learning_rate": 9.620253164556963e-06, "loss": 15.9781, "step": 76 }, { "epoch": 0.0044147579049967035, "grad_norm": 0.0, "learning_rate": 9.746835443037975e-06, "loss": 16.0359, "step": 77 }, { "epoch": 0.004472092423243414, "grad_norm": 0.0, "learning_rate": 9.87341772151899e-06, "loss": 15.8052, "step": 78 }, { "epoch": 0.004529426941490124, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 15.4073, "step": 79 }, { "epoch": 0.0045867614597368345, "grad_norm": 0.0, "learning_rate": 9.999996169491213e-06, "loss": 15.8073, "step": 80 }, { "epoch": 0.004644095977983545, "grad_norm": 0.0, "learning_rate": 9.999984677970716e-06, "loss": 15.9295, "step": 81 }, { "epoch": 0.004701430496230255, "grad_norm": 0.0, "learning_rate": 9.99996552545612e-06, "loss": 15.5126, "step": 82 }, { "epoch": 0.0047587650144769655, "grad_norm": 0.0, "learning_rate": 9.999938711976769e-06, "loss": 15.5249, "step": 83 }, { "epoch": 0.004816099532723677, "grad_norm": 0.0, "learning_rate": 9.999904237573746e-06, "loss": 15.7576, "step": 84 }, { "epoch": 0.004873434050970387, "grad_norm": 0.0, "learning_rate": 9.999862102299874e-06, "loss": 16.1043, "step": 85 }, { "epoch": 0.004930768569217097, "grad_norm": 0.0, "learning_rate": 9.999812306219712e-06, "loss": 15.7214, "step": 86 }, { "epoch": 0.004988103087463808, "grad_norm": 0.0, "learning_rate": 9.999754849409559e-06, "loss": 16.0242, "step": 87 }, { "epoch": 0.005045437605710518, "grad_norm": 0.0, "learning_rate": 9.99968973195745e-06, "loss": 15.825, "step": 88 }, { "epoch": 0.005102772123957228, "grad_norm": 0.0, "learning_rate": 9.999616953963156e-06, "loss": 15.7653, "step": 89 }, { "epoch": 0.005160106642203939, "grad_norm": 0.0, "learning_rate": 9.99953651553819e-06, "loss": 15.7074, "step": 90 }, { "epoch": 0.005217441160450649, "grad_norm": 0.0, "learning_rate": 9.999448416805802e-06, "loss": 16.1031, "step": 91 }, { "epoch": 0.005274775678697359, "grad_norm": 0.0, "learning_rate": 9.999352657900973e-06, "loss": 15.7045, "step": 92 }, { "epoch": 0.005332110196944071, "grad_norm": 0.0, "learning_rate": 9.999249238970427e-06, "loss": 16.0837, "step": 93 }, { "epoch": 0.005389444715190781, "grad_norm": 0.0, "learning_rate": 9.999138160172624e-06, "loss": 15.6533, "step": 94 }, { "epoch": 0.005446779233437491, "grad_norm": 0.0, "learning_rate": 9.999019421677755e-06, "loss": 15.8828, "step": 95 }, { "epoch": 0.005504113751684202, "grad_norm": 0.0, "learning_rate": 9.998893023667758e-06, "loss": 15.9728, "step": 96 }, { "epoch": 0.005561448269930912, "grad_norm": 0.0, "learning_rate": 9.998758966336296e-06, "loss": 15.9939, "step": 97 }, { "epoch": 0.005618782788177622, "grad_norm": 0.0, "learning_rate": 9.998617249888773e-06, "loss": 15.896, "step": 98 }, { "epoch": 0.005676117306424333, "grad_norm": 0.0, "learning_rate": 9.998467874542328e-06, "loss": 15.8819, "step": 99 }, { "epoch": 0.005733451824671043, "grad_norm": 0.0, "learning_rate": 9.998310840525835e-06, "loss": 15.7407, "step": 100 }, { "epoch": 0.005790786342917753, "grad_norm": 0.0, "learning_rate": 9.9981461480799e-06, "loss": 15.5921, "step": 101 }, { "epoch": 0.0058481208611644644, "grad_norm": 0.0, "learning_rate": 9.997973797456867e-06, "loss": 16.1465, "step": 102 }, { "epoch": 0.005905455379411175, "grad_norm": 0.0, "learning_rate": 9.99779378892081e-06, "loss": 15.5944, "step": 103 }, { "epoch": 0.005962789897657885, "grad_norm": 0.0, "learning_rate": 9.997606122747543e-06, "loss": 15.6872, "step": 104 }, { "epoch": 0.0060201244159045954, "grad_norm": 0.0, "learning_rate": 9.997410799224604e-06, "loss": 15.5546, "step": 105 }, { "epoch": 0.006077458934151306, "grad_norm": 0.0, "learning_rate": 9.997207818651273e-06, "loss": 15.9227, "step": 106 }, { "epoch": 0.006134793452398016, "grad_norm": 0.0, "learning_rate": 9.996997181338554e-06, "loss": 15.7962, "step": 107 }, { "epoch": 0.0061921279706447264, "grad_norm": 0.0, "learning_rate": 9.996778887609189e-06, "loss": 15.4476, "step": 108 }, { "epoch": 0.006249462488891437, "grad_norm": 0.0, "learning_rate": 9.996552937797646e-06, "loss": 15.6365, "step": 109 }, { "epoch": 0.006306797007138147, "grad_norm": 0.0, "learning_rate": 9.996319332250127e-06, "loss": 15.935, "step": 110 }, { "epoch": 0.006364131525384858, "grad_norm": 0.0, "learning_rate": 9.996078071324562e-06, "loss": 15.8665, "step": 111 }, { "epoch": 0.006421466043631569, "grad_norm": 0.0, "learning_rate": 9.995829155390613e-06, "loss": 15.5091, "step": 112 }, { "epoch": 0.006478800561878279, "grad_norm": 0.0, "learning_rate": 9.99557258482967e-06, "loss": 15.586, "step": 113 }, { "epoch": 0.006536135080124989, "grad_norm": 0.0, "learning_rate": 9.995308360034852e-06, "loss": 15.6547, "step": 114 }, { "epoch": 0.0065934695983717, "grad_norm": 0.0, "learning_rate": 9.995036481411005e-06, "loss": 15.3795, "step": 115 }, { "epoch": 0.00665080411661841, "grad_norm": 0.0, "learning_rate": 9.9947569493747e-06, "loss": 15.7044, "step": 116 }, { "epoch": 0.00670813863486512, "grad_norm": 0.0, "learning_rate": 9.99446976435424e-06, "loss": 15.8559, "step": 117 }, { "epoch": 0.006765473153111831, "grad_norm": 0.0, "learning_rate": 9.994174926789648e-06, "loss": 15.6059, "step": 118 }, { "epoch": 0.006822807671358541, "grad_norm": 0.0, "learning_rate": 9.993872437132678e-06, "loss": 16.1036, "step": 119 }, { "epoch": 0.006880142189605252, "grad_norm": 0.0, "learning_rate": 9.993562295846806e-06, "loss": 15.5074, "step": 120 }, { "epoch": 0.0069374767078519625, "grad_norm": 0.0, "learning_rate": 9.993244503407227e-06, "loss": 15.5059, "step": 121 }, { "epoch": 0.006994811226098673, "grad_norm": 0.0, "learning_rate": 9.99291906030087e-06, "loss": 15.5546, "step": 122 }, { "epoch": 0.007052145744345383, "grad_norm": 0.0, "learning_rate": 9.992585967026374e-06, "loss": 15.8098, "step": 123 }, { "epoch": 0.0071094802625920935, "grad_norm": 0.0, "learning_rate": 9.99224522409411e-06, "loss": 15.9037, "step": 124 }, { "epoch": 0.007166814780838804, "grad_norm": 0.0, "learning_rate": 9.991896832026162e-06, "loss": 16.0274, "step": 125 }, { "epoch": 0.007224149299085514, "grad_norm": 0.0, "learning_rate": 9.991540791356342e-06, "loss": 15.5932, "step": 126 }, { "epoch": 0.0072814838173322245, "grad_norm": 0.0, "learning_rate": 9.991177102630173e-06, "loss": 15.7162, "step": 127 }, { "epoch": 0.007338818335578936, "grad_norm": 0.0, "learning_rate": 9.990805766404902e-06, "loss": 15.2172, "step": 128 }, { "epoch": 0.007396152853825646, "grad_norm": 0.0, "learning_rate": 9.990426783249492e-06, "loss": 15.444, "step": 129 }, { "epoch": 0.007453487372072356, "grad_norm": 0.0, "learning_rate": 9.99004015374462e-06, "loss": 15.4409, "step": 130 }, { "epoch": 0.007510821890319067, "grad_norm": 0.0, "learning_rate": 9.989645878482684e-06, "loss": 15.941, "step": 131 }, { "epoch": 0.007568156408565777, "grad_norm": 0.0, "learning_rate": 9.989243958067791e-06, "loss": 16.0396, "step": 132 }, { "epoch": 0.007625490926812487, "grad_norm": 0.0, "learning_rate": 9.988834393115768e-06, "loss": 15.4225, "step": 133 }, { "epoch": 0.007682825445059198, "grad_norm": 0.0, "learning_rate": 9.988417184254148e-06, "loss": 15.82, "step": 134 }, { "epoch": 0.007740159963305908, "grad_norm": 0.0, "learning_rate": 9.987992332122182e-06, "loss": 15.7309, "step": 135 }, { "epoch": 0.007797494481552618, "grad_norm": 0.0, "learning_rate": 9.987559837370832e-06, "loss": 15.7048, "step": 136 }, { "epoch": 0.00785482899979933, "grad_norm": 0.0, "learning_rate": 9.987119700662766e-06, "loss": 15.3024, "step": 137 }, { "epoch": 0.00791216351804604, "grad_norm": 0.0, "learning_rate": 9.986671922672362e-06, "loss": 15.7417, "step": 138 }, { "epoch": 0.00796949803629275, "grad_norm": 0.0, "learning_rate": 9.986216504085709e-06, "loss": 15.8964, "step": 139 }, { "epoch": 0.00802683255453946, "grad_norm": 0.0, "learning_rate": 9.9857534456006e-06, "loss": 15.7391, "step": 140 }, { "epoch": 0.008084167072786171, "grad_norm": 0.0, "learning_rate": 9.985282747926535e-06, "loss": 15.558, "step": 141 }, { "epoch": 0.008141501591032881, "grad_norm": 0.0, "learning_rate": 9.984804411784717e-06, "loss": 15.2734, "step": 142 }, { "epoch": 0.008198836109279592, "grad_norm": 0.0, "learning_rate": 9.984318437908056e-06, "loss": 15.6669, "step": 143 }, { "epoch": 0.008256170627526302, "grad_norm": 0.0, "learning_rate": 9.983824827041164e-06, "loss": 15.8501, "step": 144 }, { "epoch": 0.008313505145773012, "grad_norm": 0.0, "learning_rate": 9.983323579940351e-06, "loss": 15.7229, "step": 145 }, { "epoch": 0.008370839664019723, "grad_norm": 0.0, "learning_rate": 9.98281469737363e-06, "loss": 15.7334, "step": 146 }, { "epoch": 0.008428174182266433, "grad_norm": 0.0, "learning_rate": 9.982298180120715e-06, "loss": 15.3766, "step": 147 }, { "epoch": 0.008485508700513143, "grad_norm": 0.0, "learning_rate": 9.981774028973013e-06, "loss": 15.6116, "step": 148 }, { "epoch": 0.008542843218759854, "grad_norm": 0.0, "learning_rate": 9.981242244733631e-06, "loss": 15.4852, "step": 149 }, { "epoch": 0.008600177737006566, "grad_norm": 0.0, "learning_rate": 9.98070282821737e-06, "loss": 15.7986, "step": 150 }, { "epoch": 0.008657512255253276, "grad_norm": 0.0, "learning_rate": 9.980155780250728e-06, "loss": 15.6175, "step": 151 }, { "epoch": 0.008714846773499986, "grad_norm": 0.0, "learning_rate": 9.97960110167189e-06, "loss": 15.7265, "step": 152 }, { "epoch": 0.008772181291746697, "grad_norm": 0.0, "learning_rate": 9.979038793330743e-06, "loss": 15.3184, "step": 153 }, { "epoch": 0.008829515809993407, "grad_norm": 0.0, "learning_rate": 9.97846885608885e-06, "loss": 15.6656, "step": 154 }, { "epoch": 0.008886850328240117, "grad_norm": 0.0, "learning_rate": 9.977891290819474e-06, "loss": 15.5521, "step": 155 }, { "epoch": 0.008944184846486828, "grad_norm": 0.0, "learning_rate": 9.977306098407566e-06, "loss": 15.6549, "step": 156 }, { "epoch": 0.009001519364733538, "grad_norm": 0.0, "learning_rate": 9.976713279749754e-06, "loss": 15.719, "step": 157 }, { "epoch": 0.009058853882980248, "grad_norm": 0.0, "learning_rate": 9.976112835754362e-06, "loss": 15.5373, "step": 158 }, { "epoch": 0.009116188401226959, "grad_norm": 0.0, "learning_rate": 9.975504767341388e-06, "loss": 15.9543, "step": 159 }, { "epoch": 0.009173522919473669, "grad_norm": 0.0, "learning_rate": 9.97488907544252e-06, "loss": 15.6243, "step": 160 }, { "epoch": 0.00923085743772038, "grad_norm": 0.0, "learning_rate": 9.974265761001123e-06, "loss": 15.4797, "step": 161 }, { "epoch": 0.00928819195596709, "grad_norm": 0.0, "learning_rate": 9.97363482497224e-06, "loss": 15.9338, "step": 162 }, { "epoch": 0.0093455264742138, "grad_norm": 0.0, "learning_rate": 9.972996268322594e-06, "loss": 15.7102, "step": 163 }, { "epoch": 0.00940286099246051, "grad_norm": 0.0, "learning_rate": 9.972350092030583e-06, "loss": 15.4883, "step": 164 }, { "epoch": 0.00946019551070722, "grad_norm": 0.0, "learning_rate": 9.971696297086282e-06, "loss": 15.7564, "step": 165 }, { "epoch": 0.009517530028953931, "grad_norm": 0.0, "learning_rate": 9.971034884491436e-06, "loss": 15.8265, "step": 166 }, { "epoch": 0.009574864547200641, "grad_norm": 0.0, "learning_rate": 9.970365855259465e-06, "loss": 15.7952, "step": 167 }, { "epoch": 0.009632199065447353, "grad_norm": 0.0, "learning_rate": 9.96968921041546e-06, "loss": 15.3301, "step": 168 }, { "epoch": 0.009689533583694064, "grad_norm": 0.0, "learning_rate": 9.969004950996175e-06, "loss": 15.6668, "step": 169 }, { "epoch": 0.009746868101940774, "grad_norm": 0.0, "learning_rate": 9.968313078050035e-06, "loss": 15.7378, "step": 170 }, { "epoch": 0.009804202620187484, "grad_norm": 0.0, "learning_rate": 9.967613592637133e-06, "loss": 15.3782, "step": 171 }, { "epoch": 0.009861537138434195, "grad_norm": 0.0, "learning_rate": 9.96690649582922e-06, "loss": 15.5379, "step": 172 }, { "epoch": 0.009918871656680905, "grad_norm": 0.0, "learning_rate": 9.966191788709716e-06, "loss": 15.4814, "step": 173 }, { "epoch": 0.009976206174927615, "grad_norm": 0.0, "learning_rate": 9.965469472373693e-06, "loss": 15.6, "step": 174 }, { "epoch": 0.010033540693174326, "grad_norm": 0.0, "learning_rate": 9.964739547927892e-06, "loss": 15.8427, "step": 175 }, { "epoch": 0.010090875211421036, "grad_norm": 0.0, "learning_rate": 9.964002016490698e-06, "loss": 15.403, "step": 176 }, { "epoch": 0.010148209729667746, "grad_norm": 0.0, "learning_rate": 9.963256879192167e-06, "loss": 15.7499, "step": 177 }, { "epoch": 0.010205544247914457, "grad_norm": 0.0, "learning_rate": 9.962504137173997e-06, "loss": 15.9071, "step": 178 }, { "epoch": 0.010262878766161167, "grad_norm": 0.0, "learning_rate": 9.961743791589544e-06, "loss": 15.852, "step": 179 }, { "epoch": 0.010320213284407877, "grad_norm": 0.0, "learning_rate": 9.96097584360381e-06, "loss": 15.6302, "step": 180 }, { "epoch": 0.010377547802654588, "grad_norm": 0.0, "learning_rate": 9.96020029439345e-06, "loss": 15.6271, "step": 181 }, { "epoch": 0.010434882320901298, "grad_norm": 0.0, "learning_rate": 9.959417145146761e-06, "loss": 15.8202, "step": 182 }, { "epoch": 0.010492216839148008, "grad_norm": 0.0, "learning_rate": 9.958626397063688e-06, "loss": 15.4802, "step": 183 }, { "epoch": 0.010549551357394719, "grad_norm": 0.0, "learning_rate": 9.957828051355817e-06, "loss": 15.7718, "step": 184 }, { "epoch": 0.010606885875641429, "grad_norm": 0.0, "learning_rate": 9.95702210924638e-06, "loss": 15.905, "step": 185 }, { "epoch": 0.010664220393888141, "grad_norm": 0.0, "learning_rate": 9.956208571970238e-06, "loss": 15.4488, "step": 186 }, { "epoch": 0.010721554912134851, "grad_norm": 0.0, "learning_rate": 9.955387440773902e-06, "loss": 15.6706, "step": 187 }, { "epoch": 0.010778889430381562, "grad_norm": 0.0, "learning_rate": 9.954558716915508e-06, "loss": 15.7807, "step": 188 }, { "epoch": 0.010836223948628272, "grad_norm": 0.0, "learning_rate": 9.953722401664829e-06, "loss": 15.9079, "step": 189 }, { "epoch": 0.010893558466874982, "grad_norm": 0.0, "learning_rate": 9.952878496303274e-06, "loss": 15.8757, "step": 190 }, { "epoch": 0.010950892985121693, "grad_norm": 0.0, "learning_rate": 9.952027002123877e-06, "loss": 15.8773, "step": 191 }, { "epoch": 0.011008227503368403, "grad_norm": 0.0, "learning_rate": 9.951167920431297e-06, "loss": 15.3914, "step": 192 }, { "epoch": 0.011065562021615113, "grad_norm": 0.0, "learning_rate": 9.950301252541824e-06, "loss": 15.5592, "step": 193 }, { "epoch": 0.011122896539861824, "grad_norm": 0.0, "learning_rate": 9.94942699978337e-06, "loss": 15.9349, "step": 194 }, { "epoch": 0.011180231058108534, "grad_norm": 0.0, "learning_rate": 9.94854516349547e-06, "loss": 15.3999, "step": 195 }, { "epoch": 0.011237565576355244, "grad_norm": 0.0, "learning_rate": 9.94765574502927e-06, "loss": 15.7352, "step": 196 }, { "epoch": 0.011294900094601955, "grad_norm": 0.0, "learning_rate": 9.946758745747549e-06, "loss": 16.1242, "step": 197 }, { "epoch": 0.011352234612848665, "grad_norm": 0.0, "learning_rate": 9.945854167024685e-06, "loss": 15.617, "step": 198 }, { "epoch": 0.011409569131095375, "grad_norm": 0.0, "learning_rate": 9.944942010246681e-06, "loss": 15.5772, "step": 199 }, { "epoch": 0.011466903649342086, "grad_norm": 0.0, "learning_rate": 9.944022276811147e-06, "loss": 15.8159, "step": 200 }, { "epoch": 0.011524238167588796, "grad_norm": 0.0, "learning_rate": 9.943094968127298e-06, "loss": 15.7496, "step": 201 }, { "epoch": 0.011581572685835506, "grad_norm": 0.0, "learning_rate": 9.942160085615963e-06, "loss": 15.694, "step": 202 }, { "epoch": 0.011638907204082219, "grad_norm": 0.0, "learning_rate": 9.941217630709571e-06, "loss": 15.2256, "step": 203 }, { "epoch": 0.011696241722328929, "grad_norm": 0.0, "learning_rate": 9.940267604852155e-06, "loss": 15.7162, "step": 204 }, { "epoch": 0.01175357624057564, "grad_norm": 0.0, "learning_rate": 9.939310009499348e-06, "loss": 16.0539, "step": 205 }, { "epoch": 0.01181091075882235, "grad_norm": 0.0, "learning_rate": 9.938344846118382e-06, "loss": 15.3012, "step": 206 }, { "epoch": 0.01186824527706906, "grad_norm": 0.0, "learning_rate": 9.937372116188081e-06, "loss": 15.5806, "step": 207 }, { "epoch": 0.01192557979531577, "grad_norm": 0.0, "learning_rate": 9.936391821198868e-06, "loss": 15.522, "step": 208 }, { "epoch": 0.01198291431356248, "grad_norm": 0.0, "learning_rate": 9.935403962652753e-06, "loss": 15.522, "step": 209 }, { "epoch": 0.012040248831809191, "grad_norm": 0.0, "learning_rate": 9.934408542063337e-06, "loss": 16.0803, "step": 210 }, { "epoch": 0.012097583350055901, "grad_norm": 0.0, "learning_rate": 9.933405560955805e-06, "loss": 15.6953, "step": 211 }, { "epoch": 0.012154917868302612, "grad_norm": 0.0, "learning_rate": 9.932395020866929e-06, "loss": 15.6063, "step": 212 }, { "epoch": 0.012212252386549322, "grad_norm": 0.0, "learning_rate": 9.931376923345067e-06, "loss": 15.7092, "step": 213 }, { "epoch": 0.012269586904796032, "grad_norm": 0.0, "learning_rate": 9.930351269950144e-06, "loss": 15.7337, "step": 214 }, { "epoch": 0.012326921423042743, "grad_norm": 0.0, "learning_rate": 9.929318062253673e-06, "loss": 15.5571, "step": 215 }, { "epoch": 0.012384255941289453, "grad_norm": 0.0, "learning_rate": 9.92827730183874e-06, "loss": 16.0815, "step": 216 }, { "epoch": 0.012441590459536163, "grad_norm": 0.0, "learning_rate": 9.9272289903e-06, "loss": 15.7825, "step": 217 }, { "epoch": 0.012498924977782874, "grad_norm": 0.0, "learning_rate": 9.92617312924368e-06, "loss": 15.7294, "step": 218 }, { "epoch": 0.012556259496029584, "grad_norm": 0.0, "learning_rate": 9.925109720287574e-06, "loss": 15.7288, "step": 219 }, { "epoch": 0.012613594014276294, "grad_norm": 0.0, "learning_rate": 9.924038765061042e-06, "loss": 15.5877, "step": 220 }, { "epoch": 0.012670928532523006, "grad_norm": 0.0, "learning_rate": 9.922960265205001e-06, "loss": 15.5727, "step": 221 }, { "epoch": 0.012728263050769717, "grad_norm": 0.0, "learning_rate": 9.921874222371939e-06, "loss": 15.5752, "step": 222 }, { "epoch": 0.012785597569016427, "grad_norm": 0.0, "learning_rate": 9.92078063822589e-06, "loss": 15.5827, "step": 223 }, { "epoch": 0.012842932087263137, "grad_norm": 0.0, "learning_rate": 9.919679514442449e-06, "loss": 15.451, "step": 224 }, { "epoch": 0.012900266605509848, "grad_norm": 0.0, "learning_rate": 9.918570852708762e-06, "loss": 15.7206, "step": 225 }, { "epoch": 0.012957601123756558, "grad_norm": 0.0, "learning_rate": 9.917454654723522e-06, "loss": 16.032, "step": 226 }, { "epoch": 0.013014935642003268, "grad_norm": 0.0, "learning_rate": 9.916330922196975e-06, "loss": 15.7988, "step": 227 }, { "epoch": 0.013072270160249979, "grad_norm": 0.0, "learning_rate": 9.915199656850906e-06, "loss": 15.6503, "step": 228 }, { "epoch": 0.013129604678496689, "grad_norm": 0.0, "learning_rate": 9.914060860418644e-06, "loss": 15.5872, "step": 229 }, { "epoch": 0.0131869391967434, "grad_norm": 0.0, "learning_rate": 9.912914534645056e-06, "loss": 15.89, "step": 230 }, { "epoch": 0.01324427371499011, "grad_norm": 0.0, "learning_rate": 9.91176068128655e-06, "loss": 15.5145, "step": 231 }, { "epoch": 0.01330160823323682, "grad_norm": 0.0, "learning_rate": 9.910599302111057e-06, "loss": 15.885, "step": 232 }, { "epoch": 0.01335894275148353, "grad_norm": 0.0, "learning_rate": 9.909430398898053e-06, "loss": 15.4595, "step": 233 }, { "epoch": 0.01341627726973024, "grad_norm": 0.0, "learning_rate": 9.908253973438533e-06, "loss": 15.66, "step": 234 }, { "epoch": 0.013473611787976951, "grad_norm": 0.0, "learning_rate": 9.907070027535022e-06, "loss": 15.8289, "step": 235 }, { "epoch": 0.013530946306223661, "grad_norm": 0.0, "learning_rate": 9.905878563001563e-06, "loss": 15.5208, "step": 236 }, { "epoch": 0.013588280824470372, "grad_norm": 0.0, "learning_rate": 9.904679581663725e-06, "loss": 15.7992, "step": 237 }, { "epoch": 0.013645615342717082, "grad_norm": 0.0, "learning_rate": 9.903473085358589e-06, "loss": 15.7632, "step": 238 }, { "epoch": 0.013702949860963794, "grad_norm": 0.0, "learning_rate": 9.902259075934755e-06, "loss": 15.5836, "step": 239 }, { "epoch": 0.013760284379210504, "grad_norm": 0.0, "learning_rate": 9.90103755525233e-06, "loss": 15.3104, "step": 240 }, { "epoch": 0.013817618897457215, "grad_norm": 0.0, "learning_rate": 9.899808525182935e-06, "loss": 15.7533, "step": 241 }, { "epoch": 0.013874953415703925, "grad_norm": 0.0, "learning_rate": 9.898571987609692e-06, "loss": 15.4214, "step": 242 }, { "epoch": 0.013932287933950635, "grad_norm": 0.0, "learning_rate": 9.897327944427231e-06, "loss": 15.9372, "step": 243 }, { "epoch": 0.013989622452197346, "grad_norm": 0.0, "learning_rate": 9.896076397541676e-06, "loss": 15.4472, "step": 244 }, { "epoch": 0.014046956970444056, "grad_norm": 0.0, "learning_rate": 9.894817348870654e-06, "loss": 15.308, "step": 245 }, { "epoch": 0.014104291488690766, "grad_norm": 0.0, "learning_rate": 9.893550800343283e-06, "loss": 15.892, "step": 246 }, { "epoch": 0.014161626006937477, "grad_norm": 0.0, "learning_rate": 9.892276753900173e-06, "loss": 15.5055, "step": 247 }, { "epoch": 0.014218960525184187, "grad_norm": 0.0, "learning_rate": 9.890995211493422e-06, "loss": 15.7145, "step": 248 }, { "epoch": 0.014276295043430897, "grad_norm": 0.0, "learning_rate": 9.889706175086615e-06, "loss": 15.7733, "step": 249 }, { "epoch": 0.014333629561677608, "grad_norm": 0.0, "learning_rate": 9.888409646654818e-06, "loss": 15.4903, "step": 250 }, { "epoch": 0.014390964079924318, "grad_norm": 0.0, "learning_rate": 9.887105628184575e-06, "loss": 15.7896, "step": 251 }, { "epoch": 0.014448298598171028, "grad_norm": 0.0, "learning_rate": 9.885794121673907e-06, "loss": 15.5118, "step": 252 }, { "epoch": 0.014505633116417739, "grad_norm": 0.0, "learning_rate": 9.884475129132312e-06, "loss": 15.8531, "step": 253 }, { "epoch": 0.014562967634664449, "grad_norm": 0.0, "learning_rate": 9.883148652580752e-06, "loss": 15.7605, "step": 254 }, { "epoch": 0.01462030215291116, "grad_norm": 0.0, "learning_rate": 9.881814694051662e-06, "loss": 15.462, "step": 255 }, { "epoch": 0.014677636671157871, "grad_norm": 0.0, "learning_rate": 9.880473255588937e-06, "loss": 15.7006, "step": 256 }, { "epoch": 0.014734971189404582, "grad_norm": 0.0, "learning_rate": 9.879124339247931e-06, "loss": 15.4329, "step": 257 }, { "epoch": 0.014792305707651292, "grad_norm": 0.0, "learning_rate": 9.877767947095462e-06, "loss": 15.7845, "step": 258 }, { "epoch": 0.014849640225898002, "grad_norm": 0.0, "learning_rate": 9.876404081209796e-06, "loss": 15.8761, "step": 259 }, { "epoch": 0.014906974744144713, "grad_norm": 0.0, "learning_rate": 9.875032743680656e-06, "loss": 15.4054, "step": 260 }, { "epoch": 0.014964309262391423, "grad_norm": 0.0, "learning_rate": 9.873653936609207e-06, "loss": 15.3106, "step": 261 }, { "epoch": 0.015021643780638133, "grad_norm": 0.0, "learning_rate": 9.872267662108064e-06, "loss": 15.5101, "step": 262 }, { "epoch": 0.015078978298884844, "grad_norm": 0.0, "learning_rate": 9.870873922301281e-06, "loss": 16.1312, "step": 263 }, { "epoch": 0.015136312817131554, "grad_norm": 0.0, "learning_rate": 9.869472719324351e-06, "loss": 16.0715, "step": 264 }, { "epoch": 0.015193647335378264, "grad_norm": 0.0, "learning_rate": 9.868064055324204e-06, "loss": 15.661, "step": 265 }, { "epoch": 0.015250981853624975, "grad_norm": 0.0, "learning_rate": 9.866647932459196e-06, "loss": 15.7833, "step": 266 }, { "epoch": 0.015308316371871685, "grad_norm": 0.0, "learning_rate": 9.86522435289912e-06, "loss": 15.845, "step": 267 }, { "epoch": 0.015365650890118395, "grad_norm": 0.0, "learning_rate": 9.863793318825186e-06, "loss": 15.9774, "step": 268 }, { "epoch": 0.015422985408365106, "grad_norm": 0.0, "learning_rate": 9.862354832430033e-06, "loss": 16.1399, "step": 269 }, { "epoch": 0.015480319926611816, "grad_norm": 0.0, "learning_rate": 9.86090889591771e-06, "loss": 15.7273, "step": 270 }, { "epoch": 0.015537654444858526, "grad_norm": 0.0, "learning_rate": 9.859455511503691e-06, "loss": 15.6735, "step": 271 }, { "epoch": 0.015594988963105237, "grad_norm": 0.0, "learning_rate": 9.857994681414853e-06, "loss": 15.7031, "step": 272 }, { "epoch": 0.015652323481351947, "grad_norm": 0.0, "learning_rate": 9.856526407889486e-06, "loss": 15.4029, "step": 273 }, { "epoch": 0.01570965799959866, "grad_norm": 0.0, "learning_rate": 9.855050693177286e-06, "loss": 15.4254, "step": 274 }, { "epoch": 0.015766992517845368, "grad_norm": 0.0, "learning_rate": 9.853567539539345e-06, "loss": 15.9419, "step": 275 }, { "epoch": 0.01582432703609208, "grad_norm": 0.0, "learning_rate": 9.85207694924816e-06, "loss": 15.5074, "step": 276 }, { "epoch": 0.01588166155433879, "grad_norm": 0.0, "learning_rate": 9.850578924587614e-06, "loss": 15.6744, "step": 277 }, { "epoch": 0.0159389960725855, "grad_norm": 0.0, "learning_rate": 9.849073467852988e-06, "loss": 15.56, "step": 278 }, { "epoch": 0.01599633059083221, "grad_norm": 0.0, "learning_rate": 9.84756058135095e-06, "loss": 15.5879, "step": 279 }, { "epoch": 0.01605366510907892, "grad_norm": 0.0, "learning_rate": 9.846040267399548e-06, "loss": 15.8704, "step": 280 }, { "epoch": 0.01611099962732563, "grad_norm": 0.0, "learning_rate": 9.844512528328212e-06, "loss": 15.7365, "step": 281 }, { "epoch": 0.016168334145572342, "grad_norm": 0.0, "learning_rate": 9.84297736647775e-06, "loss": 15.4265, "step": 282 }, { "epoch": 0.016225668663819054, "grad_norm": 0.0, "learning_rate": 9.841434784200341e-06, "loss": 15.3965, "step": 283 }, { "epoch": 0.016283003182065763, "grad_norm": 0.0, "learning_rate": 9.83988478385954e-06, "loss": 15.5008, "step": 284 }, { "epoch": 0.016340337700312475, "grad_norm": 0.0, "learning_rate": 9.838327367830257e-06, "loss": 15.6644, "step": 285 }, { "epoch": 0.016397672218559183, "grad_norm": 0.0, "learning_rate": 9.83676253849877e-06, "loss": 15.5689, "step": 286 }, { "epoch": 0.016455006736805895, "grad_norm": 0.0, "learning_rate": 9.835190298262721e-06, "loss": 15.2807, "step": 287 }, { "epoch": 0.016512341255052604, "grad_norm": 0.0, "learning_rate": 9.833610649531099e-06, "loss": 15.4072, "step": 288 }, { "epoch": 0.016569675773299316, "grad_norm": 0.0, "learning_rate": 9.832023594724248e-06, "loss": 15.7064, "step": 289 }, { "epoch": 0.016627010291546025, "grad_norm": 0.0, "learning_rate": 9.830429136273858e-06, "loss": 15.4726, "step": 290 }, { "epoch": 0.016684344809792737, "grad_norm": 0.0, "learning_rate": 9.828827276622965e-06, "loss": 15.932, "step": 291 }, { "epoch": 0.016741679328039445, "grad_norm": 0.0, "learning_rate": 9.827218018225944e-06, "loss": 15.5607, "step": 292 }, { "epoch": 0.016799013846286157, "grad_norm": 0.0, "learning_rate": 9.825601363548507e-06, "loss": 15.568, "step": 293 }, { "epoch": 0.016856348364532866, "grad_norm": 0.0, "learning_rate": 9.823977315067696e-06, "loss": 15.6851, "step": 294 }, { "epoch": 0.016913682882779578, "grad_norm": 0.0, "learning_rate": 9.822345875271884e-06, "loss": 15.5914, "step": 295 }, { "epoch": 0.016971017401026287, "grad_norm": 0.0, "learning_rate": 9.82070704666077e-06, "loss": 15.9541, "step": 296 }, { "epoch": 0.017028351919273, "grad_norm": 0.0, "learning_rate": 9.819060831745373e-06, "loss": 15.3636, "step": 297 }, { "epoch": 0.017085686437519707, "grad_norm": 0.0, "learning_rate": 9.817407233048028e-06, "loss": 15.6332, "step": 298 }, { "epoch": 0.01714302095576642, "grad_norm": 0.0, "learning_rate": 9.815746253102385e-06, "loss": 15.562, "step": 299 }, { "epoch": 0.01720035547401313, "grad_norm": 0.0, "learning_rate": 9.814077894453406e-06, "loss": 15.6414, "step": 300 }, { "epoch": 0.01725768999225984, "grad_norm": 0.0, "learning_rate": 9.812402159657352e-06, "loss": 15.2265, "step": 301 }, { "epoch": 0.017315024510506552, "grad_norm": 0.0, "learning_rate": 9.810719051281791e-06, "loss": 15.8043, "step": 302 }, { "epoch": 0.01737235902875326, "grad_norm": 0.0, "learning_rate": 9.80902857190559e-06, "loss": 15.646, "step": 303 }, { "epoch": 0.017429693546999973, "grad_norm": 0.0, "learning_rate": 9.807330724118906e-06, "loss": 15.5725, "step": 304 }, { "epoch": 0.01748702806524668, "grad_norm": 0.0, "learning_rate": 9.805625510523184e-06, "loss": 15.6773, "step": 305 }, { "epoch": 0.017544362583493393, "grad_norm": 0.0, "learning_rate": 9.803912933731163e-06, "loss": 15.5197, "step": 306 }, { "epoch": 0.017601697101740102, "grad_norm": 0.0, "learning_rate": 9.802192996366859e-06, "loss": 15.7341, "step": 307 }, { "epoch": 0.017659031619986814, "grad_norm": 0.0, "learning_rate": 9.800465701065562e-06, "loss": 15.765, "step": 308 }, { "epoch": 0.017716366138233523, "grad_norm": 0.0, "learning_rate": 9.798731050473843e-06, "loss": 15.3062, "step": 309 }, { "epoch": 0.017773700656480235, "grad_norm": 0.0, "learning_rate": 9.796989047249539e-06, "loss": 15.6744, "step": 310 }, { "epoch": 0.017831035174726943, "grad_norm": 0.0, "learning_rate": 9.795239694061754e-06, "loss": 15.4008, "step": 311 }, { "epoch": 0.017888369692973655, "grad_norm": 0.0, "learning_rate": 9.793482993590853e-06, "loss": 15.5721, "step": 312 }, { "epoch": 0.017945704211220364, "grad_norm": 0.0, "learning_rate": 9.791718948528457e-06, "loss": 15.8716, "step": 313 }, { "epoch": 0.018003038729467076, "grad_norm": 0.0, "learning_rate": 9.789947561577445e-06, "loss": 15.5011, "step": 314 }, { "epoch": 0.018060373247713785, "grad_norm": 0.0, "learning_rate": 9.78816883545194e-06, "loss": 15.3945, "step": 315 }, { "epoch": 0.018117707765960497, "grad_norm": 0.0, "learning_rate": 9.786382772877312e-06, "loss": 15.6252, "step": 316 }, { "epoch": 0.018175042284207205, "grad_norm": 0.0, "learning_rate": 9.784589376590175e-06, "loss": 15.1961, "step": 317 }, { "epoch": 0.018232376802453917, "grad_norm": 0.0, "learning_rate": 9.782788649338376e-06, "loss": 15.7459, "step": 318 }, { "epoch": 0.01828971132070063, "grad_norm": 0.0, "learning_rate": 9.780980593880993e-06, "loss": 15.658, "step": 319 }, { "epoch": 0.018347045838947338, "grad_norm": 0.0, "learning_rate": 9.779165212988339e-06, "loss": 15.5801, "step": 320 }, { "epoch": 0.01840438035719405, "grad_norm": 0.0, "learning_rate": 9.777342509441946e-06, "loss": 15.3656, "step": 321 }, { "epoch": 0.01846171487544076, "grad_norm": 0.0, "learning_rate": 9.775512486034564e-06, "loss": 15.6884, "step": 322 }, { "epoch": 0.01851904939368747, "grad_norm": 0.0, "learning_rate": 9.773675145570163e-06, "loss": 15.7698, "step": 323 }, { "epoch": 0.01857638391193418, "grad_norm": 0.0, "learning_rate": 9.771830490863923e-06, "loss": 15.5413, "step": 324 }, { "epoch": 0.01863371843018089, "grad_norm": 0.0, "learning_rate": 9.76997852474223e-06, "loss": 15.227, "step": 325 }, { "epoch": 0.0186910529484276, "grad_norm": 0.0, "learning_rate": 9.768119250042673e-06, "loss": 16.0092, "step": 326 }, { "epoch": 0.018748387466674312, "grad_norm": 0.0, "learning_rate": 9.76625266961404e-06, "loss": 15.5157, "step": 327 }, { "epoch": 0.01880572198492102, "grad_norm": 0.0, "learning_rate": 9.76437878631631e-06, "loss": 15.4693, "step": 328 }, { "epoch": 0.018863056503167733, "grad_norm": 0.0, "learning_rate": 9.762497603020658e-06, "loss": 15.336, "step": 329 }, { "epoch": 0.01892039102141444, "grad_norm": 0.0, "learning_rate": 9.760609122609434e-06, "loss": 15.4514, "step": 330 }, { "epoch": 0.018977725539661153, "grad_norm": 0.0, "learning_rate": 9.758713347976179e-06, "loss": 15.8285, "step": 331 }, { "epoch": 0.019035060057907862, "grad_norm": 0.0, "learning_rate": 9.756810282025602e-06, "loss": 16.0446, "step": 332 }, { "epoch": 0.019092394576154574, "grad_norm": 0.0, "learning_rate": 9.754899927673588e-06, "loss": 15.9323, "step": 333 }, { "epoch": 0.019149729094401283, "grad_norm": 0.0, "learning_rate": 9.752982287847193e-06, "loss": 15.4573, "step": 334 }, { "epoch": 0.019207063612647995, "grad_norm": 0.0, "learning_rate": 9.751057365484625e-06, "loss": 15.6772, "step": 335 }, { "epoch": 0.019264398130894707, "grad_norm": 0.0, "learning_rate": 9.74912516353526e-06, "loss": 15.7258, "step": 336 }, { "epoch": 0.019321732649141415, "grad_norm": 0.0, "learning_rate": 9.747185684959626e-06, "loss": 15.716, "step": 337 }, { "epoch": 0.019379067167388127, "grad_norm": 0.0, "learning_rate": 9.745238932729397e-06, "loss": 15.6332, "step": 338 }, { "epoch": 0.019436401685634836, "grad_norm": 0.0, "learning_rate": 9.743284909827393e-06, "loss": 15.0069, "step": 339 }, { "epoch": 0.019493736203881548, "grad_norm": 0.0, "learning_rate": 9.741323619247575e-06, "loss": 15.6709, "step": 340 }, { "epoch": 0.019551070722128257, "grad_norm": 0.0, "learning_rate": 9.739355063995042e-06, "loss": 15.6234, "step": 341 }, { "epoch": 0.01960840524037497, "grad_norm": 0.0, "learning_rate": 9.73737924708602e-06, "loss": 15.7564, "step": 342 }, { "epoch": 0.019665739758621677, "grad_norm": 0.0, "learning_rate": 9.735396171547859e-06, "loss": 15.582, "step": 343 }, { "epoch": 0.01972307427686839, "grad_norm": 0.0, "learning_rate": 9.73340584041904e-06, "loss": 15.6952, "step": 344 }, { "epoch": 0.019780408795115098, "grad_norm": 0.0, "learning_rate": 9.73140825674915e-06, "loss": 15.4585, "step": 345 }, { "epoch": 0.01983774331336181, "grad_norm": 0.0, "learning_rate": 9.7294034235989e-06, "loss": 15.6929, "step": 346 }, { "epoch": 0.01989507783160852, "grad_norm": 0.0, "learning_rate": 9.727391344040095e-06, "loss": 15.7319, "step": 347 }, { "epoch": 0.01995241234985523, "grad_norm": 0.0, "learning_rate": 9.725372021155656e-06, "loss": 15.6427, "step": 348 }, { "epoch": 0.02000974686810194, "grad_norm": 0.0, "learning_rate": 9.723345458039595e-06, "loss": 15.3811, "step": 349 }, { "epoch": 0.02006708138634865, "grad_norm": 0.0, "learning_rate": 9.721311657797018e-06, "loss": 15.6416, "step": 350 }, { "epoch": 0.02012441590459536, "grad_norm": 0.0, "learning_rate": 9.719270623544122e-06, "loss": 15.5834, "step": 351 }, { "epoch": 0.020181750422842072, "grad_norm": 0.0, "learning_rate": 9.717222358408188e-06, "loss": 15.8382, "step": 352 }, { "epoch": 0.020239084941088784, "grad_norm": 0.0, "learning_rate": 9.71516686552757e-06, "loss": 15.8291, "step": 353 }, { "epoch": 0.020296419459335493, "grad_norm": 0.0, "learning_rate": 9.71310414805171e-06, "loss": 15.6789, "step": 354 }, { "epoch": 0.020353753977582205, "grad_norm": 0.0, "learning_rate": 9.711034209141102e-06, "loss": 15.5076, "step": 355 }, { "epoch": 0.020411088495828913, "grad_norm": 0.0, "learning_rate": 9.708957051967318e-06, "loss": 15.5615, "step": 356 }, { "epoch": 0.020468423014075626, "grad_norm": 0.0, "learning_rate": 9.706872679712986e-06, "loss": 15.6241, "step": 357 }, { "epoch": 0.020525757532322334, "grad_norm": 0.0, "learning_rate": 9.704781095571788e-06, "loss": 15.4126, "step": 358 }, { "epoch": 0.020583092050569046, "grad_norm": 0.0, "learning_rate": 9.702682302748456e-06, "loss": 15.6394, "step": 359 }, { "epoch": 0.020640426568815755, "grad_norm": 0.0, "learning_rate": 9.700576304458769e-06, "loss": 15.7437, "step": 360 }, { "epoch": 0.020697761087062467, "grad_norm": 0.0, "learning_rate": 9.698463103929542e-06, "loss": 15.425, "step": 361 }, { "epoch": 0.020755095605309175, "grad_norm": 0.0, "learning_rate": 9.696342704398632e-06, "loss": 15.5889, "step": 362 }, { "epoch": 0.020812430123555888, "grad_norm": 0.0, "learning_rate": 9.69421510911492e-06, "loss": 15.4247, "step": 363 }, { "epoch": 0.020869764641802596, "grad_norm": 0.0, "learning_rate": 9.692080321338317e-06, "loss": 15.572, "step": 364 }, { "epoch": 0.020927099160049308, "grad_norm": 0.0, "learning_rate": 9.689938344339751e-06, "loss": 15.6727, "step": 365 }, { "epoch": 0.020984433678296017, "grad_norm": 0.0, "learning_rate": 9.687789181401166e-06, "loss": 15.5547, "step": 366 }, { "epoch": 0.02104176819654273, "grad_norm": 0.0, "learning_rate": 9.685632835815519e-06, "loss": 15.5774, "step": 367 }, { "epoch": 0.021099102714789437, "grad_norm": 0.0, "learning_rate": 9.683469310886769e-06, "loss": 15.5133, "step": 368 }, { "epoch": 0.02115643723303615, "grad_norm": 0.0, "learning_rate": 9.681298609929875e-06, "loss": 15.7577, "step": 369 }, { "epoch": 0.021213771751282858, "grad_norm": 0.0, "learning_rate": 9.679120736270796e-06, "loss": 15.8414, "step": 370 }, { "epoch": 0.02127110626952957, "grad_norm": 0.0, "learning_rate": 9.676935693246475e-06, "loss": 15.5358, "step": 371 }, { "epoch": 0.021328440787776282, "grad_norm": 0.0, "learning_rate": 9.674743484204844e-06, "loss": 15.4978, "step": 372 }, { "epoch": 0.02138577530602299, "grad_norm": 0.0, "learning_rate": 9.672544112504813e-06, "loss": 15.9646, "step": 373 }, { "epoch": 0.021443109824269703, "grad_norm": 0.0, "learning_rate": 9.670337581516268e-06, "loss": 15.6926, "step": 374 }, { "epoch": 0.02150044434251641, "grad_norm": 0.0, "learning_rate": 9.668123894620062e-06, "loss": 15.8415, "step": 375 }, { "epoch": 0.021557778860763124, "grad_norm": 0.0, "learning_rate": 9.665903055208013e-06, "loss": 15.7397, "step": 376 }, { "epoch": 0.021615113379009832, "grad_norm": 0.0, "learning_rate": 9.663675066682903e-06, "loss": 15.8171, "step": 377 }, { "epoch": 0.021672447897256544, "grad_norm": 0.0, "learning_rate": 9.66143993245846e-06, "loss": 15.5251, "step": 378 }, { "epoch": 0.021729782415503253, "grad_norm": 0.0, "learning_rate": 9.659197655959364e-06, "loss": 15.9786, "step": 379 }, { "epoch": 0.021787116933749965, "grad_norm": 0.0, "learning_rate": 9.656948240621244e-06, "loss": 15.2645, "step": 380 }, { "epoch": 0.021844451451996674, "grad_norm": 0.0, "learning_rate": 9.654691689890656e-06, "loss": 15.4899, "step": 381 }, { "epoch": 0.021901785970243386, "grad_norm": 0.0, "learning_rate": 9.6524280072251e-06, "loss": 15.5523, "step": 382 }, { "epoch": 0.021959120488490094, "grad_norm": 0.0, "learning_rate": 9.650157196092995e-06, "loss": 16.0337, "step": 383 }, { "epoch": 0.022016455006736806, "grad_norm": 0.0, "learning_rate": 9.647879259973687e-06, "loss": 15.6746, "step": 384 }, { "epoch": 0.022073789524983515, "grad_norm": 0.0, "learning_rate": 9.645594202357438e-06, "loss": 15.5839, "step": 385 }, { "epoch": 0.022131124043230227, "grad_norm": 0.0, "learning_rate": 9.643302026745423e-06, "loss": 15.6965, "step": 386 }, { "epoch": 0.022188458561476936, "grad_norm": 0.0, "learning_rate": 9.641002736649718e-06, "loss": 15.6181, "step": 387 }, { "epoch": 0.022245793079723648, "grad_norm": 0.0, "learning_rate": 9.638696335593304e-06, "loss": 15.6196, "step": 388 }, { "epoch": 0.02230312759797036, "grad_norm": 0.0, "learning_rate": 9.636382827110059e-06, "loss": 15.7294, "step": 389 }, { "epoch": 0.02236046211621707, "grad_norm": 0.0, "learning_rate": 9.634062214744749e-06, "loss": 15.3191, "step": 390 }, { "epoch": 0.02241779663446378, "grad_norm": 0.0, "learning_rate": 9.63173450205302e-06, "loss": 15.4134, "step": 391 }, { "epoch": 0.02247513115271049, "grad_norm": 0.0, "learning_rate": 9.629399692601406e-06, "loss": 15.3963, "step": 392 }, { "epoch": 0.0225324656709572, "grad_norm": 0.0, "learning_rate": 9.62705778996731e-06, "loss": 15.6359, "step": 393 }, { "epoch": 0.02258980018920391, "grad_norm": 0.0, "learning_rate": 9.624708797739002e-06, "loss": 15.721, "step": 394 }, { "epoch": 0.02264713470745062, "grad_norm": 0.0, "learning_rate": 9.622352719515615e-06, "loss": 15.2731, "step": 395 }, { "epoch": 0.02270446922569733, "grad_norm": 0.0, "learning_rate": 9.619989558907144e-06, "loss": 15.5629, "step": 396 }, { "epoch": 0.022761803743944042, "grad_norm": 0.0, "learning_rate": 9.617619319534427e-06, "loss": 15.9466, "step": 397 }, { "epoch": 0.02281913826219075, "grad_norm": 0.0, "learning_rate": 9.615242005029159e-06, "loss": 15.4187, "step": 398 }, { "epoch": 0.022876472780437463, "grad_norm": 0.0, "learning_rate": 9.612857619033865e-06, "loss": 15.7401, "step": 399 }, { "epoch": 0.02293380729868417, "grad_norm": 0.0, "learning_rate": 9.610466165201912e-06, "loss": 15.6079, "step": 400 }, { "epoch": 0.022991141816930884, "grad_norm": 0.0, "learning_rate": 9.608067647197492e-06, "loss": 15.53, "step": 401 }, { "epoch": 0.023048476335177592, "grad_norm": 0.0, "learning_rate": 9.605662068695625e-06, "loss": 15.7122, "step": 402 }, { "epoch": 0.023105810853424304, "grad_norm": 0.0, "learning_rate": 9.603249433382145e-06, "loss": 15.3243, "step": 403 }, { "epoch": 0.023163145371671013, "grad_norm": 0.0, "learning_rate": 9.6008297449537e-06, "loss": 15.6436, "step": 404 }, { "epoch": 0.023220479889917725, "grad_norm": 0.0, "learning_rate": 9.598403007117748e-06, "loss": 15.6834, "step": 405 }, { "epoch": 0.023277814408164437, "grad_norm": 0.0, "learning_rate": 9.595969223592544e-06, "loss": 15.6098, "step": 406 }, { "epoch": 0.023335148926411146, "grad_norm": 0.0, "learning_rate": 9.593528398107137e-06, "loss": 15.7385, "step": 407 }, { "epoch": 0.023392483444657858, "grad_norm": 0.0, "learning_rate": 9.591080534401371e-06, "loss": 15.3407, "step": 408 }, { "epoch": 0.023449817962904566, "grad_norm": 0.0, "learning_rate": 9.588625636225871e-06, "loss": 15.4649, "step": 409 }, { "epoch": 0.02350715248115128, "grad_norm": 0.0, "learning_rate": 9.58616370734204e-06, "loss": 16.0059, "step": 410 }, { "epoch": 0.023564486999397987, "grad_norm": 0.0, "learning_rate": 9.583694751522054e-06, "loss": 15.6544, "step": 411 }, { "epoch": 0.0236218215176447, "grad_norm": 0.0, "learning_rate": 9.58121877254886e-06, "loss": 15.3054, "step": 412 }, { "epoch": 0.023679156035891408, "grad_norm": 0.0, "learning_rate": 9.578735774216155e-06, "loss": 15.3782, "step": 413 }, { "epoch": 0.02373649055413812, "grad_norm": 0.0, "learning_rate": 9.5762457603284e-06, "loss": 15.851, "step": 414 }, { "epoch": 0.02379382507238483, "grad_norm": 0.0, "learning_rate": 9.573748734700806e-06, "loss": 15.4994, "step": 415 }, { "epoch": 0.02385115959063154, "grad_norm": 0.0, "learning_rate": 9.57124470115932e-06, "loss": 15.3128, "step": 416 }, { "epoch": 0.02390849410887825, "grad_norm": 0.0, "learning_rate": 9.568733663540634e-06, "loss": 15.3998, "step": 417 }, { "epoch": 0.02396582862712496, "grad_norm": 0.0, "learning_rate": 9.566215625692168e-06, "loss": 15.5673, "step": 418 }, { "epoch": 0.02402316314537167, "grad_norm": 0.0, "learning_rate": 9.563690591472067e-06, "loss": 15.642, "step": 419 }, { "epoch": 0.024080497663618382, "grad_norm": 0.0, "learning_rate": 9.561158564749202e-06, "loss": 15.7555, "step": 420 }, { "epoch": 0.02413783218186509, "grad_norm": 0.0, "learning_rate": 9.558619549403148e-06, "loss": 15.5263, "step": 421 }, { "epoch": 0.024195166700111802, "grad_norm": 0.0, "learning_rate": 9.556073549324195e-06, "loss": 15.6044, "step": 422 }, { "epoch": 0.02425250121835851, "grad_norm": 0.0, "learning_rate": 9.553520568413335e-06, "loss": 15.8033, "step": 423 }, { "epoch": 0.024309835736605223, "grad_norm": 0.0, "learning_rate": 9.550960610582251e-06, "loss": 15.8008, "step": 424 }, { "epoch": 0.024367170254851935, "grad_norm": 0.0, "learning_rate": 9.548393679753321e-06, "loss": 15.6639, "step": 425 }, { "epoch": 0.024424504773098644, "grad_norm": 0.0, "learning_rate": 9.545819779859607e-06, "loss": 15.2476, "step": 426 }, { "epoch": 0.024481839291345356, "grad_norm": 0.0, "learning_rate": 9.543238914844844e-06, "loss": 15.331, "step": 427 }, { "epoch": 0.024539173809592064, "grad_norm": 0.0, "learning_rate": 9.540651088663446e-06, "loss": 15.4785, "step": 428 }, { "epoch": 0.024596508327838777, "grad_norm": 0.0, "learning_rate": 9.538056305280487e-06, "loss": 15.6987, "step": 429 }, { "epoch": 0.024653842846085485, "grad_norm": 0.0, "learning_rate": 9.535454568671705e-06, "loss": 15.2466, "step": 430 }, { "epoch": 0.024711177364332197, "grad_norm": 0.0, "learning_rate": 9.532845882823489e-06, "loss": 15.8279, "step": 431 }, { "epoch": 0.024768511882578906, "grad_norm": 0.0, "learning_rate": 9.530230251732875e-06, "loss": 15.8509, "step": 432 }, { "epoch": 0.024825846400825618, "grad_norm": 0.0, "learning_rate": 9.527607679407545e-06, "loss": 15.6834, "step": 433 }, { "epoch": 0.024883180919072326, "grad_norm": 0.0, "learning_rate": 9.524978169865813e-06, "loss": 15.2951, "step": 434 }, { "epoch": 0.02494051543731904, "grad_norm": 0.0, "learning_rate": 9.522341727136622e-06, "loss": 15.8398, "step": 435 }, { "epoch": 0.024997849955565747, "grad_norm": 0.0, "learning_rate": 9.519698355259537e-06, "loss": 15.735, "step": 436 }, { "epoch": 0.02505518447381246, "grad_norm": 0.0, "learning_rate": 9.517048058284746e-06, "loss": 15.2163, "step": 437 }, { "epoch": 0.025112518992059168, "grad_norm": 0.0, "learning_rate": 9.51439084027304e-06, "loss": 15.9837, "step": 438 }, { "epoch": 0.02516985351030588, "grad_norm": 0.0, "learning_rate": 9.51172670529582e-06, "loss": 15.5941, "step": 439 }, { "epoch": 0.02522718802855259, "grad_norm": 0.0, "learning_rate": 9.50905565743508e-06, "loss": 15.6597, "step": 440 }, { "epoch": 0.0252845225467993, "grad_norm": 0.0, "learning_rate": 9.506377700783412e-06, "loss": 15.6818, "step": 441 }, { "epoch": 0.025341857065046013, "grad_norm": 0.0, "learning_rate": 9.503692839443988e-06, "loss": 16.0401, "step": 442 }, { "epoch": 0.02539919158329272, "grad_norm": 0.0, "learning_rate": 9.501001077530563e-06, "loss": 15.2495, "step": 443 }, { "epoch": 0.025456526101539433, "grad_norm": 0.0, "learning_rate": 9.498302419167465e-06, "loss": 15.6474, "step": 444 }, { "epoch": 0.025513860619786142, "grad_norm": 0.0, "learning_rate": 9.495596868489588e-06, "loss": 15.633, "step": 445 }, { "epoch": 0.025571195138032854, "grad_norm": 0.0, "learning_rate": 9.492884429642383e-06, "loss": 15.1942, "step": 446 }, { "epoch": 0.025628529656279563, "grad_norm": 0.0, "learning_rate": 9.490165106781863e-06, "loss": 15.7698, "step": 447 }, { "epoch": 0.025685864174526275, "grad_norm": 0.0, "learning_rate": 9.487438904074581e-06, "loss": 15.7203, "step": 448 }, { "epoch": 0.025743198692772983, "grad_norm": 0.0, "learning_rate": 9.484705825697635e-06, "loss": 15.8956, "step": 449 }, { "epoch": 0.025800533211019695, "grad_norm": 0.0, "learning_rate": 9.481965875838657e-06, "loss": 15.7252, "step": 450 }, { "epoch": 0.025857867729266404, "grad_norm": 0.0, "learning_rate": 9.47921905869581e-06, "loss": 15.5917, "step": 451 }, { "epoch": 0.025915202247513116, "grad_norm": 0.0, "learning_rate": 9.476465378477773e-06, "loss": 15.7833, "step": 452 }, { "epoch": 0.025972536765759825, "grad_norm": 0.0, "learning_rate": 9.473704839403748e-06, "loss": 15.8857, "step": 453 }, { "epoch": 0.026029871284006537, "grad_norm": 0.0, "learning_rate": 9.47093744570344e-06, "loss": 15.9607, "step": 454 }, { "epoch": 0.026087205802253245, "grad_norm": 0.0, "learning_rate": 9.468163201617063e-06, "loss": 15.846, "step": 455 }, { "epoch": 0.026144540320499957, "grad_norm": 0.0, "learning_rate": 9.465382111395319e-06, "loss": 15.6212, "step": 456 }, { "epoch": 0.026201874838746666, "grad_norm": 0.0, "learning_rate": 9.462594179299408e-06, "loss": 15.5892, "step": 457 }, { "epoch": 0.026259209356993378, "grad_norm": 0.0, "learning_rate": 9.459799409601006e-06, "loss": 15.3363, "step": 458 }, { "epoch": 0.02631654387524009, "grad_norm": 0.0, "learning_rate": 9.456997806582272e-06, "loss": 15.6226, "step": 459 }, { "epoch": 0.0263738783934868, "grad_norm": 0.0, "learning_rate": 9.45418937453583e-06, "loss": 15.5168, "step": 460 }, { "epoch": 0.02643121291173351, "grad_norm": 0.0, "learning_rate": 9.45137411776477e-06, "loss": 15.5484, "step": 461 }, { "epoch": 0.02648854742998022, "grad_norm": 0.0, "learning_rate": 9.44855204058264e-06, "loss": 15.8039, "step": 462 }, { "epoch": 0.02654588194822693, "grad_norm": 0.0, "learning_rate": 9.445723147313434e-06, "loss": 15.3607, "step": 463 }, { "epoch": 0.02660321646647364, "grad_norm": 0.0, "learning_rate": 9.442887442291593e-06, "loss": 15.6776, "step": 464 }, { "epoch": 0.026660550984720352, "grad_norm": 0.0, "learning_rate": 9.440044929861995e-06, "loss": 15.5051, "step": 465 }, { "epoch": 0.02671788550296706, "grad_norm": 0.0, "learning_rate": 9.437195614379947e-06, "loss": 15.5395, "step": 466 }, { "epoch": 0.026775220021213773, "grad_norm": 0.0, "learning_rate": 9.43433950021118e-06, "loss": 15.0945, "step": 467 }, { "epoch": 0.02683255453946048, "grad_norm": 0.0, "learning_rate": 9.431476591731842e-06, "loss": 15.5566, "step": 468 }, { "epoch": 0.026889889057707193, "grad_norm": 0.0, "learning_rate": 9.428606893328493e-06, "loss": 15.6923, "step": 469 }, { "epoch": 0.026947223575953902, "grad_norm": 0.0, "learning_rate": 9.425730409398094e-06, "loss": 15.7286, "step": 470 }, { "epoch": 0.027004558094200614, "grad_norm": 0.0, "learning_rate": 9.422847144348002e-06, "loss": 15.5483, "step": 471 }, { "epoch": 0.027061892612447323, "grad_norm": 0.0, "learning_rate": 9.41995710259597e-06, "loss": 15.5248, "step": 472 }, { "epoch": 0.027119227130694035, "grad_norm": 0.0, "learning_rate": 9.417060288570126e-06, "loss": 15.9598, "step": 473 }, { "epoch": 0.027176561648940743, "grad_norm": 0.0, "learning_rate": 9.414156706708978e-06, "loss": 15.779, "step": 474 }, { "epoch": 0.027233896167187455, "grad_norm": 0.0, "learning_rate": 9.41124636146141e-06, "loss": 15.7317, "step": 475 }, { "epoch": 0.027291230685434164, "grad_norm": 0.0, "learning_rate": 9.408329257286658e-06, "loss": 15.4797, "step": 476 }, { "epoch": 0.027348565203680876, "grad_norm": 0.0, "learning_rate": 9.405405398654322e-06, "loss": 15.865, "step": 477 }, { "epoch": 0.027405899721927588, "grad_norm": 0.0, "learning_rate": 9.402474790044348e-06, "loss": 15.3192, "step": 478 }, { "epoch": 0.027463234240174297, "grad_norm": 0.0, "learning_rate": 9.399537435947023e-06, "loss": 15.4034, "step": 479 }, { "epoch": 0.02752056875842101, "grad_norm": 0.0, "learning_rate": 9.396593340862972e-06, "loss": 15.5196, "step": 480 }, { "epoch": 0.027577903276667717, "grad_norm": 0.0, "learning_rate": 9.39364250930315e-06, "loss": 16.0287, "step": 481 }, { "epoch": 0.02763523779491443, "grad_norm": 0.0, "learning_rate": 9.39068494578883e-06, "loss": 15.6026, "step": 482 }, { "epoch": 0.027692572313161138, "grad_norm": 0.0, "learning_rate": 9.3877206548516e-06, "loss": 15.3239, "step": 483 }, { "epoch": 0.02774990683140785, "grad_norm": 0.0, "learning_rate": 9.384749641033358e-06, "loss": 15.66, "step": 484 }, { "epoch": 0.02780724134965456, "grad_norm": 0.0, "learning_rate": 9.381771908886303e-06, "loss": 15.6269, "step": 485 }, { "epoch": 0.02786457586790127, "grad_norm": 0.0, "learning_rate": 9.378787462972925e-06, "loss": 15.63, "step": 486 }, { "epoch": 0.02792191038614798, "grad_norm": 0.0, "learning_rate": 9.375796307866003e-06, "loss": 15.5099, "step": 487 }, { "epoch": 0.02797924490439469, "grad_norm": 0.0, "learning_rate": 9.372798448148597e-06, "loss": 15.456, "step": 488 }, { "epoch": 0.0280365794226414, "grad_norm": 0.0, "learning_rate": 9.369793888414036e-06, "loss": 15.8082, "step": 489 }, { "epoch": 0.028093913940888112, "grad_norm": 0.0, "learning_rate": 9.366782633265917e-06, "loss": 15.6783, "step": 490 }, { "epoch": 0.02815124845913482, "grad_norm": 0.0, "learning_rate": 9.363764687318097e-06, "loss": 15.4574, "step": 491 }, { "epoch": 0.028208582977381533, "grad_norm": 0.0, "learning_rate": 9.360740055194682e-06, "loss": 15.6102, "step": 492 }, { "epoch": 0.02826591749562824, "grad_norm": 0.0, "learning_rate": 9.357708741530025e-06, "loss": 15.3994, "step": 493 }, { "epoch": 0.028323252013874953, "grad_norm": 0.0, "learning_rate": 9.354670750968716e-06, "loss": 15.3506, "step": 494 }, { "epoch": 0.028380586532121665, "grad_norm": 0.0, "learning_rate": 9.351626088165574e-06, "loss": 15.783, "step": 495 }, { "epoch": 0.028437921050368374, "grad_norm": 0.0, "learning_rate": 9.348574757785642e-06, "loss": 15.5606, "step": 496 }, { "epoch": 0.028495255568615086, "grad_norm": 0.0, "learning_rate": 9.345516764504179e-06, "loss": 15.3805, "step": 497 }, { "epoch": 0.028552590086861795, "grad_norm": 0.0, "learning_rate": 9.342452113006653e-06, "loss": 15.6996, "step": 498 }, { "epoch": 0.028609924605108507, "grad_norm": 0.0, "learning_rate": 9.339380807988734e-06, "loss": 15.3441, "step": 499 }, { "epoch": 0.028667259123355215, "grad_norm": 0.0, "learning_rate": 9.336302854156287e-06, "loss": 15.5148, "step": 500 }, { "epoch": 0.028724593641601927, "grad_norm": 0.0, "learning_rate": 9.333218256225362e-06, "loss": 15.529, "step": 501 }, { "epoch": 0.028781928159848636, "grad_norm": 0.0, "learning_rate": 9.330127018922195e-06, "loss": 15.8365, "step": 502 }, { "epoch": 0.028839262678095348, "grad_norm": 0.0, "learning_rate": 9.327029146983184e-06, "loss": 15.9286, "step": 503 }, { "epoch": 0.028896597196342057, "grad_norm": 0.0, "learning_rate": 9.323924645154906e-06, "loss": 15.4567, "step": 504 }, { "epoch": 0.02895393171458877, "grad_norm": 0.0, "learning_rate": 9.320813518194084e-06, "loss": 15.8201, "step": 505 }, { "epoch": 0.029011266232835477, "grad_norm": 0.0, "learning_rate": 9.317695770867601e-06, "loss": 15.7347, "step": 506 }, { "epoch": 0.02906860075108219, "grad_norm": 0.0, "learning_rate": 9.31457140795248e-06, "loss": 15.5777, "step": 507 }, { "epoch": 0.029125935269328898, "grad_norm": 0.0, "learning_rate": 9.311440434235879e-06, "loss": 15.6935, "step": 508 }, { "epoch": 0.02918326978757561, "grad_norm": 0.0, "learning_rate": 9.30830285451509e-06, "loss": 15.7309, "step": 509 }, { "epoch": 0.02924060430582232, "grad_norm": 0.0, "learning_rate": 9.30515867359752e-06, "loss": 15.6683, "step": 510 }, { "epoch": 0.02929793882406903, "grad_norm": 0.0, "learning_rate": 9.302007896300697e-06, "loss": 15.4964, "step": 511 }, { "epoch": 0.029355273342315743, "grad_norm": 0.0, "learning_rate": 9.298850527452253e-06, "loss": 15.5383, "step": 512 }, { "epoch": 0.02941260786056245, "grad_norm": 0.0, "learning_rate": 9.295686571889919e-06, "loss": 15.6289, "step": 513 }, { "epoch": 0.029469942378809164, "grad_norm": 0.0, "learning_rate": 9.292516034461517e-06, "loss": 15.329, "step": 514 }, { "epoch": 0.029527276897055872, "grad_norm": 0.0, "learning_rate": 9.289338920024958e-06, "loss": 15.5542, "step": 515 }, { "epoch": 0.029584611415302584, "grad_norm": 0.0, "learning_rate": 9.286155233448226e-06, "loss": 15.3286, "step": 516 }, { "epoch": 0.029641945933549293, "grad_norm": 0.0, "learning_rate": 9.28296497960938e-06, "loss": 15.4839, "step": 517 }, { "epoch": 0.029699280451796005, "grad_norm": 0.0, "learning_rate": 9.279768163396535e-06, "loss": 15.6228, "step": 518 }, { "epoch": 0.029756614970042713, "grad_norm": 0.0, "learning_rate": 9.276564789707865e-06, "loss": 15.5924, "step": 519 }, { "epoch": 0.029813949488289426, "grad_norm": 0.0, "learning_rate": 9.273354863451589e-06, "loss": 15.719, "step": 520 }, { "epoch": 0.029871284006536134, "grad_norm": 0.0, "learning_rate": 9.27013838954597e-06, "loss": 15.6843, "step": 521 }, { "epoch": 0.029928618524782846, "grad_norm": 0.0, "learning_rate": 9.266915372919301e-06, "loss": 15.5317, "step": 522 }, { "epoch": 0.029985953043029555, "grad_norm": 0.0, "learning_rate": 9.263685818509895e-06, "loss": 15.5594, "step": 523 }, { "epoch": 0.030043287561276267, "grad_norm": 0.0, "learning_rate": 9.260449731266092e-06, "loss": 15.8769, "step": 524 }, { "epoch": 0.030100622079522975, "grad_norm": 0.0, "learning_rate": 9.257207116146231e-06, "loss": 15.1793, "step": 525 }, { "epoch": 0.030157956597769688, "grad_norm": 0.0, "learning_rate": 9.253957978118664e-06, "loss": 15.6719, "step": 526 }, { "epoch": 0.030215291116016396, "grad_norm": 0.0, "learning_rate": 9.250702322161726e-06, "loss": 15.5782, "step": 527 }, { "epoch": 0.030272625634263108, "grad_norm": 0.0, "learning_rate": 9.24744015326375e-06, "loss": 15.6119, "step": 528 }, { "epoch": 0.030329960152509817, "grad_norm": 0.0, "learning_rate": 9.244171476423037e-06, "loss": 15.4868, "step": 529 }, { "epoch": 0.03038729467075653, "grad_norm": 0.0, "learning_rate": 9.24089629664787e-06, "loss": 15.7071, "step": 530 }, { "epoch": 0.03044462918900324, "grad_norm": 0.0, "learning_rate": 9.237614618956488e-06, "loss": 15.7531, "step": 531 }, { "epoch": 0.03050196370724995, "grad_norm": 0.0, "learning_rate": 9.234326448377089e-06, "loss": 15.436, "step": 532 }, { "epoch": 0.03055929822549666, "grad_norm": 0.0, "learning_rate": 9.231031789947822e-06, "loss": 15.4103, "step": 533 }, { "epoch": 0.03061663274374337, "grad_norm": 0.0, "learning_rate": 9.227730648716771e-06, "loss": 15.3176, "step": 534 }, { "epoch": 0.030673967261990082, "grad_norm": 0.0, "learning_rate": 9.22442302974196e-06, "loss": 15.5685, "step": 535 }, { "epoch": 0.03073130178023679, "grad_norm": 0.0, "learning_rate": 9.221108938091333e-06, "loss": 15.9073, "step": 536 }, { "epoch": 0.030788636298483503, "grad_norm": 0.0, "learning_rate": 9.217788378842749e-06, "loss": 15.6944, "step": 537 }, { "epoch": 0.03084597081673021, "grad_norm": 0.0, "learning_rate": 9.214461357083986e-06, "loss": 15.636, "step": 538 }, { "epoch": 0.030903305334976924, "grad_norm": 0.0, "learning_rate": 9.211127877912715e-06, "loss": 15.4674, "step": 539 }, { "epoch": 0.030960639853223632, "grad_norm": 0.0, "learning_rate": 9.207787946436509e-06, "loss": 16.0203, "step": 540 }, { "epoch": 0.031017974371470344, "grad_norm": 0.0, "learning_rate": 9.204441567772817e-06, "loss": 15.6157, "step": 541 }, { "epoch": 0.031075308889717053, "grad_norm": 0.0, "learning_rate": 9.201088747048974e-06, "loss": 15.929, "step": 542 }, { "epoch": 0.031132643407963765, "grad_norm": 0.0, "learning_rate": 9.197729489402185e-06, "loss": 15.4643, "step": 543 }, { "epoch": 0.031189977926210474, "grad_norm": 0.0, "learning_rate": 9.194363799979517e-06, "loss": 15.5438, "step": 544 }, { "epoch": 0.031247312444457186, "grad_norm": 0.0, "learning_rate": 9.19099168393789e-06, "loss": 15.5475, "step": 545 }, { "epoch": 0.031304646962703894, "grad_norm": 0.0, "learning_rate": 9.18761314644407e-06, "loss": 15.1468, "step": 546 }, { "epoch": 0.03136198148095061, "grad_norm": 0.0, "learning_rate": 9.184228192674667e-06, "loss": 15.3172, "step": 547 }, { "epoch": 0.03141931599919732, "grad_norm": 0.0, "learning_rate": 9.180836827816118e-06, "loss": 15.5644, "step": 548 }, { "epoch": 0.03147665051744403, "grad_norm": 0.0, "learning_rate": 9.177439057064684e-06, "loss": 15.5696, "step": 549 }, { "epoch": 0.031533985035690736, "grad_norm": 0.0, "learning_rate": 9.17403488562644e-06, "loss": 15.3968, "step": 550 }, { "epoch": 0.03159131955393745, "grad_norm": 0.0, "learning_rate": 9.170624318717274e-06, "loss": 15.6295, "step": 551 }, { "epoch": 0.03164865407218416, "grad_norm": 0.0, "learning_rate": 9.167207361562863e-06, "loss": 15.5716, "step": 552 }, { "epoch": 0.03170598859043087, "grad_norm": 0.0, "learning_rate": 9.163784019398686e-06, "loss": 15.6282, "step": 553 }, { "epoch": 0.03176332310867758, "grad_norm": 0.0, "learning_rate": 9.160354297469994e-06, "loss": 15.2821, "step": 554 }, { "epoch": 0.03182065762692429, "grad_norm": 0.0, "learning_rate": 9.156918201031823e-06, "loss": 15.4259, "step": 555 }, { "epoch": 0.031877992145171, "grad_norm": 0.0, "learning_rate": 9.153475735348973e-06, "loss": 15.3835, "step": 556 }, { "epoch": 0.03193532666341771, "grad_norm": 0.0, "learning_rate": 9.150026905696e-06, "loss": 15.6208, "step": 557 }, { "epoch": 0.03199266118166442, "grad_norm": 0.0, "learning_rate": 9.146571717357211e-06, "loss": 15.3262, "step": 558 }, { "epoch": 0.032049995699911134, "grad_norm": 0.0, "learning_rate": 9.143110175626662e-06, "loss": 15.6206, "step": 559 }, { "epoch": 0.03210733021815784, "grad_norm": 0.0, "learning_rate": 9.139642285808137e-06, "loss": 15.5763, "step": 560 }, { "epoch": 0.03216466473640455, "grad_norm": 0.0, "learning_rate": 9.136168053215148e-06, "loss": 15.5814, "step": 561 }, { "epoch": 0.03222199925465126, "grad_norm": 0.0, "learning_rate": 9.13268748317093e-06, "loss": 15.8486, "step": 562 }, { "epoch": 0.032279333772897975, "grad_norm": 0.0, "learning_rate": 9.12920058100842e-06, "loss": 15.5351, "step": 563 }, { "epoch": 0.032336668291144684, "grad_norm": 0.0, "learning_rate": 9.125707352070265e-06, "loss": 15.677, "step": 564 }, { "epoch": 0.03239400280939139, "grad_norm": 0.0, "learning_rate": 9.122207801708802e-06, "loss": 15.7423, "step": 565 }, { "epoch": 0.03245133732763811, "grad_norm": 0.0, "learning_rate": 9.118701935286054e-06, "loss": 15.7189, "step": 566 }, { "epoch": 0.032508671845884816, "grad_norm": 0.0, "learning_rate": 9.115189758173721e-06, "loss": 15.9269, "step": 567 }, { "epoch": 0.032566006364131525, "grad_norm": 0.0, "learning_rate": 9.111671275753175e-06, "loss": 15.6356, "step": 568 }, { "epoch": 0.032623340882378234, "grad_norm": 0.0, "learning_rate": 9.108146493415448e-06, "loss": 15.61, "step": 569 }, { "epoch": 0.03268067540062495, "grad_norm": 0.0, "learning_rate": 9.10461541656122e-06, "loss": 15.6629, "step": 570 }, { "epoch": 0.03273800991887166, "grad_norm": 0.0, "learning_rate": 9.101078050600823e-06, "loss": 15.712, "step": 571 }, { "epoch": 0.032795344437118366, "grad_norm": 0.0, "learning_rate": 9.097534400954218e-06, "loss": 15.4088, "step": 572 }, { "epoch": 0.032852678955365075, "grad_norm": 0.0, "learning_rate": 9.093984473051e-06, "loss": 15.3972, "step": 573 }, { "epoch": 0.03291001347361179, "grad_norm": 0.0, "learning_rate": 9.090428272330381e-06, "loss": 15.8337, "step": 574 }, { "epoch": 0.0329673479918585, "grad_norm": 0.0, "learning_rate": 9.086865804241184e-06, "loss": 15.5395, "step": 575 }, { "epoch": 0.03302468251010521, "grad_norm": 0.0, "learning_rate": 9.083297074241833e-06, "loss": 15.8856, "step": 576 }, { "epoch": 0.033082017028351916, "grad_norm": 0.0, "learning_rate": 9.079722087800353e-06, "loss": 15.5518, "step": 577 }, { "epoch": 0.03313935154659863, "grad_norm": 0.0, "learning_rate": 9.076140850394345e-06, "loss": 15.6313, "step": 578 }, { "epoch": 0.03319668606484534, "grad_norm": 0.0, "learning_rate": 9.072553367511e-06, "loss": 15.6402, "step": 579 }, { "epoch": 0.03325402058309205, "grad_norm": 0.0, "learning_rate": 9.06895964464707e-06, "loss": 15.7837, "step": 580 }, { "epoch": 0.03331135510133876, "grad_norm": 0.0, "learning_rate": 9.065359687308865e-06, "loss": 15.3619, "step": 581 }, { "epoch": 0.03336868961958547, "grad_norm": 0.0, "learning_rate": 9.061753501012257e-06, "loss": 15.6569, "step": 582 }, { "epoch": 0.03342602413783218, "grad_norm": 0.0, "learning_rate": 9.058141091282656e-06, "loss": 15.8729, "step": 583 }, { "epoch": 0.03348335865607889, "grad_norm": 0.0, "learning_rate": 9.054522463655008e-06, "loss": 15.5701, "step": 584 }, { "epoch": 0.033540693174325606, "grad_norm": 0.0, "learning_rate": 9.050897623673791e-06, "loss": 15.7377, "step": 585 }, { "epoch": 0.033598027692572315, "grad_norm": 0.0, "learning_rate": 9.047266576892993e-06, "loss": 15.5294, "step": 586 }, { "epoch": 0.03365536221081902, "grad_norm": 0.0, "learning_rate": 9.043629328876117e-06, "loss": 15.7416, "step": 587 }, { "epoch": 0.03371269672906573, "grad_norm": 0.0, "learning_rate": 9.039985885196171e-06, "loss": 15.628, "step": 588 }, { "epoch": 0.03377003124731245, "grad_norm": 0.0, "learning_rate": 9.036336251435647e-06, "loss": 15.6772, "step": 589 }, { "epoch": 0.033827365765559156, "grad_norm": 0.0, "learning_rate": 9.032680433186531e-06, "loss": 16.1895, "step": 590 }, { "epoch": 0.033884700283805864, "grad_norm": 0.0, "learning_rate": 9.029018436050278e-06, "loss": 15.6948, "step": 591 }, { "epoch": 0.03394203480205257, "grad_norm": 0.0, "learning_rate": 9.025350265637816e-06, "loss": 15.3231, "step": 592 }, { "epoch": 0.03399936932029929, "grad_norm": 0.0, "learning_rate": 9.021675927569525e-06, "loss": 15.5173, "step": 593 }, { "epoch": 0.034056703838546, "grad_norm": 0.0, "learning_rate": 9.017995427475238e-06, "loss": 15.3488, "step": 594 }, { "epoch": 0.034114038356792706, "grad_norm": 0.0, "learning_rate": 9.014308770994235e-06, "loss": 15.8426, "step": 595 }, { "epoch": 0.034171372875039414, "grad_norm": 0.0, "learning_rate": 9.01061596377522e-06, "loss": 15.4642, "step": 596 }, { "epoch": 0.03422870739328613, "grad_norm": 0.0, "learning_rate": 9.006917011476326e-06, "loss": 15.4795, "step": 597 }, { "epoch": 0.03428604191153284, "grad_norm": 0.0, "learning_rate": 9.003211919765102e-06, "loss": 15.8165, "step": 598 }, { "epoch": 0.03434337642977955, "grad_norm": 0.0, "learning_rate": 8.999500694318501e-06, "loss": 15.5455, "step": 599 }, { "epoch": 0.03440071094802626, "grad_norm": 0.0, "learning_rate": 8.995783340822878e-06, "loss": 15.7814, "step": 600 }, { "epoch": 0.03445804546627297, "grad_norm": 0.0, "learning_rate": 8.992059864973972e-06, "loss": 15.9455, "step": 601 }, { "epoch": 0.03451537998451968, "grad_norm": 0.0, "learning_rate": 8.988330272476908e-06, "loss": 15.2725, "step": 602 }, { "epoch": 0.03457271450276639, "grad_norm": 0.0, "learning_rate": 8.98459456904618e-06, "loss": 15.4554, "step": 603 }, { "epoch": 0.034630049021013104, "grad_norm": 0.0, "learning_rate": 8.980852760405645e-06, "loss": 15.5684, "step": 604 }, { "epoch": 0.03468738353925981, "grad_norm": 0.0, "learning_rate": 8.977104852288519e-06, "loss": 15.7634, "step": 605 }, { "epoch": 0.03474471805750652, "grad_norm": 0.0, "learning_rate": 8.973350850437355e-06, "loss": 15.6467, "step": 606 }, { "epoch": 0.03480205257575323, "grad_norm": 0.0, "learning_rate": 8.96959076060405e-06, "loss": 15.7511, "step": 607 }, { "epoch": 0.034859387093999945, "grad_norm": 0.0, "learning_rate": 8.965824588549827e-06, "loss": 15.5423, "step": 608 }, { "epoch": 0.034916721612246654, "grad_norm": 0.0, "learning_rate": 8.962052340045228e-06, "loss": 15.4482, "step": 609 }, { "epoch": 0.03497405613049336, "grad_norm": 0.0, "learning_rate": 8.958274020870107e-06, "loss": 15.5465, "step": 610 }, { "epoch": 0.03503139064874007, "grad_norm": 0.0, "learning_rate": 8.954489636813615e-06, "loss": 15.5435, "step": 611 }, { "epoch": 0.03508872516698679, "grad_norm": 0.0, "learning_rate": 8.9506991936742e-06, "loss": 15.6975, "step": 612 }, { "epoch": 0.035146059685233495, "grad_norm": 0.0, "learning_rate": 8.946902697259593e-06, "loss": 15.788, "step": 613 }, { "epoch": 0.035203394203480204, "grad_norm": 0.0, "learning_rate": 8.943100153386798e-06, "loss": 15.9397, "step": 614 }, { "epoch": 0.03526072872172691, "grad_norm": 0.0, "learning_rate": 8.939291567882087e-06, "loss": 15.7003, "step": 615 }, { "epoch": 0.03531806323997363, "grad_norm": 0.0, "learning_rate": 8.935476946580988e-06, "loss": 15.5504, "step": 616 }, { "epoch": 0.03537539775822034, "grad_norm": 0.0, "learning_rate": 8.931656295328275e-06, "loss": 15.5768, "step": 617 }, { "epoch": 0.035432732276467045, "grad_norm": 0.0, "learning_rate": 8.927829619977965e-06, "loss": 15.6244, "step": 618 }, { "epoch": 0.03549006679471376, "grad_norm": 0.0, "learning_rate": 8.923996926393306e-06, "loss": 15.4595, "step": 619 }, { "epoch": 0.03554740131296047, "grad_norm": 0.0, "learning_rate": 8.92015822044676e-06, "loss": 15.5153, "step": 620 }, { "epoch": 0.03560473583120718, "grad_norm": 0.0, "learning_rate": 8.91631350802001e-06, "loss": 15.5014, "step": 621 }, { "epoch": 0.03566207034945389, "grad_norm": 0.0, "learning_rate": 8.912462795003932e-06, "loss": 15.3184, "step": 622 }, { "epoch": 0.0357194048677006, "grad_norm": 0.0, "learning_rate": 8.908606087298608e-06, "loss": 15.8297, "step": 623 }, { "epoch": 0.03577673938594731, "grad_norm": 0.0, "learning_rate": 8.904743390813296e-06, "loss": 15.3076, "step": 624 }, { "epoch": 0.03583407390419402, "grad_norm": 0.0, "learning_rate": 8.900874711466436e-06, "loss": 15.5906, "step": 625 }, { "epoch": 0.03589140842244073, "grad_norm": 0.0, "learning_rate": 8.897000055185628e-06, "loss": 15.3853, "step": 626 }, { "epoch": 0.03594874294068744, "grad_norm": 0.0, "learning_rate": 8.893119427907636e-06, "loss": 15.7627, "step": 627 }, { "epoch": 0.03600607745893415, "grad_norm": 0.0, "learning_rate": 8.889232835578372e-06, "loss": 15.5701, "step": 628 }, { "epoch": 0.03606341197718086, "grad_norm": 0.0, "learning_rate": 8.885340284152883e-06, "loss": 15.4473, "step": 629 }, { "epoch": 0.03612074649542757, "grad_norm": 0.0, "learning_rate": 8.881441779595355e-06, "loss": 15.4852, "step": 630 }, { "epoch": 0.036178081013674285, "grad_norm": 0.0, "learning_rate": 8.877537327879087e-06, "loss": 15.9832, "step": 631 }, { "epoch": 0.03623541553192099, "grad_norm": 0.0, "learning_rate": 8.873626934986492e-06, "loss": 15.3101, "step": 632 }, { "epoch": 0.0362927500501677, "grad_norm": 0.0, "learning_rate": 8.869710606909091e-06, "loss": 15.407, "step": 633 }, { "epoch": 0.03635008456841441, "grad_norm": 0.0, "learning_rate": 8.865788349647496e-06, "loss": 15.7147, "step": 634 }, { "epoch": 0.036407419086661126, "grad_norm": 0.0, "learning_rate": 8.8618601692114e-06, "loss": 15.4474, "step": 635 }, { "epoch": 0.036464753604907835, "grad_norm": 0.0, "learning_rate": 8.857926071619578e-06, "loss": 15.5297, "step": 636 }, { "epoch": 0.03652208812315454, "grad_norm": 0.0, "learning_rate": 8.853986062899869e-06, "loss": 15.1538, "step": 637 }, { "epoch": 0.03657942264140126, "grad_norm": 0.0, "learning_rate": 8.850040149089164e-06, "loss": 15.865, "step": 638 }, { "epoch": 0.03663675715964797, "grad_norm": 0.0, "learning_rate": 8.846088336233407e-06, "loss": 15.3478, "step": 639 }, { "epoch": 0.036694091677894676, "grad_norm": 0.0, "learning_rate": 8.842130630387583e-06, "loss": 15.6966, "step": 640 }, { "epoch": 0.036751426196141385, "grad_norm": 0.0, "learning_rate": 8.838167037615699e-06, "loss": 15.8752, "step": 641 }, { "epoch": 0.0368087607143881, "grad_norm": 0.0, "learning_rate": 8.834197563990789e-06, "loss": 15.4659, "step": 642 }, { "epoch": 0.03686609523263481, "grad_norm": 0.0, "learning_rate": 8.83022221559489e-06, "loss": 15.51, "step": 643 }, { "epoch": 0.03692342975088152, "grad_norm": 0.0, "learning_rate": 8.826240998519052e-06, "loss": 15.7284, "step": 644 }, { "epoch": 0.036980764269128226, "grad_norm": 0.0, "learning_rate": 8.822253918863301e-06, "loss": 15.3795, "step": 645 }, { "epoch": 0.03703809878737494, "grad_norm": 0.0, "learning_rate": 8.818260982736662e-06, "loss": 15.6749, "step": 646 }, { "epoch": 0.03709543330562165, "grad_norm": 0.0, "learning_rate": 8.814262196257121e-06, "loss": 15.7075, "step": 647 }, { "epoch": 0.03715276782386836, "grad_norm": 0.0, "learning_rate": 8.810257565551634e-06, "loss": 15.3014, "step": 648 }, { "epoch": 0.03721010234211507, "grad_norm": 0.0, "learning_rate": 8.80624709675611e-06, "loss": 15.6637, "step": 649 }, { "epoch": 0.03726743686036178, "grad_norm": 0.0, "learning_rate": 8.802230796015406e-06, "loss": 15.8715, "step": 650 }, { "epoch": 0.03732477137860849, "grad_norm": 0.0, "learning_rate": 8.79820866948331e-06, "loss": 15.6308, "step": 651 }, { "epoch": 0.0373821058968552, "grad_norm": 0.0, "learning_rate": 8.794180723322537e-06, "loss": 15.8427, "step": 652 }, { "epoch": 0.037439440415101916, "grad_norm": 0.0, "learning_rate": 8.790146963704722e-06, "loss": 15.6036, "step": 653 }, { "epoch": 0.037496774933348624, "grad_norm": 0.0, "learning_rate": 8.786107396810405e-06, "loss": 15.7593, "step": 654 }, { "epoch": 0.03755410945159533, "grad_norm": 0.0, "learning_rate": 8.782062028829028e-06, "loss": 15.302, "step": 655 }, { "epoch": 0.03761144396984204, "grad_norm": 0.0, "learning_rate": 8.778010865958913e-06, "loss": 15.7448, "step": 656 }, { "epoch": 0.03766877848808876, "grad_norm": 0.0, "learning_rate": 8.773953914407267e-06, "loss": 15.527, "step": 657 }, { "epoch": 0.037726113006335465, "grad_norm": 0.0, "learning_rate": 8.769891180390168e-06, "loss": 15.6792, "step": 658 }, { "epoch": 0.037783447524582174, "grad_norm": 0.0, "learning_rate": 8.765822670132549e-06, "loss": 15.4365, "step": 659 }, { "epoch": 0.03784078204282888, "grad_norm": 0.0, "learning_rate": 8.761748389868197e-06, "loss": 15.2451, "step": 660 }, { "epoch": 0.0378981165610756, "grad_norm": 0.0, "learning_rate": 8.757668345839739e-06, "loss": 15.2096, "step": 661 }, { "epoch": 0.03795545107932231, "grad_norm": 0.0, "learning_rate": 8.75358254429863e-06, "loss": 15.507, "step": 662 }, { "epoch": 0.038012785597569015, "grad_norm": 0.0, "learning_rate": 8.749490991505153e-06, "loss": 15.5514, "step": 663 }, { "epoch": 0.038070120115815724, "grad_norm": 0.0, "learning_rate": 8.745393693728395e-06, "loss": 15.6321, "step": 664 }, { "epoch": 0.03812745463406244, "grad_norm": 0.0, "learning_rate": 8.741290657246255e-06, "loss": 15.6004, "step": 665 }, { "epoch": 0.03818478915230915, "grad_norm": 0.0, "learning_rate": 8.737181888345419e-06, "loss": 15.2442, "step": 666 }, { "epoch": 0.03824212367055586, "grad_norm": 0.0, "learning_rate": 8.733067393321354e-06, "loss": 15.4412, "step": 667 }, { "epoch": 0.038299458188802565, "grad_norm": 0.0, "learning_rate": 8.728947178478308e-06, "loss": 15.5328, "step": 668 }, { "epoch": 0.03835679270704928, "grad_norm": 0.0, "learning_rate": 8.724821250129286e-06, "loss": 15.3495, "step": 669 }, { "epoch": 0.03841412722529599, "grad_norm": 0.0, "learning_rate": 8.72068961459605e-06, "loss": 15.3269, "step": 670 }, { "epoch": 0.0384714617435427, "grad_norm": 0.0, "learning_rate": 8.716552278209106e-06, "loss": 15.337, "step": 671 }, { "epoch": 0.038528796261789414, "grad_norm": 0.0, "learning_rate": 8.712409247307696e-06, "loss": 15.5897, "step": 672 }, { "epoch": 0.03858613078003612, "grad_norm": 0.0, "learning_rate": 8.708260528239788e-06, "loss": 15.5198, "step": 673 }, { "epoch": 0.03864346529828283, "grad_norm": 0.0, "learning_rate": 8.704106127362063e-06, "loss": 15.4092, "step": 674 }, { "epoch": 0.03870079981652954, "grad_norm": 0.0, "learning_rate": 8.699946051039907e-06, "loss": 15.6915, "step": 675 }, { "epoch": 0.038758134334776255, "grad_norm": 0.0, "learning_rate": 8.695780305647405e-06, "loss": 15.4175, "step": 676 }, { "epoch": 0.038815468853022964, "grad_norm": 0.0, "learning_rate": 8.691608897567328e-06, "loss": 15.4632, "step": 677 }, { "epoch": 0.03887280337126967, "grad_norm": 0.0, "learning_rate": 8.68743183319112e-06, "loss": 15.816, "step": 678 }, { "epoch": 0.03893013788951638, "grad_norm": 0.0, "learning_rate": 8.683249118918895e-06, "loss": 15.2258, "step": 679 }, { "epoch": 0.038987472407763096, "grad_norm": 0.0, "learning_rate": 8.67906076115942e-06, "loss": 15.7341, "step": 680 }, { "epoch": 0.039044806926009805, "grad_norm": 0.0, "learning_rate": 8.674866766330117e-06, "loss": 15.3757, "step": 681 }, { "epoch": 0.039102141444256514, "grad_norm": 0.0, "learning_rate": 8.670667140857034e-06, "loss": 15.7797, "step": 682 }, { "epoch": 0.03915947596250322, "grad_norm": 0.0, "learning_rate": 8.666461891174854e-06, "loss": 15.3798, "step": 683 }, { "epoch": 0.03921681048074994, "grad_norm": 0.0, "learning_rate": 8.662251023726874e-06, "loss": 15.3629, "step": 684 }, { "epoch": 0.039274144998996646, "grad_norm": 0.0, "learning_rate": 8.658034544965003e-06, "loss": 15.5817, "step": 685 }, { "epoch": 0.039331479517243355, "grad_norm": 0.0, "learning_rate": 8.653812461349742e-06, "loss": 15.8776, "step": 686 }, { "epoch": 0.03938881403549006, "grad_norm": 0.0, "learning_rate": 8.649584779350184e-06, "loss": 15.4464, "step": 687 }, { "epoch": 0.03944614855373678, "grad_norm": 0.0, "learning_rate": 8.645351505443997e-06, "loss": 15.5939, "step": 688 }, { "epoch": 0.03950348307198349, "grad_norm": 0.0, "learning_rate": 8.641112646117419e-06, "loss": 15.7406, "step": 689 }, { "epoch": 0.039560817590230196, "grad_norm": 0.0, "learning_rate": 8.636868207865244e-06, "loss": 15.4987, "step": 690 }, { "epoch": 0.03961815210847691, "grad_norm": 0.0, "learning_rate": 8.632618197190817e-06, "loss": 15.8416, "step": 691 }, { "epoch": 0.03967548662672362, "grad_norm": 0.0, "learning_rate": 8.628362620606017e-06, "loss": 15.942, "step": 692 }, { "epoch": 0.03973282114497033, "grad_norm": 0.0, "learning_rate": 8.624101484631255e-06, "loss": 15.5289, "step": 693 }, { "epoch": 0.03979015566321704, "grad_norm": 0.0, "learning_rate": 8.619834795795458e-06, "loss": 15.5221, "step": 694 }, { "epoch": 0.03984749018146375, "grad_norm": 0.0, "learning_rate": 8.615562560636063e-06, "loss": 15.4384, "step": 695 }, { "epoch": 0.03990482469971046, "grad_norm": 0.0, "learning_rate": 8.611284785699001e-06, "loss": 15.4652, "step": 696 }, { "epoch": 0.03996215921795717, "grad_norm": 0.0, "learning_rate": 8.607001477538697e-06, "loss": 15.6511, "step": 697 }, { "epoch": 0.04001949373620388, "grad_norm": 0.0, "learning_rate": 8.602712642718047e-06, "loss": 15.5522, "step": 698 }, { "epoch": 0.040076828254450594, "grad_norm": 0.0, "learning_rate": 8.598418287808424e-06, "loss": 15.8704, "step": 699 }, { "epoch": 0.0401341627726973, "grad_norm": 0.0, "learning_rate": 8.594118419389648e-06, "loss": 15.3411, "step": 700 }, { "epoch": 0.04019149729094401, "grad_norm": 0.0, "learning_rate": 8.589813044049995e-06, "loss": 15.7049, "step": 701 }, { "epoch": 0.04024883180919072, "grad_norm": 0.0, "learning_rate": 8.585502168386177e-06, "loss": 15.7626, "step": 702 }, { "epoch": 0.040306166327437436, "grad_norm": 0.0, "learning_rate": 8.581185799003334e-06, "loss": 15.8192, "step": 703 }, { "epoch": 0.040363500845684144, "grad_norm": 0.0, "learning_rate": 8.576863942515019e-06, "loss": 15.7336, "step": 704 }, { "epoch": 0.04042083536393085, "grad_norm": 0.0, "learning_rate": 8.572536605543197e-06, "loss": 15.7258, "step": 705 }, { "epoch": 0.04047816988217757, "grad_norm": 0.0, "learning_rate": 8.568203794718228e-06, "loss": 15.6306, "step": 706 }, { "epoch": 0.04053550440042428, "grad_norm": 0.0, "learning_rate": 8.563865516678863e-06, "loss": 15.858, "step": 707 }, { "epoch": 0.040592838918670986, "grad_norm": 0.0, "learning_rate": 8.559521778072225e-06, "loss": 15.4747, "step": 708 }, { "epoch": 0.040650173436917694, "grad_norm": 0.0, "learning_rate": 8.555172585553804e-06, "loss": 15.491, "step": 709 }, { "epoch": 0.04070750795516441, "grad_norm": 0.0, "learning_rate": 8.550817945787452e-06, "loss": 15.6943, "step": 710 }, { "epoch": 0.04076484247341112, "grad_norm": 0.0, "learning_rate": 8.546457865445359e-06, "loss": 15.9682, "step": 711 }, { "epoch": 0.04082217699165783, "grad_norm": 0.0, "learning_rate": 8.542092351208058e-06, "loss": 15.6275, "step": 712 }, { "epoch": 0.040879511509904536, "grad_norm": 0.0, "learning_rate": 8.537721409764406e-06, "loss": 15.8521, "step": 713 }, { "epoch": 0.04093684602815125, "grad_norm": 0.0, "learning_rate": 8.533345047811572e-06, "loss": 15.4229, "step": 714 }, { "epoch": 0.04099418054639796, "grad_norm": 0.0, "learning_rate": 8.528963272055036e-06, "loss": 15.3714, "step": 715 }, { "epoch": 0.04105151506464467, "grad_norm": 0.0, "learning_rate": 8.524576089208567e-06, "loss": 15.2679, "step": 716 }, { "epoch": 0.04110884958289138, "grad_norm": 0.0, "learning_rate": 8.520183505994227e-06, "loss": 15.6539, "step": 717 }, { "epoch": 0.04116618410113809, "grad_norm": 0.0, "learning_rate": 8.515785529142339e-06, "loss": 15.5492, "step": 718 }, { "epoch": 0.0412235186193848, "grad_norm": 0.0, "learning_rate": 8.511382165391508e-06, "loss": 15.3739, "step": 719 }, { "epoch": 0.04128085313763151, "grad_norm": 0.0, "learning_rate": 8.50697342148858e-06, "loss": 15.6877, "step": 720 }, { "epoch": 0.04133818765587822, "grad_norm": 0.0, "learning_rate": 8.502559304188644e-06, "loss": 15.6241, "step": 721 }, { "epoch": 0.041395522174124934, "grad_norm": 0.0, "learning_rate": 8.498139820255033e-06, "loss": 15.3811, "step": 722 }, { "epoch": 0.04145285669237164, "grad_norm": 0.0, "learning_rate": 8.49371497645929e-06, "loss": 15.1943, "step": 723 }, { "epoch": 0.04151019121061835, "grad_norm": 0.0, "learning_rate": 8.489284779581179e-06, "loss": 15.5301, "step": 724 }, { "epoch": 0.041567525728865067, "grad_norm": 0.0, "learning_rate": 8.48484923640866e-06, "loss": 15.8323, "step": 725 }, { "epoch": 0.041624860247111775, "grad_norm": 0.0, "learning_rate": 8.480408353737894e-06, "loss": 15.6009, "step": 726 }, { "epoch": 0.041682194765358484, "grad_norm": 0.0, "learning_rate": 8.475962138373212e-06, "loss": 15.5931, "step": 727 }, { "epoch": 0.04173952928360519, "grad_norm": 0.0, "learning_rate": 8.471510597127122e-06, "loss": 15.7055, "step": 728 }, { "epoch": 0.04179686380185191, "grad_norm": 0.0, "learning_rate": 8.467053736820292e-06, "loss": 15.3792, "step": 729 }, { "epoch": 0.041854198320098616, "grad_norm": 0.0, "learning_rate": 8.46259156428154e-06, "loss": 15.7116, "step": 730 }, { "epoch": 0.041911532838345325, "grad_norm": 0.0, "learning_rate": 8.458124086347818e-06, "loss": 15.0395, "step": 731 }, { "epoch": 0.041968867356592034, "grad_norm": 0.0, "learning_rate": 8.453651309864215e-06, "loss": 15.159, "step": 732 }, { "epoch": 0.04202620187483875, "grad_norm": 0.0, "learning_rate": 8.449173241683934e-06, "loss": 15.8408, "step": 733 }, { "epoch": 0.04208353639308546, "grad_norm": 0.0, "learning_rate": 8.444689888668288e-06, "loss": 15.5556, "step": 734 }, { "epoch": 0.042140870911332166, "grad_norm": 0.0, "learning_rate": 8.440201257686684e-06, "loss": 15.5377, "step": 735 }, { "epoch": 0.042198205429578875, "grad_norm": 0.0, "learning_rate": 8.43570735561662e-06, "loss": 15.1917, "step": 736 }, { "epoch": 0.04225553994782559, "grad_norm": 0.0, "learning_rate": 8.43120818934367e-06, "loss": 15.3535, "step": 737 }, { "epoch": 0.0423128744660723, "grad_norm": 0.0, "learning_rate": 8.426703765761468e-06, "loss": 15.2347, "step": 738 }, { "epoch": 0.04237020898431901, "grad_norm": 0.0, "learning_rate": 8.422194091771709e-06, "loss": 15.5976, "step": 739 }, { "epoch": 0.042427543502565716, "grad_norm": 0.0, "learning_rate": 8.417679174284135e-06, "loss": 15.6626, "step": 740 }, { "epoch": 0.04248487802081243, "grad_norm": 0.0, "learning_rate": 8.413159020216512e-06, "loss": 15.7098, "step": 741 }, { "epoch": 0.04254221253905914, "grad_norm": 0.0, "learning_rate": 8.408633636494643e-06, "loss": 15.3637, "step": 742 }, { "epoch": 0.04259954705730585, "grad_norm": 0.0, "learning_rate": 8.404103030052332e-06, "loss": 15.7865, "step": 743 }, { "epoch": 0.042656881575552565, "grad_norm": 0.0, "learning_rate": 8.399567207831394e-06, "loss": 16.0023, "step": 744 }, { "epoch": 0.04271421609379927, "grad_norm": 0.0, "learning_rate": 8.395026176781627e-06, "loss": 15.3886, "step": 745 }, { "epoch": 0.04277155061204598, "grad_norm": 0.0, "learning_rate": 8.390479943860817e-06, "loss": 15.4875, "step": 746 }, { "epoch": 0.04282888513029269, "grad_norm": 0.0, "learning_rate": 8.385928516034718e-06, "loss": 15.4501, "step": 747 }, { "epoch": 0.042886219648539406, "grad_norm": 0.0, "learning_rate": 8.381371900277045e-06, "loss": 15.6047, "step": 748 }, { "epoch": 0.042943554166786115, "grad_norm": 0.0, "learning_rate": 8.37681010356946e-06, "loss": 15.6225, "step": 749 }, { "epoch": 0.04300088868503282, "grad_norm": 0.0, "learning_rate": 8.372243132901563e-06, "loss": 15.376, "step": 750 }, { "epoch": 0.04305822320327953, "grad_norm": 0.0, "learning_rate": 8.367670995270883e-06, "loss": 15.7378, "step": 751 }, { "epoch": 0.04311555772152625, "grad_norm": 0.0, "learning_rate": 8.363093697682865e-06, "loss": 15.644, "step": 752 }, { "epoch": 0.043172892239772956, "grad_norm": 0.0, "learning_rate": 8.358511247150861e-06, "loss": 15.7673, "step": 753 }, { "epoch": 0.043230226758019664, "grad_norm": 0.0, "learning_rate": 8.353923650696119e-06, "loss": 15.1973, "step": 754 }, { "epoch": 0.04328756127626637, "grad_norm": 0.0, "learning_rate": 8.349330915347766e-06, "loss": 15.6869, "step": 755 }, { "epoch": 0.04334489579451309, "grad_norm": 0.0, "learning_rate": 8.344733048142814e-06, "loss": 15.4703, "step": 756 }, { "epoch": 0.0434022303127598, "grad_norm": 0.0, "learning_rate": 8.340130056126126e-06, "loss": 15.4401, "step": 757 }, { "epoch": 0.043459564831006506, "grad_norm": 0.0, "learning_rate": 8.335521946350424e-06, "loss": 15.6927, "step": 758 }, { "epoch": 0.04351689934925322, "grad_norm": 0.0, "learning_rate": 8.33090872587627e-06, "loss": 15.32, "step": 759 }, { "epoch": 0.04357423386749993, "grad_norm": 0.0, "learning_rate": 8.326290401772057e-06, "loss": 15.8423, "step": 760 }, { "epoch": 0.04363156838574664, "grad_norm": 0.0, "learning_rate": 8.321666981113998e-06, "loss": 15.8161, "step": 761 }, { "epoch": 0.04368890290399335, "grad_norm": 0.0, "learning_rate": 8.317038470986113e-06, "loss": 15.6991, "step": 762 }, { "epoch": 0.04374623742224006, "grad_norm": 0.0, "learning_rate": 8.312404878480222e-06, "loss": 15.5216, "step": 763 }, { "epoch": 0.04380357194048677, "grad_norm": 0.0, "learning_rate": 8.307766210695933e-06, "loss": 15.4135, "step": 764 }, { "epoch": 0.04386090645873348, "grad_norm": 0.0, "learning_rate": 8.303122474740625e-06, "loss": 15.435, "step": 765 }, { "epoch": 0.04391824097698019, "grad_norm": 0.0, "learning_rate": 8.298473677729453e-06, "loss": 15.6051, "step": 766 }, { "epoch": 0.043975575495226904, "grad_norm": 0.0, "learning_rate": 8.293819826785315e-06, "loss": 15.5189, "step": 767 }, { "epoch": 0.04403291001347361, "grad_norm": 0.0, "learning_rate": 8.289160929038858e-06, "loss": 15.6029, "step": 768 }, { "epoch": 0.04409024453172032, "grad_norm": 0.0, "learning_rate": 8.284496991628465e-06, "loss": 15.4684, "step": 769 }, { "epoch": 0.04414757904996703, "grad_norm": 0.0, "learning_rate": 8.279828021700235e-06, "loss": 15.5391, "step": 770 }, { "epoch": 0.044204913568213745, "grad_norm": 0.0, "learning_rate": 8.27515402640798e-06, "loss": 15.3327, "step": 771 }, { "epoch": 0.044262248086460454, "grad_norm": 0.0, "learning_rate": 8.270475012913212e-06, "loss": 15.4333, "step": 772 }, { "epoch": 0.04431958260470716, "grad_norm": 0.0, "learning_rate": 8.265790988385132e-06, "loss": 15.4344, "step": 773 }, { "epoch": 0.04437691712295387, "grad_norm": 0.0, "learning_rate": 8.261101960000619e-06, "loss": 15.1989, "step": 774 }, { "epoch": 0.04443425164120059, "grad_norm": 0.0, "learning_rate": 8.25640793494422e-06, "loss": 15.5202, "step": 775 }, { "epoch": 0.044491586159447295, "grad_norm": 0.0, "learning_rate": 8.251708920408135e-06, "loss": 15.9113, "step": 776 }, { "epoch": 0.044548920677694004, "grad_norm": 0.0, "learning_rate": 8.247004923592212e-06, "loss": 15.5743, "step": 777 }, { "epoch": 0.04460625519594072, "grad_norm": 0.0, "learning_rate": 8.24229595170393e-06, "loss": 15.4708, "step": 778 }, { "epoch": 0.04466358971418743, "grad_norm": 0.0, "learning_rate": 8.237582011958392e-06, "loss": 15.5196, "step": 779 }, { "epoch": 0.04472092423243414, "grad_norm": 0.0, "learning_rate": 8.232863111578314e-06, "loss": 15.7244, "step": 780 }, { "epoch": 0.044778258750680845, "grad_norm": 0.0, "learning_rate": 8.228139257794012e-06, "loss": 15.2314, "step": 781 }, { "epoch": 0.04483559326892756, "grad_norm": 0.0, "learning_rate": 8.223410457843392e-06, "loss": 15.4909, "step": 782 }, { "epoch": 0.04489292778717427, "grad_norm": 0.0, "learning_rate": 8.218676718971936e-06, "loss": 15.3051, "step": 783 }, { "epoch": 0.04495026230542098, "grad_norm": 0.0, "learning_rate": 8.213938048432697e-06, "loss": 15.151, "step": 784 }, { "epoch": 0.04500759682366769, "grad_norm": 0.0, "learning_rate": 8.209194453486283e-06, "loss": 15.5094, "step": 785 }, { "epoch": 0.0450649313419144, "grad_norm": 0.0, "learning_rate": 8.204445941400844e-06, "loss": 15.2905, "step": 786 }, { "epoch": 0.04512226586016111, "grad_norm": 0.0, "learning_rate": 8.19969251945207e-06, "loss": 15.5901, "step": 787 }, { "epoch": 0.04517960037840782, "grad_norm": 0.0, "learning_rate": 8.194934194923167e-06, "loss": 15.7173, "step": 788 }, { "epoch": 0.04523693489665453, "grad_norm": 0.0, "learning_rate": 8.190170975104862e-06, "loss": 15.4733, "step": 789 }, { "epoch": 0.04529426941490124, "grad_norm": 0.0, "learning_rate": 8.185402867295373e-06, "loss": 15.7784, "step": 790 }, { "epoch": 0.04535160393314795, "grad_norm": 0.0, "learning_rate": 8.180629878800413e-06, "loss": 15.7074, "step": 791 }, { "epoch": 0.04540893845139466, "grad_norm": 0.0, "learning_rate": 8.175852016933172e-06, "loss": 15.3187, "step": 792 }, { "epoch": 0.04546627296964137, "grad_norm": 0.0, "learning_rate": 8.171069289014307e-06, "loss": 15.8274, "step": 793 }, { "epoch": 0.045523607487888085, "grad_norm": 0.0, "learning_rate": 8.166281702371929e-06, "loss": 15.3859, "step": 794 }, { "epoch": 0.04558094200613479, "grad_norm": 0.0, "learning_rate": 8.161489264341596e-06, "loss": 15.6264, "step": 795 }, { "epoch": 0.0456382765243815, "grad_norm": 0.0, "learning_rate": 8.156691982266299e-06, "loss": 15.773, "step": 796 }, { "epoch": 0.04569561104262822, "grad_norm": 0.0, "learning_rate": 8.151889863496448e-06, "loss": 15.4782, "step": 797 }, { "epoch": 0.045752945560874926, "grad_norm": 0.0, "learning_rate": 8.14708291538987e-06, "loss": 15.7671, "step": 798 }, { "epoch": 0.045810280079121635, "grad_norm": 0.0, "learning_rate": 8.142271145311784e-06, "loss": 15.0243, "step": 799 }, { "epoch": 0.04586761459736834, "grad_norm": 0.0, "learning_rate": 8.137454560634803e-06, "loss": 15.6344, "step": 800 }, { "epoch": 0.04592494911561506, "grad_norm": 0.0, "learning_rate": 8.132633168738917e-06, "loss": 15.5289, "step": 801 }, { "epoch": 0.04598228363386177, "grad_norm": 0.0, "learning_rate": 8.127806977011476e-06, "loss": 15.4143, "step": 802 }, { "epoch": 0.046039618152108476, "grad_norm": 0.0, "learning_rate": 8.122975992847189e-06, "loss": 15.4817, "step": 803 }, { "epoch": 0.046096952670355185, "grad_norm": 0.0, "learning_rate": 8.118140223648108e-06, "loss": 15.7881, "step": 804 }, { "epoch": 0.0461542871886019, "grad_norm": 0.0, "learning_rate": 8.113299676823614e-06, "loss": 15.3555, "step": 805 }, { "epoch": 0.04621162170684861, "grad_norm": 0.0, "learning_rate": 8.108454359790414e-06, "loss": 15.3229, "step": 806 }, { "epoch": 0.04626895622509532, "grad_norm": 0.0, "learning_rate": 8.103604279972513e-06, "loss": 15.589, "step": 807 }, { "epoch": 0.046326290743342026, "grad_norm": 0.0, "learning_rate": 8.098749444801226e-06, "loss": 15.1841, "step": 808 }, { "epoch": 0.04638362526158874, "grad_norm": 0.0, "learning_rate": 8.093889861715144e-06, "loss": 15.4068, "step": 809 }, { "epoch": 0.04644095977983545, "grad_norm": 0.0, "learning_rate": 8.089025538160142e-06, "loss": 14.9371, "step": 810 }, { "epoch": 0.04649829429808216, "grad_norm": 0.0, "learning_rate": 8.08415648158935e-06, "loss": 15.4922, "step": 811 }, { "epoch": 0.046555628816328874, "grad_norm": 0.0, "learning_rate": 8.079282699463155e-06, "loss": 15.5137, "step": 812 }, { "epoch": 0.04661296333457558, "grad_norm": 0.0, "learning_rate": 8.074404199249184e-06, "loss": 15.7668, "step": 813 }, { "epoch": 0.04667029785282229, "grad_norm": 0.0, "learning_rate": 8.069520988422292e-06, "loss": 15.527, "step": 814 }, { "epoch": 0.046727632371069, "grad_norm": 0.0, "learning_rate": 8.064633074464548e-06, "loss": 15.4981, "step": 815 }, { "epoch": 0.046784966889315716, "grad_norm": 0.0, "learning_rate": 8.059740464865237e-06, "loss": 15.3851, "step": 816 }, { "epoch": 0.046842301407562424, "grad_norm": 0.0, "learning_rate": 8.054843167120827e-06, "loss": 15.5792, "step": 817 }, { "epoch": 0.04689963592580913, "grad_norm": 0.0, "learning_rate": 8.04994118873498e-06, "loss": 15.5655, "step": 818 }, { "epoch": 0.04695697044405584, "grad_norm": 0.0, "learning_rate": 8.04503453721852e-06, "loss": 15.2363, "step": 819 }, { "epoch": 0.04701430496230256, "grad_norm": 0.0, "learning_rate": 8.040123220089437e-06, "loss": 15.2825, "step": 820 }, { "epoch": 0.047071639480549265, "grad_norm": 0.0, "learning_rate": 8.035207244872871e-06, "loss": 15.3609, "step": 821 }, { "epoch": 0.047128973998795974, "grad_norm": 0.0, "learning_rate": 8.030286619101094e-06, "loss": 15.4033, "step": 822 }, { "epoch": 0.04718630851704268, "grad_norm": 0.0, "learning_rate": 8.025361350313506e-06, "loss": 15.7496, "step": 823 }, { "epoch": 0.0472436430352894, "grad_norm": 0.0, "learning_rate": 8.020431446056622e-06, "loss": 15.4934, "step": 824 }, { "epoch": 0.04730097755353611, "grad_norm": 0.0, "learning_rate": 8.01549691388406e-06, "loss": 15.8934, "step": 825 }, { "epoch": 0.047358312071782815, "grad_norm": 0.0, "learning_rate": 8.010557761356523e-06, "loss": 15.6078, "step": 826 }, { "epoch": 0.047415646590029524, "grad_norm": 0.0, "learning_rate": 8.005613996041803e-06, "loss": 15.3614, "step": 827 }, { "epoch": 0.04747298110827624, "grad_norm": 0.0, "learning_rate": 8.000665625514752e-06, "loss": 15.5648, "step": 828 }, { "epoch": 0.04753031562652295, "grad_norm": 0.0, "learning_rate": 7.99571265735728e-06, "loss": 15.2218, "step": 829 }, { "epoch": 0.04758765014476966, "grad_norm": 0.0, "learning_rate": 7.990755099158346e-06, "loss": 15.5548, "step": 830 }, { "epoch": 0.04764498466301637, "grad_norm": 0.0, "learning_rate": 7.985792958513932e-06, "loss": 15.6068, "step": 831 }, { "epoch": 0.04770231918126308, "grad_norm": 0.0, "learning_rate": 7.980826243027052e-06, "loss": 15.4351, "step": 832 }, { "epoch": 0.04775965369950979, "grad_norm": 0.0, "learning_rate": 7.975854960307724e-06, "loss": 15.438, "step": 833 }, { "epoch": 0.0478169882177565, "grad_norm": 0.0, "learning_rate": 7.970879117972964e-06, "loss": 15.4016, "step": 834 }, { "epoch": 0.047874322736003214, "grad_norm": 0.0, "learning_rate": 7.965898723646777e-06, "loss": 15.4042, "step": 835 }, { "epoch": 0.04793165725424992, "grad_norm": 0.0, "learning_rate": 7.960913784960138e-06, "loss": 15.2968, "step": 836 }, { "epoch": 0.04798899177249663, "grad_norm": 0.0, "learning_rate": 7.955924309550991e-06, "loss": 15.1225, "step": 837 }, { "epoch": 0.04804632629074334, "grad_norm": 0.0, "learning_rate": 7.950930305064224e-06, "loss": 15.3629, "step": 838 }, { "epoch": 0.048103660808990055, "grad_norm": 0.0, "learning_rate": 7.94593177915167e-06, "loss": 15.3011, "step": 839 }, { "epoch": 0.048160995327236764, "grad_norm": 0.0, "learning_rate": 7.940928739472088e-06, "loss": 15.3236, "step": 840 }, { "epoch": 0.04821832984548347, "grad_norm": 0.0, "learning_rate": 7.935921193691153e-06, "loss": 15.7638, "step": 841 }, { "epoch": 0.04827566436373018, "grad_norm": 0.0, "learning_rate": 7.930909149481446e-06, "loss": 15.2309, "step": 842 }, { "epoch": 0.048332998881976896, "grad_norm": 0.0, "learning_rate": 7.925892614522433e-06, "loss": 15.6848, "step": 843 }, { "epoch": 0.048390333400223605, "grad_norm": 0.0, "learning_rate": 7.920871596500473e-06, "loss": 15.4714, "step": 844 }, { "epoch": 0.048447667918470314, "grad_norm": 0.0, "learning_rate": 7.915846103108784e-06, "loss": 15.3015, "step": 845 }, { "epoch": 0.04850500243671702, "grad_norm": 0.0, "learning_rate": 7.910816142047447e-06, "loss": 15.3926, "step": 846 }, { "epoch": 0.04856233695496374, "grad_norm": 0.0, "learning_rate": 7.905781721023384e-06, "loss": 15.4996, "step": 847 }, { "epoch": 0.048619671473210446, "grad_norm": 0.0, "learning_rate": 7.900742847750352e-06, "loss": 15.3543, "step": 848 }, { "epoch": 0.048677005991457155, "grad_norm": 0.0, "learning_rate": 7.895699529948932e-06, "loss": 15.3942, "step": 849 }, { "epoch": 0.04873434050970387, "grad_norm": 0.0, "learning_rate": 7.890651775346512e-06, "loss": 15.5402, "step": 850 }, { "epoch": 0.04879167502795058, "grad_norm": 0.0, "learning_rate": 7.885599591677283e-06, "loss": 15.9481, "step": 851 }, { "epoch": 0.04884900954619729, "grad_norm": 0.0, "learning_rate": 7.880542986682212e-06, "loss": 15.3632, "step": 852 }, { "epoch": 0.048906344064443996, "grad_norm": 0.0, "learning_rate": 7.875481968109052e-06, "loss": 15.7589, "step": 853 }, { "epoch": 0.04896367858269071, "grad_norm": 0.0, "learning_rate": 7.870416543712315e-06, "loss": 15.648, "step": 854 }, { "epoch": 0.04902101310093742, "grad_norm": 0.0, "learning_rate": 7.865346721253256e-06, "loss": 15.5267, "step": 855 }, { "epoch": 0.04907834761918413, "grad_norm": 0.0, "learning_rate": 7.860272508499877e-06, "loss": 15.6383, "step": 856 }, { "epoch": 0.04913568213743084, "grad_norm": 0.0, "learning_rate": 7.855193913226907e-06, "loss": 15.2878, "step": 857 }, { "epoch": 0.04919301665567755, "grad_norm": 0.0, "learning_rate": 7.850110943215785e-06, "loss": 15.734, "step": 858 }, { "epoch": 0.04925035117392426, "grad_norm": 0.0, "learning_rate": 7.845023606254658e-06, "loss": 15.7939, "step": 859 }, { "epoch": 0.04930768569217097, "grad_norm": 0.0, "learning_rate": 7.83993191013836e-06, "loss": 15.3507, "step": 860 }, { "epoch": 0.04936502021041768, "grad_norm": 0.0, "learning_rate": 7.834835862668405e-06, "loss": 15.2812, "step": 861 }, { "epoch": 0.049422354728664394, "grad_norm": 0.0, "learning_rate": 7.829735471652978e-06, "loss": 15.1451, "step": 862 }, { "epoch": 0.0494796892469111, "grad_norm": 0.0, "learning_rate": 7.82463074490691e-06, "loss": 15.6257, "step": 863 }, { "epoch": 0.04953702376515781, "grad_norm": 0.0, "learning_rate": 7.819521690251688e-06, "loss": 15.3859, "step": 864 }, { "epoch": 0.04959435828340453, "grad_norm": 0.0, "learning_rate": 7.814408315515419e-06, "loss": 15.6041, "step": 865 }, { "epoch": 0.049651692801651236, "grad_norm": 0.0, "learning_rate": 7.809290628532836e-06, "loss": 15.5338, "step": 866 }, { "epoch": 0.049709027319897944, "grad_norm": 0.0, "learning_rate": 7.804168637145276e-06, "loss": 15.3608, "step": 867 }, { "epoch": 0.04976636183814465, "grad_norm": 0.0, "learning_rate": 7.799042349200672e-06, "loss": 15.3891, "step": 868 }, { "epoch": 0.04982369635639137, "grad_norm": 0.0, "learning_rate": 7.793911772553542e-06, "loss": 15.2893, "step": 869 }, { "epoch": 0.04988103087463808, "grad_norm": 0.0, "learning_rate": 7.788776915064972e-06, "loss": 15.6297, "step": 870 }, { "epoch": 0.049938365392884786, "grad_norm": 0.0, "learning_rate": 7.783637784602608e-06, "loss": 15.6214, "step": 871 }, { "epoch": 0.049995699911131494, "grad_norm": 0.0, "learning_rate": 7.778494389040646e-06, "loss": 15.3664, "step": 872 }, { "epoch": 0.05005303442937821, "grad_norm": 0.0, "learning_rate": 7.773346736259815e-06, "loss": 15.2891, "step": 873 }, { "epoch": 0.05011036894762492, "grad_norm": 0.0, "learning_rate": 7.768194834147362e-06, "loss": 15.8051, "step": 874 }, { "epoch": 0.05016770346587163, "grad_norm": 0.0, "learning_rate": 7.763038690597055e-06, "loss": 15.5375, "step": 875 }, { "epoch": 0.050225037984118336, "grad_norm": 0.0, "learning_rate": 7.757878313509153e-06, "loss": 15.6352, "step": 876 }, { "epoch": 0.05028237250236505, "grad_norm": 0.0, "learning_rate": 7.752713710790405e-06, "loss": 15.6374, "step": 877 }, { "epoch": 0.05033970702061176, "grad_norm": 0.0, "learning_rate": 7.747544890354031e-06, "loss": 15.4307, "step": 878 }, { "epoch": 0.05039704153885847, "grad_norm": 0.0, "learning_rate": 7.742371860119718e-06, "loss": 15.0681, "step": 879 }, { "epoch": 0.05045437605710518, "grad_norm": 0.0, "learning_rate": 7.7371946280136e-06, "loss": 15.284, "step": 880 }, { "epoch": 0.05051171057535189, "grad_norm": 0.0, "learning_rate": 7.73201320196825e-06, "loss": 15.3285, "step": 881 }, { "epoch": 0.0505690450935986, "grad_norm": 0.0, "learning_rate": 7.72682758992267e-06, "loss": 15.2923, "step": 882 }, { "epoch": 0.05062637961184531, "grad_norm": 0.0, "learning_rate": 7.721637799822269e-06, "loss": 15.4685, "step": 883 }, { "epoch": 0.050683714130092025, "grad_norm": 0.0, "learning_rate": 7.716443839618863e-06, "loss": 15.4828, "step": 884 }, { "epoch": 0.050741048648338734, "grad_norm": 0.0, "learning_rate": 7.711245717270659e-06, "loss": 15.5624, "step": 885 }, { "epoch": 0.05079838316658544, "grad_norm": 0.0, "learning_rate": 7.706043440742235e-06, "loss": 15.46, "step": 886 }, { "epoch": 0.05085571768483215, "grad_norm": 0.0, "learning_rate": 7.70083701800454e-06, "loss": 15.5009, "step": 887 }, { "epoch": 0.05091305220307887, "grad_norm": 0.0, "learning_rate": 7.695626457034867e-06, "loss": 15.2884, "step": 888 }, { "epoch": 0.050970386721325575, "grad_norm": 0.0, "learning_rate": 7.690411765816864e-06, "loss": 16.1451, "step": 889 }, { "epoch": 0.051027721239572284, "grad_norm": 0.0, "learning_rate": 7.685192952340495e-06, "loss": 15.5103, "step": 890 }, { "epoch": 0.05108505575781899, "grad_norm": 0.0, "learning_rate": 7.679970024602044e-06, "loss": 15.2557, "step": 891 }, { "epoch": 0.05114239027606571, "grad_norm": 0.0, "learning_rate": 7.674742990604101e-06, "loss": 15.7524, "step": 892 }, { "epoch": 0.051199724794312416, "grad_norm": 0.0, "learning_rate": 7.669511858355545e-06, "loss": 15.4103, "step": 893 }, { "epoch": 0.051257059312559125, "grad_norm": 0.0, "learning_rate": 7.664276635871535e-06, "loss": 15.8326, "step": 894 }, { "epoch": 0.051314393830805834, "grad_norm": 0.0, "learning_rate": 7.659037331173498e-06, "loss": 15.4746, "step": 895 }, { "epoch": 0.05137172834905255, "grad_norm": 0.0, "learning_rate": 7.653793952289114e-06, "loss": 15.2673, "step": 896 }, { "epoch": 0.05142906286729926, "grad_norm": 0.0, "learning_rate": 7.648546507252308e-06, "loss": 15.5551, "step": 897 }, { "epoch": 0.051486397385545966, "grad_norm": 0.0, "learning_rate": 7.643295004103232e-06, "loss": 15.3011, "step": 898 }, { "epoch": 0.051543731903792675, "grad_norm": 0.0, "learning_rate": 7.638039450888259e-06, "loss": 15.2572, "step": 899 }, { "epoch": 0.05160106642203939, "grad_norm": 0.0, "learning_rate": 7.632779855659966e-06, "loss": 15.536, "step": 900 }, { "epoch": 0.0516584009402861, "grad_norm": 0.0, "learning_rate": 7.627516226477123e-06, "loss": 15.4528, "step": 901 }, { "epoch": 0.05171573545853281, "grad_norm": 0.0, "learning_rate": 7.62224857140468e-06, "loss": 15.1914, "step": 902 }, { "epoch": 0.05177306997677952, "grad_norm": 0.0, "learning_rate": 7.616976898513759e-06, "loss": 15.3216, "step": 903 }, { "epoch": 0.05183040449502623, "grad_norm": 0.0, "learning_rate": 7.611701215881635e-06, "loss": 15.4943, "step": 904 }, { "epoch": 0.05188773901327294, "grad_norm": 0.0, "learning_rate": 7.606421531591725e-06, "loss": 15.8347, "step": 905 }, { "epoch": 0.05194507353151965, "grad_norm": 0.0, "learning_rate": 7.601137853733583e-06, "loss": 15.4142, "step": 906 }, { "epoch": 0.052002408049766365, "grad_norm": 0.0, "learning_rate": 7.595850190402877e-06, "loss": 15.3806, "step": 907 }, { "epoch": 0.05205974256801307, "grad_norm": 0.0, "learning_rate": 7.590558549701383e-06, "loss": 15.4033, "step": 908 }, { "epoch": 0.05211707708625978, "grad_norm": 0.0, "learning_rate": 7.585262939736975e-06, "loss": 15.6882, "step": 909 }, { "epoch": 0.05217441160450649, "grad_norm": 0.0, "learning_rate": 7.579963368623602e-06, "loss": 15.5859, "step": 910 }, { "epoch": 0.052231746122753206, "grad_norm": 0.0, "learning_rate": 7.574659844481285e-06, "loss": 15.5471, "step": 911 }, { "epoch": 0.052289080640999915, "grad_norm": 0.0, "learning_rate": 7.569352375436102e-06, "loss": 15.3833, "step": 912 }, { "epoch": 0.05234641515924662, "grad_norm": 0.0, "learning_rate": 7.564040969620179e-06, "loss": 15.4828, "step": 913 }, { "epoch": 0.05240374967749333, "grad_norm": 0.0, "learning_rate": 7.558725635171669e-06, "loss": 15.7124, "step": 914 }, { "epoch": 0.05246108419574005, "grad_norm": 0.0, "learning_rate": 7.553406380234744e-06, "loss": 15.6596, "step": 915 }, { "epoch": 0.052518418713986756, "grad_norm": 0.0, "learning_rate": 7.548083212959588e-06, "loss": 15.318, "step": 916 }, { "epoch": 0.052575753232233464, "grad_norm": 0.0, "learning_rate": 7.542756141502376e-06, "loss": 15.3831, "step": 917 }, { "epoch": 0.05263308775048018, "grad_norm": 0.0, "learning_rate": 7.537425174025265e-06, "loss": 15.5979, "step": 918 }, { "epoch": 0.05269042226872689, "grad_norm": 0.0, "learning_rate": 7.532090318696382e-06, "loss": 15.5016, "step": 919 }, { "epoch": 0.0527477567869736, "grad_norm": 0.0, "learning_rate": 7.526751583689812e-06, "loss": 15.1329, "step": 920 }, { "epoch": 0.052805091305220306, "grad_norm": 0.0, "learning_rate": 7.521408977185584e-06, "loss": 15.3954, "step": 921 }, { "epoch": 0.05286242582346702, "grad_norm": 0.0, "learning_rate": 7.516062507369655e-06, "loss": 15.5931, "step": 922 }, { "epoch": 0.05291976034171373, "grad_norm": 0.0, "learning_rate": 7.510712182433908e-06, "loss": 15.7145, "step": 923 }, { "epoch": 0.05297709485996044, "grad_norm": 0.0, "learning_rate": 7.505358010576132e-06, "loss": 15.1615, "step": 924 }, { "epoch": 0.05303442937820715, "grad_norm": 0.0, "learning_rate": 7.500000000000001e-06, "loss": 15.3657, "step": 925 }, { "epoch": 0.05309176389645386, "grad_norm": 0.0, "learning_rate": 7.494638158915083e-06, "loss": 15.1117, "step": 926 }, { "epoch": 0.05314909841470057, "grad_norm": 0.0, "learning_rate": 7.489272495536809e-06, "loss": 15.3789, "step": 927 }, { "epoch": 0.05320643293294728, "grad_norm": 0.0, "learning_rate": 7.483903018086466e-06, "loss": 15.4555, "step": 928 }, { "epoch": 0.05326376745119399, "grad_norm": 0.0, "learning_rate": 7.4785297347911865e-06, "loss": 15.0974, "step": 929 }, { "epoch": 0.053321101969440704, "grad_norm": 0.0, "learning_rate": 7.473152653883934e-06, "loss": 15.3593, "step": 930 }, { "epoch": 0.05337843648768741, "grad_norm": 0.0, "learning_rate": 7.467771783603492e-06, "loss": 15.8678, "step": 931 }, { "epoch": 0.05343577100593412, "grad_norm": 0.0, "learning_rate": 7.4623871321944485e-06, "loss": 15.7244, "step": 932 }, { "epoch": 0.05349310552418083, "grad_norm": 0.0, "learning_rate": 7.456998707907184e-06, "loss": 15.1704, "step": 933 }, { "epoch": 0.053550440042427545, "grad_norm": 0.0, "learning_rate": 7.4516065189978625e-06, "loss": 15.4617, "step": 934 }, { "epoch": 0.053607774560674254, "grad_norm": 0.0, "learning_rate": 7.446210573728414e-06, "loss": 15.3451, "step": 935 }, { "epoch": 0.05366510907892096, "grad_norm": 0.0, "learning_rate": 7.440810880366524e-06, "loss": 15.4365, "step": 936 }, { "epoch": 0.05372244359716768, "grad_norm": 0.0, "learning_rate": 7.435407447185623e-06, "loss": 15.4767, "step": 937 }, { "epoch": 0.05377977811541439, "grad_norm": 0.0, "learning_rate": 7.430000282464872e-06, "loss": 15.3876, "step": 938 }, { "epoch": 0.053837112633661095, "grad_norm": 0.0, "learning_rate": 7.424589394489145e-06, "loss": 15.5479, "step": 939 }, { "epoch": 0.053894447151907804, "grad_norm": 0.0, "learning_rate": 7.419174791549023e-06, "loss": 15.3345, "step": 940 }, { "epoch": 0.05395178167015452, "grad_norm": 0.0, "learning_rate": 7.413756481940783e-06, "loss": 15.145, "step": 941 }, { "epoch": 0.05400911618840123, "grad_norm": 0.0, "learning_rate": 7.408334473966375e-06, "loss": 15.428, "step": 942 }, { "epoch": 0.05406645070664794, "grad_norm": 0.0, "learning_rate": 7.402908775933419e-06, "loss": 15.3543, "step": 943 }, { "epoch": 0.054123785224894645, "grad_norm": 0.0, "learning_rate": 7.39747939615519e-06, "loss": 15.5398, "step": 944 }, { "epoch": 0.05418111974314136, "grad_norm": 0.0, "learning_rate": 7.392046342950604e-06, "loss": 15.2644, "step": 945 }, { "epoch": 0.05423845426138807, "grad_norm": 0.0, "learning_rate": 7.386609624644201e-06, "loss": 15.641, "step": 946 }, { "epoch": 0.05429578877963478, "grad_norm": 0.0, "learning_rate": 7.38116924956614e-06, "loss": 15.3402, "step": 947 }, { "epoch": 0.05435312329788149, "grad_norm": 0.0, "learning_rate": 7.375725226052186e-06, "loss": 15.4802, "step": 948 }, { "epoch": 0.0544104578161282, "grad_norm": 0.0, "learning_rate": 7.370277562443689e-06, "loss": 15.6399, "step": 949 }, { "epoch": 0.05446779233437491, "grad_norm": 0.0, "learning_rate": 7.364826267087577e-06, "loss": 15.3748, "step": 950 }, { "epoch": 0.05452512685262162, "grad_norm": 0.0, "learning_rate": 7.359371348336346e-06, "loss": 15.559, "step": 951 }, { "epoch": 0.05458246137086833, "grad_norm": 0.0, "learning_rate": 7.353912814548042e-06, "loss": 15.786, "step": 952 }, { "epoch": 0.05463979588911504, "grad_norm": 0.0, "learning_rate": 7.348450674086247e-06, "loss": 15.3269, "step": 953 }, { "epoch": 0.05469713040736175, "grad_norm": 0.0, "learning_rate": 7.342984935320074e-06, "loss": 15.3853, "step": 954 }, { "epoch": 0.05475446492560846, "grad_norm": 0.0, "learning_rate": 7.337515606624148e-06, "loss": 15.5078, "step": 955 }, { "epoch": 0.054811799443855176, "grad_norm": 0.0, "learning_rate": 7.332042696378591e-06, "loss": 15.6212, "step": 956 }, { "epoch": 0.054869133962101885, "grad_norm": 0.0, "learning_rate": 7.326566212969016e-06, "loss": 15.6121, "step": 957 }, { "epoch": 0.05492646848034859, "grad_norm": 0.0, "learning_rate": 7.321086164786513e-06, "loss": 15.406, "step": 958 }, { "epoch": 0.0549838029985953, "grad_norm": 0.0, "learning_rate": 7.315602560227627e-06, "loss": 15.0786, "step": 959 }, { "epoch": 0.05504113751684202, "grad_norm": 0.0, "learning_rate": 7.310115407694358e-06, "loss": 15.2173, "step": 960 }, { "epoch": 0.055098472035088726, "grad_norm": 0.0, "learning_rate": 7.30462471559414e-06, "loss": 15.5237, "step": 961 }, { "epoch": 0.055155806553335435, "grad_norm": 0.0, "learning_rate": 7.299130492339833e-06, "loss": 15.7292, "step": 962 }, { "epoch": 0.05521314107158214, "grad_norm": 0.0, "learning_rate": 7.293632746349702e-06, "loss": 15.5719, "step": 963 }, { "epoch": 0.05527047558982886, "grad_norm": 0.0, "learning_rate": 7.288131486047414e-06, "loss": 15.5459, "step": 964 }, { "epoch": 0.05532781010807557, "grad_norm": 0.0, "learning_rate": 7.282626719862021e-06, "loss": 15.7095, "step": 965 }, { "epoch": 0.055385144626322276, "grad_norm": 0.0, "learning_rate": 7.277118456227941e-06, "loss": 15.6278, "step": 966 }, { "epoch": 0.055442479144568985, "grad_norm": 0.0, "learning_rate": 7.2716067035849595e-06, "loss": 15.4754, "step": 967 }, { "epoch": 0.0554998136628157, "grad_norm": 0.0, "learning_rate": 7.266091470378199e-06, "loss": 15.3922, "step": 968 }, { "epoch": 0.05555714818106241, "grad_norm": 0.0, "learning_rate": 7.260572765058124e-06, "loss": 15.4944, "step": 969 }, { "epoch": 0.05561448269930912, "grad_norm": 0.0, "learning_rate": 7.25505059608051e-06, "loss": 15.5733, "step": 970 }, { "epoch": 0.05567181721755583, "grad_norm": 0.0, "learning_rate": 7.249524971906445e-06, "loss": 15.4449, "step": 971 }, { "epoch": 0.05572915173580254, "grad_norm": 0.0, "learning_rate": 7.243995901002312e-06, "loss": 15.4991, "step": 972 }, { "epoch": 0.05578648625404925, "grad_norm": 0.0, "learning_rate": 7.23846339183977e-06, "loss": 15.4896, "step": 973 }, { "epoch": 0.05584382077229596, "grad_norm": 0.0, "learning_rate": 7.232927452895749e-06, "loss": 15.4687, "step": 974 }, { "epoch": 0.055901155290542674, "grad_norm": 0.0, "learning_rate": 7.227388092652436e-06, "loss": 15.5742, "step": 975 }, { "epoch": 0.05595848980878938, "grad_norm": 0.0, "learning_rate": 7.221845319597258e-06, "loss": 15.5478, "step": 976 }, { "epoch": 0.05601582432703609, "grad_norm": 0.0, "learning_rate": 7.216299142222869e-06, "loss": 15.545, "step": 977 }, { "epoch": 0.0560731588452828, "grad_norm": 0.0, "learning_rate": 7.210749569027145e-06, "loss": 15.5581, "step": 978 }, { "epoch": 0.056130493363529516, "grad_norm": 0.0, "learning_rate": 7.2051966085131584e-06, "loss": 15.317, "step": 979 }, { "epoch": 0.056187827881776224, "grad_norm": 0.0, "learning_rate": 7.199640269189176e-06, "loss": 15.5235, "step": 980 }, { "epoch": 0.05624516240002293, "grad_norm": 0.0, "learning_rate": 7.194080559568642e-06, "loss": 15.6216, "step": 981 }, { "epoch": 0.05630249691826964, "grad_norm": 0.0, "learning_rate": 7.18851748817016e-06, "loss": 15.7069, "step": 982 }, { "epoch": 0.05635983143651636, "grad_norm": 0.0, "learning_rate": 7.18295106351749e-06, "loss": 15.3323, "step": 983 }, { "epoch": 0.056417165954763066, "grad_norm": 0.0, "learning_rate": 7.177381294139527e-06, "loss": 15.568, "step": 984 }, { "epoch": 0.056474500473009774, "grad_norm": 0.0, "learning_rate": 7.1718081885702905e-06, "loss": 15.2707, "step": 985 }, { "epoch": 0.05653183499125648, "grad_norm": 0.0, "learning_rate": 7.1662317553489126e-06, "loss": 15.5602, "step": 986 }, { "epoch": 0.0565891695095032, "grad_norm": 0.0, "learning_rate": 7.160652003019624e-06, "loss": 15.4934, "step": 987 }, { "epoch": 0.05664650402774991, "grad_norm": 0.0, "learning_rate": 7.155068940131741e-06, "loss": 15.3652, "step": 988 }, { "epoch": 0.056703838545996615, "grad_norm": 0.0, "learning_rate": 7.149482575239653e-06, "loss": 15.1606, "step": 989 }, { "epoch": 0.05676117306424333, "grad_norm": 0.0, "learning_rate": 7.143892916902805e-06, "loss": 15.6928, "step": 990 }, { "epoch": 0.05681850758249004, "grad_norm": 0.0, "learning_rate": 7.138299973685694e-06, "loss": 15.3702, "step": 991 }, { "epoch": 0.05687584210073675, "grad_norm": 0.0, "learning_rate": 7.132703754157846e-06, "loss": 15.5082, "step": 992 }, { "epoch": 0.05693317661898346, "grad_norm": 0.0, "learning_rate": 7.1271042668938094e-06, "loss": 15.4877, "step": 993 }, { "epoch": 0.05699051113723017, "grad_norm": 0.0, "learning_rate": 7.121501520473137e-06, "loss": 15.6682, "step": 994 }, { "epoch": 0.05704784565547688, "grad_norm": 0.0, "learning_rate": 7.115895523480376e-06, "loss": 15.3158, "step": 995 }, { "epoch": 0.05710518017372359, "grad_norm": 0.0, "learning_rate": 7.110286284505058e-06, "loss": 15.2529, "step": 996 }, { "epoch": 0.0571625146919703, "grad_norm": 0.0, "learning_rate": 7.104673812141676e-06, "loss": 15.392, "step": 997 }, { "epoch": 0.057219849210217014, "grad_norm": 0.0, "learning_rate": 7.099058114989679e-06, "loss": 15.4152, "step": 998 }, { "epoch": 0.05727718372846372, "grad_norm": 0.0, "learning_rate": 7.09343920165346e-06, "loss": 15.5532, "step": 999 }, { "epoch": 0.05733451824671043, "grad_norm": 0.0, "learning_rate": 7.087817080742337e-06, "loss": 15.4105, "step": 1000 }, { "epoch": 0.05739185276495714, "grad_norm": 0.0, "learning_rate": 7.082191760870543e-06, "loss": 15.8258, "step": 1001 }, { "epoch": 0.057449187283203855, "grad_norm": 0.0, "learning_rate": 7.076563250657213e-06, "loss": 15.3265, "step": 1002 }, { "epoch": 0.057506521801450564, "grad_norm": 0.0, "learning_rate": 7.070931558726373e-06, "loss": 15.5073, "step": 1003 }, { "epoch": 0.05756385631969727, "grad_norm": 0.0, "learning_rate": 7.065296693706916e-06, "loss": 15.2938, "step": 1004 }, { "epoch": 0.05762119083794398, "grad_norm": 0.0, "learning_rate": 7.059658664232605e-06, "loss": 15.242, "step": 1005 }, { "epoch": 0.057678525356190696, "grad_norm": 0.0, "learning_rate": 7.054017478942048e-06, "loss": 15.3581, "step": 1006 }, { "epoch": 0.057735859874437405, "grad_norm": 0.0, "learning_rate": 7.048373146478691e-06, "loss": 15.524, "step": 1007 }, { "epoch": 0.057793194392684114, "grad_norm": 0.0, "learning_rate": 7.042725675490797e-06, "loss": 15.3818, "step": 1008 }, { "epoch": 0.05785052891093083, "grad_norm": 0.0, "learning_rate": 7.037075074631441e-06, "loss": 15.0783, "step": 1009 }, { "epoch": 0.05790786342917754, "grad_norm": 0.0, "learning_rate": 7.031421352558495e-06, "loss": 15.3572, "step": 1010 }, { "epoch": 0.057965197947424246, "grad_norm": 0.0, "learning_rate": 7.025764517934612e-06, "loss": 15.0614, "step": 1011 }, { "epoch": 0.058022532465670955, "grad_norm": 0.0, "learning_rate": 7.0201045794272135e-06, "loss": 15.2281, "step": 1012 }, { "epoch": 0.05807986698391767, "grad_norm": 0.0, "learning_rate": 7.0144415457084765e-06, "loss": 15.6632, "step": 1013 }, { "epoch": 0.05813720150216438, "grad_norm": 0.0, "learning_rate": 7.008775425455323e-06, "loss": 15.3456, "step": 1014 }, { "epoch": 0.05819453602041109, "grad_norm": 0.0, "learning_rate": 7.003106227349399e-06, "loss": 15.2589, "step": 1015 }, { "epoch": 0.058251870538657796, "grad_norm": 0.0, "learning_rate": 6.997433960077072e-06, "loss": 15.1371, "step": 1016 }, { "epoch": 0.05830920505690451, "grad_norm": 0.0, "learning_rate": 6.991758632329411e-06, "loss": 15.6141, "step": 1017 }, { "epoch": 0.05836653957515122, "grad_norm": 0.0, "learning_rate": 6.9860802528021705e-06, "loss": 15.4954, "step": 1018 }, { "epoch": 0.05842387409339793, "grad_norm": 0.0, "learning_rate": 6.980398830195785e-06, "loss": 15.5896, "step": 1019 }, { "epoch": 0.05848120861164464, "grad_norm": 0.0, "learning_rate": 6.97471437321535e-06, "loss": 15.4669, "step": 1020 }, { "epoch": 0.05853854312989135, "grad_norm": 0.0, "learning_rate": 6.969026890570612e-06, "loss": 15.2941, "step": 1021 }, { "epoch": 0.05859587764813806, "grad_norm": 0.0, "learning_rate": 6.963336390975949e-06, "loss": 15.3614, "step": 1022 }, { "epoch": 0.05865321216638477, "grad_norm": 0.0, "learning_rate": 6.957642883150365e-06, "loss": 15.4045, "step": 1023 }, { "epoch": 0.058710546684631486, "grad_norm": 0.0, "learning_rate": 6.9519463758174745e-06, "loss": 15.6422, "step": 1024 }, { "epoch": 0.058767881202878194, "grad_norm": 0.0, "learning_rate": 6.9462468777054855e-06, "loss": 15.4819, "step": 1025 }, { "epoch": 0.0588252157211249, "grad_norm": 0.0, "learning_rate": 6.940544397547189e-06, "loss": 15.569, "step": 1026 }, { "epoch": 0.05888255023937161, "grad_norm": 0.0, "learning_rate": 6.934838944079944e-06, "loss": 15.6353, "step": 1027 }, { "epoch": 0.05893988475761833, "grad_norm": 0.0, "learning_rate": 6.929130526045667e-06, "loss": 15.1708, "step": 1028 }, { "epoch": 0.058997219275865036, "grad_norm": 0.0, "learning_rate": 6.9234191521908176e-06, "loss": 15.071, "step": 1029 }, { "epoch": 0.059054553794111744, "grad_norm": 0.0, "learning_rate": 6.917704831266381e-06, "loss": 15.3808, "step": 1030 }, { "epoch": 0.05911188831235845, "grad_norm": 0.0, "learning_rate": 6.911987572027861e-06, "loss": 15.6273, "step": 1031 }, { "epoch": 0.05916922283060517, "grad_norm": 0.0, "learning_rate": 6.906267383235261e-06, "loss": 15.4842, "step": 1032 }, { "epoch": 0.05922655734885188, "grad_norm": 0.0, "learning_rate": 6.9005442736530745e-06, "loss": 15.3772, "step": 1033 }, { "epoch": 0.059283891867098586, "grad_norm": 0.0, "learning_rate": 6.894818252050272e-06, "loss": 15.1444, "step": 1034 }, { "epoch": 0.059341226385345294, "grad_norm": 0.0, "learning_rate": 6.889089327200282e-06, "loss": 15.694, "step": 1035 }, { "epoch": 0.05939856090359201, "grad_norm": 0.0, "learning_rate": 6.883357507880985e-06, "loss": 15.2363, "step": 1036 }, { "epoch": 0.05945589542183872, "grad_norm": 0.0, "learning_rate": 6.877622802874693e-06, "loss": 15.2058, "step": 1037 }, { "epoch": 0.05951322994008543, "grad_norm": 0.0, "learning_rate": 6.871885220968142e-06, "loss": 15.431, "step": 1038 }, { "epoch": 0.059570564458332136, "grad_norm": 0.0, "learning_rate": 6.866144770952474e-06, "loss": 15.2124, "step": 1039 }, { "epoch": 0.05962789897657885, "grad_norm": 0.0, "learning_rate": 6.86040146162323e-06, "loss": 15.531, "step": 1040 }, { "epoch": 0.05968523349482556, "grad_norm": 0.0, "learning_rate": 6.854655301780324e-06, "loss": 15.1198, "step": 1041 }, { "epoch": 0.05974256801307227, "grad_norm": 0.0, "learning_rate": 6.848906300228047e-06, "loss": 15.3151, "step": 1042 }, { "epoch": 0.059799902531318984, "grad_norm": 0.0, "learning_rate": 6.843154465775036e-06, "loss": 15.3529, "step": 1043 }, { "epoch": 0.05985723704956569, "grad_norm": 0.0, "learning_rate": 6.837399807234273e-06, "loss": 15.306, "step": 1044 }, { "epoch": 0.0599145715678124, "grad_norm": 0.0, "learning_rate": 6.831642333423068e-06, "loss": 15.5283, "step": 1045 }, { "epoch": 0.05997190608605911, "grad_norm": 0.0, "learning_rate": 6.825882053163039e-06, "loss": 15.5263, "step": 1046 }, { "epoch": 0.060029240604305825, "grad_norm": 0.0, "learning_rate": 6.820118975280109e-06, "loss": 15.101, "step": 1047 }, { "epoch": 0.060086575122552534, "grad_norm": 0.0, "learning_rate": 6.814353108604488e-06, "loss": 15.4583, "step": 1048 }, { "epoch": 0.06014390964079924, "grad_norm": 0.0, "learning_rate": 6.8085844619706555e-06, "loss": 15.0133, "step": 1049 }, { "epoch": 0.06020124415904595, "grad_norm": 0.0, "learning_rate": 6.802813044217353e-06, "loss": 15.3445, "step": 1050 }, { "epoch": 0.06025857867729267, "grad_norm": 0.0, "learning_rate": 6.797038864187564e-06, "loss": 15.4779, "step": 1051 }, { "epoch": 0.060315913195539375, "grad_norm": 0.0, "learning_rate": 6.791261930728513e-06, "loss": 15.5129, "step": 1052 }, { "epoch": 0.060373247713786084, "grad_norm": 0.0, "learning_rate": 6.785482252691634e-06, "loss": 15.3129, "step": 1053 }, { "epoch": 0.06043058223203279, "grad_norm": 0.0, "learning_rate": 6.77969983893257e-06, "loss": 15.2355, "step": 1054 }, { "epoch": 0.06048791675027951, "grad_norm": 0.0, "learning_rate": 6.773914698311157e-06, "loss": 15.3794, "step": 1055 }, { "epoch": 0.060545251268526216, "grad_norm": 0.0, "learning_rate": 6.768126839691408e-06, "loss": 15.4321, "step": 1056 }, { "epoch": 0.060602585786772925, "grad_norm": 0.0, "learning_rate": 6.762336271941499e-06, "loss": 15.3637, "step": 1057 }, { "epoch": 0.060659920305019634, "grad_norm": 0.0, "learning_rate": 6.756543003933758e-06, "loss": 15.3693, "step": 1058 }, { "epoch": 0.06071725482326635, "grad_norm": 0.0, "learning_rate": 6.750747044544654e-06, "loss": 15.0256, "step": 1059 }, { "epoch": 0.06077458934151306, "grad_norm": 0.0, "learning_rate": 6.7449484026547705e-06, "loss": 15.4155, "step": 1060 }, { "epoch": 0.060831923859759766, "grad_norm": 0.0, "learning_rate": 6.739147087148812e-06, "loss": 15.5012, "step": 1061 }, { "epoch": 0.06088925837800648, "grad_norm": 0.0, "learning_rate": 6.733343106915573e-06, "loss": 15.3416, "step": 1062 }, { "epoch": 0.06094659289625319, "grad_norm": 0.0, "learning_rate": 6.7275364708479316e-06, "loss": 15.4652, "step": 1063 }, { "epoch": 0.0610039274144999, "grad_norm": 0.0, "learning_rate": 6.721727187842837e-06, "loss": 15.2292, "step": 1064 }, { "epoch": 0.06106126193274661, "grad_norm": 0.0, "learning_rate": 6.715915266801292e-06, "loss": 15.1645, "step": 1065 }, { "epoch": 0.06111859645099332, "grad_norm": 0.0, "learning_rate": 6.710100716628345e-06, "loss": 15.1633, "step": 1066 }, { "epoch": 0.06117593096924003, "grad_norm": 0.0, "learning_rate": 6.704283546233066e-06, "loss": 15.2905, "step": 1067 }, { "epoch": 0.06123326548748674, "grad_norm": 0.0, "learning_rate": 6.6984637645285475e-06, "loss": 14.9021, "step": 1068 }, { "epoch": 0.06129060000573345, "grad_norm": 0.0, "learning_rate": 6.692641380431879e-06, "loss": 15.3016, "step": 1069 }, { "epoch": 0.061347934523980165, "grad_norm": 0.0, "learning_rate": 6.6868164028641355e-06, "loss": 15.2637, "step": 1070 }, { "epoch": 0.06140526904222687, "grad_norm": 0.0, "learning_rate": 6.68098884075037e-06, "loss": 15.2153, "step": 1071 }, { "epoch": 0.06146260356047358, "grad_norm": 0.0, "learning_rate": 6.675158703019594e-06, "loss": 15.416, "step": 1072 }, { "epoch": 0.06151993807872029, "grad_norm": 0.0, "learning_rate": 6.669325998604766e-06, "loss": 15.0624, "step": 1073 }, { "epoch": 0.061577272596967006, "grad_norm": 0.0, "learning_rate": 6.663490736442771e-06, "loss": 14.9949, "step": 1074 }, { "epoch": 0.061634607115213715, "grad_norm": 0.0, "learning_rate": 6.657652925474424e-06, "loss": 15.2689, "step": 1075 }, { "epoch": 0.06169194163346042, "grad_norm": 0.0, "learning_rate": 6.6518125746444376e-06, "loss": 14.9976, "step": 1076 }, { "epoch": 0.06174927615170714, "grad_norm": 0.0, "learning_rate": 6.645969692901416e-06, "loss": 15.2178, "step": 1077 }, { "epoch": 0.06180661066995385, "grad_norm": 0.0, "learning_rate": 6.640124289197845e-06, "loss": 15.2663, "step": 1078 }, { "epoch": 0.061863945188200556, "grad_norm": 0.0, "learning_rate": 6.634276372490074e-06, "loss": 15.3322, "step": 1079 }, { "epoch": 0.061921279706447264, "grad_norm": 0.0, "learning_rate": 6.6284259517383e-06, "loss": 15.8089, "step": 1080 }, { "epoch": 0.06197861422469398, "grad_norm": 0.0, "learning_rate": 6.622573035906557e-06, "loss": 15.5136, "step": 1081 }, { "epoch": 0.06203594874294069, "grad_norm": 0.0, "learning_rate": 6.616717633962703e-06, "loss": 15.4216, "step": 1082 }, { "epoch": 0.0620932832611874, "grad_norm": 0.0, "learning_rate": 6.6108597548784104e-06, "loss": 15.3619, "step": 1083 }, { "epoch": 0.062150617779434106, "grad_norm": 0.0, "learning_rate": 6.604999407629137e-06, "loss": 15.339, "step": 1084 }, { "epoch": 0.06220795229768082, "grad_norm": 0.0, "learning_rate": 6.599136601194128e-06, "loss": 14.9992, "step": 1085 }, { "epoch": 0.06226528681592753, "grad_norm": 0.0, "learning_rate": 6.593271344556399e-06, "loss": 15.297, "step": 1086 }, { "epoch": 0.06232262133417424, "grad_norm": 0.0, "learning_rate": 6.5874036467027135e-06, "loss": 15.0719, "step": 1087 }, { "epoch": 0.06237995585242095, "grad_norm": 0.0, "learning_rate": 6.58153351662358e-06, "loss": 15.1815, "step": 1088 }, { "epoch": 0.06243729037066766, "grad_norm": 0.0, "learning_rate": 6.575660963313233e-06, "loss": 15.363, "step": 1089 }, { "epoch": 0.06249462488891437, "grad_norm": 0.0, "learning_rate": 6.5697859957696195e-06, "loss": 15.193, "step": 1090 }, { "epoch": 0.06255195940716109, "grad_norm": 0.0, "learning_rate": 6.563908622994385e-06, "loss": 15.1983, "step": 1091 }, { "epoch": 0.06260929392540779, "grad_norm": 0.0, "learning_rate": 6.558028853992859e-06, "loss": 15.357, "step": 1092 }, { "epoch": 0.0626666284436545, "grad_norm": 0.0, "learning_rate": 6.552146697774049e-06, "loss": 15.4091, "step": 1093 }, { "epoch": 0.06272396296190122, "grad_norm": 0.0, "learning_rate": 6.546262163350609e-06, "loss": 15.4367, "step": 1094 }, { "epoch": 0.06278129748014792, "grad_norm": 0.0, "learning_rate": 6.540375259738849e-06, "loss": 15.3776, "step": 1095 }, { "epoch": 0.06283863199839464, "grad_norm": 0.0, "learning_rate": 6.534485995958699e-06, "loss": 15.3741, "step": 1096 }, { "epoch": 0.06289596651664134, "grad_norm": 0.0, "learning_rate": 6.528594381033714e-06, "loss": 15.4107, "step": 1097 }, { "epoch": 0.06295330103488805, "grad_norm": 0.0, "learning_rate": 6.522700423991043e-06, "loss": 15.1333, "step": 1098 }, { "epoch": 0.06301063555313477, "grad_norm": 0.0, "learning_rate": 6.51680413386143e-06, "loss": 15.1594, "step": 1099 }, { "epoch": 0.06306797007138147, "grad_norm": 0.0, "learning_rate": 6.510905519679192e-06, "loss": 15.0248, "step": 1100 }, { "epoch": 0.06312530458962819, "grad_norm": 0.0, "learning_rate": 6.5050045904822035e-06, "loss": 15.3396, "step": 1101 }, { "epoch": 0.0631826391078749, "grad_norm": 0.0, "learning_rate": 6.499101355311891e-06, "loss": 15.5805, "step": 1102 }, { "epoch": 0.0632399736261216, "grad_norm": 0.0, "learning_rate": 6.493195823213212e-06, "loss": 15.3565, "step": 1103 }, { "epoch": 0.06329730814436832, "grad_norm": 0.0, "learning_rate": 6.487288003234646e-06, "loss": 15.2634, "step": 1104 }, { "epoch": 0.06335464266261502, "grad_norm": 0.0, "learning_rate": 6.481377904428171e-06, "loss": 15.388, "step": 1105 }, { "epoch": 0.06341197718086174, "grad_norm": 0.0, "learning_rate": 6.475465535849263e-06, "loss": 15.4613, "step": 1106 }, { "epoch": 0.06346931169910845, "grad_norm": 0.0, "learning_rate": 6.469550906556874e-06, "loss": 15.601, "step": 1107 }, { "epoch": 0.06352664621735515, "grad_norm": 0.0, "learning_rate": 6.4636340256134224e-06, "loss": 15.2509, "step": 1108 }, { "epoch": 0.06358398073560187, "grad_norm": 0.0, "learning_rate": 6.457714902084769e-06, "loss": 15.6251, "step": 1109 }, { "epoch": 0.06364131525384858, "grad_norm": 0.0, "learning_rate": 6.451793545040218e-06, "loss": 15.2786, "step": 1110 }, { "epoch": 0.06369864977209529, "grad_norm": 0.0, "learning_rate": 6.445869963552496e-06, "loss": 15.3809, "step": 1111 }, { "epoch": 0.063755984290342, "grad_norm": 0.0, "learning_rate": 6.439944166697731e-06, "loss": 15.7765, "step": 1112 }, { "epoch": 0.06381331880858872, "grad_norm": 0.0, "learning_rate": 6.434016163555452e-06, "loss": 15.0431, "step": 1113 }, { "epoch": 0.06387065332683542, "grad_norm": 0.0, "learning_rate": 6.428085963208567e-06, "loss": 15.2539, "step": 1114 }, { "epoch": 0.06392798784508213, "grad_norm": 0.0, "learning_rate": 6.422153574743348e-06, "loss": 15.2095, "step": 1115 }, { "epoch": 0.06398532236332884, "grad_norm": 0.0, "learning_rate": 6.416219007249424e-06, "loss": 15.5957, "step": 1116 }, { "epoch": 0.06404265688157555, "grad_norm": 0.0, "learning_rate": 6.410282269819756e-06, "loss": 15.3808, "step": 1117 }, { "epoch": 0.06409999139982227, "grad_norm": 0.0, "learning_rate": 6.404343371550639e-06, "loss": 15.4368, "step": 1118 }, { "epoch": 0.06415732591806897, "grad_norm": 0.0, "learning_rate": 6.39840232154167e-06, "loss": 15.2563, "step": 1119 }, { "epoch": 0.06421466043631568, "grad_norm": 0.0, "learning_rate": 6.392459128895747e-06, "loss": 14.9078, "step": 1120 }, { "epoch": 0.0642719949545624, "grad_norm": 0.0, "learning_rate": 6.3865138027190535e-06, "loss": 15.11, "step": 1121 }, { "epoch": 0.0643293294728091, "grad_norm": 0.0, "learning_rate": 6.380566352121037e-06, "loss": 15.2601, "step": 1122 }, { "epoch": 0.06438666399105582, "grad_norm": 0.0, "learning_rate": 6.374616786214402e-06, "loss": 15.2519, "step": 1123 }, { "epoch": 0.06444399850930252, "grad_norm": 0.0, "learning_rate": 6.368665114115096e-06, "loss": 15.1558, "step": 1124 }, { "epoch": 0.06450133302754923, "grad_norm": 0.0, "learning_rate": 6.362711344942289e-06, "loss": 15.2966, "step": 1125 }, { "epoch": 0.06455866754579595, "grad_norm": 0.0, "learning_rate": 6.356755487818371e-06, "loss": 15.4227, "step": 1126 }, { "epoch": 0.06461600206404265, "grad_norm": 0.0, "learning_rate": 6.350797551868923e-06, "loss": 15.6837, "step": 1127 }, { "epoch": 0.06467333658228937, "grad_norm": 0.0, "learning_rate": 6.344837546222718e-06, "loss": 15.2018, "step": 1128 }, { "epoch": 0.06473067110053608, "grad_norm": 0.0, "learning_rate": 6.338875480011698e-06, "loss": 15.0102, "step": 1129 }, { "epoch": 0.06478800561878278, "grad_norm": 0.0, "learning_rate": 6.33291136237096e-06, "loss": 15.2271, "step": 1130 }, { "epoch": 0.0648453401370295, "grad_norm": 0.0, "learning_rate": 6.326945202438748e-06, "loss": 15.2503, "step": 1131 }, { "epoch": 0.06490267465527622, "grad_norm": 0.0, "learning_rate": 6.3209770093564315e-06, "loss": 15.0981, "step": 1132 }, { "epoch": 0.06496000917352292, "grad_norm": 0.0, "learning_rate": 6.3150067922684965e-06, "loss": 15.335, "step": 1133 }, { "epoch": 0.06501734369176963, "grad_norm": 0.0, "learning_rate": 6.3090345603225324e-06, "loss": 15.3527, "step": 1134 }, { "epoch": 0.06507467821001633, "grad_norm": 0.0, "learning_rate": 6.303060322669214e-06, "loss": 15.294, "step": 1135 }, { "epoch": 0.06513201272826305, "grad_norm": 0.0, "learning_rate": 6.297084088462288e-06, "loss": 14.9693, "step": 1136 }, { "epoch": 0.06518934724650977, "grad_norm": 0.0, "learning_rate": 6.291105866858562e-06, "loss": 15.3394, "step": 1137 }, { "epoch": 0.06524668176475647, "grad_norm": 0.0, "learning_rate": 6.285125667017886e-06, "loss": 15.804, "step": 1138 }, { "epoch": 0.06530401628300318, "grad_norm": 0.0, "learning_rate": 6.279143498103149e-06, "loss": 15.4625, "step": 1139 }, { "epoch": 0.0653613508012499, "grad_norm": 0.0, "learning_rate": 6.273159369280244e-06, "loss": 15.1583, "step": 1140 }, { "epoch": 0.0654186853194966, "grad_norm": 0.0, "learning_rate": 6.267173289718079e-06, "loss": 15.213, "step": 1141 }, { "epoch": 0.06547601983774332, "grad_norm": 0.0, "learning_rate": 6.261185268588546e-06, "loss": 15.3858, "step": 1142 }, { "epoch": 0.06553335435599002, "grad_norm": 0.0, "learning_rate": 6.25519531506651e-06, "loss": 15.3585, "step": 1143 }, { "epoch": 0.06559068887423673, "grad_norm": 0.0, "learning_rate": 6.249203438329799e-06, "loss": 15.5088, "step": 1144 }, { "epoch": 0.06564802339248345, "grad_norm": 0.0, "learning_rate": 6.24320964755919e-06, "loss": 15.382, "step": 1145 }, { "epoch": 0.06570535791073015, "grad_norm": 0.0, "learning_rate": 6.237213951938389e-06, "loss": 15.5293, "step": 1146 }, { "epoch": 0.06576269242897687, "grad_norm": 0.0, "learning_rate": 6.23121636065402e-06, "loss": 15.1687, "step": 1147 }, { "epoch": 0.06582002694722358, "grad_norm": 0.0, "learning_rate": 6.225216882895615e-06, "loss": 15.5723, "step": 1148 }, { "epoch": 0.06587736146547028, "grad_norm": 0.0, "learning_rate": 6.219215527855596e-06, "loss": 15.4502, "step": 1149 }, { "epoch": 0.065934695983717, "grad_norm": 0.0, "learning_rate": 6.213212304729259e-06, "loss": 15.6545, "step": 1150 }, { "epoch": 0.06599203050196371, "grad_norm": 0.0, "learning_rate": 6.207207222714763e-06, "loss": 15.2629, "step": 1151 }, { "epoch": 0.06604936502021042, "grad_norm": 0.0, "learning_rate": 6.201200291013117e-06, "loss": 15.1331, "step": 1152 }, { "epoch": 0.06610669953845713, "grad_norm": 0.0, "learning_rate": 6.195191518828163e-06, "loss": 15.142, "step": 1153 }, { "epoch": 0.06616403405670383, "grad_norm": 0.0, "learning_rate": 6.1891809153665614e-06, "loss": 15.5586, "step": 1154 }, { "epoch": 0.06622136857495055, "grad_norm": 0.0, "learning_rate": 6.183168489837781e-06, "loss": 15.1567, "step": 1155 }, { "epoch": 0.06627870309319726, "grad_norm": 0.0, "learning_rate": 6.177154251454082e-06, "loss": 15.3777, "step": 1156 }, { "epoch": 0.06633603761144397, "grad_norm": 0.0, "learning_rate": 6.1711382094305e-06, "loss": 15.5164, "step": 1157 }, { "epoch": 0.06639337212969068, "grad_norm": 0.0, "learning_rate": 6.165120372984836e-06, "loss": 14.9784, "step": 1158 }, { "epoch": 0.0664507066479374, "grad_norm": 0.0, "learning_rate": 6.1591007513376425e-06, "loss": 15.284, "step": 1159 }, { "epoch": 0.0665080411661841, "grad_norm": 0.0, "learning_rate": 6.153079353712201e-06, "loss": 15.2941, "step": 1160 }, { "epoch": 0.06656537568443081, "grad_norm": 0.0, "learning_rate": 6.1470561893345215e-06, "loss": 15.3123, "step": 1161 }, { "epoch": 0.06662271020267752, "grad_norm": 0.0, "learning_rate": 6.141031267433316e-06, "loss": 15.1475, "step": 1162 }, { "epoch": 0.06668004472092423, "grad_norm": 0.0, "learning_rate": 6.1350045972399926e-06, "loss": 15.3684, "step": 1163 }, { "epoch": 0.06673737923917095, "grad_norm": 0.0, "learning_rate": 6.128976187988633e-06, "loss": 15.2453, "step": 1164 }, { "epoch": 0.06679471375741765, "grad_norm": 0.0, "learning_rate": 6.122946048915991e-06, "loss": 15.0222, "step": 1165 }, { "epoch": 0.06685204827566436, "grad_norm": 0.0, "learning_rate": 6.116914189261466e-06, "loss": 15.4126, "step": 1166 }, { "epoch": 0.06690938279391108, "grad_norm": 0.0, "learning_rate": 6.110880618267092e-06, "loss": 15.4454, "step": 1167 }, { "epoch": 0.06696671731215778, "grad_norm": 0.0, "learning_rate": 6.1048453451775305e-06, "loss": 15.4416, "step": 1168 }, { "epoch": 0.0670240518304045, "grad_norm": 0.0, "learning_rate": 6.0988083792400466e-06, "loss": 15.4173, "step": 1169 }, { "epoch": 0.06708138634865121, "grad_norm": 0.0, "learning_rate": 6.092769729704502e-06, "loss": 15.521, "step": 1170 }, { "epoch": 0.06713872086689791, "grad_norm": 0.0, "learning_rate": 6.086729405823335e-06, "loss": 14.8905, "step": 1171 }, { "epoch": 0.06719605538514463, "grad_norm": 0.0, "learning_rate": 6.080687416851553e-06, "loss": 15.0262, "step": 1172 }, { "epoch": 0.06725338990339133, "grad_norm": 0.0, "learning_rate": 6.074643772046712e-06, "loss": 15.3374, "step": 1173 }, { "epoch": 0.06731072442163805, "grad_norm": 0.0, "learning_rate": 6.0685984806689055e-06, "loss": 15.2824, "step": 1174 }, { "epoch": 0.06736805893988476, "grad_norm": 0.0, "learning_rate": 6.06255155198075e-06, "loss": 15.2405, "step": 1175 }, { "epoch": 0.06742539345813146, "grad_norm": 0.0, "learning_rate": 6.056502995247371e-06, "loss": 15.4861, "step": 1176 }, { "epoch": 0.06748272797637818, "grad_norm": 0.0, "learning_rate": 6.05045281973639e-06, "loss": 15.3327, "step": 1177 }, { "epoch": 0.0675400624946249, "grad_norm": 0.0, "learning_rate": 6.044401034717905e-06, "loss": 15.0999, "step": 1178 }, { "epoch": 0.0675973970128716, "grad_norm": 0.0, "learning_rate": 6.038347649464483e-06, "loss": 15.274, "step": 1179 }, { "epoch": 0.06765473153111831, "grad_norm": 0.0, "learning_rate": 6.032292673251143e-06, "loss": 15.4088, "step": 1180 }, { "epoch": 0.06771206604936501, "grad_norm": 0.0, "learning_rate": 6.0262361153553395e-06, "loss": 15.5959, "step": 1181 }, { "epoch": 0.06776940056761173, "grad_norm": 0.0, "learning_rate": 6.020177985056953e-06, "loss": 15.287, "step": 1182 }, { "epoch": 0.06782673508585844, "grad_norm": 0.0, "learning_rate": 6.014118291638272e-06, "loss": 15.0794, "step": 1183 }, { "epoch": 0.06788406960410515, "grad_norm": 0.0, "learning_rate": 6.008057044383978e-06, "loss": 15.3053, "step": 1184 }, { "epoch": 0.06794140412235186, "grad_norm": 0.0, "learning_rate": 6.0019942525811385e-06, "loss": 15.4402, "step": 1185 }, { "epoch": 0.06799873864059858, "grad_norm": 0.0, "learning_rate": 5.995929925519181e-06, "loss": 15.2618, "step": 1186 }, { "epoch": 0.06805607315884528, "grad_norm": 0.0, "learning_rate": 5.989864072489892e-06, "loss": 15.4546, "step": 1187 }, { "epoch": 0.068113407677092, "grad_norm": 0.0, "learning_rate": 5.98379670278739e-06, "loss": 15.4383, "step": 1188 }, { "epoch": 0.06817074219533871, "grad_norm": 0.0, "learning_rate": 5.977727825708123e-06, "loss": 15.2373, "step": 1189 }, { "epoch": 0.06822807671358541, "grad_norm": 0.0, "learning_rate": 5.971657450550844e-06, "loss": 15.3016, "step": 1190 }, { "epoch": 0.06828541123183213, "grad_norm": 0.0, "learning_rate": 5.965585586616602e-06, "loss": 15.619, "step": 1191 }, { "epoch": 0.06834274575007883, "grad_norm": 0.0, "learning_rate": 5.959512243208732e-06, "loss": 15.3967, "step": 1192 }, { "epoch": 0.06840008026832554, "grad_norm": 0.0, "learning_rate": 5.953437429632829e-06, "loss": 15.1485, "step": 1193 }, { "epoch": 0.06845741478657226, "grad_norm": 0.0, "learning_rate": 5.947361155196744e-06, "loss": 15.3058, "step": 1194 }, { "epoch": 0.06851474930481896, "grad_norm": 0.0, "learning_rate": 5.941283429210568e-06, "loss": 15.2705, "step": 1195 }, { "epoch": 0.06857208382306568, "grad_norm": 0.0, "learning_rate": 5.935204260986611e-06, "loss": 14.8969, "step": 1196 }, { "epoch": 0.06862941834131239, "grad_norm": 0.0, "learning_rate": 5.9291236598393996e-06, "loss": 15.0611, "step": 1197 }, { "epoch": 0.0686867528595591, "grad_norm": 0.0, "learning_rate": 5.9230416350856505e-06, "loss": 15.297, "step": 1198 }, { "epoch": 0.06874408737780581, "grad_norm": 0.0, "learning_rate": 5.9169581960442615e-06, "loss": 15.4516, "step": 1199 }, { "epoch": 0.06880142189605253, "grad_norm": 0.0, "learning_rate": 5.910873352036302e-06, "loss": 15.607, "step": 1200 }, { "epoch": 0.06885875641429923, "grad_norm": 0.0, "learning_rate": 5.904787112384991e-06, "loss": 15.0717, "step": 1201 }, { "epoch": 0.06891609093254594, "grad_norm": 0.0, "learning_rate": 5.898699486415686e-06, "loss": 15.1691, "step": 1202 }, { "epoch": 0.06897342545079264, "grad_norm": 0.0, "learning_rate": 5.892610483455867e-06, "loss": 15.5078, "step": 1203 }, { "epoch": 0.06903075996903936, "grad_norm": 0.0, "learning_rate": 5.886520112835128e-06, "loss": 15.2725, "step": 1204 }, { "epoch": 0.06908809448728608, "grad_norm": 0.0, "learning_rate": 5.880428383885157e-06, "loss": 15.6883, "step": 1205 }, { "epoch": 0.06914542900553278, "grad_norm": 0.0, "learning_rate": 5.874335305939719e-06, "loss": 15.4606, "step": 1206 }, { "epoch": 0.06920276352377949, "grad_norm": 0.0, "learning_rate": 5.8682408883346535e-06, "loss": 15.3959, "step": 1207 }, { "epoch": 0.06926009804202621, "grad_norm": 0.0, "learning_rate": 5.8621451404078455e-06, "loss": 15.3062, "step": 1208 }, { "epoch": 0.06931743256027291, "grad_norm": 0.0, "learning_rate": 5.856048071499223e-06, "loss": 15.5863, "step": 1209 }, { "epoch": 0.06937476707851963, "grad_norm": 0.0, "learning_rate": 5.849949690950736e-06, "loss": 15.1853, "step": 1210 }, { "epoch": 0.06943210159676633, "grad_norm": 0.0, "learning_rate": 5.843850008106344e-06, "loss": 14.9957, "step": 1211 }, { "epoch": 0.06948943611501304, "grad_norm": 0.0, "learning_rate": 5.837749032312005e-06, "loss": 15.2185, "step": 1212 }, { "epoch": 0.06954677063325976, "grad_norm": 0.0, "learning_rate": 5.831646772915651e-06, "loss": 15.132, "step": 1213 }, { "epoch": 0.06960410515150646, "grad_norm": 0.0, "learning_rate": 5.82554323926719e-06, "loss": 15.1604, "step": 1214 }, { "epoch": 0.06966143966975318, "grad_norm": 0.0, "learning_rate": 5.819438440718476e-06, "loss": 14.8557, "step": 1215 }, { "epoch": 0.06971877418799989, "grad_norm": 0.0, "learning_rate": 5.8133323866233005e-06, "loss": 15.6393, "step": 1216 }, { "epoch": 0.06977610870624659, "grad_norm": 0.0, "learning_rate": 5.807225086337383e-06, "loss": 15.1016, "step": 1217 }, { "epoch": 0.06983344322449331, "grad_norm": 0.0, "learning_rate": 5.8011165492183516e-06, "loss": 15.2926, "step": 1218 }, { "epoch": 0.06989077774274002, "grad_norm": 0.0, "learning_rate": 5.795006784625728e-06, "loss": 15.0293, "step": 1219 }, { "epoch": 0.06994811226098673, "grad_norm": 0.0, "learning_rate": 5.788895801920914e-06, "loss": 15.1197, "step": 1220 }, { "epoch": 0.07000544677923344, "grad_norm": 0.0, "learning_rate": 5.782783610467177e-06, "loss": 15.5134, "step": 1221 }, { "epoch": 0.07006278129748014, "grad_norm": 0.0, "learning_rate": 5.776670219629643e-06, "loss": 15.3721, "step": 1222 }, { "epoch": 0.07012011581572686, "grad_norm": 0.0, "learning_rate": 5.770555638775267e-06, "loss": 15.3128, "step": 1223 }, { "epoch": 0.07017745033397357, "grad_norm": 0.0, "learning_rate": 5.764439877272833e-06, "loss": 15.3769, "step": 1224 }, { "epoch": 0.07023478485222028, "grad_norm": 0.0, "learning_rate": 5.75832294449293e-06, "loss": 15.113, "step": 1225 }, { "epoch": 0.07029211937046699, "grad_norm": 0.0, "learning_rate": 5.752204849807948e-06, "loss": 15.3764, "step": 1226 }, { "epoch": 0.0703494538887137, "grad_norm": 0.0, "learning_rate": 5.74608560259205e-06, "loss": 15.2141, "step": 1227 }, { "epoch": 0.07040678840696041, "grad_norm": 0.0, "learning_rate": 5.739965212221168e-06, "loss": 15.0714, "step": 1228 }, { "epoch": 0.07046412292520712, "grad_norm": 0.0, "learning_rate": 5.733843688072987e-06, "loss": 15.5016, "step": 1229 }, { "epoch": 0.07052145744345382, "grad_norm": 0.0, "learning_rate": 5.727721039526928e-06, "loss": 15.3094, "step": 1230 }, { "epoch": 0.07057879196170054, "grad_norm": 0.0, "learning_rate": 5.7215972759641335e-06, "loss": 15.3874, "step": 1231 }, { "epoch": 0.07063612647994726, "grad_norm": 0.0, "learning_rate": 5.715472406767457e-06, "loss": 15.219, "step": 1232 }, { "epoch": 0.07069346099819396, "grad_norm": 0.0, "learning_rate": 5.709346441321443e-06, "loss": 15.4031, "step": 1233 }, { "epoch": 0.07075079551644067, "grad_norm": 0.0, "learning_rate": 5.703219389012317e-06, "loss": 15.1785, "step": 1234 }, { "epoch": 0.07080813003468739, "grad_norm": 0.0, "learning_rate": 5.697091259227973e-06, "loss": 15.2524, "step": 1235 }, { "epoch": 0.07086546455293409, "grad_norm": 0.0, "learning_rate": 5.69096206135795e-06, "loss": 15.2352, "step": 1236 }, { "epoch": 0.0709227990711808, "grad_norm": 0.0, "learning_rate": 5.684831804793427e-06, "loss": 15.1509, "step": 1237 }, { "epoch": 0.07098013358942752, "grad_norm": 0.0, "learning_rate": 5.6787004989272066e-06, "loss": 15.2295, "step": 1238 }, { "epoch": 0.07103746810767422, "grad_norm": 0.0, "learning_rate": 5.6725681531536955e-06, "loss": 15.2069, "step": 1239 }, { "epoch": 0.07109480262592094, "grad_norm": 0.0, "learning_rate": 5.666434776868895e-06, "loss": 15.4838, "step": 1240 }, { "epoch": 0.07115213714416764, "grad_norm": 0.0, "learning_rate": 5.660300379470387e-06, "loss": 15.1852, "step": 1241 }, { "epoch": 0.07120947166241436, "grad_norm": 0.0, "learning_rate": 5.654164970357316e-06, "loss": 15.2174, "step": 1242 }, { "epoch": 0.07126680618066107, "grad_norm": 0.0, "learning_rate": 5.64802855893038e-06, "loss": 15.5695, "step": 1243 }, { "epoch": 0.07132414069890777, "grad_norm": 0.0, "learning_rate": 5.641891154591805e-06, "loss": 15.093, "step": 1244 }, { "epoch": 0.07138147521715449, "grad_norm": 0.0, "learning_rate": 5.635752766745347e-06, "loss": 15.5662, "step": 1245 }, { "epoch": 0.0714388097354012, "grad_norm": 0.0, "learning_rate": 5.629613404796267e-06, "loss": 15.529, "step": 1246 }, { "epoch": 0.0714961442536479, "grad_norm": 0.0, "learning_rate": 5.623473078151313e-06, "loss": 15.3623, "step": 1247 }, { "epoch": 0.07155347877189462, "grad_norm": 0.0, "learning_rate": 5.617331796218717e-06, "loss": 15.7112, "step": 1248 }, { "epoch": 0.07161081329014132, "grad_norm": 0.0, "learning_rate": 5.611189568408173e-06, "loss": 15.6012, "step": 1249 }, { "epoch": 0.07166814780838804, "grad_norm": 0.0, "learning_rate": 5.605046404130824e-06, "loss": 15.1824, "step": 1250 }, { "epoch": 0.07172548232663475, "grad_norm": 0.0, "learning_rate": 5.598902312799247e-06, "loss": 15.3857, "step": 1251 }, { "epoch": 0.07178281684488146, "grad_norm": 0.0, "learning_rate": 5.592757303827441e-06, "loss": 15.3178, "step": 1252 }, { "epoch": 0.07184015136312817, "grad_norm": 0.0, "learning_rate": 5.586611386630811e-06, "loss": 14.9769, "step": 1253 }, { "epoch": 0.07189748588137489, "grad_norm": 0.0, "learning_rate": 5.5804645706261515e-06, "loss": 15.2135, "step": 1254 }, { "epoch": 0.07195482039962159, "grad_norm": 0.0, "learning_rate": 5.574316865231637e-06, "loss": 15.5411, "step": 1255 }, { "epoch": 0.0720121549178683, "grad_norm": 0.0, "learning_rate": 5.568168279866801e-06, "loss": 15.4522, "step": 1256 }, { "epoch": 0.07206948943611502, "grad_norm": 0.0, "learning_rate": 5.562018823952532e-06, "loss": 15.5142, "step": 1257 }, { "epoch": 0.07212682395436172, "grad_norm": 0.0, "learning_rate": 5.5558685069110444e-06, "loss": 15.1283, "step": 1258 }, { "epoch": 0.07218415847260844, "grad_norm": 0.0, "learning_rate": 5.549717338165876e-06, "loss": 15.3748, "step": 1259 }, { "epoch": 0.07224149299085514, "grad_norm": 0.0, "learning_rate": 5.5435653271418686e-06, "loss": 15.4054, "step": 1260 }, { "epoch": 0.07229882750910185, "grad_norm": 0.0, "learning_rate": 5.537412483265156e-06, "loss": 15.1071, "step": 1261 }, { "epoch": 0.07235616202734857, "grad_norm": 0.0, "learning_rate": 5.5312588159631485e-06, "loss": 15.3223, "step": 1262 }, { "epoch": 0.07241349654559527, "grad_norm": 0.0, "learning_rate": 5.525104334664517e-06, "loss": 15.4098, "step": 1263 }, { "epoch": 0.07247083106384199, "grad_norm": 0.0, "learning_rate": 5.518949048799176e-06, "loss": 15.1196, "step": 1264 }, { "epoch": 0.0725281655820887, "grad_norm": 0.0, "learning_rate": 5.512792967798278e-06, "loss": 15.1726, "step": 1265 }, { "epoch": 0.0725855001003354, "grad_norm": 0.0, "learning_rate": 5.506636101094193e-06, "loss": 15.07, "step": 1266 }, { "epoch": 0.07264283461858212, "grad_norm": 0.0, "learning_rate": 5.500478458120493e-06, "loss": 15.5946, "step": 1267 }, { "epoch": 0.07270016913682882, "grad_norm": 0.0, "learning_rate": 5.4943200483119385e-06, "loss": 15.0114, "step": 1268 }, { "epoch": 0.07275750365507554, "grad_norm": 0.0, "learning_rate": 5.48816088110447e-06, "loss": 15.1489, "step": 1269 }, { "epoch": 0.07281483817332225, "grad_norm": 0.0, "learning_rate": 5.482000965935182e-06, "loss": 15.2691, "step": 1270 }, { "epoch": 0.07287217269156895, "grad_norm": 0.0, "learning_rate": 5.475840312242321e-06, "loss": 15.2723, "step": 1271 }, { "epoch": 0.07292950720981567, "grad_norm": 0.0, "learning_rate": 5.4696789294652596e-06, "loss": 15.2848, "step": 1272 }, { "epoch": 0.07298684172806238, "grad_norm": 0.0, "learning_rate": 5.463516827044492e-06, "loss": 15.2138, "step": 1273 }, { "epoch": 0.07304417624630909, "grad_norm": 0.0, "learning_rate": 5.457354014421613e-06, "loss": 15.3442, "step": 1274 }, { "epoch": 0.0731015107645558, "grad_norm": 0.0, "learning_rate": 5.4511905010393055e-06, "loss": 15.102, "step": 1275 }, { "epoch": 0.07315884528280252, "grad_norm": 0.0, "learning_rate": 5.445026296341325e-06, "loss": 15.5483, "step": 1276 }, { "epoch": 0.07321617980104922, "grad_norm": 0.0, "learning_rate": 5.438861409772489e-06, "loss": 15.2401, "step": 1277 }, { "epoch": 0.07327351431929593, "grad_norm": 0.0, "learning_rate": 5.432695850778658e-06, "loss": 15.4661, "step": 1278 }, { "epoch": 0.07333084883754264, "grad_norm": 0.0, "learning_rate": 5.4265296288067235e-06, "loss": 15.4194, "step": 1279 }, { "epoch": 0.07338818335578935, "grad_norm": 0.0, "learning_rate": 5.420362753304594e-06, "loss": 15.1753, "step": 1280 }, { "epoch": 0.07344551787403607, "grad_norm": 0.0, "learning_rate": 5.414195233721175e-06, "loss": 15.5165, "step": 1281 }, { "epoch": 0.07350285239228277, "grad_norm": 0.0, "learning_rate": 5.408027079506362e-06, "loss": 15.08, "step": 1282 }, { "epoch": 0.07356018691052948, "grad_norm": 0.0, "learning_rate": 5.401858300111024e-06, "loss": 15.2593, "step": 1283 }, { "epoch": 0.0736175214287762, "grad_norm": 0.0, "learning_rate": 5.395688904986987e-06, "loss": 15.019, "step": 1284 }, { "epoch": 0.0736748559470229, "grad_norm": 0.0, "learning_rate": 5.389518903587016e-06, "loss": 15.4, "step": 1285 }, { "epoch": 0.07373219046526962, "grad_norm": 0.0, "learning_rate": 5.383348305364814e-06, "loss": 15.1214, "step": 1286 }, { "epoch": 0.07378952498351632, "grad_norm": 0.0, "learning_rate": 5.37717711977499e-06, "loss": 15.6768, "step": 1287 }, { "epoch": 0.07384685950176303, "grad_norm": 0.0, "learning_rate": 5.371005356273058e-06, "loss": 15.1213, "step": 1288 }, { "epoch": 0.07390419402000975, "grad_norm": 0.0, "learning_rate": 5.364833024315414e-06, "loss": 15.2674, "step": 1289 }, { "epoch": 0.07396152853825645, "grad_norm": 0.0, "learning_rate": 5.358660133359328e-06, "loss": 15.4868, "step": 1290 }, { "epoch": 0.07401886305650317, "grad_norm": 0.0, "learning_rate": 5.352486692862926e-06, "loss": 15.296, "step": 1291 }, { "epoch": 0.07407619757474988, "grad_norm": 0.0, "learning_rate": 5.346312712285172e-06, "loss": 15.4523, "step": 1292 }, { "epoch": 0.07413353209299658, "grad_norm": 0.0, "learning_rate": 5.340138201085864e-06, "loss": 15.1769, "step": 1293 }, { "epoch": 0.0741908666112433, "grad_norm": 0.0, "learning_rate": 5.3339631687256085e-06, "loss": 15.244, "step": 1294 }, { "epoch": 0.07424820112949002, "grad_norm": 0.0, "learning_rate": 5.327787624665811e-06, "loss": 15.4797, "step": 1295 }, { "epoch": 0.07430553564773672, "grad_norm": 0.0, "learning_rate": 5.321611578368664e-06, "loss": 15.2982, "step": 1296 }, { "epoch": 0.07436287016598343, "grad_norm": 0.0, "learning_rate": 5.3154350392971245e-06, "loss": 15.1468, "step": 1297 }, { "epoch": 0.07442020468423013, "grad_norm": 0.0, "learning_rate": 5.309258016914911e-06, "loss": 15.3865, "step": 1298 }, { "epoch": 0.07447753920247685, "grad_norm": 0.0, "learning_rate": 5.303080520686474e-06, "loss": 15.3151, "step": 1299 }, { "epoch": 0.07453487372072357, "grad_norm": 0.0, "learning_rate": 5.296902560077e-06, "loss": 15.5556, "step": 1300 }, { "epoch": 0.07459220823897027, "grad_norm": 0.0, "learning_rate": 5.290724144552379e-06, "loss": 15.4445, "step": 1301 }, { "epoch": 0.07464954275721698, "grad_norm": 0.0, "learning_rate": 5.284545283579204e-06, "loss": 15.0258, "step": 1302 }, { "epoch": 0.0747068772754637, "grad_norm": 0.0, "learning_rate": 5.278365986624743e-06, "loss": 15.1966, "step": 1303 }, { "epoch": 0.0747642117937104, "grad_norm": 0.0, "learning_rate": 5.272186263156937e-06, "loss": 15.1388, "step": 1304 }, { "epoch": 0.07482154631195712, "grad_norm": 0.0, "learning_rate": 5.266006122644385e-06, "loss": 15.0732, "step": 1305 }, { "epoch": 0.07487888083020383, "grad_norm": 0.0, "learning_rate": 5.259825574556315e-06, "loss": 15.3282, "step": 1306 }, { "epoch": 0.07493621534845053, "grad_norm": 0.0, "learning_rate": 5.2536446283625865e-06, "loss": 15.4205, "step": 1307 }, { "epoch": 0.07499354986669725, "grad_norm": 0.0, "learning_rate": 5.247463293533667e-06, "loss": 15.5121, "step": 1308 }, { "epoch": 0.07505088438494395, "grad_norm": 0.0, "learning_rate": 5.241281579540619e-06, "loss": 15.1642, "step": 1309 }, { "epoch": 0.07510821890319067, "grad_norm": 0.0, "learning_rate": 5.235099495855086e-06, "loss": 15.0035, "step": 1310 }, { "epoch": 0.07516555342143738, "grad_norm": 0.0, "learning_rate": 5.228917051949279e-06, "loss": 15.2187, "step": 1311 }, { "epoch": 0.07522288793968408, "grad_norm": 0.0, "learning_rate": 5.222734257295963e-06, "loss": 15.0756, "step": 1312 }, { "epoch": 0.0752802224579308, "grad_norm": 0.0, "learning_rate": 5.216551121368432e-06, "loss": 15.1728, "step": 1313 }, { "epoch": 0.07533755697617751, "grad_norm": 0.0, "learning_rate": 5.210367653640512e-06, "loss": 15.5529, "step": 1314 }, { "epoch": 0.07539489149442422, "grad_norm": 0.0, "learning_rate": 5.2041838635865336e-06, "loss": 14.919, "step": 1315 }, { "epoch": 0.07545222601267093, "grad_norm": 0.0, "learning_rate": 5.197999760681324e-06, "loss": 15.5087, "step": 1316 }, { "epoch": 0.07550956053091763, "grad_norm": 0.0, "learning_rate": 5.191815354400183e-06, "loss": 15.603, "step": 1317 }, { "epoch": 0.07556689504916435, "grad_norm": 0.0, "learning_rate": 5.1856306542188805e-06, "loss": 15.3248, "step": 1318 }, { "epoch": 0.07562422956741106, "grad_norm": 0.0, "learning_rate": 5.17944566961364e-06, "loss": 15.214, "step": 1319 }, { "epoch": 0.07568156408565777, "grad_norm": 0.0, "learning_rate": 5.173260410061112e-06, "loss": 15.3195, "step": 1320 }, { "epoch": 0.07573889860390448, "grad_norm": 0.0, "learning_rate": 5.1670748850383734e-06, "loss": 15.1759, "step": 1321 }, { "epoch": 0.0757962331221512, "grad_norm": 0.0, "learning_rate": 5.16088910402291e-06, "loss": 15.2272, "step": 1322 }, { "epoch": 0.0758535676403979, "grad_norm": 0.0, "learning_rate": 5.154703076492597e-06, "loss": 15.3173, "step": 1323 }, { "epoch": 0.07591090215864461, "grad_norm": 0.0, "learning_rate": 5.148516811925684e-06, "loss": 14.9612, "step": 1324 }, { "epoch": 0.07596823667689133, "grad_norm": 0.0, "learning_rate": 5.14233031980079e-06, "loss": 15.0465, "step": 1325 }, { "epoch": 0.07602557119513803, "grad_norm": 0.0, "learning_rate": 5.136143609596882e-06, "loss": 15.1786, "step": 1326 }, { "epoch": 0.07608290571338475, "grad_norm": 0.0, "learning_rate": 5.129956690793255e-06, "loss": 15.1265, "step": 1327 }, { "epoch": 0.07614024023163145, "grad_norm": 0.0, "learning_rate": 5.1237695728695294e-06, "loss": 15.5801, "step": 1328 }, { "epoch": 0.07619757474987816, "grad_norm": 0.0, "learning_rate": 5.117582265305629e-06, "loss": 15.5675, "step": 1329 }, { "epoch": 0.07625490926812488, "grad_norm": 0.0, "learning_rate": 5.111394777581769e-06, "loss": 15.4257, "step": 1330 }, { "epoch": 0.07631224378637158, "grad_norm": 0.0, "learning_rate": 5.105207119178439e-06, "loss": 15.2455, "step": 1331 }, { "epoch": 0.0763695783046183, "grad_norm": 0.0, "learning_rate": 5.099019299576391e-06, "loss": 15.3326, "step": 1332 }, { "epoch": 0.07642691282286501, "grad_norm": 0.0, "learning_rate": 5.0928313282566255e-06, "loss": 15.2436, "step": 1333 }, { "epoch": 0.07648424734111171, "grad_norm": 0.0, "learning_rate": 5.086643214700371e-06, "loss": 15.4032, "step": 1334 }, { "epoch": 0.07654158185935843, "grad_norm": 0.0, "learning_rate": 5.080454968389078e-06, "loss": 15.2234, "step": 1335 }, { "epoch": 0.07659891637760513, "grad_norm": 0.0, "learning_rate": 5.074266598804402e-06, "loss": 15.2235, "step": 1336 }, { "epoch": 0.07665625089585185, "grad_norm": 0.0, "learning_rate": 5.068078115428179e-06, "loss": 15.2521, "step": 1337 }, { "epoch": 0.07671358541409856, "grad_norm": 0.0, "learning_rate": 5.06188952774243e-06, "loss": 15.389, "step": 1338 }, { "epoch": 0.07677091993234526, "grad_norm": 0.0, "learning_rate": 5.0557008452293275e-06, "loss": 15.1074, "step": 1339 }, { "epoch": 0.07682825445059198, "grad_norm": 0.0, "learning_rate": 5.049512077371197e-06, "loss": 15.0319, "step": 1340 }, { "epoch": 0.0768855889688387, "grad_norm": 0.0, "learning_rate": 5.043323233650485e-06, "loss": 15.2615, "step": 1341 }, { "epoch": 0.0769429234870854, "grad_norm": 0.0, "learning_rate": 5.037134323549763e-06, "loss": 15.044, "step": 1342 }, { "epoch": 0.07700025800533211, "grad_norm": 0.0, "learning_rate": 5.030945356551701e-06, "loss": 15.0863, "step": 1343 }, { "epoch": 0.07705759252357883, "grad_norm": 0.0, "learning_rate": 5.024756342139053e-06, "loss": 15.4426, "step": 1344 }, { "epoch": 0.07711492704182553, "grad_norm": 0.0, "learning_rate": 5.0185672897946515e-06, "loss": 15.4382, "step": 1345 }, { "epoch": 0.07717226156007224, "grad_norm": 0.0, "learning_rate": 5.012378209001383e-06, "loss": 15.4366, "step": 1346 }, { "epoch": 0.07722959607831895, "grad_norm": 0.0, "learning_rate": 5.00618910924218e-06, "loss": 15.0979, "step": 1347 }, { "epoch": 0.07728693059656566, "grad_norm": 0.0, "learning_rate": 5e-06, "loss": 15.0105, "step": 1348 }, { "epoch": 0.07734426511481238, "grad_norm": 0.0, "learning_rate": 4.993810890757823e-06, "loss": 15.3259, "step": 1349 }, { "epoch": 0.07740159963305908, "grad_norm": 0.0, "learning_rate": 4.987621790998619e-06, "loss": 14.9897, "step": 1350 }, { "epoch": 0.0774589341513058, "grad_norm": 0.0, "learning_rate": 4.981432710205351e-06, "loss": 15.3484, "step": 1351 }, { "epoch": 0.07751626866955251, "grad_norm": 0.0, "learning_rate": 4.975243657860948e-06, "loss": 15.2349, "step": 1352 }, { "epoch": 0.07757360318779921, "grad_norm": 0.0, "learning_rate": 4.969054643448302e-06, "loss": 15.3359, "step": 1353 }, { "epoch": 0.07763093770604593, "grad_norm": 0.0, "learning_rate": 4.962865676450239e-06, "loss": 15.5123, "step": 1354 }, { "epoch": 0.07768827222429263, "grad_norm": 0.0, "learning_rate": 4.956676766349517e-06, "loss": 15.1643, "step": 1355 }, { "epoch": 0.07774560674253934, "grad_norm": 0.0, "learning_rate": 4.9504879226288045e-06, "loss": 15.3605, "step": 1356 }, { "epoch": 0.07780294126078606, "grad_norm": 0.0, "learning_rate": 4.944299154770673e-06, "loss": 14.9246, "step": 1357 }, { "epoch": 0.07786027577903276, "grad_norm": 0.0, "learning_rate": 4.938110472257572e-06, "loss": 15.3627, "step": 1358 }, { "epoch": 0.07791761029727948, "grad_norm": 0.0, "learning_rate": 4.931921884571823e-06, "loss": 15.2038, "step": 1359 }, { "epoch": 0.07797494481552619, "grad_norm": 0.0, "learning_rate": 4.925733401195601e-06, "loss": 15.4349, "step": 1360 }, { "epoch": 0.0780322793337729, "grad_norm": 0.0, "learning_rate": 4.919545031610925e-06, "loss": 15.1013, "step": 1361 }, { "epoch": 0.07808961385201961, "grad_norm": 0.0, "learning_rate": 4.913356785299631e-06, "loss": 15.0782, "step": 1362 }, { "epoch": 0.07814694837026633, "grad_norm": 0.0, "learning_rate": 4.907168671743377e-06, "loss": 15.248, "step": 1363 }, { "epoch": 0.07820428288851303, "grad_norm": 0.0, "learning_rate": 4.9009807004236105e-06, "loss": 15.2272, "step": 1364 }, { "epoch": 0.07826161740675974, "grad_norm": 0.0, "learning_rate": 4.894792880821563e-06, "loss": 15.2808, "step": 1365 }, { "epoch": 0.07831895192500644, "grad_norm": 0.0, "learning_rate": 4.888605222418232e-06, "loss": 15.2294, "step": 1366 }, { "epoch": 0.07837628644325316, "grad_norm": 0.0, "learning_rate": 4.882417734694372e-06, "loss": 15.302, "step": 1367 }, { "epoch": 0.07843362096149988, "grad_norm": 0.0, "learning_rate": 4.876230427130472e-06, "loss": 15.2778, "step": 1368 }, { "epoch": 0.07849095547974658, "grad_norm": 0.0, "learning_rate": 4.8700433092067474e-06, "loss": 15.2622, "step": 1369 }, { "epoch": 0.07854828999799329, "grad_norm": 0.0, "learning_rate": 4.86385639040312e-06, "loss": 15.3537, "step": 1370 }, { "epoch": 0.07860562451624001, "grad_norm": 0.0, "learning_rate": 4.8576696801992105e-06, "loss": 15.1753, "step": 1371 }, { "epoch": 0.07866295903448671, "grad_norm": 0.0, "learning_rate": 4.8514831880743175e-06, "loss": 15.3063, "step": 1372 }, { "epoch": 0.07872029355273343, "grad_norm": 0.0, "learning_rate": 4.845296923507406e-06, "loss": 15.5033, "step": 1373 }, { "epoch": 0.07877762807098013, "grad_norm": 0.0, "learning_rate": 4.839110895977092e-06, "loss": 15.3774, "step": 1374 }, { "epoch": 0.07883496258922684, "grad_norm": 0.0, "learning_rate": 4.832925114961629e-06, "loss": 14.7429, "step": 1375 }, { "epoch": 0.07889229710747356, "grad_norm": 0.0, "learning_rate": 4.8267395899388905e-06, "loss": 15.3443, "step": 1376 }, { "epoch": 0.07894963162572026, "grad_norm": 0.0, "learning_rate": 4.820554330386363e-06, "loss": 15.1586, "step": 1377 }, { "epoch": 0.07900696614396698, "grad_norm": 0.0, "learning_rate": 4.814369345781121e-06, "loss": 15.1614, "step": 1378 }, { "epoch": 0.07906430066221369, "grad_norm": 0.0, "learning_rate": 4.8081846455998205e-06, "loss": 15.2101, "step": 1379 }, { "epoch": 0.07912163518046039, "grad_norm": 0.0, "learning_rate": 4.802000239318678e-06, "loss": 15.6754, "step": 1380 }, { "epoch": 0.07917896969870711, "grad_norm": 0.0, "learning_rate": 4.795816136413467e-06, "loss": 15.2554, "step": 1381 }, { "epoch": 0.07923630421695382, "grad_norm": 0.0, "learning_rate": 4.789632346359489e-06, "loss": 15.013, "step": 1382 }, { "epoch": 0.07929363873520053, "grad_norm": 0.0, "learning_rate": 4.78344887863157e-06, "loss": 15.389, "step": 1383 }, { "epoch": 0.07935097325344724, "grad_norm": 0.0, "learning_rate": 4.777265742704039e-06, "loss": 15.3423, "step": 1384 }, { "epoch": 0.07940830777169394, "grad_norm": 0.0, "learning_rate": 4.771082948050722e-06, "loss": 15.0646, "step": 1385 }, { "epoch": 0.07946564228994066, "grad_norm": 0.0, "learning_rate": 4.764900504144915e-06, "loss": 15.0467, "step": 1386 }, { "epoch": 0.07952297680818737, "grad_norm": 0.0, "learning_rate": 4.758718420459383e-06, "loss": 15.2004, "step": 1387 }, { "epoch": 0.07958031132643408, "grad_norm": 0.0, "learning_rate": 4.7525367064663355e-06, "loss": 15.5153, "step": 1388 }, { "epoch": 0.07963764584468079, "grad_norm": 0.0, "learning_rate": 4.746355371637416e-06, "loss": 15.1112, "step": 1389 }, { "epoch": 0.0796949803629275, "grad_norm": 0.0, "learning_rate": 4.740174425443687e-06, "loss": 15.2085, "step": 1390 }, { "epoch": 0.07975231488117421, "grad_norm": 0.0, "learning_rate": 4.733993877355618e-06, "loss": 15.4787, "step": 1391 }, { "epoch": 0.07980964939942092, "grad_norm": 0.0, "learning_rate": 4.7278137368430635e-06, "loss": 15.3414, "step": 1392 }, { "epoch": 0.07986698391766762, "grad_norm": 0.0, "learning_rate": 4.7216340133752604e-06, "loss": 15.0015, "step": 1393 }, { "epoch": 0.07992431843591434, "grad_norm": 0.0, "learning_rate": 4.715454716420798e-06, "loss": 15.4287, "step": 1394 }, { "epoch": 0.07998165295416106, "grad_norm": 0.0, "learning_rate": 4.7092758554476215e-06, "loss": 15.2161, "step": 1395 }, { "epoch": 0.08003898747240776, "grad_norm": 0.0, "learning_rate": 4.703097439923e-06, "loss": 14.9452, "step": 1396 }, { "epoch": 0.08009632199065447, "grad_norm": 0.0, "learning_rate": 4.696919479313527e-06, "loss": 15.1395, "step": 1397 }, { "epoch": 0.08015365650890119, "grad_norm": 0.0, "learning_rate": 4.6907419830850906e-06, "loss": 15.1513, "step": 1398 }, { "epoch": 0.08021099102714789, "grad_norm": 0.0, "learning_rate": 4.684564960702877e-06, "loss": 15.5572, "step": 1399 }, { "epoch": 0.0802683255453946, "grad_norm": 0.0, "learning_rate": 4.678388421631337e-06, "loss": 15.3592, "step": 1400 }, { "epoch": 0.08032566006364132, "grad_norm": 0.0, "learning_rate": 4.67221237533419e-06, "loss": 15.0252, "step": 1401 }, { "epoch": 0.08038299458188802, "grad_norm": 0.0, "learning_rate": 4.666036831274392e-06, "loss": 15.476, "step": 1402 }, { "epoch": 0.08044032910013474, "grad_norm": 0.0, "learning_rate": 4.659861798914138e-06, "loss": 15.1597, "step": 1403 }, { "epoch": 0.08049766361838144, "grad_norm": 0.0, "learning_rate": 4.653687287714828e-06, "loss": 15.3732, "step": 1404 }, { "epoch": 0.08055499813662816, "grad_norm": 0.0, "learning_rate": 4.647513307137076e-06, "loss": 15.5174, "step": 1405 }, { "epoch": 0.08061233265487487, "grad_norm": 0.0, "learning_rate": 4.641339866640672e-06, "loss": 15.0124, "step": 1406 }, { "epoch": 0.08066966717312157, "grad_norm": 0.0, "learning_rate": 4.635166975684587e-06, "loss": 15.1956, "step": 1407 }, { "epoch": 0.08072700169136829, "grad_norm": 0.0, "learning_rate": 4.628994643726942e-06, "loss": 15.4477, "step": 1408 }, { "epoch": 0.080784336209615, "grad_norm": 0.0, "learning_rate": 4.622822880225012e-06, "loss": 15.0185, "step": 1409 }, { "epoch": 0.0808416707278617, "grad_norm": 0.0, "learning_rate": 4.616651694635186e-06, "loss": 14.8942, "step": 1410 }, { "epoch": 0.08089900524610842, "grad_norm": 0.0, "learning_rate": 4.610481096412985e-06, "loss": 15.2555, "step": 1411 }, { "epoch": 0.08095633976435514, "grad_norm": 0.0, "learning_rate": 4.604311095013015e-06, "loss": 15.2582, "step": 1412 }, { "epoch": 0.08101367428260184, "grad_norm": 0.0, "learning_rate": 4.598141699888978e-06, "loss": 15.1778, "step": 1413 }, { "epoch": 0.08107100880084855, "grad_norm": 0.0, "learning_rate": 4.591972920493638e-06, "loss": 15.8129, "step": 1414 }, { "epoch": 0.08112834331909526, "grad_norm": 0.0, "learning_rate": 4.585804766278827e-06, "loss": 15.2853, "step": 1415 }, { "epoch": 0.08118567783734197, "grad_norm": 0.0, "learning_rate": 4.579637246695407e-06, "loss": 15.2172, "step": 1416 }, { "epoch": 0.08124301235558869, "grad_norm": 0.0, "learning_rate": 4.573470371193277e-06, "loss": 15.5005, "step": 1417 }, { "epoch": 0.08130034687383539, "grad_norm": 0.0, "learning_rate": 4.5673041492213416e-06, "loss": 15.1312, "step": 1418 }, { "epoch": 0.0813576813920821, "grad_norm": 0.0, "learning_rate": 4.561138590227512e-06, "loss": 15.2109, "step": 1419 }, { "epoch": 0.08141501591032882, "grad_norm": 0.0, "learning_rate": 4.554973703658676e-06, "loss": 15.4162, "step": 1420 }, { "epoch": 0.08147235042857552, "grad_norm": 0.0, "learning_rate": 4.548809498960697e-06, "loss": 14.7716, "step": 1421 }, { "epoch": 0.08152968494682224, "grad_norm": 0.0, "learning_rate": 4.542645985578389e-06, "loss": 14.9493, "step": 1422 }, { "epoch": 0.08158701946506894, "grad_norm": 0.0, "learning_rate": 4.53648317295551e-06, "loss": 15.0915, "step": 1423 }, { "epoch": 0.08164435398331565, "grad_norm": 0.0, "learning_rate": 4.53032107053474e-06, "loss": 15.0074, "step": 1424 }, { "epoch": 0.08170168850156237, "grad_norm": 0.0, "learning_rate": 4.52415968775768e-06, "loss": 15.1501, "step": 1425 }, { "epoch": 0.08175902301980907, "grad_norm": 0.0, "learning_rate": 4.517999034064819e-06, "loss": 15.3762, "step": 1426 }, { "epoch": 0.08181635753805579, "grad_norm": 0.0, "learning_rate": 4.511839118895532e-06, "loss": 14.9713, "step": 1427 }, { "epoch": 0.0818736920563025, "grad_norm": 0.0, "learning_rate": 4.5056799516880615e-06, "loss": 15.0249, "step": 1428 }, { "epoch": 0.0819310265745492, "grad_norm": 0.0, "learning_rate": 4.499521541879508e-06, "loss": 14.9934, "step": 1429 }, { "epoch": 0.08198836109279592, "grad_norm": 0.0, "learning_rate": 4.493363898905808e-06, "loss": 15.6353, "step": 1430 }, { "epoch": 0.08204569561104263, "grad_norm": 0.0, "learning_rate": 4.4872070322017235e-06, "loss": 15.3187, "step": 1431 }, { "epoch": 0.08210303012928934, "grad_norm": 0.0, "learning_rate": 4.4810509512008245e-06, "loss": 15.0563, "step": 1432 }, { "epoch": 0.08216036464753605, "grad_norm": 0.0, "learning_rate": 4.474895665335484e-06, "loss": 15.2045, "step": 1433 }, { "epoch": 0.08221769916578275, "grad_norm": 0.0, "learning_rate": 4.4687411840368514e-06, "loss": 15.3326, "step": 1434 }, { "epoch": 0.08227503368402947, "grad_norm": 0.0, "learning_rate": 4.462587516734845e-06, "loss": 15.1827, "step": 1435 }, { "epoch": 0.08233236820227618, "grad_norm": 0.0, "learning_rate": 4.456434672858132e-06, "loss": 15.3744, "step": 1436 }, { "epoch": 0.08238970272052289, "grad_norm": 0.0, "learning_rate": 4.450282661834127e-06, "loss": 15.1566, "step": 1437 }, { "epoch": 0.0824470372387696, "grad_norm": 0.0, "learning_rate": 4.444131493088956e-06, "loss": 15.2045, "step": 1438 }, { "epoch": 0.08250437175701632, "grad_norm": 0.0, "learning_rate": 4.437981176047469e-06, "loss": 15.348, "step": 1439 }, { "epoch": 0.08256170627526302, "grad_norm": 0.0, "learning_rate": 4.431831720133198e-06, "loss": 15.0272, "step": 1440 }, { "epoch": 0.08261904079350973, "grad_norm": 0.0, "learning_rate": 4.425683134768365e-06, "loss": 15.2256, "step": 1441 }, { "epoch": 0.08267637531175644, "grad_norm": 0.0, "learning_rate": 4.4195354293738484e-06, "loss": 15.4462, "step": 1442 }, { "epoch": 0.08273370983000315, "grad_norm": 0.0, "learning_rate": 4.41338861336919e-06, "loss": 15.2976, "step": 1443 }, { "epoch": 0.08279104434824987, "grad_norm": 0.0, "learning_rate": 4.40724269617256e-06, "loss": 15.2583, "step": 1444 }, { "epoch": 0.08284837886649657, "grad_norm": 0.0, "learning_rate": 4.401097687200754e-06, "loss": 14.889, "step": 1445 }, { "epoch": 0.08290571338474328, "grad_norm": 0.0, "learning_rate": 4.394953595869178e-06, "loss": 15.0715, "step": 1446 }, { "epoch": 0.08296304790299, "grad_norm": 0.0, "learning_rate": 4.388810431591829e-06, "loss": 15.3028, "step": 1447 }, { "epoch": 0.0830203824212367, "grad_norm": 0.0, "learning_rate": 4.382668203781286e-06, "loss": 15.3078, "step": 1448 }, { "epoch": 0.08307771693948342, "grad_norm": 0.0, "learning_rate": 4.376526921848688e-06, "loss": 14.9887, "step": 1449 }, { "epoch": 0.08313505145773013, "grad_norm": 0.0, "learning_rate": 4.3703865952037354e-06, "loss": 15.148, "step": 1450 }, { "epoch": 0.08319238597597683, "grad_norm": 0.0, "learning_rate": 4.364247233254654e-06, "loss": 15.2904, "step": 1451 }, { "epoch": 0.08324972049422355, "grad_norm": 0.0, "learning_rate": 4.3581088454081975e-06, "loss": 14.9469, "step": 1452 }, { "epoch": 0.08330705501247025, "grad_norm": 0.0, "learning_rate": 4.351971441069622e-06, "loss": 15.0734, "step": 1453 }, { "epoch": 0.08336438953071697, "grad_norm": 0.0, "learning_rate": 4.345835029642685e-06, "loss": 15.4407, "step": 1454 }, { "epoch": 0.08342172404896368, "grad_norm": 0.0, "learning_rate": 4.339699620529614e-06, "loss": 15.0554, "step": 1455 }, { "epoch": 0.08347905856721038, "grad_norm": 0.0, "learning_rate": 4.333565223131107e-06, "loss": 15.0257, "step": 1456 }, { "epoch": 0.0835363930854571, "grad_norm": 0.0, "learning_rate": 4.327431846846306e-06, "loss": 15.2727, "step": 1457 }, { "epoch": 0.08359372760370382, "grad_norm": 0.0, "learning_rate": 4.321299501072797e-06, "loss": 15.2608, "step": 1458 }, { "epoch": 0.08365106212195052, "grad_norm": 0.0, "learning_rate": 4.315168195206574e-06, "loss": 15.1937, "step": 1459 }, { "epoch": 0.08370839664019723, "grad_norm": 0.0, "learning_rate": 4.309037938642053e-06, "loss": 15.3834, "step": 1460 }, { "epoch": 0.08376573115844393, "grad_norm": 0.0, "learning_rate": 4.30290874077203e-06, "loss": 15.4123, "step": 1461 }, { "epoch": 0.08382306567669065, "grad_norm": 0.0, "learning_rate": 4.296780610987685e-06, "loss": 14.9967, "step": 1462 }, { "epoch": 0.08388040019493737, "grad_norm": 0.0, "learning_rate": 4.290653558678559e-06, "loss": 15.1021, "step": 1463 }, { "epoch": 0.08393773471318407, "grad_norm": 0.0, "learning_rate": 4.284527593232545e-06, "loss": 15.4646, "step": 1464 }, { "epoch": 0.08399506923143078, "grad_norm": 0.0, "learning_rate": 4.278402724035868e-06, "loss": 15.189, "step": 1465 }, { "epoch": 0.0840524037496775, "grad_norm": 0.0, "learning_rate": 4.272278960473074e-06, "loss": 15.1943, "step": 1466 }, { "epoch": 0.0841097382679242, "grad_norm": 0.0, "learning_rate": 4.2661563119270135e-06, "loss": 15.3133, "step": 1467 }, { "epoch": 0.08416707278617092, "grad_norm": 0.0, "learning_rate": 4.260034787778833e-06, "loss": 14.9534, "step": 1468 }, { "epoch": 0.08422440730441763, "grad_norm": 0.0, "learning_rate": 4.253914397407952e-06, "loss": 15.3099, "step": 1469 }, { "epoch": 0.08428174182266433, "grad_norm": 0.0, "learning_rate": 4.247795150192054e-06, "loss": 14.879, "step": 1470 }, { "epoch": 0.08433907634091105, "grad_norm": 0.0, "learning_rate": 4.241677055507071e-06, "loss": 14.9269, "step": 1471 }, { "epoch": 0.08439641085915775, "grad_norm": 0.0, "learning_rate": 4.235560122727171e-06, "loss": 15.1852, "step": 1472 }, { "epoch": 0.08445374537740447, "grad_norm": 0.0, "learning_rate": 4.229444361224734e-06, "loss": 15.3213, "step": 1473 }, { "epoch": 0.08451107989565118, "grad_norm": 0.0, "learning_rate": 4.223329780370359e-06, "loss": 15.2615, "step": 1474 }, { "epoch": 0.08456841441389788, "grad_norm": 0.0, "learning_rate": 4.217216389532824e-06, "loss": 15.2346, "step": 1475 }, { "epoch": 0.0846257489321446, "grad_norm": 0.0, "learning_rate": 4.211104198079089e-06, "loss": 15.0628, "step": 1476 }, { "epoch": 0.08468308345039131, "grad_norm": 0.0, "learning_rate": 4.2049932153742726e-06, "loss": 15.0532, "step": 1477 }, { "epoch": 0.08474041796863802, "grad_norm": 0.0, "learning_rate": 4.198883450781649e-06, "loss": 15.0043, "step": 1478 }, { "epoch": 0.08479775248688473, "grad_norm": 0.0, "learning_rate": 4.192774913662618e-06, "loss": 15.0565, "step": 1479 }, { "epoch": 0.08485508700513143, "grad_norm": 0.0, "learning_rate": 4.186667613376702e-06, "loss": 15.3671, "step": 1480 }, { "epoch": 0.08491242152337815, "grad_norm": 0.0, "learning_rate": 4.180561559281526e-06, "loss": 15.0956, "step": 1481 }, { "epoch": 0.08496975604162486, "grad_norm": 0.0, "learning_rate": 4.174456760732813e-06, "loss": 15.2333, "step": 1482 }, { "epoch": 0.08502709055987157, "grad_norm": 0.0, "learning_rate": 4.1683532270843505e-06, "loss": 14.9798, "step": 1483 }, { "epoch": 0.08508442507811828, "grad_norm": 0.0, "learning_rate": 4.162250967687999e-06, "loss": 15.3353, "step": 1484 }, { "epoch": 0.085141759596365, "grad_norm": 0.0, "learning_rate": 4.1561499918936575e-06, "loss": 15.0471, "step": 1485 }, { "epoch": 0.0851990941146117, "grad_norm": 0.0, "learning_rate": 4.150050309049267e-06, "loss": 15.4525, "step": 1486 }, { "epoch": 0.08525642863285841, "grad_norm": 0.0, "learning_rate": 4.143951928500778e-06, "loss": 15.3145, "step": 1487 }, { "epoch": 0.08531376315110513, "grad_norm": 0.0, "learning_rate": 4.137854859592157e-06, "loss": 15.0502, "step": 1488 }, { "epoch": 0.08537109766935183, "grad_norm": 0.0, "learning_rate": 4.131759111665349e-06, "loss": 15.1505, "step": 1489 }, { "epoch": 0.08542843218759855, "grad_norm": 0.0, "learning_rate": 4.125664694060283e-06, "loss": 15.4517, "step": 1490 }, { "epoch": 0.08548576670584525, "grad_norm": 0.0, "learning_rate": 4.119571616114845e-06, "loss": 15.42, "step": 1491 }, { "epoch": 0.08554310122409196, "grad_norm": 0.0, "learning_rate": 4.113479887164873e-06, "loss": 15.2936, "step": 1492 }, { "epoch": 0.08560043574233868, "grad_norm": 0.0, "learning_rate": 4.107389516544134e-06, "loss": 15.0335, "step": 1493 }, { "epoch": 0.08565777026058538, "grad_norm": 0.0, "learning_rate": 4.101300513584317e-06, "loss": 14.8875, "step": 1494 }, { "epoch": 0.0857151047788321, "grad_norm": 0.0, "learning_rate": 4.09521288761501e-06, "loss": 15.1966, "step": 1495 }, { "epoch": 0.08577243929707881, "grad_norm": 0.0, "learning_rate": 4.0891266479637e-06, "loss": 15.1653, "step": 1496 }, { "epoch": 0.08582977381532551, "grad_norm": 0.0, "learning_rate": 4.083041803955739e-06, "loss": 15.4002, "step": 1497 }, { "epoch": 0.08588710833357223, "grad_norm": 0.0, "learning_rate": 4.076958364914352e-06, "loss": 15.1514, "step": 1498 }, { "epoch": 0.08594444285181893, "grad_norm": 0.0, "learning_rate": 4.070876340160601e-06, "loss": 14.9568, "step": 1499 }, { "epoch": 0.08600177737006565, "grad_norm": 0.0, "learning_rate": 4.064795739013389e-06, "loss": 15.1858, "step": 1500 }, { "epoch": 0.08605911188831236, "grad_norm": 0.0, "learning_rate": 4.0587165707894326e-06, "loss": 15.3825, "step": 1501 }, { "epoch": 0.08611644640655906, "grad_norm": 0.0, "learning_rate": 4.0526388448032564e-06, "loss": 15.4134, "step": 1502 }, { "epoch": 0.08617378092480578, "grad_norm": 0.0, "learning_rate": 4.046562570367172e-06, "loss": 15.1151, "step": 1503 }, { "epoch": 0.0862311154430525, "grad_norm": 0.0, "learning_rate": 4.04048775679127e-06, "loss": 15.2341, "step": 1504 }, { "epoch": 0.0862884499612992, "grad_norm": 0.0, "learning_rate": 4.034414413383397e-06, "loss": 15.0329, "step": 1505 }, { "epoch": 0.08634578447954591, "grad_norm": 0.0, "learning_rate": 4.028342549449157e-06, "loss": 15.1716, "step": 1506 }, { "epoch": 0.08640311899779263, "grad_norm": 0.0, "learning_rate": 4.022272174291878e-06, "loss": 15.3237, "step": 1507 }, { "epoch": 0.08646045351603933, "grad_norm": 0.0, "learning_rate": 4.0162032972126105e-06, "loss": 15.2783, "step": 1508 }, { "epoch": 0.08651778803428604, "grad_norm": 0.0, "learning_rate": 4.010135927510109e-06, "loss": 15.4047, "step": 1509 }, { "epoch": 0.08657512255253275, "grad_norm": 0.0, "learning_rate": 4.004070074480821e-06, "loss": 15.3116, "step": 1510 }, { "epoch": 0.08663245707077946, "grad_norm": 0.0, "learning_rate": 3.998005747418862e-06, "loss": 14.971, "step": 1511 }, { "epoch": 0.08668979158902618, "grad_norm": 0.0, "learning_rate": 3.991942955616023e-06, "loss": 15.1829, "step": 1512 }, { "epoch": 0.08674712610727288, "grad_norm": 0.0, "learning_rate": 3.985881708361729e-06, "loss": 15.1694, "step": 1513 }, { "epoch": 0.0868044606255196, "grad_norm": 0.0, "learning_rate": 3.979822014943049e-06, "loss": 15.276, "step": 1514 }, { "epoch": 0.08686179514376631, "grad_norm": 0.0, "learning_rate": 3.9737638846446605e-06, "loss": 15.0269, "step": 1515 }, { "epoch": 0.08691912966201301, "grad_norm": 0.0, "learning_rate": 3.967707326748857e-06, "loss": 15.2615, "step": 1516 }, { "epoch": 0.08697646418025973, "grad_norm": 0.0, "learning_rate": 3.961652350535517e-06, "loss": 15.2742, "step": 1517 }, { "epoch": 0.08703379869850644, "grad_norm": 0.0, "learning_rate": 3.955598965282096e-06, "loss": 15.3354, "step": 1518 }, { "epoch": 0.08709113321675314, "grad_norm": 0.0, "learning_rate": 3.94954718026361e-06, "loss": 15.3394, "step": 1519 }, { "epoch": 0.08714846773499986, "grad_norm": 0.0, "learning_rate": 3.94349700475263e-06, "loss": 15.1984, "step": 1520 }, { "epoch": 0.08720580225324656, "grad_norm": 0.0, "learning_rate": 3.93744844801925e-06, "loss": 14.8813, "step": 1521 }, { "epoch": 0.08726313677149328, "grad_norm": 0.0, "learning_rate": 3.931401519331095e-06, "loss": 15.358, "step": 1522 }, { "epoch": 0.08732047128973999, "grad_norm": 0.0, "learning_rate": 3.9253562279532884e-06, "loss": 15.1213, "step": 1523 }, { "epoch": 0.0873778058079867, "grad_norm": 0.0, "learning_rate": 3.919312583148449e-06, "loss": 15.029, "step": 1524 }, { "epoch": 0.08743514032623341, "grad_norm": 0.0, "learning_rate": 3.913270594176665e-06, "loss": 14.8806, "step": 1525 }, { "epoch": 0.08749247484448013, "grad_norm": 0.0, "learning_rate": 3.907230270295499e-06, "loss": 15.3686, "step": 1526 }, { "epoch": 0.08754980936272683, "grad_norm": 0.0, "learning_rate": 3.901191620759954e-06, "loss": 15.0427, "step": 1527 }, { "epoch": 0.08760714388097354, "grad_norm": 0.0, "learning_rate": 3.895154654822471e-06, "loss": 15.1566, "step": 1528 }, { "epoch": 0.08766447839922024, "grad_norm": 0.0, "learning_rate": 3.8891193817329085e-06, "loss": 15.1766, "step": 1529 }, { "epoch": 0.08772181291746696, "grad_norm": 0.0, "learning_rate": 3.883085810738536e-06, "loss": 15.1378, "step": 1530 }, { "epoch": 0.08777914743571368, "grad_norm": 0.0, "learning_rate": 3.87705395108401e-06, "loss": 15.2077, "step": 1531 }, { "epoch": 0.08783648195396038, "grad_norm": 0.0, "learning_rate": 3.8710238120113675e-06, "loss": 15.1772, "step": 1532 }, { "epoch": 0.08789381647220709, "grad_norm": 0.0, "learning_rate": 3.864995402760009e-06, "loss": 15.4355, "step": 1533 }, { "epoch": 0.08795115099045381, "grad_norm": 0.0, "learning_rate": 3.858968732566685e-06, "loss": 15.0356, "step": 1534 }, { "epoch": 0.08800848550870051, "grad_norm": 0.0, "learning_rate": 3.8529438106654785e-06, "loss": 15.1937, "step": 1535 }, { "epoch": 0.08806582002694723, "grad_norm": 0.0, "learning_rate": 3.8469206462878e-06, "loss": 15.0776, "step": 1536 }, { "epoch": 0.08812315454519394, "grad_norm": 0.0, "learning_rate": 3.840899248662358e-06, "loss": 15.2202, "step": 1537 }, { "epoch": 0.08818048906344064, "grad_norm": 0.0, "learning_rate": 3.834879627015165e-06, "loss": 15.2026, "step": 1538 }, { "epoch": 0.08823782358168736, "grad_norm": 0.0, "learning_rate": 3.8288617905695005e-06, "loss": 15.211, "step": 1539 }, { "epoch": 0.08829515809993406, "grad_norm": 0.0, "learning_rate": 3.822845748545919e-06, "loss": 15.3265, "step": 1540 }, { "epoch": 0.08835249261818078, "grad_norm": 0.0, "learning_rate": 3.816831510162219e-06, "loss": 15.3084, "step": 1541 }, { "epoch": 0.08840982713642749, "grad_norm": 0.0, "learning_rate": 3.8108190846334402e-06, "loss": 15.1557, "step": 1542 }, { "epoch": 0.08846716165467419, "grad_norm": 0.0, "learning_rate": 3.8048084811718377e-06, "loss": 15.0298, "step": 1543 }, { "epoch": 0.08852449617292091, "grad_norm": 0.0, "learning_rate": 3.798799708986885e-06, "loss": 15.454, "step": 1544 }, { "epoch": 0.08858183069116762, "grad_norm": 0.0, "learning_rate": 3.7927927772852395e-06, "loss": 15.0659, "step": 1545 }, { "epoch": 0.08863916520941433, "grad_norm": 0.0, "learning_rate": 3.786787695270743e-06, "loss": 15.1532, "step": 1546 }, { "epoch": 0.08869649972766104, "grad_norm": 0.0, "learning_rate": 3.7807844721444063e-06, "loss": 15.2533, "step": 1547 }, { "epoch": 0.08875383424590774, "grad_norm": 0.0, "learning_rate": 3.7747831171043865e-06, "loss": 14.971, "step": 1548 }, { "epoch": 0.08881116876415446, "grad_norm": 0.0, "learning_rate": 3.7687836393459828e-06, "loss": 15.3314, "step": 1549 }, { "epoch": 0.08886850328240117, "grad_norm": 0.0, "learning_rate": 3.7627860480616128e-06, "loss": 15.2095, "step": 1550 }, { "epoch": 0.08892583780064788, "grad_norm": 0.0, "learning_rate": 3.756790352440811e-06, "loss": 14.8912, "step": 1551 }, { "epoch": 0.08898317231889459, "grad_norm": 0.0, "learning_rate": 3.7507965616702015e-06, "loss": 15.3292, "step": 1552 }, { "epoch": 0.0890405068371413, "grad_norm": 0.0, "learning_rate": 3.744804684933492e-06, "loss": 15.1989, "step": 1553 }, { "epoch": 0.08909784135538801, "grad_norm": 0.0, "learning_rate": 3.7388147314114554e-06, "loss": 15.6162, "step": 1554 }, { "epoch": 0.08915517587363472, "grad_norm": 0.0, "learning_rate": 3.732826710281923e-06, "loss": 14.9143, "step": 1555 }, { "epoch": 0.08921251039188144, "grad_norm": 0.0, "learning_rate": 3.7268406307197568e-06, "loss": 14.7441, "step": 1556 }, { "epoch": 0.08926984491012814, "grad_norm": 0.0, "learning_rate": 3.7208565018968545e-06, "loss": 14.9323, "step": 1557 }, { "epoch": 0.08932717942837486, "grad_norm": 0.0, "learning_rate": 3.7148743329821146e-06, "loss": 15.1762, "step": 1558 }, { "epoch": 0.08938451394662156, "grad_norm": 0.0, "learning_rate": 3.7088941331414418e-06, "loss": 15.7279, "step": 1559 }, { "epoch": 0.08944184846486827, "grad_norm": 0.0, "learning_rate": 3.702915911537714e-06, "loss": 15.1863, "step": 1560 }, { "epoch": 0.08949918298311499, "grad_norm": 0.0, "learning_rate": 3.6969396773307888e-06, "loss": 15.29, "step": 1561 }, { "epoch": 0.08955651750136169, "grad_norm": 0.0, "learning_rate": 3.6909654396774684e-06, "loss": 14.9259, "step": 1562 }, { "epoch": 0.0896138520196084, "grad_norm": 0.0, "learning_rate": 3.684993207731505e-06, "loss": 14.9451, "step": 1563 }, { "epoch": 0.08967118653785512, "grad_norm": 0.0, "learning_rate": 3.6790229906435706e-06, "loss": 15.2632, "step": 1564 }, { "epoch": 0.08972852105610182, "grad_norm": 0.0, "learning_rate": 3.673054797561254e-06, "loss": 15.0482, "step": 1565 }, { "epoch": 0.08978585557434854, "grad_norm": 0.0, "learning_rate": 3.667088637629041e-06, "loss": 14.9746, "step": 1566 }, { "epoch": 0.08984319009259524, "grad_norm": 0.0, "learning_rate": 3.6611245199883037e-06, "loss": 15.5239, "step": 1567 }, { "epoch": 0.08990052461084196, "grad_norm": 0.0, "learning_rate": 3.6551624537772834e-06, "loss": 15.5796, "step": 1568 }, { "epoch": 0.08995785912908867, "grad_norm": 0.0, "learning_rate": 3.6492024481310793e-06, "loss": 15.051, "step": 1569 }, { "epoch": 0.09001519364733537, "grad_norm": 0.0, "learning_rate": 3.6432445121816308e-06, "loss": 15.3199, "step": 1570 }, { "epoch": 0.09007252816558209, "grad_norm": 0.0, "learning_rate": 3.6372886550577125e-06, "loss": 15.3081, "step": 1571 }, { "epoch": 0.0901298626838288, "grad_norm": 0.0, "learning_rate": 3.6313348858849064e-06, "loss": 15.1435, "step": 1572 }, { "epoch": 0.0901871972020755, "grad_norm": 0.0, "learning_rate": 3.6253832137856e-06, "loss": 14.6087, "step": 1573 }, { "epoch": 0.09024453172032222, "grad_norm": 0.0, "learning_rate": 3.6194336478789638e-06, "loss": 15.2436, "step": 1574 }, { "epoch": 0.09030186623856894, "grad_norm": 0.0, "learning_rate": 3.6134861972809477e-06, "loss": 15.3815, "step": 1575 }, { "epoch": 0.09035920075681564, "grad_norm": 0.0, "learning_rate": 3.6075408711042536e-06, "loss": 15.3305, "step": 1576 }, { "epoch": 0.09041653527506235, "grad_norm": 0.0, "learning_rate": 3.6015976784583327e-06, "loss": 14.9759, "step": 1577 }, { "epoch": 0.09047386979330906, "grad_norm": 0.0, "learning_rate": 3.595656628449362e-06, "loss": 15.1466, "step": 1578 }, { "epoch": 0.09053120431155577, "grad_norm": 0.0, "learning_rate": 3.5897177301802455e-06, "loss": 15.0655, "step": 1579 }, { "epoch": 0.09058853882980249, "grad_norm": 0.0, "learning_rate": 3.5837809927505783e-06, "loss": 15.1449, "step": 1580 }, { "epoch": 0.09064587334804919, "grad_norm": 0.0, "learning_rate": 3.5778464252566536e-06, "loss": 15.1359, "step": 1581 }, { "epoch": 0.0907032078662959, "grad_norm": 0.0, "learning_rate": 3.571914036791435e-06, "loss": 15.5221, "step": 1582 }, { "epoch": 0.09076054238454262, "grad_norm": 0.0, "learning_rate": 3.5659838364445505e-06, "loss": 14.8403, "step": 1583 }, { "epoch": 0.09081787690278932, "grad_norm": 0.0, "learning_rate": 3.5600558333022707e-06, "loss": 15.2719, "step": 1584 }, { "epoch": 0.09087521142103604, "grad_norm": 0.0, "learning_rate": 3.5541300364475067e-06, "loss": 14.9916, "step": 1585 }, { "epoch": 0.09093254593928274, "grad_norm": 0.0, "learning_rate": 3.548206454959783e-06, "loss": 14.9182, "step": 1586 }, { "epoch": 0.09098988045752945, "grad_norm": 0.0, "learning_rate": 3.5422850979152335e-06, "loss": 15.0797, "step": 1587 }, { "epoch": 0.09104721497577617, "grad_norm": 0.0, "learning_rate": 3.5363659743865797e-06, "loss": 15.3647, "step": 1588 }, { "epoch": 0.09110454949402287, "grad_norm": 0.0, "learning_rate": 3.5304490934431268e-06, "loss": 15.2592, "step": 1589 }, { "epoch": 0.09116188401226959, "grad_norm": 0.0, "learning_rate": 3.5245344641507384e-06, "loss": 15.2267, "step": 1590 }, { "epoch": 0.0912192185305163, "grad_norm": 0.0, "learning_rate": 3.518622095571831e-06, "loss": 15.1416, "step": 1591 }, { "epoch": 0.091276553048763, "grad_norm": 0.0, "learning_rate": 3.512711996765355e-06, "loss": 15.4173, "step": 1592 }, { "epoch": 0.09133388756700972, "grad_norm": 0.0, "learning_rate": 3.506804176786789e-06, "loss": 15.1714, "step": 1593 }, { "epoch": 0.09139122208525643, "grad_norm": 0.0, "learning_rate": 3.5008986446881088e-06, "loss": 15.3202, "step": 1594 }, { "epoch": 0.09144855660350314, "grad_norm": 0.0, "learning_rate": 3.4949954095177986e-06, "loss": 15.217, "step": 1595 }, { "epoch": 0.09150589112174985, "grad_norm": 0.0, "learning_rate": 3.4890944803208104e-06, "loss": 14.9886, "step": 1596 }, { "epoch": 0.09156322563999655, "grad_norm": 0.0, "learning_rate": 3.4831958661385716e-06, "loss": 14.7912, "step": 1597 }, { "epoch": 0.09162056015824327, "grad_norm": 0.0, "learning_rate": 3.4772995760089573e-06, "loss": 14.9861, "step": 1598 }, { "epoch": 0.09167789467648998, "grad_norm": 0.0, "learning_rate": 3.4714056189662877e-06, "loss": 15.3865, "step": 1599 }, { "epoch": 0.09173522919473669, "grad_norm": 0.0, "learning_rate": 3.465514004041301e-06, "loss": 14.9974, "step": 1600 }, { "epoch": 0.0917925637129834, "grad_norm": 0.0, "learning_rate": 3.459624740261153e-06, "loss": 14.9746, "step": 1601 }, { "epoch": 0.09184989823123012, "grad_norm": 0.0, "learning_rate": 3.45373783664939e-06, "loss": 14.9292, "step": 1602 }, { "epoch": 0.09190723274947682, "grad_norm": 0.0, "learning_rate": 3.4478533022259527e-06, "loss": 15.2711, "step": 1603 }, { "epoch": 0.09196456726772353, "grad_norm": 0.0, "learning_rate": 3.4419711460071405e-06, "loss": 15.109, "step": 1604 }, { "epoch": 0.09202190178597024, "grad_norm": 0.0, "learning_rate": 3.4360913770056166e-06, "loss": 15.533, "step": 1605 }, { "epoch": 0.09207923630421695, "grad_norm": 0.0, "learning_rate": 3.4302140042303813e-06, "loss": 15.1691, "step": 1606 }, { "epoch": 0.09213657082246367, "grad_norm": 0.0, "learning_rate": 3.424339036686768e-06, "loss": 14.9606, "step": 1607 }, { "epoch": 0.09219390534071037, "grad_norm": 0.0, "learning_rate": 3.41846648337642e-06, "loss": 15.1092, "step": 1608 }, { "epoch": 0.09225123985895708, "grad_norm": 0.0, "learning_rate": 3.4125963532972878e-06, "loss": 14.8195, "step": 1609 }, { "epoch": 0.0923085743772038, "grad_norm": 0.0, "learning_rate": 3.4067286554436024e-06, "loss": 14.756, "step": 1610 }, { "epoch": 0.0923659088954505, "grad_norm": 0.0, "learning_rate": 3.400863398805873e-06, "loss": 15.2289, "step": 1611 }, { "epoch": 0.09242324341369722, "grad_norm": 0.0, "learning_rate": 3.395000592370864e-06, "loss": 15.1732, "step": 1612 }, { "epoch": 0.09248057793194393, "grad_norm": 0.0, "learning_rate": 3.389140245121591e-06, "loss": 15.2813, "step": 1613 }, { "epoch": 0.09253791245019063, "grad_norm": 0.0, "learning_rate": 3.383282366037296e-06, "loss": 15.011, "step": 1614 }, { "epoch": 0.09259524696843735, "grad_norm": 0.0, "learning_rate": 3.3774269640934447e-06, "loss": 14.8524, "step": 1615 }, { "epoch": 0.09265258148668405, "grad_norm": 0.0, "learning_rate": 3.371574048261701e-06, "loss": 15.4559, "step": 1616 }, { "epoch": 0.09270991600493077, "grad_norm": 0.0, "learning_rate": 3.3657236275099275e-06, "loss": 15.3174, "step": 1617 }, { "epoch": 0.09276725052317748, "grad_norm": 0.0, "learning_rate": 3.3598757108021546e-06, "loss": 15.1581, "step": 1618 }, { "epoch": 0.09282458504142418, "grad_norm": 0.0, "learning_rate": 3.354030307098585e-06, "loss": 15.4304, "step": 1619 }, { "epoch": 0.0928819195596709, "grad_norm": 0.0, "learning_rate": 3.348187425355564e-06, "loss": 15.2791, "step": 1620 }, { "epoch": 0.09293925407791762, "grad_norm": 0.0, "learning_rate": 3.342347074525578e-06, "loss": 15.3398, "step": 1621 }, { "epoch": 0.09299658859616432, "grad_norm": 0.0, "learning_rate": 3.3365092635572295e-06, "loss": 14.9245, "step": 1622 }, { "epoch": 0.09305392311441103, "grad_norm": 0.0, "learning_rate": 3.3306740013952368e-06, "loss": 15.1071, "step": 1623 }, { "epoch": 0.09311125763265775, "grad_norm": 0.0, "learning_rate": 3.3248412969804065e-06, "loss": 15.2702, "step": 1624 }, { "epoch": 0.09316859215090445, "grad_norm": 0.0, "learning_rate": 3.319011159249631e-06, "loss": 14.9664, "step": 1625 }, { "epoch": 0.09322592666915117, "grad_norm": 0.0, "learning_rate": 3.313183597135865e-06, "loss": 15.1732, "step": 1626 }, { "epoch": 0.09328326118739787, "grad_norm": 0.0, "learning_rate": 3.307358619568123e-06, "loss": 15.2397, "step": 1627 }, { "epoch": 0.09334059570564458, "grad_norm": 0.0, "learning_rate": 3.301536235471453e-06, "loss": 15.1465, "step": 1628 }, { "epoch": 0.0933979302238913, "grad_norm": 0.0, "learning_rate": 3.295716453766935e-06, "loss": 15.2098, "step": 1629 }, { "epoch": 0.093455264742138, "grad_norm": 0.0, "learning_rate": 3.289899283371657e-06, "loss": 15.2483, "step": 1630 }, { "epoch": 0.09351259926038472, "grad_norm": 0.0, "learning_rate": 3.2840847331987093e-06, "loss": 15.0997, "step": 1631 }, { "epoch": 0.09356993377863143, "grad_norm": 0.0, "learning_rate": 3.2782728121571632e-06, "loss": 15.2503, "step": 1632 }, { "epoch": 0.09362726829687813, "grad_norm": 0.0, "learning_rate": 3.2724635291520697e-06, "loss": 15.3095, "step": 1633 }, { "epoch": 0.09368460281512485, "grad_norm": 0.0, "learning_rate": 3.266656893084428e-06, "loss": 15.4218, "step": 1634 }, { "epoch": 0.09374193733337155, "grad_norm": 0.0, "learning_rate": 3.2608529128511896e-06, "loss": 15.0612, "step": 1635 }, { "epoch": 0.09379927185161827, "grad_norm": 0.0, "learning_rate": 3.2550515973452295e-06, "loss": 15.3992, "step": 1636 }, { "epoch": 0.09385660636986498, "grad_norm": 0.0, "learning_rate": 3.2492529554553485e-06, "loss": 15.1745, "step": 1637 }, { "epoch": 0.09391394088811168, "grad_norm": 0.0, "learning_rate": 3.243456996066242e-06, "loss": 14.9587, "step": 1638 }, { "epoch": 0.0939712754063584, "grad_norm": 0.0, "learning_rate": 3.2376637280585025e-06, "loss": 15.0485, "step": 1639 }, { "epoch": 0.09402860992460511, "grad_norm": 0.0, "learning_rate": 3.2318731603085923e-06, "loss": 15.0185, "step": 1640 }, { "epoch": 0.09408594444285182, "grad_norm": 0.0, "learning_rate": 3.2260853016888443e-06, "loss": 15.2848, "step": 1641 }, { "epoch": 0.09414327896109853, "grad_norm": 0.0, "learning_rate": 3.2203001610674322e-06, "loss": 15.0875, "step": 1642 }, { "epoch": 0.09420061347934525, "grad_norm": 0.0, "learning_rate": 3.214517747308368e-06, "loss": 15.0593, "step": 1643 }, { "epoch": 0.09425794799759195, "grad_norm": 0.0, "learning_rate": 3.2087380692714887e-06, "loss": 15.1293, "step": 1644 }, { "epoch": 0.09431528251583866, "grad_norm": 0.0, "learning_rate": 3.202961135812437e-06, "loss": 14.9762, "step": 1645 }, { "epoch": 0.09437261703408537, "grad_norm": 0.0, "learning_rate": 3.1971869557826507e-06, "loss": 15.3738, "step": 1646 }, { "epoch": 0.09442995155233208, "grad_norm": 0.0, "learning_rate": 3.191415538029346e-06, "loss": 15.1781, "step": 1647 }, { "epoch": 0.0944872860705788, "grad_norm": 0.0, "learning_rate": 3.185646891395514e-06, "loss": 15.2245, "step": 1648 }, { "epoch": 0.0945446205888255, "grad_norm": 0.0, "learning_rate": 3.1798810247198925e-06, "loss": 15.0903, "step": 1649 }, { "epoch": 0.09460195510707221, "grad_norm": 0.0, "learning_rate": 3.174117946836964e-06, "loss": 15.0486, "step": 1650 }, { "epoch": 0.09465928962531893, "grad_norm": 0.0, "learning_rate": 3.1683576665769344e-06, "loss": 15.0967, "step": 1651 }, { "epoch": 0.09471662414356563, "grad_norm": 0.0, "learning_rate": 3.1626001927657287e-06, "loss": 15.1772, "step": 1652 }, { "epoch": 0.09477395866181235, "grad_norm": 0.0, "learning_rate": 3.1568455342249654e-06, "loss": 15.0888, "step": 1653 }, { "epoch": 0.09483129318005905, "grad_norm": 0.0, "learning_rate": 3.1510936997719557e-06, "loss": 15.2379, "step": 1654 }, { "epoch": 0.09488862769830576, "grad_norm": 0.0, "learning_rate": 3.145344698219677e-06, "loss": 14.9315, "step": 1655 }, { "epoch": 0.09494596221655248, "grad_norm": 0.0, "learning_rate": 3.1395985383767734e-06, "loss": 15.681, "step": 1656 }, { "epoch": 0.09500329673479918, "grad_norm": 0.0, "learning_rate": 3.1338552290475265e-06, "loss": 15.0557, "step": 1657 }, { "epoch": 0.0950606312530459, "grad_norm": 0.0, "learning_rate": 3.12811477903186e-06, "loss": 15.0777, "step": 1658 }, { "epoch": 0.09511796577129261, "grad_norm": 0.0, "learning_rate": 3.1223771971253093e-06, "loss": 15.2504, "step": 1659 }, { "epoch": 0.09517530028953931, "grad_norm": 0.0, "learning_rate": 3.1166424921190174e-06, "loss": 15.0185, "step": 1660 }, { "epoch": 0.09523263480778603, "grad_norm": 0.0, "learning_rate": 3.1109106727997184e-06, "loss": 15.0898, "step": 1661 }, { "epoch": 0.09528996932603274, "grad_norm": 0.0, "learning_rate": 3.1051817479497297e-06, "loss": 15.0596, "step": 1662 }, { "epoch": 0.09534730384427945, "grad_norm": 0.0, "learning_rate": 3.0994557263469267e-06, "loss": 15.1607, "step": 1663 }, { "epoch": 0.09540463836252616, "grad_norm": 0.0, "learning_rate": 3.093732616764742e-06, "loss": 15.1243, "step": 1664 }, { "epoch": 0.09546197288077286, "grad_norm": 0.0, "learning_rate": 3.0880124279721408e-06, "loss": 15.0445, "step": 1665 }, { "epoch": 0.09551930739901958, "grad_norm": 0.0, "learning_rate": 3.0822951687336215e-06, "loss": 14.8608, "step": 1666 }, { "epoch": 0.0955766419172663, "grad_norm": 0.0, "learning_rate": 3.076580847809184e-06, "loss": 15.4273, "step": 1667 }, { "epoch": 0.095633976435513, "grad_norm": 0.0, "learning_rate": 3.0708694739543345e-06, "loss": 15.1485, "step": 1668 }, { "epoch": 0.09569131095375971, "grad_norm": 0.0, "learning_rate": 3.065161055920057e-06, "loss": 15.4583, "step": 1669 }, { "epoch": 0.09574864547200643, "grad_norm": 0.0, "learning_rate": 3.0594556024528134e-06, "loss": 14.7834, "step": 1670 }, { "epoch": 0.09580597999025313, "grad_norm": 0.0, "learning_rate": 3.053753122294515e-06, "loss": 14.9889, "step": 1671 }, { "epoch": 0.09586331450849984, "grad_norm": 0.0, "learning_rate": 3.0480536241825263e-06, "loss": 14.956, "step": 1672 }, { "epoch": 0.09592064902674655, "grad_norm": 0.0, "learning_rate": 3.0423571168496356e-06, "loss": 15.1446, "step": 1673 }, { "epoch": 0.09597798354499326, "grad_norm": 0.0, "learning_rate": 3.036663609024054e-06, "loss": 15.4386, "step": 1674 }, { "epoch": 0.09603531806323998, "grad_norm": 0.0, "learning_rate": 3.03097310942939e-06, "loss": 14.9984, "step": 1675 }, { "epoch": 0.09609265258148668, "grad_norm": 0.0, "learning_rate": 3.025285626784651e-06, "loss": 15.175, "step": 1676 }, { "epoch": 0.0961499870997334, "grad_norm": 0.0, "learning_rate": 3.019601169804216e-06, "loss": 15.1857, "step": 1677 }, { "epoch": 0.09620732161798011, "grad_norm": 0.0, "learning_rate": 3.013919747197832e-06, "loss": 14.9624, "step": 1678 }, { "epoch": 0.09626465613622681, "grad_norm": 0.0, "learning_rate": 3.0082413676705914e-06, "loss": 15.1623, "step": 1679 }, { "epoch": 0.09632199065447353, "grad_norm": 0.0, "learning_rate": 3.00256603992293e-06, "loss": 15.2423, "step": 1680 }, { "epoch": 0.09637932517272024, "grad_norm": 0.0, "learning_rate": 2.996893772650602e-06, "loss": 15.1929, "step": 1681 }, { "epoch": 0.09643665969096694, "grad_norm": 0.0, "learning_rate": 2.99122457454468e-06, "loss": 15.4669, "step": 1682 }, { "epoch": 0.09649399420921366, "grad_norm": 0.0, "learning_rate": 2.985558454291525e-06, "loss": 15.1124, "step": 1683 }, { "epoch": 0.09655132872746036, "grad_norm": 0.0, "learning_rate": 2.9798954205727886e-06, "loss": 15.2577, "step": 1684 }, { "epoch": 0.09660866324570708, "grad_norm": 0.0, "learning_rate": 2.9742354820653884e-06, "loss": 15.0487, "step": 1685 }, { "epoch": 0.09666599776395379, "grad_norm": 0.0, "learning_rate": 2.9685786474415057e-06, "loss": 14.9933, "step": 1686 }, { "epoch": 0.0967233322822005, "grad_norm": 0.0, "learning_rate": 2.96292492536856e-06, "loss": 15.0056, "step": 1687 }, { "epoch": 0.09678066680044721, "grad_norm": 0.0, "learning_rate": 2.957274324509206e-06, "loss": 15.1144, "step": 1688 }, { "epoch": 0.09683800131869393, "grad_norm": 0.0, "learning_rate": 2.95162685352131e-06, "loss": 15.3127, "step": 1689 }, { "epoch": 0.09689533583694063, "grad_norm": 0.0, "learning_rate": 2.9459825210579534e-06, "loss": 15.1207, "step": 1690 }, { "epoch": 0.09695267035518734, "grad_norm": 0.0, "learning_rate": 2.9403413357673955e-06, "loss": 14.9744, "step": 1691 }, { "epoch": 0.09701000487343404, "grad_norm": 0.0, "learning_rate": 2.9347033062930856e-06, "loss": 15.2412, "step": 1692 }, { "epoch": 0.09706733939168076, "grad_norm": 0.0, "learning_rate": 2.929068441273629e-06, "loss": 14.9284, "step": 1693 }, { "epoch": 0.09712467390992748, "grad_norm": 0.0, "learning_rate": 2.923436749342788e-06, "loss": 15.1222, "step": 1694 }, { "epoch": 0.09718200842817418, "grad_norm": 0.0, "learning_rate": 2.9178082391294573e-06, "loss": 15.1443, "step": 1695 }, { "epoch": 0.09723934294642089, "grad_norm": 0.0, "learning_rate": 2.9121829192576647e-06, "loss": 15.1553, "step": 1696 }, { "epoch": 0.09729667746466761, "grad_norm": 0.0, "learning_rate": 2.90656079834654e-06, "loss": 15.2868, "step": 1697 }, { "epoch": 0.09735401198291431, "grad_norm": 0.0, "learning_rate": 2.9009418850103218e-06, "loss": 15.0563, "step": 1698 }, { "epoch": 0.09741134650116103, "grad_norm": 0.0, "learning_rate": 2.8953261878583263e-06, "loss": 15.0829, "step": 1699 }, { "epoch": 0.09746868101940774, "grad_norm": 0.0, "learning_rate": 2.889713715494944e-06, "loss": 15.2201, "step": 1700 }, { "epoch": 0.09752601553765444, "grad_norm": 0.0, "learning_rate": 2.8841044765196236e-06, "loss": 15.1362, "step": 1701 }, { "epoch": 0.09758335005590116, "grad_norm": 0.0, "learning_rate": 2.8784984795268644e-06, "loss": 15.1359, "step": 1702 }, { "epoch": 0.09764068457414786, "grad_norm": 0.0, "learning_rate": 2.8728957331061914e-06, "loss": 15.3242, "step": 1703 }, { "epoch": 0.09769801909239458, "grad_norm": 0.0, "learning_rate": 2.8672962458421548e-06, "loss": 15.243, "step": 1704 }, { "epoch": 0.09775535361064129, "grad_norm": 0.0, "learning_rate": 2.861700026314308e-06, "loss": 15.295, "step": 1705 }, { "epoch": 0.09781268812888799, "grad_norm": 0.0, "learning_rate": 2.8561070830971975e-06, "loss": 14.8623, "step": 1706 }, { "epoch": 0.09787002264713471, "grad_norm": 0.0, "learning_rate": 2.8505174247603495e-06, "loss": 15.1138, "step": 1707 }, { "epoch": 0.09792735716538142, "grad_norm": 0.0, "learning_rate": 2.844931059868261e-06, "loss": 15.2223, "step": 1708 }, { "epoch": 0.09798469168362813, "grad_norm": 0.0, "learning_rate": 2.839347996980376e-06, "loss": 15.2052, "step": 1709 }, { "epoch": 0.09804202620187484, "grad_norm": 0.0, "learning_rate": 2.8337682446510883e-06, "loss": 15.1068, "step": 1710 }, { "epoch": 0.09809936072012154, "grad_norm": 0.0, "learning_rate": 2.828191811429709e-06, "loss": 15.4515, "step": 1711 }, { "epoch": 0.09815669523836826, "grad_norm": 0.0, "learning_rate": 2.8226187058604735e-06, "loss": 15.5278, "step": 1712 }, { "epoch": 0.09821402975661497, "grad_norm": 0.0, "learning_rate": 2.8170489364825106e-06, "loss": 14.9237, "step": 1713 }, { "epoch": 0.09827136427486168, "grad_norm": 0.0, "learning_rate": 2.811482511829842e-06, "loss": 15.1981, "step": 1714 }, { "epoch": 0.09832869879310839, "grad_norm": 0.0, "learning_rate": 2.805919440431359e-06, "loss": 15.1981, "step": 1715 }, { "epoch": 0.0983860333113551, "grad_norm": 0.0, "learning_rate": 2.8003597308108246e-06, "loss": 14.7001, "step": 1716 }, { "epoch": 0.09844336782960181, "grad_norm": 0.0, "learning_rate": 2.7948033914868415e-06, "loss": 15.3086, "step": 1717 }, { "epoch": 0.09850070234784852, "grad_norm": 0.0, "learning_rate": 2.7892504309728564e-06, "loss": 14.7995, "step": 1718 }, { "epoch": 0.09855803686609524, "grad_norm": 0.0, "learning_rate": 2.7837008577771317e-06, "loss": 15.2355, "step": 1719 }, { "epoch": 0.09861537138434194, "grad_norm": 0.0, "learning_rate": 2.778154680402745e-06, "loss": 14.8578, "step": 1720 }, { "epoch": 0.09867270590258866, "grad_norm": 0.0, "learning_rate": 2.7726119073475643e-06, "loss": 15.1245, "step": 1721 }, { "epoch": 0.09873004042083536, "grad_norm": 0.0, "learning_rate": 2.7670725471042526e-06, "loss": 14.927, "step": 1722 }, { "epoch": 0.09878737493908207, "grad_norm": 0.0, "learning_rate": 2.7615366081602306e-06, "loss": 15.2329, "step": 1723 }, { "epoch": 0.09884470945732879, "grad_norm": 0.0, "learning_rate": 2.7560040989976894e-06, "loss": 15.0808, "step": 1724 }, { "epoch": 0.09890204397557549, "grad_norm": 0.0, "learning_rate": 2.750475028093554e-06, "loss": 15.1372, "step": 1725 }, { "epoch": 0.0989593784938222, "grad_norm": 0.0, "learning_rate": 2.74494940391949e-06, "loss": 15.2879, "step": 1726 }, { "epoch": 0.09901671301206892, "grad_norm": 0.0, "learning_rate": 2.7394272349418776e-06, "loss": 15.1674, "step": 1727 }, { "epoch": 0.09907404753031562, "grad_norm": 0.0, "learning_rate": 2.733908529621802e-06, "loss": 15.0526, "step": 1728 }, { "epoch": 0.09913138204856234, "grad_norm": 0.0, "learning_rate": 2.7283932964150417e-06, "loss": 15.5379, "step": 1729 }, { "epoch": 0.09918871656680905, "grad_norm": 0.0, "learning_rate": 2.7228815437720602e-06, "loss": 15.2825, "step": 1730 }, { "epoch": 0.09924605108505576, "grad_norm": 0.0, "learning_rate": 2.7173732801379805e-06, "loss": 15.0891, "step": 1731 }, { "epoch": 0.09930338560330247, "grad_norm": 0.0, "learning_rate": 2.711868513952587e-06, "loss": 15.1538, "step": 1732 }, { "epoch": 0.09936072012154917, "grad_norm": 0.0, "learning_rate": 2.7063672536502995e-06, "loss": 15.2978, "step": 1733 }, { "epoch": 0.09941805463979589, "grad_norm": 0.0, "learning_rate": 2.7008695076601693e-06, "loss": 14.8973, "step": 1734 }, { "epoch": 0.0994753891580426, "grad_norm": 0.0, "learning_rate": 2.69537528440586e-06, "loss": 15.4109, "step": 1735 }, { "epoch": 0.0995327236762893, "grad_norm": 0.0, "learning_rate": 2.6898845923056437e-06, "loss": 15.1761, "step": 1736 }, { "epoch": 0.09959005819453602, "grad_norm": 0.0, "learning_rate": 2.6843974397723736e-06, "loss": 14.8358, "step": 1737 }, { "epoch": 0.09964739271278274, "grad_norm": 0.0, "learning_rate": 2.6789138352134885e-06, "loss": 14.9992, "step": 1738 }, { "epoch": 0.09970472723102944, "grad_norm": 0.0, "learning_rate": 2.6734337870309844e-06, "loss": 15.0057, "step": 1739 }, { "epoch": 0.09976206174927615, "grad_norm": 0.0, "learning_rate": 2.6679573036214112e-06, "loss": 14.9869, "step": 1740 }, { "epoch": 0.09981939626752286, "grad_norm": 0.0, "learning_rate": 2.6624843933758547e-06, "loss": 15.4995, "step": 1741 }, { "epoch": 0.09987673078576957, "grad_norm": 0.0, "learning_rate": 2.6570150646799266e-06, "loss": 15.1863, "step": 1742 }, { "epoch": 0.09993406530401629, "grad_norm": 0.0, "learning_rate": 2.6515493259137546e-06, "loss": 15.227, "step": 1743 }, { "epoch": 0.09999139982226299, "grad_norm": 0.0, "learning_rate": 2.6460871854519594e-06, "loss": 14.8933, "step": 1744 }, { "epoch": 0.1000487343405097, "grad_norm": 0.0, "learning_rate": 2.6406286516636546e-06, "loss": 14.9753, "step": 1745 }, { "epoch": 0.10010606885875642, "grad_norm": 0.0, "learning_rate": 2.635173732912423e-06, "loss": 15.2712, "step": 1746 }, { "epoch": 0.10016340337700312, "grad_norm": 0.0, "learning_rate": 2.6297224375563126e-06, "loss": 15.0092, "step": 1747 }, { "epoch": 0.10022073789524984, "grad_norm": 0.0, "learning_rate": 2.6242747739478158e-06, "loss": 15.1965, "step": 1748 }, { "epoch": 0.10027807241349655, "grad_norm": 0.0, "learning_rate": 2.618830750433862e-06, "loss": 15.0236, "step": 1749 }, { "epoch": 0.10033540693174325, "grad_norm": 0.0, "learning_rate": 2.613390375355801e-06, "loss": 14.9518, "step": 1750 }, { "epoch": 0.10039274144998997, "grad_norm": 0.0, "learning_rate": 2.607953657049398e-06, "loss": 14.8813, "step": 1751 }, { "epoch": 0.10045007596823667, "grad_norm": 0.0, "learning_rate": 2.60252060384481e-06, "loss": 15.0054, "step": 1752 }, { "epoch": 0.10050741048648339, "grad_norm": 0.0, "learning_rate": 2.5970912240665815e-06, "loss": 14.9681, "step": 1753 }, { "epoch": 0.1005647450047301, "grad_norm": 0.0, "learning_rate": 2.591665526033628e-06, "loss": 14.9709, "step": 1754 }, { "epoch": 0.1006220795229768, "grad_norm": 0.0, "learning_rate": 2.5862435180592203e-06, "loss": 15.0781, "step": 1755 }, { "epoch": 0.10067941404122352, "grad_norm": 0.0, "learning_rate": 2.5808252084509784e-06, "loss": 14.9999, "step": 1756 }, { "epoch": 0.10073674855947023, "grad_norm": 0.0, "learning_rate": 2.575410605510858e-06, "loss": 15.0287, "step": 1757 }, { "epoch": 0.10079408307771694, "grad_norm": 0.0, "learning_rate": 2.5699997175351293e-06, "loss": 15.1299, "step": 1758 }, { "epoch": 0.10085141759596365, "grad_norm": 0.0, "learning_rate": 2.5645925528143778e-06, "loss": 14.9807, "step": 1759 }, { "epoch": 0.10090875211421035, "grad_norm": 0.0, "learning_rate": 2.559189119633476e-06, "loss": 15.2697, "step": 1760 }, { "epoch": 0.10096608663245707, "grad_norm": 0.0, "learning_rate": 2.553789426271588e-06, "loss": 15.1754, "step": 1761 }, { "epoch": 0.10102342115070378, "grad_norm": 0.0, "learning_rate": 2.54839348100214e-06, "loss": 15.2112, "step": 1762 }, { "epoch": 0.10108075566895049, "grad_norm": 0.0, "learning_rate": 2.543001292092819e-06, "loss": 15.0921, "step": 1763 }, { "epoch": 0.1011380901871972, "grad_norm": 0.0, "learning_rate": 2.5376128678055536e-06, "loss": 14.9949, "step": 1764 }, { "epoch": 0.10119542470544392, "grad_norm": 0.0, "learning_rate": 2.5322282163965096e-06, "loss": 15.0155, "step": 1765 }, { "epoch": 0.10125275922369062, "grad_norm": 0.0, "learning_rate": 2.5268473461160665e-06, "loss": 15.1644, "step": 1766 }, { "epoch": 0.10131009374193733, "grad_norm": 0.0, "learning_rate": 2.521470265208815e-06, "loss": 15.0194, "step": 1767 }, { "epoch": 0.10136742826018405, "grad_norm": 0.0, "learning_rate": 2.5160969819135368e-06, "loss": 14.571, "step": 1768 }, { "epoch": 0.10142476277843075, "grad_norm": 0.0, "learning_rate": 2.5107275044631942e-06, "loss": 15.3127, "step": 1769 }, { "epoch": 0.10148209729667747, "grad_norm": 0.0, "learning_rate": 2.5053618410849186e-06, "loss": 15.0523, "step": 1770 }, { "epoch": 0.10153943181492417, "grad_norm": 0.0, "learning_rate": 2.5000000000000015e-06, "loss": 15.1352, "step": 1771 }, { "epoch": 0.10159676633317088, "grad_norm": 0.0, "learning_rate": 2.4946419894238705e-06, "loss": 15.0326, "step": 1772 }, { "epoch": 0.1016541008514176, "grad_norm": 0.0, "learning_rate": 2.4892878175660927e-06, "loss": 15.1512, "step": 1773 }, { "epoch": 0.1017114353696643, "grad_norm": 0.0, "learning_rate": 2.483937492630345e-06, "loss": 15.2138, "step": 1774 }, { "epoch": 0.10176876988791102, "grad_norm": 0.0, "learning_rate": 2.47859102281442e-06, "loss": 15.042, "step": 1775 }, { "epoch": 0.10182610440615773, "grad_norm": 0.0, "learning_rate": 2.4732484163101896e-06, "loss": 15.1799, "step": 1776 }, { "epoch": 0.10188343892440443, "grad_norm": 0.0, "learning_rate": 2.4679096813036202e-06, "loss": 15.3713, "step": 1777 }, { "epoch": 0.10194077344265115, "grad_norm": 0.0, "learning_rate": 2.4625748259747363e-06, "loss": 14.9062, "step": 1778 }, { "epoch": 0.10199810796089785, "grad_norm": 0.0, "learning_rate": 2.457243858497626e-06, "loss": 15.33, "step": 1779 }, { "epoch": 0.10205544247914457, "grad_norm": 0.0, "learning_rate": 2.4519167870404126e-06, "loss": 15.1443, "step": 1780 }, { "epoch": 0.10211277699739128, "grad_norm": 0.0, "learning_rate": 2.4465936197652573e-06, "loss": 15.3425, "step": 1781 }, { "epoch": 0.10217011151563798, "grad_norm": 0.0, "learning_rate": 2.4412743648283343e-06, "loss": 14.8019, "step": 1782 }, { "epoch": 0.1022274460338847, "grad_norm": 0.0, "learning_rate": 2.4359590303798243e-06, "loss": 14.9075, "step": 1783 }, { "epoch": 0.10228478055213142, "grad_norm": 0.0, "learning_rate": 2.4306476245638995e-06, "loss": 15.0322, "step": 1784 }, { "epoch": 0.10234211507037812, "grad_norm": 0.0, "learning_rate": 2.4253401555187183e-06, "loss": 14.9531, "step": 1785 }, { "epoch": 0.10239944958862483, "grad_norm": 0.0, "learning_rate": 2.4200366313764e-06, "loss": 14.9875, "step": 1786 }, { "epoch": 0.10245678410687155, "grad_norm": 0.0, "learning_rate": 2.4147370602630267e-06, "loss": 14.8213, "step": 1787 }, { "epoch": 0.10251411862511825, "grad_norm": 0.0, "learning_rate": 2.4094414502986176e-06, "loss": 15.1506, "step": 1788 }, { "epoch": 0.10257145314336497, "grad_norm": 0.0, "learning_rate": 2.4041498095971253e-06, "loss": 14.9495, "step": 1789 }, { "epoch": 0.10262878766161167, "grad_norm": 0.0, "learning_rate": 2.398862146266418e-06, "loss": 15.2569, "step": 1790 }, { "epoch": 0.10268612217985838, "grad_norm": 0.0, "learning_rate": 2.3935784684082763e-06, "loss": 15.5546, "step": 1791 }, { "epoch": 0.1027434566981051, "grad_norm": 0.0, "learning_rate": 2.388298784118366e-06, "loss": 14.7149, "step": 1792 }, { "epoch": 0.1028007912163518, "grad_norm": 0.0, "learning_rate": 2.3830231014862415e-06, "loss": 15.0869, "step": 1793 }, { "epoch": 0.10285812573459852, "grad_norm": 0.0, "learning_rate": 2.3777514285953192e-06, "loss": 15.0755, "step": 1794 }, { "epoch": 0.10291546025284523, "grad_norm": 0.0, "learning_rate": 2.3724837735228773e-06, "loss": 14.853, "step": 1795 }, { "epoch": 0.10297279477109193, "grad_norm": 0.0, "learning_rate": 2.367220144340035e-06, "loss": 15.2218, "step": 1796 }, { "epoch": 0.10303012928933865, "grad_norm": 0.0, "learning_rate": 2.361960549111742e-06, "loss": 15.0583, "step": 1797 }, { "epoch": 0.10308746380758535, "grad_norm": 0.0, "learning_rate": 2.356704995896768e-06, "loss": 15.1339, "step": 1798 }, { "epoch": 0.10314479832583207, "grad_norm": 0.0, "learning_rate": 2.3514534927476935e-06, "loss": 15.0067, "step": 1799 }, { "epoch": 0.10320213284407878, "grad_norm": 0.0, "learning_rate": 2.3462060477108856e-06, "loss": 15.0885, "step": 1800 }, { "epoch": 0.10325946736232548, "grad_norm": 0.0, "learning_rate": 2.340962668826503e-06, "loss": 15.144, "step": 1801 }, { "epoch": 0.1033168018805722, "grad_norm": 0.0, "learning_rate": 2.3357233641284665e-06, "loss": 15.266, "step": 1802 }, { "epoch": 0.10337413639881891, "grad_norm": 0.0, "learning_rate": 2.330488141644457e-06, "loss": 15.1685, "step": 1803 }, { "epoch": 0.10343147091706562, "grad_norm": 0.0, "learning_rate": 2.3252570093959e-06, "loss": 15.1289, "step": 1804 }, { "epoch": 0.10348880543531233, "grad_norm": 0.0, "learning_rate": 2.320029975397957e-06, "loss": 15.1866, "step": 1805 }, { "epoch": 0.10354613995355905, "grad_norm": 0.0, "learning_rate": 2.314807047659506e-06, "loss": 15.1786, "step": 1806 }, { "epoch": 0.10360347447180575, "grad_norm": 0.0, "learning_rate": 2.309588234183137e-06, "loss": 14.7894, "step": 1807 }, { "epoch": 0.10366080899005246, "grad_norm": 0.0, "learning_rate": 2.304373542965132e-06, "loss": 15.1901, "step": 1808 }, { "epoch": 0.10371814350829917, "grad_norm": 0.0, "learning_rate": 2.2991629819954626e-06, "loss": 14.9909, "step": 1809 }, { "epoch": 0.10377547802654588, "grad_norm": 0.0, "learning_rate": 2.293956559257766e-06, "loss": 14.6973, "step": 1810 }, { "epoch": 0.1038328125447926, "grad_norm": 0.0, "learning_rate": 2.2887542827293424e-06, "loss": 15.1475, "step": 1811 }, { "epoch": 0.1038901470630393, "grad_norm": 0.0, "learning_rate": 2.2835561603811363e-06, "loss": 14.8105, "step": 1812 }, { "epoch": 0.10394748158128601, "grad_norm": 0.0, "learning_rate": 2.2783622001777322e-06, "loss": 14.9498, "step": 1813 }, { "epoch": 0.10400481609953273, "grad_norm": 0.0, "learning_rate": 2.2731724100773305e-06, "loss": 15.0761, "step": 1814 }, { "epoch": 0.10406215061777943, "grad_norm": 0.0, "learning_rate": 2.26798679803175e-06, "loss": 15.3239, "step": 1815 }, { "epoch": 0.10411948513602615, "grad_norm": 0.0, "learning_rate": 2.262805371986402e-06, "loss": 15.0212, "step": 1816 }, { "epoch": 0.10417681965427285, "grad_norm": 0.0, "learning_rate": 2.257628139880285e-06, "loss": 14.8911, "step": 1817 }, { "epoch": 0.10423415417251956, "grad_norm": 0.0, "learning_rate": 2.2524551096459703e-06, "loss": 15.3894, "step": 1818 }, { "epoch": 0.10429148869076628, "grad_norm": 0.0, "learning_rate": 2.247286289209597e-06, "loss": 15.4648, "step": 1819 }, { "epoch": 0.10434882320901298, "grad_norm": 0.0, "learning_rate": 2.242121686490847e-06, "loss": 15.2684, "step": 1820 }, { "epoch": 0.1044061577272597, "grad_norm": 0.0, "learning_rate": 2.236961309402945e-06, "loss": 15.3737, "step": 1821 }, { "epoch": 0.10446349224550641, "grad_norm": 0.0, "learning_rate": 2.231805165852637e-06, "loss": 14.9809, "step": 1822 }, { "epoch": 0.10452082676375311, "grad_norm": 0.0, "learning_rate": 2.2266532637401867e-06, "loss": 14.7812, "step": 1823 }, { "epoch": 0.10457816128199983, "grad_norm": 0.0, "learning_rate": 2.2215056109593547e-06, "loss": 15.1497, "step": 1824 }, { "epoch": 0.10463549580024654, "grad_norm": 0.0, "learning_rate": 2.216362215397393e-06, "loss": 14.7571, "step": 1825 }, { "epoch": 0.10469283031849325, "grad_norm": 0.0, "learning_rate": 2.2112230849350286e-06, "loss": 15.2152, "step": 1826 }, { "epoch": 0.10475016483673996, "grad_norm": 0.0, "learning_rate": 2.206088227446459e-06, "loss": 14.8404, "step": 1827 }, { "epoch": 0.10480749935498666, "grad_norm": 0.0, "learning_rate": 2.2009576507993273e-06, "loss": 14.8393, "step": 1828 }, { "epoch": 0.10486483387323338, "grad_norm": 0.0, "learning_rate": 2.1958313628547247e-06, "loss": 15.1098, "step": 1829 }, { "epoch": 0.1049221683914801, "grad_norm": 0.0, "learning_rate": 2.190709371467165e-06, "loss": 15.2116, "step": 1830 }, { "epoch": 0.1049795029097268, "grad_norm": 0.0, "learning_rate": 2.1855916844845827e-06, "loss": 15.1286, "step": 1831 }, { "epoch": 0.10503683742797351, "grad_norm": 0.0, "learning_rate": 2.180478309748313e-06, "loss": 15.0697, "step": 1832 }, { "epoch": 0.10509417194622023, "grad_norm": 0.0, "learning_rate": 2.175369255093091e-06, "loss": 14.8526, "step": 1833 }, { "epoch": 0.10515150646446693, "grad_norm": 0.0, "learning_rate": 2.1702645283470238e-06, "loss": 15.0791, "step": 1834 }, { "epoch": 0.10520884098271364, "grad_norm": 0.0, "learning_rate": 2.165164137331596e-06, "loss": 15.1782, "step": 1835 }, { "epoch": 0.10526617550096036, "grad_norm": 0.0, "learning_rate": 2.16006808986164e-06, "loss": 15.0698, "step": 1836 }, { "epoch": 0.10532351001920706, "grad_norm": 0.0, "learning_rate": 2.1549763937453445e-06, "loss": 15.0492, "step": 1837 }, { "epoch": 0.10538084453745378, "grad_norm": 0.0, "learning_rate": 2.1498890567842175e-06, "loss": 15.3157, "step": 1838 }, { "epoch": 0.10543817905570048, "grad_norm": 0.0, "learning_rate": 2.144806086773095e-06, "loss": 14.9877, "step": 1839 }, { "epoch": 0.1054955135739472, "grad_norm": 0.0, "learning_rate": 2.1397274915001254e-06, "loss": 15.2216, "step": 1840 }, { "epoch": 0.10555284809219391, "grad_norm": 0.0, "learning_rate": 2.1346532787467466e-06, "loss": 15.1515, "step": 1841 }, { "epoch": 0.10561018261044061, "grad_norm": 0.0, "learning_rate": 2.129583456287689e-06, "loss": 15.2111, "step": 1842 }, { "epoch": 0.10566751712868733, "grad_norm": 0.0, "learning_rate": 2.1245180318909482e-06, "loss": 14.9736, "step": 1843 }, { "epoch": 0.10572485164693404, "grad_norm": 0.0, "learning_rate": 2.119457013317789e-06, "loss": 15.1239, "step": 1844 }, { "epoch": 0.10578218616518074, "grad_norm": 0.0, "learning_rate": 2.11440040832272e-06, "loss": 15.1009, "step": 1845 }, { "epoch": 0.10583952068342746, "grad_norm": 0.0, "learning_rate": 2.1093482246534896e-06, "loss": 14.9144, "step": 1846 }, { "epoch": 0.10589685520167416, "grad_norm": 0.0, "learning_rate": 2.1043004700510694e-06, "loss": 15.2687, "step": 1847 }, { "epoch": 0.10595418971992088, "grad_norm": 0.0, "learning_rate": 2.0992571522496502e-06, "loss": 15.3205, "step": 1848 }, { "epoch": 0.10601152423816759, "grad_norm": 0.0, "learning_rate": 2.0942182789766174e-06, "loss": 15.1931, "step": 1849 }, { "epoch": 0.1060688587564143, "grad_norm": 0.0, "learning_rate": 2.0891838579525547e-06, "loss": 15.2253, "step": 1850 }, { "epoch": 0.10612619327466101, "grad_norm": 0.0, "learning_rate": 2.084153896891217e-06, "loss": 15.0989, "step": 1851 }, { "epoch": 0.10618352779290773, "grad_norm": 0.0, "learning_rate": 2.0791284034995296e-06, "loss": 15.1758, "step": 1852 }, { "epoch": 0.10624086231115443, "grad_norm": 0.0, "learning_rate": 2.074107385477568e-06, "loss": 15.0728, "step": 1853 }, { "epoch": 0.10629819682940114, "grad_norm": 0.0, "learning_rate": 2.0690908505185577e-06, "loss": 14.9472, "step": 1854 }, { "epoch": 0.10635553134764786, "grad_norm": 0.0, "learning_rate": 2.064078806308848e-06, "loss": 14.9618, "step": 1855 }, { "epoch": 0.10641286586589456, "grad_norm": 0.0, "learning_rate": 2.0590712605279135e-06, "loss": 14.966, "step": 1856 }, { "epoch": 0.10647020038414128, "grad_norm": 0.0, "learning_rate": 2.054068220848331e-06, "loss": 14.9155, "step": 1857 }, { "epoch": 0.10652753490238798, "grad_norm": 0.0, "learning_rate": 2.0490696949357774e-06, "loss": 14.9877, "step": 1858 }, { "epoch": 0.10658486942063469, "grad_norm": 0.0, "learning_rate": 2.0440756904490115e-06, "loss": 14.8584, "step": 1859 }, { "epoch": 0.10664220393888141, "grad_norm": 0.0, "learning_rate": 2.0390862150398637e-06, "loss": 15.2422, "step": 1860 }, { "epoch": 0.10669953845712811, "grad_norm": 0.0, "learning_rate": 2.0341012763532243e-06, "loss": 14.9152, "step": 1861 }, { "epoch": 0.10675687297537483, "grad_norm": 0.0, "learning_rate": 2.0291208820270368e-06, "loss": 15.1744, "step": 1862 }, { "epoch": 0.10681420749362154, "grad_norm": 0.0, "learning_rate": 2.024145039692277e-06, "loss": 14.7057, "step": 1863 }, { "epoch": 0.10687154201186824, "grad_norm": 0.0, "learning_rate": 2.0191737569729492e-06, "loss": 15.0924, "step": 1864 }, { "epoch": 0.10692887653011496, "grad_norm": 0.0, "learning_rate": 2.0142070414860704e-06, "loss": 14.8843, "step": 1865 }, { "epoch": 0.10698621104836166, "grad_norm": 0.0, "learning_rate": 2.009244900841658e-06, "loss": 14.9535, "step": 1866 }, { "epoch": 0.10704354556660838, "grad_norm": 0.0, "learning_rate": 2.004287342642721e-06, "loss": 15.2298, "step": 1867 }, { "epoch": 0.10710088008485509, "grad_norm": 0.0, "learning_rate": 1.9993343744852504e-06, "loss": 14.8105, "step": 1868 }, { "epoch": 0.10715821460310179, "grad_norm": 0.0, "learning_rate": 1.994386003958198e-06, "loss": 15.1573, "step": 1869 }, { "epoch": 0.10721554912134851, "grad_norm": 0.0, "learning_rate": 1.989442238643478e-06, "loss": 15.06, "step": 1870 }, { "epoch": 0.10727288363959522, "grad_norm": 0.0, "learning_rate": 1.9845030861159416e-06, "loss": 14.703, "step": 1871 }, { "epoch": 0.10733021815784193, "grad_norm": 0.0, "learning_rate": 1.9795685539433785e-06, "loss": 15.1016, "step": 1872 }, { "epoch": 0.10738755267608864, "grad_norm": 0.0, "learning_rate": 1.974638649686495e-06, "loss": 15.0317, "step": 1873 }, { "epoch": 0.10744488719433536, "grad_norm": 0.0, "learning_rate": 1.9697133808989084e-06, "loss": 14.9478, "step": 1874 }, { "epoch": 0.10750222171258206, "grad_norm": 0.0, "learning_rate": 1.9647927551271302e-06, "loss": 15.0411, "step": 1875 }, { "epoch": 0.10755955623082877, "grad_norm": 0.0, "learning_rate": 1.959876779910564e-06, "loss": 15.0192, "step": 1876 }, { "epoch": 0.10761689074907548, "grad_norm": 0.0, "learning_rate": 1.954965462781481e-06, "loss": 14.9757, "step": 1877 }, { "epoch": 0.10767422526732219, "grad_norm": 0.0, "learning_rate": 1.950058811265022e-06, "loss": 15.2199, "step": 1878 }, { "epoch": 0.1077315597855689, "grad_norm": 0.0, "learning_rate": 1.945156832879174e-06, "loss": 15.1254, "step": 1879 }, { "epoch": 0.10778889430381561, "grad_norm": 0.0, "learning_rate": 1.9402595351347656e-06, "loss": 15.0774, "step": 1880 }, { "epoch": 0.10784622882206232, "grad_norm": 0.0, "learning_rate": 1.935366925535452e-06, "loss": 14.932, "step": 1881 }, { "epoch": 0.10790356334030904, "grad_norm": 0.0, "learning_rate": 1.930479011577711e-06, "loss": 15.0582, "step": 1882 }, { "epoch": 0.10796089785855574, "grad_norm": 0.0, "learning_rate": 1.925595800750816e-06, "loss": 15.268, "step": 1883 }, { "epoch": 0.10801823237680246, "grad_norm": 0.0, "learning_rate": 1.920717300536846e-06, "loss": 15.4256, "step": 1884 }, { "epoch": 0.10807556689504916, "grad_norm": 0.0, "learning_rate": 1.91584351841065e-06, "loss": 14.9376, "step": 1885 }, { "epoch": 0.10813290141329587, "grad_norm": 0.0, "learning_rate": 1.9109744618398607e-06, "loss": 15.2782, "step": 1886 }, { "epoch": 0.10819023593154259, "grad_norm": 0.0, "learning_rate": 1.9061101382848567e-06, "loss": 14.8349, "step": 1887 }, { "epoch": 0.10824757044978929, "grad_norm": 0.0, "learning_rate": 1.9012505551987764e-06, "loss": 15.1059, "step": 1888 }, { "epoch": 0.108304904968036, "grad_norm": 0.0, "learning_rate": 1.8963957200274874e-06, "loss": 14.9271, "step": 1889 }, { "epoch": 0.10836223948628272, "grad_norm": 0.0, "learning_rate": 1.8915456402095883e-06, "loss": 15.0641, "step": 1890 }, { "epoch": 0.10841957400452942, "grad_norm": 0.0, "learning_rate": 1.8867003231763847e-06, "loss": 14.84, "step": 1891 }, { "epoch": 0.10847690852277614, "grad_norm": 0.0, "learning_rate": 1.8818597763518926e-06, "loss": 14.9408, "step": 1892 }, { "epoch": 0.10853424304102285, "grad_norm": 0.0, "learning_rate": 1.8770240071528117e-06, "loss": 15.1839, "step": 1893 }, { "epoch": 0.10859157755926956, "grad_norm": 0.0, "learning_rate": 1.872193022988526e-06, "loss": 15.2123, "step": 1894 }, { "epoch": 0.10864891207751627, "grad_norm": 0.0, "learning_rate": 1.8673668312610843e-06, "loss": 15.3868, "step": 1895 }, { "epoch": 0.10870624659576297, "grad_norm": 0.0, "learning_rate": 1.8625454393651976e-06, "loss": 14.9713, "step": 1896 }, { "epoch": 0.10876358111400969, "grad_norm": 0.0, "learning_rate": 1.8577288546882167e-06, "loss": 15.1364, "step": 1897 }, { "epoch": 0.1088209156322564, "grad_norm": 0.0, "learning_rate": 1.8529170846101318e-06, "loss": 15.0209, "step": 1898 }, { "epoch": 0.1088782501505031, "grad_norm": 0.0, "learning_rate": 1.8481101365035537e-06, "loss": 15.1795, "step": 1899 }, { "epoch": 0.10893558466874982, "grad_norm": 0.0, "learning_rate": 1.8433080177337043e-06, "loss": 15.1728, "step": 1900 }, { "epoch": 0.10899291918699654, "grad_norm": 0.0, "learning_rate": 1.8385107356584058e-06, "loss": 14.9725, "step": 1901 }, { "epoch": 0.10905025370524324, "grad_norm": 0.0, "learning_rate": 1.8337182976280731e-06, "loss": 15.1804, "step": 1902 }, { "epoch": 0.10910758822348995, "grad_norm": 0.0, "learning_rate": 1.8289307109856941e-06, "loss": 14.9713, "step": 1903 }, { "epoch": 0.10916492274173666, "grad_norm": 0.0, "learning_rate": 1.8241479830668291e-06, "loss": 15.0086, "step": 1904 }, { "epoch": 0.10922225725998337, "grad_norm": 0.0, "learning_rate": 1.8193701211995862e-06, "loss": 15.3655, "step": 1905 }, { "epoch": 0.10927959177823009, "grad_norm": 0.0, "learning_rate": 1.8145971327046274e-06, "loss": 14.9315, "step": 1906 }, { "epoch": 0.10933692629647679, "grad_norm": 0.0, "learning_rate": 1.8098290248951394e-06, "loss": 15.0965, "step": 1907 }, { "epoch": 0.1093942608147235, "grad_norm": 0.0, "learning_rate": 1.8050658050768338e-06, "loss": 15.2293, "step": 1908 }, { "epoch": 0.10945159533297022, "grad_norm": 0.0, "learning_rate": 1.8003074805479314e-06, "loss": 15.0582, "step": 1909 }, { "epoch": 0.10950892985121692, "grad_norm": 0.0, "learning_rate": 1.7955540585991577e-06, "loss": 15.4075, "step": 1910 }, { "epoch": 0.10956626436946364, "grad_norm": 0.0, "learning_rate": 1.7908055465137181e-06, "loss": 15.0865, "step": 1911 }, { "epoch": 0.10962359888771035, "grad_norm": 0.0, "learning_rate": 1.7860619515673034e-06, "loss": 14.9347, "step": 1912 }, { "epoch": 0.10968093340595705, "grad_norm": 0.0, "learning_rate": 1.781323281028065e-06, "loss": 15.1036, "step": 1913 }, { "epoch": 0.10973826792420377, "grad_norm": 0.0, "learning_rate": 1.7765895421566099e-06, "loss": 14.9977, "step": 1914 }, { "epoch": 0.10979560244245047, "grad_norm": 0.0, "learning_rate": 1.771860742205988e-06, "loss": 15.3883, "step": 1915 }, { "epoch": 0.10985293696069719, "grad_norm": 0.0, "learning_rate": 1.7671368884216873e-06, "loss": 15.2364, "step": 1916 }, { "epoch": 0.1099102714789439, "grad_norm": 0.0, "learning_rate": 1.7624179880416087e-06, "loss": 15.0877, "step": 1917 }, { "epoch": 0.1099676059971906, "grad_norm": 0.0, "learning_rate": 1.7577040482960723e-06, "loss": 14.7803, "step": 1918 }, { "epoch": 0.11002494051543732, "grad_norm": 0.0, "learning_rate": 1.7529950764077885e-06, "loss": 14.9745, "step": 1919 }, { "epoch": 0.11008227503368403, "grad_norm": 0.0, "learning_rate": 1.7482910795918655e-06, "loss": 15.092, "step": 1920 }, { "epoch": 0.11013960955193074, "grad_norm": 0.0, "learning_rate": 1.7435920650557808e-06, "loss": 15.1327, "step": 1921 }, { "epoch": 0.11019694407017745, "grad_norm": 0.0, "learning_rate": 1.7388980399993822e-06, "loss": 15.2231, "step": 1922 }, { "epoch": 0.11025427858842415, "grad_norm": 0.0, "learning_rate": 1.7342090116148684e-06, "loss": 15.0259, "step": 1923 }, { "epoch": 0.11031161310667087, "grad_norm": 0.0, "learning_rate": 1.7295249870867898e-06, "loss": 15.2957, "step": 1924 }, { "epoch": 0.11036894762491758, "grad_norm": 0.0, "learning_rate": 1.7248459735920203e-06, "loss": 15.0902, "step": 1925 }, { "epoch": 0.11042628214316429, "grad_norm": 0.0, "learning_rate": 1.720171978299766e-06, "loss": 15.0184, "step": 1926 }, { "epoch": 0.110483616661411, "grad_norm": 0.0, "learning_rate": 1.7155030083715362e-06, "loss": 14.5436, "step": 1927 }, { "epoch": 0.11054095117965772, "grad_norm": 0.0, "learning_rate": 1.7108390709611427e-06, "loss": 14.8727, "step": 1928 }, { "epoch": 0.11059828569790442, "grad_norm": 0.0, "learning_rate": 1.7061801732146865e-06, "loss": 14.7645, "step": 1929 }, { "epoch": 0.11065562021615113, "grad_norm": 0.0, "learning_rate": 1.7015263222705492e-06, "loss": 15.1675, "step": 1930 }, { "epoch": 0.11071295473439785, "grad_norm": 0.0, "learning_rate": 1.6968775252593745e-06, "loss": 15.1032, "step": 1931 }, { "epoch": 0.11077028925264455, "grad_norm": 0.0, "learning_rate": 1.692233789304069e-06, "loss": 15.134, "step": 1932 }, { "epoch": 0.11082762377089127, "grad_norm": 0.0, "learning_rate": 1.6875951215197779e-06, "loss": 15.1558, "step": 1933 }, { "epoch": 0.11088495828913797, "grad_norm": 0.0, "learning_rate": 1.682961529013889e-06, "loss": 14.7275, "step": 1934 }, { "epoch": 0.11094229280738468, "grad_norm": 0.0, "learning_rate": 1.6783330188860047e-06, "loss": 14.8734, "step": 1935 }, { "epoch": 0.1109996273256314, "grad_norm": 0.0, "learning_rate": 1.6737095982279444e-06, "loss": 15.6697, "step": 1936 }, { "epoch": 0.1110569618438781, "grad_norm": 0.0, "learning_rate": 1.669091274123732e-06, "loss": 15.2523, "step": 1937 }, { "epoch": 0.11111429636212482, "grad_norm": 0.0, "learning_rate": 1.6644780536495775e-06, "loss": 15.1574, "step": 1938 }, { "epoch": 0.11117163088037153, "grad_norm": 0.0, "learning_rate": 1.6598699438738764e-06, "loss": 15.0501, "step": 1939 }, { "epoch": 0.11122896539861823, "grad_norm": 0.0, "learning_rate": 1.6552669518571873e-06, "loss": 14.9379, "step": 1940 }, { "epoch": 0.11128629991686495, "grad_norm": 0.0, "learning_rate": 1.6506690846522338e-06, "loss": 14.8896, "step": 1941 }, { "epoch": 0.11134363443511167, "grad_norm": 0.0, "learning_rate": 1.646076349303884e-06, "loss": 14.9501, "step": 1942 }, { "epoch": 0.11140096895335837, "grad_norm": 0.0, "learning_rate": 1.6414887528491414e-06, "loss": 15.0372, "step": 1943 }, { "epoch": 0.11145830347160508, "grad_norm": 0.0, "learning_rate": 1.6369063023171367e-06, "loss": 15.1974, "step": 1944 }, { "epoch": 0.11151563798985178, "grad_norm": 0.0, "learning_rate": 1.6323290047291196e-06, "loss": 14.8393, "step": 1945 }, { "epoch": 0.1115729725080985, "grad_norm": 0.0, "learning_rate": 1.6277568670984384e-06, "loss": 15.2872, "step": 1946 }, { "epoch": 0.11163030702634522, "grad_norm": 0.0, "learning_rate": 1.623189896430542e-06, "loss": 15.1409, "step": 1947 }, { "epoch": 0.11168764154459192, "grad_norm": 0.0, "learning_rate": 1.618628099722957e-06, "loss": 15.1526, "step": 1948 }, { "epoch": 0.11174497606283863, "grad_norm": 0.0, "learning_rate": 1.6140714839652838e-06, "loss": 15.0921, "step": 1949 }, { "epoch": 0.11180231058108535, "grad_norm": 0.0, "learning_rate": 1.609520056139185e-06, "loss": 15.1327, "step": 1950 }, { "epoch": 0.11185964509933205, "grad_norm": 0.0, "learning_rate": 1.604973823218376e-06, "loss": 14.6891, "step": 1951 }, { "epoch": 0.11191697961757877, "grad_norm": 0.0, "learning_rate": 1.6004327921686086e-06, "loss": 15.1599, "step": 1952 }, { "epoch": 0.11197431413582547, "grad_norm": 0.0, "learning_rate": 1.5958969699476689e-06, "loss": 14.7748, "step": 1953 }, { "epoch": 0.11203164865407218, "grad_norm": 0.0, "learning_rate": 1.5913663635053578e-06, "loss": 14.9859, "step": 1954 }, { "epoch": 0.1120889831723189, "grad_norm": 0.0, "learning_rate": 1.5868409797834882e-06, "loss": 15.2427, "step": 1955 }, { "epoch": 0.1121463176905656, "grad_norm": 0.0, "learning_rate": 1.582320825715868e-06, "loss": 14.8814, "step": 1956 }, { "epoch": 0.11220365220881232, "grad_norm": 0.0, "learning_rate": 1.5778059082282932e-06, "loss": 15.1055, "step": 1957 }, { "epoch": 0.11226098672705903, "grad_norm": 0.0, "learning_rate": 1.573296234238534e-06, "loss": 15.1705, "step": 1958 }, { "epoch": 0.11231832124530573, "grad_norm": 0.0, "learning_rate": 1.5687918106563326e-06, "loss": 14.8643, "step": 1959 }, { "epoch": 0.11237565576355245, "grad_norm": 0.0, "learning_rate": 1.56429264438338e-06, "loss": 14.8809, "step": 1960 }, { "epoch": 0.11243299028179916, "grad_norm": 0.0, "learning_rate": 1.5597987423133166e-06, "loss": 14.9778, "step": 1961 }, { "epoch": 0.11249032480004587, "grad_norm": 0.0, "learning_rate": 1.5553101113317137e-06, "loss": 14.9691, "step": 1962 }, { "epoch": 0.11254765931829258, "grad_norm": 0.0, "learning_rate": 1.550826758316068e-06, "loss": 15.1386, "step": 1963 }, { "epoch": 0.11260499383653928, "grad_norm": 0.0, "learning_rate": 1.546348690135786e-06, "loss": 15.2445, "step": 1964 }, { "epoch": 0.112662328354786, "grad_norm": 0.0, "learning_rate": 1.5418759136521844e-06, "loss": 15.0999, "step": 1965 }, { "epoch": 0.11271966287303271, "grad_norm": 0.0, "learning_rate": 1.5374084357184621e-06, "loss": 15.0017, "step": 1966 }, { "epoch": 0.11277699739127942, "grad_norm": 0.0, "learning_rate": 1.5329462631797092e-06, "loss": 15.1215, "step": 1967 }, { "epoch": 0.11283433190952613, "grad_norm": 0.0, "learning_rate": 1.528489402872878e-06, "loss": 15.2549, "step": 1968 }, { "epoch": 0.11289166642777285, "grad_norm": 0.0, "learning_rate": 1.5240378616267887e-06, "loss": 14.8262, "step": 1969 }, { "epoch": 0.11294900094601955, "grad_norm": 0.0, "learning_rate": 1.5195916462621074e-06, "loss": 15.219, "step": 1970 }, { "epoch": 0.11300633546426626, "grad_norm": 0.0, "learning_rate": 1.5151507635913403e-06, "loss": 15.0064, "step": 1971 }, { "epoch": 0.11306366998251297, "grad_norm": 0.0, "learning_rate": 1.510715220418823e-06, "loss": 15.0459, "step": 1972 }, { "epoch": 0.11312100450075968, "grad_norm": 0.0, "learning_rate": 1.5062850235407118e-06, "loss": 14.745, "step": 1973 }, { "epoch": 0.1131783390190064, "grad_norm": 0.0, "learning_rate": 1.5018601797449683e-06, "loss": 15.1417, "step": 1974 }, { "epoch": 0.1132356735372531, "grad_norm": 0.0, "learning_rate": 1.4974406958113557e-06, "loss": 14.9677, "step": 1975 }, { "epoch": 0.11329300805549981, "grad_norm": 0.0, "learning_rate": 1.4930265785114224e-06, "loss": 15.0245, "step": 1976 }, { "epoch": 0.11335034257374653, "grad_norm": 0.0, "learning_rate": 1.4886178346084934e-06, "loss": 15.0466, "step": 1977 }, { "epoch": 0.11340767709199323, "grad_norm": 0.0, "learning_rate": 1.4842144708576606e-06, "loss": 15.0459, "step": 1978 }, { "epoch": 0.11346501161023995, "grad_norm": 0.0, "learning_rate": 1.4798164940057769e-06, "loss": 15.342, "step": 1979 }, { "epoch": 0.11352234612848666, "grad_norm": 0.0, "learning_rate": 1.4754239107914337e-06, "loss": 15.3308, "step": 1980 }, { "epoch": 0.11357968064673336, "grad_norm": 0.0, "learning_rate": 1.4710367279449662e-06, "loss": 15.0959, "step": 1981 }, { "epoch": 0.11363701516498008, "grad_norm": 0.0, "learning_rate": 1.4666549521884283e-06, "loss": 15.3118, "step": 1982 }, { "epoch": 0.11369434968322678, "grad_norm": 0.0, "learning_rate": 1.4622785902355967e-06, "loss": 15.0102, "step": 1983 }, { "epoch": 0.1137516842014735, "grad_norm": 0.0, "learning_rate": 1.457907648791943e-06, "loss": 15.2011, "step": 1984 }, { "epoch": 0.11380901871972021, "grad_norm": 0.0, "learning_rate": 1.4535421345546424e-06, "loss": 14.8329, "step": 1985 }, { "epoch": 0.11386635323796691, "grad_norm": 0.0, "learning_rate": 1.4491820542125495e-06, "loss": 14.8631, "step": 1986 }, { "epoch": 0.11392368775621363, "grad_norm": 0.0, "learning_rate": 1.4448274144461965e-06, "loss": 15.0568, "step": 1987 }, { "epoch": 0.11398102227446034, "grad_norm": 0.0, "learning_rate": 1.4404782219277758e-06, "loss": 14.9962, "step": 1988 }, { "epoch": 0.11403835679270705, "grad_norm": 0.0, "learning_rate": 1.4361344833211377e-06, "loss": 14.908, "step": 1989 }, { "epoch": 0.11409569131095376, "grad_norm": 0.0, "learning_rate": 1.431796205281773e-06, "loss": 14.9463, "step": 1990 }, { "epoch": 0.11415302582920046, "grad_norm": 0.0, "learning_rate": 1.4274633944568056e-06, "loss": 14.7642, "step": 1991 }, { "epoch": 0.11421036034744718, "grad_norm": 0.0, "learning_rate": 1.423136057484983e-06, "loss": 14.9915, "step": 1992 }, { "epoch": 0.1142676948656939, "grad_norm": 0.0, "learning_rate": 1.4188142009966689e-06, "loss": 15.1879, "step": 1993 }, { "epoch": 0.1143250293839406, "grad_norm": 0.0, "learning_rate": 1.414497831613823e-06, "loss": 15.253, "step": 1994 }, { "epoch": 0.11438236390218731, "grad_norm": 0.0, "learning_rate": 1.410186955950006e-06, "loss": 15.1298, "step": 1995 }, { "epoch": 0.11443969842043403, "grad_norm": 0.0, "learning_rate": 1.4058815806103542e-06, "loss": 15.3996, "step": 1996 }, { "epoch": 0.11449703293868073, "grad_norm": 0.0, "learning_rate": 1.4015817121915792e-06, "loss": 14.8465, "step": 1997 }, { "epoch": 0.11455436745692744, "grad_norm": 0.0, "learning_rate": 1.3972873572819535e-06, "loss": 15.2481, "step": 1998 }, { "epoch": 0.11461170197517416, "grad_norm": 0.0, "learning_rate": 1.3929985224613051e-06, "loss": 15.0481, "step": 1999 }, { "epoch": 0.11466903649342086, "grad_norm": 0.0, "learning_rate": 1.3887152143009992e-06, "loss": 15.2159, "step": 2000 }, { "epoch": 0.11472637101166758, "grad_norm": 0.0, "learning_rate": 1.384437439363938e-06, "loss": 15.0068, "step": 2001 }, { "epoch": 0.11478370552991428, "grad_norm": 0.0, "learning_rate": 1.3801652042045416e-06, "loss": 15.0704, "step": 2002 }, { "epoch": 0.114841040048161, "grad_norm": 0.0, "learning_rate": 1.3758985153687455e-06, "loss": 15.1483, "step": 2003 }, { "epoch": 0.11489837456640771, "grad_norm": 0.0, "learning_rate": 1.3716373793939841e-06, "loss": 14.9319, "step": 2004 }, { "epoch": 0.11495570908465441, "grad_norm": 0.0, "learning_rate": 1.367381802809185e-06, "loss": 14.7106, "step": 2005 }, { "epoch": 0.11501304360290113, "grad_norm": 0.0, "learning_rate": 1.3631317921347564e-06, "loss": 14.9806, "step": 2006 }, { "epoch": 0.11507037812114784, "grad_norm": 0.0, "learning_rate": 1.3588873538825825e-06, "loss": 14.9406, "step": 2007 }, { "epoch": 0.11512771263939454, "grad_norm": 0.0, "learning_rate": 1.3546484945560029e-06, "loss": 15.1008, "step": 2008 }, { "epoch": 0.11518504715764126, "grad_norm": 0.0, "learning_rate": 1.3504152206498167e-06, "loss": 15.1592, "step": 2009 }, { "epoch": 0.11524238167588796, "grad_norm": 0.0, "learning_rate": 1.3461875386502588e-06, "loss": 15.1066, "step": 2010 }, { "epoch": 0.11529971619413468, "grad_norm": 0.0, "learning_rate": 1.3419654550349987e-06, "loss": 15.0134, "step": 2011 }, { "epoch": 0.11535705071238139, "grad_norm": 0.0, "learning_rate": 1.3377489762731265e-06, "loss": 14.8637, "step": 2012 }, { "epoch": 0.1154143852306281, "grad_norm": 0.0, "learning_rate": 1.3335381088251481e-06, "loss": 14.944, "step": 2013 }, { "epoch": 0.11547171974887481, "grad_norm": 0.0, "learning_rate": 1.329332859142967e-06, "loss": 14.8632, "step": 2014 }, { "epoch": 0.11552905426712153, "grad_norm": 0.0, "learning_rate": 1.3251332336698847e-06, "loss": 15.0903, "step": 2015 }, { "epoch": 0.11558638878536823, "grad_norm": 0.0, "learning_rate": 1.320939238840579e-06, "loss": 15.032, "step": 2016 }, { "epoch": 0.11564372330361494, "grad_norm": 0.0, "learning_rate": 1.3167508810811058e-06, "loss": 15.1402, "step": 2017 }, { "epoch": 0.11570105782186166, "grad_norm": 0.0, "learning_rate": 1.3125681668088808e-06, "loss": 15.0755, "step": 2018 }, { "epoch": 0.11575839234010836, "grad_norm": 0.0, "learning_rate": 1.3083911024326735e-06, "loss": 14.8549, "step": 2019 }, { "epoch": 0.11581572685835508, "grad_norm": 0.0, "learning_rate": 1.3042196943525942e-06, "loss": 15.2336, "step": 2020 }, { "epoch": 0.11587306137660178, "grad_norm": 0.0, "learning_rate": 1.300053948960094e-06, "loss": 14.775, "step": 2021 }, { "epoch": 0.11593039589484849, "grad_norm": 0.0, "learning_rate": 1.295893872637937e-06, "loss": 15.0152, "step": 2022 }, { "epoch": 0.11598773041309521, "grad_norm": 0.0, "learning_rate": 1.2917394717602123e-06, "loss": 15.2695, "step": 2023 }, { "epoch": 0.11604506493134191, "grad_norm": 0.0, "learning_rate": 1.2875907526923043e-06, "loss": 14.9267, "step": 2024 }, { "epoch": 0.11610239944958863, "grad_norm": 0.0, "learning_rate": 1.283447721790896e-06, "loss": 14.9806, "step": 2025 }, { "epoch": 0.11615973396783534, "grad_norm": 0.0, "learning_rate": 1.2793103854039518e-06, "loss": 15.1366, "step": 2026 }, { "epoch": 0.11621706848608204, "grad_norm": 0.0, "learning_rate": 1.2751787498707158e-06, "loss": 15.0491, "step": 2027 }, { "epoch": 0.11627440300432876, "grad_norm": 0.0, "learning_rate": 1.271052821521692e-06, "loss": 14.899, "step": 2028 }, { "epoch": 0.11633173752257546, "grad_norm": 0.0, "learning_rate": 1.2669326066786458e-06, "loss": 15.222, "step": 2029 }, { "epoch": 0.11638907204082218, "grad_norm": 0.0, "learning_rate": 1.2628181116545813e-06, "loss": 14.9507, "step": 2030 }, { "epoch": 0.11644640655906889, "grad_norm": 0.0, "learning_rate": 1.2587093427537445e-06, "loss": 14.9697, "step": 2031 }, { "epoch": 0.11650374107731559, "grad_norm": 0.0, "learning_rate": 1.2546063062716069e-06, "loss": 15.2222, "step": 2032 }, { "epoch": 0.11656107559556231, "grad_norm": 0.0, "learning_rate": 1.2505090084948496e-06, "loss": 14.7064, "step": 2033 }, { "epoch": 0.11661841011380902, "grad_norm": 0.0, "learning_rate": 1.2464174557013724e-06, "loss": 14.6919, "step": 2034 }, { "epoch": 0.11667574463205573, "grad_norm": 0.0, "learning_rate": 1.242331654160263e-06, "loss": 15.0021, "step": 2035 }, { "epoch": 0.11673307915030244, "grad_norm": 0.0, "learning_rate": 1.2382516101318048e-06, "loss": 15.1285, "step": 2036 }, { "epoch": 0.11679041366854916, "grad_norm": 0.0, "learning_rate": 1.2341773298674515e-06, "loss": 15.1657, "step": 2037 }, { "epoch": 0.11684774818679586, "grad_norm": 0.0, "learning_rate": 1.2301088196098332e-06, "loss": 14.9391, "step": 2038 }, { "epoch": 0.11690508270504257, "grad_norm": 0.0, "learning_rate": 1.2260460855927342e-06, "loss": 15.1881, "step": 2039 }, { "epoch": 0.11696241722328928, "grad_norm": 0.0, "learning_rate": 1.22198913404109e-06, "loss": 14.8563, "step": 2040 }, { "epoch": 0.11701975174153599, "grad_norm": 0.0, "learning_rate": 1.2179379711709738e-06, "loss": 14.9152, "step": 2041 }, { "epoch": 0.1170770862597827, "grad_norm": 0.0, "learning_rate": 1.2138926031895953e-06, "loss": 15.2588, "step": 2042 }, { "epoch": 0.11713442077802941, "grad_norm": 0.0, "learning_rate": 1.2098530362952782e-06, "loss": 14.9143, "step": 2043 }, { "epoch": 0.11719175529627612, "grad_norm": 0.0, "learning_rate": 1.205819276677464e-06, "loss": 15.1479, "step": 2044 }, { "epoch": 0.11724908981452284, "grad_norm": 0.0, "learning_rate": 1.2017913305166922e-06, "loss": 14.8345, "step": 2045 }, { "epoch": 0.11730642433276954, "grad_norm": 0.0, "learning_rate": 1.1977692039845962e-06, "loss": 15.2254, "step": 2046 }, { "epoch": 0.11736375885101626, "grad_norm": 0.0, "learning_rate": 1.1937529032438905e-06, "loss": 15.1603, "step": 2047 }, { "epoch": 0.11742109336926297, "grad_norm": 0.0, "learning_rate": 1.1897424344483682e-06, "loss": 14.9226, "step": 2048 }, { "epoch": 0.11747842788750967, "grad_norm": 0.0, "learning_rate": 1.1857378037428807e-06, "loss": 15.1255, "step": 2049 }, { "epoch": 0.11753576240575639, "grad_norm": 0.0, "learning_rate": 1.1817390172633402e-06, "loss": 14.7665, "step": 2050 }, { "epoch": 0.11759309692400309, "grad_norm": 0.0, "learning_rate": 1.1777460811366992e-06, "loss": 15.0083, "step": 2051 }, { "epoch": 0.1176504314422498, "grad_norm": 0.0, "learning_rate": 1.1737590014809507e-06, "loss": 14.8567, "step": 2052 }, { "epoch": 0.11770776596049652, "grad_norm": 0.0, "learning_rate": 1.1697777844051105e-06, "loss": 14.8262, "step": 2053 }, { "epoch": 0.11776510047874322, "grad_norm": 0.0, "learning_rate": 1.165802436009214e-06, "loss": 15.0429, "step": 2054 }, { "epoch": 0.11782243499698994, "grad_norm": 0.0, "learning_rate": 1.1618329623843022e-06, "loss": 14.8701, "step": 2055 }, { "epoch": 0.11787976951523665, "grad_norm": 0.0, "learning_rate": 1.1578693696124193e-06, "loss": 15.0955, "step": 2056 }, { "epoch": 0.11793710403348336, "grad_norm": 0.0, "learning_rate": 1.1539116637665931e-06, "loss": 15.1814, "step": 2057 }, { "epoch": 0.11799443855173007, "grad_norm": 0.0, "learning_rate": 1.1499598509108383e-06, "loss": 14.7819, "step": 2058 }, { "epoch": 0.11805177306997677, "grad_norm": 0.0, "learning_rate": 1.1460139371001339e-06, "loss": 14.8229, "step": 2059 }, { "epoch": 0.11810910758822349, "grad_norm": 0.0, "learning_rate": 1.1420739283804234e-06, "loss": 15.1278, "step": 2060 }, { "epoch": 0.1181664421064702, "grad_norm": 0.0, "learning_rate": 1.1381398307886e-06, "loss": 14.9941, "step": 2061 }, { "epoch": 0.1182237766247169, "grad_norm": 0.0, "learning_rate": 1.1342116503525059e-06, "loss": 14.8314, "step": 2062 }, { "epoch": 0.11828111114296362, "grad_norm": 0.0, "learning_rate": 1.1302893930909086e-06, "loss": 14.9274, "step": 2063 }, { "epoch": 0.11833844566121034, "grad_norm": 0.0, "learning_rate": 1.1263730650135084e-06, "loss": 15.0834, "step": 2064 }, { "epoch": 0.11839578017945704, "grad_norm": 0.0, "learning_rate": 1.1224626721209141e-06, "loss": 14.7524, "step": 2065 }, { "epoch": 0.11845311469770375, "grad_norm": 0.0, "learning_rate": 1.1185582204046463e-06, "loss": 15.1774, "step": 2066 }, { "epoch": 0.11851044921595047, "grad_norm": 0.0, "learning_rate": 1.1146597158471172e-06, "loss": 14.9888, "step": 2067 }, { "epoch": 0.11856778373419717, "grad_norm": 0.0, "learning_rate": 1.1107671644216305e-06, "loss": 14.9197, "step": 2068 }, { "epoch": 0.11862511825244389, "grad_norm": 0.0, "learning_rate": 1.1068805720923647e-06, "loss": 15.1572, "step": 2069 }, { "epoch": 0.11868245277069059, "grad_norm": 0.0, "learning_rate": 1.1029999448143736e-06, "loss": 14.7622, "step": 2070 }, { "epoch": 0.1187397872889373, "grad_norm": 0.0, "learning_rate": 1.0991252885335651e-06, "loss": 14.93, "step": 2071 }, { "epoch": 0.11879712180718402, "grad_norm": 0.0, "learning_rate": 1.0952566091867044e-06, "loss": 14.7888, "step": 2072 }, { "epoch": 0.11885445632543072, "grad_norm": 0.0, "learning_rate": 1.0913939127013935e-06, "loss": 15.0324, "step": 2073 }, { "epoch": 0.11891179084367744, "grad_norm": 0.0, "learning_rate": 1.0875372049960697e-06, "loss": 14.7762, "step": 2074 }, { "epoch": 0.11896912536192415, "grad_norm": 0.0, "learning_rate": 1.0836864919799927e-06, "loss": 15.2926, "step": 2075 }, { "epoch": 0.11902645988017085, "grad_norm": 0.0, "learning_rate": 1.0798417795532411e-06, "loss": 14.7496, "step": 2076 }, { "epoch": 0.11908379439841757, "grad_norm": 0.0, "learning_rate": 1.0760030736066952e-06, "loss": 14.703, "step": 2077 }, { "epoch": 0.11914112891666427, "grad_norm": 0.0, "learning_rate": 1.0721703800220351e-06, "loss": 15.0809, "step": 2078 }, { "epoch": 0.11919846343491099, "grad_norm": 0.0, "learning_rate": 1.0683437046717248e-06, "loss": 14.8517, "step": 2079 }, { "epoch": 0.1192557979531577, "grad_norm": 0.0, "learning_rate": 1.064523053419015e-06, "loss": 15.1544, "step": 2080 }, { "epoch": 0.1193131324714044, "grad_norm": 0.0, "learning_rate": 1.060708432117914e-06, "loss": 14.813, "step": 2081 }, { "epoch": 0.11937046698965112, "grad_norm": 0.0, "learning_rate": 1.056899846613203e-06, "loss": 14.9166, "step": 2082 }, { "epoch": 0.11942780150789783, "grad_norm": 0.0, "learning_rate": 1.0530973027404073e-06, "loss": 15.1148, "step": 2083 }, { "epoch": 0.11948513602614454, "grad_norm": 0.0, "learning_rate": 1.0493008063258004e-06, "loss": 15.2666, "step": 2084 }, { "epoch": 0.11954247054439125, "grad_norm": 0.0, "learning_rate": 1.045510363186385e-06, "loss": 15.2038, "step": 2085 }, { "epoch": 0.11959980506263797, "grad_norm": 0.0, "learning_rate": 1.041725979129894e-06, "loss": 14.9586, "step": 2086 }, { "epoch": 0.11965713958088467, "grad_norm": 0.0, "learning_rate": 1.0379476599547728e-06, "loss": 15.0918, "step": 2087 }, { "epoch": 0.11971447409913138, "grad_norm": 0.0, "learning_rate": 1.0341754114501745e-06, "loss": 14.9972, "step": 2088 }, { "epoch": 0.11977180861737809, "grad_norm": 0.0, "learning_rate": 1.0304092393959513e-06, "loss": 14.9952, "step": 2089 }, { "epoch": 0.1198291431356248, "grad_norm": 0.0, "learning_rate": 1.0266491495626473e-06, "loss": 15.3989, "step": 2090 }, { "epoch": 0.11988647765387152, "grad_norm": 0.0, "learning_rate": 1.022895147711483e-06, "loss": 14.8923, "step": 2091 }, { "epoch": 0.11994381217211822, "grad_norm": 0.0, "learning_rate": 1.0191472395943552e-06, "loss": 14.705, "step": 2092 }, { "epoch": 0.12000114669036493, "grad_norm": 0.0, "learning_rate": 1.0154054309538214e-06, "loss": 14.9932, "step": 2093 }, { "epoch": 0.12005848120861165, "grad_norm": 0.0, "learning_rate": 1.011669727523094e-06, "loss": 15.036, "step": 2094 }, { "epoch": 0.12011581572685835, "grad_norm": 0.0, "learning_rate": 1.0079401350260288e-06, "loss": 14.8307, "step": 2095 }, { "epoch": 0.12017315024510507, "grad_norm": 0.0, "learning_rate": 1.0042166591771235e-06, "loss": 15.046, "step": 2096 }, { "epoch": 0.12023048476335177, "grad_norm": 0.0, "learning_rate": 1.0004993056814987e-06, "loss": 15.1879, "step": 2097 }, { "epoch": 0.12028781928159848, "grad_norm": 0.0, "learning_rate": 9.967880802348989e-07, "loss": 15.0036, "step": 2098 }, { "epoch": 0.1203451537998452, "grad_norm": 0.0, "learning_rate": 9.930829885236737e-07, "loss": 14.8795, "step": 2099 }, { "epoch": 0.1204024883180919, "grad_norm": 0.0, "learning_rate": 9.893840362247809e-07, "loss": 14.789, "step": 2100 }, { "epoch": 0.12045982283633862, "grad_norm": 0.0, "learning_rate": 9.856912290057668e-07, "loss": 14.9005, "step": 2101 }, { "epoch": 0.12051715735458533, "grad_norm": 0.0, "learning_rate": 9.82004572524763e-07, "loss": 14.9725, "step": 2102 }, { "epoch": 0.12057449187283203, "grad_norm": 0.0, "learning_rate": 9.78324072430477e-07, "loss": 14.8546, "step": 2103 }, { "epoch": 0.12063182639107875, "grad_norm": 0.0, "learning_rate": 9.746497343621857e-07, "loss": 14.8822, "step": 2104 }, { "epoch": 0.12068916090932547, "grad_norm": 0.0, "learning_rate": 9.709815639497211e-07, "loss": 14.9052, "step": 2105 }, { "epoch": 0.12074649542757217, "grad_norm": 0.0, "learning_rate": 9.673195668134693e-07, "loss": 15.2188, "step": 2106 }, { "epoch": 0.12080382994581888, "grad_norm": 0.0, "learning_rate": 9.63663748564353e-07, "loss": 15.071, "step": 2107 }, { "epoch": 0.12086116446406558, "grad_norm": 0.0, "learning_rate": 9.600141148038312e-07, "loss": 14.954, "step": 2108 }, { "epoch": 0.1209184989823123, "grad_norm": 0.0, "learning_rate": 9.563706711238829e-07, "loss": 14.9532, "step": 2109 }, { "epoch": 0.12097583350055902, "grad_norm": 0.0, "learning_rate": 9.527334231070084e-07, "loss": 14.8819, "step": 2110 }, { "epoch": 0.12103316801880572, "grad_norm": 0.0, "learning_rate": 9.491023763262097e-07, "loss": 14.8817, "step": 2111 }, { "epoch": 0.12109050253705243, "grad_norm": 0.0, "learning_rate": 9.454775363449914e-07, "loss": 15.0113, "step": 2112 }, { "epoch": 0.12114783705529915, "grad_norm": 0.0, "learning_rate": 9.418589087173441e-07, "loss": 15.0403, "step": 2113 }, { "epoch": 0.12120517157354585, "grad_norm": 0.0, "learning_rate": 9.382464989877437e-07, "loss": 14.8847, "step": 2114 }, { "epoch": 0.12126250609179257, "grad_norm": 0.0, "learning_rate": 9.346403126911364e-07, "loss": 15.1507, "step": 2115 }, { "epoch": 0.12131984061003927, "grad_norm": 0.0, "learning_rate": 9.310403553529335e-07, "loss": 15.09, "step": 2116 }, { "epoch": 0.12137717512828598, "grad_norm": 0.0, "learning_rate": 9.274466324890002e-07, "loss": 14.8794, "step": 2117 }, { "epoch": 0.1214345096465327, "grad_norm": 0.0, "learning_rate": 9.238591496056548e-07, "loss": 14.9661, "step": 2118 }, { "epoch": 0.1214918441647794, "grad_norm": 0.0, "learning_rate": 9.20277912199648e-07, "loss": 14.7049, "step": 2119 }, { "epoch": 0.12154917868302612, "grad_norm": 0.0, "learning_rate": 9.167029257581672e-07, "loss": 15.0118, "step": 2120 }, { "epoch": 0.12160651320127283, "grad_norm": 0.0, "learning_rate": 9.131341957588185e-07, "loss": 14.9982, "step": 2121 }, { "epoch": 0.12166384771951953, "grad_norm": 0.0, "learning_rate": 9.095717276696214e-07, "loss": 14.9866, "step": 2122 }, { "epoch": 0.12172118223776625, "grad_norm": 0.0, "learning_rate": 9.060155269490012e-07, "loss": 14.8572, "step": 2123 }, { "epoch": 0.12177851675601296, "grad_norm": 0.0, "learning_rate": 9.024655990457842e-07, "loss": 14.8884, "step": 2124 }, { "epoch": 0.12183585127425967, "grad_norm": 0.0, "learning_rate": 8.989219493991791e-07, "loss": 15.0148, "step": 2125 }, { "epoch": 0.12189318579250638, "grad_norm": 0.0, "learning_rate": 8.953845834387814e-07, "loss": 15.0696, "step": 2126 }, { "epoch": 0.12195052031075308, "grad_norm": 0.0, "learning_rate": 8.918535065845529e-07, "loss": 15.0508, "step": 2127 }, { "epoch": 0.1220078548289998, "grad_norm": 0.0, "learning_rate": 8.883287242468242e-07, "loss": 14.9761, "step": 2128 }, { "epoch": 0.12206518934724651, "grad_norm": 0.0, "learning_rate": 8.8481024182628e-07, "loss": 14.9608, "step": 2129 }, { "epoch": 0.12212252386549322, "grad_norm": 0.0, "learning_rate": 8.812980647139468e-07, "loss": 14.5928, "step": 2130 }, { "epoch": 0.12217985838373993, "grad_norm": 0.0, "learning_rate": 8.777921982911996e-07, "loss": 15.2196, "step": 2131 }, { "epoch": 0.12223719290198665, "grad_norm": 0.0, "learning_rate": 8.742926479297354e-07, "loss": 15.2472, "step": 2132 }, { "epoch": 0.12229452742023335, "grad_norm": 0.0, "learning_rate": 8.707994189915808e-07, "loss": 14.9897, "step": 2133 }, { "epoch": 0.12235186193848006, "grad_norm": 0.0, "learning_rate": 8.673125168290713e-07, "loss": 15.0112, "step": 2134 }, { "epoch": 0.12240919645672678, "grad_norm": 0.0, "learning_rate": 8.638319467848522e-07, "loss": 15.0721, "step": 2135 }, { "epoch": 0.12246653097497348, "grad_norm": 0.0, "learning_rate": 8.603577141918646e-07, "loss": 15.0848, "step": 2136 }, { "epoch": 0.1225238654932202, "grad_norm": 0.0, "learning_rate": 8.568898243733398e-07, "loss": 14.8995, "step": 2137 }, { "epoch": 0.1225812000114669, "grad_norm": 0.0, "learning_rate": 8.534282826427892e-07, "loss": 14.8245, "step": 2138 }, { "epoch": 0.12263853452971361, "grad_norm": 0.0, "learning_rate": 8.499730943040024e-07, "loss": 15.1215, "step": 2139 }, { "epoch": 0.12269586904796033, "grad_norm": 0.0, "learning_rate": 8.46524264651028e-07, "loss": 14.826, "step": 2140 }, { "epoch": 0.12275320356620703, "grad_norm": 0.0, "learning_rate": 8.430817989681773e-07, "loss": 14.9976, "step": 2141 }, { "epoch": 0.12281053808445375, "grad_norm": 0.0, "learning_rate": 8.396457025300075e-07, "loss": 14.9849, "step": 2142 }, { "epoch": 0.12286787260270046, "grad_norm": 0.0, "learning_rate": 8.362159806013176e-07, "loss": 15.114, "step": 2143 }, { "epoch": 0.12292520712094716, "grad_norm": 0.0, "learning_rate": 8.327926384371382e-07, "loss": 15.1518, "step": 2144 }, { "epoch": 0.12298254163919388, "grad_norm": 0.0, "learning_rate": 8.293756812827286e-07, "loss": 15.0122, "step": 2145 }, { "epoch": 0.12303987615744058, "grad_norm": 0.0, "learning_rate": 8.259651143735603e-07, "loss": 15.0731, "step": 2146 }, { "epoch": 0.1230972106756873, "grad_norm": 0.0, "learning_rate": 8.225609429353187e-07, "loss": 14.6555, "step": 2147 }, { "epoch": 0.12315454519393401, "grad_norm": 0.0, "learning_rate": 8.191631721838838e-07, "loss": 14.8526, "step": 2148 }, { "epoch": 0.12321187971218071, "grad_norm": 0.0, "learning_rate": 8.157718073253351e-07, "loss": 15.1423, "step": 2149 }, { "epoch": 0.12326921423042743, "grad_norm": 0.0, "learning_rate": 8.123868535559326e-07, "loss": 14.725, "step": 2150 }, { "epoch": 0.12332654874867414, "grad_norm": 0.0, "learning_rate": 8.090083160621138e-07, "loss": 14.9794, "step": 2151 }, { "epoch": 0.12338388326692085, "grad_norm": 0.0, "learning_rate": 8.056362000204848e-07, "loss": 15.2893, "step": 2152 }, { "epoch": 0.12344121778516756, "grad_norm": 0.0, "learning_rate": 8.022705105978163e-07, "loss": 14.8075, "step": 2153 }, { "epoch": 0.12349855230341428, "grad_norm": 0.0, "learning_rate": 7.989112529510268e-07, "loss": 14.8811, "step": 2154 }, { "epoch": 0.12355588682166098, "grad_norm": 0.0, "learning_rate": 7.955584322271853e-07, "loss": 15.002, "step": 2155 }, { "epoch": 0.1236132213399077, "grad_norm": 0.0, "learning_rate": 7.922120535634937e-07, "loss": 14.9995, "step": 2156 }, { "epoch": 0.1236705558581544, "grad_norm": 0.0, "learning_rate": 7.888721220872858e-07, "loss": 15.032, "step": 2157 }, { "epoch": 0.12372789037640111, "grad_norm": 0.0, "learning_rate": 7.85538642916015e-07, "loss": 14.9032, "step": 2158 }, { "epoch": 0.12378522489464783, "grad_norm": 0.0, "learning_rate": 7.822116211572522e-07, "loss": 15.1257, "step": 2159 }, { "epoch": 0.12384255941289453, "grad_norm": 0.0, "learning_rate": 7.788910619086693e-07, "loss": 15.0515, "step": 2160 }, { "epoch": 0.12389989393114124, "grad_norm": 0.0, "learning_rate": 7.755769702580412e-07, "loss": 15.1874, "step": 2161 }, { "epoch": 0.12395722844938796, "grad_norm": 0.0, "learning_rate": 7.722693512832291e-07, "loss": 15.0487, "step": 2162 }, { "epoch": 0.12401456296763466, "grad_norm": 0.0, "learning_rate": 7.689682100521795e-07, "loss": 15.1177, "step": 2163 }, { "epoch": 0.12407189748588138, "grad_norm": 0.0, "learning_rate": 7.656735516229125e-07, "loss": 14.875, "step": 2164 }, { "epoch": 0.12412923200412808, "grad_norm": 0.0, "learning_rate": 7.62385381043515e-07, "loss": 15.0821, "step": 2165 }, { "epoch": 0.1241865665223748, "grad_norm": 0.0, "learning_rate": 7.59103703352132e-07, "loss": 15.0575, "step": 2166 }, { "epoch": 0.12424390104062151, "grad_norm": 0.0, "learning_rate": 7.558285235769647e-07, "loss": 14.8869, "step": 2167 }, { "epoch": 0.12430123555886821, "grad_norm": 0.0, "learning_rate": 7.525598467362516e-07, "loss": 14.7568, "step": 2168 }, { "epoch": 0.12435857007711493, "grad_norm": 0.0, "learning_rate": 7.492976778382743e-07, "loss": 15.2972, "step": 2169 }, { "epoch": 0.12441590459536164, "grad_norm": 0.0, "learning_rate": 7.46042021881338e-07, "loss": 14.8084, "step": 2170 }, { "epoch": 0.12447323911360834, "grad_norm": 0.0, "learning_rate": 7.427928838537695e-07, "loss": 14.9452, "step": 2171 }, { "epoch": 0.12453057363185506, "grad_norm": 0.0, "learning_rate": 7.395502687339096e-07, "loss": 14.9905, "step": 2172 }, { "epoch": 0.12458790815010178, "grad_norm": 0.0, "learning_rate": 7.363141814901054e-07, "loss": 14.9325, "step": 2173 }, { "epoch": 0.12464524266834848, "grad_norm": 0.0, "learning_rate": 7.330846270807002e-07, "loss": 15.1709, "step": 2174 }, { "epoch": 0.12470257718659519, "grad_norm": 0.0, "learning_rate": 7.298616104540302e-07, "loss": 15.2077, "step": 2175 }, { "epoch": 0.1247599117048419, "grad_norm": 0.0, "learning_rate": 7.266451365484106e-07, "loss": 14.9368, "step": 2176 }, { "epoch": 0.12481724622308861, "grad_norm": 0.0, "learning_rate": 7.23435210292136e-07, "loss": 14.8902, "step": 2177 }, { "epoch": 0.12487458074133533, "grad_norm": 0.0, "learning_rate": 7.202318366034666e-07, "loss": 15.1098, "step": 2178 }, { "epoch": 0.12493191525958203, "grad_norm": 0.0, "learning_rate": 7.170350203906218e-07, "loss": 14.7791, "step": 2179 }, { "epoch": 0.12498924977782874, "grad_norm": 0.0, "learning_rate": 7.138447665517739e-07, "loss": 15.006, "step": 2180 }, { "epoch": 0.12504658429607546, "grad_norm": 0.0, "learning_rate": 7.106610799750435e-07, "loss": 15.0605, "step": 2181 }, { "epoch": 0.12510391881432217, "grad_norm": 0.0, "learning_rate": 7.074839655384835e-07, "loss": 15.1214, "step": 2182 }, { "epoch": 0.12516125333256886, "grad_norm": 0.0, "learning_rate": 7.043134281100827e-07, "loss": 15.0752, "step": 2183 }, { "epoch": 0.12521858785081558, "grad_norm": 0.0, "learning_rate": 7.01149472547748e-07, "loss": 14.9167, "step": 2184 }, { "epoch": 0.1252759223690623, "grad_norm": 0.0, "learning_rate": 6.979921036993042e-07, "loss": 14.9749, "step": 2185 }, { "epoch": 0.125333256887309, "grad_norm": 0.0, "learning_rate": 6.948413264024806e-07, "loss": 14.8556, "step": 2186 }, { "epoch": 0.12539059140555572, "grad_norm": 0.0, "learning_rate": 6.91697145484912e-07, "loss": 14.9484, "step": 2187 }, { "epoch": 0.12544792592380244, "grad_norm": 0.0, "learning_rate": 6.885595657641214e-07, "loss": 14.9618, "step": 2188 }, { "epoch": 0.12550526044204913, "grad_norm": 0.0, "learning_rate": 6.854285920475217e-07, "loss": 15.0715, "step": 2189 }, { "epoch": 0.12556259496029584, "grad_norm": 0.0, "learning_rate": 6.823042291323995e-07, "loss": 14.9546, "step": 2190 }, { "epoch": 0.12561992947854256, "grad_norm": 0.0, "learning_rate": 6.791864818059179e-07, "loss": 14.7551, "step": 2191 }, { "epoch": 0.12567726399678927, "grad_norm": 0.0, "learning_rate": 6.76075354845096e-07, "loss": 14.8915, "step": 2192 }, { "epoch": 0.125734598515036, "grad_norm": 0.0, "learning_rate": 6.729708530168172e-07, "loss": 14.911, "step": 2193 }, { "epoch": 0.12579193303328268, "grad_norm": 0.0, "learning_rate": 6.698729810778065e-07, "loss": 15.3383, "step": 2194 }, { "epoch": 0.1258492675515294, "grad_norm": 0.0, "learning_rate": 6.667817437746383e-07, "loss": 14.9373, "step": 2195 }, { "epoch": 0.1259066020697761, "grad_norm": 0.0, "learning_rate": 6.63697145843713e-07, "loss": 15.0106, "step": 2196 }, { "epoch": 0.12596393658802282, "grad_norm": 0.0, "learning_rate": 6.606191920112664e-07, "loss": 14.8685, "step": 2197 }, { "epoch": 0.12602127110626954, "grad_norm": 0.0, "learning_rate": 6.575478869933483e-07, "loss": 15.2732, "step": 2198 }, { "epoch": 0.12607860562451623, "grad_norm": 0.0, "learning_rate": 6.54483235495823e-07, "loss": 15.2462, "step": 2199 }, { "epoch": 0.12613594014276294, "grad_norm": 0.0, "learning_rate": 6.514252422143591e-07, "loss": 15.0648, "step": 2200 }, { "epoch": 0.12619327466100966, "grad_norm": 0.0, "learning_rate": 6.483739118344273e-07, "loss": 14.9853, "step": 2201 }, { "epoch": 0.12625060917925637, "grad_norm": 0.0, "learning_rate": 6.453292490312845e-07, "loss": 15.0055, "step": 2202 }, { "epoch": 0.1263079436975031, "grad_norm": 0.0, "learning_rate": 6.422912584699753e-07, "loss": 15.1227, "step": 2203 }, { "epoch": 0.1263652782157498, "grad_norm": 0.0, "learning_rate": 6.392599448053194e-07, "loss": 15.1982, "step": 2204 }, { "epoch": 0.1264226127339965, "grad_norm": 0.0, "learning_rate": 6.362353126819054e-07, "loss": 14.9656, "step": 2205 }, { "epoch": 0.1264799472522432, "grad_norm": 0.0, "learning_rate": 6.332173667340841e-07, "loss": 14.9772, "step": 2206 }, { "epoch": 0.12653728177048992, "grad_norm": 0.0, "learning_rate": 6.302061115859659e-07, "loss": 15.1111, "step": 2207 }, { "epoch": 0.12659461628873664, "grad_norm": 0.0, "learning_rate": 6.272015518514041e-07, "loss": 14.9165, "step": 2208 }, { "epoch": 0.12665195080698335, "grad_norm": 0.0, "learning_rate": 6.242036921339973e-07, "loss": 15.2089, "step": 2209 }, { "epoch": 0.12670928532523004, "grad_norm": 0.0, "learning_rate": 6.212125370270749e-07, "loss": 14.7921, "step": 2210 }, { "epoch": 0.12676661984347676, "grad_norm": 0.0, "learning_rate": 6.182280911136979e-07, "loss": 14.8548, "step": 2211 }, { "epoch": 0.12682395436172347, "grad_norm": 0.0, "learning_rate": 6.152503589666426e-07, "loss": 15.0428, "step": 2212 }, { "epoch": 0.1268812888799702, "grad_norm": 0.0, "learning_rate": 6.122793451484016e-07, "loss": 14.7792, "step": 2213 }, { "epoch": 0.1269386233982169, "grad_norm": 0.0, "learning_rate": 6.093150542111715e-07, "loss": 14.7011, "step": 2214 }, { "epoch": 0.12699595791646362, "grad_norm": 0.0, "learning_rate": 6.063574906968511e-07, "loss": 15.273, "step": 2215 }, { "epoch": 0.1270532924347103, "grad_norm": 0.0, "learning_rate": 6.03406659137028e-07, "loss": 14.7624, "step": 2216 }, { "epoch": 0.12711062695295702, "grad_norm": 0.0, "learning_rate": 6.004625640529788e-07, "loss": 15.0017, "step": 2217 }, { "epoch": 0.12716796147120374, "grad_norm": 0.0, "learning_rate": 5.975252099556544e-07, "loss": 15.2556, "step": 2218 }, { "epoch": 0.12722529598945045, "grad_norm": 0.0, "learning_rate": 5.9459460134568e-07, "loss": 15.125, "step": 2219 }, { "epoch": 0.12728263050769717, "grad_norm": 0.0, "learning_rate": 5.916707427133428e-07, "loss": 15.0283, "step": 2220 }, { "epoch": 0.12733996502594386, "grad_norm": 0.0, "learning_rate": 5.887536385385917e-07, "loss": 14.8537, "step": 2221 }, { "epoch": 0.12739729954419057, "grad_norm": 0.0, "learning_rate": 5.858432932910213e-07, "loss": 14.9437, "step": 2222 }, { "epoch": 0.1274546340624373, "grad_norm": 0.0, "learning_rate": 5.829397114298762e-07, "loss": 14.8582, "step": 2223 }, { "epoch": 0.127511968580684, "grad_norm": 0.0, "learning_rate": 5.800428974040311e-07, "loss": 14.9024, "step": 2224 }, { "epoch": 0.12756930309893072, "grad_norm": 0.0, "learning_rate": 5.771528556519978e-07, "loss": 14.8501, "step": 2225 }, { "epoch": 0.12762663761717744, "grad_norm": 0.0, "learning_rate": 5.742695906019075e-07, "loss": 14.8616, "step": 2226 }, { "epoch": 0.12768397213542412, "grad_norm": 0.0, "learning_rate": 5.713931066715078e-07, "loss": 14.6437, "step": 2227 }, { "epoch": 0.12774130665367084, "grad_norm": 0.0, "learning_rate": 5.685234082681584e-07, "loss": 14.9212, "step": 2228 }, { "epoch": 0.12779864117191755, "grad_norm": 0.0, "learning_rate": 5.656604997888204e-07, "loss": 15.1091, "step": 2229 }, { "epoch": 0.12785597569016427, "grad_norm": 0.0, "learning_rate": 5.628043856200543e-07, "loss": 14.866, "step": 2230 }, { "epoch": 0.12791331020841099, "grad_norm": 0.0, "learning_rate": 5.599550701380058e-07, "loss": 15.0584, "step": 2231 }, { "epoch": 0.12797064472665767, "grad_norm": 0.0, "learning_rate": 5.57112557708408e-07, "loss": 14.9854, "step": 2232 }, { "epoch": 0.1280279792449044, "grad_norm": 0.0, "learning_rate": 5.542768526865678e-07, "loss": 15.1223, "step": 2233 }, { "epoch": 0.1280853137631511, "grad_norm": 0.0, "learning_rate": 5.514479594173622e-07, "loss": 14.8802, "step": 2234 }, { "epoch": 0.12814264828139782, "grad_norm": 0.0, "learning_rate": 5.486258822352308e-07, "loss": 14.9911, "step": 2235 }, { "epoch": 0.12819998279964454, "grad_norm": 0.0, "learning_rate": 5.458106254641715e-07, "loss": 14.9738, "step": 2236 }, { "epoch": 0.12825731731789122, "grad_norm": 0.0, "learning_rate": 5.430021934177293e-07, "loss": 15.1995, "step": 2237 }, { "epoch": 0.12831465183613794, "grad_norm": 0.0, "learning_rate": 5.402005903989955e-07, "loss": 15.234, "step": 2238 }, { "epoch": 0.12837198635438465, "grad_norm": 0.0, "learning_rate": 5.374058207005945e-07, "loss": 15.0916, "step": 2239 }, { "epoch": 0.12842932087263137, "grad_norm": 0.0, "learning_rate": 5.346178886046832e-07, "loss": 15.0225, "step": 2240 }, { "epoch": 0.12848665539087809, "grad_norm": 0.0, "learning_rate": 5.318367983829393e-07, "loss": 15.1548, "step": 2241 }, { "epoch": 0.1285439899091248, "grad_norm": 0.0, "learning_rate": 5.290625542965611e-07, "loss": 15.063, "step": 2242 }, { "epoch": 0.1286013244273715, "grad_norm": 0.0, "learning_rate": 5.262951605962536e-07, "loss": 14.7698, "step": 2243 }, { "epoch": 0.1286586589456182, "grad_norm": 0.0, "learning_rate": 5.235346215222281e-07, "loss": 15.1815, "step": 2244 }, { "epoch": 0.12871599346386492, "grad_norm": 0.0, "learning_rate": 5.207809413041914e-07, "loss": 15.076, "step": 2245 }, { "epoch": 0.12877332798211164, "grad_norm": 0.0, "learning_rate": 5.180341241613434e-07, "loss": 14.6274, "step": 2246 }, { "epoch": 0.12883066250035835, "grad_norm": 0.0, "learning_rate": 5.152941743023665e-07, "loss": 14.8674, "step": 2247 }, { "epoch": 0.12888799701860504, "grad_norm": 0.0, "learning_rate": 5.125610959254213e-07, "loss": 14.8717, "step": 2248 }, { "epoch": 0.12894533153685175, "grad_norm": 0.0, "learning_rate": 5.098348932181385e-07, "loss": 14.6858, "step": 2249 }, { "epoch": 0.12900266605509847, "grad_norm": 0.0, "learning_rate": 5.071155703576181e-07, "loss": 15.1964, "step": 2250 }, { "epoch": 0.12906000057334518, "grad_norm": 0.0, "learning_rate": 5.044031315104136e-07, "loss": 15.1815, "step": 2251 }, { "epoch": 0.1291173350915919, "grad_norm": 0.0, "learning_rate": 5.016975808325364e-07, "loss": 15.2373, "step": 2252 }, { "epoch": 0.12917466960983862, "grad_norm": 0.0, "learning_rate": 4.989989224694386e-07, "loss": 15.0961, "step": 2253 }, { "epoch": 0.1292320041280853, "grad_norm": 0.0, "learning_rate": 4.963071605560144e-07, "loss": 15.0816, "step": 2254 }, { "epoch": 0.12928933864633202, "grad_norm": 0.0, "learning_rate": 4.936222992165907e-07, "loss": 14.9343, "step": 2255 }, { "epoch": 0.12934667316457873, "grad_norm": 0.0, "learning_rate": 4.909443425649218e-07, "loss": 15.037, "step": 2256 }, { "epoch": 0.12940400768282545, "grad_norm": 0.0, "learning_rate": 4.882732947041818e-07, "loss": 15.021, "step": 2257 }, { "epoch": 0.12946134220107217, "grad_norm": 0.0, "learning_rate": 4.856091597269613e-07, "loss": 15.1118, "step": 2258 }, { "epoch": 0.12951867671931885, "grad_norm": 0.0, "learning_rate": 4.829519417152545e-07, "loss": 15.146, "step": 2259 }, { "epoch": 0.12957601123756557, "grad_norm": 0.0, "learning_rate": 4.803016447404629e-07, "loss": 15.2024, "step": 2260 }, { "epoch": 0.12963334575581228, "grad_norm": 0.0, "learning_rate": 4.7765827286338e-07, "loss": 14.5083, "step": 2261 }, { "epoch": 0.129690680274059, "grad_norm": 0.0, "learning_rate": 4.7502183013418854e-07, "loss": 15.0645, "step": 2262 }, { "epoch": 0.12974801479230572, "grad_norm": 0.0, "learning_rate": 4.723923205924558e-07, "loss": 15.1787, "step": 2263 }, { "epoch": 0.12980534931055243, "grad_norm": 0.0, "learning_rate": 4.6976974826712596e-07, "loss": 15.0671, "step": 2264 }, { "epoch": 0.12986268382879912, "grad_norm": 0.0, "learning_rate": 4.671541171765126e-07, "loss": 14.8042, "step": 2265 }, { "epoch": 0.12992001834704583, "grad_norm": 0.0, "learning_rate": 4.6454543132829653e-07, "loss": 14.8989, "step": 2266 }, { "epoch": 0.12997735286529255, "grad_norm": 0.0, "learning_rate": 4.619436947195144e-07, "loss": 14.8098, "step": 2267 }, { "epoch": 0.13003468738353927, "grad_norm": 0.0, "learning_rate": 4.593489113365562e-07, "loss": 14.8988, "step": 2268 }, { "epoch": 0.13009202190178598, "grad_norm": 0.0, "learning_rate": 4.5676108515515684e-07, "loss": 14.6229, "step": 2269 }, { "epoch": 0.13014935642003267, "grad_norm": 0.0, "learning_rate": 4.541802201403955e-07, "loss": 14.9352, "step": 2270 }, { "epoch": 0.13020669093827938, "grad_norm": 0.0, "learning_rate": 4.516063202466797e-07, "loss": 14.9318, "step": 2271 }, { "epoch": 0.1302640254565261, "grad_norm": 0.0, "learning_rate": 4.4903938941775084e-07, "loss": 15.0311, "step": 2272 }, { "epoch": 0.13032135997477282, "grad_norm": 0.0, "learning_rate": 4.464794315866666e-07, "loss": 15.1561, "step": 2273 }, { "epoch": 0.13037869449301953, "grad_norm": 0.0, "learning_rate": 4.439264506758056e-07, "loss": 15.3379, "step": 2274 }, { "epoch": 0.13043602901126622, "grad_norm": 0.0, "learning_rate": 4.413804505968533e-07, "loss": 15.0224, "step": 2275 }, { "epoch": 0.13049336352951293, "grad_norm": 0.0, "learning_rate": 4.388414352507997e-07, "loss": 14.6675, "step": 2276 }, { "epoch": 0.13055069804775965, "grad_norm": 0.0, "learning_rate": 4.3630940852793233e-07, "loss": 15.2326, "step": 2277 }, { "epoch": 0.13060803256600637, "grad_norm": 0.0, "learning_rate": 4.3378437430783294e-07, "loss": 15.1475, "step": 2278 }, { "epoch": 0.13066536708425308, "grad_norm": 0.0, "learning_rate": 4.312663364593667e-07, "loss": 14.8431, "step": 2279 }, { "epoch": 0.1307227016024998, "grad_norm": 0.0, "learning_rate": 4.2875529884068123e-07, "loss": 15.0427, "step": 2280 }, { "epoch": 0.13078003612074648, "grad_norm": 0.0, "learning_rate": 4.262512652991968e-07, "loss": 14.8556, "step": 2281 }, { "epoch": 0.1308373706389932, "grad_norm": 0.0, "learning_rate": 4.2375423967160156e-07, "loss": 15.0491, "step": 2282 }, { "epoch": 0.13089470515723992, "grad_norm": 0.0, "learning_rate": 4.212642257838473e-07, "loss": 14.7967, "step": 2283 }, { "epoch": 0.13095203967548663, "grad_norm": 0.0, "learning_rate": 4.187812274511427e-07, "loss": 15.0764, "step": 2284 }, { "epoch": 0.13100937419373335, "grad_norm": 0.0, "learning_rate": 4.163052484779456e-07, "loss": 15.3877, "step": 2285 }, { "epoch": 0.13106670871198003, "grad_norm": 0.0, "learning_rate": 4.138362926579609e-07, "loss": 14.9205, "step": 2286 }, { "epoch": 0.13112404323022675, "grad_norm": 0.0, "learning_rate": 4.113743637741296e-07, "loss": 15.1598, "step": 2287 }, { "epoch": 0.13118137774847347, "grad_norm": 0.0, "learning_rate": 4.089194655986306e-07, "loss": 15.1938, "step": 2288 }, { "epoch": 0.13123871226672018, "grad_norm": 0.0, "learning_rate": 4.0647160189286436e-07, "loss": 15.1903, "step": 2289 }, { "epoch": 0.1312960467849669, "grad_norm": 0.0, "learning_rate": 4.040307764074586e-07, "loss": 15.1026, "step": 2290 }, { "epoch": 0.1313533813032136, "grad_norm": 0.0, "learning_rate": 4.015969928822527e-07, "loss": 15.0722, "step": 2291 }, { "epoch": 0.1314107158214603, "grad_norm": 0.0, "learning_rate": 3.991702550463e-07, "loss": 14.9654, "step": 2292 }, { "epoch": 0.13146805033970702, "grad_norm": 0.0, "learning_rate": 3.9675056661785563e-07, "loss": 14.9845, "step": 2293 }, { "epoch": 0.13152538485795373, "grad_norm": 0.0, "learning_rate": 3.943379313043766e-07, "loss": 15.222, "step": 2294 }, { "epoch": 0.13158271937620045, "grad_norm": 0.0, "learning_rate": 3.919323528025093e-07, "loss": 14.8688, "step": 2295 }, { "epoch": 0.13164005389444716, "grad_norm": 0.0, "learning_rate": 3.895338347980898e-07, "loss": 15.1766, "step": 2296 }, { "epoch": 0.13169738841269385, "grad_norm": 0.0, "learning_rate": 3.871423809661362e-07, "loss": 15.0152, "step": 2297 }, { "epoch": 0.13175472293094057, "grad_norm": 0.0, "learning_rate": 3.847579949708424e-07, "loss": 15.0119, "step": 2298 }, { "epoch": 0.13181205744918728, "grad_norm": 0.0, "learning_rate": 3.8238068046557276e-07, "loss": 14.7913, "step": 2299 }, { "epoch": 0.131869391967434, "grad_norm": 0.0, "learning_rate": 3.800104410928579e-07, "loss": 15.1182, "step": 2300 }, { "epoch": 0.1319267264856807, "grad_norm": 0.0, "learning_rate": 3.7764728048438614e-07, "loss": 15.3026, "step": 2301 }, { "epoch": 0.13198406100392743, "grad_norm": 0.0, "learning_rate": 3.752912022610006e-07, "loss": 14.8471, "step": 2302 }, { "epoch": 0.13204139552217412, "grad_norm": 0.0, "learning_rate": 3.7294221003269126e-07, "loss": 14.7744, "step": 2303 }, { "epoch": 0.13209873004042083, "grad_norm": 0.0, "learning_rate": 3.7060030739859475e-07, "loss": 14.5103, "step": 2304 }, { "epoch": 0.13215606455866755, "grad_norm": 0.0, "learning_rate": 3.6826549794698074e-07, "loss": 14.8963, "step": 2305 }, { "epoch": 0.13221339907691426, "grad_norm": 0.0, "learning_rate": 3.659377852552537e-07, "loss": 15.0329, "step": 2306 }, { "epoch": 0.13227073359516098, "grad_norm": 0.0, "learning_rate": 3.636171728899418e-07, "loss": 14.9432, "step": 2307 }, { "epoch": 0.13232806811340767, "grad_norm": 0.0, "learning_rate": 3.6130366440669693e-07, "loss": 14.7968, "step": 2308 }, { "epoch": 0.13238540263165438, "grad_norm": 0.0, "learning_rate": 3.5899726335028417e-07, "loss": 14.9833, "step": 2309 }, { "epoch": 0.1324427371499011, "grad_norm": 0.0, "learning_rate": 3.566979732545794e-07, "loss": 15.1287, "step": 2310 }, { "epoch": 0.1325000716681478, "grad_norm": 0.0, "learning_rate": 3.544057976425619e-07, "loss": 15.1688, "step": 2311 }, { "epoch": 0.13255740618639453, "grad_norm": 0.0, "learning_rate": 3.521207400263138e-07, "loss": 14.9778, "step": 2312 }, { "epoch": 0.13261474070464124, "grad_norm": 0.0, "learning_rate": 3.4984280390700565e-07, "loss": 15.0486, "step": 2313 }, { "epoch": 0.13267207522288793, "grad_norm": 0.0, "learning_rate": 3.4757199277490106e-07, "loss": 14.708, "step": 2314 }, { "epoch": 0.13272940974113465, "grad_norm": 0.0, "learning_rate": 3.453083101093441e-07, "loss": 14.7461, "step": 2315 }, { "epoch": 0.13278674425938136, "grad_norm": 0.0, "learning_rate": 3.43051759378758e-07, "loss": 15.0442, "step": 2316 }, { "epoch": 0.13284407877762808, "grad_norm": 0.0, "learning_rate": 3.408023440406355e-07, "loss": 14.64, "step": 2317 }, { "epoch": 0.1329014132958748, "grad_norm": 0.0, "learning_rate": 3.385600675415418e-07, "loss": 15.1115, "step": 2318 }, { "epoch": 0.13295874781412148, "grad_norm": 0.0, "learning_rate": 3.3632493331709815e-07, "loss": 14.7104, "step": 2319 }, { "epoch": 0.1330160823323682, "grad_norm": 0.0, "learning_rate": 3.340969447919873e-07, "loss": 15.135, "step": 2320 }, { "epoch": 0.1330734168506149, "grad_norm": 0.0, "learning_rate": 3.318761053799391e-07, "loss": 15.1093, "step": 2321 }, { "epoch": 0.13313075136886163, "grad_norm": 0.0, "learning_rate": 3.2966241848373325e-07, "loss": 15.0765, "step": 2322 }, { "epoch": 0.13318808588710834, "grad_norm": 0.0, "learning_rate": 3.2745588749518775e-07, "loss": 14.9632, "step": 2323 }, { "epoch": 0.13324542040535503, "grad_norm": 0.0, "learning_rate": 3.2525651579515705e-07, "loss": 15.0704, "step": 2324 }, { "epoch": 0.13330275492360175, "grad_norm": 0.0, "learning_rate": 3.2306430675352664e-07, "loss": 15.0037, "step": 2325 }, { "epoch": 0.13336008944184846, "grad_norm": 0.0, "learning_rate": 3.2087926372920577e-07, "loss": 15.014, "step": 2326 }, { "epoch": 0.13341742396009518, "grad_norm": 0.0, "learning_rate": 3.1870139007012636e-07, "loss": 15.0621, "step": 2327 }, { "epoch": 0.1334747584783419, "grad_norm": 0.0, "learning_rate": 3.1653068911323306e-07, "loss": 14.7647, "step": 2328 }, { "epoch": 0.1335320929965886, "grad_norm": 0.0, "learning_rate": 3.143671641844831e-07, "loss": 15.0683, "step": 2329 }, { "epoch": 0.1335894275148353, "grad_norm": 0.0, "learning_rate": 3.122108185988354e-07, "loss": 15.0593, "step": 2330 }, { "epoch": 0.133646762033082, "grad_norm": 0.0, "learning_rate": 3.1006165566025094e-07, "loss": 15.0096, "step": 2331 }, { "epoch": 0.13370409655132873, "grad_norm": 0.0, "learning_rate": 3.0791967866168394e-07, "loss": 14.9295, "step": 2332 }, { "epoch": 0.13376143106957544, "grad_norm": 0.0, "learning_rate": 3.057848908850808e-07, "loss": 14.827, "step": 2333 }, { "epoch": 0.13381876558782216, "grad_norm": 0.0, "learning_rate": 3.0365729560136834e-07, "loss": 14.7542, "step": 2334 }, { "epoch": 0.13387610010606885, "grad_norm": 0.0, "learning_rate": 3.015368960704584e-07, "loss": 14.8691, "step": 2335 }, { "epoch": 0.13393343462431556, "grad_norm": 0.0, "learning_rate": 2.9942369554123216e-07, "loss": 15.0813, "step": 2336 }, { "epoch": 0.13399076914256228, "grad_norm": 0.0, "learning_rate": 2.9731769725154515e-07, "loss": 14.7787, "step": 2337 }, { "epoch": 0.134048103660809, "grad_norm": 0.0, "learning_rate": 2.9521890442821276e-07, "loss": 15.2619, "step": 2338 }, { "epoch": 0.1341054381790557, "grad_norm": 0.0, "learning_rate": 2.9312732028701494e-07, "loss": 14.9959, "step": 2339 }, { "epoch": 0.13416277269730242, "grad_norm": 0.0, "learning_rate": 2.9104294803268265e-07, "loss": 15.048, "step": 2340 }, { "epoch": 0.1342201072155491, "grad_norm": 0.0, "learning_rate": 2.889657908589e-07, "loss": 15.0371, "step": 2341 }, { "epoch": 0.13427744173379583, "grad_norm": 0.0, "learning_rate": 2.868958519482928e-07, "loss": 14.9773, "step": 2342 }, { "epoch": 0.13433477625204254, "grad_norm": 0.0, "learning_rate": 2.8483313447243033e-07, "loss": 15.0038, "step": 2343 }, { "epoch": 0.13439211077028926, "grad_norm": 0.0, "learning_rate": 2.8277764159181484e-07, "loss": 15.0863, "step": 2344 }, { "epoch": 0.13444944528853597, "grad_norm": 0.0, "learning_rate": 2.807293764558794e-07, "loss": 14.7964, "step": 2345 }, { "epoch": 0.13450677980678266, "grad_norm": 0.0, "learning_rate": 2.786883422029829e-07, "loss": 15.0627, "step": 2346 }, { "epoch": 0.13456411432502938, "grad_norm": 0.0, "learning_rate": 2.7665454196040665e-07, "loss": 14.9265, "step": 2347 }, { "epoch": 0.1346214488432761, "grad_norm": 0.0, "learning_rate": 2.7462797884434456e-07, "loss": 14.8301, "step": 2348 }, { "epoch": 0.1346787833615228, "grad_norm": 0.0, "learning_rate": 2.7260865595990506e-07, "loss": 14.9195, "step": 2349 }, { "epoch": 0.13473611787976952, "grad_norm": 0.0, "learning_rate": 2.7059657640110204e-07, "loss": 14.7075, "step": 2350 }, { "epoch": 0.13479345239801624, "grad_norm": 0.0, "learning_rate": 2.6859174325085005e-07, "loss": 14.9169, "step": 2351 }, { "epoch": 0.13485078691626293, "grad_norm": 0.0, "learning_rate": 2.665941595809612e-07, "loss": 14.8047, "step": 2352 }, { "epoch": 0.13490812143450964, "grad_norm": 0.0, "learning_rate": 2.6460382845214125e-07, "loss": 14.699, "step": 2353 }, { "epoch": 0.13496545595275636, "grad_norm": 0.0, "learning_rate": 2.6262075291398125e-07, "loss": 14.9294, "step": 2354 }, { "epoch": 0.13502279047100307, "grad_norm": 0.0, "learning_rate": 2.606449360049584e-07, "loss": 15.1167, "step": 2355 }, { "epoch": 0.1350801249892498, "grad_norm": 0.0, "learning_rate": 2.5867638075242454e-07, "loss": 14.8653, "step": 2356 }, { "epoch": 0.13513745950749648, "grad_norm": 0.0, "learning_rate": 2.5671509017260786e-07, "loss": 14.8822, "step": 2357 }, { "epoch": 0.1351947940257432, "grad_norm": 0.0, "learning_rate": 2.547610672706047e-07, "loss": 15.0604, "step": 2358 }, { "epoch": 0.1352521285439899, "grad_norm": 0.0, "learning_rate": 2.5281431504037555e-07, "loss": 15.1022, "step": 2359 }, { "epoch": 0.13530946306223662, "grad_norm": 0.0, "learning_rate": 2.508748364647401e-07, "loss": 15.2797, "step": 2360 }, { "epoch": 0.13536679758048334, "grad_norm": 0.0, "learning_rate": 2.4894263451537617e-07, "loss": 14.7028, "step": 2361 }, { "epoch": 0.13542413209873003, "grad_norm": 0.0, "learning_rate": 2.470177121528089e-07, "loss": 15.0521, "step": 2362 }, { "epoch": 0.13548146661697674, "grad_norm": 0.0, "learning_rate": 2.4510007232641186e-07, "loss": 14.9062, "step": 2363 }, { "epoch": 0.13553880113522346, "grad_norm": 0.0, "learning_rate": 2.4318971797439973e-07, "loss": 14.8275, "step": 2364 }, { "epoch": 0.13559613565347017, "grad_norm": 0.0, "learning_rate": 2.4128665202382327e-07, "loss": 14.9824, "step": 2365 }, { "epoch": 0.1356534701717169, "grad_norm": 0.0, "learning_rate": 2.3939087739056677e-07, "loss": 14.7628, "step": 2366 }, { "epoch": 0.1357108046899636, "grad_norm": 0.0, "learning_rate": 2.3750239697934385e-07, "loss": 14.9101, "step": 2367 }, { "epoch": 0.1357681392082103, "grad_norm": 0.0, "learning_rate": 2.356212136836894e-07, "loss": 14.9607, "step": 2368 }, { "epoch": 0.135825473726457, "grad_norm": 0.0, "learning_rate": 2.3374733038596109e-07, "loss": 15.0791, "step": 2369 }, { "epoch": 0.13588280824470372, "grad_norm": 0.0, "learning_rate": 2.3188074995732722e-07, "loss": 15.0595, "step": 2370 }, { "epoch": 0.13594014276295044, "grad_norm": 0.0, "learning_rate": 2.3002147525777118e-07, "loss": 14.8886, "step": 2371 }, { "epoch": 0.13599747728119715, "grad_norm": 0.0, "learning_rate": 2.281695091360786e-07, "loss": 15.0146, "step": 2372 }, { "epoch": 0.13605481179944384, "grad_norm": 0.0, "learning_rate": 2.2632485442983908e-07, "loss": 15.0491, "step": 2373 }, { "epoch": 0.13611214631769056, "grad_norm": 0.0, "learning_rate": 2.2448751396543788e-07, "loss": 15.1609, "step": 2374 }, { "epoch": 0.13616948083593727, "grad_norm": 0.0, "learning_rate": 2.2265749055805642e-07, "loss": 15.1201, "step": 2375 }, { "epoch": 0.136226815354184, "grad_norm": 0.0, "learning_rate": 2.2083478701166116e-07, "loss": 14.9542, "step": 2376 }, { "epoch": 0.1362841498724307, "grad_norm": 0.0, "learning_rate": 2.1901940611900707e-07, "loss": 15.0658, "step": 2377 }, { "epoch": 0.13634148439067742, "grad_norm": 0.0, "learning_rate": 2.1721135066162524e-07, "loss": 15.1337, "step": 2378 }, { "epoch": 0.1363988189089241, "grad_norm": 0.0, "learning_rate": 2.1541062340982578e-07, "loss": 14.969, "step": 2379 }, { "epoch": 0.13645615342717082, "grad_norm": 0.0, "learning_rate": 2.1361722712268772e-07, "loss": 14.7518, "step": 2380 }, { "epoch": 0.13651348794541754, "grad_norm": 0.0, "learning_rate": 2.1183116454806086e-07, "loss": 14.9232, "step": 2381 }, { "epoch": 0.13657082246366425, "grad_norm": 0.0, "learning_rate": 2.1005243842255552e-07, "loss": 14.9849, "step": 2382 }, { "epoch": 0.13662815698191097, "grad_norm": 0.0, "learning_rate": 2.0828105147154275e-07, "loss": 14.6287, "step": 2383 }, { "epoch": 0.13668549150015766, "grad_norm": 0.0, "learning_rate": 2.0651700640914764e-07, "loss": 15.4342, "step": 2384 }, { "epoch": 0.13674282601840437, "grad_norm": 0.0, "learning_rate": 2.0476030593824692e-07, "loss": 15.031, "step": 2385 }, { "epoch": 0.1368001605366511, "grad_norm": 0.0, "learning_rate": 2.0301095275046145e-07, "loss": 14.9257, "step": 2386 }, { "epoch": 0.1368574950548978, "grad_norm": 0.0, "learning_rate": 2.0126894952615773e-07, "loss": 14.7566, "step": 2387 }, { "epoch": 0.13691482957314452, "grad_norm": 0.0, "learning_rate": 1.9953429893443842e-07, "loss": 14.7706, "step": 2388 }, { "epoch": 0.13697216409139124, "grad_norm": 0.0, "learning_rate": 1.9780700363314255e-07, "loss": 14.7179, "step": 2389 }, { "epoch": 0.13702949860963792, "grad_norm": 0.0, "learning_rate": 1.9608706626883632e-07, "loss": 15.0128, "step": 2390 }, { "epoch": 0.13708683312788464, "grad_norm": 0.0, "learning_rate": 1.943744894768157e-07, "loss": 15.0172, "step": 2391 }, { "epoch": 0.13714416764613135, "grad_norm": 0.0, "learning_rate": 1.926692758810955e-07, "loss": 15.3181, "step": 2392 }, { "epoch": 0.13720150216437807, "grad_norm": 0.0, "learning_rate": 1.9097142809441084e-07, "loss": 15.1065, "step": 2393 }, { "epoch": 0.13725883668262479, "grad_norm": 0.0, "learning_rate": 1.8928094871820846e-07, "loss": 15.1422, "step": 2394 }, { "epoch": 0.13731617120087147, "grad_norm": 0.0, "learning_rate": 1.8759784034264927e-07, "loss": 14.8676, "step": 2395 }, { "epoch": 0.1373735057191182, "grad_norm": 0.0, "learning_rate": 1.859221055465954e-07, "loss": 15.2351, "step": 2396 }, { "epoch": 0.1374308402373649, "grad_norm": 0.0, "learning_rate": 1.8425374689761522e-07, "loss": 14.9144, "step": 2397 }, { "epoch": 0.13748817475561162, "grad_norm": 0.0, "learning_rate": 1.825927669519728e-07, "loss": 15.2977, "step": 2398 }, { "epoch": 0.13754550927385834, "grad_norm": 0.0, "learning_rate": 1.809391682546291e-07, "loss": 14.9887, "step": 2399 }, { "epoch": 0.13760284379210505, "grad_norm": 0.0, "learning_rate": 1.7929295333923125e-07, "loss": 14.8764, "step": 2400 }, { "epoch": 0.13766017831035174, "grad_norm": 0.0, "learning_rate": 1.776541247281177e-07, "loss": 15.0761, "step": 2401 }, { "epoch": 0.13771751282859845, "grad_norm": 0.0, "learning_rate": 1.7602268493230545e-07, "loss": 15.2546, "step": 2402 }, { "epoch": 0.13777484734684517, "grad_norm": 0.0, "learning_rate": 1.7439863645149434e-07, "loss": 15.009, "step": 2403 }, { "epoch": 0.13783218186509189, "grad_norm": 0.0, "learning_rate": 1.7278198177405614e-07, "loss": 15.1681, "step": 2404 }, { "epoch": 0.1378895163833386, "grad_norm": 0.0, "learning_rate": 1.7117272337703495e-07, "loss": 14.8229, "step": 2405 }, { "epoch": 0.1379468509015853, "grad_norm": 0.0, "learning_rate": 1.6957086372614285e-07, "loss": 14.649, "step": 2406 }, { "epoch": 0.138004185419832, "grad_norm": 0.0, "learning_rate": 1.679764052757532e-07, "loss": 14.8612, "step": 2407 }, { "epoch": 0.13806151993807872, "grad_norm": 0.0, "learning_rate": 1.6638935046890182e-07, "loss": 14.9759, "step": 2408 }, { "epoch": 0.13811885445632544, "grad_norm": 0.0, "learning_rate": 1.6480970173728017e-07, "loss": 14.9145, "step": 2409 }, { "epoch": 0.13817618897457215, "grad_norm": 0.0, "learning_rate": 1.6323746150123e-07, "loss": 15.1433, "step": 2410 }, { "epoch": 0.13823352349281884, "grad_norm": 0.0, "learning_rate": 1.6167263216974539e-07, "loss": 14.7141, "step": 2411 }, { "epoch": 0.13829085801106555, "grad_norm": 0.0, "learning_rate": 1.6011521614046233e-07, "loss": 14.7465, "step": 2412 }, { "epoch": 0.13834819252931227, "grad_norm": 0.0, "learning_rate": 1.5856521579965866e-07, "loss": 14.9783, "step": 2413 }, { "epoch": 0.13840552704755898, "grad_norm": 0.0, "learning_rate": 1.5702263352225077e-07, "loss": 15.1232, "step": 2414 }, { "epoch": 0.1384628615658057, "grad_norm": 0.0, "learning_rate": 1.5548747167178912e-07, "loss": 14.9879, "step": 2415 }, { "epoch": 0.13852019608405242, "grad_norm": 0.0, "learning_rate": 1.5395973260045273e-07, "loss": 15.2024, "step": 2416 }, { "epoch": 0.1385775306022991, "grad_norm": 0.0, "learning_rate": 1.5243941864905076e-07, "loss": 14.9653, "step": 2417 }, { "epoch": 0.13863486512054582, "grad_norm": 0.0, "learning_rate": 1.5092653214701158e-07, "loss": 15.0952, "step": 2418 }, { "epoch": 0.13869219963879253, "grad_norm": 0.0, "learning_rate": 1.4942107541238705e-07, "loss": 15.0942, "step": 2419 }, { "epoch": 0.13874953415703925, "grad_norm": 0.0, "learning_rate": 1.4792305075184255e-07, "loss": 15.1668, "step": 2420 }, { "epoch": 0.13880686867528597, "grad_norm": 0.0, "learning_rate": 1.4643246046065596e-07, "loss": 14.8822, "step": 2421 }, { "epoch": 0.13886420319353265, "grad_norm": 0.0, "learning_rate": 1.449493068227159e-07, "loss": 14.8281, "step": 2422 }, { "epoch": 0.13892153771177937, "grad_norm": 0.0, "learning_rate": 1.434735921105146e-07, "loss": 14.9618, "step": 2423 }, { "epoch": 0.13897887223002608, "grad_norm": 0.0, "learning_rate": 1.4200531858514888e-07, "loss": 14.8698, "step": 2424 }, { "epoch": 0.1390362067482728, "grad_norm": 0.0, "learning_rate": 1.4054448849631087e-07, "loss": 14.9689, "step": 2425 }, { "epoch": 0.13909354126651952, "grad_norm": 0.0, "learning_rate": 1.3909110408229065e-07, "loss": 15.1449, "step": 2426 }, { "epoch": 0.13915087578476623, "grad_norm": 0.0, "learning_rate": 1.3764516756996914e-07, "loss": 15.0107, "step": 2427 }, { "epoch": 0.13920821030301292, "grad_norm": 0.0, "learning_rate": 1.3620668117481471e-07, "loss": 15.3218, "step": 2428 }, { "epoch": 0.13926554482125963, "grad_norm": 0.0, "learning_rate": 1.3477564710088097e-07, "loss": 14.9193, "step": 2429 }, { "epoch": 0.13932287933950635, "grad_norm": 0.0, "learning_rate": 1.3335206754080454e-07, "loss": 15.2119, "step": 2430 }, { "epoch": 0.13938021385775307, "grad_norm": 0.0, "learning_rate": 1.319359446757973e-07, "loss": 14.6098, "step": 2431 }, { "epoch": 0.13943754837599978, "grad_norm": 0.0, "learning_rate": 1.3052728067564967e-07, "loss": 14.8737, "step": 2432 }, { "epoch": 0.13949488289424647, "grad_norm": 0.0, "learning_rate": 1.291260776987202e-07, "loss": 14.9515, "step": 2433 }, { "epoch": 0.13955221741249318, "grad_norm": 0.0, "learning_rate": 1.2773233789193816e-07, "loss": 14.721, "step": 2434 }, { "epoch": 0.1396095519307399, "grad_norm": 0.0, "learning_rate": 1.263460633907948e-07, "loss": 14.832, "step": 2435 }, { "epoch": 0.13966688644898662, "grad_norm": 0.0, "learning_rate": 1.2496725631934657e-07, "loss": 14.9489, "step": 2436 }, { "epoch": 0.13972422096723333, "grad_norm": 0.0, "learning_rate": 1.2359591879020528e-07, "loss": 14.5837, "step": 2437 }, { "epoch": 0.13978155548548005, "grad_norm": 0.0, "learning_rate": 1.2223205290453953e-07, "loss": 14.7659, "step": 2438 }, { "epoch": 0.13983889000372673, "grad_norm": 0.0, "learning_rate": 1.2087566075206948e-07, "loss": 14.8871, "step": 2439 }, { "epoch": 0.13989622452197345, "grad_norm": 0.0, "learning_rate": 1.1952674441106483e-07, "loss": 15.126, "step": 2440 }, { "epoch": 0.13995355904022017, "grad_norm": 0.0, "learning_rate": 1.1818530594833844e-07, "loss": 15.1116, "step": 2441 }, { "epoch": 0.14001089355846688, "grad_norm": 0.0, "learning_rate": 1.1685134741924841e-07, "loss": 14.8211, "step": 2442 }, { "epoch": 0.1400682280767136, "grad_norm": 0.0, "learning_rate": 1.1552487086768871e-07, "loss": 15.2424, "step": 2443 }, { "epoch": 0.14012556259496028, "grad_norm": 0.0, "learning_rate": 1.1420587832609353e-07, "loss": 14.8161, "step": 2444 }, { "epoch": 0.140182897113207, "grad_norm": 0.0, "learning_rate": 1.1289437181542684e-07, "loss": 14.9046, "step": 2445 }, { "epoch": 0.14024023163145372, "grad_norm": 0.0, "learning_rate": 1.1159035334518343e-07, "loss": 14.9444, "step": 2446 }, { "epoch": 0.14029756614970043, "grad_norm": 0.0, "learning_rate": 1.1029382491338558e-07, "loss": 15.2636, "step": 2447 }, { "epoch": 0.14035490066794715, "grad_norm": 0.0, "learning_rate": 1.090047885065787e-07, "loss": 15.1296, "step": 2448 }, { "epoch": 0.14041223518619383, "grad_norm": 0.0, "learning_rate": 1.0772324609982787e-07, "loss": 14.771, "step": 2449 }, { "epoch": 0.14046956970444055, "grad_norm": 0.0, "learning_rate": 1.0644919965671741e-07, "loss": 15.0778, "step": 2450 }, { "epoch": 0.14052690422268727, "grad_norm": 0.0, "learning_rate": 1.0518265112934634e-07, "loss": 14.9974, "step": 2451 }, { "epoch": 0.14058423874093398, "grad_norm": 0.0, "learning_rate": 1.03923602458324e-07, "loss": 14.9652, "step": 2452 }, { "epoch": 0.1406415732591807, "grad_norm": 0.0, "learning_rate": 1.026720555727695e-07, "loss": 14.926, "step": 2453 }, { "epoch": 0.1406989077774274, "grad_norm": 0.0, "learning_rate": 1.014280123903083e-07, "loss": 14.7243, "step": 2454 }, { "epoch": 0.1407562422956741, "grad_norm": 0.0, "learning_rate": 1.0019147481706626e-07, "loss": 14.9074, "step": 2455 }, { "epoch": 0.14081357681392082, "grad_norm": 0.0, "learning_rate": 9.896244474767114e-08, "loss": 14.7378, "step": 2456 }, { "epoch": 0.14087091133216753, "grad_norm": 0.0, "learning_rate": 9.774092406524715e-08, "loss": 14.8125, "step": 2457 }, { "epoch": 0.14092824585041425, "grad_norm": 0.0, "learning_rate": 9.652691464141273e-08, "loss": 14.9454, "step": 2458 }, { "epoch": 0.14098558036866096, "grad_norm": 0.0, "learning_rate": 9.532041833627658e-08, "loss": 14.9199, "step": 2459 }, { "epoch": 0.14104291488690765, "grad_norm": 0.0, "learning_rate": 9.412143699843779e-08, "loss": 14.9574, "step": 2460 }, { "epoch": 0.14110024940515437, "grad_norm": 0.0, "learning_rate": 9.292997246497959e-08, "loss": 15.0777, "step": 2461 }, { "epoch": 0.14115758392340108, "grad_norm": 0.0, "learning_rate": 9.17460265614678e-08, "loss": 15.0816, "step": 2462 }, { "epoch": 0.1412149184416478, "grad_norm": 0.0, "learning_rate": 9.056960110194746e-08, "loss": 14.7772, "step": 2463 }, { "epoch": 0.1412722529598945, "grad_norm": 0.0, "learning_rate": 8.940069788894389e-08, "loss": 15.0991, "step": 2464 }, { "epoch": 0.14132958747814123, "grad_norm": 0.0, "learning_rate": 8.823931871345281e-08, "loss": 15.0157, "step": 2465 }, { "epoch": 0.14138692199638792, "grad_norm": 0.0, "learning_rate": 8.708546535494466e-08, "loss": 14.8052, "step": 2466 }, { "epoch": 0.14144425651463463, "grad_norm": 0.0, "learning_rate": 8.593913958135691e-08, "loss": 14.9028, "step": 2467 }, { "epoch": 0.14150159103288135, "grad_norm": 0.0, "learning_rate": 8.480034314909513e-08, "loss": 15.0728, "step": 2468 }, { "epoch": 0.14155892555112806, "grad_norm": 0.0, "learning_rate": 8.36690778030258e-08, "loss": 14.8735, "step": 2469 }, { "epoch": 0.14161626006937478, "grad_norm": 0.0, "learning_rate": 8.254534527647851e-08, "loss": 15.0529, "step": 2470 }, { "epoch": 0.14167359458762147, "grad_norm": 0.0, "learning_rate": 8.14291472912393e-08, "loss": 14.7234, "step": 2471 }, { "epoch": 0.14173092910586818, "grad_norm": 0.0, "learning_rate": 8.032048555755178e-08, "loss": 14.7678, "step": 2472 }, { "epoch": 0.1417882636241149, "grad_norm": 0.0, "learning_rate": 7.921936177411049e-08, "loss": 15.2047, "step": 2473 }, { "epoch": 0.1418455981423616, "grad_norm": 0.0, "learning_rate": 7.81257776280614e-08, "loss": 14.755, "step": 2474 }, { "epoch": 0.14190293266060833, "grad_norm": 0.0, "learning_rate": 7.70397347949986e-08, "loss": 14.8613, "step": 2475 }, { "epoch": 0.14196026717885504, "grad_norm": 0.0, "learning_rate": 7.59612349389599e-08, "loss": 15.203, "step": 2476 }, { "epoch": 0.14201760169710173, "grad_norm": 0.0, "learning_rate": 7.48902797124268e-08, "loss": 15.1043, "step": 2477 }, { "epoch": 0.14207493621534845, "grad_norm": 0.0, "learning_rate": 7.382687075632111e-08, "loss": 15.1766, "step": 2478 }, { "epoch": 0.14213227073359516, "grad_norm": 0.0, "learning_rate": 7.277100970000062e-08, "loss": 14.8559, "step": 2479 }, { "epoch": 0.14218960525184188, "grad_norm": 0.0, "learning_rate": 7.172269816126065e-08, "loss": 15.1087, "step": 2480 }, { "epoch": 0.1422469397700886, "grad_norm": 0.0, "learning_rate": 7.068193774632692e-08, "loss": 15.0655, "step": 2481 }, { "epoch": 0.14230427428833528, "grad_norm": 0.0, "learning_rate": 6.964873004985717e-08, "loss": 14.9476, "step": 2482 }, { "epoch": 0.142361608806582, "grad_norm": 0.0, "learning_rate": 6.862307665493506e-08, "loss": 14.9225, "step": 2483 }, { "epoch": 0.1424189433248287, "grad_norm": 0.0, "learning_rate": 6.760497913307073e-08, "loss": 14.8404, "step": 2484 }, { "epoch": 0.14247627784307543, "grad_norm": 0.0, "learning_rate": 6.659443904419638e-08, "loss": 14.892, "step": 2485 }, { "epoch": 0.14253361236132214, "grad_norm": 0.0, "learning_rate": 6.55914579366651e-08, "loss": 14.9005, "step": 2486 }, { "epoch": 0.14259094687956883, "grad_norm": 0.0, "learning_rate": 6.459603734724873e-08, "loss": 15.0596, "step": 2487 }, { "epoch": 0.14264828139781555, "grad_norm": 0.0, "learning_rate": 6.360817880113335e-08, "loss": 14.959, "step": 2488 }, { "epoch": 0.14270561591606226, "grad_norm": 0.0, "learning_rate": 6.262788381191987e-08, "loss": 15.2925, "step": 2489 }, { "epoch": 0.14276295043430898, "grad_norm": 0.0, "learning_rate": 6.165515388161958e-08, "loss": 15.1075, "step": 2490 }, { "epoch": 0.1428202849525557, "grad_norm": 0.0, "learning_rate": 6.06899905006525e-08, "loss": 15.1714, "step": 2491 }, { "epoch": 0.1428776194708024, "grad_norm": 0.0, "learning_rate": 5.973239514784568e-08, "loss": 14.885, "step": 2492 }, { "epoch": 0.1429349539890491, "grad_norm": 0.0, "learning_rate": 5.878236929042991e-08, "loss": 15.1822, "step": 2493 }, { "epoch": 0.1429922885072958, "grad_norm": 0.0, "learning_rate": 5.783991438403802e-08, "loss": 14.9818, "step": 2494 }, { "epoch": 0.14304962302554253, "grad_norm": 0.0, "learning_rate": 5.690503187270269e-08, "loss": 14.8411, "step": 2495 }, { "epoch": 0.14310695754378924, "grad_norm": 0.0, "learning_rate": 5.59777231888553e-08, "loss": 15.0035, "step": 2496 }, { "epoch": 0.14316429206203596, "grad_norm": 0.0, "learning_rate": 5.505798975331933e-08, "loss": 14.9753, "step": 2497 }, { "epoch": 0.14322162658028265, "grad_norm": 0.0, "learning_rate": 5.414583297531528e-08, "loss": 14.9677, "step": 2498 }, { "epoch": 0.14327896109852936, "grad_norm": 0.0, "learning_rate": 5.3241254252452414e-08, "loss": 15.2268, "step": 2499 }, { "epoch": 0.14333629561677608, "grad_norm": 0.0, "learning_rate": 5.234425497072981e-08, "loss": 14.6797, "step": 2500 }, { "epoch": 0.1433936301350228, "grad_norm": 0.0, "learning_rate": 5.1454836504532535e-08, "loss": 14.9487, "step": 2501 }, { "epoch": 0.1434509646532695, "grad_norm": 0.0, "learning_rate": 5.0573000216630475e-08, "loss": 15.053, "step": 2502 }, { "epoch": 0.14350829917151622, "grad_norm": 0.0, "learning_rate": 4.9698747458176714e-08, "loss": 14.945, "step": 2503 }, { "epoch": 0.1435656336897629, "grad_norm": 0.0, "learning_rate": 4.883207956870473e-08, "loss": 15.1995, "step": 2504 }, { "epoch": 0.14362296820800963, "grad_norm": 0.0, "learning_rate": 4.7972997876125086e-08, "loss": 14.8424, "step": 2505 }, { "epoch": 0.14368030272625634, "grad_norm": 0.0, "learning_rate": 4.712150369672652e-08, "loss": 15.1739, "step": 2506 }, { "epoch": 0.14373763724450306, "grad_norm": 0.0, "learning_rate": 4.627759833517098e-08, "loss": 15.014, "step": 2507 }, { "epoch": 0.14379497176274977, "grad_norm": 0.0, "learning_rate": 4.544128308449358e-08, "loss": 14.8966, "step": 2508 }, { "epoch": 0.14385230628099646, "grad_norm": 0.0, "learning_rate": 4.461255922609986e-08, "loss": 14.9599, "step": 2509 }, { "epoch": 0.14390964079924318, "grad_norm": 0.0, "learning_rate": 4.379142802976244e-08, "loss": 15.0118, "step": 2510 }, { "epoch": 0.1439669753174899, "grad_norm": 0.0, "learning_rate": 4.297789075362158e-08, "loss": 14.8471, "step": 2511 }, { "epoch": 0.1440243098357366, "grad_norm": 0.0, "learning_rate": 4.217194864418295e-08, "loss": 14.6829, "step": 2512 }, { "epoch": 0.14408164435398332, "grad_norm": 0.0, "learning_rate": 4.137360293631321e-08, "loss": 14.7532, "step": 2513 }, { "epoch": 0.14413897887223004, "grad_norm": 0.0, "learning_rate": 4.058285485324054e-08, "loss": 14.936, "step": 2514 }, { "epoch": 0.14419631339047673, "grad_norm": 0.0, "learning_rate": 3.979970560655133e-08, "loss": 15.0286, "step": 2515 }, { "epoch": 0.14425364790872344, "grad_norm": 0.0, "learning_rate": 3.9024156396190724e-08, "loss": 15.0832, "step": 2516 }, { "epoch": 0.14431098242697016, "grad_norm": 0.0, "learning_rate": 3.8256208410457074e-08, "loss": 15.0615, "step": 2517 }, { "epoch": 0.14436831694521687, "grad_norm": 0.0, "learning_rate": 3.749586282600359e-08, "loss": 14.7907, "step": 2518 }, { "epoch": 0.1444256514634636, "grad_norm": 0.0, "learning_rate": 3.674312080783393e-08, "loss": 15.0773, "step": 2519 }, { "epoch": 0.14448298598171028, "grad_norm": 0.0, "learning_rate": 3.599798350930217e-08, "loss": 14.9388, "step": 2520 }, { "epoch": 0.144540320499957, "grad_norm": 0.0, "learning_rate": 3.526045207211059e-08, "loss": 15.2011, "step": 2521 }, { "epoch": 0.1445976550182037, "grad_norm": 0.0, "learning_rate": 3.453052762630749e-08, "loss": 15.2172, "step": 2522 }, { "epoch": 0.14465498953645042, "grad_norm": 0.0, "learning_rate": 3.3808211290284886e-08, "loss": 14.8134, "step": 2523 }, { "epoch": 0.14471232405469714, "grad_norm": 0.0, "learning_rate": 3.309350417077972e-08, "loss": 15.2569, "step": 2524 }, { "epoch": 0.14476965857294385, "grad_norm": 0.0, "learning_rate": 3.238640736286769e-08, "loss": 14.8034, "step": 2525 }, { "epoch": 0.14482699309119054, "grad_norm": 0.0, "learning_rate": 3.168692194996492e-08, "loss": 15.0441, "step": 2526 }, { "epoch": 0.14488432760943726, "grad_norm": 0.0, "learning_rate": 3.0995049003826325e-08, "loss": 14.7524, "step": 2527 }, { "epoch": 0.14494166212768397, "grad_norm": 0.0, "learning_rate": 3.0310789584541144e-08, "loss": 15.1303, "step": 2528 }, { "epoch": 0.1449989966459307, "grad_norm": 0.0, "learning_rate": 2.9634144740535166e-08, "loss": 15.0751, "step": 2529 }, { "epoch": 0.1450563311641774, "grad_norm": 0.0, "learning_rate": 2.8965115508564622e-08, "loss": 15.0048, "step": 2530 }, { "epoch": 0.1451136656824241, "grad_norm": 0.0, "learning_rate": 2.8303702913719534e-08, "loss": 15.2124, "step": 2531 }, { "epoch": 0.1451710002006708, "grad_norm": 0.0, "learning_rate": 2.7649907969418133e-08, "loss": 15.1546, "step": 2532 }, { "epoch": 0.14522833471891752, "grad_norm": 0.0, "learning_rate": 2.700373167740744e-08, "loss": 14.9962, "step": 2533 }, { "epoch": 0.14528566923716424, "grad_norm": 0.0, "learning_rate": 2.6365175027761036e-08, "loss": 14.8515, "step": 2534 }, { "epoch": 0.14534300375541095, "grad_norm": 0.0, "learning_rate": 2.5734238998877947e-08, "loss": 14.7741, "step": 2535 }, { "epoch": 0.14540033827365764, "grad_norm": 0.0, "learning_rate": 2.511092455747932e-08, "loss": 14.9341, "step": 2536 }, { "epoch": 0.14545767279190436, "grad_norm": 0.0, "learning_rate": 2.4495232658611755e-08, "loss": 14.9971, "step": 2537 }, { "epoch": 0.14551500731015107, "grad_norm": 0.0, "learning_rate": 2.3887164245638972e-08, "loss": 15.19, "step": 2538 }, { "epoch": 0.1455723418283978, "grad_norm": 0.0, "learning_rate": 2.3286720250246255e-08, "loss": 14.9714, "step": 2539 }, { "epoch": 0.1456296763466445, "grad_norm": 0.0, "learning_rate": 2.2693901592435453e-08, "loss": 15.0091, "step": 2540 }, { "epoch": 0.14568701086489122, "grad_norm": 0.0, "learning_rate": 2.21087091805261e-08, "loss": 15.0067, "step": 2541 }, { "epoch": 0.1457443453831379, "grad_norm": 0.0, "learning_rate": 2.153114391115152e-08, "loss": 14.5833, "step": 2542 }, { "epoch": 0.14580167990138462, "grad_norm": 0.0, "learning_rate": 2.096120666925938e-08, "loss": 14.8599, "step": 2543 }, { "epoch": 0.14585901441963134, "grad_norm": 0.0, "learning_rate": 2.039889832811004e-08, "loss": 14.6738, "step": 2544 }, { "epoch": 0.14591634893787805, "grad_norm": 0.0, "learning_rate": 1.984421974927375e-08, "loss": 15.1142, "step": 2545 }, { "epoch": 0.14597368345612477, "grad_norm": 0.0, "learning_rate": 1.9297171782630685e-08, "loss": 14.8885, "step": 2546 }, { "epoch": 0.14603101797437146, "grad_norm": 0.0, "learning_rate": 1.8757755266370358e-08, "loss": 14.9943, "step": 2547 }, { "epoch": 0.14608835249261817, "grad_norm": 0.0, "learning_rate": 1.8225971026987755e-08, "loss": 14.852, "step": 2548 }, { "epoch": 0.1461456870108649, "grad_norm": 0.0, "learning_rate": 1.7701819879285544e-08, "loss": 15.0191, "step": 2549 }, { "epoch": 0.1462030215291116, "grad_norm": 0.0, "learning_rate": 1.7185302626369636e-08, "loss": 15.2774, "step": 2550 }, { "epoch": 0.14626035604735832, "grad_norm": 0.0, "learning_rate": 1.6676420059649756e-08, "loss": 14.7861, "step": 2551 }, { "epoch": 0.14631769056560504, "grad_norm": 0.0, "learning_rate": 1.6175172958837193e-08, "loss": 14.8792, "step": 2552 }, { "epoch": 0.14637502508385172, "grad_norm": 0.0, "learning_rate": 1.568156209194427e-08, "loss": 15.2584, "step": 2553 }, { "epoch": 0.14643235960209844, "grad_norm": 0.0, "learning_rate": 1.5195588215283773e-08, "loss": 14.9359, "step": 2554 }, { "epoch": 0.14648969412034515, "grad_norm": 0.0, "learning_rate": 1.4717252073466747e-08, "loss": 14.9727, "step": 2555 }, { "epoch": 0.14654702863859187, "grad_norm": 0.0, "learning_rate": 1.4246554399401368e-08, "loss": 15.1714, "step": 2556 }, { "epoch": 0.14660436315683859, "grad_norm": 0.0, "learning_rate": 1.3783495914291844e-08, "loss": 14.8627, "step": 2557 }, { "epoch": 0.14666169767508527, "grad_norm": 0.0, "learning_rate": 1.332807732763841e-08, "loss": 14.9667, "step": 2558 }, { "epoch": 0.146719032193332, "grad_norm": 0.0, "learning_rate": 1.2880299337235113e-08, "loss": 14.9889, "step": 2559 }, { "epoch": 0.1467763667115787, "grad_norm": 0.0, "learning_rate": 1.244016262916814e-08, "loss": 14.9744, "step": 2560 }, { "epoch": 0.14683370122982542, "grad_norm": 0.0, "learning_rate": 1.2007667877817487e-08, "loss": 14.775, "step": 2561 }, { "epoch": 0.14689103574807214, "grad_norm": 0.0, "learning_rate": 1.1582815745853071e-08, "loss": 14.7272, "step": 2562 }, { "epoch": 0.14694837026631885, "grad_norm": 0.0, "learning_rate": 1.1165606884234182e-08, "loss": 15.2383, "step": 2563 }, { "epoch": 0.14700570478456554, "grad_norm": 0.0, "learning_rate": 1.0756041932210027e-08, "loss": 15.079, "step": 2564 }, { "epoch": 0.14706303930281225, "grad_norm": 0.0, "learning_rate": 1.0354121517318072e-08, "loss": 15.5389, "step": 2565 }, { "epoch": 0.14712037382105897, "grad_norm": 0.0, "learning_rate": 9.959846255381267e-09, "loss": 15.004, "step": 2566 }, { "epoch": 0.14717770833930569, "grad_norm": 0.0, "learning_rate": 9.573216750509707e-09, "loss": 14.918, "step": 2567 }, { "epoch": 0.1472350428575524, "grad_norm": 0.0, "learning_rate": 9.194233595098412e-09, "loss": 15.1843, "step": 2568 }, { "epoch": 0.1472923773757991, "grad_norm": 0.0, "learning_rate": 8.822897369827333e-09, "loss": 15.0132, "step": 2569 }, { "epoch": 0.1473497118940458, "grad_norm": 0.0, "learning_rate": 8.459208643659122e-09, "loss": 15.035, "step": 2570 }, { "epoch": 0.14740704641229252, "grad_norm": 0.0, "learning_rate": 8.103167973838033e-09, "loss": 15.0593, "step": 2571 }, { "epoch": 0.14746438093053924, "grad_norm": 0.0, "learning_rate": 7.754775905891576e-09, "loss": 15.1713, "step": 2572 }, { "epoch": 0.14752171544878595, "grad_norm": 0.0, "learning_rate": 7.414032973627194e-09, "loss": 15.2088, "step": 2573 }, { "epoch": 0.14757904996703264, "grad_norm": 0.0, "learning_rate": 7.080939699132261e-09, "loss": 14.9977, "step": 2574 }, { "epoch": 0.14763638448527935, "grad_norm": 0.0, "learning_rate": 6.755496592773525e-09, "loss": 14.6728, "step": 2575 }, { "epoch": 0.14769371900352607, "grad_norm": 0.0, "learning_rate": 6.437704153196e-09, "loss": 14.9983, "step": 2576 }, { "epoch": 0.14775105352177278, "grad_norm": 0.0, "learning_rate": 6.127562867322412e-09, "loss": 14.8182, "step": 2577 }, { "epoch": 0.1478083880400195, "grad_norm": 0.0, "learning_rate": 5.825073210352084e-09, "loss": 15.1046, "step": 2578 }, { "epoch": 0.14786572255826622, "grad_norm": 0.0, "learning_rate": 5.530235645761495e-09, "loss": 14.981, "step": 2579 }, { "epoch": 0.1479230570765129, "grad_norm": 0.0, "learning_rate": 5.243050625300949e-09, "loss": 14.9505, "step": 2580 }, { "epoch": 0.14798039159475962, "grad_norm": 0.0, "learning_rate": 4.9635185889967966e-09, "loss": 14.577, "step": 2581 }, { "epoch": 0.14803772611300633, "grad_norm": 0.0, "learning_rate": 4.691639965148653e-09, "loss": 15.0579, "step": 2582 }, { "epoch": 0.14809506063125305, "grad_norm": 0.0, "learning_rate": 4.4274151703305176e-09, "loss": 15.1184, "step": 2583 }, { "epoch": 0.14815239514949977, "grad_norm": 0.0, "learning_rate": 4.170844609387992e-09, "loss": 14.8572, "step": 2584 }, { "epoch": 0.14820972966774645, "grad_norm": 0.0, "learning_rate": 3.92192867543939e-09, "loss": 15.1873, "step": 2585 }, { "epoch": 0.14826706418599317, "grad_norm": 0.0, "learning_rate": 3.6806677498751888e-09, "loss": 14.9352, "step": 2586 }, { "epoch": 0.14832439870423988, "grad_norm": 0.0, "learning_rate": 3.4470622023558e-09, "loss": 14.6492, "step": 2587 }, { "epoch": 0.1483817332224866, "grad_norm": 0.0, "learning_rate": 3.2211123908121313e-09, "loss": 15.1136, "step": 2588 }, { "epoch": 0.14843906774073332, "grad_norm": 0.0, "learning_rate": 3.0028186614455833e-09, "loss": 14.7483, "step": 2589 }, { "epoch": 0.14849640225898003, "grad_norm": 0.0, "learning_rate": 2.792181348726941e-09, "loss": 15.0311, "step": 2590 }, { "epoch": 0.14855373677722672, "grad_norm": 0.0, "learning_rate": 2.589200775395262e-09, "loss": 14.8759, "step": 2591 }, { "epoch": 0.14861107129547343, "grad_norm": 0.0, "learning_rate": 2.3938772524573215e-09, "loss": 14.9465, "step": 2592 }, { "epoch": 0.14866840581372015, "grad_norm": 0.0, "learning_rate": 2.20621107918928e-09, "loss": 14.975, "step": 2593 }, { "epoch": 0.14872574033196687, "grad_norm": 0.0, "learning_rate": 2.0262025431339037e-09, "loss": 14.876, "step": 2594 }, { "epoch": 0.14878307485021358, "grad_norm": 0.0, "learning_rate": 1.8538519201011241e-09, "loss": 14.6099, "step": 2595 }, { "epoch": 0.14884040936846027, "grad_norm": 0.0, "learning_rate": 1.6891594741663686e-09, "loss": 14.9404, "step": 2596 }, { "epoch": 0.14889774388670698, "grad_norm": 0.0, "learning_rate": 1.5321254576722288e-09, "loss": 14.8766, "step": 2597 }, { "epoch": 0.1489550784049537, "grad_norm": 0.0, "learning_rate": 1.382750111227349e-09, "loss": 14.8599, "step": 2598 }, { "epoch": 0.14901241292320042, "grad_norm": 0.0, "learning_rate": 1.2410336637047604e-09, "loss": 14.8934, "step": 2599 }, { "epoch": 0.14906974744144713, "grad_norm": 0.0, "learning_rate": 1.1069763322424377e-09, "loss": 15.4799, "step": 2600 }, { "epoch": 0.14912708195969385, "grad_norm": 0.0, "learning_rate": 9.805783222444077e-10, "loss": 15.1307, "step": 2601 }, { "epoch": 0.14918441647794053, "grad_norm": 0.0, "learning_rate": 8.618398273779749e-10, "loss": 14.8672, "step": 2602 }, { "epoch": 0.14924175099618725, "grad_norm": 0.0, "learning_rate": 7.507610295737211e-10, "loss": 14.9388, "step": 2603 }, { "epoch": 0.14929908551443397, "grad_norm": 0.0, "learning_rate": 6.473420990282809e-10, "loss": 14.6506, "step": 2604 }, { "epoch": 0.14935642003268068, "grad_norm": 0.0, "learning_rate": 5.515831941993455e-10, "loss": 15.1409, "step": 2605 }, { "epoch": 0.1494137545509274, "grad_norm": 0.0, "learning_rate": 4.634844618101042e-10, "loss": 15.2039, "step": 2606 }, { "epoch": 0.14947108906917408, "grad_norm": 0.0, "learning_rate": 3.83046036844803e-10, "loss": 14.7781, "step": 2607 }, { "epoch": 0.1495284235874208, "grad_norm": 0.0, "learning_rate": 3.1026804255207544e-10, "loss": 14.9932, "step": 2608 }, { "epoch": 0.14958575810566752, "grad_norm": 0.0, "learning_rate": 2.4515059044216693e-10, "loss": 14.8711, "step": 2609 }, { "epoch": 0.14964309262391423, "grad_norm": 0.0, "learning_rate": 1.8769378028915542e-10, "loss": 15.0264, "step": 2610 }, { "epoch": 0.14970042714216095, "grad_norm": 0.0, "learning_rate": 1.378977001276205e-10, "loss": 15.1297, "step": 2611 }, { "epoch": 0.14975776166040766, "grad_norm": 0.0, "learning_rate": 9.57624262554191e-11, "loss": 14.8943, "step": 2612 }, { "epoch": 0.14981509617865435, "grad_norm": 0.0, "learning_rate": 6.128802323257521e-11, "loss": 14.7685, "step": 2613 }, { "epoch": 0.14987243069690107, "grad_norm": 0.0, "learning_rate": 3.447454388127991e-11, "loss": 14.9251, "step": 2614 }, { "epoch": 0.14992976521514778, "grad_norm": 0.0, "learning_rate": 1.5322029284781105e-11, "loss": 14.9398, "step": 2615 }, { "epoch": 0.1499870997333945, "grad_norm": 0.0, "learning_rate": 3.8305087884937805e-12, "loss": 14.8883, "step": 2616 }, { "epoch": 0.1500444342516412, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 14.7136, "step": 2617 }, { "epoch": 0.1500444342516412, "step": 2617, "total_flos": 2.237397245844521e+18, "train_loss": 15.299445352346604, "train_runtime": 55657.3277, "train_samples_per_second": 1.504, "train_steps_per_second": 0.047 } ], "logging_steps": 1.0, "max_steps": 2617, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 2.237397245844521e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }