{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.11466903649342086, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.7334518246710435e-05, "grad_norm": 0.0, "learning_rate": 1.2658227848101266e-07, "loss": 15.7887, "step": 1 }, { "epoch": 0.00011466903649342087, "grad_norm": 0.0, "learning_rate": 2.5316455696202533e-07, "loss": 15.8118, "step": 2 }, { "epoch": 0.0001720035547401313, "grad_norm": 0.0, "learning_rate": 3.79746835443038e-07, "loss": 15.5044, "step": 3 }, { "epoch": 0.00022933807298684174, "grad_norm": 0.0, "learning_rate": 5.063291139240507e-07, "loss": 15.726, "step": 4 }, { "epoch": 0.00028667259123355216, "grad_norm": 0.0, "learning_rate": 6.329113924050634e-07, "loss": 15.5511, "step": 5 }, { "epoch": 0.0003440071094802626, "grad_norm": 0.0, "learning_rate": 7.59493670886076e-07, "loss": 15.6542, "step": 6 }, { "epoch": 0.00040134162772697304, "grad_norm": 0.0, "learning_rate": 8.860759493670887e-07, "loss": 15.7995, "step": 7 }, { "epoch": 0.0004586761459736835, "grad_norm": 0.0, "learning_rate": 1.0126582278481013e-06, "loss": 16.2208, "step": 8 }, { "epoch": 0.0005160106642203939, "grad_norm": 0.0, "learning_rate": 1.139240506329114e-06, "loss": 15.7266, "step": 9 }, { "epoch": 0.0005733451824671043, "grad_norm": 0.0, "learning_rate": 1.2658227848101267e-06, "loss": 16.0773, "step": 10 }, { "epoch": 0.0006306797007138148, "grad_norm": 0.0, "learning_rate": 1.3924050632911392e-06, "loss": 16.0366, "step": 11 }, { "epoch": 0.0006880142189605252, "grad_norm": 0.0, "learning_rate": 1.518987341772152e-06, "loss": 15.5359, "step": 12 }, { "epoch": 0.0007453487372072356, "grad_norm": 0.0, "learning_rate": 1.6455696202531647e-06, "loss": 15.7016, "step": 13 }, { "epoch": 0.0008026832554539461, "grad_norm": 0.0, "learning_rate": 1.7721518987341774e-06, "loss": 16.0417, "step": 14 }, { "epoch": 0.0008600177737006565, "grad_norm": 0.0, "learning_rate": 1.8987341772151901e-06, "loss": 15.7408, "step": 15 }, { "epoch": 0.000917352291947367, "grad_norm": 0.0, "learning_rate": 2.0253164556962026e-06, "loss": 16.0203, "step": 16 }, { "epoch": 0.0009746868101940773, "grad_norm": 0.0, "learning_rate": 2.1518987341772153e-06, "loss": 15.6151, "step": 17 }, { "epoch": 0.0010320213284407877, "grad_norm": 0.0, "learning_rate": 2.278481012658228e-06, "loss": 15.7387, "step": 18 }, { "epoch": 0.0010893558466874983, "grad_norm": 0.0, "learning_rate": 2.4050632911392408e-06, "loss": 15.9719, "step": 19 }, { "epoch": 0.0011466903649342086, "grad_norm": 0.0, "learning_rate": 2.5316455696202535e-06, "loss": 15.6512, "step": 20 }, { "epoch": 0.0012040248831809192, "grad_norm": 0.0, "learning_rate": 2.6582278481012658e-06, "loss": 15.6905, "step": 21 }, { "epoch": 0.0012613594014276295, "grad_norm": 0.0, "learning_rate": 2.7848101265822785e-06, "loss": 15.8747, "step": 22 }, { "epoch": 0.0013186939196743398, "grad_norm": 0.0, "learning_rate": 2.9113924050632912e-06, "loss": 15.9172, "step": 23 }, { "epoch": 0.0013760284379210504, "grad_norm": 0.0, "learning_rate": 3.037974683544304e-06, "loss": 15.4097, "step": 24 }, { "epoch": 0.0014333629561677607, "grad_norm": 0.0, "learning_rate": 3.164556962025317e-06, "loss": 15.9326, "step": 25 }, { "epoch": 0.0014906974744144713, "grad_norm": 0.0, "learning_rate": 3.2911392405063294e-06, "loss": 15.7093, "step": 26 }, { "epoch": 0.0015480319926611816, "grad_norm": 0.0, "learning_rate": 3.417721518987342e-06, "loss": 16.1732, "step": 27 }, { "epoch": 0.0016053665109078922, "grad_norm": 0.0, "learning_rate": 3.544303797468355e-06, "loss": 15.9809, "step": 28 }, { "epoch": 0.0016627010291546025, "grad_norm": 0.0, "learning_rate": 3.6708860759493675e-06, "loss": 15.6426, "step": 29 }, { "epoch": 0.001720035547401313, "grad_norm": 0.0, "learning_rate": 3.7974683544303802e-06, "loss": 15.5717, "step": 30 }, { "epoch": 0.0017773700656480234, "grad_norm": 0.0, "learning_rate": 3.924050632911393e-06, "loss": 15.6596, "step": 31 }, { "epoch": 0.001834704583894734, "grad_norm": 0.0, "learning_rate": 4.050632911392405e-06, "loss": 15.4429, "step": 32 }, { "epoch": 0.0018920391021414443, "grad_norm": 0.0, "learning_rate": 4.177215189873418e-06, "loss": 15.9317, "step": 33 }, { "epoch": 0.0019493736203881546, "grad_norm": 0.0, "learning_rate": 4.303797468354431e-06, "loss": 15.6599, "step": 34 }, { "epoch": 0.002006708138634865, "grad_norm": 0.0, "learning_rate": 4.430379746835443e-06, "loss": 15.8037, "step": 35 }, { "epoch": 0.0020640426568815755, "grad_norm": 0.0, "learning_rate": 4.556962025316456e-06, "loss": 15.976, "step": 36 }, { "epoch": 0.002121377175128286, "grad_norm": 0.0, "learning_rate": 4.683544303797468e-06, "loss": 15.8437, "step": 37 }, { "epoch": 0.0021787116933749966, "grad_norm": 0.0, "learning_rate": 4.8101265822784815e-06, "loss": 15.8187, "step": 38 }, { "epoch": 0.002236046211621707, "grad_norm": 0.0, "learning_rate": 4.936708860759495e-06, "loss": 15.8865, "step": 39 }, { "epoch": 0.0022933807298684173, "grad_norm": 0.0, "learning_rate": 5.063291139240507e-06, "loss": 15.9997, "step": 40 }, { "epoch": 0.0023507152481151276, "grad_norm": 0.0, "learning_rate": 5.189873417721519e-06, "loss": 16.2616, "step": 41 }, { "epoch": 0.0024080497663618384, "grad_norm": 0.0, "learning_rate": 5.3164556962025316e-06, "loss": 15.7347, "step": 42 }, { "epoch": 0.0024653842846085487, "grad_norm": 0.0, "learning_rate": 5.443037974683545e-06, "loss": 15.7051, "step": 43 }, { "epoch": 0.002522718802855259, "grad_norm": 0.0, "learning_rate": 5.569620253164557e-06, "loss": 15.9378, "step": 44 }, { "epoch": 0.0025800533211019694, "grad_norm": 0.0, "learning_rate": 5.69620253164557e-06, "loss": 15.7706, "step": 45 }, { "epoch": 0.0026373878393486797, "grad_norm": 0.0, "learning_rate": 5.8227848101265824e-06, "loss": 15.4018, "step": 46 }, { "epoch": 0.0026947223575953905, "grad_norm": 0.0, "learning_rate": 5.949367088607595e-06, "loss": 15.4771, "step": 47 }, { "epoch": 0.002752056875842101, "grad_norm": 0.0, "learning_rate": 6.075949367088608e-06, "loss": 15.8046, "step": 48 }, { "epoch": 0.002809391394088811, "grad_norm": 0.0, "learning_rate": 6.20253164556962e-06, "loss": 16.0405, "step": 49 }, { "epoch": 0.0028667259123355215, "grad_norm": 0.0, "learning_rate": 6.329113924050634e-06, "loss": 16.0027, "step": 50 }, { "epoch": 0.0029240604305822322, "grad_norm": 0.0, "learning_rate": 6.4556962025316464e-06, "loss": 16.0728, "step": 51 }, { "epoch": 0.0029813949488289426, "grad_norm": 0.0, "learning_rate": 6.582278481012659e-06, "loss": 15.831, "step": 52 }, { "epoch": 0.003038729467075653, "grad_norm": 0.0, "learning_rate": 6.708860759493672e-06, "loss": 15.4141, "step": 53 }, { "epoch": 0.0030960639853223632, "grad_norm": 0.0, "learning_rate": 6.835443037974684e-06, "loss": 15.6401, "step": 54 }, { "epoch": 0.0031533985035690736, "grad_norm": 0.0, "learning_rate": 6.962025316455697e-06, "loss": 15.7223, "step": 55 }, { "epoch": 0.0032107330218157843, "grad_norm": 0.0, "learning_rate": 7.08860759493671e-06, "loss": 15.805, "step": 56 }, { "epoch": 0.0032680675400624947, "grad_norm": 0.0, "learning_rate": 7.215189873417722e-06, "loss": 15.6997, "step": 57 }, { "epoch": 0.003325402058309205, "grad_norm": 0.0, "learning_rate": 7.341772151898735e-06, "loss": 15.8021, "step": 58 }, { "epoch": 0.0033827365765559153, "grad_norm": 0.0, "learning_rate": 7.468354430379747e-06, "loss": 15.6945, "step": 59 }, { "epoch": 0.003440071094802626, "grad_norm": 0.0, "learning_rate": 7.5949367088607605e-06, "loss": 15.7754, "step": 60 }, { "epoch": 0.0034974056130493364, "grad_norm": 0.0, "learning_rate": 7.721518987341773e-06, "loss": 15.6585, "step": 61 }, { "epoch": 0.0035547401312960468, "grad_norm": 0.0, "learning_rate": 7.848101265822786e-06, "loss": 16.1158, "step": 62 }, { "epoch": 0.003612074649542757, "grad_norm": 0.0, "learning_rate": 7.974683544303799e-06, "loss": 15.8004, "step": 63 }, { "epoch": 0.003669409167789468, "grad_norm": 0.0, "learning_rate": 8.10126582278481e-06, "loss": 15.3772, "step": 64 }, { "epoch": 0.003726743686036178, "grad_norm": 0.0, "learning_rate": 8.227848101265824e-06, "loss": 15.7289, "step": 65 }, { "epoch": 0.0037840782042828885, "grad_norm": 0.0, "learning_rate": 8.354430379746837e-06, "loss": 15.7923, "step": 66 }, { "epoch": 0.003841412722529599, "grad_norm": 0.0, "learning_rate": 8.481012658227848e-06, "loss": 15.5226, "step": 67 }, { "epoch": 0.003898747240776309, "grad_norm": 0.0, "learning_rate": 8.607594936708861e-06, "loss": 15.7584, "step": 68 }, { "epoch": 0.00395608175902302, "grad_norm": 0.0, "learning_rate": 8.734177215189874e-06, "loss": 15.9545, "step": 69 }, { "epoch": 0.00401341627726973, "grad_norm": 0.0, "learning_rate": 8.860759493670886e-06, "loss": 15.9191, "step": 70 }, { "epoch": 0.004070750795516441, "grad_norm": 0.0, "learning_rate": 8.987341772151899e-06, "loss": 15.7233, "step": 71 }, { "epoch": 0.004128085313763151, "grad_norm": 0.0, "learning_rate": 9.113924050632912e-06, "loss": 15.5079, "step": 72 }, { "epoch": 0.004185419832009861, "grad_norm": 0.0, "learning_rate": 9.240506329113925e-06, "loss": 15.7607, "step": 73 }, { "epoch": 0.004242754350256572, "grad_norm": 0.0, "learning_rate": 9.367088607594937e-06, "loss": 16.0051, "step": 74 }, { "epoch": 0.004300088868503283, "grad_norm": 0.0, "learning_rate": 9.49367088607595e-06, "loss": 15.9513, "step": 75 }, { "epoch": 0.004357423386749993, "grad_norm": 0.0, "learning_rate": 9.620253164556963e-06, "loss": 15.9781, "step": 76 }, { "epoch": 0.0044147579049967035, "grad_norm": 0.0, "learning_rate": 9.746835443037975e-06, "loss": 16.0359, "step": 77 }, { "epoch": 0.004472092423243414, "grad_norm": 0.0, "learning_rate": 9.87341772151899e-06, "loss": 15.8052, "step": 78 }, { "epoch": 0.004529426941490124, "grad_norm": 0.0, "learning_rate": 1e-05, "loss": 15.4073, "step": 79 }, { "epoch": 0.0045867614597368345, "grad_norm": 0.0, "learning_rate": 9.999996169491213e-06, "loss": 15.8073, "step": 80 }, { "epoch": 0.004644095977983545, "grad_norm": 0.0, "learning_rate": 9.999984677970716e-06, "loss": 15.9295, "step": 81 }, { "epoch": 0.004701430496230255, "grad_norm": 0.0, "learning_rate": 9.99996552545612e-06, "loss": 15.5126, "step": 82 }, { "epoch": 0.0047587650144769655, "grad_norm": 0.0, "learning_rate": 9.999938711976769e-06, "loss": 15.5249, "step": 83 }, { "epoch": 0.004816099532723677, "grad_norm": 0.0, "learning_rate": 9.999904237573746e-06, "loss": 15.7576, "step": 84 }, { "epoch": 0.004873434050970387, "grad_norm": 0.0, "learning_rate": 9.999862102299874e-06, "loss": 16.1043, "step": 85 }, { "epoch": 0.004930768569217097, "grad_norm": 0.0, "learning_rate": 9.999812306219712e-06, "loss": 15.7214, "step": 86 }, { "epoch": 0.004988103087463808, "grad_norm": 0.0, "learning_rate": 9.999754849409559e-06, "loss": 16.0242, "step": 87 }, { "epoch": 0.005045437605710518, "grad_norm": 0.0, "learning_rate": 9.99968973195745e-06, "loss": 15.825, "step": 88 }, { "epoch": 0.005102772123957228, "grad_norm": 0.0, "learning_rate": 9.999616953963156e-06, "loss": 15.7653, "step": 89 }, { "epoch": 0.005160106642203939, "grad_norm": 0.0, "learning_rate": 9.99953651553819e-06, "loss": 15.7074, "step": 90 }, { "epoch": 0.005217441160450649, "grad_norm": 0.0, "learning_rate": 9.999448416805802e-06, "loss": 16.1031, "step": 91 }, { "epoch": 0.005274775678697359, "grad_norm": 0.0, "learning_rate": 9.999352657900973e-06, "loss": 15.7045, "step": 92 }, { "epoch": 0.005332110196944071, "grad_norm": 0.0, "learning_rate": 9.999249238970427e-06, "loss": 16.0837, "step": 93 }, { "epoch": 0.005389444715190781, "grad_norm": 0.0, "learning_rate": 9.999138160172624e-06, "loss": 15.6533, "step": 94 }, { "epoch": 0.005446779233437491, "grad_norm": 0.0, "learning_rate": 9.999019421677755e-06, "loss": 15.8828, "step": 95 }, { "epoch": 0.005504113751684202, "grad_norm": 0.0, "learning_rate": 9.998893023667758e-06, "loss": 15.9728, "step": 96 }, { "epoch": 0.005561448269930912, "grad_norm": 0.0, "learning_rate": 9.998758966336296e-06, "loss": 15.9939, "step": 97 }, { "epoch": 0.005618782788177622, "grad_norm": 0.0, "learning_rate": 9.998617249888773e-06, "loss": 15.896, "step": 98 }, { "epoch": 0.005676117306424333, "grad_norm": 0.0, "learning_rate": 9.998467874542328e-06, "loss": 15.8819, "step": 99 }, { "epoch": 0.005733451824671043, "grad_norm": 0.0, "learning_rate": 9.998310840525835e-06, "loss": 15.7407, "step": 100 }, { "epoch": 0.005790786342917753, "grad_norm": 0.0, "learning_rate": 9.9981461480799e-06, "loss": 15.5921, "step": 101 }, { "epoch": 0.0058481208611644644, "grad_norm": 0.0, "learning_rate": 9.997973797456867e-06, "loss": 16.1465, "step": 102 }, { "epoch": 0.005905455379411175, "grad_norm": 0.0, "learning_rate": 9.99779378892081e-06, "loss": 15.5944, "step": 103 }, { "epoch": 0.005962789897657885, "grad_norm": 0.0, "learning_rate": 9.997606122747543e-06, "loss": 15.6872, "step": 104 }, { "epoch": 0.0060201244159045954, "grad_norm": 0.0, "learning_rate": 9.997410799224604e-06, "loss": 15.5546, "step": 105 }, { "epoch": 0.006077458934151306, "grad_norm": 0.0, "learning_rate": 9.997207818651273e-06, "loss": 15.9227, "step": 106 }, { "epoch": 0.006134793452398016, "grad_norm": 0.0, "learning_rate": 9.996997181338554e-06, "loss": 15.7962, "step": 107 }, { "epoch": 0.0061921279706447264, "grad_norm": 0.0, "learning_rate": 9.996778887609189e-06, "loss": 15.4476, "step": 108 }, { "epoch": 0.006249462488891437, "grad_norm": 0.0, "learning_rate": 9.996552937797646e-06, "loss": 15.6365, "step": 109 }, { "epoch": 0.006306797007138147, "grad_norm": 0.0, "learning_rate": 9.996319332250127e-06, "loss": 15.935, "step": 110 }, { "epoch": 0.006364131525384858, "grad_norm": 0.0, "learning_rate": 9.996078071324562e-06, "loss": 15.8665, "step": 111 }, { "epoch": 0.006421466043631569, "grad_norm": 0.0, "learning_rate": 9.995829155390613e-06, "loss": 15.5091, "step": 112 }, { "epoch": 0.006478800561878279, "grad_norm": 0.0, "learning_rate": 9.99557258482967e-06, "loss": 15.586, "step": 113 }, { "epoch": 0.006536135080124989, "grad_norm": 0.0, "learning_rate": 9.995308360034852e-06, "loss": 15.6547, "step": 114 }, { "epoch": 0.0065934695983717, "grad_norm": 0.0, "learning_rate": 9.995036481411005e-06, "loss": 15.3795, "step": 115 }, { "epoch": 0.00665080411661841, "grad_norm": 0.0, "learning_rate": 9.9947569493747e-06, "loss": 15.7044, "step": 116 }, { "epoch": 0.00670813863486512, "grad_norm": 0.0, "learning_rate": 9.99446976435424e-06, "loss": 15.8559, "step": 117 }, { "epoch": 0.006765473153111831, "grad_norm": 0.0, "learning_rate": 9.994174926789648e-06, "loss": 15.6059, "step": 118 }, { "epoch": 0.006822807671358541, "grad_norm": 0.0, "learning_rate": 9.993872437132678e-06, "loss": 16.1036, "step": 119 }, { "epoch": 0.006880142189605252, "grad_norm": 0.0, "learning_rate": 9.993562295846806e-06, "loss": 15.5074, "step": 120 }, { "epoch": 0.0069374767078519625, "grad_norm": 0.0, "learning_rate": 9.993244503407227e-06, "loss": 15.5059, "step": 121 }, { "epoch": 0.006994811226098673, "grad_norm": 0.0, "learning_rate": 9.99291906030087e-06, "loss": 15.5546, "step": 122 }, { "epoch": 0.007052145744345383, "grad_norm": 0.0, "learning_rate": 9.992585967026374e-06, "loss": 15.8098, "step": 123 }, { "epoch": 0.0071094802625920935, "grad_norm": 0.0, "learning_rate": 9.99224522409411e-06, "loss": 15.9037, "step": 124 }, { "epoch": 0.007166814780838804, "grad_norm": 0.0, "learning_rate": 9.991896832026162e-06, "loss": 16.0274, "step": 125 }, { "epoch": 0.007224149299085514, "grad_norm": 0.0, "learning_rate": 9.991540791356342e-06, "loss": 15.5932, "step": 126 }, { "epoch": 0.0072814838173322245, "grad_norm": 0.0, "learning_rate": 9.991177102630173e-06, "loss": 15.7162, "step": 127 }, { "epoch": 0.007338818335578936, "grad_norm": 0.0, "learning_rate": 9.990805766404902e-06, "loss": 15.2172, "step": 128 }, { "epoch": 0.007396152853825646, "grad_norm": 0.0, "learning_rate": 9.990426783249492e-06, "loss": 15.444, "step": 129 }, { "epoch": 0.007453487372072356, "grad_norm": 0.0, "learning_rate": 9.99004015374462e-06, "loss": 15.4409, "step": 130 }, { "epoch": 0.007510821890319067, "grad_norm": 0.0, "learning_rate": 9.989645878482684e-06, "loss": 15.941, "step": 131 }, { "epoch": 0.007568156408565777, "grad_norm": 0.0, "learning_rate": 9.989243958067791e-06, "loss": 16.0396, "step": 132 }, { "epoch": 0.007625490926812487, "grad_norm": 0.0, "learning_rate": 9.988834393115768e-06, "loss": 15.4225, "step": 133 }, { "epoch": 0.007682825445059198, "grad_norm": 0.0, "learning_rate": 9.988417184254148e-06, "loss": 15.82, "step": 134 }, { "epoch": 0.007740159963305908, "grad_norm": 0.0, "learning_rate": 9.987992332122182e-06, "loss": 15.7309, "step": 135 }, { "epoch": 0.007797494481552618, "grad_norm": 0.0, "learning_rate": 9.987559837370832e-06, "loss": 15.7048, "step": 136 }, { "epoch": 0.00785482899979933, "grad_norm": 0.0, "learning_rate": 9.987119700662766e-06, "loss": 15.3024, "step": 137 }, { "epoch": 0.00791216351804604, "grad_norm": 0.0, "learning_rate": 9.986671922672362e-06, "loss": 15.7417, "step": 138 }, { "epoch": 0.00796949803629275, "grad_norm": 0.0, "learning_rate": 9.986216504085709e-06, "loss": 15.8964, "step": 139 }, { "epoch": 0.00802683255453946, "grad_norm": 0.0, "learning_rate": 9.9857534456006e-06, "loss": 15.7391, "step": 140 }, { "epoch": 0.008084167072786171, "grad_norm": 0.0, "learning_rate": 9.985282747926535e-06, "loss": 15.558, "step": 141 }, { "epoch": 0.008141501591032881, "grad_norm": 0.0, "learning_rate": 9.984804411784717e-06, "loss": 15.2734, "step": 142 }, { "epoch": 0.008198836109279592, "grad_norm": 0.0, "learning_rate": 9.984318437908056e-06, "loss": 15.6669, "step": 143 }, { "epoch": 0.008256170627526302, "grad_norm": 0.0, "learning_rate": 9.983824827041164e-06, "loss": 15.8501, "step": 144 }, { "epoch": 0.008313505145773012, "grad_norm": 0.0, "learning_rate": 9.983323579940351e-06, "loss": 15.7229, "step": 145 }, { "epoch": 0.008370839664019723, "grad_norm": 0.0, "learning_rate": 9.98281469737363e-06, "loss": 15.7334, "step": 146 }, { "epoch": 0.008428174182266433, "grad_norm": 0.0, "learning_rate": 9.982298180120715e-06, "loss": 15.3766, "step": 147 }, { "epoch": 0.008485508700513143, "grad_norm": 0.0, "learning_rate": 9.981774028973013e-06, "loss": 15.6116, "step": 148 }, { "epoch": 0.008542843218759854, "grad_norm": 0.0, "learning_rate": 9.981242244733631e-06, "loss": 15.4852, "step": 149 }, { "epoch": 0.008600177737006566, "grad_norm": 0.0, "learning_rate": 9.98070282821737e-06, "loss": 15.7986, "step": 150 }, { "epoch": 0.008657512255253276, "grad_norm": 0.0, "learning_rate": 9.980155780250728e-06, "loss": 15.6175, "step": 151 }, { "epoch": 0.008714846773499986, "grad_norm": 0.0, "learning_rate": 9.97960110167189e-06, "loss": 15.7265, "step": 152 }, { "epoch": 0.008772181291746697, "grad_norm": 0.0, "learning_rate": 9.979038793330743e-06, "loss": 15.3184, "step": 153 }, { "epoch": 0.008829515809993407, "grad_norm": 0.0, "learning_rate": 9.97846885608885e-06, "loss": 15.6656, "step": 154 }, { "epoch": 0.008886850328240117, "grad_norm": 0.0, "learning_rate": 9.977891290819474e-06, "loss": 15.5521, "step": 155 }, { "epoch": 0.008944184846486828, "grad_norm": 0.0, "learning_rate": 9.977306098407566e-06, "loss": 15.6549, "step": 156 }, { "epoch": 0.009001519364733538, "grad_norm": 0.0, "learning_rate": 9.976713279749754e-06, "loss": 15.719, "step": 157 }, { "epoch": 0.009058853882980248, "grad_norm": 0.0, "learning_rate": 9.976112835754362e-06, "loss": 15.5373, "step": 158 }, { "epoch": 0.009116188401226959, "grad_norm": 0.0, "learning_rate": 9.975504767341388e-06, "loss": 15.9543, "step": 159 }, { "epoch": 0.009173522919473669, "grad_norm": 0.0, "learning_rate": 9.97488907544252e-06, "loss": 15.6243, "step": 160 }, { "epoch": 0.00923085743772038, "grad_norm": 0.0, "learning_rate": 9.974265761001123e-06, "loss": 15.4797, "step": 161 }, { "epoch": 0.00928819195596709, "grad_norm": 0.0, "learning_rate": 9.97363482497224e-06, "loss": 15.9338, "step": 162 }, { "epoch": 0.0093455264742138, "grad_norm": 0.0, "learning_rate": 9.972996268322594e-06, "loss": 15.7102, "step": 163 }, { "epoch": 0.00940286099246051, "grad_norm": 0.0, "learning_rate": 9.972350092030583e-06, "loss": 15.4883, "step": 164 }, { "epoch": 0.00946019551070722, "grad_norm": 0.0, "learning_rate": 9.971696297086282e-06, "loss": 15.7564, "step": 165 }, { "epoch": 0.009517530028953931, "grad_norm": 0.0, "learning_rate": 9.971034884491436e-06, "loss": 15.8265, "step": 166 }, { "epoch": 0.009574864547200641, "grad_norm": 0.0, "learning_rate": 9.970365855259465e-06, "loss": 15.7952, "step": 167 }, { "epoch": 0.009632199065447353, "grad_norm": 0.0, "learning_rate": 9.96968921041546e-06, "loss": 15.3301, "step": 168 }, { "epoch": 0.009689533583694064, "grad_norm": 0.0, "learning_rate": 9.969004950996175e-06, "loss": 15.6668, "step": 169 }, { "epoch": 0.009746868101940774, "grad_norm": 0.0, "learning_rate": 9.968313078050035e-06, "loss": 15.7378, "step": 170 }, { "epoch": 0.009804202620187484, "grad_norm": 0.0, "learning_rate": 9.967613592637133e-06, "loss": 15.3782, "step": 171 }, { "epoch": 0.009861537138434195, "grad_norm": 0.0, "learning_rate": 9.96690649582922e-06, "loss": 15.5379, "step": 172 }, { "epoch": 0.009918871656680905, "grad_norm": 0.0, "learning_rate": 9.966191788709716e-06, "loss": 15.4814, "step": 173 }, { "epoch": 0.009976206174927615, "grad_norm": 0.0, "learning_rate": 9.965469472373693e-06, "loss": 15.6, "step": 174 }, { "epoch": 0.010033540693174326, "grad_norm": 0.0, "learning_rate": 9.964739547927892e-06, "loss": 15.8427, "step": 175 }, { "epoch": 0.010090875211421036, "grad_norm": 0.0, "learning_rate": 9.964002016490698e-06, "loss": 15.403, "step": 176 }, { "epoch": 0.010148209729667746, "grad_norm": 0.0, "learning_rate": 9.963256879192167e-06, "loss": 15.7499, "step": 177 }, { "epoch": 0.010205544247914457, "grad_norm": 0.0, "learning_rate": 9.962504137173997e-06, "loss": 15.9071, "step": 178 }, { "epoch": 0.010262878766161167, "grad_norm": 0.0, "learning_rate": 9.961743791589544e-06, "loss": 15.852, "step": 179 }, { "epoch": 0.010320213284407877, "grad_norm": 0.0, "learning_rate": 9.96097584360381e-06, "loss": 15.6302, "step": 180 }, { "epoch": 0.010377547802654588, "grad_norm": 0.0, "learning_rate": 9.96020029439345e-06, "loss": 15.6271, "step": 181 }, { "epoch": 0.010434882320901298, "grad_norm": 0.0, "learning_rate": 9.959417145146761e-06, "loss": 15.8202, "step": 182 }, { "epoch": 0.010492216839148008, "grad_norm": 0.0, "learning_rate": 9.958626397063688e-06, "loss": 15.4802, "step": 183 }, { "epoch": 0.010549551357394719, "grad_norm": 0.0, "learning_rate": 9.957828051355817e-06, "loss": 15.7718, "step": 184 }, { "epoch": 0.010606885875641429, "grad_norm": 0.0, "learning_rate": 9.95702210924638e-06, "loss": 15.905, "step": 185 }, { "epoch": 0.010664220393888141, "grad_norm": 0.0, "learning_rate": 9.956208571970238e-06, "loss": 15.4488, "step": 186 }, { "epoch": 0.010721554912134851, "grad_norm": 0.0, "learning_rate": 9.955387440773902e-06, "loss": 15.6706, "step": 187 }, { "epoch": 0.010778889430381562, "grad_norm": 0.0, "learning_rate": 9.954558716915508e-06, "loss": 15.7807, "step": 188 }, { "epoch": 0.010836223948628272, "grad_norm": 0.0, "learning_rate": 9.953722401664829e-06, "loss": 15.9079, "step": 189 }, { "epoch": 0.010893558466874982, "grad_norm": 0.0, "learning_rate": 9.952878496303274e-06, "loss": 15.8757, "step": 190 }, { "epoch": 0.010950892985121693, "grad_norm": 0.0, "learning_rate": 9.952027002123877e-06, "loss": 15.8773, "step": 191 }, { "epoch": 0.011008227503368403, "grad_norm": 0.0, "learning_rate": 9.951167920431297e-06, "loss": 15.3914, "step": 192 }, { "epoch": 0.011065562021615113, "grad_norm": 0.0, "learning_rate": 9.950301252541824e-06, "loss": 15.5592, "step": 193 }, { "epoch": 0.011122896539861824, "grad_norm": 0.0, "learning_rate": 9.94942699978337e-06, "loss": 15.9349, "step": 194 }, { "epoch": 0.011180231058108534, "grad_norm": 0.0, "learning_rate": 9.94854516349547e-06, "loss": 15.3999, "step": 195 }, { "epoch": 0.011237565576355244, "grad_norm": 0.0, "learning_rate": 9.94765574502927e-06, "loss": 15.7352, "step": 196 }, { "epoch": 0.011294900094601955, "grad_norm": 0.0, "learning_rate": 9.946758745747549e-06, "loss": 16.1242, "step": 197 }, { "epoch": 0.011352234612848665, "grad_norm": 0.0, "learning_rate": 9.945854167024685e-06, "loss": 15.617, "step": 198 }, { "epoch": 0.011409569131095375, "grad_norm": 0.0, "learning_rate": 9.944942010246681e-06, "loss": 15.5772, "step": 199 }, { "epoch": 0.011466903649342086, "grad_norm": 0.0, "learning_rate": 9.944022276811147e-06, "loss": 15.8159, "step": 200 }, { "epoch": 0.011524238167588796, "grad_norm": 0.0, "learning_rate": 9.943094968127298e-06, "loss": 15.7496, "step": 201 }, { "epoch": 0.011581572685835506, "grad_norm": 0.0, "learning_rate": 9.942160085615963e-06, "loss": 15.694, "step": 202 }, { "epoch": 0.011638907204082219, "grad_norm": 0.0, "learning_rate": 9.941217630709571e-06, "loss": 15.2256, "step": 203 }, { "epoch": 0.011696241722328929, "grad_norm": 0.0, "learning_rate": 9.940267604852155e-06, "loss": 15.7162, "step": 204 }, { "epoch": 0.01175357624057564, "grad_norm": 0.0, "learning_rate": 9.939310009499348e-06, "loss": 16.0539, "step": 205 }, { "epoch": 0.01181091075882235, "grad_norm": 0.0, "learning_rate": 9.938344846118382e-06, "loss": 15.3012, "step": 206 }, { "epoch": 0.01186824527706906, "grad_norm": 0.0, "learning_rate": 9.937372116188081e-06, "loss": 15.5806, "step": 207 }, { "epoch": 0.01192557979531577, "grad_norm": 0.0, "learning_rate": 9.936391821198868e-06, "loss": 15.522, "step": 208 }, { "epoch": 0.01198291431356248, "grad_norm": 0.0, "learning_rate": 9.935403962652753e-06, "loss": 15.522, "step": 209 }, { "epoch": 0.012040248831809191, "grad_norm": 0.0, "learning_rate": 9.934408542063337e-06, "loss": 16.0803, "step": 210 }, { "epoch": 0.012097583350055901, "grad_norm": 0.0, "learning_rate": 9.933405560955805e-06, "loss": 15.6953, "step": 211 }, { "epoch": 0.012154917868302612, "grad_norm": 0.0, "learning_rate": 9.932395020866929e-06, "loss": 15.6063, "step": 212 }, { "epoch": 0.012212252386549322, "grad_norm": 0.0, "learning_rate": 9.931376923345067e-06, "loss": 15.7092, "step": 213 }, { "epoch": 0.012269586904796032, "grad_norm": 0.0, "learning_rate": 9.930351269950144e-06, "loss": 15.7337, "step": 214 }, { "epoch": 0.012326921423042743, "grad_norm": 0.0, "learning_rate": 9.929318062253673e-06, "loss": 15.5571, "step": 215 }, { "epoch": 0.012384255941289453, "grad_norm": 0.0, "learning_rate": 9.92827730183874e-06, "loss": 16.0815, "step": 216 }, { "epoch": 0.012441590459536163, "grad_norm": 0.0, "learning_rate": 9.9272289903e-06, "loss": 15.7825, "step": 217 }, { "epoch": 0.012498924977782874, "grad_norm": 0.0, "learning_rate": 9.92617312924368e-06, "loss": 15.7294, "step": 218 }, { "epoch": 0.012556259496029584, "grad_norm": 0.0, "learning_rate": 9.925109720287574e-06, "loss": 15.7288, "step": 219 }, { "epoch": 0.012613594014276294, "grad_norm": 0.0, "learning_rate": 9.924038765061042e-06, "loss": 15.5877, "step": 220 }, { "epoch": 0.012670928532523006, "grad_norm": 0.0, "learning_rate": 9.922960265205001e-06, "loss": 15.5727, "step": 221 }, { "epoch": 0.012728263050769717, "grad_norm": 0.0, "learning_rate": 9.921874222371939e-06, "loss": 15.5752, "step": 222 }, { "epoch": 0.012785597569016427, "grad_norm": 0.0, "learning_rate": 9.92078063822589e-06, "loss": 15.5827, "step": 223 }, { "epoch": 0.012842932087263137, "grad_norm": 0.0, "learning_rate": 9.919679514442449e-06, "loss": 15.451, "step": 224 }, { "epoch": 0.012900266605509848, "grad_norm": 0.0, "learning_rate": 9.918570852708762e-06, "loss": 15.7206, "step": 225 }, { "epoch": 0.012957601123756558, "grad_norm": 0.0, "learning_rate": 9.917454654723522e-06, "loss": 16.032, "step": 226 }, { "epoch": 0.013014935642003268, "grad_norm": 0.0, "learning_rate": 9.916330922196975e-06, "loss": 15.7988, "step": 227 }, { "epoch": 0.013072270160249979, "grad_norm": 0.0, "learning_rate": 9.915199656850906e-06, "loss": 15.6503, "step": 228 }, { "epoch": 0.013129604678496689, "grad_norm": 0.0, "learning_rate": 9.914060860418644e-06, "loss": 15.5872, "step": 229 }, { "epoch": 0.0131869391967434, "grad_norm": 0.0, "learning_rate": 9.912914534645056e-06, "loss": 15.89, "step": 230 }, { "epoch": 0.01324427371499011, "grad_norm": 0.0, "learning_rate": 9.91176068128655e-06, "loss": 15.5145, "step": 231 }, { "epoch": 0.01330160823323682, "grad_norm": 0.0, "learning_rate": 9.910599302111057e-06, "loss": 15.885, "step": 232 }, { "epoch": 0.01335894275148353, "grad_norm": 0.0, "learning_rate": 9.909430398898053e-06, "loss": 15.4595, "step": 233 }, { "epoch": 0.01341627726973024, "grad_norm": 0.0, "learning_rate": 9.908253973438533e-06, "loss": 15.66, "step": 234 }, { "epoch": 0.013473611787976951, "grad_norm": 0.0, "learning_rate": 9.907070027535022e-06, "loss": 15.8289, "step": 235 }, { "epoch": 0.013530946306223661, "grad_norm": 0.0, "learning_rate": 9.905878563001563e-06, "loss": 15.5208, "step": 236 }, { "epoch": 0.013588280824470372, "grad_norm": 0.0, "learning_rate": 9.904679581663725e-06, "loss": 15.7992, "step": 237 }, { "epoch": 0.013645615342717082, "grad_norm": 0.0, "learning_rate": 9.903473085358589e-06, "loss": 15.7632, "step": 238 }, { "epoch": 0.013702949860963794, "grad_norm": 0.0, "learning_rate": 9.902259075934755e-06, "loss": 15.5836, "step": 239 }, { "epoch": 0.013760284379210504, "grad_norm": 0.0, "learning_rate": 9.90103755525233e-06, "loss": 15.3104, "step": 240 }, { "epoch": 0.013817618897457215, "grad_norm": 0.0, "learning_rate": 9.899808525182935e-06, "loss": 15.7533, "step": 241 }, { "epoch": 0.013874953415703925, "grad_norm": 0.0, "learning_rate": 9.898571987609692e-06, "loss": 15.4214, "step": 242 }, { "epoch": 0.013932287933950635, "grad_norm": 0.0, "learning_rate": 9.897327944427231e-06, "loss": 15.9372, "step": 243 }, { "epoch": 0.013989622452197346, "grad_norm": 0.0, "learning_rate": 9.896076397541676e-06, "loss": 15.4472, "step": 244 }, { "epoch": 0.014046956970444056, "grad_norm": 0.0, "learning_rate": 9.894817348870654e-06, "loss": 15.308, "step": 245 }, { "epoch": 0.014104291488690766, "grad_norm": 0.0, "learning_rate": 9.893550800343283e-06, "loss": 15.892, "step": 246 }, { "epoch": 0.014161626006937477, "grad_norm": 0.0, "learning_rate": 9.892276753900173e-06, "loss": 15.5055, "step": 247 }, { "epoch": 0.014218960525184187, "grad_norm": 0.0, "learning_rate": 9.890995211493422e-06, "loss": 15.7145, "step": 248 }, { "epoch": 0.014276295043430897, "grad_norm": 0.0, "learning_rate": 9.889706175086615e-06, "loss": 15.7733, "step": 249 }, { "epoch": 0.014333629561677608, "grad_norm": 0.0, "learning_rate": 9.888409646654818e-06, "loss": 15.4903, "step": 250 }, { "epoch": 0.014390964079924318, "grad_norm": 0.0, "learning_rate": 9.887105628184575e-06, "loss": 15.7896, "step": 251 }, { "epoch": 0.014448298598171028, "grad_norm": 0.0, "learning_rate": 9.885794121673907e-06, "loss": 15.5118, "step": 252 }, { "epoch": 0.014505633116417739, "grad_norm": 0.0, "learning_rate": 9.884475129132312e-06, "loss": 15.8531, "step": 253 }, { "epoch": 0.014562967634664449, "grad_norm": 0.0, "learning_rate": 9.883148652580752e-06, "loss": 15.7605, "step": 254 }, { "epoch": 0.01462030215291116, "grad_norm": 0.0, "learning_rate": 9.881814694051662e-06, "loss": 15.462, "step": 255 }, { "epoch": 0.014677636671157871, "grad_norm": 0.0, "learning_rate": 9.880473255588937e-06, "loss": 15.7006, "step": 256 }, { "epoch": 0.014734971189404582, "grad_norm": 0.0, "learning_rate": 9.879124339247931e-06, "loss": 15.4329, "step": 257 }, { "epoch": 0.014792305707651292, "grad_norm": 0.0, "learning_rate": 9.877767947095462e-06, "loss": 15.7845, "step": 258 }, { "epoch": 0.014849640225898002, "grad_norm": 0.0, "learning_rate": 9.876404081209796e-06, "loss": 15.8761, "step": 259 }, { "epoch": 0.014906974744144713, "grad_norm": 0.0, "learning_rate": 9.875032743680656e-06, "loss": 15.4054, "step": 260 }, { "epoch": 0.014964309262391423, "grad_norm": 0.0, "learning_rate": 9.873653936609207e-06, "loss": 15.3106, "step": 261 }, { "epoch": 0.015021643780638133, "grad_norm": 0.0, "learning_rate": 9.872267662108064e-06, "loss": 15.5101, "step": 262 }, { "epoch": 0.015078978298884844, "grad_norm": 0.0, "learning_rate": 9.870873922301281e-06, "loss": 16.1312, "step": 263 }, { "epoch": 0.015136312817131554, "grad_norm": 0.0, "learning_rate": 9.869472719324351e-06, "loss": 16.0715, "step": 264 }, { "epoch": 0.015193647335378264, "grad_norm": 0.0, "learning_rate": 9.868064055324204e-06, "loss": 15.661, "step": 265 }, { "epoch": 0.015250981853624975, "grad_norm": 0.0, "learning_rate": 9.866647932459196e-06, "loss": 15.7833, "step": 266 }, { "epoch": 0.015308316371871685, "grad_norm": 0.0, "learning_rate": 9.86522435289912e-06, "loss": 15.845, "step": 267 }, { "epoch": 0.015365650890118395, "grad_norm": 0.0, "learning_rate": 9.863793318825186e-06, "loss": 15.9774, "step": 268 }, { "epoch": 0.015422985408365106, "grad_norm": 0.0, "learning_rate": 9.862354832430033e-06, "loss": 16.1399, "step": 269 }, { "epoch": 0.015480319926611816, "grad_norm": 0.0, "learning_rate": 9.86090889591771e-06, "loss": 15.7273, "step": 270 }, { "epoch": 0.015537654444858526, "grad_norm": 0.0, "learning_rate": 9.859455511503691e-06, "loss": 15.6735, "step": 271 }, { "epoch": 0.015594988963105237, "grad_norm": 0.0, "learning_rate": 9.857994681414853e-06, "loss": 15.7031, "step": 272 }, { "epoch": 0.015652323481351947, "grad_norm": 0.0, "learning_rate": 9.856526407889486e-06, "loss": 15.4029, "step": 273 }, { "epoch": 0.01570965799959866, "grad_norm": 0.0, "learning_rate": 9.855050693177286e-06, "loss": 15.4254, "step": 274 }, { "epoch": 0.015766992517845368, "grad_norm": 0.0, "learning_rate": 9.853567539539345e-06, "loss": 15.9419, "step": 275 }, { "epoch": 0.01582432703609208, "grad_norm": 0.0, "learning_rate": 9.85207694924816e-06, "loss": 15.5074, "step": 276 }, { "epoch": 0.01588166155433879, "grad_norm": 0.0, "learning_rate": 9.850578924587614e-06, "loss": 15.6744, "step": 277 }, { "epoch": 0.0159389960725855, "grad_norm": 0.0, "learning_rate": 9.849073467852988e-06, "loss": 15.56, "step": 278 }, { "epoch": 0.01599633059083221, "grad_norm": 0.0, "learning_rate": 9.84756058135095e-06, "loss": 15.5879, "step": 279 }, { "epoch": 0.01605366510907892, "grad_norm": 0.0, "learning_rate": 9.846040267399548e-06, "loss": 15.8704, "step": 280 }, { "epoch": 0.01611099962732563, "grad_norm": 0.0, "learning_rate": 9.844512528328212e-06, "loss": 15.7365, "step": 281 }, { "epoch": 0.016168334145572342, "grad_norm": 0.0, "learning_rate": 9.84297736647775e-06, "loss": 15.4265, "step": 282 }, { "epoch": 0.016225668663819054, "grad_norm": 0.0, "learning_rate": 9.841434784200341e-06, "loss": 15.3965, "step": 283 }, { "epoch": 0.016283003182065763, "grad_norm": 0.0, "learning_rate": 9.83988478385954e-06, "loss": 15.5008, "step": 284 }, { "epoch": 0.016340337700312475, "grad_norm": 0.0, "learning_rate": 9.838327367830257e-06, "loss": 15.6644, "step": 285 }, { "epoch": 0.016397672218559183, "grad_norm": 0.0, "learning_rate": 9.83676253849877e-06, "loss": 15.5689, "step": 286 }, { "epoch": 0.016455006736805895, "grad_norm": 0.0, "learning_rate": 9.835190298262721e-06, "loss": 15.2807, "step": 287 }, { "epoch": 0.016512341255052604, "grad_norm": 0.0, "learning_rate": 9.833610649531099e-06, "loss": 15.4072, "step": 288 }, { "epoch": 0.016569675773299316, "grad_norm": 0.0, "learning_rate": 9.832023594724248e-06, "loss": 15.7064, "step": 289 }, { "epoch": 0.016627010291546025, "grad_norm": 0.0, "learning_rate": 9.830429136273858e-06, "loss": 15.4726, "step": 290 }, { "epoch": 0.016684344809792737, "grad_norm": 0.0, "learning_rate": 9.828827276622965e-06, "loss": 15.932, "step": 291 }, { "epoch": 0.016741679328039445, "grad_norm": 0.0, "learning_rate": 9.827218018225944e-06, "loss": 15.5607, "step": 292 }, { "epoch": 0.016799013846286157, "grad_norm": 0.0, "learning_rate": 9.825601363548507e-06, "loss": 15.568, "step": 293 }, { "epoch": 0.016856348364532866, "grad_norm": 0.0, "learning_rate": 9.823977315067696e-06, "loss": 15.6851, "step": 294 }, { "epoch": 0.016913682882779578, "grad_norm": 0.0, "learning_rate": 9.822345875271884e-06, "loss": 15.5914, "step": 295 }, { "epoch": 0.016971017401026287, "grad_norm": 0.0, "learning_rate": 9.82070704666077e-06, "loss": 15.9541, "step": 296 }, { "epoch": 0.017028351919273, "grad_norm": 0.0, "learning_rate": 9.819060831745373e-06, "loss": 15.3636, "step": 297 }, { "epoch": 0.017085686437519707, "grad_norm": 0.0, "learning_rate": 9.817407233048028e-06, "loss": 15.6332, "step": 298 }, { "epoch": 0.01714302095576642, "grad_norm": 0.0, "learning_rate": 9.815746253102385e-06, "loss": 15.562, "step": 299 }, { "epoch": 0.01720035547401313, "grad_norm": 0.0, "learning_rate": 9.814077894453406e-06, "loss": 15.6414, "step": 300 }, { "epoch": 0.01725768999225984, "grad_norm": 0.0, "learning_rate": 9.812402159657352e-06, "loss": 15.2265, "step": 301 }, { "epoch": 0.017315024510506552, "grad_norm": 0.0, "learning_rate": 9.810719051281791e-06, "loss": 15.8043, "step": 302 }, { "epoch": 0.01737235902875326, "grad_norm": 0.0, "learning_rate": 9.80902857190559e-06, "loss": 15.646, "step": 303 }, { "epoch": 0.017429693546999973, "grad_norm": 0.0, "learning_rate": 9.807330724118906e-06, "loss": 15.5725, "step": 304 }, { "epoch": 0.01748702806524668, "grad_norm": 0.0, "learning_rate": 9.805625510523184e-06, "loss": 15.6773, "step": 305 }, { "epoch": 0.017544362583493393, "grad_norm": 0.0, "learning_rate": 9.803912933731163e-06, "loss": 15.5197, "step": 306 }, { "epoch": 0.017601697101740102, "grad_norm": 0.0, "learning_rate": 9.802192996366859e-06, "loss": 15.7341, "step": 307 }, { "epoch": 0.017659031619986814, "grad_norm": 0.0, "learning_rate": 9.800465701065562e-06, "loss": 15.765, "step": 308 }, { "epoch": 0.017716366138233523, "grad_norm": 0.0, "learning_rate": 9.798731050473843e-06, "loss": 15.3062, "step": 309 }, { "epoch": 0.017773700656480235, "grad_norm": 0.0, "learning_rate": 9.796989047249539e-06, "loss": 15.6744, "step": 310 }, { "epoch": 0.017831035174726943, "grad_norm": 0.0, "learning_rate": 9.795239694061754e-06, "loss": 15.4008, "step": 311 }, { "epoch": 0.017888369692973655, "grad_norm": 0.0, "learning_rate": 9.793482993590853e-06, "loss": 15.5721, "step": 312 }, { "epoch": 0.017945704211220364, "grad_norm": 0.0, "learning_rate": 9.791718948528457e-06, "loss": 15.8716, "step": 313 }, { "epoch": 0.018003038729467076, "grad_norm": 0.0, "learning_rate": 9.789947561577445e-06, "loss": 15.5011, "step": 314 }, { "epoch": 0.018060373247713785, "grad_norm": 0.0, "learning_rate": 9.78816883545194e-06, "loss": 15.3945, "step": 315 }, { "epoch": 0.018117707765960497, "grad_norm": 0.0, "learning_rate": 9.786382772877312e-06, "loss": 15.6252, "step": 316 }, { "epoch": 0.018175042284207205, "grad_norm": 0.0, "learning_rate": 9.784589376590175e-06, "loss": 15.1961, "step": 317 }, { "epoch": 0.018232376802453917, "grad_norm": 0.0, "learning_rate": 9.782788649338376e-06, "loss": 15.7459, "step": 318 }, { "epoch": 0.01828971132070063, "grad_norm": 0.0, "learning_rate": 9.780980593880993e-06, "loss": 15.658, "step": 319 }, { "epoch": 0.018347045838947338, "grad_norm": 0.0, "learning_rate": 9.779165212988339e-06, "loss": 15.5801, "step": 320 }, { "epoch": 0.01840438035719405, "grad_norm": 0.0, "learning_rate": 9.777342509441946e-06, "loss": 15.3656, "step": 321 }, { "epoch": 0.01846171487544076, "grad_norm": 0.0, "learning_rate": 9.775512486034564e-06, "loss": 15.6884, "step": 322 }, { "epoch": 0.01851904939368747, "grad_norm": 0.0, "learning_rate": 9.773675145570163e-06, "loss": 15.7698, "step": 323 }, { "epoch": 0.01857638391193418, "grad_norm": 0.0, "learning_rate": 9.771830490863923e-06, "loss": 15.5413, "step": 324 }, { "epoch": 0.01863371843018089, "grad_norm": 0.0, "learning_rate": 9.76997852474223e-06, "loss": 15.227, "step": 325 }, { "epoch": 0.0186910529484276, "grad_norm": 0.0, "learning_rate": 9.768119250042673e-06, "loss": 16.0092, "step": 326 }, { "epoch": 0.018748387466674312, "grad_norm": 0.0, "learning_rate": 9.76625266961404e-06, "loss": 15.5157, "step": 327 }, { "epoch": 0.01880572198492102, "grad_norm": 0.0, "learning_rate": 9.76437878631631e-06, "loss": 15.4693, "step": 328 }, { "epoch": 0.018863056503167733, "grad_norm": 0.0, "learning_rate": 9.762497603020658e-06, "loss": 15.336, "step": 329 }, { "epoch": 0.01892039102141444, "grad_norm": 0.0, "learning_rate": 9.760609122609434e-06, "loss": 15.4514, "step": 330 }, { "epoch": 0.018977725539661153, "grad_norm": 0.0, "learning_rate": 9.758713347976179e-06, "loss": 15.8285, "step": 331 }, { "epoch": 0.019035060057907862, "grad_norm": 0.0, "learning_rate": 9.756810282025602e-06, "loss": 16.0446, "step": 332 }, { "epoch": 0.019092394576154574, "grad_norm": 0.0, "learning_rate": 9.754899927673588e-06, "loss": 15.9323, "step": 333 }, { "epoch": 0.019149729094401283, "grad_norm": 0.0, "learning_rate": 9.752982287847193e-06, "loss": 15.4573, "step": 334 }, { "epoch": 0.019207063612647995, "grad_norm": 0.0, "learning_rate": 9.751057365484625e-06, "loss": 15.6772, "step": 335 }, { "epoch": 0.019264398130894707, "grad_norm": 0.0, "learning_rate": 9.74912516353526e-06, "loss": 15.7258, "step": 336 }, { "epoch": 0.019321732649141415, "grad_norm": 0.0, "learning_rate": 9.747185684959626e-06, "loss": 15.716, "step": 337 }, { "epoch": 0.019379067167388127, "grad_norm": 0.0, "learning_rate": 9.745238932729397e-06, "loss": 15.6332, "step": 338 }, { "epoch": 0.019436401685634836, "grad_norm": 0.0, "learning_rate": 9.743284909827393e-06, "loss": 15.0069, "step": 339 }, { "epoch": 0.019493736203881548, "grad_norm": 0.0, "learning_rate": 9.741323619247575e-06, "loss": 15.6709, "step": 340 }, { "epoch": 0.019551070722128257, "grad_norm": 0.0, "learning_rate": 9.739355063995042e-06, "loss": 15.6234, "step": 341 }, { "epoch": 0.01960840524037497, "grad_norm": 0.0, "learning_rate": 9.73737924708602e-06, "loss": 15.7564, "step": 342 }, { "epoch": 0.019665739758621677, "grad_norm": 0.0, "learning_rate": 9.735396171547859e-06, "loss": 15.582, "step": 343 }, { "epoch": 0.01972307427686839, "grad_norm": 0.0, "learning_rate": 9.73340584041904e-06, "loss": 15.6952, "step": 344 }, { "epoch": 0.019780408795115098, "grad_norm": 0.0, "learning_rate": 9.73140825674915e-06, "loss": 15.4585, "step": 345 }, { "epoch": 0.01983774331336181, "grad_norm": 0.0, "learning_rate": 9.7294034235989e-06, "loss": 15.6929, "step": 346 }, { "epoch": 0.01989507783160852, "grad_norm": 0.0, "learning_rate": 9.727391344040095e-06, "loss": 15.7319, "step": 347 }, { "epoch": 0.01995241234985523, "grad_norm": 0.0, "learning_rate": 9.725372021155656e-06, "loss": 15.6427, "step": 348 }, { "epoch": 0.02000974686810194, "grad_norm": 0.0, "learning_rate": 9.723345458039595e-06, "loss": 15.3811, "step": 349 }, { "epoch": 0.02006708138634865, "grad_norm": 0.0, "learning_rate": 9.721311657797018e-06, "loss": 15.6416, "step": 350 }, { "epoch": 0.02012441590459536, "grad_norm": 0.0, "learning_rate": 9.719270623544122e-06, "loss": 15.5834, "step": 351 }, { "epoch": 0.020181750422842072, "grad_norm": 0.0, "learning_rate": 9.717222358408188e-06, "loss": 15.8382, "step": 352 }, { "epoch": 0.020239084941088784, "grad_norm": 0.0, "learning_rate": 9.71516686552757e-06, "loss": 15.8291, "step": 353 }, { "epoch": 0.020296419459335493, "grad_norm": 0.0, "learning_rate": 9.71310414805171e-06, "loss": 15.6789, "step": 354 }, { "epoch": 0.020353753977582205, "grad_norm": 0.0, "learning_rate": 9.711034209141102e-06, "loss": 15.5076, "step": 355 }, { "epoch": 0.020411088495828913, "grad_norm": 0.0, "learning_rate": 9.708957051967318e-06, "loss": 15.5615, "step": 356 }, { "epoch": 0.020468423014075626, "grad_norm": 0.0, "learning_rate": 9.706872679712986e-06, "loss": 15.6241, "step": 357 }, { "epoch": 0.020525757532322334, "grad_norm": 0.0, "learning_rate": 9.704781095571788e-06, "loss": 15.4126, "step": 358 }, { "epoch": 0.020583092050569046, "grad_norm": 0.0, "learning_rate": 9.702682302748456e-06, "loss": 15.6394, "step": 359 }, { "epoch": 0.020640426568815755, "grad_norm": 0.0, "learning_rate": 9.700576304458769e-06, "loss": 15.7437, "step": 360 }, { "epoch": 0.020697761087062467, "grad_norm": 0.0, "learning_rate": 9.698463103929542e-06, "loss": 15.425, "step": 361 }, { "epoch": 0.020755095605309175, "grad_norm": 0.0, "learning_rate": 9.696342704398632e-06, "loss": 15.5889, "step": 362 }, { "epoch": 0.020812430123555888, "grad_norm": 0.0, "learning_rate": 9.69421510911492e-06, "loss": 15.4247, "step": 363 }, { "epoch": 0.020869764641802596, "grad_norm": 0.0, "learning_rate": 9.692080321338317e-06, "loss": 15.572, "step": 364 }, { "epoch": 0.020927099160049308, "grad_norm": 0.0, "learning_rate": 9.689938344339751e-06, "loss": 15.6727, "step": 365 }, { "epoch": 0.020984433678296017, "grad_norm": 0.0, "learning_rate": 9.687789181401166e-06, "loss": 15.5547, "step": 366 }, { "epoch": 0.02104176819654273, "grad_norm": 0.0, "learning_rate": 9.685632835815519e-06, "loss": 15.5774, "step": 367 }, { "epoch": 0.021099102714789437, "grad_norm": 0.0, "learning_rate": 9.683469310886769e-06, "loss": 15.5133, "step": 368 }, { "epoch": 0.02115643723303615, "grad_norm": 0.0, "learning_rate": 9.681298609929875e-06, "loss": 15.7577, "step": 369 }, { "epoch": 0.021213771751282858, "grad_norm": 0.0, "learning_rate": 9.679120736270796e-06, "loss": 15.8414, "step": 370 }, { "epoch": 0.02127110626952957, "grad_norm": 0.0, "learning_rate": 9.676935693246475e-06, "loss": 15.5358, "step": 371 }, { "epoch": 0.021328440787776282, "grad_norm": 0.0, "learning_rate": 9.674743484204844e-06, "loss": 15.4978, "step": 372 }, { "epoch": 0.02138577530602299, "grad_norm": 0.0, "learning_rate": 9.672544112504813e-06, "loss": 15.9646, "step": 373 }, { "epoch": 0.021443109824269703, "grad_norm": 0.0, "learning_rate": 9.670337581516268e-06, "loss": 15.6926, "step": 374 }, { "epoch": 0.02150044434251641, "grad_norm": 0.0, "learning_rate": 9.668123894620062e-06, "loss": 15.8415, "step": 375 }, { "epoch": 0.021557778860763124, "grad_norm": 0.0, "learning_rate": 9.665903055208013e-06, "loss": 15.7397, "step": 376 }, { "epoch": 0.021615113379009832, "grad_norm": 0.0, "learning_rate": 9.663675066682903e-06, "loss": 15.8171, "step": 377 }, { "epoch": 0.021672447897256544, "grad_norm": 0.0, "learning_rate": 9.66143993245846e-06, "loss": 15.5251, "step": 378 }, { "epoch": 0.021729782415503253, "grad_norm": 0.0, "learning_rate": 9.659197655959364e-06, "loss": 15.9786, "step": 379 }, { "epoch": 0.021787116933749965, "grad_norm": 0.0, "learning_rate": 9.656948240621244e-06, "loss": 15.2645, "step": 380 }, { "epoch": 0.021844451451996674, "grad_norm": 0.0, "learning_rate": 9.654691689890656e-06, "loss": 15.4899, "step": 381 }, { "epoch": 0.021901785970243386, "grad_norm": 0.0, "learning_rate": 9.6524280072251e-06, "loss": 15.5523, "step": 382 }, { "epoch": 0.021959120488490094, "grad_norm": 0.0, "learning_rate": 9.650157196092995e-06, "loss": 16.0337, "step": 383 }, { "epoch": 0.022016455006736806, "grad_norm": 0.0, "learning_rate": 9.647879259973687e-06, "loss": 15.6746, "step": 384 }, { "epoch": 0.022073789524983515, "grad_norm": 0.0, "learning_rate": 9.645594202357438e-06, "loss": 15.5839, "step": 385 }, { "epoch": 0.022131124043230227, "grad_norm": 0.0, "learning_rate": 9.643302026745423e-06, "loss": 15.6965, "step": 386 }, { "epoch": 0.022188458561476936, "grad_norm": 0.0, "learning_rate": 9.641002736649718e-06, "loss": 15.6181, "step": 387 }, { "epoch": 0.022245793079723648, "grad_norm": 0.0, "learning_rate": 9.638696335593304e-06, "loss": 15.6196, "step": 388 }, { "epoch": 0.02230312759797036, "grad_norm": 0.0, "learning_rate": 9.636382827110059e-06, "loss": 15.7294, "step": 389 }, { "epoch": 0.02236046211621707, "grad_norm": 0.0, "learning_rate": 9.634062214744749e-06, "loss": 15.3191, "step": 390 }, { "epoch": 0.02241779663446378, "grad_norm": 0.0, "learning_rate": 9.63173450205302e-06, "loss": 15.4134, "step": 391 }, { "epoch": 0.02247513115271049, "grad_norm": 0.0, "learning_rate": 9.629399692601406e-06, "loss": 15.3963, "step": 392 }, { "epoch": 0.0225324656709572, "grad_norm": 0.0, "learning_rate": 9.62705778996731e-06, "loss": 15.6359, "step": 393 }, { "epoch": 0.02258980018920391, "grad_norm": 0.0, "learning_rate": 9.624708797739002e-06, "loss": 15.721, "step": 394 }, { "epoch": 0.02264713470745062, "grad_norm": 0.0, "learning_rate": 9.622352719515615e-06, "loss": 15.2731, "step": 395 }, { "epoch": 0.02270446922569733, "grad_norm": 0.0, "learning_rate": 9.619989558907144e-06, "loss": 15.5629, "step": 396 }, { "epoch": 0.022761803743944042, "grad_norm": 0.0, "learning_rate": 9.617619319534427e-06, "loss": 15.9466, "step": 397 }, { "epoch": 0.02281913826219075, "grad_norm": 0.0, "learning_rate": 9.615242005029159e-06, "loss": 15.4187, "step": 398 }, { "epoch": 0.022876472780437463, "grad_norm": 0.0, "learning_rate": 9.612857619033865e-06, "loss": 15.7401, "step": 399 }, { "epoch": 0.02293380729868417, "grad_norm": 0.0, "learning_rate": 9.610466165201912e-06, "loss": 15.6079, "step": 400 }, { "epoch": 0.022991141816930884, "grad_norm": 0.0, "learning_rate": 9.608067647197492e-06, "loss": 15.53, "step": 401 }, { "epoch": 0.023048476335177592, "grad_norm": 0.0, "learning_rate": 9.605662068695625e-06, "loss": 15.7122, "step": 402 }, { "epoch": 0.023105810853424304, "grad_norm": 0.0, "learning_rate": 9.603249433382145e-06, "loss": 15.3243, "step": 403 }, { "epoch": 0.023163145371671013, "grad_norm": 0.0, "learning_rate": 9.6008297449537e-06, "loss": 15.6436, "step": 404 }, { "epoch": 0.023220479889917725, "grad_norm": 0.0, "learning_rate": 9.598403007117748e-06, "loss": 15.6834, "step": 405 }, { "epoch": 0.023277814408164437, "grad_norm": 0.0, "learning_rate": 9.595969223592544e-06, "loss": 15.6098, "step": 406 }, { "epoch": 0.023335148926411146, "grad_norm": 0.0, "learning_rate": 9.593528398107137e-06, "loss": 15.7385, "step": 407 }, { "epoch": 0.023392483444657858, "grad_norm": 0.0, "learning_rate": 9.591080534401371e-06, "loss": 15.3407, "step": 408 }, { "epoch": 0.023449817962904566, "grad_norm": 0.0, "learning_rate": 9.588625636225871e-06, "loss": 15.4649, "step": 409 }, { "epoch": 0.02350715248115128, "grad_norm": 0.0, "learning_rate": 9.58616370734204e-06, "loss": 16.0059, "step": 410 }, { "epoch": 0.023564486999397987, "grad_norm": 0.0, "learning_rate": 9.583694751522054e-06, "loss": 15.6544, "step": 411 }, { "epoch": 0.0236218215176447, "grad_norm": 0.0, "learning_rate": 9.58121877254886e-06, "loss": 15.3054, "step": 412 }, { "epoch": 0.023679156035891408, "grad_norm": 0.0, "learning_rate": 9.578735774216155e-06, "loss": 15.3782, "step": 413 }, { "epoch": 0.02373649055413812, "grad_norm": 0.0, "learning_rate": 9.5762457603284e-06, "loss": 15.851, "step": 414 }, { "epoch": 0.02379382507238483, "grad_norm": 0.0, "learning_rate": 9.573748734700806e-06, "loss": 15.4994, "step": 415 }, { "epoch": 0.02385115959063154, "grad_norm": 0.0, "learning_rate": 9.57124470115932e-06, "loss": 15.3128, "step": 416 }, { "epoch": 0.02390849410887825, "grad_norm": 0.0, "learning_rate": 9.568733663540634e-06, "loss": 15.3998, "step": 417 }, { "epoch": 0.02396582862712496, "grad_norm": 0.0, "learning_rate": 9.566215625692168e-06, "loss": 15.5673, "step": 418 }, { "epoch": 0.02402316314537167, "grad_norm": 0.0, "learning_rate": 9.563690591472067e-06, "loss": 15.642, "step": 419 }, { "epoch": 0.024080497663618382, "grad_norm": 0.0, "learning_rate": 9.561158564749202e-06, "loss": 15.7555, "step": 420 }, { "epoch": 0.02413783218186509, "grad_norm": 0.0, "learning_rate": 9.558619549403148e-06, "loss": 15.5263, "step": 421 }, { "epoch": 0.024195166700111802, "grad_norm": 0.0, "learning_rate": 9.556073549324195e-06, "loss": 15.6044, "step": 422 }, { "epoch": 0.02425250121835851, "grad_norm": 0.0, "learning_rate": 9.553520568413335e-06, "loss": 15.8033, "step": 423 }, { "epoch": 0.024309835736605223, "grad_norm": 0.0, "learning_rate": 9.550960610582251e-06, "loss": 15.8008, "step": 424 }, { "epoch": 0.024367170254851935, "grad_norm": 0.0, "learning_rate": 9.548393679753321e-06, "loss": 15.6639, "step": 425 }, { "epoch": 0.024424504773098644, "grad_norm": 0.0, "learning_rate": 9.545819779859607e-06, "loss": 15.2476, "step": 426 }, { "epoch": 0.024481839291345356, "grad_norm": 0.0, "learning_rate": 9.543238914844844e-06, "loss": 15.331, "step": 427 }, { "epoch": 0.024539173809592064, "grad_norm": 0.0, "learning_rate": 9.540651088663446e-06, "loss": 15.4785, "step": 428 }, { "epoch": 0.024596508327838777, "grad_norm": 0.0, "learning_rate": 9.538056305280487e-06, "loss": 15.6987, "step": 429 }, { "epoch": 0.024653842846085485, "grad_norm": 0.0, "learning_rate": 9.535454568671705e-06, "loss": 15.2466, "step": 430 }, { "epoch": 0.024711177364332197, "grad_norm": 0.0, "learning_rate": 9.532845882823489e-06, "loss": 15.8279, "step": 431 }, { "epoch": 0.024768511882578906, "grad_norm": 0.0, "learning_rate": 9.530230251732875e-06, "loss": 15.8509, "step": 432 }, { "epoch": 0.024825846400825618, "grad_norm": 0.0, "learning_rate": 9.527607679407545e-06, "loss": 15.6834, "step": 433 }, { "epoch": 0.024883180919072326, "grad_norm": 0.0, "learning_rate": 9.524978169865813e-06, "loss": 15.2951, "step": 434 }, { "epoch": 0.02494051543731904, "grad_norm": 0.0, "learning_rate": 9.522341727136622e-06, "loss": 15.8398, "step": 435 }, { "epoch": 0.024997849955565747, "grad_norm": 0.0, "learning_rate": 9.519698355259537e-06, "loss": 15.735, "step": 436 }, { "epoch": 0.02505518447381246, "grad_norm": 0.0, "learning_rate": 9.517048058284746e-06, "loss": 15.2163, "step": 437 }, { "epoch": 0.025112518992059168, "grad_norm": 0.0, "learning_rate": 9.51439084027304e-06, "loss": 15.9837, "step": 438 }, { "epoch": 0.02516985351030588, "grad_norm": 0.0, "learning_rate": 9.51172670529582e-06, "loss": 15.5941, "step": 439 }, { "epoch": 0.02522718802855259, "grad_norm": 0.0, "learning_rate": 9.50905565743508e-06, "loss": 15.6597, "step": 440 }, { "epoch": 0.0252845225467993, "grad_norm": 0.0, "learning_rate": 9.506377700783412e-06, "loss": 15.6818, "step": 441 }, { "epoch": 0.025341857065046013, "grad_norm": 0.0, "learning_rate": 9.503692839443988e-06, "loss": 16.0401, "step": 442 }, { "epoch": 0.02539919158329272, "grad_norm": 0.0, "learning_rate": 9.501001077530563e-06, "loss": 15.2495, "step": 443 }, { "epoch": 0.025456526101539433, "grad_norm": 0.0, "learning_rate": 9.498302419167465e-06, "loss": 15.6474, "step": 444 }, { "epoch": 0.025513860619786142, "grad_norm": 0.0, "learning_rate": 9.495596868489588e-06, "loss": 15.633, "step": 445 }, { "epoch": 0.025571195138032854, "grad_norm": 0.0, "learning_rate": 9.492884429642383e-06, "loss": 15.1942, "step": 446 }, { "epoch": 0.025628529656279563, "grad_norm": 0.0, "learning_rate": 9.490165106781863e-06, "loss": 15.7698, "step": 447 }, { "epoch": 0.025685864174526275, "grad_norm": 0.0, "learning_rate": 9.487438904074581e-06, "loss": 15.7203, "step": 448 }, { "epoch": 0.025743198692772983, "grad_norm": 0.0, "learning_rate": 9.484705825697635e-06, "loss": 15.8956, "step": 449 }, { "epoch": 0.025800533211019695, "grad_norm": 0.0, "learning_rate": 9.481965875838657e-06, "loss": 15.7252, "step": 450 }, { "epoch": 0.025857867729266404, "grad_norm": 0.0, "learning_rate": 9.47921905869581e-06, "loss": 15.5917, "step": 451 }, { "epoch": 0.025915202247513116, "grad_norm": 0.0, "learning_rate": 9.476465378477773e-06, "loss": 15.7833, "step": 452 }, { "epoch": 0.025972536765759825, "grad_norm": 0.0, "learning_rate": 9.473704839403748e-06, "loss": 15.8857, "step": 453 }, { "epoch": 0.026029871284006537, "grad_norm": 0.0, "learning_rate": 9.47093744570344e-06, "loss": 15.9607, "step": 454 }, { "epoch": 0.026087205802253245, "grad_norm": 0.0, "learning_rate": 9.468163201617063e-06, "loss": 15.846, "step": 455 }, { "epoch": 0.026144540320499957, "grad_norm": 0.0, "learning_rate": 9.465382111395319e-06, "loss": 15.6212, "step": 456 }, { "epoch": 0.026201874838746666, "grad_norm": 0.0, "learning_rate": 9.462594179299408e-06, "loss": 15.5892, "step": 457 }, { "epoch": 0.026259209356993378, "grad_norm": 0.0, "learning_rate": 9.459799409601006e-06, "loss": 15.3363, "step": 458 }, { "epoch": 0.02631654387524009, "grad_norm": 0.0, "learning_rate": 9.456997806582272e-06, "loss": 15.6226, "step": 459 }, { "epoch": 0.0263738783934868, "grad_norm": 0.0, "learning_rate": 9.45418937453583e-06, "loss": 15.5168, "step": 460 }, { "epoch": 0.02643121291173351, "grad_norm": 0.0, "learning_rate": 9.45137411776477e-06, "loss": 15.5484, "step": 461 }, { "epoch": 0.02648854742998022, "grad_norm": 0.0, "learning_rate": 9.44855204058264e-06, "loss": 15.8039, "step": 462 }, { "epoch": 0.02654588194822693, "grad_norm": 0.0, "learning_rate": 9.445723147313434e-06, "loss": 15.3607, "step": 463 }, { "epoch": 0.02660321646647364, "grad_norm": 0.0, "learning_rate": 9.442887442291593e-06, "loss": 15.6776, "step": 464 }, { "epoch": 0.026660550984720352, "grad_norm": 0.0, "learning_rate": 9.440044929861995e-06, "loss": 15.5051, "step": 465 }, { "epoch": 0.02671788550296706, "grad_norm": 0.0, "learning_rate": 9.437195614379947e-06, "loss": 15.5395, "step": 466 }, { "epoch": 0.026775220021213773, "grad_norm": 0.0, "learning_rate": 9.43433950021118e-06, "loss": 15.0945, "step": 467 }, { "epoch": 0.02683255453946048, "grad_norm": 0.0, "learning_rate": 9.431476591731842e-06, "loss": 15.5566, "step": 468 }, { "epoch": 0.026889889057707193, "grad_norm": 0.0, "learning_rate": 9.428606893328493e-06, "loss": 15.6923, "step": 469 }, { "epoch": 0.026947223575953902, "grad_norm": 0.0, "learning_rate": 9.425730409398094e-06, "loss": 15.7286, "step": 470 }, { "epoch": 0.027004558094200614, "grad_norm": 0.0, "learning_rate": 9.422847144348002e-06, "loss": 15.5483, "step": 471 }, { "epoch": 0.027061892612447323, "grad_norm": 0.0, "learning_rate": 9.41995710259597e-06, "loss": 15.5248, "step": 472 }, { "epoch": 0.027119227130694035, "grad_norm": 0.0, "learning_rate": 9.417060288570126e-06, "loss": 15.9598, "step": 473 }, { "epoch": 0.027176561648940743, "grad_norm": 0.0, "learning_rate": 9.414156706708978e-06, "loss": 15.779, "step": 474 }, { "epoch": 0.027233896167187455, "grad_norm": 0.0, "learning_rate": 9.41124636146141e-06, "loss": 15.7317, "step": 475 }, { "epoch": 0.027291230685434164, "grad_norm": 0.0, "learning_rate": 9.408329257286658e-06, "loss": 15.4797, "step": 476 }, { "epoch": 0.027348565203680876, "grad_norm": 0.0, "learning_rate": 9.405405398654322e-06, "loss": 15.865, "step": 477 }, { "epoch": 0.027405899721927588, "grad_norm": 0.0, "learning_rate": 9.402474790044348e-06, "loss": 15.3192, "step": 478 }, { "epoch": 0.027463234240174297, "grad_norm": 0.0, "learning_rate": 9.399537435947023e-06, "loss": 15.4034, "step": 479 }, { "epoch": 0.02752056875842101, "grad_norm": 0.0, "learning_rate": 9.396593340862972e-06, "loss": 15.5196, "step": 480 }, { "epoch": 0.027577903276667717, "grad_norm": 0.0, "learning_rate": 9.39364250930315e-06, "loss": 16.0287, "step": 481 }, { "epoch": 0.02763523779491443, "grad_norm": 0.0, "learning_rate": 9.39068494578883e-06, "loss": 15.6026, "step": 482 }, { "epoch": 0.027692572313161138, "grad_norm": 0.0, "learning_rate": 9.3877206548516e-06, "loss": 15.3239, "step": 483 }, { "epoch": 0.02774990683140785, "grad_norm": 0.0, "learning_rate": 9.384749641033358e-06, "loss": 15.66, "step": 484 }, { "epoch": 0.02780724134965456, "grad_norm": 0.0, "learning_rate": 9.381771908886303e-06, "loss": 15.6269, "step": 485 }, { "epoch": 0.02786457586790127, "grad_norm": 0.0, "learning_rate": 9.378787462972925e-06, "loss": 15.63, "step": 486 }, { "epoch": 0.02792191038614798, "grad_norm": 0.0, "learning_rate": 9.375796307866003e-06, "loss": 15.5099, "step": 487 }, { "epoch": 0.02797924490439469, "grad_norm": 0.0, "learning_rate": 9.372798448148597e-06, "loss": 15.456, "step": 488 }, { "epoch": 0.0280365794226414, "grad_norm": 0.0, "learning_rate": 9.369793888414036e-06, "loss": 15.8082, "step": 489 }, { "epoch": 0.028093913940888112, "grad_norm": 0.0, "learning_rate": 9.366782633265917e-06, "loss": 15.6783, "step": 490 }, { "epoch": 0.02815124845913482, "grad_norm": 0.0, "learning_rate": 9.363764687318097e-06, "loss": 15.4574, "step": 491 }, { "epoch": 0.028208582977381533, "grad_norm": 0.0, "learning_rate": 9.360740055194682e-06, "loss": 15.6102, "step": 492 }, { "epoch": 0.02826591749562824, "grad_norm": 0.0, "learning_rate": 9.357708741530025e-06, "loss": 15.3994, "step": 493 }, { "epoch": 0.028323252013874953, "grad_norm": 0.0, "learning_rate": 9.354670750968716e-06, "loss": 15.3506, "step": 494 }, { "epoch": 0.028380586532121665, "grad_norm": 0.0, "learning_rate": 9.351626088165574e-06, "loss": 15.783, "step": 495 }, { "epoch": 0.028437921050368374, "grad_norm": 0.0, "learning_rate": 9.348574757785642e-06, "loss": 15.5606, "step": 496 }, { "epoch": 0.028495255568615086, "grad_norm": 0.0, "learning_rate": 9.345516764504179e-06, "loss": 15.3805, "step": 497 }, { "epoch": 0.028552590086861795, "grad_norm": 0.0, "learning_rate": 9.342452113006653e-06, "loss": 15.6996, "step": 498 }, { "epoch": 0.028609924605108507, "grad_norm": 0.0, "learning_rate": 9.339380807988734e-06, "loss": 15.3441, "step": 499 }, { "epoch": 0.028667259123355215, "grad_norm": 0.0, "learning_rate": 9.336302854156287e-06, "loss": 15.5148, "step": 500 }, { "epoch": 0.028724593641601927, "grad_norm": 0.0, "learning_rate": 9.333218256225362e-06, "loss": 15.529, "step": 501 }, { "epoch": 0.028781928159848636, "grad_norm": 0.0, "learning_rate": 9.330127018922195e-06, "loss": 15.8365, "step": 502 }, { "epoch": 0.028839262678095348, "grad_norm": 0.0, "learning_rate": 9.327029146983184e-06, "loss": 15.9286, "step": 503 }, { "epoch": 0.028896597196342057, "grad_norm": 0.0, "learning_rate": 9.323924645154906e-06, "loss": 15.4567, "step": 504 }, { "epoch": 0.02895393171458877, "grad_norm": 0.0, "learning_rate": 9.320813518194084e-06, "loss": 15.8201, "step": 505 }, { "epoch": 0.029011266232835477, "grad_norm": 0.0, "learning_rate": 9.317695770867601e-06, "loss": 15.7347, "step": 506 }, { "epoch": 0.02906860075108219, "grad_norm": 0.0, "learning_rate": 9.31457140795248e-06, "loss": 15.5777, "step": 507 }, { "epoch": 0.029125935269328898, "grad_norm": 0.0, "learning_rate": 9.311440434235879e-06, "loss": 15.6935, "step": 508 }, { "epoch": 0.02918326978757561, "grad_norm": 0.0, "learning_rate": 9.30830285451509e-06, "loss": 15.7309, "step": 509 }, { "epoch": 0.02924060430582232, "grad_norm": 0.0, "learning_rate": 9.30515867359752e-06, "loss": 15.6683, "step": 510 }, { "epoch": 0.02929793882406903, "grad_norm": 0.0, "learning_rate": 9.302007896300697e-06, "loss": 15.4964, "step": 511 }, { "epoch": 0.029355273342315743, "grad_norm": 0.0, "learning_rate": 9.298850527452253e-06, "loss": 15.5383, "step": 512 }, { "epoch": 0.02941260786056245, "grad_norm": 0.0, "learning_rate": 9.295686571889919e-06, "loss": 15.6289, "step": 513 }, { "epoch": 0.029469942378809164, "grad_norm": 0.0, "learning_rate": 9.292516034461517e-06, "loss": 15.329, "step": 514 }, { "epoch": 0.029527276897055872, "grad_norm": 0.0, "learning_rate": 9.289338920024958e-06, "loss": 15.5542, "step": 515 }, { "epoch": 0.029584611415302584, "grad_norm": 0.0, "learning_rate": 9.286155233448226e-06, "loss": 15.3286, "step": 516 }, { "epoch": 0.029641945933549293, "grad_norm": 0.0, "learning_rate": 9.28296497960938e-06, "loss": 15.4839, "step": 517 }, { "epoch": 0.029699280451796005, "grad_norm": 0.0, "learning_rate": 9.279768163396535e-06, "loss": 15.6228, "step": 518 }, { "epoch": 0.029756614970042713, "grad_norm": 0.0, "learning_rate": 9.276564789707865e-06, "loss": 15.5924, "step": 519 }, { "epoch": 0.029813949488289426, "grad_norm": 0.0, "learning_rate": 9.273354863451589e-06, "loss": 15.719, "step": 520 }, { "epoch": 0.029871284006536134, "grad_norm": 0.0, "learning_rate": 9.27013838954597e-06, "loss": 15.6843, "step": 521 }, { "epoch": 0.029928618524782846, "grad_norm": 0.0, "learning_rate": 9.266915372919301e-06, "loss": 15.5317, "step": 522 }, { "epoch": 0.029985953043029555, "grad_norm": 0.0, "learning_rate": 9.263685818509895e-06, "loss": 15.5594, "step": 523 }, { "epoch": 0.030043287561276267, "grad_norm": 0.0, "learning_rate": 9.260449731266092e-06, "loss": 15.8769, "step": 524 }, { "epoch": 0.030100622079522975, "grad_norm": 0.0, "learning_rate": 9.257207116146231e-06, "loss": 15.1793, "step": 525 }, { "epoch": 0.030157956597769688, "grad_norm": 0.0, "learning_rate": 9.253957978118664e-06, "loss": 15.6719, "step": 526 }, { "epoch": 0.030215291116016396, "grad_norm": 0.0, "learning_rate": 9.250702322161726e-06, "loss": 15.5782, "step": 527 }, { "epoch": 0.030272625634263108, "grad_norm": 0.0, "learning_rate": 9.24744015326375e-06, "loss": 15.6119, "step": 528 }, { "epoch": 0.030329960152509817, "grad_norm": 0.0, "learning_rate": 9.244171476423037e-06, "loss": 15.4868, "step": 529 }, { "epoch": 0.03038729467075653, "grad_norm": 0.0, "learning_rate": 9.24089629664787e-06, "loss": 15.7071, "step": 530 }, { "epoch": 0.03044462918900324, "grad_norm": 0.0, "learning_rate": 9.237614618956488e-06, "loss": 15.7531, "step": 531 }, { "epoch": 0.03050196370724995, "grad_norm": 0.0, "learning_rate": 9.234326448377089e-06, "loss": 15.436, "step": 532 }, { "epoch": 0.03055929822549666, "grad_norm": 0.0, "learning_rate": 9.231031789947822e-06, "loss": 15.4103, "step": 533 }, { "epoch": 0.03061663274374337, "grad_norm": 0.0, "learning_rate": 9.227730648716771e-06, "loss": 15.3176, "step": 534 }, { "epoch": 0.030673967261990082, "grad_norm": 0.0, "learning_rate": 9.22442302974196e-06, "loss": 15.5685, "step": 535 }, { "epoch": 0.03073130178023679, "grad_norm": 0.0, "learning_rate": 9.221108938091333e-06, "loss": 15.9073, "step": 536 }, { "epoch": 0.030788636298483503, "grad_norm": 0.0, "learning_rate": 9.217788378842749e-06, "loss": 15.6944, "step": 537 }, { "epoch": 0.03084597081673021, "grad_norm": 0.0, "learning_rate": 9.214461357083986e-06, "loss": 15.636, "step": 538 }, { "epoch": 0.030903305334976924, "grad_norm": 0.0, "learning_rate": 9.211127877912715e-06, "loss": 15.4674, "step": 539 }, { "epoch": 0.030960639853223632, "grad_norm": 0.0, "learning_rate": 9.207787946436509e-06, "loss": 16.0203, "step": 540 }, { "epoch": 0.031017974371470344, "grad_norm": 0.0, "learning_rate": 9.204441567772817e-06, "loss": 15.6157, "step": 541 }, { "epoch": 0.031075308889717053, "grad_norm": 0.0, "learning_rate": 9.201088747048974e-06, "loss": 15.929, "step": 542 }, { "epoch": 0.031132643407963765, "grad_norm": 0.0, "learning_rate": 9.197729489402185e-06, "loss": 15.4643, "step": 543 }, { "epoch": 0.031189977926210474, "grad_norm": 0.0, "learning_rate": 9.194363799979517e-06, "loss": 15.5438, "step": 544 }, { "epoch": 0.031247312444457186, "grad_norm": 0.0, "learning_rate": 9.19099168393789e-06, "loss": 15.5475, "step": 545 }, { "epoch": 0.031304646962703894, "grad_norm": 0.0, "learning_rate": 9.18761314644407e-06, "loss": 15.1468, "step": 546 }, { "epoch": 0.03136198148095061, "grad_norm": 0.0, "learning_rate": 9.184228192674667e-06, "loss": 15.3172, "step": 547 }, { "epoch": 0.03141931599919732, "grad_norm": 0.0, "learning_rate": 9.180836827816118e-06, "loss": 15.5644, "step": 548 }, { "epoch": 0.03147665051744403, "grad_norm": 0.0, "learning_rate": 9.177439057064684e-06, "loss": 15.5696, "step": 549 }, { "epoch": 0.031533985035690736, "grad_norm": 0.0, "learning_rate": 9.17403488562644e-06, "loss": 15.3968, "step": 550 }, { "epoch": 0.03159131955393745, "grad_norm": 0.0, "learning_rate": 9.170624318717274e-06, "loss": 15.6295, "step": 551 }, { "epoch": 0.03164865407218416, "grad_norm": 0.0, "learning_rate": 9.167207361562863e-06, "loss": 15.5716, "step": 552 }, { "epoch": 0.03170598859043087, "grad_norm": 0.0, "learning_rate": 9.163784019398686e-06, "loss": 15.6282, "step": 553 }, { "epoch": 0.03176332310867758, "grad_norm": 0.0, "learning_rate": 9.160354297469994e-06, "loss": 15.2821, "step": 554 }, { "epoch": 0.03182065762692429, "grad_norm": 0.0, "learning_rate": 9.156918201031823e-06, "loss": 15.4259, "step": 555 }, { "epoch": 0.031877992145171, "grad_norm": 0.0, "learning_rate": 9.153475735348973e-06, "loss": 15.3835, "step": 556 }, { "epoch": 0.03193532666341771, "grad_norm": 0.0, "learning_rate": 9.150026905696e-06, "loss": 15.6208, "step": 557 }, { "epoch": 0.03199266118166442, "grad_norm": 0.0, "learning_rate": 9.146571717357211e-06, "loss": 15.3262, "step": 558 }, { "epoch": 0.032049995699911134, "grad_norm": 0.0, "learning_rate": 9.143110175626662e-06, "loss": 15.6206, "step": 559 }, { "epoch": 0.03210733021815784, "grad_norm": 0.0, "learning_rate": 9.139642285808137e-06, "loss": 15.5763, "step": 560 }, { "epoch": 0.03216466473640455, "grad_norm": 0.0, "learning_rate": 9.136168053215148e-06, "loss": 15.5814, "step": 561 }, { "epoch": 0.03222199925465126, "grad_norm": 0.0, "learning_rate": 9.13268748317093e-06, "loss": 15.8486, "step": 562 }, { "epoch": 0.032279333772897975, "grad_norm": 0.0, "learning_rate": 9.12920058100842e-06, "loss": 15.5351, "step": 563 }, { "epoch": 0.032336668291144684, "grad_norm": 0.0, "learning_rate": 9.125707352070265e-06, "loss": 15.677, "step": 564 }, { "epoch": 0.03239400280939139, "grad_norm": 0.0, "learning_rate": 9.122207801708802e-06, "loss": 15.7423, "step": 565 }, { "epoch": 0.03245133732763811, "grad_norm": 0.0, "learning_rate": 9.118701935286054e-06, "loss": 15.7189, "step": 566 }, { "epoch": 0.032508671845884816, "grad_norm": 0.0, "learning_rate": 9.115189758173721e-06, "loss": 15.9269, "step": 567 }, { "epoch": 0.032566006364131525, "grad_norm": 0.0, "learning_rate": 9.111671275753175e-06, "loss": 15.6356, "step": 568 }, { "epoch": 0.032623340882378234, "grad_norm": 0.0, "learning_rate": 9.108146493415448e-06, "loss": 15.61, "step": 569 }, { "epoch": 0.03268067540062495, "grad_norm": 0.0, "learning_rate": 9.10461541656122e-06, "loss": 15.6629, "step": 570 }, { "epoch": 0.03273800991887166, "grad_norm": 0.0, "learning_rate": 9.101078050600823e-06, "loss": 15.712, "step": 571 }, { "epoch": 0.032795344437118366, "grad_norm": 0.0, "learning_rate": 9.097534400954218e-06, "loss": 15.4088, "step": 572 }, { "epoch": 0.032852678955365075, "grad_norm": 0.0, "learning_rate": 9.093984473051e-06, "loss": 15.3972, "step": 573 }, { "epoch": 0.03291001347361179, "grad_norm": 0.0, "learning_rate": 9.090428272330381e-06, "loss": 15.8337, "step": 574 }, { "epoch": 0.0329673479918585, "grad_norm": 0.0, "learning_rate": 9.086865804241184e-06, "loss": 15.5395, "step": 575 }, { "epoch": 0.03302468251010521, "grad_norm": 0.0, "learning_rate": 9.083297074241833e-06, "loss": 15.8856, "step": 576 }, { "epoch": 0.033082017028351916, "grad_norm": 0.0, "learning_rate": 9.079722087800353e-06, "loss": 15.5518, "step": 577 }, { "epoch": 0.03313935154659863, "grad_norm": 0.0, "learning_rate": 9.076140850394345e-06, "loss": 15.6313, "step": 578 }, { "epoch": 0.03319668606484534, "grad_norm": 0.0, "learning_rate": 9.072553367511e-06, "loss": 15.6402, "step": 579 }, { "epoch": 0.03325402058309205, "grad_norm": 0.0, "learning_rate": 9.06895964464707e-06, "loss": 15.7837, "step": 580 }, { "epoch": 0.03331135510133876, "grad_norm": 0.0, "learning_rate": 9.065359687308865e-06, "loss": 15.3619, "step": 581 }, { "epoch": 0.03336868961958547, "grad_norm": 0.0, "learning_rate": 9.061753501012257e-06, "loss": 15.6569, "step": 582 }, { "epoch": 0.03342602413783218, "grad_norm": 0.0, "learning_rate": 9.058141091282656e-06, "loss": 15.8729, "step": 583 }, { "epoch": 0.03348335865607889, "grad_norm": 0.0, "learning_rate": 9.054522463655008e-06, "loss": 15.5701, "step": 584 }, { "epoch": 0.033540693174325606, "grad_norm": 0.0, "learning_rate": 9.050897623673791e-06, "loss": 15.7377, "step": 585 }, { "epoch": 0.033598027692572315, "grad_norm": 0.0, "learning_rate": 9.047266576892993e-06, "loss": 15.5294, "step": 586 }, { "epoch": 0.03365536221081902, "grad_norm": 0.0, "learning_rate": 9.043629328876117e-06, "loss": 15.7416, "step": 587 }, { "epoch": 0.03371269672906573, "grad_norm": 0.0, "learning_rate": 9.039985885196171e-06, "loss": 15.628, "step": 588 }, { "epoch": 0.03377003124731245, "grad_norm": 0.0, "learning_rate": 9.036336251435647e-06, "loss": 15.6772, "step": 589 }, { "epoch": 0.033827365765559156, "grad_norm": 0.0, "learning_rate": 9.032680433186531e-06, "loss": 16.1895, "step": 590 }, { "epoch": 0.033884700283805864, "grad_norm": 0.0, "learning_rate": 9.029018436050278e-06, "loss": 15.6948, "step": 591 }, { "epoch": 0.03394203480205257, "grad_norm": 0.0, "learning_rate": 9.025350265637816e-06, "loss": 15.3231, "step": 592 }, { "epoch": 0.03399936932029929, "grad_norm": 0.0, "learning_rate": 9.021675927569525e-06, "loss": 15.5173, "step": 593 }, { "epoch": 0.034056703838546, "grad_norm": 0.0, "learning_rate": 9.017995427475238e-06, "loss": 15.3488, "step": 594 }, { "epoch": 0.034114038356792706, "grad_norm": 0.0, "learning_rate": 9.014308770994235e-06, "loss": 15.8426, "step": 595 }, { "epoch": 0.034171372875039414, "grad_norm": 0.0, "learning_rate": 9.01061596377522e-06, "loss": 15.4642, "step": 596 }, { "epoch": 0.03422870739328613, "grad_norm": 0.0, "learning_rate": 9.006917011476326e-06, "loss": 15.4795, "step": 597 }, { "epoch": 0.03428604191153284, "grad_norm": 0.0, "learning_rate": 9.003211919765102e-06, "loss": 15.8165, "step": 598 }, { "epoch": 0.03434337642977955, "grad_norm": 0.0, "learning_rate": 8.999500694318501e-06, "loss": 15.5455, "step": 599 }, { "epoch": 0.03440071094802626, "grad_norm": 0.0, "learning_rate": 8.995783340822878e-06, "loss": 15.7814, "step": 600 }, { "epoch": 0.03445804546627297, "grad_norm": 0.0, "learning_rate": 8.992059864973972e-06, "loss": 15.9455, "step": 601 }, { "epoch": 0.03451537998451968, "grad_norm": 0.0, "learning_rate": 8.988330272476908e-06, "loss": 15.2725, "step": 602 }, { "epoch": 0.03457271450276639, "grad_norm": 0.0, "learning_rate": 8.98459456904618e-06, "loss": 15.4554, "step": 603 }, { "epoch": 0.034630049021013104, "grad_norm": 0.0, "learning_rate": 8.980852760405645e-06, "loss": 15.5684, "step": 604 }, { "epoch": 0.03468738353925981, "grad_norm": 0.0, "learning_rate": 8.977104852288519e-06, "loss": 15.7634, "step": 605 }, { "epoch": 0.03474471805750652, "grad_norm": 0.0, "learning_rate": 8.973350850437355e-06, "loss": 15.6467, "step": 606 }, { "epoch": 0.03480205257575323, "grad_norm": 0.0, "learning_rate": 8.96959076060405e-06, "loss": 15.7511, "step": 607 }, { "epoch": 0.034859387093999945, "grad_norm": 0.0, "learning_rate": 8.965824588549827e-06, "loss": 15.5423, "step": 608 }, { "epoch": 0.034916721612246654, "grad_norm": 0.0, "learning_rate": 8.962052340045228e-06, "loss": 15.4482, "step": 609 }, { "epoch": 0.03497405613049336, "grad_norm": 0.0, "learning_rate": 8.958274020870107e-06, "loss": 15.5465, "step": 610 }, { "epoch": 0.03503139064874007, "grad_norm": 0.0, "learning_rate": 8.954489636813615e-06, "loss": 15.5435, "step": 611 }, { "epoch": 0.03508872516698679, "grad_norm": 0.0, "learning_rate": 8.9506991936742e-06, "loss": 15.6975, "step": 612 }, { "epoch": 0.035146059685233495, "grad_norm": 0.0, "learning_rate": 8.946902697259593e-06, "loss": 15.788, "step": 613 }, { "epoch": 0.035203394203480204, "grad_norm": 0.0, "learning_rate": 8.943100153386798e-06, "loss": 15.9397, "step": 614 }, { "epoch": 0.03526072872172691, "grad_norm": 0.0, "learning_rate": 8.939291567882087e-06, "loss": 15.7003, "step": 615 }, { "epoch": 0.03531806323997363, "grad_norm": 0.0, "learning_rate": 8.935476946580988e-06, "loss": 15.5504, "step": 616 }, { "epoch": 0.03537539775822034, "grad_norm": 0.0, "learning_rate": 8.931656295328275e-06, "loss": 15.5768, "step": 617 }, { "epoch": 0.035432732276467045, "grad_norm": 0.0, "learning_rate": 8.927829619977965e-06, "loss": 15.6244, "step": 618 }, { "epoch": 0.03549006679471376, "grad_norm": 0.0, "learning_rate": 8.923996926393306e-06, "loss": 15.4595, "step": 619 }, { "epoch": 0.03554740131296047, "grad_norm": 0.0, "learning_rate": 8.92015822044676e-06, "loss": 15.5153, "step": 620 }, { "epoch": 0.03560473583120718, "grad_norm": 0.0, "learning_rate": 8.91631350802001e-06, "loss": 15.5014, "step": 621 }, { "epoch": 0.03566207034945389, "grad_norm": 0.0, "learning_rate": 8.912462795003932e-06, "loss": 15.3184, "step": 622 }, { "epoch": 0.0357194048677006, "grad_norm": 0.0, "learning_rate": 8.908606087298608e-06, "loss": 15.8297, "step": 623 }, { "epoch": 0.03577673938594731, "grad_norm": 0.0, "learning_rate": 8.904743390813296e-06, "loss": 15.3076, "step": 624 }, { "epoch": 0.03583407390419402, "grad_norm": 0.0, "learning_rate": 8.900874711466436e-06, "loss": 15.5906, "step": 625 }, { "epoch": 0.03589140842244073, "grad_norm": 0.0, "learning_rate": 8.897000055185628e-06, "loss": 15.3853, "step": 626 }, { "epoch": 0.03594874294068744, "grad_norm": 0.0, "learning_rate": 8.893119427907636e-06, "loss": 15.7627, "step": 627 }, { "epoch": 0.03600607745893415, "grad_norm": 0.0, "learning_rate": 8.889232835578372e-06, "loss": 15.5701, "step": 628 }, { "epoch": 0.03606341197718086, "grad_norm": 0.0, "learning_rate": 8.885340284152883e-06, "loss": 15.4473, "step": 629 }, { "epoch": 0.03612074649542757, "grad_norm": 0.0, "learning_rate": 8.881441779595355e-06, "loss": 15.4852, "step": 630 }, { "epoch": 0.036178081013674285, "grad_norm": 0.0, "learning_rate": 8.877537327879087e-06, "loss": 15.9832, "step": 631 }, { "epoch": 0.03623541553192099, "grad_norm": 0.0, "learning_rate": 8.873626934986492e-06, "loss": 15.3101, "step": 632 }, { "epoch": 0.0362927500501677, "grad_norm": 0.0, "learning_rate": 8.869710606909091e-06, "loss": 15.407, "step": 633 }, { "epoch": 0.03635008456841441, "grad_norm": 0.0, "learning_rate": 8.865788349647496e-06, "loss": 15.7147, "step": 634 }, { "epoch": 0.036407419086661126, "grad_norm": 0.0, "learning_rate": 8.8618601692114e-06, "loss": 15.4474, "step": 635 }, { "epoch": 0.036464753604907835, "grad_norm": 0.0, "learning_rate": 8.857926071619578e-06, "loss": 15.5297, "step": 636 }, { "epoch": 0.03652208812315454, "grad_norm": 0.0, "learning_rate": 8.853986062899869e-06, "loss": 15.1538, "step": 637 }, { "epoch": 0.03657942264140126, "grad_norm": 0.0, "learning_rate": 8.850040149089164e-06, "loss": 15.865, "step": 638 }, { "epoch": 0.03663675715964797, "grad_norm": 0.0, "learning_rate": 8.846088336233407e-06, "loss": 15.3478, "step": 639 }, { "epoch": 0.036694091677894676, "grad_norm": 0.0, "learning_rate": 8.842130630387583e-06, "loss": 15.6966, "step": 640 }, { "epoch": 0.036751426196141385, "grad_norm": 0.0, "learning_rate": 8.838167037615699e-06, "loss": 15.8752, "step": 641 }, { "epoch": 0.0368087607143881, "grad_norm": 0.0, "learning_rate": 8.834197563990789e-06, "loss": 15.4659, "step": 642 }, { "epoch": 0.03686609523263481, "grad_norm": 0.0, "learning_rate": 8.83022221559489e-06, "loss": 15.51, "step": 643 }, { "epoch": 0.03692342975088152, "grad_norm": 0.0, "learning_rate": 8.826240998519052e-06, "loss": 15.7284, "step": 644 }, { "epoch": 0.036980764269128226, "grad_norm": 0.0, "learning_rate": 8.822253918863301e-06, "loss": 15.3795, "step": 645 }, { "epoch": 0.03703809878737494, "grad_norm": 0.0, "learning_rate": 8.818260982736662e-06, "loss": 15.6749, "step": 646 }, { "epoch": 0.03709543330562165, "grad_norm": 0.0, "learning_rate": 8.814262196257121e-06, "loss": 15.7075, "step": 647 }, { "epoch": 0.03715276782386836, "grad_norm": 0.0, "learning_rate": 8.810257565551634e-06, "loss": 15.3014, "step": 648 }, { "epoch": 0.03721010234211507, "grad_norm": 0.0, "learning_rate": 8.80624709675611e-06, "loss": 15.6637, "step": 649 }, { "epoch": 0.03726743686036178, "grad_norm": 0.0, "learning_rate": 8.802230796015406e-06, "loss": 15.8715, "step": 650 }, { "epoch": 0.03732477137860849, "grad_norm": 0.0, "learning_rate": 8.79820866948331e-06, "loss": 15.6308, "step": 651 }, { "epoch": 0.0373821058968552, "grad_norm": 0.0, "learning_rate": 8.794180723322537e-06, "loss": 15.8427, "step": 652 }, { "epoch": 0.037439440415101916, "grad_norm": 0.0, "learning_rate": 8.790146963704722e-06, "loss": 15.6036, "step": 653 }, { "epoch": 0.037496774933348624, "grad_norm": 0.0, "learning_rate": 8.786107396810405e-06, "loss": 15.7593, "step": 654 }, { "epoch": 0.03755410945159533, "grad_norm": 0.0, "learning_rate": 8.782062028829028e-06, "loss": 15.302, "step": 655 }, { "epoch": 0.03761144396984204, "grad_norm": 0.0, "learning_rate": 8.778010865958913e-06, "loss": 15.7448, "step": 656 }, { "epoch": 0.03766877848808876, "grad_norm": 0.0, "learning_rate": 8.773953914407267e-06, "loss": 15.527, "step": 657 }, { "epoch": 0.037726113006335465, "grad_norm": 0.0, "learning_rate": 8.769891180390168e-06, "loss": 15.6792, "step": 658 }, { "epoch": 0.037783447524582174, "grad_norm": 0.0, "learning_rate": 8.765822670132549e-06, "loss": 15.4365, "step": 659 }, { "epoch": 0.03784078204282888, "grad_norm": 0.0, "learning_rate": 8.761748389868197e-06, "loss": 15.2451, "step": 660 }, { "epoch": 0.0378981165610756, "grad_norm": 0.0, "learning_rate": 8.757668345839739e-06, "loss": 15.2096, "step": 661 }, { "epoch": 0.03795545107932231, "grad_norm": 0.0, "learning_rate": 8.75358254429863e-06, "loss": 15.507, "step": 662 }, { "epoch": 0.038012785597569015, "grad_norm": 0.0, "learning_rate": 8.749490991505153e-06, "loss": 15.5514, "step": 663 }, { "epoch": 0.038070120115815724, "grad_norm": 0.0, "learning_rate": 8.745393693728395e-06, "loss": 15.6321, "step": 664 }, { "epoch": 0.03812745463406244, "grad_norm": 0.0, "learning_rate": 8.741290657246255e-06, "loss": 15.6004, "step": 665 }, { "epoch": 0.03818478915230915, "grad_norm": 0.0, "learning_rate": 8.737181888345419e-06, "loss": 15.2442, "step": 666 }, { "epoch": 0.03824212367055586, "grad_norm": 0.0, "learning_rate": 8.733067393321354e-06, "loss": 15.4412, "step": 667 }, { "epoch": 0.038299458188802565, "grad_norm": 0.0, "learning_rate": 8.728947178478308e-06, "loss": 15.5328, "step": 668 }, { "epoch": 0.03835679270704928, "grad_norm": 0.0, "learning_rate": 8.724821250129286e-06, "loss": 15.3495, "step": 669 }, { "epoch": 0.03841412722529599, "grad_norm": 0.0, "learning_rate": 8.72068961459605e-06, "loss": 15.3269, "step": 670 }, { "epoch": 0.0384714617435427, "grad_norm": 0.0, "learning_rate": 8.716552278209106e-06, "loss": 15.337, "step": 671 }, { "epoch": 0.038528796261789414, "grad_norm": 0.0, "learning_rate": 8.712409247307696e-06, "loss": 15.5897, "step": 672 }, { "epoch": 0.03858613078003612, "grad_norm": 0.0, "learning_rate": 8.708260528239788e-06, "loss": 15.5198, "step": 673 }, { "epoch": 0.03864346529828283, "grad_norm": 0.0, "learning_rate": 8.704106127362063e-06, "loss": 15.4092, "step": 674 }, { "epoch": 0.03870079981652954, "grad_norm": 0.0, "learning_rate": 8.699946051039907e-06, "loss": 15.6915, "step": 675 }, { "epoch": 0.038758134334776255, "grad_norm": 0.0, "learning_rate": 8.695780305647405e-06, "loss": 15.4175, "step": 676 }, { "epoch": 0.038815468853022964, "grad_norm": 0.0, "learning_rate": 8.691608897567328e-06, "loss": 15.4632, "step": 677 }, { "epoch": 0.03887280337126967, "grad_norm": 0.0, "learning_rate": 8.68743183319112e-06, "loss": 15.816, "step": 678 }, { "epoch": 0.03893013788951638, "grad_norm": 0.0, "learning_rate": 8.683249118918895e-06, "loss": 15.2258, "step": 679 }, { "epoch": 0.038987472407763096, "grad_norm": 0.0, "learning_rate": 8.67906076115942e-06, "loss": 15.7341, "step": 680 }, { "epoch": 0.039044806926009805, "grad_norm": 0.0, "learning_rate": 8.674866766330117e-06, "loss": 15.3757, "step": 681 }, { "epoch": 0.039102141444256514, "grad_norm": 0.0, "learning_rate": 8.670667140857034e-06, "loss": 15.7797, "step": 682 }, { "epoch": 0.03915947596250322, "grad_norm": 0.0, "learning_rate": 8.666461891174854e-06, "loss": 15.3798, "step": 683 }, { "epoch": 0.03921681048074994, "grad_norm": 0.0, "learning_rate": 8.662251023726874e-06, "loss": 15.3629, "step": 684 }, { "epoch": 0.039274144998996646, "grad_norm": 0.0, "learning_rate": 8.658034544965003e-06, "loss": 15.5817, "step": 685 }, { "epoch": 0.039331479517243355, "grad_norm": 0.0, "learning_rate": 8.653812461349742e-06, "loss": 15.8776, "step": 686 }, { "epoch": 0.03938881403549006, "grad_norm": 0.0, "learning_rate": 8.649584779350184e-06, "loss": 15.4464, "step": 687 }, { "epoch": 0.03944614855373678, "grad_norm": 0.0, "learning_rate": 8.645351505443997e-06, "loss": 15.5939, "step": 688 }, { "epoch": 0.03950348307198349, "grad_norm": 0.0, "learning_rate": 8.641112646117419e-06, "loss": 15.7406, "step": 689 }, { "epoch": 0.039560817590230196, "grad_norm": 0.0, "learning_rate": 8.636868207865244e-06, "loss": 15.4987, "step": 690 }, { "epoch": 0.03961815210847691, "grad_norm": 0.0, "learning_rate": 8.632618197190817e-06, "loss": 15.8416, "step": 691 }, { "epoch": 0.03967548662672362, "grad_norm": 0.0, "learning_rate": 8.628362620606017e-06, "loss": 15.942, "step": 692 }, { "epoch": 0.03973282114497033, "grad_norm": 0.0, "learning_rate": 8.624101484631255e-06, "loss": 15.5289, "step": 693 }, { "epoch": 0.03979015566321704, "grad_norm": 0.0, "learning_rate": 8.619834795795458e-06, "loss": 15.5221, "step": 694 }, { "epoch": 0.03984749018146375, "grad_norm": 0.0, "learning_rate": 8.615562560636063e-06, "loss": 15.4384, "step": 695 }, { "epoch": 0.03990482469971046, "grad_norm": 0.0, "learning_rate": 8.611284785699001e-06, "loss": 15.4652, "step": 696 }, { "epoch": 0.03996215921795717, "grad_norm": 0.0, "learning_rate": 8.607001477538697e-06, "loss": 15.6511, "step": 697 }, { "epoch": 0.04001949373620388, "grad_norm": 0.0, "learning_rate": 8.602712642718047e-06, "loss": 15.5522, "step": 698 }, { "epoch": 0.040076828254450594, "grad_norm": 0.0, "learning_rate": 8.598418287808424e-06, "loss": 15.8704, "step": 699 }, { "epoch": 0.0401341627726973, "grad_norm": 0.0, "learning_rate": 8.594118419389648e-06, "loss": 15.3411, "step": 700 }, { "epoch": 0.04019149729094401, "grad_norm": 0.0, "learning_rate": 8.589813044049995e-06, "loss": 15.7049, "step": 701 }, { "epoch": 0.04024883180919072, "grad_norm": 0.0, "learning_rate": 8.585502168386177e-06, "loss": 15.7626, "step": 702 }, { "epoch": 0.040306166327437436, "grad_norm": 0.0, "learning_rate": 8.581185799003334e-06, "loss": 15.8192, "step": 703 }, { "epoch": 0.040363500845684144, "grad_norm": 0.0, "learning_rate": 8.576863942515019e-06, "loss": 15.7336, "step": 704 }, { "epoch": 0.04042083536393085, "grad_norm": 0.0, "learning_rate": 8.572536605543197e-06, "loss": 15.7258, "step": 705 }, { "epoch": 0.04047816988217757, "grad_norm": 0.0, "learning_rate": 8.568203794718228e-06, "loss": 15.6306, "step": 706 }, { "epoch": 0.04053550440042428, "grad_norm": 0.0, "learning_rate": 8.563865516678863e-06, "loss": 15.858, "step": 707 }, { "epoch": 0.040592838918670986, "grad_norm": 0.0, "learning_rate": 8.559521778072225e-06, "loss": 15.4747, "step": 708 }, { "epoch": 0.040650173436917694, "grad_norm": 0.0, "learning_rate": 8.555172585553804e-06, "loss": 15.491, "step": 709 }, { "epoch": 0.04070750795516441, "grad_norm": 0.0, "learning_rate": 8.550817945787452e-06, "loss": 15.6943, "step": 710 }, { "epoch": 0.04076484247341112, "grad_norm": 0.0, "learning_rate": 8.546457865445359e-06, "loss": 15.9682, "step": 711 }, { "epoch": 0.04082217699165783, "grad_norm": 0.0, "learning_rate": 8.542092351208058e-06, "loss": 15.6275, "step": 712 }, { "epoch": 0.040879511509904536, "grad_norm": 0.0, "learning_rate": 8.537721409764406e-06, "loss": 15.8521, "step": 713 }, { "epoch": 0.04093684602815125, "grad_norm": 0.0, "learning_rate": 8.533345047811572e-06, "loss": 15.4229, "step": 714 }, { "epoch": 0.04099418054639796, "grad_norm": 0.0, "learning_rate": 8.528963272055036e-06, "loss": 15.3714, "step": 715 }, { "epoch": 0.04105151506464467, "grad_norm": 0.0, "learning_rate": 8.524576089208567e-06, "loss": 15.2679, "step": 716 }, { "epoch": 0.04110884958289138, "grad_norm": 0.0, "learning_rate": 8.520183505994227e-06, "loss": 15.6539, "step": 717 }, { "epoch": 0.04116618410113809, "grad_norm": 0.0, "learning_rate": 8.515785529142339e-06, "loss": 15.5492, "step": 718 }, { "epoch": 0.0412235186193848, "grad_norm": 0.0, "learning_rate": 8.511382165391508e-06, "loss": 15.3739, "step": 719 }, { "epoch": 0.04128085313763151, "grad_norm": 0.0, "learning_rate": 8.50697342148858e-06, "loss": 15.6877, "step": 720 }, { "epoch": 0.04133818765587822, "grad_norm": 0.0, "learning_rate": 8.502559304188644e-06, "loss": 15.6241, "step": 721 }, { "epoch": 0.041395522174124934, "grad_norm": 0.0, "learning_rate": 8.498139820255033e-06, "loss": 15.3811, "step": 722 }, { "epoch": 0.04145285669237164, "grad_norm": 0.0, "learning_rate": 8.49371497645929e-06, "loss": 15.1943, "step": 723 }, { "epoch": 0.04151019121061835, "grad_norm": 0.0, "learning_rate": 8.489284779581179e-06, "loss": 15.5301, "step": 724 }, { "epoch": 0.041567525728865067, "grad_norm": 0.0, "learning_rate": 8.48484923640866e-06, "loss": 15.8323, "step": 725 }, { "epoch": 0.041624860247111775, "grad_norm": 0.0, "learning_rate": 8.480408353737894e-06, "loss": 15.6009, "step": 726 }, { "epoch": 0.041682194765358484, "grad_norm": 0.0, "learning_rate": 8.475962138373212e-06, "loss": 15.5931, "step": 727 }, { "epoch": 0.04173952928360519, "grad_norm": 0.0, "learning_rate": 8.471510597127122e-06, "loss": 15.7055, "step": 728 }, { "epoch": 0.04179686380185191, "grad_norm": 0.0, "learning_rate": 8.467053736820292e-06, "loss": 15.3792, "step": 729 }, { "epoch": 0.041854198320098616, "grad_norm": 0.0, "learning_rate": 8.46259156428154e-06, "loss": 15.7116, "step": 730 }, { "epoch": 0.041911532838345325, "grad_norm": 0.0, "learning_rate": 8.458124086347818e-06, "loss": 15.0395, "step": 731 }, { "epoch": 0.041968867356592034, "grad_norm": 0.0, "learning_rate": 8.453651309864215e-06, "loss": 15.159, "step": 732 }, { "epoch": 0.04202620187483875, "grad_norm": 0.0, "learning_rate": 8.449173241683934e-06, "loss": 15.8408, "step": 733 }, { "epoch": 0.04208353639308546, "grad_norm": 0.0, "learning_rate": 8.444689888668288e-06, "loss": 15.5556, "step": 734 }, { "epoch": 0.042140870911332166, "grad_norm": 0.0, "learning_rate": 8.440201257686684e-06, "loss": 15.5377, "step": 735 }, { "epoch": 0.042198205429578875, "grad_norm": 0.0, "learning_rate": 8.43570735561662e-06, "loss": 15.1917, "step": 736 }, { "epoch": 0.04225553994782559, "grad_norm": 0.0, "learning_rate": 8.43120818934367e-06, "loss": 15.3535, "step": 737 }, { "epoch": 0.0423128744660723, "grad_norm": 0.0, "learning_rate": 8.426703765761468e-06, "loss": 15.2347, "step": 738 }, { "epoch": 0.04237020898431901, "grad_norm": 0.0, "learning_rate": 8.422194091771709e-06, "loss": 15.5976, "step": 739 }, { "epoch": 0.042427543502565716, "grad_norm": 0.0, "learning_rate": 8.417679174284135e-06, "loss": 15.6626, "step": 740 }, { "epoch": 0.04248487802081243, "grad_norm": 0.0, "learning_rate": 8.413159020216512e-06, "loss": 15.7098, "step": 741 }, { "epoch": 0.04254221253905914, "grad_norm": 0.0, "learning_rate": 8.408633636494643e-06, "loss": 15.3637, "step": 742 }, { "epoch": 0.04259954705730585, "grad_norm": 0.0, "learning_rate": 8.404103030052332e-06, "loss": 15.7865, "step": 743 }, { "epoch": 0.042656881575552565, "grad_norm": 0.0, "learning_rate": 8.399567207831394e-06, "loss": 16.0023, "step": 744 }, { "epoch": 0.04271421609379927, "grad_norm": 0.0, "learning_rate": 8.395026176781627e-06, "loss": 15.3886, "step": 745 }, { "epoch": 0.04277155061204598, "grad_norm": 0.0, "learning_rate": 8.390479943860817e-06, "loss": 15.4875, "step": 746 }, { "epoch": 0.04282888513029269, "grad_norm": 0.0, "learning_rate": 8.385928516034718e-06, "loss": 15.4501, "step": 747 }, { "epoch": 0.042886219648539406, "grad_norm": 0.0, "learning_rate": 8.381371900277045e-06, "loss": 15.6047, "step": 748 }, { "epoch": 0.042943554166786115, "grad_norm": 0.0, "learning_rate": 8.37681010356946e-06, "loss": 15.6225, "step": 749 }, { "epoch": 0.04300088868503282, "grad_norm": 0.0, "learning_rate": 8.372243132901563e-06, "loss": 15.376, "step": 750 }, { "epoch": 0.04305822320327953, "grad_norm": 0.0, "learning_rate": 8.367670995270883e-06, "loss": 15.7378, "step": 751 }, { "epoch": 0.04311555772152625, "grad_norm": 0.0, "learning_rate": 8.363093697682865e-06, "loss": 15.644, "step": 752 }, { "epoch": 0.043172892239772956, "grad_norm": 0.0, "learning_rate": 8.358511247150861e-06, "loss": 15.7673, "step": 753 }, { "epoch": 0.043230226758019664, "grad_norm": 0.0, "learning_rate": 8.353923650696119e-06, "loss": 15.1973, "step": 754 }, { "epoch": 0.04328756127626637, "grad_norm": 0.0, "learning_rate": 8.349330915347766e-06, "loss": 15.6869, "step": 755 }, { "epoch": 0.04334489579451309, "grad_norm": 0.0, "learning_rate": 8.344733048142814e-06, "loss": 15.4703, "step": 756 }, { "epoch": 0.0434022303127598, "grad_norm": 0.0, "learning_rate": 8.340130056126126e-06, "loss": 15.4401, "step": 757 }, { "epoch": 0.043459564831006506, "grad_norm": 0.0, "learning_rate": 8.335521946350424e-06, "loss": 15.6927, "step": 758 }, { "epoch": 0.04351689934925322, "grad_norm": 0.0, "learning_rate": 8.33090872587627e-06, "loss": 15.32, "step": 759 }, { "epoch": 0.04357423386749993, "grad_norm": 0.0, "learning_rate": 8.326290401772057e-06, "loss": 15.8423, "step": 760 }, { "epoch": 0.04363156838574664, "grad_norm": 0.0, "learning_rate": 8.321666981113998e-06, "loss": 15.8161, "step": 761 }, { "epoch": 0.04368890290399335, "grad_norm": 0.0, "learning_rate": 8.317038470986113e-06, "loss": 15.6991, "step": 762 }, { "epoch": 0.04374623742224006, "grad_norm": 0.0, "learning_rate": 8.312404878480222e-06, "loss": 15.5216, "step": 763 }, { "epoch": 0.04380357194048677, "grad_norm": 0.0, "learning_rate": 8.307766210695933e-06, "loss": 15.4135, "step": 764 }, { "epoch": 0.04386090645873348, "grad_norm": 0.0, "learning_rate": 8.303122474740625e-06, "loss": 15.435, "step": 765 }, { "epoch": 0.04391824097698019, "grad_norm": 0.0, "learning_rate": 8.298473677729453e-06, "loss": 15.6051, "step": 766 }, { "epoch": 0.043975575495226904, "grad_norm": 0.0, "learning_rate": 8.293819826785315e-06, "loss": 15.5189, "step": 767 }, { "epoch": 0.04403291001347361, "grad_norm": 0.0, "learning_rate": 8.289160929038858e-06, "loss": 15.6029, "step": 768 }, { "epoch": 0.04409024453172032, "grad_norm": 0.0, "learning_rate": 8.284496991628465e-06, "loss": 15.4684, "step": 769 }, { "epoch": 0.04414757904996703, "grad_norm": 0.0, "learning_rate": 8.279828021700235e-06, "loss": 15.5391, "step": 770 }, { "epoch": 0.044204913568213745, "grad_norm": 0.0, "learning_rate": 8.27515402640798e-06, "loss": 15.3327, "step": 771 }, { "epoch": 0.044262248086460454, "grad_norm": 0.0, "learning_rate": 8.270475012913212e-06, "loss": 15.4333, "step": 772 }, { "epoch": 0.04431958260470716, "grad_norm": 0.0, "learning_rate": 8.265790988385132e-06, "loss": 15.4344, "step": 773 }, { "epoch": 0.04437691712295387, "grad_norm": 0.0, "learning_rate": 8.261101960000619e-06, "loss": 15.1989, "step": 774 }, { "epoch": 0.04443425164120059, "grad_norm": 0.0, "learning_rate": 8.25640793494422e-06, "loss": 15.5202, "step": 775 }, { "epoch": 0.044491586159447295, "grad_norm": 0.0, "learning_rate": 8.251708920408135e-06, "loss": 15.9113, "step": 776 }, { "epoch": 0.044548920677694004, "grad_norm": 0.0, "learning_rate": 8.247004923592212e-06, "loss": 15.5743, "step": 777 }, { "epoch": 0.04460625519594072, "grad_norm": 0.0, "learning_rate": 8.24229595170393e-06, "loss": 15.4708, "step": 778 }, { "epoch": 0.04466358971418743, "grad_norm": 0.0, "learning_rate": 8.237582011958392e-06, "loss": 15.5196, "step": 779 }, { "epoch": 0.04472092423243414, "grad_norm": 0.0, "learning_rate": 8.232863111578314e-06, "loss": 15.7244, "step": 780 }, { "epoch": 0.044778258750680845, "grad_norm": 0.0, "learning_rate": 8.228139257794012e-06, "loss": 15.2314, "step": 781 }, { "epoch": 0.04483559326892756, "grad_norm": 0.0, "learning_rate": 8.223410457843392e-06, "loss": 15.4909, "step": 782 }, { "epoch": 0.04489292778717427, "grad_norm": 0.0, "learning_rate": 8.218676718971936e-06, "loss": 15.3051, "step": 783 }, { "epoch": 0.04495026230542098, "grad_norm": 0.0, "learning_rate": 8.213938048432697e-06, "loss": 15.151, "step": 784 }, { "epoch": 0.04500759682366769, "grad_norm": 0.0, "learning_rate": 8.209194453486283e-06, "loss": 15.5094, "step": 785 }, { "epoch": 0.0450649313419144, "grad_norm": 0.0, "learning_rate": 8.204445941400844e-06, "loss": 15.2905, "step": 786 }, { "epoch": 0.04512226586016111, "grad_norm": 0.0, "learning_rate": 8.19969251945207e-06, "loss": 15.5901, "step": 787 }, { "epoch": 0.04517960037840782, "grad_norm": 0.0, "learning_rate": 8.194934194923167e-06, "loss": 15.7173, "step": 788 }, { "epoch": 0.04523693489665453, "grad_norm": 0.0, "learning_rate": 8.190170975104862e-06, "loss": 15.4733, "step": 789 }, { "epoch": 0.04529426941490124, "grad_norm": 0.0, "learning_rate": 8.185402867295373e-06, "loss": 15.7784, "step": 790 }, { "epoch": 0.04535160393314795, "grad_norm": 0.0, "learning_rate": 8.180629878800413e-06, "loss": 15.7074, "step": 791 }, { "epoch": 0.04540893845139466, "grad_norm": 0.0, "learning_rate": 8.175852016933172e-06, "loss": 15.3187, "step": 792 }, { "epoch": 0.04546627296964137, "grad_norm": 0.0, "learning_rate": 8.171069289014307e-06, "loss": 15.8274, "step": 793 }, { "epoch": 0.045523607487888085, "grad_norm": 0.0, "learning_rate": 8.166281702371929e-06, "loss": 15.3859, "step": 794 }, { "epoch": 0.04558094200613479, "grad_norm": 0.0, "learning_rate": 8.161489264341596e-06, "loss": 15.6264, "step": 795 }, { "epoch": 0.0456382765243815, "grad_norm": 0.0, "learning_rate": 8.156691982266299e-06, "loss": 15.773, "step": 796 }, { "epoch": 0.04569561104262822, "grad_norm": 0.0, "learning_rate": 8.151889863496448e-06, "loss": 15.4782, "step": 797 }, { "epoch": 0.045752945560874926, "grad_norm": 0.0, "learning_rate": 8.14708291538987e-06, "loss": 15.7671, "step": 798 }, { "epoch": 0.045810280079121635, "grad_norm": 0.0, "learning_rate": 8.142271145311784e-06, "loss": 15.0243, "step": 799 }, { "epoch": 0.04586761459736834, "grad_norm": 0.0, "learning_rate": 8.137454560634803e-06, "loss": 15.6344, "step": 800 }, { "epoch": 0.04592494911561506, "grad_norm": 0.0, "learning_rate": 8.132633168738917e-06, "loss": 15.5289, "step": 801 }, { "epoch": 0.04598228363386177, "grad_norm": 0.0, "learning_rate": 8.127806977011476e-06, "loss": 15.4143, "step": 802 }, { "epoch": 0.046039618152108476, "grad_norm": 0.0, "learning_rate": 8.122975992847189e-06, "loss": 15.4817, "step": 803 }, { "epoch": 0.046096952670355185, "grad_norm": 0.0, "learning_rate": 8.118140223648108e-06, "loss": 15.7881, "step": 804 }, { "epoch": 0.0461542871886019, "grad_norm": 0.0, "learning_rate": 8.113299676823614e-06, "loss": 15.3555, "step": 805 }, { "epoch": 0.04621162170684861, "grad_norm": 0.0, "learning_rate": 8.108454359790414e-06, "loss": 15.3229, "step": 806 }, { "epoch": 0.04626895622509532, "grad_norm": 0.0, "learning_rate": 8.103604279972513e-06, "loss": 15.589, "step": 807 }, { "epoch": 0.046326290743342026, "grad_norm": 0.0, "learning_rate": 8.098749444801226e-06, "loss": 15.1841, "step": 808 }, { "epoch": 0.04638362526158874, "grad_norm": 0.0, "learning_rate": 8.093889861715144e-06, "loss": 15.4068, "step": 809 }, { "epoch": 0.04644095977983545, "grad_norm": 0.0, "learning_rate": 8.089025538160142e-06, "loss": 14.9371, "step": 810 }, { "epoch": 0.04649829429808216, "grad_norm": 0.0, "learning_rate": 8.08415648158935e-06, "loss": 15.4922, "step": 811 }, { "epoch": 0.046555628816328874, "grad_norm": 0.0, "learning_rate": 8.079282699463155e-06, "loss": 15.5137, "step": 812 }, { "epoch": 0.04661296333457558, "grad_norm": 0.0, "learning_rate": 8.074404199249184e-06, "loss": 15.7668, "step": 813 }, { "epoch": 0.04667029785282229, "grad_norm": 0.0, "learning_rate": 8.069520988422292e-06, "loss": 15.527, "step": 814 }, { "epoch": 0.046727632371069, "grad_norm": 0.0, "learning_rate": 8.064633074464548e-06, "loss": 15.4981, "step": 815 }, { "epoch": 0.046784966889315716, "grad_norm": 0.0, "learning_rate": 8.059740464865237e-06, "loss": 15.3851, "step": 816 }, { "epoch": 0.046842301407562424, "grad_norm": 0.0, "learning_rate": 8.054843167120827e-06, "loss": 15.5792, "step": 817 }, { "epoch": 0.04689963592580913, "grad_norm": 0.0, "learning_rate": 8.04994118873498e-06, "loss": 15.5655, "step": 818 }, { "epoch": 0.04695697044405584, "grad_norm": 0.0, "learning_rate": 8.04503453721852e-06, "loss": 15.2363, "step": 819 }, { "epoch": 0.04701430496230256, "grad_norm": 0.0, "learning_rate": 8.040123220089437e-06, "loss": 15.2825, "step": 820 }, { "epoch": 0.047071639480549265, "grad_norm": 0.0, "learning_rate": 8.035207244872871e-06, "loss": 15.3609, "step": 821 }, { "epoch": 0.047128973998795974, "grad_norm": 0.0, "learning_rate": 8.030286619101094e-06, "loss": 15.4033, "step": 822 }, { "epoch": 0.04718630851704268, "grad_norm": 0.0, "learning_rate": 8.025361350313506e-06, "loss": 15.7496, "step": 823 }, { "epoch": 0.0472436430352894, "grad_norm": 0.0, "learning_rate": 8.020431446056622e-06, "loss": 15.4934, "step": 824 }, { "epoch": 0.04730097755353611, "grad_norm": 0.0, "learning_rate": 8.01549691388406e-06, "loss": 15.8934, "step": 825 }, { "epoch": 0.047358312071782815, "grad_norm": 0.0, "learning_rate": 8.010557761356523e-06, "loss": 15.6078, "step": 826 }, { "epoch": 0.047415646590029524, "grad_norm": 0.0, "learning_rate": 8.005613996041803e-06, "loss": 15.3614, "step": 827 }, { "epoch": 0.04747298110827624, "grad_norm": 0.0, "learning_rate": 8.000665625514752e-06, "loss": 15.5648, "step": 828 }, { "epoch": 0.04753031562652295, "grad_norm": 0.0, "learning_rate": 7.99571265735728e-06, "loss": 15.2218, "step": 829 }, { "epoch": 0.04758765014476966, "grad_norm": 0.0, "learning_rate": 7.990755099158346e-06, "loss": 15.5548, "step": 830 }, { "epoch": 0.04764498466301637, "grad_norm": 0.0, "learning_rate": 7.985792958513932e-06, "loss": 15.6068, "step": 831 }, { "epoch": 0.04770231918126308, "grad_norm": 0.0, "learning_rate": 7.980826243027052e-06, "loss": 15.4351, "step": 832 }, { "epoch": 0.04775965369950979, "grad_norm": 0.0, "learning_rate": 7.975854960307724e-06, "loss": 15.438, "step": 833 }, { "epoch": 0.0478169882177565, "grad_norm": 0.0, "learning_rate": 7.970879117972964e-06, "loss": 15.4016, "step": 834 }, { "epoch": 0.047874322736003214, "grad_norm": 0.0, "learning_rate": 7.965898723646777e-06, "loss": 15.4042, "step": 835 }, { "epoch": 0.04793165725424992, "grad_norm": 0.0, "learning_rate": 7.960913784960138e-06, "loss": 15.2968, "step": 836 }, { "epoch": 0.04798899177249663, "grad_norm": 0.0, "learning_rate": 7.955924309550991e-06, "loss": 15.1225, "step": 837 }, { "epoch": 0.04804632629074334, "grad_norm": 0.0, "learning_rate": 7.950930305064224e-06, "loss": 15.3629, "step": 838 }, { "epoch": 0.048103660808990055, "grad_norm": 0.0, "learning_rate": 7.94593177915167e-06, "loss": 15.3011, "step": 839 }, { "epoch": 0.048160995327236764, "grad_norm": 0.0, "learning_rate": 7.940928739472088e-06, "loss": 15.3236, "step": 840 }, { "epoch": 0.04821832984548347, "grad_norm": 0.0, "learning_rate": 7.935921193691153e-06, "loss": 15.7638, "step": 841 }, { "epoch": 0.04827566436373018, "grad_norm": 0.0, "learning_rate": 7.930909149481446e-06, "loss": 15.2309, "step": 842 }, { "epoch": 0.048332998881976896, "grad_norm": 0.0, "learning_rate": 7.925892614522433e-06, "loss": 15.6848, "step": 843 }, { "epoch": 0.048390333400223605, "grad_norm": 0.0, "learning_rate": 7.920871596500473e-06, "loss": 15.4714, "step": 844 }, { "epoch": 0.048447667918470314, "grad_norm": 0.0, "learning_rate": 7.915846103108784e-06, "loss": 15.3015, "step": 845 }, { "epoch": 0.04850500243671702, "grad_norm": 0.0, "learning_rate": 7.910816142047447e-06, "loss": 15.3926, "step": 846 }, { "epoch": 0.04856233695496374, "grad_norm": 0.0, "learning_rate": 7.905781721023384e-06, "loss": 15.4996, "step": 847 }, { "epoch": 0.048619671473210446, "grad_norm": 0.0, "learning_rate": 7.900742847750352e-06, "loss": 15.3543, "step": 848 }, { "epoch": 0.048677005991457155, "grad_norm": 0.0, "learning_rate": 7.895699529948932e-06, "loss": 15.3942, "step": 849 }, { "epoch": 0.04873434050970387, "grad_norm": 0.0, "learning_rate": 7.890651775346512e-06, "loss": 15.5402, "step": 850 }, { "epoch": 0.04879167502795058, "grad_norm": 0.0, "learning_rate": 7.885599591677283e-06, "loss": 15.9481, "step": 851 }, { "epoch": 0.04884900954619729, "grad_norm": 0.0, "learning_rate": 7.880542986682212e-06, "loss": 15.3632, "step": 852 }, { "epoch": 0.048906344064443996, "grad_norm": 0.0, "learning_rate": 7.875481968109052e-06, "loss": 15.7589, "step": 853 }, { "epoch": 0.04896367858269071, "grad_norm": 0.0, "learning_rate": 7.870416543712315e-06, "loss": 15.648, "step": 854 }, { "epoch": 0.04902101310093742, "grad_norm": 0.0, "learning_rate": 7.865346721253256e-06, "loss": 15.5267, "step": 855 }, { "epoch": 0.04907834761918413, "grad_norm": 0.0, "learning_rate": 7.860272508499877e-06, "loss": 15.6383, "step": 856 }, { "epoch": 0.04913568213743084, "grad_norm": 0.0, "learning_rate": 7.855193913226907e-06, "loss": 15.2878, "step": 857 }, { "epoch": 0.04919301665567755, "grad_norm": 0.0, "learning_rate": 7.850110943215785e-06, "loss": 15.734, "step": 858 }, { "epoch": 0.04925035117392426, "grad_norm": 0.0, "learning_rate": 7.845023606254658e-06, "loss": 15.7939, "step": 859 }, { "epoch": 0.04930768569217097, "grad_norm": 0.0, "learning_rate": 7.83993191013836e-06, "loss": 15.3507, "step": 860 }, { "epoch": 0.04936502021041768, "grad_norm": 0.0, "learning_rate": 7.834835862668405e-06, "loss": 15.2812, "step": 861 }, { "epoch": 0.049422354728664394, "grad_norm": 0.0, "learning_rate": 7.829735471652978e-06, "loss": 15.1451, "step": 862 }, { "epoch": 0.0494796892469111, "grad_norm": 0.0, "learning_rate": 7.82463074490691e-06, "loss": 15.6257, "step": 863 }, { "epoch": 0.04953702376515781, "grad_norm": 0.0, "learning_rate": 7.819521690251688e-06, "loss": 15.3859, "step": 864 }, { "epoch": 0.04959435828340453, "grad_norm": 0.0, "learning_rate": 7.814408315515419e-06, "loss": 15.6041, "step": 865 }, { "epoch": 0.049651692801651236, "grad_norm": 0.0, "learning_rate": 7.809290628532836e-06, "loss": 15.5338, "step": 866 }, { "epoch": 0.049709027319897944, "grad_norm": 0.0, "learning_rate": 7.804168637145276e-06, "loss": 15.3608, "step": 867 }, { "epoch": 0.04976636183814465, "grad_norm": 0.0, "learning_rate": 7.799042349200672e-06, "loss": 15.3891, "step": 868 }, { "epoch": 0.04982369635639137, "grad_norm": 0.0, "learning_rate": 7.793911772553542e-06, "loss": 15.2893, "step": 869 }, { "epoch": 0.04988103087463808, "grad_norm": 0.0, "learning_rate": 7.788776915064972e-06, "loss": 15.6297, "step": 870 }, { "epoch": 0.049938365392884786, "grad_norm": 0.0, "learning_rate": 7.783637784602608e-06, "loss": 15.6214, "step": 871 }, { "epoch": 0.049995699911131494, "grad_norm": 0.0, "learning_rate": 7.778494389040646e-06, "loss": 15.3664, "step": 872 }, { "epoch": 0.05005303442937821, "grad_norm": 0.0, "learning_rate": 7.773346736259815e-06, "loss": 15.2891, "step": 873 }, { "epoch": 0.05011036894762492, "grad_norm": 0.0, "learning_rate": 7.768194834147362e-06, "loss": 15.8051, "step": 874 }, { "epoch": 0.05016770346587163, "grad_norm": 0.0, "learning_rate": 7.763038690597055e-06, "loss": 15.5375, "step": 875 }, { "epoch": 0.050225037984118336, "grad_norm": 0.0, "learning_rate": 7.757878313509153e-06, "loss": 15.6352, "step": 876 }, { "epoch": 0.05028237250236505, "grad_norm": 0.0, "learning_rate": 7.752713710790405e-06, "loss": 15.6374, "step": 877 }, { "epoch": 0.05033970702061176, "grad_norm": 0.0, "learning_rate": 7.747544890354031e-06, "loss": 15.4307, "step": 878 }, { "epoch": 0.05039704153885847, "grad_norm": 0.0, "learning_rate": 7.742371860119718e-06, "loss": 15.0681, "step": 879 }, { "epoch": 0.05045437605710518, "grad_norm": 0.0, "learning_rate": 7.7371946280136e-06, "loss": 15.284, "step": 880 }, { "epoch": 0.05051171057535189, "grad_norm": 0.0, "learning_rate": 7.73201320196825e-06, "loss": 15.3285, "step": 881 }, { "epoch": 0.0505690450935986, "grad_norm": 0.0, "learning_rate": 7.72682758992267e-06, "loss": 15.2923, "step": 882 }, { "epoch": 0.05062637961184531, "grad_norm": 0.0, "learning_rate": 7.721637799822269e-06, "loss": 15.4685, "step": 883 }, { "epoch": 0.050683714130092025, "grad_norm": 0.0, "learning_rate": 7.716443839618863e-06, "loss": 15.4828, "step": 884 }, { "epoch": 0.050741048648338734, "grad_norm": 0.0, "learning_rate": 7.711245717270659e-06, "loss": 15.5624, "step": 885 }, { "epoch": 0.05079838316658544, "grad_norm": 0.0, "learning_rate": 7.706043440742235e-06, "loss": 15.46, "step": 886 }, { "epoch": 0.05085571768483215, "grad_norm": 0.0, "learning_rate": 7.70083701800454e-06, "loss": 15.5009, "step": 887 }, { "epoch": 0.05091305220307887, "grad_norm": 0.0, "learning_rate": 7.695626457034867e-06, "loss": 15.2884, "step": 888 }, { "epoch": 0.050970386721325575, "grad_norm": 0.0, "learning_rate": 7.690411765816864e-06, "loss": 16.1451, "step": 889 }, { "epoch": 0.051027721239572284, "grad_norm": 0.0, "learning_rate": 7.685192952340495e-06, "loss": 15.5103, "step": 890 }, { "epoch": 0.05108505575781899, "grad_norm": 0.0, "learning_rate": 7.679970024602044e-06, "loss": 15.2557, "step": 891 }, { "epoch": 0.05114239027606571, "grad_norm": 0.0, "learning_rate": 7.674742990604101e-06, "loss": 15.7524, "step": 892 }, { "epoch": 0.051199724794312416, "grad_norm": 0.0, "learning_rate": 7.669511858355545e-06, "loss": 15.4103, "step": 893 }, { "epoch": 0.051257059312559125, "grad_norm": 0.0, "learning_rate": 7.664276635871535e-06, "loss": 15.8326, "step": 894 }, { "epoch": 0.051314393830805834, "grad_norm": 0.0, "learning_rate": 7.659037331173498e-06, "loss": 15.4746, "step": 895 }, { "epoch": 0.05137172834905255, "grad_norm": 0.0, "learning_rate": 7.653793952289114e-06, "loss": 15.2673, "step": 896 }, { "epoch": 0.05142906286729926, "grad_norm": 0.0, "learning_rate": 7.648546507252308e-06, "loss": 15.5551, "step": 897 }, { "epoch": 0.051486397385545966, "grad_norm": 0.0, "learning_rate": 7.643295004103232e-06, "loss": 15.3011, "step": 898 }, { "epoch": 0.051543731903792675, "grad_norm": 0.0, "learning_rate": 7.638039450888259e-06, "loss": 15.2572, "step": 899 }, { "epoch": 0.05160106642203939, "grad_norm": 0.0, "learning_rate": 7.632779855659966e-06, "loss": 15.536, "step": 900 }, { "epoch": 0.0516584009402861, "grad_norm": 0.0, "learning_rate": 7.627516226477123e-06, "loss": 15.4528, "step": 901 }, { "epoch": 0.05171573545853281, "grad_norm": 0.0, "learning_rate": 7.62224857140468e-06, "loss": 15.1914, "step": 902 }, { "epoch": 0.05177306997677952, "grad_norm": 0.0, "learning_rate": 7.616976898513759e-06, "loss": 15.3216, "step": 903 }, { "epoch": 0.05183040449502623, "grad_norm": 0.0, "learning_rate": 7.611701215881635e-06, "loss": 15.4943, "step": 904 }, { "epoch": 0.05188773901327294, "grad_norm": 0.0, "learning_rate": 7.606421531591725e-06, "loss": 15.8347, "step": 905 }, { "epoch": 0.05194507353151965, "grad_norm": 0.0, "learning_rate": 7.601137853733583e-06, "loss": 15.4142, "step": 906 }, { "epoch": 0.052002408049766365, "grad_norm": 0.0, "learning_rate": 7.595850190402877e-06, "loss": 15.3806, "step": 907 }, { "epoch": 0.05205974256801307, "grad_norm": 0.0, "learning_rate": 7.590558549701383e-06, "loss": 15.4033, "step": 908 }, { "epoch": 0.05211707708625978, "grad_norm": 0.0, "learning_rate": 7.585262939736975e-06, "loss": 15.6882, "step": 909 }, { "epoch": 0.05217441160450649, "grad_norm": 0.0, "learning_rate": 7.579963368623602e-06, "loss": 15.5859, "step": 910 }, { "epoch": 0.052231746122753206, "grad_norm": 0.0, "learning_rate": 7.574659844481285e-06, "loss": 15.5471, "step": 911 }, { "epoch": 0.052289080640999915, "grad_norm": 0.0, "learning_rate": 7.569352375436102e-06, "loss": 15.3833, "step": 912 }, { "epoch": 0.05234641515924662, "grad_norm": 0.0, "learning_rate": 7.564040969620179e-06, "loss": 15.4828, "step": 913 }, { "epoch": 0.05240374967749333, "grad_norm": 0.0, "learning_rate": 7.558725635171669e-06, "loss": 15.7124, "step": 914 }, { "epoch": 0.05246108419574005, "grad_norm": 0.0, "learning_rate": 7.553406380234744e-06, "loss": 15.6596, "step": 915 }, { "epoch": 0.052518418713986756, "grad_norm": 0.0, "learning_rate": 7.548083212959588e-06, "loss": 15.318, "step": 916 }, { "epoch": 0.052575753232233464, "grad_norm": 0.0, "learning_rate": 7.542756141502376e-06, "loss": 15.3831, "step": 917 }, { "epoch": 0.05263308775048018, "grad_norm": 0.0, "learning_rate": 7.537425174025265e-06, "loss": 15.5979, "step": 918 }, { "epoch": 0.05269042226872689, "grad_norm": 0.0, "learning_rate": 7.532090318696382e-06, "loss": 15.5016, "step": 919 }, { "epoch": 0.0527477567869736, "grad_norm": 0.0, "learning_rate": 7.526751583689812e-06, "loss": 15.1329, "step": 920 }, { "epoch": 0.052805091305220306, "grad_norm": 0.0, "learning_rate": 7.521408977185584e-06, "loss": 15.3954, "step": 921 }, { "epoch": 0.05286242582346702, "grad_norm": 0.0, "learning_rate": 7.516062507369655e-06, "loss": 15.5931, "step": 922 }, { "epoch": 0.05291976034171373, "grad_norm": 0.0, "learning_rate": 7.510712182433908e-06, "loss": 15.7145, "step": 923 }, { "epoch": 0.05297709485996044, "grad_norm": 0.0, "learning_rate": 7.505358010576132e-06, "loss": 15.1615, "step": 924 }, { "epoch": 0.05303442937820715, "grad_norm": 0.0, "learning_rate": 7.500000000000001e-06, "loss": 15.3657, "step": 925 }, { "epoch": 0.05309176389645386, "grad_norm": 0.0, "learning_rate": 7.494638158915083e-06, "loss": 15.1117, "step": 926 }, { "epoch": 0.05314909841470057, "grad_norm": 0.0, "learning_rate": 7.489272495536809e-06, "loss": 15.3789, "step": 927 }, { "epoch": 0.05320643293294728, "grad_norm": 0.0, "learning_rate": 7.483903018086466e-06, "loss": 15.4555, "step": 928 }, { "epoch": 0.05326376745119399, "grad_norm": 0.0, "learning_rate": 7.4785297347911865e-06, "loss": 15.0974, "step": 929 }, { "epoch": 0.053321101969440704, "grad_norm": 0.0, "learning_rate": 7.473152653883934e-06, "loss": 15.3593, "step": 930 }, { "epoch": 0.05337843648768741, "grad_norm": 0.0, "learning_rate": 7.467771783603492e-06, "loss": 15.8678, "step": 931 }, { "epoch": 0.05343577100593412, "grad_norm": 0.0, "learning_rate": 7.4623871321944485e-06, "loss": 15.7244, "step": 932 }, { "epoch": 0.05349310552418083, "grad_norm": 0.0, "learning_rate": 7.456998707907184e-06, "loss": 15.1704, "step": 933 }, { "epoch": 0.053550440042427545, "grad_norm": 0.0, "learning_rate": 7.4516065189978625e-06, "loss": 15.4617, "step": 934 }, { "epoch": 0.053607774560674254, "grad_norm": 0.0, "learning_rate": 7.446210573728414e-06, "loss": 15.3451, "step": 935 }, { "epoch": 0.05366510907892096, "grad_norm": 0.0, "learning_rate": 7.440810880366524e-06, "loss": 15.4365, "step": 936 }, { "epoch": 0.05372244359716768, "grad_norm": 0.0, "learning_rate": 7.435407447185623e-06, "loss": 15.4767, "step": 937 }, { "epoch": 0.05377977811541439, "grad_norm": 0.0, "learning_rate": 7.430000282464872e-06, "loss": 15.3876, "step": 938 }, { "epoch": 0.053837112633661095, "grad_norm": 0.0, "learning_rate": 7.424589394489145e-06, "loss": 15.5479, "step": 939 }, { "epoch": 0.053894447151907804, "grad_norm": 0.0, "learning_rate": 7.419174791549023e-06, "loss": 15.3345, "step": 940 }, { "epoch": 0.05395178167015452, "grad_norm": 0.0, "learning_rate": 7.413756481940783e-06, "loss": 15.145, "step": 941 }, { "epoch": 0.05400911618840123, "grad_norm": 0.0, "learning_rate": 7.408334473966375e-06, "loss": 15.428, "step": 942 }, { "epoch": 0.05406645070664794, "grad_norm": 0.0, "learning_rate": 7.402908775933419e-06, "loss": 15.3543, "step": 943 }, { "epoch": 0.054123785224894645, "grad_norm": 0.0, "learning_rate": 7.39747939615519e-06, "loss": 15.5398, "step": 944 }, { "epoch": 0.05418111974314136, "grad_norm": 0.0, "learning_rate": 7.392046342950604e-06, "loss": 15.2644, "step": 945 }, { "epoch": 0.05423845426138807, "grad_norm": 0.0, "learning_rate": 7.386609624644201e-06, "loss": 15.641, "step": 946 }, { "epoch": 0.05429578877963478, "grad_norm": 0.0, "learning_rate": 7.38116924956614e-06, "loss": 15.3402, "step": 947 }, { "epoch": 0.05435312329788149, "grad_norm": 0.0, "learning_rate": 7.375725226052186e-06, "loss": 15.4802, "step": 948 }, { "epoch": 0.0544104578161282, "grad_norm": 0.0, "learning_rate": 7.370277562443689e-06, "loss": 15.6399, "step": 949 }, { "epoch": 0.05446779233437491, "grad_norm": 0.0, "learning_rate": 7.364826267087577e-06, "loss": 15.3748, "step": 950 }, { "epoch": 0.05452512685262162, "grad_norm": 0.0, "learning_rate": 7.359371348336346e-06, "loss": 15.559, "step": 951 }, { "epoch": 0.05458246137086833, "grad_norm": 0.0, "learning_rate": 7.353912814548042e-06, "loss": 15.786, "step": 952 }, { "epoch": 0.05463979588911504, "grad_norm": 0.0, "learning_rate": 7.348450674086247e-06, "loss": 15.3269, "step": 953 }, { "epoch": 0.05469713040736175, "grad_norm": 0.0, "learning_rate": 7.342984935320074e-06, "loss": 15.3853, "step": 954 }, { "epoch": 0.05475446492560846, "grad_norm": 0.0, "learning_rate": 7.337515606624148e-06, "loss": 15.5078, "step": 955 }, { "epoch": 0.054811799443855176, "grad_norm": 0.0, "learning_rate": 7.332042696378591e-06, "loss": 15.6212, "step": 956 }, { "epoch": 0.054869133962101885, "grad_norm": 0.0, "learning_rate": 7.326566212969016e-06, "loss": 15.6121, "step": 957 }, { "epoch": 0.05492646848034859, "grad_norm": 0.0, "learning_rate": 7.321086164786513e-06, "loss": 15.406, "step": 958 }, { "epoch": 0.0549838029985953, "grad_norm": 0.0, "learning_rate": 7.315602560227627e-06, "loss": 15.0786, "step": 959 }, { "epoch": 0.05504113751684202, "grad_norm": 0.0, "learning_rate": 7.310115407694358e-06, "loss": 15.2173, "step": 960 }, { "epoch": 0.055098472035088726, "grad_norm": 0.0, "learning_rate": 7.30462471559414e-06, "loss": 15.5237, "step": 961 }, { "epoch": 0.055155806553335435, "grad_norm": 0.0, "learning_rate": 7.299130492339833e-06, "loss": 15.7292, "step": 962 }, { "epoch": 0.05521314107158214, "grad_norm": 0.0, "learning_rate": 7.293632746349702e-06, "loss": 15.5719, "step": 963 }, { "epoch": 0.05527047558982886, "grad_norm": 0.0, "learning_rate": 7.288131486047414e-06, "loss": 15.5459, "step": 964 }, { "epoch": 0.05532781010807557, "grad_norm": 0.0, "learning_rate": 7.282626719862021e-06, "loss": 15.7095, "step": 965 }, { "epoch": 0.055385144626322276, "grad_norm": 0.0, "learning_rate": 7.277118456227941e-06, "loss": 15.6278, "step": 966 }, { "epoch": 0.055442479144568985, "grad_norm": 0.0, "learning_rate": 7.2716067035849595e-06, "loss": 15.4754, "step": 967 }, { "epoch": 0.0554998136628157, "grad_norm": 0.0, "learning_rate": 7.266091470378199e-06, "loss": 15.3922, "step": 968 }, { "epoch": 0.05555714818106241, "grad_norm": 0.0, "learning_rate": 7.260572765058124e-06, "loss": 15.4944, "step": 969 }, { "epoch": 0.05561448269930912, "grad_norm": 0.0, "learning_rate": 7.25505059608051e-06, "loss": 15.5733, "step": 970 }, { "epoch": 0.05567181721755583, "grad_norm": 0.0, "learning_rate": 7.249524971906445e-06, "loss": 15.4449, "step": 971 }, { "epoch": 0.05572915173580254, "grad_norm": 0.0, "learning_rate": 7.243995901002312e-06, "loss": 15.4991, "step": 972 }, { "epoch": 0.05578648625404925, "grad_norm": 0.0, "learning_rate": 7.23846339183977e-06, "loss": 15.4896, "step": 973 }, { "epoch": 0.05584382077229596, "grad_norm": 0.0, "learning_rate": 7.232927452895749e-06, "loss": 15.4687, "step": 974 }, { "epoch": 0.055901155290542674, "grad_norm": 0.0, "learning_rate": 7.227388092652436e-06, "loss": 15.5742, "step": 975 }, { "epoch": 0.05595848980878938, "grad_norm": 0.0, "learning_rate": 7.221845319597258e-06, "loss": 15.5478, "step": 976 }, { "epoch": 0.05601582432703609, "grad_norm": 0.0, "learning_rate": 7.216299142222869e-06, "loss": 15.545, "step": 977 }, { "epoch": 0.0560731588452828, "grad_norm": 0.0, "learning_rate": 7.210749569027145e-06, "loss": 15.5581, "step": 978 }, { "epoch": 0.056130493363529516, "grad_norm": 0.0, "learning_rate": 7.2051966085131584e-06, "loss": 15.317, "step": 979 }, { "epoch": 0.056187827881776224, "grad_norm": 0.0, "learning_rate": 7.199640269189176e-06, "loss": 15.5235, "step": 980 }, { "epoch": 0.05624516240002293, "grad_norm": 0.0, "learning_rate": 7.194080559568642e-06, "loss": 15.6216, "step": 981 }, { "epoch": 0.05630249691826964, "grad_norm": 0.0, "learning_rate": 7.18851748817016e-06, "loss": 15.7069, "step": 982 }, { "epoch": 0.05635983143651636, "grad_norm": 0.0, "learning_rate": 7.18295106351749e-06, "loss": 15.3323, "step": 983 }, { "epoch": 0.056417165954763066, "grad_norm": 0.0, "learning_rate": 7.177381294139527e-06, "loss": 15.568, "step": 984 }, { "epoch": 0.056474500473009774, "grad_norm": 0.0, "learning_rate": 7.1718081885702905e-06, "loss": 15.2707, "step": 985 }, { "epoch": 0.05653183499125648, "grad_norm": 0.0, "learning_rate": 7.1662317553489126e-06, "loss": 15.5602, "step": 986 }, { "epoch": 0.0565891695095032, "grad_norm": 0.0, "learning_rate": 7.160652003019624e-06, "loss": 15.4934, "step": 987 }, { "epoch": 0.05664650402774991, "grad_norm": 0.0, "learning_rate": 7.155068940131741e-06, "loss": 15.3652, "step": 988 }, { "epoch": 0.056703838545996615, "grad_norm": 0.0, "learning_rate": 7.149482575239653e-06, "loss": 15.1606, "step": 989 }, { "epoch": 0.05676117306424333, "grad_norm": 0.0, "learning_rate": 7.143892916902805e-06, "loss": 15.6928, "step": 990 }, { "epoch": 0.05681850758249004, "grad_norm": 0.0, "learning_rate": 7.138299973685694e-06, "loss": 15.3702, "step": 991 }, { "epoch": 0.05687584210073675, "grad_norm": 0.0, "learning_rate": 7.132703754157846e-06, "loss": 15.5082, "step": 992 }, { "epoch": 0.05693317661898346, "grad_norm": 0.0, "learning_rate": 7.1271042668938094e-06, "loss": 15.4877, "step": 993 }, { "epoch": 0.05699051113723017, "grad_norm": 0.0, "learning_rate": 7.121501520473137e-06, "loss": 15.6682, "step": 994 }, { "epoch": 0.05704784565547688, "grad_norm": 0.0, "learning_rate": 7.115895523480376e-06, "loss": 15.3158, "step": 995 }, { "epoch": 0.05710518017372359, "grad_norm": 0.0, "learning_rate": 7.110286284505058e-06, "loss": 15.2529, "step": 996 }, { "epoch": 0.0571625146919703, "grad_norm": 0.0, "learning_rate": 7.104673812141676e-06, "loss": 15.392, "step": 997 }, { "epoch": 0.057219849210217014, "grad_norm": 0.0, "learning_rate": 7.099058114989679e-06, "loss": 15.4152, "step": 998 }, { "epoch": 0.05727718372846372, "grad_norm": 0.0, "learning_rate": 7.09343920165346e-06, "loss": 15.5532, "step": 999 }, { "epoch": 0.05733451824671043, "grad_norm": 0.0, "learning_rate": 7.087817080742337e-06, "loss": 15.4105, "step": 1000 }, { "epoch": 0.05739185276495714, "grad_norm": 0.0, "learning_rate": 7.082191760870543e-06, "loss": 15.8258, "step": 1001 }, { "epoch": 0.057449187283203855, "grad_norm": 0.0, "learning_rate": 7.076563250657213e-06, "loss": 15.3265, "step": 1002 }, { "epoch": 0.057506521801450564, "grad_norm": 0.0, "learning_rate": 7.070931558726373e-06, "loss": 15.5073, "step": 1003 }, { "epoch": 0.05756385631969727, "grad_norm": 0.0, "learning_rate": 7.065296693706916e-06, "loss": 15.2938, "step": 1004 }, { "epoch": 0.05762119083794398, "grad_norm": 0.0, "learning_rate": 7.059658664232605e-06, "loss": 15.242, "step": 1005 }, { "epoch": 0.057678525356190696, "grad_norm": 0.0, "learning_rate": 7.054017478942048e-06, "loss": 15.3581, "step": 1006 }, { "epoch": 0.057735859874437405, "grad_norm": 0.0, "learning_rate": 7.048373146478691e-06, "loss": 15.524, "step": 1007 }, { "epoch": 0.057793194392684114, "grad_norm": 0.0, "learning_rate": 7.042725675490797e-06, "loss": 15.3818, "step": 1008 }, { "epoch": 0.05785052891093083, "grad_norm": 0.0, "learning_rate": 7.037075074631441e-06, "loss": 15.0783, "step": 1009 }, { "epoch": 0.05790786342917754, "grad_norm": 0.0, "learning_rate": 7.031421352558495e-06, "loss": 15.3572, "step": 1010 }, { "epoch": 0.057965197947424246, "grad_norm": 0.0, "learning_rate": 7.025764517934612e-06, "loss": 15.0614, "step": 1011 }, { "epoch": 0.058022532465670955, "grad_norm": 0.0, "learning_rate": 7.0201045794272135e-06, "loss": 15.2281, "step": 1012 }, { "epoch": 0.05807986698391767, "grad_norm": 0.0, "learning_rate": 7.0144415457084765e-06, "loss": 15.6632, "step": 1013 }, { "epoch": 0.05813720150216438, "grad_norm": 0.0, "learning_rate": 7.008775425455323e-06, "loss": 15.3456, "step": 1014 }, { "epoch": 0.05819453602041109, "grad_norm": 0.0, "learning_rate": 7.003106227349399e-06, "loss": 15.2589, "step": 1015 }, { "epoch": 0.058251870538657796, "grad_norm": 0.0, "learning_rate": 6.997433960077072e-06, "loss": 15.1371, "step": 1016 }, { "epoch": 0.05830920505690451, "grad_norm": 0.0, "learning_rate": 6.991758632329411e-06, "loss": 15.6141, "step": 1017 }, { "epoch": 0.05836653957515122, "grad_norm": 0.0, "learning_rate": 6.9860802528021705e-06, "loss": 15.4954, "step": 1018 }, { "epoch": 0.05842387409339793, "grad_norm": 0.0, "learning_rate": 6.980398830195785e-06, "loss": 15.5896, "step": 1019 }, { "epoch": 0.05848120861164464, "grad_norm": 0.0, "learning_rate": 6.97471437321535e-06, "loss": 15.4669, "step": 1020 }, { "epoch": 0.05853854312989135, "grad_norm": 0.0, "learning_rate": 6.969026890570612e-06, "loss": 15.2941, "step": 1021 }, { "epoch": 0.05859587764813806, "grad_norm": 0.0, "learning_rate": 6.963336390975949e-06, "loss": 15.3614, "step": 1022 }, { "epoch": 0.05865321216638477, "grad_norm": 0.0, "learning_rate": 6.957642883150365e-06, "loss": 15.4045, "step": 1023 }, { "epoch": 0.058710546684631486, "grad_norm": 0.0, "learning_rate": 6.9519463758174745e-06, "loss": 15.6422, "step": 1024 }, { "epoch": 0.058767881202878194, "grad_norm": 0.0, "learning_rate": 6.9462468777054855e-06, "loss": 15.4819, "step": 1025 }, { "epoch": 0.0588252157211249, "grad_norm": 0.0, "learning_rate": 6.940544397547189e-06, "loss": 15.569, "step": 1026 }, { "epoch": 0.05888255023937161, "grad_norm": 0.0, "learning_rate": 6.934838944079944e-06, "loss": 15.6353, "step": 1027 }, { "epoch": 0.05893988475761833, "grad_norm": 0.0, "learning_rate": 6.929130526045667e-06, "loss": 15.1708, "step": 1028 }, { "epoch": 0.058997219275865036, "grad_norm": 0.0, "learning_rate": 6.9234191521908176e-06, "loss": 15.071, "step": 1029 }, { "epoch": 0.059054553794111744, "grad_norm": 0.0, "learning_rate": 6.917704831266381e-06, "loss": 15.3808, "step": 1030 }, { "epoch": 0.05911188831235845, "grad_norm": 0.0, "learning_rate": 6.911987572027861e-06, "loss": 15.6273, "step": 1031 }, { "epoch": 0.05916922283060517, "grad_norm": 0.0, "learning_rate": 6.906267383235261e-06, "loss": 15.4842, "step": 1032 }, { "epoch": 0.05922655734885188, "grad_norm": 0.0, "learning_rate": 6.9005442736530745e-06, "loss": 15.3772, "step": 1033 }, { "epoch": 0.059283891867098586, "grad_norm": 0.0, "learning_rate": 6.894818252050272e-06, "loss": 15.1444, "step": 1034 }, { "epoch": 0.059341226385345294, "grad_norm": 0.0, "learning_rate": 6.889089327200282e-06, "loss": 15.694, "step": 1035 }, { "epoch": 0.05939856090359201, "grad_norm": 0.0, "learning_rate": 6.883357507880985e-06, "loss": 15.2363, "step": 1036 }, { "epoch": 0.05945589542183872, "grad_norm": 0.0, "learning_rate": 6.877622802874693e-06, "loss": 15.2058, "step": 1037 }, { "epoch": 0.05951322994008543, "grad_norm": 0.0, "learning_rate": 6.871885220968142e-06, "loss": 15.431, "step": 1038 }, { "epoch": 0.059570564458332136, "grad_norm": 0.0, "learning_rate": 6.866144770952474e-06, "loss": 15.2124, "step": 1039 }, { "epoch": 0.05962789897657885, "grad_norm": 0.0, "learning_rate": 6.86040146162323e-06, "loss": 15.531, "step": 1040 }, { "epoch": 0.05968523349482556, "grad_norm": 0.0, "learning_rate": 6.854655301780324e-06, "loss": 15.1198, "step": 1041 }, { "epoch": 0.05974256801307227, "grad_norm": 0.0, "learning_rate": 6.848906300228047e-06, "loss": 15.3151, "step": 1042 }, { "epoch": 0.059799902531318984, "grad_norm": 0.0, "learning_rate": 6.843154465775036e-06, "loss": 15.3529, "step": 1043 }, { "epoch": 0.05985723704956569, "grad_norm": 0.0, "learning_rate": 6.837399807234273e-06, "loss": 15.306, "step": 1044 }, { "epoch": 0.0599145715678124, "grad_norm": 0.0, "learning_rate": 6.831642333423068e-06, "loss": 15.5283, "step": 1045 }, { "epoch": 0.05997190608605911, "grad_norm": 0.0, "learning_rate": 6.825882053163039e-06, "loss": 15.5263, "step": 1046 }, { "epoch": 0.060029240604305825, "grad_norm": 0.0, "learning_rate": 6.820118975280109e-06, "loss": 15.101, "step": 1047 }, { "epoch": 0.060086575122552534, "grad_norm": 0.0, "learning_rate": 6.814353108604488e-06, "loss": 15.4583, "step": 1048 }, { "epoch": 0.06014390964079924, "grad_norm": 0.0, "learning_rate": 6.8085844619706555e-06, "loss": 15.0133, "step": 1049 }, { "epoch": 0.06020124415904595, "grad_norm": 0.0, "learning_rate": 6.802813044217353e-06, "loss": 15.3445, "step": 1050 }, { "epoch": 0.06025857867729267, "grad_norm": 0.0, "learning_rate": 6.797038864187564e-06, "loss": 15.4779, "step": 1051 }, { "epoch": 0.060315913195539375, "grad_norm": 0.0, "learning_rate": 6.791261930728513e-06, "loss": 15.5129, "step": 1052 }, { "epoch": 0.060373247713786084, "grad_norm": 0.0, "learning_rate": 6.785482252691634e-06, "loss": 15.3129, "step": 1053 }, { "epoch": 0.06043058223203279, "grad_norm": 0.0, "learning_rate": 6.77969983893257e-06, "loss": 15.2355, "step": 1054 }, { "epoch": 0.06048791675027951, "grad_norm": 0.0, "learning_rate": 6.773914698311157e-06, "loss": 15.3794, "step": 1055 }, { "epoch": 0.060545251268526216, "grad_norm": 0.0, "learning_rate": 6.768126839691408e-06, "loss": 15.4321, "step": 1056 }, { "epoch": 0.060602585786772925, "grad_norm": 0.0, "learning_rate": 6.762336271941499e-06, "loss": 15.3637, "step": 1057 }, { "epoch": 0.060659920305019634, "grad_norm": 0.0, "learning_rate": 6.756543003933758e-06, "loss": 15.3693, "step": 1058 }, { "epoch": 0.06071725482326635, "grad_norm": 0.0, "learning_rate": 6.750747044544654e-06, "loss": 15.0256, "step": 1059 }, { "epoch": 0.06077458934151306, "grad_norm": 0.0, "learning_rate": 6.7449484026547705e-06, "loss": 15.4155, "step": 1060 }, { "epoch": 0.060831923859759766, "grad_norm": 0.0, "learning_rate": 6.739147087148812e-06, "loss": 15.5012, "step": 1061 }, { "epoch": 0.06088925837800648, "grad_norm": 0.0, "learning_rate": 6.733343106915573e-06, "loss": 15.3416, "step": 1062 }, { "epoch": 0.06094659289625319, "grad_norm": 0.0, "learning_rate": 6.7275364708479316e-06, "loss": 15.4652, "step": 1063 }, { "epoch": 0.0610039274144999, "grad_norm": 0.0, "learning_rate": 6.721727187842837e-06, "loss": 15.2292, "step": 1064 }, { "epoch": 0.06106126193274661, "grad_norm": 0.0, "learning_rate": 6.715915266801292e-06, "loss": 15.1645, "step": 1065 }, { "epoch": 0.06111859645099332, "grad_norm": 0.0, "learning_rate": 6.710100716628345e-06, "loss": 15.1633, "step": 1066 }, { "epoch": 0.06117593096924003, "grad_norm": 0.0, "learning_rate": 6.704283546233066e-06, "loss": 15.2905, "step": 1067 }, { "epoch": 0.06123326548748674, "grad_norm": 0.0, "learning_rate": 6.6984637645285475e-06, "loss": 14.9021, "step": 1068 }, { "epoch": 0.06129060000573345, "grad_norm": 0.0, "learning_rate": 6.692641380431879e-06, "loss": 15.3016, "step": 1069 }, { "epoch": 0.061347934523980165, "grad_norm": 0.0, "learning_rate": 6.6868164028641355e-06, "loss": 15.2637, "step": 1070 }, { "epoch": 0.06140526904222687, "grad_norm": 0.0, "learning_rate": 6.68098884075037e-06, "loss": 15.2153, "step": 1071 }, { "epoch": 0.06146260356047358, "grad_norm": 0.0, "learning_rate": 6.675158703019594e-06, "loss": 15.416, "step": 1072 }, { "epoch": 0.06151993807872029, "grad_norm": 0.0, "learning_rate": 6.669325998604766e-06, "loss": 15.0624, "step": 1073 }, { "epoch": 0.061577272596967006, "grad_norm": 0.0, "learning_rate": 6.663490736442771e-06, "loss": 14.9949, "step": 1074 }, { "epoch": 0.061634607115213715, "grad_norm": 0.0, "learning_rate": 6.657652925474424e-06, "loss": 15.2689, "step": 1075 }, { "epoch": 0.06169194163346042, "grad_norm": 0.0, "learning_rate": 6.6518125746444376e-06, "loss": 14.9976, "step": 1076 }, { "epoch": 0.06174927615170714, "grad_norm": 0.0, "learning_rate": 6.645969692901416e-06, "loss": 15.2178, "step": 1077 }, { "epoch": 0.06180661066995385, "grad_norm": 0.0, "learning_rate": 6.640124289197845e-06, "loss": 15.2663, "step": 1078 }, { "epoch": 0.061863945188200556, "grad_norm": 0.0, "learning_rate": 6.634276372490074e-06, "loss": 15.3322, "step": 1079 }, { "epoch": 0.061921279706447264, "grad_norm": 0.0, "learning_rate": 6.6284259517383e-06, "loss": 15.8089, "step": 1080 }, { "epoch": 0.06197861422469398, "grad_norm": 0.0, "learning_rate": 6.622573035906557e-06, "loss": 15.5136, "step": 1081 }, { "epoch": 0.06203594874294069, "grad_norm": 0.0, "learning_rate": 6.616717633962703e-06, "loss": 15.4216, "step": 1082 }, { "epoch": 0.0620932832611874, "grad_norm": 0.0, "learning_rate": 6.6108597548784104e-06, "loss": 15.3619, "step": 1083 }, { "epoch": 0.062150617779434106, "grad_norm": 0.0, "learning_rate": 6.604999407629137e-06, "loss": 15.339, "step": 1084 }, { "epoch": 0.06220795229768082, "grad_norm": 0.0, "learning_rate": 6.599136601194128e-06, "loss": 14.9992, "step": 1085 }, { "epoch": 0.06226528681592753, "grad_norm": 0.0, "learning_rate": 6.593271344556399e-06, "loss": 15.297, "step": 1086 }, { "epoch": 0.06232262133417424, "grad_norm": 0.0, "learning_rate": 6.5874036467027135e-06, "loss": 15.0719, "step": 1087 }, { "epoch": 0.06237995585242095, "grad_norm": 0.0, "learning_rate": 6.58153351662358e-06, "loss": 15.1815, "step": 1088 }, { "epoch": 0.06243729037066766, "grad_norm": 0.0, "learning_rate": 6.575660963313233e-06, "loss": 15.363, "step": 1089 }, { "epoch": 0.06249462488891437, "grad_norm": 0.0, "learning_rate": 6.5697859957696195e-06, "loss": 15.193, "step": 1090 }, { "epoch": 0.06255195940716109, "grad_norm": 0.0, "learning_rate": 6.563908622994385e-06, "loss": 15.1983, "step": 1091 }, { "epoch": 0.06260929392540779, "grad_norm": 0.0, "learning_rate": 6.558028853992859e-06, "loss": 15.357, "step": 1092 }, { "epoch": 0.0626666284436545, "grad_norm": 0.0, "learning_rate": 6.552146697774049e-06, "loss": 15.4091, "step": 1093 }, { "epoch": 0.06272396296190122, "grad_norm": 0.0, "learning_rate": 6.546262163350609e-06, "loss": 15.4367, "step": 1094 }, { "epoch": 0.06278129748014792, "grad_norm": 0.0, "learning_rate": 6.540375259738849e-06, "loss": 15.3776, "step": 1095 }, { "epoch": 0.06283863199839464, "grad_norm": 0.0, "learning_rate": 6.534485995958699e-06, "loss": 15.3741, "step": 1096 }, { "epoch": 0.06289596651664134, "grad_norm": 0.0, "learning_rate": 6.528594381033714e-06, "loss": 15.4107, "step": 1097 }, { "epoch": 0.06295330103488805, "grad_norm": 0.0, "learning_rate": 6.522700423991043e-06, "loss": 15.1333, "step": 1098 }, { "epoch": 0.06301063555313477, "grad_norm": 0.0, "learning_rate": 6.51680413386143e-06, "loss": 15.1594, "step": 1099 }, { "epoch": 0.06306797007138147, "grad_norm": 0.0, "learning_rate": 6.510905519679192e-06, "loss": 15.0248, "step": 1100 }, { "epoch": 0.06312530458962819, "grad_norm": 0.0, "learning_rate": 6.5050045904822035e-06, "loss": 15.3396, "step": 1101 }, { "epoch": 0.0631826391078749, "grad_norm": 0.0, "learning_rate": 6.499101355311891e-06, "loss": 15.5805, "step": 1102 }, { "epoch": 0.0632399736261216, "grad_norm": 0.0, "learning_rate": 6.493195823213212e-06, "loss": 15.3565, "step": 1103 }, { "epoch": 0.06329730814436832, "grad_norm": 0.0, "learning_rate": 6.487288003234646e-06, "loss": 15.2634, "step": 1104 }, { "epoch": 0.06335464266261502, "grad_norm": 0.0, "learning_rate": 6.481377904428171e-06, "loss": 15.388, "step": 1105 }, { "epoch": 0.06341197718086174, "grad_norm": 0.0, "learning_rate": 6.475465535849263e-06, "loss": 15.4613, "step": 1106 }, { "epoch": 0.06346931169910845, "grad_norm": 0.0, "learning_rate": 6.469550906556874e-06, "loss": 15.601, "step": 1107 }, { "epoch": 0.06352664621735515, "grad_norm": 0.0, "learning_rate": 6.4636340256134224e-06, "loss": 15.2509, "step": 1108 }, { "epoch": 0.06358398073560187, "grad_norm": 0.0, "learning_rate": 6.457714902084769e-06, "loss": 15.6251, "step": 1109 }, { "epoch": 0.06364131525384858, "grad_norm": 0.0, "learning_rate": 6.451793545040218e-06, "loss": 15.2786, "step": 1110 }, { "epoch": 0.06369864977209529, "grad_norm": 0.0, "learning_rate": 6.445869963552496e-06, "loss": 15.3809, "step": 1111 }, { "epoch": 0.063755984290342, "grad_norm": 0.0, "learning_rate": 6.439944166697731e-06, "loss": 15.7765, "step": 1112 }, { "epoch": 0.06381331880858872, "grad_norm": 0.0, "learning_rate": 6.434016163555452e-06, "loss": 15.0431, "step": 1113 }, { "epoch": 0.06387065332683542, "grad_norm": 0.0, "learning_rate": 6.428085963208567e-06, "loss": 15.2539, "step": 1114 }, { "epoch": 0.06392798784508213, "grad_norm": 0.0, "learning_rate": 6.422153574743348e-06, "loss": 15.2095, "step": 1115 }, { "epoch": 0.06398532236332884, "grad_norm": 0.0, "learning_rate": 6.416219007249424e-06, "loss": 15.5957, "step": 1116 }, { "epoch": 0.06404265688157555, "grad_norm": 0.0, "learning_rate": 6.410282269819756e-06, "loss": 15.3808, "step": 1117 }, { "epoch": 0.06409999139982227, "grad_norm": 0.0, "learning_rate": 6.404343371550639e-06, "loss": 15.4368, "step": 1118 }, { "epoch": 0.06415732591806897, "grad_norm": 0.0, "learning_rate": 6.39840232154167e-06, "loss": 15.2563, "step": 1119 }, { "epoch": 0.06421466043631568, "grad_norm": 0.0, "learning_rate": 6.392459128895747e-06, "loss": 14.9078, "step": 1120 }, { "epoch": 0.0642719949545624, "grad_norm": 0.0, "learning_rate": 6.3865138027190535e-06, "loss": 15.11, "step": 1121 }, { "epoch": 0.0643293294728091, "grad_norm": 0.0, "learning_rate": 6.380566352121037e-06, "loss": 15.2601, "step": 1122 }, { "epoch": 0.06438666399105582, "grad_norm": 0.0, "learning_rate": 6.374616786214402e-06, "loss": 15.2519, "step": 1123 }, { "epoch": 0.06444399850930252, "grad_norm": 0.0, "learning_rate": 6.368665114115096e-06, "loss": 15.1558, "step": 1124 }, { "epoch": 0.06450133302754923, "grad_norm": 0.0, "learning_rate": 6.362711344942289e-06, "loss": 15.2966, "step": 1125 }, { "epoch": 0.06455866754579595, "grad_norm": 0.0, "learning_rate": 6.356755487818371e-06, "loss": 15.4227, "step": 1126 }, { "epoch": 0.06461600206404265, "grad_norm": 0.0, "learning_rate": 6.350797551868923e-06, "loss": 15.6837, "step": 1127 }, { "epoch": 0.06467333658228937, "grad_norm": 0.0, "learning_rate": 6.344837546222718e-06, "loss": 15.2018, "step": 1128 }, { "epoch": 0.06473067110053608, "grad_norm": 0.0, "learning_rate": 6.338875480011698e-06, "loss": 15.0102, "step": 1129 }, { "epoch": 0.06478800561878278, "grad_norm": 0.0, "learning_rate": 6.33291136237096e-06, "loss": 15.2271, "step": 1130 }, { "epoch": 0.0648453401370295, "grad_norm": 0.0, "learning_rate": 6.326945202438748e-06, "loss": 15.2503, "step": 1131 }, { "epoch": 0.06490267465527622, "grad_norm": 0.0, "learning_rate": 6.3209770093564315e-06, "loss": 15.0981, "step": 1132 }, { "epoch": 0.06496000917352292, "grad_norm": 0.0, "learning_rate": 6.3150067922684965e-06, "loss": 15.335, "step": 1133 }, { "epoch": 0.06501734369176963, "grad_norm": 0.0, "learning_rate": 6.3090345603225324e-06, "loss": 15.3527, "step": 1134 }, { "epoch": 0.06507467821001633, "grad_norm": 0.0, "learning_rate": 6.303060322669214e-06, "loss": 15.294, "step": 1135 }, { "epoch": 0.06513201272826305, "grad_norm": 0.0, "learning_rate": 6.297084088462288e-06, "loss": 14.9693, "step": 1136 }, { "epoch": 0.06518934724650977, "grad_norm": 0.0, "learning_rate": 6.291105866858562e-06, "loss": 15.3394, "step": 1137 }, { "epoch": 0.06524668176475647, "grad_norm": 0.0, "learning_rate": 6.285125667017886e-06, "loss": 15.804, "step": 1138 }, { "epoch": 0.06530401628300318, "grad_norm": 0.0, "learning_rate": 6.279143498103149e-06, "loss": 15.4625, "step": 1139 }, { "epoch": 0.0653613508012499, "grad_norm": 0.0, "learning_rate": 6.273159369280244e-06, "loss": 15.1583, "step": 1140 }, { "epoch": 0.0654186853194966, "grad_norm": 0.0, "learning_rate": 6.267173289718079e-06, "loss": 15.213, "step": 1141 }, { "epoch": 0.06547601983774332, "grad_norm": 0.0, "learning_rate": 6.261185268588546e-06, "loss": 15.3858, "step": 1142 }, { "epoch": 0.06553335435599002, "grad_norm": 0.0, "learning_rate": 6.25519531506651e-06, "loss": 15.3585, "step": 1143 }, { "epoch": 0.06559068887423673, "grad_norm": 0.0, "learning_rate": 6.249203438329799e-06, "loss": 15.5088, "step": 1144 }, { "epoch": 0.06564802339248345, "grad_norm": 0.0, "learning_rate": 6.24320964755919e-06, "loss": 15.382, "step": 1145 }, { "epoch": 0.06570535791073015, "grad_norm": 0.0, "learning_rate": 6.237213951938389e-06, "loss": 15.5293, "step": 1146 }, { "epoch": 0.06576269242897687, "grad_norm": 0.0, "learning_rate": 6.23121636065402e-06, "loss": 15.1687, "step": 1147 }, { "epoch": 0.06582002694722358, "grad_norm": 0.0, "learning_rate": 6.225216882895615e-06, "loss": 15.5723, "step": 1148 }, { "epoch": 0.06587736146547028, "grad_norm": 0.0, "learning_rate": 6.219215527855596e-06, "loss": 15.4502, "step": 1149 }, { "epoch": 0.065934695983717, "grad_norm": 0.0, "learning_rate": 6.213212304729259e-06, "loss": 15.6545, "step": 1150 }, { "epoch": 0.06599203050196371, "grad_norm": 0.0, "learning_rate": 6.207207222714763e-06, "loss": 15.2629, "step": 1151 }, { "epoch": 0.06604936502021042, "grad_norm": 0.0, "learning_rate": 6.201200291013117e-06, "loss": 15.1331, "step": 1152 }, { "epoch": 0.06610669953845713, "grad_norm": 0.0, "learning_rate": 6.195191518828163e-06, "loss": 15.142, "step": 1153 }, { "epoch": 0.06616403405670383, "grad_norm": 0.0, "learning_rate": 6.1891809153665614e-06, "loss": 15.5586, "step": 1154 }, { "epoch": 0.06622136857495055, "grad_norm": 0.0, "learning_rate": 6.183168489837781e-06, "loss": 15.1567, "step": 1155 }, { "epoch": 0.06627870309319726, "grad_norm": 0.0, "learning_rate": 6.177154251454082e-06, "loss": 15.3777, "step": 1156 }, { "epoch": 0.06633603761144397, "grad_norm": 0.0, "learning_rate": 6.1711382094305e-06, "loss": 15.5164, "step": 1157 }, { "epoch": 0.06639337212969068, "grad_norm": 0.0, "learning_rate": 6.165120372984836e-06, "loss": 14.9784, "step": 1158 }, { "epoch": 0.0664507066479374, "grad_norm": 0.0, "learning_rate": 6.1591007513376425e-06, "loss": 15.284, "step": 1159 }, { "epoch": 0.0665080411661841, "grad_norm": 0.0, "learning_rate": 6.153079353712201e-06, "loss": 15.2941, "step": 1160 }, { "epoch": 0.06656537568443081, "grad_norm": 0.0, "learning_rate": 6.1470561893345215e-06, "loss": 15.3123, "step": 1161 }, { "epoch": 0.06662271020267752, "grad_norm": 0.0, "learning_rate": 6.141031267433316e-06, "loss": 15.1475, "step": 1162 }, { "epoch": 0.06668004472092423, "grad_norm": 0.0, "learning_rate": 6.1350045972399926e-06, "loss": 15.3684, "step": 1163 }, { "epoch": 0.06673737923917095, "grad_norm": 0.0, "learning_rate": 6.128976187988633e-06, "loss": 15.2453, "step": 1164 }, { "epoch": 0.06679471375741765, "grad_norm": 0.0, "learning_rate": 6.122946048915991e-06, "loss": 15.0222, "step": 1165 }, { "epoch": 0.06685204827566436, "grad_norm": 0.0, "learning_rate": 6.116914189261466e-06, "loss": 15.4126, "step": 1166 }, { "epoch": 0.06690938279391108, "grad_norm": 0.0, "learning_rate": 6.110880618267092e-06, "loss": 15.4454, "step": 1167 }, { "epoch": 0.06696671731215778, "grad_norm": 0.0, "learning_rate": 6.1048453451775305e-06, "loss": 15.4416, "step": 1168 }, { "epoch": 0.0670240518304045, "grad_norm": 0.0, "learning_rate": 6.0988083792400466e-06, "loss": 15.4173, "step": 1169 }, { "epoch": 0.06708138634865121, "grad_norm": 0.0, "learning_rate": 6.092769729704502e-06, "loss": 15.521, "step": 1170 }, { "epoch": 0.06713872086689791, "grad_norm": 0.0, "learning_rate": 6.086729405823335e-06, "loss": 14.8905, "step": 1171 }, { "epoch": 0.06719605538514463, "grad_norm": 0.0, "learning_rate": 6.080687416851553e-06, "loss": 15.0262, "step": 1172 }, { "epoch": 0.06725338990339133, "grad_norm": 0.0, "learning_rate": 6.074643772046712e-06, "loss": 15.3374, "step": 1173 }, { "epoch": 0.06731072442163805, "grad_norm": 0.0, "learning_rate": 6.0685984806689055e-06, "loss": 15.2824, "step": 1174 }, { "epoch": 0.06736805893988476, "grad_norm": 0.0, "learning_rate": 6.06255155198075e-06, "loss": 15.2405, "step": 1175 }, { "epoch": 0.06742539345813146, "grad_norm": 0.0, "learning_rate": 6.056502995247371e-06, "loss": 15.4861, "step": 1176 }, { "epoch": 0.06748272797637818, "grad_norm": 0.0, "learning_rate": 6.05045281973639e-06, "loss": 15.3327, "step": 1177 }, { "epoch": 0.0675400624946249, "grad_norm": 0.0, "learning_rate": 6.044401034717905e-06, "loss": 15.0999, "step": 1178 }, { "epoch": 0.0675973970128716, "grad_norm": 0.0, "learning_rate": 6.038347649464483e-06, "loss": 15.274, "step": 1179 }, { "epoch": 0.06765473153111831, "grad_norm": 0.0, "learning_rate": 6.032292673251143e-06, "loss": 15.4088, "step": 1180 }, { "epoch": 0.06771206604936501, "grad_norm": 0.0, "learning_rate": 6.0262361153553395e-06, "loss": 15.5959, "step": 1181 }, { "epoch": 0.06776940056761173, "grad_norm": 0.0, "learning_rate": 6.020177985056953e-06, "loss": 15.287, "step": 1182 }, { "epoch": 0.06782673508585844, "grad_norm": 0.0, "learning_rate": 6.014118291638272e-06, "loss": 15.0794, "step": 1183 }, { "epoch": 0.06788406960410515, "grad_norm": 0.0, "learning_rate": 6.008057044383978e-06, "loss": 15.3053, "step": 1184 }, { "epoch": 0.06794140412235186, "grad_norm": 0.0, "learning_rate": 6.0019942525811385e-06, "loss": 15.4402, "step": 1185 }, { "epoch": 0.06799873864059858, "grad_norm": 0.0, "learning_rate": 5.995929925519181e-06, "loss": 15.2618, "step": 1186 }, { "epoch": 0.06805607315884528, "grad_norm": 0.0, "learning_rate": 5.989864072489892e-06, "loss": 15.4546, "step": 1187 }, { "epoch": 0.068113407677092, "grad_norm": 0.0, "learning_rate": 5.98379670278739e-06, "loss": 15.4383, "step": 1188 }, { "epoch": 0.06817074219533871, "grad_norm": 0.0, "learning_rate": 5.977727825708123e-06, "loss": 15.2373, "step": 1189 }, { "epoch": 0.06822807671358541, "grad_norm": 0.0, "learning_rate": 5.971657450550844e-06, "loss": 15.3016, "step": 1190 }, { "epoch": 0.06828541123183213, "grad_norm": 0.0, "learning_rate": 5.965585586616602e-06, "loss": 15.619, "step": 1191 }, { "epoch": 0.06834274575007883, "grad_norm": 0.0, "learning_rate": 5.959512243208732e-06, "loss": 15.3967, "step": 1192 }, { "epoch": 0.06840008026832554, "grad_norm": 0.0, "learning_rate": 5.953437429632829e-06, "loss": 15.1485, "step": 1193 }, { "epoch": 0.06845741478657226, "grad_norm": 0.0, "learning_rate": 5.947361155196744e-06, "loss": 15.3058, "step": 1194 }, { "epoch": 0.06851474930481896, "grad_norm": 0.0, "learning_rate": 5.941283429210568e-06, "loss": 15.2705, "step": 1195 }, { "epoch": 0.06857208382306568, "grad_norm": 0.0, "learning_rate": 5.935204260986611e-06, "loss": 14.8969, "step": 1196 }, { "epoch": 0.06862941834131239, "grad_norm": 0.0, "learning_rate": 5.9291236598393996e-06, "loss": 15.0611, "step": 1197 }, { "epoch": 0.0686867528595591, "grad_norm": 0.0, "learning_rate": 5.9230416350856505e-06, "loss": 15.297, "step": 1198 }, { "epoch": 0.06874408737780581, "grad_norm": 0.0, "learning_rate": 5.9169581960442615e-06, "loss": 15.4516, "step": 1199 }, { "epoch": 0.06880142189605253, "grad_norm": 0.0, "learning_rate": 5.910873352036302e-06, "loss": 15.607, "step": 1200 }, { "epoch": 0.06885875641429923, "grad_norm": 0.0, "learning_rate": 5.904787112384991e-06, "loss": 15.0717, "step": 1201 }, { "epoch": 0.06891609093254594, "grad_norm": 0.0, "learning_rate": 5.898699486415686e-06, "loss": 15.1691, "step": 1202 }, { "epoch": 0.06897342545079264, "grad_norm": 0.0, "learning_rate": 5.892610483455867e-06, "loss": 15.5078, "step": 1203 }, { "epoch": 0.06903075996903936, "grad_norm": 0.0, "learning_rate": 5.886520112835128e-06, "loss": 15.2725, "step": 1204 }, { "epoch": 0.06908809448728608, "grad_norm": 0.0, "learning_rate": 5.880428383885157e-06, "loss": 15.6883, "step": 1205 }, { "epoch": 0.06914542900553278, "grad_norm": 0.0, "learning_rate": 5.874335305939719e-06, "loss": 15.4606, "step": 1206 }, { "epoch": 0.06920276352377949, "grad_norm": 0.0, "learning_rate": 5.8682408883346535e-06, "loss": 15.3959, "step": 1207 }, { "epoch": 0.06926009804202621, "grad_norm": 0.0, "learning_rate": 5.8621451404078455e-06, "loss": 15.3062, "step": 1208 }, { "epoch": 0.06931743256027291, "grad_norm": 0.0, "learning_rate": 5.856048071499223e-06, "loss": 15.5863, "step": 1209 }, { "epoch": 0.06937476707851963, "grad_norm": 0.0, "learning_rate": 5.849949690950736e-06, "loss": 15.1853, "step": 1210 }, { "epoch": 0.06943210159676633, "grad_norm": 0.0, "learning_rate": 5.843850008106344e-06, "loss": 14.9957, "step": 1211 }, { "epoch": 0.06948943611501304, "grad_norm": 0.0, "learning_rate": 5.837749032312005e-06, "loss": 15.2185, "step": 1212 }, { "epoch": 0.06954677063325976, "grad_norm": 0.0, "learning_rate": 5.831646772915651e-06, "loss": 15.132, "step": 1213 }, { "epoch": 0.06960410515150646, "grad_norm": 0.0, "learning_rate": 5.82554323926719e-06, "loss": 15.1604, "step": 1214 }, { "epoch": 0.06966143966975318, "grad_norm": 0.0, "learning_rate": 5.819438440718476e-06, "loss": 14.8557, "step": 1215 }, { "epoch": 0.06971877418799989, "grad_norm": 0.0, "learning_rate": 5.8133323866233005e-06, "loss": 15.6393, "step": 1216 }, { "epoch": 0.06977610870624659, "grad_norm": 0.0, "learning_rate": 5.807225086337383e-06, "loss": 15.1016, "step": 1217 }, { "epoch": 0.06983344322449331, "grad_norm": 0.0, "learning_rate": 5.8011165492183516e-06, "loss": 15.2926, "step": 1218 }, { "epoch": 0.06989077774274002, "grad_norm": 0.0, "learning_rate": 5.795006784625728e-06, "loss": 15.0293, "step": 1219 }, { "epoch": 0.06994811226098673, "grad_norm": 0.0, "learning_rate": 5.788895801920914e-06, "loss": 15.1197, "step": 1220 }, { "epoch": 0.07000544677923344, "grad_norm": 0.0, "learning_rate": 5.782783610467177e-06, "loss": 15.5134, "step": 1221 }, { "epoch": 0.07006278129748014, "grad_norm": 0.0, "learning_rate": 5.776670219629643e-06, "loss": 15.3721, "step": 1222 }, { "epoch": 0.07012011581572686, "grad_norm": 0.0, "learning_rate": 5.770555638775267e-06, "loss": 15.3128, "step": 1223 }, { "epoch": 0.07017745033397357, "grad_norm": 0.0, "learning_rate": 5.764439877272833e-06, "loss": 15.3769, "step": 1224 }, { "epoch": 0.07023478485222028, "grad_norm": 0.0, "learning_rate": 5.75832294449293e-06, "loss": 15.113, "step": 1225 }, { "epoch": 0.07029211937046699, "grad_norm": 0.0, "learning_rate": 5.752204849807948e-06, "loss": 15.3764, "step": 1226 }, { "epoch": 0.0703494538887137, "grad_norm": 0.0, "learning_rate": 5.74608560259205e-06, "loss": 15.2141, "step": 1227 }, { "epoch": 0.07040678840696041, "grad_norm": 0.0, "learning_rate": 5.739965212221168e-06, "loss": 15.0714, "step": 1228 }, { "epoch": 0.07046412292520712, "grad_norm": 0.0, "learning_rate": 5.733843688072987e-06, "loss": 15.5016, "step": 1229 }, { "epoch": 0.07052145744345382, "grad_norm": 0.0, "learning_rate": 5.727721039526928e-06, "loss": 15.3094, "step": 1230 }, { "epoch": 0.07057879196170054, "grad_norm": 0.0, "learning_rate": 5.7215972759641335e-06, "loss": 15.3874, "step": 1231 }, { "epoch": 0.07063612647994726, "grad_norm": 0.0, "learning_rate": 5.715472406767457e-06, "loss": 15.219, "step": 1232 }, { "epoch": 0.07069346099819396, "grad_norm": 0.0, "learning_rate": 5.709346441321443e-06, "loss": 15.4031, "step": 1233 }, { "epoch": 0.07075079551644067, "grad_norm": 0.0, "learning_rate": 5.703219389012317e-06, "loss": 15.1785, "step": 1234 }, { "epoch": 0.07080813003468739, "grad_norm": 0.0, "learning_rate": 5.697091259227973e-06, "loss": 15.2524, "step": 1235 }, { "epoch": 0.07086546455293409, "grad_norm": 0.0, "learning_rate": 5.69096206135795e-06, "loss": 15.2352, "step": 1236 }, { "epoch": 0.0709227990711808, "grad_norm": 0.0, "learning_rate": 5.684831804793427e-06, "loss": 15.1509, "step": 1237 }, { "epoch": 0.07098013358942752, "grad_norm": 0.0, "learning_rate": 5.6787004989272066e-06, "loss": 15.2295, "step": 1238 }, { "epoch": 0.07103746810767422, "grad_norm": 0.0, "learning_rate": 5.6725681531536955e-06, "loss": 15.2069, "step": 1239 }, { "epoch": 0.07109480262592094, "grad_norm": 0.0, "learning_rate": 5.666434776868895e-06, "loss": 15.4838, "step": 1240 }, { "epoch": 0.07115213714416764, "grad_norm": 0.0, "learning_rate": 5.660300379470387e-06, "loss": 15.1852, "step": 1241 }, { "epoch": 0.07120947166241436, "grad_norm": 0.0, "learning_rate": 5.654164970357316e-06, "loss": 15.2174, "step": 1242 }, { "epoch": 0.07126680618066107, "grad_norm": 0.0, "learning_rate": 5.64802855893038e-06, "loss": 15.5695, "step": 1243 }, { "epoch": 0.07132414069890777, "grad_norm": 0.0, "learning_rate": 5.641891154591805e-06, "loss": 15.093, "step": 1244 }, { "epoch": 0.07138147521715449, "grad_norm": 0.0, "learning_rate": 5.635752766745347e-06, "loss": 15.5662, "step": 1245 }, { "epoch": 0.0714388097354012, "grad_norm": 0.0, "learning_rate": 5.629613404796267e-06, "loss": 15.529, "step": 1246 }, { "epoch": 0.0714961442536479, "grad_norm": 0.0, "learning_rate": 5.623473078151313e-06, "loss": 15.3623, "step": 1247 }, { "epoch": 0.07155347877189462, "grad_norm": 0.0, "learning_rate": 5.617331796218717e-06, "loss": 15.7112, "step": 1248 }, { "epoch": 0.07161081329014132, "grad_norm": 0.0, "learning_rate": 5.611189568408173e-06, "loss": 15.6012, "step": 1249 }, { "epoch": 0.07166814780838804, "grad_norm": 0.0, "learning_rate": 5.605046404130824e-06, "loss": 15.1824, "step": 1250 }, { "epoch": 0.07172548232663475, "grad_norm": 0.0, "learning_rate": 5.598902312799247e-06, "loss": 15.3857, "step": 1251 }, { "epoch": 0.07178281684488146, "grad_norm": 0.0, "learning_rate": 5.592757303827441e-06, "loss": 15.3178, "step": 1252 }, { "epoch": 0.07184015136312817, "grad_norm": 0.0, "learning_rate": 5.586611386630811e-06, "loss": 14.9769, "step": 1253 }, { "epoch": 0.07189748588137489, "grad_norm": 0.0, "learning_rate": 5.5804645706261515e-06, "loss": 15.2135, "step": 1254 }, { "epoch": 0.07195482039962159, "grad_norm": 0.0, "learning_rate": 5.574316865231637e-06, "loss": 15.5411, "step": 1255 }, { "epoch": 0.0720121549178683, "grad_norm": 0.0, "learning_rate": 5.568168279866801e-06, "loss": 15.4522, "step": 1256 }, { "epoch": 0.07206948943611502, "grad_norm": 0.0, "learning_rate": 5.562018823952532e-06, "loss": 15.5142, "step": 1257 }, { "epoch": 0.07212682395436172, "grad_norm": 0.0, "learning_rate": 5.5558685069110444e-06, "loss": 15.1283, "step": 1258 }, { "epoch": 0.07218415847260844, "grad_norm": 0.0, "learning_rate": 5.549717338165876e-06, "loss": 15.3748, "step": 1259 }, { "epoch": 0.07224149299085514, "grad_norm": 0.0, "learning_rate": 5.5435653271418686e-06, "loss": 15.4054, "step": 1260 }, { "epoch": 0.07229882750910185, "grad_norm": 0.0, "learning_rate": 5.537412483265156e-06, "loss": 15.1071, "step": 1261 }, { "epoch": 0.07235616202734857, "grad_norm": 0.0, "learning_rate": 5.5312588159631485e-06, "loss": 15.3223, "step": 1262 }, { "epoch": 0.07241349654559527, "grad_norm": 0.0, "learning_rate": 5.525104334664517e-06, "loss": 15.4098, "step": 1263 }, { "epoch": 0.07247083106384199, "grad_norm": 0.0, "learning_rate": 5.518949048799176e-06, "loss": 15.1196, "step": 1264 }, { "epoch": 0.0725281655820887, "grad_norm": 0.0, "learning_rate": 5.512792967798278e-06, "loss": 15.1726, "step": 1265 }, { "epoch": 0.0725855001003354, "grad_norm": 0.0, "learning_rate": 5.506636101094193e-06, "loss": 15.07, "step": 1266 }, { "epoch": 0.07264283461858212, "grad_norm": 0.0, "learning_rate": 5.500478458120493e-06, "loss": 15.5946, "step": 1267 }, { "epoch": 0.07270016913682882, "grad_norm": 0.0, "learning_rate": 5.4943200483119385e-06, "loss": 15.0114, "step": 1268 }, { "epoch": 0.07275750365507554, "grad_norm": 0.0, "learning_rate": 5.48816088110447e-06, "loss": 15.1489, "step": 1269 }, { "epoch": 0.07281483817332225, "grad_norm": 0.0, "learning_rate": 5.482000965935182e-06, "loss": 15.2691, "step": 1270 }, { "epoch": 0.07287217269156895, "grad_norm": 0.0, "learning_rate": 5.475840312242321e-06, "loss": 15.2723, "step": 1271 }, { "epoch": 0.07292950720981567, "grad_norm": 0.0, "learning_rate": 5.4696789294652596e-06, "loss": 15.2848, "step": 1272 }, { "epoch": 0.07298684172806238, "grad_norm": 0.0, "learning_rate": 5.463516827044492e-06, "loss": 15.2138, "step": 1273 }, { "epoch": 0.07304417624630909, "grad_norm": 0.0, "learning_rate": 5.457354014421613e-06, "loss": 15.3442, "step": 1274 }, { "epoch": 0.0731015107645558, "grad_norm": 0.0, "learning_rate": 5.4511905010393055e-06, "loss": 15.102, "step": 1275 }, { "epoch": 0.07315884528280252, "grad_norm": 0.0, "learning_rate": 5.445026296341325e-06, "loss": 15.5483, "step": 1276 }, { "epoch": 0.07321617980104922, "grad_norm": 0.0, "learning_rate": 5.438861409772489e-06, "loss": 15.2401, "step": 1277 }, { "epoch": 0.07327351431929593, "grad_norm": 0.0, "learning_rate": 5.432695850778658e-06, "loss": 15.4661, "step": 1278 }, { "epoch": 0.07333084883754264, "grad_norm": 0.0, "learning_rate": 5.4265296288067235e-06, "loss": 15.4194, "step": 1279 }, { "epoch": 0.07338818335578935, "grad_norm": 0.0, "learning_rate": 5.420362753304594e-06, "loss": 15.1753, "step": 1280 }, { "epoch": 0.07344551787403607, "grad_norm": 0.0, "learning_rate": 5.414195233721175e-06, "loss": 15.5165, "step": 1281 }, { "epoch": 0.07350285239228277, "grad_norm": 0.0, "learning_rate": 5.408027079506362e-06, "loss": 15.08, "step": 1282 }, { "epoch": 0.07356018691052948, "grad_norm": 0.0, "learning_rate": 5.401858300111024e-06, "loss": 15.2593, "step": 1283 }, { "epoch": 0.0736175214287762, "grad_norm": 0.0, "learning_rate": 5.395688904986987e-06, "loss": 15.019, "step": 1284 }, { "epoch": 0.0736748559470229, "grad_norm": 0.0, "learning_rate": 5.389518903587016e-06, "loss": 15.4, "step": 1285 }, { "epoch": 0.07373219046526962, "grad_norm": 0.0, "learning_rate": 5.383348305364814e-06, "loss": 15.1214, "step": 1286 }, { "epoch": 0.07378952498351632, "grad_norm": 0.0, "learning_rate": 5.37717711977499e-06, "loss": 15.6768, "step": 1287 }, { "epoch": 0.07384685950176303, "grad_norm": 0.0, "learning_rate": 5.371005356273058e-06, "loss": 15.1213, "step": 1288 }, { "epoch": 0.07390419402000975, "grad_norm": 0.0, "learning_rate": 5.364833024315414e-06, "loss": 15.2674, "step": 1289 }, { "epoch": 0.07396152853825645, "grad_norm": 0.0, "learning_rate": 5.358660133359328e-06, "loss": 15.4868, "step": 1290 }, { "epoch": 0.07401886305650317, "grad_norm": 0.0, "learning_rate": 5.352486692862926e-06, "loss": 15.296, "step": 1291 }, { "epoch": 0.07407619757474988, "grad_norm": 0.0, "learning_rate": 5.346312712285172e-06, "loss": 15.4523, "step": 1292 }, { "epoch": 0.07413353209299658, "grad_norm": 0.0, "learning_rate": 5.340138201085864e-06, "loss": 15.1769, "step": 1293 }, { "epoch": 0.0741908666112433, "grad_norm": 0.0, "learning_rate": 5.3339631687256085e-06, "loss": 15.244, "step": 1294 }, { "epoch": 0.07424820112949002, "grad_norm": 0.0, "learning_rate": 5.327787624665811e-06, "loss": 15.4797, "step": 1295 }, { "epoch": 0.07430553564773672, "grad_norm": 0.0, "learning_rate": 5.321611578368664e-06, "loss": 15.2982, "step": 1296 }, { "epoch": 0.07436287016598343, "grad_norm": 0.0, "learning_rate": 5.3154350392971245e-06, "loss": 15.1468, "step": 1297 }, { "epoch": 0.07442020468423013, "grad_norm": 0.0, "learning_rate": 5.309258016914911e-06, "loss": 15.3865, "step": 1298 }, { "epoch": 0.07447753920247685, "grad_norm": 0.0, "learning_rate": 5.303080520686474e-06, "loss": 15.3151, "step": 1299 }, { "epoch": 0.07453487372072357, "grad_norm": 0.0, "learning_rate": 5.296902560077e-06, "loss": 15.5556, "step": 1300 }, { "epoch": 0.07459220823897027, "grad_norm": 0.0, "learning_rate": 5.290724144552379e-06, "loss": 15.4445, "step": 1301 }, { "epoch": 0.07464954275721698, "grad_norm": 0.0, "learning_rate": 5.284545283579204e-06, "loss": 15.0258, "step": 1302 }, { "epoch": 0.0747068772754637, "grad_norm": 0.0, "learning_rate": 5.278365986624743e-06, "loss": 15.1966, "step": 1303 }, { "epoch": 0.0747642117937104, "grad_norm": 0.0, "learning_rate": 5.272186263156937e-06, "loss": 15.1388, "step": 1304 }, { "epoch": 0.07482154631195712, "grad_norm": 0.0, "learning_rate": 5.266006122644385e-06, "loss": 15.0732, "step": 1305 }, { "epoch": 0.07487888083020383, "grad_norm": 0.0, "learning_rate": 5.259825574556315e-06, "loss": 15.3282, "step": 1306 }, { "epoch": 0.07493621534845053, "grad_norm": 0.0, "learning_rate": 5.2536446283625865e-06, "loss": 15.4205, "step": 1307 }, { "epoch": 0.07499354986669725, "grad_norm": 0.0, "learning_rate": 5.247463293533667e-06, "loss": 15.5121, "step": 1308 }, { "epoch": 0.07505088438494395, "grad_norm": 0.0, "learning_rate": 5.241281579540619e-06, "loss": 15.1642, "step": 1309 }, { "epoch": 0.07510821890319067, "grad_norm": 0.0, "learning_rate": 5.235099495855086e-06, "loss": 15.0035, "step": 1310 }, { "epoch": 0.07516555342143738, "grad_norm": 0.0, "learning_rate": 5.228917051949279e-06, "loss": 15.2187, "step": 1311 }, { "epoch": 0.07522288793968408, "grad_norm": 0.0, "learning_rate": 5.222734257295963e-06, "loss": 15.0756, "step": 1312 }, { "epoch": 0.0752802224579308, "grad_norm": 0.0, "learning_rate": 5.216551121368432e-06, "loss": 15.1728, "step": 1313 }, { "epoch": 0.07533755697617751, "grad_norm": 0.0, "learning_rate": 5.210367653640512e-06, "loss": 15.5529, "step": 1314 }, { "epoch": 0.07539489149442422, "grad_norm": 0.0, "learning_rate": 5.2041838635865336e-06, "loss": 14.919, "step": 1315 }, { "epoch": 0.07545222601267093, "grad_norm": 0.0, "learning_rate": 5.197999760681324e-06, "loss": 15.5087, "step": 1316 }, { "epoch": 0.07550956053091763, "grad_norm": 0.0, "learning_rate": 5.191815354400183e-06, "loss": 15.603, "step": 1317 }, { "epoch": 0.07556689504916435, "grad_norm": 0.0, "learning_rate": 5.1856306542188805e-06, "loss": 15.3248, "step": 1318 }, { "epoch": 0.07562422956741106, "grad_norm": 0.0, "learning_rate": 5.17944566961364e-06, "loss": 15.214, "step": 1319 }, { "epoch": 0.07568156408565777, "grad_norm": 0.0, "learning_rate": 5.173260410061112e-06, "loss": 15.3195, "step": 1320 }, { "epoch": 0.07573889860390448, "grad_norm": 0.0, "learning_rate": 5.1670748850383734e-06, "loss": 15.1759, "step": 1321 }, { "epoch": 0.0757962331221512, "grad_norm": 0.0, "learning_rate": 5.16088910402291e-06, "loss": 15.2272, "step": 1322 }, { "epoch": 0.0758535676403979, "grad_norm": 0.0, "learning_rate": 5.154703076492597e-06, "loss": 15.3173, "step": 1323 }, { "epoch": 0.07591090215864461, "grad_norm": 0.0, "learning_rate": 5.148516811925684e-06, "loss": 14.9612, "step": 1324 }, { "epoch": 0.07596823667689133, "grad_norm": 0.0, "learning_rate": 5.14233031980079e-06, "loss": 15.0465, "step": 1325 }, { "epoch": 0.07602557119513803, "grad_norm": 0.0, "learning_rate": 5.136143609596882e-06, "loss": 15.1786, "step": 1326 }, { "epoch": 0.07608290571338475, "grad_norm": 0.0, "learning_rate": 5.129956690793255e-06, "loss": 15.1265, "step": 1327 }, { "epoch": 0.07614024023163145, "grad_norm": 0.0, "learning_rate": 5.1237695728695294e-06, "loss": 15.5801, "step": 1328 }, { "epoch": 0.07619757474987816, "grad_norm": 0.0, "learning_rate": 5.117582265305629e-06, "loss": 15.5675, "step": 1329 }, { "epoch": 0.07625490926812488, "grad_norm": 0.0, "learning_rate": 5.111394777581769e-06, "loss": 15.4257, "step": 1330 }, { "epoch": 0.07631224378637158, "grad_norm": 0.0, "learning_rate": 5.105207119178439e-06, "loss": 15.2455, "step": 1331 }, { "epoch": 0.0763695783046183, "grad_norm": 0.0, "learning_rate": 5.099019299576391e-06, "loss": 15.3326, "step": 1332 }, { "epoch": 0.07642691282286501, "grad_norm": 0.0, "learning_rate": 5.0928313282566255e-06, "loss": 15.2436, "step": 1333 }, { "epoch": 0.07648424734111171, "grad_norm": 0.0, "learning_rate": 5.086643214700371e-06, "loss": 15.4032, "step": 1334 }, { "epoch": 0.07654158185935843, "grad_norm": 0.0, "learning_rate": 5.080454968389078e-06, "loss": 15.2234, "step": 1335 }, { "epoch": 0.07659891637760513, "grad_norm": 0.0, "learning_rate": 5.074266598804402e-06, "loss": 15.2235, "step": 1336 }, { "epoch": 0.07665625089585185, "grad_norm": 0.0, "learning_rate": 5.068078115428179e-06, "loss": 15.2521, "step": 1337 }, { "epoch": 0.07671358541409856, "grad_norm": 0.0, "learning_rate": 5.06188952774243e-06, "loss": 15.389, "step": 1338 }, { "epoch": 0.07677091993234526, "grad_norm": 0.0, "learning_rate": 5.0557008452293275e-06, "loss": 15.1074, "step": 1339 }, { "epoch": 0.07682825445059198, "grad_norm": 0.0, "learning_rate": 5.049512077371197e-06, "loss": 15.0319, "step": 1340 }, { "epoch": 0.0768855889688387, "grad_norm": 0.0, "learning_rate": 5.043323233650485e-06, "loss": 15.2615, "step": 1341 }, { "epoch": 0.0769429234870854, "grad_norm": 0.0, "learning_rate": 5.037134323549763e-06, "loss": 15.044, "step": 1342 }, { "epoch": 0.07700025800533211, "grad_norm": 0.0, "learning_rate": 5.030945356551701e-06, "loss": 15.0863, "step": 1343 }, { "epoch": 0.07705759252357883, "grad_norm": 0.0, "learning_rate": 5.024756342139053e-06, "loss": 15.4426, "step": 1344 }, { "epoch": 0.07711492704182553, "grad_norm": 0.0, "learning_rate": 5.0185672897946515e-06, "loss": 15.4382, "step": 1345 }, { "epoch": 0.07717226156007224, "grad_norm": 0.0, "learning_rate": 5.012378209001383e-06, "loss": 15.4366, "step": 1346 }, { "epoch": 0.07722959607831895, "grad_norm": 0.0, "learning_rate": 5.00618910924218e-06, "loss": 15.0979, "step": 1347 }, { "epoch": 0.07728693059656566, "grad_norm": 0.0, "learning_rate": 5e-06, "loss": 15.0105, "step": 1348 }, { "epoch": 0.07734426511481238, "grad_norm": 0.0, "learning_rate": 4.993810890757823e-06, "loss": 15.3259, "step": 1349 }, { "epoch": 0.07740159963305908, "grad_norm": 0.0, "learning_rate": 4.987621790998619e-06, "loss": 14.9897, "step": 1350 }, { "epoch": 0.0774589341513058, "grad_norm": 0.0, "learning_rate": 4.981432710205351e-06, "loss": 15.3484, "step": 1351 }, { "epoch": 0.07751626866955251, "grad_norm": 0.0, "learning_rate": 4.975243657860948e-06, "loss": 15.2349, "step": 1352 }, { "epoch": 0.07757360318779921, "grad_norm": 0.0, "learning_rate": 4.969054643448302e-06, "loss": 15.3359, "step": 1353 }, { "epoch": 0.07763093770604593, "grad_norm": 0.0, "learning_rate": 4.962865676450239e-06, "loss": 15.5123, "step": 1354 }, { "epoch": 0.07768827222429263, "grad_norm": 0.0, "learning_rate": 4.956676766349517e-06, "loss": 15.1643, "step": 1355 }, { "epoch": 0.07774560674253934, "grad_norm": 0.0, "learning_rate": 4.9504879226288045e-06, "loss": 15.3605, "step": 1356 }, { "epoch": 0.07780294126078606, "grad_norm": 0.0, "learning_rate": 4.944299154770673e-06, "loss": 14.9246, "step": 1357 }, { "epoch": 0.07786027577903276, "grad_norm": 0.0, "learning_rate": 4.938110472257572e-06, "loss": 15.3627, "step": 1358 }, { "epoch": 0.07791761029727948, "grad_norm": 0.0, "learning_rate": 4.931921884571823e-06, "loss": 15.2038, "step": 1359 }, { "epoch": 0.07797494481552619, "grad_norm": 0.0, "learning_rate": 4.925733401195601e-06, "loss": 15.4349, "step": 1360 }, { "epoch": 0.0780322793337729, "grad_norm": 0.0, "learning_rate": 4.919545031610925e-06, "loss": 15.1013, "step": 1361 }, { "epoch": 0.07808961385201961, "grad_norm": 0.0, "learning_rate": 4.913356785299631e-06, "loss": 15.0782, "step": 1362 }, { "epoch": 0.07814694837026633, "grad_norm": 0.0, "learning_rate": 4.907168671743377e-06, "loss": 15.248, "step": 1363 }, { "epoch": 0.07820428288851303, "grad_norm": 0.0, "learning_rate": 4.9009807004236105e-06, "loss": 15.2272, "step": 1364 }, { "epoch": 0.07826161740675974, "grad_norm": 0.0, "learning_rate": 4.894792880821563e-06, "loss": 15.2808, "step": 1365 }, { "epoch": 0.07831895192500644, "grad_norm": 0.0, "learning_rate": 4.888605222418232e-06, "loss": 15.2294, "step": 1366 }, { "epoch": 0.07837628644325316, "grad_norm": 0.0, "learning_rate": 4.882417734694372e-06, "loss": 15.302, "step": 1367 }, { "epoch": 0.07843362096149988, "grad_norm": 0.0, "learning_rate": 4.876230427130472e-06, "loss": 15.2778, "step": 1368 }, { "epoch": 0.07849095547974658, "grad_norm": 0.0, "learning_rate": 4.8700433092067474e-06, "loss": 15.2622, "step": 1369 }, { "epoch": 0.07854828999799329, "grad_norm": 0.0, "learning_rate": 4.86385639040312e-06, "loss": 15.3537, "step": 1370 }, { "epoch": 0.07860562451624001, "grad_norm": 0.0, "learning_rate": 4.8576696801992105e-06, "loss": 15.1753, "step": 1371 }, { "epoch": 0.07866295903448671, "grad_norm": 0.0, "learning_rate": 4.8514831880743175e-06, "loss": 15.3063, "step": 1372 }, { "epoch": 0.07872029355273343, "grad_norm": 0.0, "learning_rate": 4.845296923507406e-06, "loss": 15.5033, "step": 1373 }, { "epoch": 0.07877762807098013, "grad_norm": 0.0, "learning_rate": 4.839110895977092e-06, "loss": 15.3774, "step": 1374 }, { "epoch": 0.07883496258922684, "grad_norm": 0.0, "learning_rate": 4.832925114961629e-06, "loss": 14.7429, "step": 1375 }, { "epoch": 0.07889229710747356, "grad_norm": 0.0, "learning_rate": 4.8267395899388905e-06, "loss": 15.3443, "step": 1376 }, { "epoch": 0.07894963162572026, "grad_norm": 0.0, "learning_rate": 4.820554330386363e-06, "loss": 15.1586, "step": 1377 }, { "epoch": 0.07900696614396698, "grad_norm": 0.0, "learning_rate": 4.814369345781121e-06, "loss": 15.1614, "step": 1378 }, { "epoch": 0.07906430066221369, "grad_norm": 0.0, "learning_rate": 4.8081846455998205e-06, "loss": 15.2101, "step": 1379 }, { "epoch": 0.07912163518046039, "grad_norm": 0.0, "learning_rate": 4.802000239318678e-06, "loss": 15.6754, "step": 1380 }, { "epoch": 0.07917896969870711, "grad_norm": 0.0, "learning_rate": 4.795816136413467e-06, "loss": 15.2554, "step": 1381 }, { "epoch": 0.07923630421695382, "grad_norm": 0.0, "learning_rate": 4.789632346359489e-06, "loss": 15.013, "step": 1382 }, { "epoch": 0.07929363873520053, "grad_norm": 0.0, "learning_rate": 4.78344887863157e-06, "loss": 15.389, "step": 1383 }, { "epoch": 0.07935097325344724, "grad_norm": 0.0, "learning_rate": 4.777265742704039e-06, "loss": 15.3423, "step": 1384 }, { "epoch": 0.07940830777169394, "grad_norm": 0.0, "learning_rate": 4.771082948050722e-06, "loss": 15.0646, "step": 1385 }, { "epoch": 0.07946564228994066, "grad_norm": 0.0, "learning_rate": 4.764900504144915e-06, "loss": 15.0467, "step": 1386 }, { "epoch": 0.07952297680818737, "grad_norm": 0.0, "learning_rate": 4.758718420459383e-06, "loss": 15.2004, "step": 1387 }, { "epoch": 0.07958031132643408, "grad_norm": 0.0, "learning_rate": 4.7525367064663355e-06, "loss": 15.5153, "step": 1388 }, { "epoch": 0.07963764584468079, "grad_norm": 0.0, "learning_rate": 4.746355371637416e-06, "loss": 15.1112, "step": 1389 }, { "epoch": 0.0796949803629275, "grad_norm": 0.0, "learning_rate": 4.740174425443687e-06, "loss": 15.2085, "step": 1390 }, { "epoch": 0.07975231488117421, "grad_norm": 0.0, "learning_rate": 4.733993877355618e-06, "loss": 15.4787, "step": 1391 }, { "epoch": 0.07980964939942092, "grad_norm": 0.0, "learning_rate": 4.7278137368430635e-06, "loss": 15.3414, "step": 1392 }, { "epoch": 0.07986698391766762, "grad_norm": 0.0, "learning_rate": 4.7216340133752604e-06, "loss": 15.0015, "step": 1393 }, { "epoch": 0.07992431843591434, "grad_norm": 0.0, "learning_rate": 4.715454716420798e-06, "loss": 15.4287, "step": 1394 }, { "epoch": 0.07998165295416106, "grad_norm": 0.0, "learning_rate": 4.7092758554476215e-06, "loss": 15.2161, "step": 1395 }, { "epoch": 0.08003898747240776, "grad_norm": 0.0, "learning_rate": 4.703097439923e-06, "loss": 14.9452, "step": 1396 }, { "epoch": 0.08009632199065447, "grad_norm": 0.0, "learning_rate": 4.696919479313527e-06, "loss": 15.1395, "step": 1397 }, { "epoch": 0.08015365650890119, "grad_norm": 0.0, "learning_rate": 4.6907419830850906e-06, "loss": 15.1513, "step": 1398 }, { "epoch": 0.08021099102714789, "grad_norm": 0.0, "learning_rate": 4.684564960702877e-06, "loss": 15.5572, "step": 1399 }, { "epoch": 0.0802683255453946, "grad_norm": 0.0, "learning_rate": 4.678388421631337e-06, "loss": 15.3592, "step": 1400 }, { "epoch": 0.08032566006364132, "grad_norm": 0.0, "learning_rate": 4.67221237533419e-06, "loss": 15.0252, "step": 1401 }, { "epoch": 0.08038299458188802, "grad_norm": 0.0, "learning_rate": 4.666036831274392e-06, "loss": 15.476, "step": 1402 }, { "epoch": 0.08044032910013474, "grad_norm": 0.0, "learning_rate": 4.659861798914138e-06, "loss": 15.1597, "step": 1403 }, { "epoch": 0.08049766361838144, "grad_norm": 0.0, "learning_rate": 4.653687287714828e-06, "loss": 15.3732, "step": 1404 }, { "epoch": 0.08055499813662816, "grad_norm": 0.0, "learning_rate": 4.647513307137076e-06, "loss": 15.5174, "step": 1405 }, { "epoch": 0.08061233265487487, "grad_norm": 0.0, "learning_rate": 4.641339866640672e-06, "loss": 15.0124, "step": 1406 }, { "epoch": 0.08066966717312157, "grad_norm": 0.0, "learning_rate": 4.635166975684587e-06, "loss": 15.1956, "step": 1407 }, { "epoch": 0.08072700169136829, "grad_norm": 0.0, "learning_rate": 4.628994643726942e-06, "loss": 15.4477, "step": 1408 }, { "epoch": 0.080784336209615, "grad_norm": 0.0, "learning_rate": 4.622822880225012e-06, "loss": 15.0185, "step": 1409 }, { "epoch": 0.0808416707278617, "grad_norm": 0.0, "learning_rate": 4.616651694635186e-06, "loss": 14.8942, "step": 1410 }, { "epoch": 0.08089900524610842, "grad_norm": 0.0, "learning_rate": 4.610481096412985e-06, "loss": 15.2555, "step": 1411 }, { "epoch": 0.08095633976435514, "grad_norm": 0.0, "learning_rate": 4.604311095013015e-06, "loss": 15.2582, "step": 1412 }, { "epoch": 0.08101367428260184, "grad_norm": 0.0, "learning_rate": 4.598141699888978e-06, "loss": 15.1778, "step": 1413 }, { "epoch": 0.08107100880084855, "grad_norm": 0.0, "learning_rate": 4.591972920493638e-06, "loss": 15.8129, "step": 1414 }, { "epoch": 0.08112834331909526, "grad_norm": 0.0, "learning_rate": 4.585804766278827e-06, "loss": 15.2853, "step": 1415 }, { "epoch": 0.08118567783734197, "grad_norm": 0.0, "learning_rate": 4.579637246695407e-06, "loss": 15.2172, "step": 1416 }, { "epoch": 0.08124301235558869, "grad_norm": 0.0, "learning_rate": 4.573470371193277e-06, "loss": 15.5005, "step": 1417 }, { "epoch": 0.08130034687383539, "grad_norm": 0.0, "learning_rate": 4.5673041492213416e-06, "loss": 15.1312, "step": 1418 }, { "epoch": 0.0813576813920821, "grad_norm": 0.0, "learning_rate": 4.561138590227512e-06, "loss": 15.2109, "step": 1419 }, { "epoch": 0.08141501591032882, "grad_norm": 0.0, "learning_rate": 4.554973703658676e-06, "loss": 15.4162, "step": 1420 }, { "epoch": 0.08147235042857552, "grad_norm": 0.0, "learning_rate": 4.548809498960697e-06, "loss": 14.7716, "step": 1421 }, { "epoch": 0.08152968494682224, "grad_norm": 0.0, "learning_rate": 4.542645985578389e-06, "loss": 14.9493, "step": 1422 }, { "epoch": 0.08158701946506894, "grad_norm": 0.0, "learning_rate": 4.53648317295551e-06, "loss": 15.0915, "step": 1423 }, { "epoch": 0.08164435398331565, "grad_norm": 0.0, "learning_rate": 4.53032107053474e-06, "loss": 15.0074, "step": 1424 }, { "epoch": 0.08170168850156237, "grad_norm": 0.0, "learning_rate": 4.52415968775768e-06, "loss": 15.1501, "step": 1425 }, { "epoch": 0.08175902301980907, "grad_norm": 0.0, "learning_rate": 4.517999034064819e-06, "loss": 15.3762, "step": 1426 }, { "epoch": 0.08181635753805579, "grad_norm": 0.0, "learning_rate": 4.511839118895532e-06, "loss": 14.9713, "step": 1427 }, { "epoch": 0.0818736920563025, "grad_norm": 0.0, "learning_rate": 4.5056799516880615e-06, "loss": 15.0249, "step": 1428 }, { "epoch": 0.0819310265745492, "grad_norm": 0.0, "learning_rate": 4.499521541879508e-06, "loss": 14.9934, "step": 1429 }, { "epoch": 0.08198836109279592, "grad_norm": 0.0, "learning_rate": 4.493363898905808e-06, "loss": 15.6353, "step": 1430 }, { "epoch": 0.08204569561104263, "grad_norm": 0.0, "learning_rate": 4.4872070322017235e-06, "loss": 15.3187, "step": 1431 }, { "epoch": 0.08210303012928934, "grad_norm": 0.0, "learning_rate": 4.4810509512008245e-06, "loss": 15.0563, "step": 1432 }, { "epoch": 0.08216036464753605, "grad_norm": 0.0, "learning_rate": 4.474895665335484e-06, "loss": 15.2045, "step": 1433 }, { "epoch": 0.08221769916578275, "grad_norm": 0.0, "learning_rate": 4.4687411840368514e-06, "loss": 15.3326, "step": 1434 }, { "epoch": 0.08227503368402947, "grad_norm": 0.0, "learning_rate": 4.462587516734845e-06, "loss": 15.1827, "step": 1435 }, { "epoch": 0.08233236820227618, "grad_norm": 0.0, "learning_rate": 4.456434672858132e-06, "loss": 15.3744, "step": 1436 }, { "epoch": 0.08238970272052289, "grad_norm": 0.0, "learning_rate": 4.450282661834127e-06, "loss": 15.1566, "step": 1437 }, { "epoch": 0.0824470372387696, "grad_norm": 0.0, "learning_rate": 4.444131493088956e-06, "loss": 15.2045, "step": 1438 }, { "epoch": 0.08250437175701632, "grad_norm": 0.0, "learning_rate": 4.437981176047469e-06, "loss": 15.348, "step": 1439 }, { "epoch": 0.08256170627526302, "grad_norm": 0.0, "learning_rate": 4.431831720133198e-06, "loss": 15.0272, "step": 1440 }, { "epoch": 0.08261904079350973, "grad_norm": 0.0, "learning_rate": 4.425683134768365e-06, "loss": 15.2256, "step": 1441 }, { "epoch": 0.08267637531175644, "grad_norm": 0.0, "learning_rate": 4.4195354293738484e-06, "loss": 15.4462, "step": 1442 }, { "epoch": 0.08273370983000315, "grad_norm": 0.0, "learning_rate": 4.41338861336919e-06, "loss": 15.2976, "step": 1443 }, { "epoch": 0.08279104434824987, "grad_norm": 0.0, "learning_rate": 4.40724269617256e-06, "loss": 15.2583, "step": 1444 }, { "epoch": 0.08284837886649657, "grad_norm": 0.0, "learning_rate": 4.401097687200754e-06, "loss": 14.889, "step": 1445 }, { "epoch": 0.08290571338474328, "grad_norm": 0.0, "learning_rate": 4.394953595869178e-06, "loss": 15.0715, "step": 1446 }, { "epoch": 0.08296304790299, "grad_norm": 0.0, "learning_rate": 4.388810431591829e-06, "loss": 15.3028, "step": 1447 }, { "epoch": 0.0830203824212367, "grad_norm": 0.0, "learning_rate": 4.382668203781286e-06, "loss": 15.3078, "step": 1448 }, { "epoch": 0.08307771693948342, "grad_norm": 0.0, "learning_rate": 4.376526921848688e-06, "loss": 14.9887, "step": 1449 }, { "epoch": 0.08313505145773013, "grad_norm": 0.0, "learning_rate": 4.3703865952037354e-06, "loss": 15.148, "step": 1450 }, { "epoch": 0.08319238597597683, "grad_norm": 0.0, "learning_rate": 4.364247233254654e-06, "loss": 15.2904, "step": 1451 }, { "epoch": 0.08324972049422355, "grad_norm": 0.0, "learning_rate": 4.3581088454081975e-06, "loss": 14.9469, "step": 1452 }, { "epoch": 0.08330705501247025, "grad_norm": 0.0, "learning_rate": 4.351971441069622e-06, "loss": 15.0734, "step": 1453 }, { "epoch": 0.08336438953071697, "grad_norm": 0.0, "learning_rate": 4.345835029642685e-06, "loss": 15.4407, "step": 1454 }, { "epoch": 0.08342172404896368, "grad_norm": 0.0, "learning_rate": 4.339699620529614e-06, "loss": 15.0554, "step": 1455 }, { "epoch": 0.08347905856721038, "grad_norm": 0.0, "learning_rate": 4.333565223131107e-06, "loss": 15.0257, "step": 1456 }, { "epoch": 0.0835363930854571, "grad_norm": 0.0, "learning_rate": 4.327431846846306e-06, "loss": 15.2727, "step": 1457 }, { "epoch": 0.08359372760370382, "grad_norm": 0.0, "learning_rate": 4.321299501072797e-06, "loss": 15.2608, "step": 1458 }, { "epoch": 0.08365106212195052, "grad_norm": 0.0, "learning_rate": 4.315168195206574e-06, "loss": 15.1937, "step": 1459 }, { "epoch": 0.08370839664019723, "grad_norm": 0.0, "learning_rate": 4.309037938642053e-06, "loss": 15.3834, "step": 1460 }, { "epoch": 0.08376573115844393, "grad_norm": 0.0, "learning_rate": 4.30290874077203e-06, "loss": 15.4123, "step": 1461 }, { "epoch": 0.08382306567669065, "grad_norm": 0.0, "learning_rate": 4.296780610987685e-06, "loss": 14.9967, "step": 1462 }, { "epoch": 0.08388040019493737, "grad_norm": 0.0, "learning_rate": 4.290653558678559e-06, "loss": 15.1021, "step": 1463 }, { "epoch": 0.08393773471318407, "grad_norm": 0.0, "learning_rate": 4.284527593232545e-06, "loss": 15.4646, "step": 1464 }, { "epoch": 0.08399506923143078, "grad_norm": 0.0, "learning_rate": 4.278402724035868e-06, "loss": 15.189, "step": 1465 }, { "epoch": 0.0840524037496775, "grad_norm": 0.0, "learning_rate": 4.272278960473074e-06, "loss": 15.1943, "step": 1466 }, { "epoch": 0.0841097382679242, "grad_norm": 0.0, "learning_rate": 4.2661563119270135e-06, "loss": 15.3133, "step": 1467 }, { "epoch": 0.08416707278617092, "grad_norm": 0.0, "learning_rate": 4.260034787778833e-06, "loss": 14.9534, "step": 1468 }, { "epoch": 0.08422440730441763, "grad_norm": 0.0, "learning_rate": 4.253914397407952e-06, "loss": 15.3099, "step": 1469 }, { "epoch": 0.08428174182266433, "grad_norm": 0.0, "learning_rate": 4.247795150192054e-06, "loss": 14.879, "step": 1470 }, { "epoch": 0.08433907634091105, "grad_norm": 0.0, "learning_rate": 4.241677055507071e-06, "loss": 14.9269, "step": 1471 }, { "epoch": 0.08439641085915775, "grad_norm": 0.0, "learning_rate": 4.235560122727171e-06, "loss": 15.1852, "step": 1472 }, { "epoch": 0.08445374537740447, "grad_norm": 0.0, "learning_rate": 4.229444361224734e-06, "loss": 15.3213, "step": 1473 }, { "epoch": 0.08451107989565118, "grad_norm": 0.0, "learning_rate": 4.223329780370359e-06, "loss": 15.2615, "step": 1474 }, { "epoch": 0.08456841441389788, "grad_norm": 0.0, "learning_rate": 4.217216389532824e-06, "loss": 15.2346, "step": 1475 }, { "epoch": 0.0846257489321446, "grad_norm": 0.0, "learning_rate": 4.211104198079089e-06, "loss": 15.0628, "step": 1476 }, { "epoch": 0.08468308345039131, "grad_norm": 0.0, "learning_rate": 4.2049932153742726e-06, "loss": 15.0532, "step": 1477 }, { "epoch": 0.08474041796863802, "grad_norm": 0.0, "learning_rate": 4.198883450781649e-06, "loss": 15.0043, "step": 1478 }, { "epoch": 0.08479775248688473, "grad_norm": 0.0, "learning_rate": 4.192774913662618e-06, "loss": 15.0565, "step": 1479 }, { "epoch": 0.08485508700513143, "grad_norm": 0.0, "learning_rate": 4.186667613376702e-06, "loss": 15.3671, "step": 1480 }, { "epoch": 0.08491242152337815, "grad_norm": 0.0, "learning_rate": 4.180561559281526e-06, "loss": 15.0956, "step": 1481 }, { "epoch": 0.08496975604162486, "grad_norm": 0.0, "learning_rate": 4.174456760732813e-06, "loss": 15.2333, "step": 1482 }, { "epoch": 0.08502709055987157, "grad_norm": 0.0, "learning_rate": 4.1683532270843505e-06, "loss": 14.9798, "step": 1483 }, { "epoch": 0.08508442507811828, "grad_norm": 0.0, "learning_rate": 4.162250967687999e-06, "loss": 15.3353, "step": 1484 }, { "epoch": 0.085141759596365, "grad_norm": 0.0, "learning_rate": 4.1561499918936575e-06, "loss": 15.0471, "step": 1485 }, { "epoch": 0.0851990941146117, "grad_norm": 0.0, "learning_rate": 4.150050309049267e-06, "loss": 15.4525, "step": 1486 }, { "epoch": 0.08525642863285841, "grad_norm": 0.0, "learning_rate": 4.143951928500778e-06, "loss": 15.3145, "step": 1487 }, { "epoch": 0.08531376315110513, "grad_norm": 0.0, "learning_rate": 4.137854859592157e-06, "loss": 15.0502, "step": 1488 }, { "epoch": 0.08537109766935183, "grad_norm": 0.0, "learning_rate": 4.131759111665349e-06, "loss": 15.1505, "step": 1489 }, { "epoch": 0.08542843218759855, "grad_norm": 0.0, "learning_rate": 4.125664694060283e-06, "loss": 15.4517, "step": 1490 }, { "epoch": 0.08548576670584525, "grad_norm": 0.0, "learning_rate": 4.119571616114845e-06, "loss": 15.42, "step": 1491 }, { "epoch": 0.08554310122409196, "grad_norm": 0.0, "learning_rate": 4.113479887164873e-06, "loss": 15.2936, "step": 1492 }, { "epoch": 0.08560043574233868, "grad_norm": 0.0, "learning_rate": 4.107389516544134e-06, "loss": 15.0335, "step": 1493 }, { "epoch": 0.08565777026058538, "grad_norm": 0.0, "learning_rate": 4.101300513584317e-06, "loss": 14.8875, "step": 1494 }, { "epoch": 0.0857151047788321, "grad_norm": 0.0, "learning_rate": 4.09521288761501e-06, "loss": 15.1966, "step": 1495 }, { "epoch": 0.08577243929707881, "grad_norm": 0.0, "learning_rate": 4.0891266479637e-06, "loss": 15.1653, "step": 1496 }, { "epoch": 0.08582977381532551, "grad_norm": 0.0, "learning_rate": 4.083041803955739e-06, "loss": 15.4002, "step": 1497 }, { "epoch": 0.08588710833357223, "grad_norm": 0.0, "learning_rate": 4.076958364914352e-06, "loss": 15.1514, "step": 1498 }, { "epoch": 0.08594444285181893, "grad_norm": 0.0, "learning_rate": 4.070876340160601e-06, "loss": 14.9568, "step": 1499 }, { "epoch": 0.08600177737006565, "grad_norm": 0.0, "learning_rate": 4.064795739013389e-06, "loss": 15.1858, "step": 1500 }, { "epoch": 0.08605911188831236, "grad_norm": 0.0, "learning_rate": 4.0587165707894326e-06, "loss": 15.3825, "step": 1501 }, { "epoch": 0.08611644640655906, "grad_norm": 0.0, "learning_rate": 4.0526388448032564e-06, "loss": 15.4134, "step": 1502 }, { "epoch": 0.08617378092480578, "grad_norm": 0.0, "learning_rate": 4.046562570367172e-06, "loss": 15.1151, "step": 1503 }, { "epoch": 0.0862311154430525, "grad_norm": 0.0, "learning_rate": 4.04048775679127e-06, "loss": 15.2341, "step": 1504 }, { "epoch": 0.0862884499612992, "grad_norm": 0.0, "learning_rate": 4.034414413383397e-06, "loss": 15.0329, "step": 1505 }, { "epoch": 0.08634578447954591, "grad_norm": 0.0, "learning_rate": 4.028342549449157e-06, "loss": 15.1716, "step": 1506 }, { "epoch": 0.08640311899779263, "grad_norm": 0.0, "learning_rate": 4.022272174291878e-06, "loss": 15.3237, "step": 1507 }, { "epoch": 0.08646045351603933, "grad_norm": 0.0, "learning_rate": 4.0162032972126105e-06, "loss": 15.2783, "step": 1508 }, { "epoch": 0.08651778803428604, "grad_norm": 0.0, "learning_rate": 4.010135927510109e-06, "loss": 15.4047, "step": 1509 }, { "epoch": 0.08657512255253275, "grad_norm": 0.0, "learning_rate": 4.004070074480821e-06, "loss": 15.3116, "step": 1510 }, { "epoch": 0.08663245707077946, "grad_norm": 0.0, "learning_rate": 3.998005747418862e-06, "loss": 14.971, "step": 1511 }, { "epoch": 0.08668979158902618, "grad_norm": 0.0, "learning_rate": 3.991942955616023e-06, "loss": 15.1829, "step": 1512 }, { "epoch": 0.08674712610727288, "grad_norm": 0.0, "learning_rate": 3.985881708361729e-06, "loss": 15.1694, "step": 1513 }, { "epoch": 0.0868044606255196, "grad_norm": 0.0, "learning_rate": 3.979822014943049e-06, "loss": 15.276, "step": 1514 }, { "epoch": 0.08686179514376631, "grad_norm": 0.0, "learning_rate": 3.9737638846446605e-06, "loss": 15.0269, "step": 1515 }, { "epoch": 0.08691912966201301, "grad_norm": 0.0, "learning_rate": 3.967707326748857e-06, "loss": 15.2615, "step": 1516 }, { "epoch": 0.08697646418025973, "grad_norm": 0.0, "learning_rate": 3.961652350535517e-06, "loss": 15.2742, "step": 1517 }, { "epoch": 0.08703379869850644, "grad_norm": 0.0, "learning_rate": 3.955598965282096e-06, "loss": 15.3354, "step": 1518 }, { "epoch": 0.08709113321675314, "grad_norm": 0.0, "learning_rate": 3.94954718026361e-06, "loss": 15.3394, "step": 1519 }, { "epoch": 0.08714846773499986, "grad_norm": 0.0, "learning_rate": 3.94349700475263e-06, "loss": 15.1984, "step": 1520 }, { "epoch": 0.08720580225324656, "grad_norm": 0.0, "learning_rate": 3.93744844801925e-06, "loss": 14.8813, "step": 1521 }, { "epoch": 0.08726313677149328, "grad_norm": 0.0, "learning_rate": 3.931401519331095e-06, "loss": 15.358, "step": 1522 }, { "epoch": 0.08732047128973999, "grad_norm": 0.0, "learning_rate": 3.9253562279532884e-06, "loss": 15.1213, "step": 1523 }, { "epoch": 0.0873778058079867, "grad_norm": 0.0, "learning_rate": 3.919312583148449e-06, "loss": 15.029, "step": 1524 }, { "epoch": 0.08743514032623341, "grad_norm": 0.0, "learning_rate": 3.913270594176665e-06, "loss": 14.8806, "step": 1525 }, { "epoch": 0.08749247484448013, "grad_norm": 0.0, "learning_rate": 3.907230270295499e-06, "loss": 15.3686, "step": 1526 }, { "epoch": 0.08754980936272683, "grad_norm": 0.0, "learning_rate": 3.901191620759954e-06, "loss": 15.0427, "step": 1527 }, { "epoch": 0.08760714388097354, "grad_norm": 0.0, "learning_rate": 3.895154654822471e-06, "loss": 15.1566, "step": 1528 }, { "epoch": 0.08766447839922024, "grad_norm": 0.0, "learning_rate": 3.8891193817329085e-06, "loss": 15.1766, "step": 1529 }, { "epoch": 0.08772181291746696, "grad_norm": 0.0, "learning_rate": 3.883085810738536e-06, "loss": 15.1378, "step": 1530 }, { "epoch": 0.08777914743571368, "grad_norm": 0.0, "learning_rate": 3.87705395108401e-06, "loss": 15.2077, "step": 1531 }, { "epoch": 0.08783648195396038, "grad_norm": 0.0, "learning_rate": 3.8710238120113675e-06, "loss": 15.1772, "step": 1532 }, { "epoch": 0.08789381647220709, "grad_norm": 0.0, "learning_rate": 3.864995402760009e-06, "loss": 15.4355, "step": 1533 }, { "epoch": 0.08795115099045381, "grad_norm": 0.0, "learning_rate": 3.858968732566685e-06, "loss": 15.0356, "step": 1534 }, { "epoch": 0.08800848550870051, "grad_norm": 0.0, "learning_rate": 3.8529438106654785e-06, "loss": 15.1937, "step": 1535 }, { "epoch": 0.08806582002694723, "grad_norm": 0.0, "learning_rate": 3.8469206462878e-06, "loss": 15.0776, "step": 1536 }, { "epoch": 0.08812315454519394, "grad_norm": 0.0, "learning_rate": 3.840899248662358e-06, "loss": 15.2202, "step": 1537 }, { "epoch": 0.08818048906344064, "grad_norm": 0.0, "learning_rate": 3.834879627015165e-06, "loss": 15.2026, "step": 1538 }, { "epoch": 0.08823782358168736, "grad_norm": 0.0, "learning_rate": 3.8288617905695005e-06, "loss": 15.211, "step": 1539 }, { "epoch": 0.08829515809993406, "grad_norm": 0.0, "learning_rate": 3.822845748545919e-06, "loss": 15.3265, "step": 1540 }, { "epoch": 0.08835249261818078, "grad_norm": 0.0, "learning_rate": 3.816831510162219e-06, "loss": 15.3084, "step": 1541 }, { "epoch": 0.08840982713642749, "grad_norm": 0.0, "learning_rate": 3.8108190846334402e-06, "loss": 15.1557, "step": 1542 }, { "epoch": 0.08846716165467419, "grad_norm": 0.0, "learning_rate": 3.8048084811718377e-06, "loss": 15.0298, "step": 1543 }, { "epoch": 0.08852449617292091, "grad_norm": 0.0, "learning_rate": 3.798799708986885e-06, "loss": 15.454, "step": 1544 }, { "epoch": 0.08858183069116762, "grad_norm": 0.0, "learning_rate": 3.7927927772852395e-06, "loss": 15.0659, "step": 1545 }, { "epoch": 0.08863916520941433, "grad_norm": 0.0, "learning_rate": 3.786787695270743e-06, "loss": 15.1532, "step": 1546 }, { "epoch": 0.08869649972766104, "grad_norm": 0.0, "learning_rate": 3.7807844721444063e-06, "loss": 15.2533, "step": 1547 }, { "epoch": 0.08875383424590774, "grad_norm": 0.0, "learning_rate": 3.7747831171043865e-06, "loss": 14.971, "step": 1548 }, { "epoch": 0.08881116876415446, "grad_norm": 0.0, "learning_rate": 3.7687836393459828e-06, "loss": 15.3314, "step": 1549 }, { "epoch": 0.08886850328240117, "grad_norm": 0.0, "learning_rate": 3.7627860480616128e-06, "loss": 15.2095, "step": 1550 }, { "epoch": 0.08892583780064788, "grad_norm": 0.0, "learning_rate": 3.756790352440811e-06, "loss": 14.8912, "step": 1551 }, { "epoch": 0.08898317231889459, "grad_norm": 0.0, "learning_rate": 3.7507965616702015e-06, "loss": 15.3292, "step": 1552 }, { "epoch": 0.0890405068371413, "grad_norm": 0.0, "learning_rate": 3.744804684933492e-06, "loss": 15.1989, "step": 1553 }, { "epoch": 0.08909784135538801, "grad_norm": 0.0, "learning_rate": 3.7388147314114554e-06, "loss": 15.6162, "step": 1554 }, { "epoch": 0.08915517587363472, "grad_norm": 0.0, "learning_rate": 3.732826710281923e-06, "loss": 14.9143, "step": 1555 }, { "epoch": 0.08921251039188144, "grad_norm": 0.0, "learning_rate": 3.7268406307197568e-06, "loss": 14.7441, "step": 1556 }, { "epoch": 0.08926984491012814, "grad_norm": 0.0, "learning_rate": 3.7208565018968545e-06, "loss": 14.9323, "step": 1557 }, { "epoch": 0.08932717942837486, "grad_norm": 0.0, "learning_rate": 3.7148743329821146e-06, "loss": 15.1762, "step": 1558 }, { "epoch": 0.08938451394662156, "grad_norm": 0.0, "learning_rate": 3.7088941331414418e-06, "loss": 15.7279, "step": 1559 }, { "epoch": 0.08944184846486827, "grad_norm": 0.0, "learning_rate": 3.702915911537714e-06, "loss": 15.1863, "step": 1560 }, { "epoch": 0.08949918298311499, "grad_norm": 0.0, "learning_rate": 3.6969396773307888e-06, "loss": 15.29, "step": 1561 }, { "epoch": 0.08955651750136169, "grad_norm": 0.0, "learning_rate": 3.6909654396774684e-06, "loss": 14.9259, "step": 1562 }, { "epoch": 0.0896138520196084, "grad_norm": 0.0, "learning_rate": 3.684993207731505e-06, "loss": 14.9451, "step": 1563 }, { "epoch": 0.08967118653785512, "grad_norm": 0.0, "learning_rate": 3.6790229906435706e-06, "loss": 15.2632, "step": 1564 }, { "epoch": 0.08972852105610182, "grad_norm": 0.0, "learning_rate": 3.673054797561254e-06, "loss": 15.0482, "step": 1565 }, { "epoch": 0.08978585557434854, "grad_norm": 0.0, "learning_rate": 3.667088637629041e-06, "loss": 14.9746, "step": 1566 }, { "epoch": 0.08984319009259524, "grad_norm": 0.0, "learning_rate": 3.6611245199883037e-06, "loss": 15.5239, "step": 1567 }, { "epoch": 0.08990052461084196, "grad_norm": 0.0, "learning_rate": 3.6551624537772834e-06, "loss": 15.5796, "step": 1568 }, { "epoch": 0.08995785912908867, "grad_norm": 0.0, "learning_rate": 3.6492024481310793e-06, "loss": 15.051, "step": 1569 }, { "epoch": 0.09001519364733537, "grad_norm": 0.0, "learning_rate": 3.6432445121816308e-06, "loss": 15.3199, "step": 1570 }, { "epoch": 0.09007252816558209, "grad_norm": 0.0, "learning_rate": 3.6372886550577125e-06, "loss": 15.3081, "step": 1571 }, { "epoch": 0.0901298626838288, "grad_norm": 0.0, "learning_rate": 3.6313348858849064e-06, "loss": 15.1435, "step": 1572 }, { "epoch": 0.0901871972020755, "grad_norm": 0.0, "learning_rate": 3.6253832137856e-06, "loss": 14.6087, "step": 1573 }, { "epoch": 0.09024453172032222, "grad_norm": 0.0, "learning_rate": 3.6194336478789638e-06, "loss": 15.2436, "step": 1574 }, { "epoch": 0.09030186623856894, "grad_norm": 0.0, "learning_rate": 3.6134861972809477e-06, "loss": 15.3815, "step": 1575 }, { "epoch": 0.09035920075681564, "grad_norm": 0.0, "learning_rate": 3.6075408711042536e-06, "loss": 15.3305, "step": 1576 }, { "epoch": 0.09041653527506235, "grad_norm": 0.0, "learning_rate": 3.6015976784583327e-06, "loss": 14.9759, "step": 1577 }, { "epoch": 0.09047386979330906, "grad_norm": 0.0, "learning_rate": 3.595656628449362e-06, "loss": 15.1466, "step": 1578 }, { "epoch": 0.09053120431155577, "grad_norm": 0.0, "learning_rate": 3.5897177301802455e-06, "loss": 15.0655, "step": 1579 }, { "epoch": 0.09058853882980249, "grad_norm": 0.0, "learning_rate": 3.5837809927505783e-06, "loss": 15.1449, "step": 1580 }, { "epoch": 0.09064587334804919, "grad_norm": 0.0, "learning_rate": 3.5778464252566536e-06, "loss": 15.1359, "step": 1581 }, { "epoch": 0.0907032078662959, "grad_norm": 0.0, "learning_rate": 3.571914036791435e-06, "loss": 15.5221, "step": 1582 }, { "epoch": 0.09076054238454262, "grad_norm": 0.0, "learning_rate": 3.5659838364445505e-06, "loss": 14.8403, "step": 1583 }, { "epoch": 0.09081787690278932, "grad_norm": 0.0, "learning_rate": 3.5600558333022707e-06, "loss": 15.2719, "step": 1584 }, { "epoch": 0.09087521142103604, "grad_norm": 0.0, "learning_rate": 3.5541300364475067e-06, "loss": 14.9916, "step": 1585 }, { "epoch": 0.09093254593928274, "grad_norm": 0.0, "learning_rate": 3.548206454959783e-06, "loss": 14.9182, "step": 1586 }, { "epoch": 0.09098988045752945, "grad_norm": 0.0, "learning_rate": 3.5422850979152335e-06, "loss": 15.0797, "step": 1587 }, { "epoch": 0.09104721497577617, "grad_norm": 0.0, "learning_rate": 3.5363659743865797e-06, "loss": 15.3647, "step": 1588 }, { "epoch": 0.09110454949402287, "grad_norm": 0.0, "learning_rate": 3.5304490934431268e-06, "loss": 15.2592, "step": 1589 }, { "epoch": 0.09116188401226959, "grad_norm": 0.0, "learning_rate": 3.5245344641507384e-06, "loss": 15.2267, "step": 1590 }, { "epoch": 0.0912192185305163, "grad_norm": 0.0, "learning_rate": 3.518622095571831e-06, "loss": 15.1416, "step": 1591 }, { "epoch": 0.091276553048763, "grad_norm": 0.0, "learning_rate": 3.512711996765355e-06, "loss": 15.4173, "step": 1592 }, { "epoch": 0.09133388756700972, "grad_norm": 0.0, "learning_rate": 3.506804176786789e-06, "loss": 15.1714, "step": 1593 }, { "epoch": 0.09139122208525643, "grad_norm": 0.0, "learning_rate": 3.5008986446881088e-06, "loss": 15.3202, "step": 1594 }, { "epoch": 0.09144855660350314, "grad_norm": 0.0, "learning_rate": 3.4949954095177986e-06, "loss": 15.217, "step": 1595 }, { "epoch": 0.09150589112174985, "grad_norm": 0.0, "learning_rate": 3.4890944803208104e-06, "loss": 14.9886, "step": 1596 }, { "epoch": 0.09156322563999655, "grad_norm": 0.0, "learning_rate": 3.4831958661385716e-06, "loss": 14.7912, "step": 1597 }, { "epoch": 0.09162056015824327, "grad_norm": 0.0, "learning_rate": 3.4772995760089573e-06, "loss": 14.9861, "step": 1598 }, { "epoch": 0.09167789467648998, "grad_norm": 0.0, "learning_rate": 3.4714056189662877e-06, "loss": 15.3865, "step": 1599 }, { "epoch": 0.09173522919473669, "grad_norm": 0.0, "learning_rate": 3.465514004041301e-06, "loss": 14.9974, "step": 1600 }, { "epoch": 0.0917925637129834, "grad_norm": 0.0, "learning_rate": 3.459624740261153e-06, "loss": 14.9746, "step": 1601 }, { "epoch": 0.09184989823123012, "grad_norm": 0.0, "learning_rate": 3.45373783664939e-06, "loss": 14.9292, "step": 1602 }, { "epoch": 0.09190723274947682, "grad_norm": 0.0, "learning_rate": 3.4478533022259527e-06, "loss": 15.2711, "step": 1603 }, { "epoch": 0.09196456726772353, "grad_norm": 0.0, "learning_rate": 3.4419711460071405e-06, "loss": 15.109, "step": 1604 }, { "epoch": 0.09202190178597024, "grad_norm": 0.0, "learning_rate": 3.4360913770056166e-06, "loss": 15.533, "step": 1605 }, { "epoch": 0.09207923630421695, "grad_norm": 0.0, "learning_rate": 3.4302140042303813e-06, "loss": 15.1691, "step": 1606 }, { "epoch": 0.09213657082246367, "grad_norm": 0.0, "learning_rate": 3.424339036686768e-06, "loss": 14.9606, "step": 1607 }, { "epoch": 0.09219390534071037, "grad_norm": 0.0, "learning_rate": 3.41846648337642e-06, "loss": 15.1092, "step": 1608 }, { "epoch": 0.09225123985895708, "grad_norm": 0.0, "learning_rate": 3.4125963532972878e-06, "loss": 14.8195, "step": 1609 }, { "epoch": 0.0923085743772038, "grad_norm": 0.0, "learning_rate": 3.4067286554436024e-06, "loss": 14.756, "step": 1610 }, { "epoch": 0.0923659088954505, "grad_norm": 0.0, "learning_rate": 3.400863398805873e-06, "loss": 15.2289, "step": 1611 }, { "epoch": 0.09242324341369722, "grad_norm": 0.0, "learning_rate": 3.395000592370864e-06, "loss": 15.1732, "step": 1612 }, { "epoch": 0.09248057793194393, "grad_norm": 0.0, "learning_rate": 3.389140245121591e-06, "loss": 15.2813, "step": 1613 }, { "epoch": 0.09253791245019063, "grad_norm": 0.0, "learning_rate": 3.383282366037296e-06, "loss": 15.011, "step": 1614 }, { "epoch": 0.09259524696843735, "grad_norm": 0.0, "learning_rate": 3.3774269640934447e-06, "loss": 14.8524, "step": 1615 }, { "epoch": 0.09265258148668405, "grad_norm": 0.0, "learning_rate": 3.371574048261701e-06, "loss": 15.4559, "step": 1616 }, { "epoch": 0.09270991600493077, "grad_norm": 0.0, "learning_rate": 3.3657236275099275e-06, "loss": 15.3174, "step": 1617 }, { "epoch": 0.09276725052317748, "grad_norm": 0.0, "learning_rate": 3.3598757108021546e-06, "loss": 15.1581, "step": 1618 }, { "epoch": 0.09282458504142418, "grad_norm": 0.0, "learning_rate": 3.354030307098585e-06, "loss": 15.4304, "step": 1619 }, { "epoch": 0.0928819195596709, "grad_norm": 0.0, "learning_rate": 3.348187425355564e-06, "loss": 15.2791, "step": 1620 }, { "epoch": 0.09293925407791762, "grad_norm": 0.0, "learning_rate": 3.342347074525578e-06, "loss": 15.3398, "step": 1621 }, { "epoch": 0.09299658859616432, "grad_norm": 0.0, "learning_rate": 3.3365092635572295e-06, "loss": 14.9245, "step": 1622 }, { "epoch": 0.09305392311441103, "grad_norm": 0.0, "learning_rate": 3.3306740013952368e-06, "loss": 15.1071, "step": 1623 }, { "epoch": 0.09311125763265775, "grad_norm": 0.0, "learning_rate": 3.3248412969804065e-06, "loss": 15.2702, "step": 1624 }, { "epoch": 0.09316859215090445, "grad_norm": 0.0, "learning_rate": 3.319011159249631e-06, "loss": 14.9664, "step": 1625 }, { "epoch": 0.09322592666915117, "grad_norm": 0.0, "learning_rate": 3.313183597135865e-06, "loss": 15.1732, "step": 1626 }, { "epoch": 0.09328326118739787, "grad_norm": 0.0, "learning_rate": 3.307358619568123e-06, "loss": 15.2397, "step": 1627 }, { "epoch": 0.09334059570564458, "grad_norm": 0.0, "learning_rate": 3.301536235471453e-06, "loss": 15.1465, "step": 1628 }, { "epoch": 0.0933979302238913, "grad_norm": 0.0, "learning_rate": 3.295716453766935e-06, "loss": 15.2098, "step": 1629 }, { "epoch": 0.093455264742138, "grad_norm": 0.0, "learning_rate": 3.289899283371657e-06, "loss": 15.2483, "step": 1630 }, { "epoch": 0.09351259926038472, "grad_norm": 0.0, "learning_rate": 3.2840847331987093e-06, "loss": 15.0997, "step": 1631 }, { "epoch": 0.09356993377863143, "grad_norm": 0.0, "learning_rate": 3.2782728121571632e-06, "loss": 15.2503, "step": 1632 }, { "epoch": 0.09362726829687813, "grad_norm": 0.0, "learning_rate": 3.2724635291520697e-06, "loss": 15.3095, "step": 1633 }, { "epoch": 0.09368460281512485, "grad_norm": 0.0, "learning_rate": 3.266656893084428e-06, "loss": 15.4218, "step": 1634 }, { "epoch": 0.09374193733337155, "grad_norm": 0.0, "learning_rate": 3.2608529128511896e-06, "loss": 15.0612, "step": 1635 }, { "epoch": 0.09379927185161827, "grad_norm": 0.0, "learning_rate": 3.2550515973452295e-06, "loss": 15.3992, "step": 1636 }, { "epoch": 0.09385660636986498, "grad_norm": 0.0, "learning_rate": 3.2492529554553485e-06, "loss": 15.1745, "step": 1637 }, { "epoch": 0.09391394088811168, "grad_norm": 0.0, "learning_rate": 3.243456996066242e-06, "loss": 14.9587, "step": 1638 }, { "epoch": 0.0939712754063584, "grad_norm": 0.0, "learning_rate": 3.2376637280585025e-06, "loss": 15.0485, "step": 1639 }, { "epoch": 0.09402860992460511, "grad_norm": 0.0, "learning_rate": 3.2318731603085923e-06, "loss": 15.0185, "step": 1640 }, { "epoch": 0.09408594444285182, "grad_norm": 0.0, "learning_rate": 3.2260853016888443e-06, "loss": 15.2848, "step": 1641 }, { "epoch": 0.09414327896109853, "grad_norm": 0.0, "learning_rate": 3.2203001610674322e-06, "loss": 15.0875, "step": 1642 }, { "epoch": 0.09420061347934525, "grad_norm": 0.0, "learning_rate": 3.214517747308368e-06, "loss": 15.0593, "step": 1643 }, { "epoch": 0.09425794799759195, "grad_norm": 0.0, "learning_rate": 3.2087380692714887e-06, "loss": 15.1293, "step": 1644 }, { "epoch": 0.09431528251583866, "grad_norm": 0.0, "learning_rate": 3.202961135812437e-06, "loss": 14.9762, "step": 1645 }, { "epoch": 0.09437261703408537, "grad_norm": 0.0, "learning_rate": 3.1971869557826507e-06, "loss": 15.3738, "step": 1646 }, { "epoch": 0.09442995155233208, "grad_norm": 0.0, "learning_rate": 3.191415538029346e-06, "loss": 15.1781, "step": 1647 }, { "epoch": 0.0944872860705788, "grad_norm": 0.0, "learning_rate": 3.185646891395514e-06, "loss": 15.2245, "step": 1648 }, { "epoch": 0.0945446205888255, "grad_norm": 0.0, "learning_rate": 3.1798810247198925e-06, "loss": 15.0903, "step": 1649 }, { "epoch": 0.09460195510707221, "grad_norm": 0.0, "learning_rate": 3.174117946836964e-06, "loss": 15.0486, "step": 1650 }, { "epoch": 0.09465928962531893, "grad_norm": 0.0, "learning_rate": 3.1683576665769344e-06, "loss": 15.0967, "step": 1651 }, { "epoch": 0.09471662414356563, "grad_norm": 0.0, "learning_rate": 3.1626001927657287e-06, "loss": 15.1772, "step": 1652 }, { "epoch": 0.09477395866181235, "grad_norm": 0.0, "learning_rate": 3.1568455342249654e-06, "loss": 15.0888, "step": 1653 }, { "epoch": 0.09483129318005905, "grad_norm": 0.0, "learning_rate": 3.1510936997719557e-06, "loss": 15.2379, "step": 1654 }, { "epoch": 0.09488862769830576, "grad_norm": 0.0, "learning_rate": 3.145344698219677e-06, "loss": 14.9315, "step": 1655 }, { "epoch": 0.09494596221655248, "grad_norm": 0.0, "learning_rate": 3.1395985383767734e-06, "loss": 15.681, "step": 1656 }, { "epoch": 0.09500329673479918, "grad_norm": 0.0, "learning_rate": 3.1338552290475265e-06, "loss": 15.0557, "step": 1657 }, { "epoch": 0.0950606312530459, "grad_norm": 0.0, "learning_rate": 3.12811477903186e-06, "loss": 15.0777, "step": 1658 }, { "epoch": 0.09511796577129261, "grad_norm": 0.0, "learning_rate": 3.1223771971253093e-06, "loss": 15.2504, "step": 1659 }, { "epoch": 0.09517530028953931, "grad_norm": 0.0, "learning_rate": 3.1166424921190174e-06, "loss": 15.0185, "step": 1660 }, { "epoch": 0.09523263480778603, "grad_norm": 0.0, "learning_rate": 3.1109106727997184e-06, "loss": 15.0898, "step": 1661 }, { "epoch": 0.09528996932603274, "grad_norm": 0.0, "learning_rate": 3.1051817479497297e-06, "loss": 15.0596, "step": 1662 }, { "epoch": 0.09534730384427945, "grad_norm": 0.0, "learning_rate": 3.0994557263469267e-06, "loss": 15.1607, "step": 1663 }, { "epoch": 0.09540463836252616, "grad_norm": 0.0, "learning_rate": 3.093732616764742e-06, "loss": 15.1243, "step": 1664 }, { "epoch": 0.09546197288077286, "grad_norm": 0.0, "learning_rate": 3.0880124279721408e-06, "loss": 15.0445, "step": 1665 }, { "epoch": 0.09551930739901958, "grad_norm": 0.0, "learning_rate": 3.0822951687336215e-06, "loss": 14.8608, "step": 1666 }, { "epoch": 0.0955766419172663, "grad_norm": 0.0, "learning_rate": 3.076580847809184e-06, "loss": 15.4273, "step": 1667 }, { "epoch": 0.095633976435513, "grad_norm": 0.0, "learning_rate": 3.0708694739543345e-06, "loss": 15.1485, "step": 1668 }, { "epoch": 0.09569131095375971, "grad_norm": 0.0, "learning_rate": 3.065161055920057e-06, "loss": 15.4583, "step": 1669 }, { "epoch": 0.09574864547200643, "grad_norm": 0.0, "learning_rate": 3.0594556024528134e-06, "loss": 14.7834, "step": 1670 }, { "epoch": 0.09580597999025313, "grad_norm": 0.0, "learning_rate": 3.053753122294515e-06, "loss": 14.9889, "step": 1671 }, { "epoch": 0.09586331450849984, "grad_norm": 0.0, "learning_rate": 3.0480536241825263e-06, "loss": 14.956, "step": 1672 }, { "epoch": 0.09592064902674655, "grad_norm": 0.0, "learning_rate": 3.0423571168496356e-06, "loss": 15.1446, "step": 1673 }, { "epoch": 0.09597798354499326, "grad_norm": 0.0, "learning_rate": 3.036663609024054e-06, "loss": 15.4386, "step": 1674 }, { "epoch": 0.09603531806323998, "grad_norm": 0.0, "learning_rate": 3.03097310942939e-06, "loss": 14.9984, "step": 1675 }, { "epoch": 0.09609265258148668, "grad_norm": 0.0, "learning_rate": 3.025285626784651e-06, "loss": 15.175, "step": 1676 }, { "epoch": 0.0961499870997334, "grad_norm": 0.0, "learning_rate": 3.019601169804216e-06, "loss": 15.1857, "step": 1677 }, { "epoch": 0.09620732161798011, "grad_norm": 0.0, "learning_rate": 3.013919747197832e-06, "loss": 14.9624, "step": 1678 }, { "epoch": 0.09626465613622681, "grad_norm": 0.0, "learning_rate": 3.0082413676705914e-06, "loss": 15.1623, "step": 1679 }, { "epoch": 0.09632199065447353, "grad_norm": 0.0, "learning_rate": 3.00256603992293e-06, "loss": 15.2423, "step": 1680 }, { "epoch": 0.09637932517272024, "grad_norm": 0.0, "learning_rate": 2.996893772650602e-06, "loss": 15.1929, "step": 1681 }, { "epoch": 0.09643665969096694, "grad_norm": 0.0, "learning_rate": 2.99122457454468e-06, "loss": 15.4669, "step": 1682 }, { "epoch": 0.09649399420921366, "grad_norm": 0.0, "learning_rate": 2.985558454291525e-06, "loss": 15.1124, "step": 1683 }, { "epoch": 0.09655132872746036, "grad_norm": 0.0, "learning_rate": 2.9798954205727886e-06, "loss": 15.2577, "step": 1684 }, { "epoch": 0.09660866324570708, "grad_norm": 0.0, "learning_rate": 2.9742354820653884e-06, "loss": 15.0487, "step": 1685 }, { "epoch": 0.09666599776395379, "grad_norm": 0.0, "learning_rate": 2.9685786474415057e-06, "loss": 14.9933, "step": 1686 }, { "epoch": 0.0967233322822005, "grad_norm": 0.0, "learning_rate": 2.96292492536856e-06, "loss": 15.0056, "step": 1687 }, { "epoch": 0.09678066680044721, "grad_norm": 0.0, "learning_rate": 2.957274324509206e-06, "loss": 15.1144, "step": 1688 }, { "epoch": 0.09683800131869393, "grad_norm": 0.0, "learning_rate": 2.95162685352131e-06, "loss": 15.3127, "step": 1689 }, { "epoch": 0.09689533583694063, "grad_norm": 0.0, "learning_rate": 2.9459825210579534e-06, "loss": 15.1207, "step": 1690 }, { "epoch": 0.09695267035518734, "grad_norm": 0.0, "learning_rate": 2.9403413357673955e-06, "loss": 14.9744, "step": 1691 }, { "epoch": 0.09701000487343404, "grad_norm": 0.0, "learning_rate": 2.9347033062930856e-06, "loss": 15.2412, "step": 1692 }, { "epoch": 0.09706733939168076, "grad_norm": 0.0, "learning_rate": 2.929068441273629e-06, "loss": 14.9284, "step": 1693 }, { "epoch": 0.09712467390992748, "grad_norm": 0.0, "learning_rate": 2.923436749342788e-06, "loss": 15.1222, "step": 1694 }, { "epoch": 0.09718200842817418, "grad_norm": 0.0, "learning_rate": 2.9178082391294573e-06, "loss": 15.1443, "step": 1695 }, { "epoch": 0.09723934294642089, "grad_norm": 0.0, "learning_rate": 2.9121829192576647e-06, "loss": 15.1553, "step": 1696 }, { "epoch": 0.09729667746466761, "grad_norm": 0.0, "learning_rate": 2.90656079834654e-06, "loss": 15.2868, "step": 1697 }, { "epoch": 0.09735401198291431, "grad_norm": 0.0, "learning_rate": 2.9009418850103218e-06, "loss": 15.0563, "step": 1698 }, { "epoch": 0.09741134650116103, "grad_norm": 0.0, "learning_rate": 2.8953261878583263e-06, "loss": 15.0829, "step": 1699 }, { "epoch": 0.09746868101940774, "grad_norm": 0.0, "learning_rate": 2.889713715494944e-06, "loss": 15.2201, "step": 1700 }, { "epoch": 0.09752601553765444, "grad_norm": 0.0, "learning_rate": 2.8841044765196236e-06, "loss": 15.1362, "step": 1701 }, { "epoch": 0.09758335005590116, "grad_norm": 0.0, "learning_rate": 2.8784984795268644e-06, "loss": 15.1359, "step": 1702 }, { "epoch": 0.09764068457414786, "grad_norm": 0.0, "learning_rate": 2.8728957331061914e-06, "loss": 15.3242, "step": 1703 }, { "epoch": 0.09769801909239458, "grad_norm": 0.0, "learning_rate": 2.8672962458421548e-06, "loss": 15.243, "step": 1704 }, { "epoch": 0.09775535361064129, "grad_norm": 0.0, "learning_rate": 2.861700026314308e-06, "loss": 15.295, "step": 1705 }, { "epoch": 0.09781268812888799, "grad_norm": 0.0, "learning_rate": 2.8561070830971975e-06, "loss": 14.8623, "step": 1706 }, { "epoch": 0.09787002264713471, "grad_norm": 0.0, "learning_rate": 2.8505174247603495e-06, "loss": 15.1138, "step": 1707 }, { "epoch": 0.09792735716538142, "grad_norm": 0.0, "learning_rate": 2.844931059868261e-06, "loss": 15.2223, "step": 1708 }, { "epoch": 0.09798469168362813, "grad_norm": 0.0, "learning_rate": 2.839347996980376e-06, "loss": 15.2052, "step": 1709 }, { "epoch": 0.09804202620187484, "grad_norm": 0.0, "learning_rate": 2.8337682446510883e-06, "loss": 15.1068, "step": 1710 }, { "epoch": 0.09809936072012154, "grad_norm": 0.0, "learning_rate": 2.828191811429709e-06, "loss": 15.4515, "step": 1711 }, { "epoch": 0.09815669523836826, "grad_norm": 0.0, "learning_rate": 2.8226187058604735e-06, "loss": 15.5278, "step": 1712 }, { "epoch": 0.09821402975661497, "grad_norm": 0.0, "learning_rate": 2.8170489364825106e-06, "loss": 14.9237, "step": 1713 }, { "epoch": 0.09827136427486168, "grad_norm": 0.0, "learning_rate": 2.811482511829842e-06, "loss": 15.1981, "step": 1714 }, { "epoch": 0.09832869879310839, "grad_norm": 0.0, "learning_rate": 2.805919440431359e-06, "loss": 15.1981, "step": 1715 }, { "epoch": 0.0983860333113551, "grad_norm": 0.0, "learning_rate": 2.8003597308108246e-06, "loss": 14.7001, "step": 1716 }, { "epoch": 0.09844336782960181, "grad_norm": 0.0, "learning_rate": 2.7948033914868415e-06, "loss": 15.3086, "step": 1717 }, { "epoch": 0.09850070234784852, "grad_norm": 0.0, "learning_rate": 2.7892504309728564e-06, "loss": 14.7995, "step": 1718 }, { "epoch": 0.09855803686609524, "grad_norm": 0.0, "learning_rate": 2.7837008577771317e-06, "loss": 15.2355, "step": 1719 }, { "epoch": 0.09861537138434194, "grad_norm": 0.0, "learning_rate": 2.778154680402745e-06, "loss": 14.8578, "step": 1720 }, { "epoch": 0.09867270590258866, "grad_norm": 0.0, "learning_rate": 2.7726119073475643e-06, "loss": 15.1245, "step": 1721 }, { "epoch": 0.09873004042083536, "grad_norm": 0.0, "learning_rate": 2.7670725471042526e-06, "loss": 14.927, "step": 1722 }, { "epoch": 0.09878737493908207, "grad_norm": 0.0, "learning_rate": 2.7615366081602306e-06, "loss": 15.2329, "step": 1723 }, { "epoch": 0.09884470945732879, "grad_norm": 0.0, "learning_rate": 2.7560040989976894e-06, "loss": 15.0808, "step": 1724 }, { "epoch": 0.09890204397557549, "grad_norm": 0.0, "learning_rate": 2.750475028093554e-06, "loss": 15.1372, "step": 1725 }, { "epoch": 0.0989593784938222, "grad_norm": 0.0, "learning_rate": 2.74494940391949e-06, "loss": 15.2879, "step": 1726 }, { "epoch": 0.09901671301206892, "grad_norm": 0.0, "learning_rate": 2.7394272349418776e-06, "loss": 15.1674, "step": 1727 }, { "epoch": 0.09907404753031562, "grad_norm": 0.0, "learning_rate": 2.733908529621802e-06, "loss": 15.0526, "step": 1728 }, { "epoch": 0.09913138204856234, "grad_norm": 0.0, "learning_rate": 2.7283932964150417e-06, "loss": 15.5379, "step": 1729 }, { "epoch": 0.09918871656680905, "grad_norm": 0.0, "learning_rate": 2.7228815437720602e-06, "loss": 15.2825, "step": 1730 }, { "epoch": 0.09924605108505576, "grad_norm": 0.0, "learning_rate": 2.7173732801379805e-06, "loss": 15.0891, "step": 1731 }, { "epoch": 0.09930338560330247, "grad_norm": 0.0, "learning_rate": 2.711868513952587e-06, "loss": 15.1538, "step": 1732 }, { "epoch": 0.09936072012154917, "grad_norm": 0.0, "learning_rate": 2.7063672536502995e-06, "loss": 15.2978, "step": 1733 }, { "epoch": 0.09941805463979589, "grad_norm": 0.0, "learning_rate": 2.7008695076601693e-06, "loss": 14.8973, "step": 1734 }, { "epoch": 0.0994753891580426, "grad_norm": 0.0, "learning_rate": 2.69537528440586e-06, "loss": 15.4109, "step": 1735 }, { "epoch": 0.0995327236762893, "grad_norm": 0.0, "learning_rate": 2.6898845923056437e-06, "loss": 15.1761, "step": 1736 }, { "epoch": 0.09959005819453602, "grad_norm": 0.0, "learning_rate": 2.6843974397723736e-06, "loss": 14.8358, "step": 1737 }, { "epoch": 0.09964739271278274, "grad_norm": 0.0, "learning_rate": 2.6789138352134885e-06, "loss": 14.9992, "step": 1738 }, { "epoch": 0.09970472723102944, "grad_norm": 0.0, "learning_rate": 2.6734337870309844e-06, "loss": 15.0057, "step": 1739 }, { "epoch": 0.09976206174927615, "grad_norm": 0.0, "learning_rate": 2.6679573036214112e-06, "loss": 14.9869, "step": 1740 }, { "epoch": 0.09981939626752286, "grad_norm": 0.0, "learning_rate": 2.6624843933758547e-06, "loss": 15.4995, "step": 1741 }, { "epoch": 0.09987673078576957, "grad_norm": 0.0, "learning_rate": 2.6570150646799266e-06, "loss": 15.1863, "step": 1742 }, { "epoch": 0.09993406530401629, "grad_norm": 0.0, "learning_rate": 2.6515493259137546e-06, "loss": 15.227, "step": 1743 }, { "epoch": 0.09999139982226299, "grad_norm": 0.0, "learning_rate": 2.6460871854519594e-06, "loss": 14.8933, "step": 1744 }, { "epoch": 0.1000487343405097, "grad_norm": 0.0, "learning_rate": 2.6406286516636546e-06, "loss": 14.9753, "step": 1745 }, { "epoch": 0.10010606885875642, "grad_norm": 0.0, "learning_rate": 2.635173732912423e-06, "loss": 15.2712, "step": 1746 }, { "epoch": 0.10016340337700312, "grad_norm": 0.0, "learning_rate": 2.6297224375563126e-06, "loss": 15.0092, "step": 1747 }, { "epoch": 0.10022073789524984, "grad_norm": 0.0, "learning_rate": 2.6242747739478158e-06, "loss": 15.1965, "step": 1748 }, { "epoch": 0.10027807241349655, "grad_norm": 0.0, "learning_rate": 2.618830750433862e-06, "loss": 15.0236, "step": 1749 }, { "epoch": 0.10033540693174325, "grad_norm": 0.0, "learning_rate": 2.613390375355801e-06, "loss": 14.9518, "step": 1750 }, { "epoch": 0.10039274144998997, "grad_norm": 0.0, "learning_rate": 2.607953657049398e-06, "loss": 14.8813, "step": 1751 }, { "epoch": 0.10045007596823667, "grad_norm": 0.0, "learning_rate": 2.60252060384481e-06, "loss": 15.0054, "step": 1752 }, { "epoch": 0.10050741048648339, "grad_norm": 0.0, "learning_rate": 2.5970912240665815e-06, "loss": 14.9681, "step": 1753 }, { "epoch": 0.1005647450047301, "grad_norm": 0.0, "learning_rate": 2.591665526033628e-06, "loss": 14.9709, "step": 1754 }, { "epoch": 0.1006220795229768, "grad_norm": 0.0, "learning_rate": 2.5862435180592203e-06, "loss": 15.0781, "step": 1755 }, { "epoch": 0.10067941404122352, "grad_norm": 0.0, "learning_rate": 2.5808252084509784e-06, "loss": 14.9999, "step": 1756 }, { "epoch": 0.10073674855947023, "grad_norm": 0.0, "learning_rate": 2.575410605510858e-06, "loss": 15.0287, "step": 1757 }, { "epoch": 0.10079408307771694, "grad_norm": 0.0, "learning_rate": 2.5699997175351293e-06, "loss": 15.1299, "step": 1758 }, { "epoch": 0.10085141759596365, "grad_norm": 0.0, "learning_rate": 2.5645925528143778e-06, "loss": 14.9807, "step": 1759 }, { "epoch": 0.10090875211421035, "grad_norm": 0.0, "learning_rate": 2.559189119633476e-06, "loss": 15.2697, "step": 1760 }, { "epoch": 0.10096608663245707, "grad_norm": 0.0, "learning_rate": 2.553789426271588e-06, "loss": 15.1754, "step": 1761 }, { "epoch": 0.10102342115070378, "grad_norm": 0.0, "learning_rate": 2.54839348100214e-06, "loss": 15.2112, "step": 1762 }, { "epoch": 0.10108075566895049, "grad_norm": 0.0, "learning_rate": 2.543001292092819e-06, "loss": 15.0921, "step": 1763 }, { "epoch": 0.1011380901871972, "grad_norm": 0.0, "learning_rate": 2.5376128678055536e-06, "loss": 14.9949, "step": 1764 }, { "epoch": 0.10119542470544392, "grad_norm": 0.0, "learning_rate": 2.5322282163965096e-06, "loss": 15.0155, "step": 1765 }, { "epoch": 0.10125275922369062, "grad_norm": 0.0, "learning_rate": 2.5268473461160665e-06, "loss": 15.1644, "step": 1766 }, { "epoch": 0.10131009374193733, "grad_norm": 0.0, "learning_rate": 2.521470265208815e-06, "loss": 15.0194, "step": 1767 }, { "epoch": 0.10136742826018405, "grad_norm": 0.0, "learning_rate": 2.5160969819135368e-06, "loss": 14.571, "step": 1768 }, { "epoch": 0.10142476277843075, "grad_norm": 0.0, "learning_rate": 2.5107275044631942e-06, "loss": 15.3127, "step": 1769 }, { "epoch": 0.10148209729667747, "grad_norm": 0.0, "learning_rate": 2.5053618410849186e-06, "loss": 15.0523, "step": 1770 }, { "epoch": 0.10153943181492417, "grad_norm": 0.0, "learning_rate": 2.5000000000000015e-06, "loss": 15.1352, "step": 1771 }, { "epoch": 0.10159676633317088, "grad_norm": 0.0, "learning_rate": 2.4946419894238705e-06, "loss": 15.0326, "step": 1772 }, { "epoch": 0.1016541008514176, "grad_norm": 0.0, "learning_rate": 2.4892878175660927e-06, "loss": 15.1512, "step": 1773 }, { "epoch": 0.1017114353696643, "grad_norm": 0.0, "learning_rate": 2.483937492630345e-06, "loss": 15.2138, "step": 1774 }, { "epoch": 0.10176876988791102, "grad_norm": 0.0, "learning_rate": 2.47859102281442e-06, "loss": 15.042, "step": 1775 }, { "epoch": 0.10182610440615773, "grad_norm": 0.0, "learning_rate": 2.4732484163101896e-06, "loss": 15.1799, "step": 1776 }, { "epoch": 0.10188343892440443, "grad_norm": 0.0, "learning_rate": 2.4679096813036202e-06, "loss": 15.3713, "step": 1777 }, { "epoch": 0.10194077344265115, "grad_norm": 0.0, "learning_rate": 2.4625748259747363e-06, "loss": 14.9062, "step": 1778 }, { "epoch": 0.10199810796089785, "grad_norm": 0.0, "learning_rate": 2.457243858497626e-06, "loss": 15.33, "step": 1779 }, { "epoch": 0.10205544247914457, "grad_norm": 0.0, "learning_rate": 2.4519167870404126e-06, "loss": 15.1443, "step": 1780 }, { "epoch": 0.10211277699739128, "grad_norm": 0.0, "learning_rate": 2.4465936197652573e-06, "loss": 15.3425, "step": 1781 }, { "epoch": 0.10217011151563798, "grad_norm": 0.0, "learning_rate": 2.4412743648283343e-06, "loss": 14.8019, "step": 1782 }, { "epoch": 0.1022274460338847, "grad_norm": 0.0, "learning_rate": 2.4359590303798243e-06, "loss": 14.9075, "step": 1783 }, { "epoch": 0.10228478055213142, "grad_norm": 0.0, "learning_rate": 2.4306476245638995e-06, "loss": 15.0322, "step": 1784 }, { "epoch": 0.10234211507037812, "grad_norm": 0.0, "learning_rate": 2.4253401555187183e-06, "loss": 14.9531, "step": 1785 }, { "epoch": 0.10239944958862483, "grad_norm": 0.0, "learning_rate": 2.4200366313764e-06, "loss": 14.9875, "step": 1786 }, { "epoch": 0.10245678410687155, "grad_norm": 0.0, "learning_rate": 2.4147370602630267e-06, "loss": 14.8213, "step": 1787 }, { "epoch": 0.10251411862511825, "grad_norm": 0.0, "learning_rate": 2.4094414502986176e-06, "loss": 15.1506, "step": 1788 }, { "epoch": 0.10257145314336497, "grad_norm": 0.0, "learning_rate": 2.4041498095971253e-06, "loss": 14.9495, "step": 1789 }, { "epoch": 0.10262878766161167, "grad_norm": 0.0, "learning_rate": 2.398862146266418e-06, "loss": 15.2569, "step": 1790 }, { "epoch": 0.10268612217985838, "grad_norm": 0.0, "learning_rate": 2.3935784684082763e-06, "loss": 15.5546, "step": 1791 }, { "epoch": 0.1027434566981051, "grad_norm": 0.0, "learning_rate": 2.388298784118366e-06, "loss": 14.7149, "step": 1792 }, { "epoch": 0.1028007912163518, "grad_norm": 0.0, "learning_rate": 2.3830231014862415e-06, "loss": 15.0869, "step": 1793 }, { "epoch": 0.10285812573459852, "grad_norm": 0.0, "learning_rate": 2.3777514285953192e-06, "loss": 15.0755, "step": 1794 }, { "epoch": 0.10291546025284523, "grad_norm": 0.0, "learning_rate": 2.3724837735228773e-06, "loss": 14.853, "step": 1795 }, { "epoch": 0.10297279477109193, "grad_norm": 0.0, "learning_rate": 2.367220144340035e-06, "loss": 15.2218, "step": 1796 }, { "epoch": 0.10303012928933865, "grad_norm": 0.0, "learning_rate": 2.361960549111742e-06, "loss": 15.0583, "step": 1797 }, { "epoch": 0.10308746380758535, "grad_norm": 0.0, "learning_rate": 2.356704995896768e-06, "loss": 15.1339, "step": 1798 }, { "epoch": 0.10314479832583207, "grad_norm": 0.0, "learning_rate": 2.3514534927476935e-06, "loss": 15.0067, "step": 1799 }, { "epoch": 0.10320213284407878, "grad_norm": 0.0, "learning_rate": 2.3462060477108856e-06, "loss": 15.0885, "step": 1800 }, { "epoch": 0.10325946736232548, "grad_norm": 0.0, "learning_rate": 2.340962668826503e-06, "loss": 15.144, "step": 1801 }, { "epoch": 0.1033168018805722, "grad_norm": 0.0, "learning_rate": 2.3357233641284665e-06, "loss": 15.266, "step": 1802 }, { "epoch": 0.10337413639881891, "grad_norm": 0.0, "learning_rate": 2.330488141644457e-06, "loss": 15.1685, "step": 1803 }, { "epoch": 0.10343147091706562, "grad_norm": 0.0, "learning_rate": 2.3252570093959e-06, "loss": 15.1289, "step": 1804 }, { "epoch": 0.10348880543531233, "grad_norm": 0.0, "learning_rate": 2.320029975397957e-06, "loss": 15.1866, "step": 1805 }, { "epoch": 0.10354613995355905, "grad_norm": 0.0, "learning_rate": 2.314807047659506e-06, "loss": 15.1786, "step": 1806 }, { "epoch": 0.10360347447180575, "grad_norm": 0.0, "learning_rate": 2.309588234183137e-06, "loss": 14.7894, "step": 1807 }, { "epoch": 0.10366080899005246, "grad_norm": 0.0, "learning_rate": 2.304373542965132e-06, "loss": 15.1901, "step": 1808 }, { "epoch": 0.10371814350829917, "grad_norm": 0.0, "learning_rate": 2.2991629819954626e-06, "loss": 14.9909, "step": 1809 }, { "epoch": 0.10377547802654588, "grad_norm": 0.0, "learning_rate": 2.293956559257766e-06, "loss": 14.6973, "step": 1810 }, { "epoch": 0.1038328125447926, "grad_norm": 0.0, "learning_rate": 2.2887542827293424e-06, "loss": 15.1475, "step": 1811 }, { "epoch": 0.1038901470630393, "grad_norm": 0.0, "learning_rate": 2.2835561603811363e-06, "loss": 14.8105, "step": 1812 }, { "epoch": 0.10394748158128601, "grad_norm": 0.0, "learning_rate": 2.2783622001777322e-06, "loss": 14.9498, "step": 1813 }, { "epoch": 0.10400481609953273, "grad_norm": 0.0, "learning_rate": 2.2731724100773305e-06, "loss": 15.0761, "step": 1814 }, { "epoch": 0.10406215061777943, "grad_norm": 0.0, "learning_rate": 2.26798679803175e-06, "loss": 15.3239, "step": 1815 }, { "epoch": 0.10411948513602615, "grad_norm": 0.0, "learning_rate": 2.262805371986402e-06, "loss": 15.0212, "step": 1816 }, { "epoch": 0.10417681965427285, "grad_norm": 0.0, "learning_rate": 2.257628139880285e-06, "loss": 14.8911, "step": 1817 }, { "epoch": 0.10423415417251956, "grad_norm": 0.0, "learning_rate": 2.2524551096459703e-06, "loss": 15.3894, "step": 1818 }, { "epoch": 0.10429148869076628, "grad_norm": 0.0, "learning_rate": 2.247286289209597e-06, "loss": 15.4648, "step": 1819 }, { "epoch": 0.10434882320901298, "grad_norm": 0.0, "learning_rate": 2.242121686490847e-06, "loss": 15.2684, "step": 1820 }, { "epoch": 0.1044061577272597, "grad_norm": 0.0, "learning_rate": 2.236961309402945e-06, "loss": 15.3737, "step": 1821 }, { "epoch": 0.10446349224550641, "grad_norm": 0.0, "learning_rate": 2.231805165852637e-06, "loss": 14.9809, "step": 1822 }, { "epoch": 0.10452082676375311, "grad_norm": 0.0, "learning_rate": 2.2266532637401867e-06, "loss": 14.7812, "step": 1823 }, { "epoch": 0.10457816128199983, "grad_norm": 0.0, "learning_rate": 2.2215056109593547e-06, "loss": 15.1497, "step": 1824 }, { "epoch": 0.10463549580024654, "grad_norm": 0.0, "learning_rate": 2.216362215397393e-06, "loss": 14.7571, "step": 1825 }, { "epoch": 0.10469283031849325, "grad_norm": 0.0, "learning_rate": 2.2112230849350286e-06, "loss": 15.2152, "step": 1826 }, { "epoch": 0.10475016483673996, "grad_norm": 0.0, "learning_rate": 2.206088227446459e-06, "loss": 14.8404, "step": 1827 }, { "epoch": 0.10480749935498666, "grad_norm": 0.0, "learning_rate": 2.2009576507993273e-06, "loss": 14.8393, "step": 1828 }, { "epoch": 0.10486483387323338, "grad_norm": 0.0, "learning_rate": 2.1958313628547247e-06, "loss": 15.1098, "step": 1829 }, { "epoch": 0.1049221683914801, "grad_norm": 0.0, "learning_rate": 2.190709371467165e-06, "loss": 15.2116, "step": 1830 }, { "epoch": 0.1049795029097268, "grad_norm": 0.0, "learning_rate": 2.1855916844845827e-06, "loss": 15.1286, "step": 1831 }, { "epoch": 0.10503683742797351, "grad_norm": 0.0, "learning_rate": 2.180478309748313e-06, "loss": 15.0697, "step": 1832 }, { "epoch": 0.10509417194622023, "grad_norm": 0.0, "learning_rate": 2.175369255093091e-06, "loss": 14.8526, "step": 1833 }, { "epoch": 0.10515150646446693, "grad_norm": 0.0, "learning_rate": 2.1702645283470238e-06, "loss": 15.0791, "step": 1834 }, { "epoch": 0.10520884098271364, "grad_norm": 0.0, "learning_rate": 2.165164137331596e-06, "loss": 15.1782, "step": 1835 }, { "epoch": 0.10526617550096036, "grad_norm": 0.0, "learning_rate": 2.16006808986164e-06, "loss": 15.0698, "step": 1836 }, { "epoch": 0.10532351001920706, "grad_norm": 0.0, "learning_rate": 2.1549763937453445e-06, "loss": 15.0492, "step": 1837 }, { "epoch": 0.10538084453745378, "grad_norm": 0.0, "learning_rate": 2.1498890567842175e-06, "loss": 15.3157, "step": 1838 }, { "epoch": 0.10543817905570048, "grad_norm": 0.0, "learning_rate": 2.144806086773095e-06, "loss": 14.9877, "step": 1839 }, { "epoch": 0.1054955135739472, "grad_norm": 0.0, "learning_rate": 2.1397274915001254e-06, "loss": 15.2216, "step": 1840 }, { "epoch": 0.10555284809219391, "grad_norm": 0.0, "learning_rate": 2.1346532787467466e-06, "loss": 15.1515, "step": 1841 }, { "epoch": 0.10561018261044061, "grad_norm": 0.0, "learning_rate": 2.129583456287689e-06, "loss": 15.2111, "step": 1842 }, { "epoch": 0.10566751712868733, "grad_norm": 0.0, "learning_rate": 2.1245180318909482e-06, "loss": 14.9736, "step": 1843 }, { "epoch": 0.10572485164693404, "grad_norm": 0.0, "learning_rate": 2.119457013317789e-06, "loss": 15.1239, "step": 1844 }, { "epoch": 0.10578218616518074, "grad_norm": 0.0, "learning_rate": 2.11440040832272e-06, "loss": 15.1009, "step": 1845 }, { "epoch": 0.10583952068342746, "grad_norm": 0.0, "learning_rate": 2.1093482246534896e-06, "loss": 14.9144, "step": 1846 }, { "epoch": 0.10589685520167416, "grad_norm": 0.0, "learning_rate": 2.1043004700510694e-06, "loss": 15.2687, "step": 1847 }, { "epoch": 0.10595418971992088, "grad_norm": 0.0, "learning_rate": 2.0992571522496502e-06, "loss": 15.3205, "step": 1848 }, { "epoch": 0.10601152423816759, "grad_norm": 0.0, "learning_rate": 2.0942182789766174e-06, "loss": 15.1931, "step": 1849 }, { "epoch": 0.1060688587564143, "grad_norm": 0.0, "learning_rate": 2.0891838579525547e-06, "loss": 15.2253, "step": 1850 }, { "epoch": 0.10612619327466101, "grad_norm": 0.0, "learning_rate": 2.084153896891217e-06, "loss": 15.0989, "step": 1851 }, { "epoch": 0.10618352779290773, "grad_norm": 0.0, "learning_rate": 2.0791284034995296e-06, "loss": 15.1758, "step": 1852 }, { "epoch": 0.10624086231115443, "grad_norm": 0.0, "learning_rate": 2.074107385477568e-06, "loss": 15.0728, "step": 1853 }, { "epoch": 0.10629819682940114, "grad_norm": 0.0, "learning_rate": 2.0690908505185577e-06, "loss": 14.9472, "step": 1854 }, { "epoch": 0.10635553134764786, "grad_norm": 0.0, "learning_rate": 2.064078806308848e-06, "loss": 14.9618, "step": 1855 }, { "epoch": 0.10641286586589456, "grad_norm": 0.0, "learning_rate": 2.0590712605279135e-06, "loss": 14.966, "step": 1856 }, { "epoch": 0.10647020038414128, "grad_norm": 0.0, "learning_rate": 2.054068220848331e-06, "loss": 14.9155, "step": 1857 }, { "epoch": 0.10652753490238798, "grad_norm": 0.0, "learning_rate": 2.0490696949357774e-06, "loss": 14.9877, "step": 1858 }, { "epoch": 0.10658486942063469, "grad_norm": 0.0, "learning_rate": 2.0440756904490115e-06, "loss": 14.8584, "step": 1859 }, { "epoch": 0.10664220393888141, "grad_norm": 0.0, "learning_rate": 2.0390862150398637e-06, "loss": 15.2422, "step": 1860 }, { "epoch": 0.10669953845712811, "grad_norm": 0.0, "learning_rate": 2.0341012763532243e-06, "loss": 14.9152, "step": 1861 }, { "epoch": 0.10675687297537483, "grad_norm": 0.0, "learning_rate": 2.0291208820270368e-06, "loss": 15.1744, "step": 1862 }, { "epoch": 0.10681420749362154, "grad_norm": 0.0, "learning_rate": 2.024145039692277e-06, "loss": 14.7057, "step": 1863 }, { "epoch": 0.10687154201186824, "grad_norm": 0.0, "learning_rate": 2.0191737569729492e-06, "loss": 15.0924, "step": 1864 }, { "epoch": 0.10692887653011496, "grad_norm": 0.0, "learning_rate": 2.0142070414860704e-06, "loss": 14.8843, "step": 1865 }, { "epoch": 0.10698621104836166, "grad_norm": 0.0, "learning_rate": 2.009244900841658e-06, "loss": 14.9535, "step": 1866 }, { "epoch": 0.10704354556660838, "grad_norm": 0.0, "learning_rate": 2.004287342642721e-06, "loss": 15.2298, "step": 1867 }, { "epoch": 0.10710088008485509, "grad_norm": 0.0, "learning_rate": 1.9993343744852504e-06, "loss": 14.8105, "step": 1868 }, { "epoch": 0.10715821460310179, "grad_norm": 0.0, "learning_rate": 1.994386003958198e-06, "loss": 15.1573, "step": 1869 }, { "epoch": 0.10721554912134851, "grad_norm": 0.0, "learning_rate": 1.989442238643478e-06, "loss": 15.06, "step": 1870 }, { "epoch": 0.10727288363959522, "grad_norm": 0.0, "learning_rate": 1.9845030861159416e-06, "loss": 14.703, "step": 1871 }, { "epoch": 0.10733021815784193, "grad_norm": 0.0, "learning_rate": 1.9795685539433785e-06, "loss": 15.1016, "step": 1872 }, { "epoch": 0.10738755267608864, "grad_norm": 0.0, "learning_rate": 1.974638649686495e-06, "loss": 15.0317, "step": 1873 }, { "epoch": 0.10744488719433536, "grad_norm": 0.0, "learning_rate": 1.9697133808989084e-06, "loss": 14.9478, "step": 1874 }, { "epoch": 0.10750222171258206, "grad_norm": 0.0, "learning_rate": 1.9647927551271302e-06, "loss": 15.0411, "step": 1875 }, { "epoch": 0.10755955623082877, "grad_norm": 0.0, "learning_rate": 1.959876779910564e-06, "loss": 15.0192, "step": 1876 }, { "epoch": 0.10761689074907548, "grad_norm": 0.0, "learning_rate": 1.954965462781481e-06, "loss": 14.9757, "step": 1877 }, { "epoch": 0.10767422526732219, "grad_norm": 0.0, "learning_rate": 1.950058811265022e-06, "loss": 15.2199, "step": 1878 }, { "epoch": 0.1077315597855689, "grad_norm": 0.0, "learning_rate": 1.945156832879174e-06, "loss": 15.1254, "step": 1879 }, { "epoch": 0.10778889430381561, "grad_norm": 0.0, "learning_rate": 1.9402595351347656e-06, "loss": 15.0774, "step": 1880 }, { "epoch": 0.10784622882206232, "grad_norm": 0.0, "learning_rate": 1.935366925535452e-06, "loss": 14.932, "step": 1881 }, { "epoch": 0.10790356334030904, "grad_norm": 0.0, "learning_rate": 1.930479011577711e-06, "loss": 15.0582, "step": 1882 }, { "epoch": 0.10796089785855574, "grad_norm": 0.0, "learning_rate": 1.925595800750816e-06, "loss": 15.268, "step": 1883 }, { "epoch": 0.10801823237680246, "grad_norm": 0.0, "learning_rate": 1.920717300536846e-06, "loss": 15.4256, "step": 1884 }, { "epoch": 0.10807556689504916, "grad_norm": 0.0, "learning_rate": 1.91584351841065e-06, "loss": 14.9376, "step": 1885 }, { "epoch": 0.10813290141329587, "grad_norm": 0.0, "learning_rate": 1.9109744618398607e-06, "loss": 15.2782, "step": 1886 }, { "epoch": 0.10819023593154259, "grad_norm": 0.0, "learning_rate": 1.9061101382848567e-06, "loss": 14.8349, "step": 1887 }, { "epoch": 0.10824757044978929, "grad_norm": 0.0, "learning_rate": 1.9012505551987764e-06, "loss": 15.1059, "step": 1888 }, { "epoch": 0.108304904968036, "grad_norm": 0.0, "learning_rate": 1.8963957200274874e-06, "loss": 14.9271, "step": 1889 }, { "epoch": 0.10836223948628272, "grad_norm": 0.0, "learning_rate": 1.8915456402095883e-06, "loss": 15.0641, "step": 1890 }, { "epoch": 0.10841957400452942, "grad_norm": 0.0, "learning_rate": 1.8867003231763847e-06, "loss": 14.84, "step": 1891 }, { "epoch": 0.10847690852277614, "grad_norm": 0.0, "learning_rate": 1.8818597763518926e-06, "loss": 14.9408, "step": 1892 }, { "epoch": 0.10853424304102285, "grad_norm": 0.0, "learning_rate": 1.8770240071528117e-06, "loss": 15.1839, "step": 1893 }, { "epoch": 0.10859157755926956, "grad_norm": 0.0, "learning_rate": 1.872193022988526e-06, "loss": 15.2123, "step": 1894 }, { "epoch": 0.10864891207751627, "grad_norm": 0.0, "learning_rate": 1.8673668312610843e-06, "loss": 15.3868, "step": 1895 }, { "epoch": 0.10870624659576297, "grad_norm": 0.0, "learning_rate": 1.8625454393651976e-06, "loss": 14.9713, "step": 1896 }, { "epoch": 0.10876358111400969, "grad_norm": 0.0, "learning_rate": 1.8577288546882167e-06, "loss": 15.1364, "step": 1897 }, { "epoch": 0.1088209156322564, "grad_norm": 0.0, "learning_rate": 1.8529170846101318e-06, "loss": 15.0209, "step": 1898 }, { "epoch": 0.1088782501505031, "grad_norm": 0.0, "learning_rate": 1.8481101365035537e-06, "loss": 15.1795, "step": 1899 }, { "epoch": 0.10893558466874982, "grad_norm": 0.0, "learning_rate": 1.8433080177337043e-06, "loss": 15.1728, "step": 1900 }, { "epoch": 0.10899291918699654, "grad_norm": 0.0, "learning_rate": 1.8385107356584058e-06, "loss": 14.9725, "step": 1901 }, { "epoch": 0.10905025370524324, "grad_norm": 0.0, "learning_rate": 1.8337182976280731e-06, "loss": 15.1804, "step": 1902 }, { "epoch": 0.10910758822348995, "grad_norm": 0.0, "learning_rate": 1.8289307109856941e-06, "loss": 14.9713, "step": 1903 }, { "epoch": 0.10916492274173666, "grad_norm": 0.0, "learning_rate": 1.8241479830668291e-06, "loss": 15.0086, "step": 1904 }, { "epoch": 0.10922225725998337, "grad_norm": 0.0, "learning_rate": 1.8193701211995862e-06, "loss": 15.3655, "step": 1905 }, { "epoch": 0.10927959177823009, "grad_norm": 0.0, "learning_rate": 1.8145971327046274e-06, "loss": 14.9315, "step": 1906 }, { "epoch": 0.10933692629647679, "grad_norm": 0.0, "learning_rate": 1.8098290248951394e-06, "loss": 15.0965, "step": 1907 }, { "epoch": 0.1093942608147235, "grad_norm": 0.0, "learning_rate": 1.8050658050768338e-06, "loss": 15.2293, "step": 1908 }, { "epoch": 0.10945159533297022, "grad_norm": 0.0, "learning_rate": 1.8003074805479314e-06, "loss": 15.0582, "step": 1909 }, { "epoch": 0.10950892985121692, "grad_norm": 0.0, "learning_rate": 1.7955540585991577e-06, "loss": 15.4075, "step": 1910 }, { "epoch": 0.10956626436946364, "grad_norm": 0.0, "learning_rate": 1.7908055465137181e-06, "loss": 15.0865, "step": 1911 }, { "epoch": 0.10962359888771035, "grad_norm": 0.0, "learning_rate": 1.7860619515673034e-06, "loss": 14.9347, "step": 1912 }, { "epoch": 0.10968093340595705, "grad_norm": 0.0, "learning_rate": 1.781323281028065e-06, "loss": 15.1036, "step": 1913 }, { "epoch": 0.10973826792420377, "grad_norm": 0.0, "learning_rate": 1.7765895421566099e-06, "loss": 14.9977, "step": 1914 }, { "epoch": 0.10979560244245047, "grad_norm": 0.0, "learning_rate": 1.771860742205988e-06, "loss": 15.3883, "step": 1915 }, { "epoch": 0.10985293696069719, "grad_norm": 0.0, "learning_rate": 1.7671368884216873e-06, "loss": 15.2364, "step": 1916 }, { "epoch": 0.1099102714789439, "grad_norm": 0.0, "learning_rate": 1.7624179880416087e-06, "loss": 15.0877, "step": 1917 }, { "epoch": 0.1099676059971906, "grad_norm": 0.0, "learning_rate": 1.7577040482960723e-06, "loss": 14.7803, "step": 1918 }, { "epoch": 0.11002494051543732, "grad_norm": 0.0, "learning_rate": 1.7529950764077885e-06, "loss": 14.9745, "step": 1919 }, { "epoch": 0.11008227503368403, "grad_norm": 0.0, "learning_rate": 1.7482910795918655e-06, "loss": 15.092, "step": 1920 }, { "epoch": 0.11013960955193074, "grad_norm": 0.0, "learning_rate": 1.7435920650557808e-06, "loss": 15.1327, "step": 1921 }, { "epoch": 0.11019694407017745, "grad_norm": 0.0, "learning_rate": 1.7388980399993822e-06, "loss": 15.2231, "step": 1922 }, { "epoch": 0.11025427858842415, "grad_norm": 0.0, "learning_rate": 1.7342090116148684e-06, "loss": 15.0259, "step": 1923 }, { "epoch": 0.11031161310667087, "grad_norm": 0.0, "learning_rate": 1.7295249870867898e-06, "loss": 15.2957, "step": 1924 }, { "epoch": 0.11036894762491758, "grad_norm": 0.0, "learning_rate": 1.7248459735920203e-06, "loss": 15.0902, "step": 1925 }, { "epoch": 0.11042628214316429, "grad_norm": 0.0, "learning_rate": 1.720171978299766e-06, "loss": 15.0184, "step": 1926 }, { "epoch": 0.110483616661411, "grad_norm": 0.0, "learning_rate": 1.7155030083715362e-06, "loss": 14.5436, "step": 1927 }, { "epoch": 0.11054095117965772, "grad_norm": 0.0, "learning_rate": 1.7108390709611427e-06, "loss": 14.8727, "step": 1928 }, { "epoch": 0.11059828569790442, "grad_norm": 0.0, "learning_rate": 1.7061801732146865e-06, "loss": 14.7645, "step": 1929 }, { "epoch": 0.11065562021615113, "grad_norm": 0.0, "learning_rate": 1.7015263222705492e-06, "loss": 15.1675, "step": 1930 }, { "epoch": 0.11071295473439785, "grad_norm": 0.0, "learning_rate": 1.6968775252593745e-06, "loss": 15.1032, "step": 1931 }, { "epoch": 0.11077028925264455, "grad_norm": 0.0, "learning_rate": 1.692233789304069e-06, "loss": 15.134, "step": 1932 }, { "epoch": 0.11082762377089127, "grad_norm": 0.0, "learning_rate": 1.6875951215197779e-06, "loss": 15.1558, "step": 1933 }, { "epoch": 0.11088495828913797, "grad_norm": 0.0, "learning_rate": 1.682961529013889e-06, "loss": 14.7275, "step": 1934 }, { "epoch": 0.11094229280738468, "grad_norm": 0.0, "learning_rate": 1.6783330188860047e-06, "loss": 14.8734, "step": 1935 }, { "epoch": 0.1109996273256314, "grad_norm": 0.0, "learning_rate": 1.6737095982279444e-06, "loss": 15.6697, "step": 1936 }, { "epoch": 0.1110569618438781, "grad_norm": 0.0, "learning_rate": 1.669091274123732e-06, "loss": 15.2523, "step": 1937 }, { "epoch": 0.11111429636212482, "grad_norm": 0.0, "learning_rate": 1.6644780536495775e-06, "loss": 15.1574, "step": 1938 }, { "epoch": 0.11117163088037153, "grad_norm": 0.0, "learning_rate": 1.6598699438738764e-06, "loss": 15.0501, "step": 1939 }, { "epoch": 0.11122896539861823, "grad_norm": 0.0, "learning_rate": 1.6552669518571873e-06, "loss": 14.9379, "step": 1940 }, { "epoch": 0.11128629991686495, "grad_norm": 0.0, "learning_rate": 1.6506690846522338e-06, "loss": 14.8896, "step": 1941 }, { "epoch": 0.11134363443511167, "grad_norm": 0.0, "learning_rate": 1.646076349303884e-06, "loss": 14.9501, "step": 1942 }, { "epoch": 0.11140096895335837, "grad_norm": 0.0, "learning_rate": 1.6414887528491414e-06, "loss": 15.0372, "step": 1943 }, { "epoch": 0.11145830347160508, "grad_norm": 0.0, "learning_rate": 1.6369063023171367e-06, "loss": 15.1974, "step": 1944 }, { "epoch": 0.11151563798985178, "grad_norm": 0.0, "learning_rate": 1.6323290047291196e-06, "loss": 14.8393, "step": 1945 }, { "epoch": 0.1115729725080985, "grad_norm": 0.0, "learning_rate": 1.6277568670984384e-06, "loss": 15.2872, "step": 1946 }, { "epoch": 0.11163030702634522, "grad_norm": 0.0, "learning_rate": 1.623189896430542e-06, "loss": 15.1409, "step": 1947 }, { "epoch": 0.11168764154459192, "grad_norm": 0.0, "learning_rate": 1.618628099722957e-06, "loss": 15.1526, "step": 1948 }, { "epoch": 0.11174497606283863, "grad_norm": 0.0, "learning_rate": 1.6140714839652838e-06, "loss": 15.0921, "step": 1949 }, { "epoch": 0.11180231058108535, "grad_norm": 0.0, "learning_rate": 1.609520056139185e-06, "loss": 15.1327, "step": 1950 }, { "epoch": 0.11185964509933205, "grad_norm": 0.0, "learning_rate": 1.604973823218376e-06, "loss": 14.6891, "step": 1951 }, { "epoch": 0.11191697961757877, "grad_norm": 0.0, "learning_rate": 1.6004327921686086e-06, "loss": 15.1599, "step": 1952 }, { "epoch": 0.11197431413582547, "grad_norm": 0.0, "learning_rate": 1.5958969699476689e-06, "loss": 14.7748, "step": 1953 }, { "epoch": 0.11203164865407218, "grad_norm": 0.0, "learning_rate": 1.5913663635053578e-06, "loss": 14.9859, "step": 1954 }, { "epoch": 0.1120889831723189, "grad_norm": 0.0, "learning_rate": 1.5868409797834882e-06, "loss": 15.2427, "step": 1955 }, { "epoch": 0.1121463176905656, "grad_norm": 0.0, "learning_rate": 1.582320825715868e-06, "loss": 14.8814, "step": 1956 }, { "epoch": 0.11220365220881232, "grad_norm": 0.0, "learning_rate": 1.5778059082282932e-06, "loss": 15.1055, "step": 1957 }, { "epoch": 0.11226098672705903, "grad_norm": 0.0, "learning_rate": 1.573296234238534e-06, "loss": 15.1705, "step": 1958 }, { "epoch": 0.11231832124530573, "grad_norm": 0.0, "learning_rate": 1.5687918106563326e-06, "loss": 14.8643, "step": 1959 }, { "epoch": 0.11237565576355245, "grad_norm": 0.0, "learning_rate": 1.56429264438338e-06, "loss": 14.8809, "step": 1960 }, { "epoch": 0.11243299028179916, "grad_norm": 0.0, "learning_rate": 1.5597987423133166e-06, "loss": 14.9778, "step": 1961 }, { "epoch": 0.11249032480004587, "grad_norm": 0.0, "learning_rate": 1.5553101113317137e-06, "loss": 14.9691, "step": 1962 }, { "epoch": 0.11254765931829258, "grad_norm": 0.0, "learning_rate": 1.550826758316068e-06, "loss": 15.1386, "step": 1963 }, { "epoch": 0.11260499383653928, "grad_norm": 0.0, "learning_rate": 1.546348690135786e-06, "loss": 15.2445, "step": 1964 }, { "epoch": 0.112662328354786, "grad_norm": 0.0, "learning_rate": 1.5418759136521844e-06, "loss": 15.0999, "step": 1965 }, { "epoch": 0.11271966287303271, "grad_norm": 0.0, "learning_rate": 1.5374084357184621e-06, "loss": 15.0017, "step": 1966 }, { "epoch": 0.11277699739127942, "grad_norm": 0.0, "learning_rate": 1.5329462631797092e-06, "loss": 15.1215, "step": 1967 }, { "epoch": 0.11283433190952613, "grad_norm": 0.0, "learning_rate": 1.528489402872878e-06, "loss": 15.2549, "step": 1968 }, { "epoch": 0.11289166642777285, "grad_norm": 0.0, "learning_rate": 1.5240378616267887e-06, "loss": 14.8262, "step": 1969 }, { "epoch": 0.11294900094601955, "grad_norm": 0.0, "learning_rate": 1.5195916462621074e-06, "loss": 15.219, "step": 1970 }, { "epoch": 0.11300633546426626, "grad_norm": 0.0, "learning_rate": 1.5151507635913403e-06, "loss": 15.0064, "step": 1971 }, { "epoch": 0.11306366998251297, "grad_norm": 0.0, "learning_rate": 1.510715220418823e-06, "loss": 15.0459, "step": 1972 }, { "epoch": 0.11312100450075968, "grad_norm": 0.0, "learning_rate": 1.5062850235407118e-06, "loss": 14.745, "step": 1973 }, { "epoch": 0.1131783390190064, "grad_norm": 0.0, "learning_rate": 1.5018601797449683e-06, "loss": 15.1417, "step": 1974 }, { "epoch": 0.1132356735372531, "grad_norm": 0.0, "learning_rate": 1.4974406958113557e-06, "loss": 14.9677, "step": 1975 }, { "epoch": 0.11329300805549981, "grad_norm": 0.0, "learning_rate": 1.4930265785114224e-06, "loss": 15.0245, "step": 1976 }, { "epoch": 0.11335034257374653, "grad_norm": 0.0, "learning_rate": 1.4886178346084934e-06, "loss": 15.0466, "step": 1977 }, { "epoch": 0.11340767709199323, "grad_norm": 0.0, "learning_rate": 1.4842144708576606e-06, "loss": 15.0459, "step": 1978 }, { "epoch": 0.11346501161023995, "grad_norm": 0.0, "learning_rate": 1.4798164940057769e-06, "loss": 15.342, "step": 1979 }, { "epoch": 0.11352234612848666, "grad_norm": 0.0, "learning_rate": 1.4754239107914337e-06, "loss": 15.3308, "step": 1980 }, { "epoch": 0.11357968064673336, "grad_norm": 0.0, "learning_rate": 1.4710367279449662e-06, "loss": 15.0959, "step": 1981 }, { "epoch": 0.11363701516498008, "grad_norm": 0.0, "learning_rate": 1.4666549521884283e-06, "loss": 15.3118, "step": 1982 }, { "epoch": 0.11369434968322678, "grad_norm": 0.0, "learning_rate": 1.4622785902355967e-06, "loss": 15.0102, "step": 1983 }, { "epoch": 0.1137516842014735, "grad_norm": 0.0, "learning_rate": 1.457907648791943e-06, "loss": 15.2011, "step": 1984 }, { "epoch": 0.11380901871972021, "grad_norm": 0.0, "learning_rate": 1.4535421345546424e-06, "loss": 14.8329, "step": 1985 }, { "epoch": 0.11386635323796691, "grad_norm": 0.0, "learning_rate": 1.4491820542125495e-06, "loss": 14.8631, "step": 1986 }, { "epoch": 0.11392368775621363, "grad_norm": 0.0, "learning_rate": 1.4448274144461965e-06, "loss": 15.0568, "step": 1987 }, { "epoch": 0.11398102227446034, "grad_norm": 0.0, "learning_rate": 1.4404782219277758e-06, "loss": 14.9962, "step": 1988 }, { "epoch": 0.11403835679270705, "grad_norm": 0.0, "learning_rate": 1.4361344833211377e-06, "loss": 14.908, "step": 1989 }, { "epoch": 0.11409569131095376, "grad_norm": 0.0, "learning_rate": 1.431796205281773e-06, "loss": 14.9463, "step": 1990 }, { "epoch": 0.11415302582920046, "grad_norm": 0.0, "learning_rate": 1.4274633944568056e-06, "loss": 14.7642, "step": 1991 }, { "epoch": 0.11421036034744718, "grad_norm": 0.0, "learning_rate": 1.423136057484983e-06, "loss": 14.9915, "step": 1992 }, { "epoch": 0.1142676948656939, "grad_norm": 0.0, "learning_rate": 1.4188142009966689e-06, "loss": 15.1879, "step": 1993 }, { "epoch": 0.1143250293839406, "grad_norm": 0.0, "learning_rate": 1.414497831613823e-06, "loss": 15.253, "step": 1994 }, { "epoch": 0.11438236390218731, "grad_norm": 0.0, "learning_rate": 1.410186955950006e-06, "loss": 15.1298, "step": 1995 }, { "epoch": 0.11443969842043403, "grad_norm": 0.0, "learning_rate": 1.4058815806103542e-06, "loss": 15.3996, "step": 1996 }, { "epoch": 0.11449703293868073, "grad_norm": 0.0, "learning_rate": 1.4015817121915792e-06, "loss": 14.8465, "step": 1997 }, { "epoch": 0.11455436745692744, "grad_norm": 0.0, "learning_rate": 1.3972873572819535e-06, "loss": 15.2481, "step": 1998 }, { "epoch": 0.11461170197517416, "grad_norm": 0.0, "learning_rate": 1.3929985224613051e-06, "loss": 15.0481, "step": 1999 }, { "epoch": 0.11466903649342086, "grad_norm": 0.0, "learning_rate": 1.3887152143009992e-06, "loss": 15.2159, "step": 2000 } ], "logging_steps": 1.0, "max_steps": 2617, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1.7098723774976492e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }