{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.964060264287518, "eval_steps": 500, "global_step": 4455, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.7251, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 0.684, "step": 2 }, { "epoch": 0.0, "learning_rate": 6e-06, "loss": 0.7066, "step": 3 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-06, "loss": 0.6894, "step": 4 }, { "epoch": 0.0, "learning_rate": 1e-05, "loss": 0.7617, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.2e-05, "loss": 0.6869, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.4e-05, "loss": 0.6697, "step": 7 }, { "epoch": 0.01, "learning_rate": 1.6000000000000003e-05, "loss": 0.7293, "step": 8 }, { "epoch": 0.01, "learning_rate": 1.8e-05, "loss": 0.6381, "step": 9 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.6163, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.9999998596668246e-05, "loss": 0.6307, "step": 11 }, { "epoch": 0.01, "learning_rate": 1.9999994386673375e-05, "loss": 0.6536, "step": 12 }, { "epoch": 0.01, "learning_rate": 1.9999987370016564e-05, "loss": 0.6721, "step": 13 }, { "epoch": 0.01, "learning_rate": 1.9999977546699793e-05, "loss": 0.6227, "step": 14 }, { "epoch": 0.01, "learning_rate": 1.9999964916725805e-05, "loss": 0.6545, "step": 15 }, { "epoch": 0.01, "learning_rate": 1.9999949480098158e-05, "loss": 0.6805, "step": 16 }, { "epoch": 0.01, "learning_rate": 1.999993123682118e-05, "loss": 0.661, "step": 17 }, { "epoch": 0.01, "learning_rate": 1.9999910186899984e-05, "loss": 0.6475, "step": 18 }, { "epoch": 0.01, "learning_rate": 1.999988633034049e-05, "loss": 0.6131, "step": 19 }, { "epoch": 0.01, "learning_rate": 1.9999859667149386e-05, "loss": 0.6337, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.9999830197334157e-05, "loss": 0.7843, "step": 21 }, { "epoch": 0.01, "learning_rate": 1.9999797920903076e-05, "loss": 0.6236, "step": 22 }, { "epoch": 0.02, "learning_rate": 1.9999762837865202e-05, "loss": 0.644, "step": 23 }, { "epoch": 0.02, "learning_rate": 1.999972494823038e-05, "loss": 0.6497, "step": 24 }, { "epoch": 0.02, "learning_rate": 1.9999684252009243e-05, "loss": 0.6422, "step": 25 }, { "epoch": 0.02, "learning_rate": 1.9999640749213215e-05, "loss": 0.622, "step": 26 }, { "epoch": 0.02, "learning_rate": 1.9999594439854504e-05, "loss": 0.6153, "step": 27 }, { "epoch": 0.02, "learning_rate": 1.999954532394611e-05, "loss": 0.6442, "step": 28 }, { "epoch": 0.02, "learning_rate": 1.999949340150182e-05, "loss": 0.62, "step": 29 }, { "epoch": 0.02, "learning_rate": 1.9999438672536202e-05, "loss": 0.625, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.9999381137064617e-05, "loss": 0.5887, "step": 31 }, { "epoch": 0.02, "learning_rate": 1.9999320795103215e-05, "loss": 0.5998, "step": 32 }, { "epoch": 0.02, "learning_rate": 1.9999257646668936e-05, "loss": 0.5878, "step": 33 }, { "epoch": 0.02, "learning_rate": 1.9999191691779494e-05, "loss": 0.5898, "step": 34 }, { "epoch": 0.02, "learning_rate": 1.999912293045341e-05, "loss": 0.6015, "step": 35 }, { "epoch": 0.02, "learning_rate": 1.999905136270998e-05, "loss": 0.5943, "step": 36 }, { "epoch": 0.02, "learning_rate": 1.9998976988569283e-05, "loss": 0.6411, "step": 37 }, { "epoch": 0.03, "learning_rate": 1.9998899808052203e-05, "loss": 0.6758, "step": 38 }, { "epoch": 0.03, "learning_rate": 1.9998819821180398e-05, "loss": 0.6665, "step": 39 }, { "epoch": 0.03, "learning_rate": 1.9998737027976323e-05, "loss": 0.5868, "step": 40 }, { "epoch": 0.03, "learning_rate": 1.9998651428463205e-05, "loss": 0.6656, "step": 41 }, { "epoch": 0.03, "learning_rate": 1.9998563022665078e-05, "loss": 0.5947, "step": 42 }, { "epoch": 0.03, "learning_rate": 1.999847181060675e-05, "loss": 0.6319, "step": 43 }, { "epoch": 0.03, "learning_rate": 1.999837779231382e-05, "loss": 0.5821, "step": 44 }, { "epoch": 0.03, "learning_rate": 1.999828096781268e-05, "loss": 0.6216, "step": 45 }, { "epoch": 0.03, "learning_rate": 1.9998181337130503e-05, "loss": 0.6095, "step": 46 }, { "epoch": 0.03, "learning_rate": 1.9998078900295254e-05, "loss": 0.5949, "step": 47 }, { "epoch": 0.03, "learning_rate": 1.999797365733568e-05, "loss": 0.6879, "step": 48 }, { "epoch": 0.03, "learning_rate": 1.999786560828132e-05, "loss": 0.599, "step": 49 }, { "epoch": 0.03, "learning_rate": 1.99977547531625e-05, "loss": 0.5634, "step": 50 }, { "epoch": 0.03, "learning_rate": 1.999764109201034e-05, "loss": 0.6556, "step": 51 }, { "epoch": 0.04, "learning_rate": 1.999752462485673e-05, "loss": 0.576, "step": 52 }, { "epoch": 0.04, "learning_rate": 1.9997405351734365e-05, "loss": 0.5823, "step": 53 }, { "epoch": 0.04, "learning_rate": 1.999728327267672e-05, "loss": 0.5989, "step": 54 }, { "epoch": 0.04, "learning_rate": 1.9997158387718057e-05, "loss": 0.5854, "step": 55 }, { "epoch": 0.04, "learning_rate": 1.9997030696893427e-05, "loss": 0.6511, "step": 56 }, { "epoch": 0.04, "learning_rate": 1.9996900200238668e-05, "loss": 0.6048, "step": 57 }, { "epoch": 0.04, "learning_rate": 1.9996766897790412e-05, "loss": 0.5831, "step": 58 }, { "epoch": 0.04, "learning_rate": 1.9996630789586065e-05, "loss": 0.565, "step": 59 }, { "epoch": 0.04, "learning_rate": 1.9996491875663833e-05, "loss": 0.5952, "step": 60 }, { "epoch": 0.04, "learning_rate": 1.9996350156062697e-05, "loss": 0.5649, "step": 61 }, { "epoch": 0.04, "learning_rate": 1.999620563082244e-05, "loss": 0.6088, "step": 62 }, { "epoch": 0.04, "learning_rate": 1.999605829998363e-05, "loss": 0.5991, "step": 63 }, { "epoch": 0.04, "learning_rate": 1.9995908163587607e-05, "loss": 0.621, "step": 64 }, { "epoch": 0.04, "learning_rate": 1.999575522167651e-05, "loss": 0.6172, "step": 65 }, { "epoch": 0.04, "learning_rate": 1.9995599474293272e-05, "loss": 0.6655, "step": 66 }, { "epoch": 0.05, "learning_rate": 1.9995440921481605e-05, "loss": 0.6356, "step": 67 }, { "epoch": 0.05, "learning_rate": 1.9995279563286004e-05, "loss": 0.5898, "step": 68 }, { "epoch": 0.05, "learning_rate": 1.999511539975176e-05, "loss": 0.5648, "step": 69 }, { "epoch": 0.05, "learning_rate": 1.9994948430924944e-05, "loss": 0.629, "step": 70 }, { "epoch": 0.05, "learning_rate": 1.9994778656852428e-05, "loss": 0.6588, "step": 71 }, { "epoch": 0.05, "learning_rate": 1.999460607758185e-05, "loss": 0.5803, "step": 72 }, { "epoch": 0.05, "learning_rate": 1.9994430693161662e-05, "loss": 0.5956, "step": 73 }, { "epoch": 0.05, "learning_rate": 1.9994252503641076e-05, "loss": 0.594, "step": 74 }, { "epoch": 0.05, "learning_rate": 1.9994071509070104e-05, "loss": 0.5705, "step": 75 }, { "epoch": 0.05, "learning_rate": 1.9993887709499553e-05, "loss": 0.6173, "step": 76 }, { "epoch": 0.05, "learning_rate": 1.9993701104981003e-05, "loss": 0.6115, "step": 77 }, { "epoch": 0.05, "learning_rate": 1.999351169556683e-05, "loss": 0.6033, "step": 78 }, { "epoch": 0.05, "learning_rate": 1.9993319481310195e-05, "loss": 0.6066, "step": 79 }, { "epoch": 0.05, "learning_rate": 1.9993124462265045e-05, "loss": 0.5697, "step": 80 }, { "epoch": 0.05, "learning_rate": 1.9992926638486118e-05, "loss": 0.6293, "step": 81 }, { "epoch": 0.06, "learning_rate": 1.9992726010028935e-05, "loss": 0.6349, "step": 82 }, { "epoch": 0.06, "learning_rate": 1.9992522576949803e-05, "loss": 0.5912, "step": 83 }, { "epoch": 0.06, "learning_rate": 1.999231633930582e-05, "loss": 0.6073, "step": 84 }, { "epoch": 0.06, "learning_rate": 1.9992107297154872e-05, "loss": 0.5638, "step": 85 }, { "epoch": 0.06, "learning_rate": 1.999189545055563e-05, "loss": 0.587, "step": 86 }, { "epoch": 0.06, "learning_rate": 1.999168079956755e-05, "loss": 0.6084, "step": 87 }, { "epoch": 0.06, "learning_rate": 1.999146334425088e-05, "loss": 0.5811, "step": 88 }, { "epoch": 0.06, "learning_rate": 1.999124308466665e-05, "loss": 0.5603, "step": 89 }, { "epoch": 0.06, "learning_rate": 1.9991020020876676e-05, "loss": 0.6015, "step": 90 }, { "epoch": 0.06, "learning_rate": 1.9990794152943574e-05, "loss": 0.63, "step": 91 }, { "epoch": 0.06, "learning_rate": 1.9990565480930734e-05, "loss": 0.6245, "step": 92 }, { "epoch": 0.06, "learning_rate": 1.999033400490233e-05, "loss": 0.626, "step": 93 }, { "epoch": 0.06, "learning_rate": 1.9990099724923337e-05, "loss": 0.5502, "step": 94 }, { "epoch": 0.06, "learning_rate": 1.9989862641059504e-05, "loss": 0.6085, "step": 95 }, { "epoch": 0.06, "learning_rate": 1.998962275337738e-05, "loss": 0.6174, "step": 96 }, { "epoch": 0.07, "learning_rate": 1.998938006194429e-05, "loss": 0.6137, "step": 97 }, { "epoch": 0.07, "learning_rate": 1.9989134566828344e-05, "loss": 0.5377, "step": 98 }, { "epoch": 0.07, "learning_rate": 1.9988886268098446e-05, "loss": 0.6077, "step": 99 }, { "epoch": 0.07, "learning_rate": 1.9988635165824293e-05, "loss": 0.5585, "step": 100 }, { "epoch": 0.07, "learning_rate": 1.998838126007635e-05, "loss": 0.6025, "step": 101 }, { "epoch": 0.07, "learning_rate": 1.998812455092589e-05, "loss": 0.6041, "step": 102 }, { "epoch": 0.07, "learning_rate": 1.9987865038444955e-05, "loss": 0.5728, "step": 103 }, { "epoch": 0.07, "learning_rate": 1.9987602722706387e-05, "loss": 0.5588, "step": 104 }, { "epoch": 0.07, "learning_rate": 1.9987337603783806e-05, "loss": 0.5903, "step": 105 }, { "epoch": 0.07, "learning_rate": 1.998706968175162e-05, "loss": 0.5814, "step": 106 }, { "epoch": 0.07, "learning_rate": 1.9986798956685033e-05, "loss": 0.6156, "step": 107 }, { "epoch": 0.07, "learning_rate": 1.9986525428660018e-05, "loss": 0.5985, "step": 108 }, { "epoch": 0.07, "learning_rate": 1.998624909775335e-05, "loss": 0.6005, "step": 109 }, { "epoch": 0.07, "learning_rate": 1.998596996404259e-05, "loss": 0.576, "step": 110 }, { "epoch": 0.07, "learning_rate": 1.998568802760608e-05, "loss": 0.5777, "step": 111 }, { "epoch": 0.08, "learning_rate": 1.998540328852294e-05, "loss": 0.6087, "step": 112 }, { "epoch": 0.08, "learning_rate": 1.99851157468731e-05, "loss": 0.5819, "step": 113 }, { "epoch": 0.08, "learning_rate": 1.9984825402737262e-05, "loss": 0.6039, "step": 114 }, { "epoch": 0.08, "learning_rate": 1.9984532256196905e-05, "loss": 0.6003, "step": 115 }, { "epoch": 0.08, "learning_rate": 1.9984236307334313e-05, "loss": 0.6095, "step": 116 }, { "epoch": 0.08, "learning_rate": 1.998393755623255e-05, "loss": 0.6607, "step": 117 }, { "epoch": 0.08, "learning_rate": 1.9983636002975462e-05, "loss": 0.6127, "step": 118 }, { "epoch": 0.08, "learning_rate": 1.9983331647647687e-05, "loss": 0.6099, "step": 119 }, { "epoch": 0.08, "learning_rate": 1.9983024490334645e-05, "loss": 0.6032, "step": 120 }, { "epoch": 0.08, "learning_rate": 1.998271453112255e-05, "loss": 0.5811, "step": 121 }, { "epoch": 0.08, "learning_rate": 1.998240177009839e-05, "loss": 0.6107, "step": 122 }, { "epoch": 0.08, "learning_rate": 1.998208620734995e-05, "loss": 0.5947, "step": 123 }, { "epoch": 0.08, "learning_rate": 1.99817678429658e-05, "loss": 0.5572, "step": 124 }, { "epoch": 0.08, "learning_rate": 1.998144667703529e-05, "loss": 0.5635, "step": 125 }, { "epoch": 0.08, "learning_rate": 1.9981122709648558e-05, "loss": 0.5965, "step": 126 }, { "epoch": 0.09, "learning_rate": 1.9980795940896544e-05, "loss": 0.6202, "step": 127 }, { "epoch": 0.09, "learning_rate": 1.9980466370870947e-05, "loss": 0.556, "step": 128 }, { "epoch": 0.09, "learning_rate": 1.9980133999664272e-05, "loss": 0.6325, "step": 129 }, { "epoch": 0.09, "learning_rate": 1.99797988273698e-05, "loss": 0.6009, "step": 130 }, { "epoch": 0.09, "learning_rate": 1.997946085408161e-05, "loss": 0.6068, "step": 131 }, { "epoch": 0.09, "learning_rate": 1.9979120079894558e-05, "loss": 0.5491, "step": 132 }, { "epoch": 0.09, "learning_rate": 1.9978776504904282e-05, "loss": 0.5876, "step": 133 }, { "epoch": 0.09, "learning_rate": 1.997843012920722e-05, "loss": 0.5731, "step": 134 }, { "epoch": 0.09, "learning_rate": 1.997808095290058e-05, "loss": 0.5959, "step": 135 }, { "epoch": 0.09, "learning_rate": 1.9977728976082367e-05, "loss": 0.5916, "step": 136 }, { "epoch": 0.09, "learning_rate": 1.9977374198851374e-05, "loss": 0.5775, "step": 137 }, { "epoch": 0.09, "learning_rate": 1.9977016621307167e-05, "loss": 0.6175, "step": 138 }, { "epoch": 0.09, "learning_rate": 1.9976656243550115e-05, "loss": 0.6082, "step": 139 }, { "epoch": 0.09, "learning_rate": 1.9976293065681355e-05, "loss": 0.5714, "step": 140 }, { "epoch": 0.09, "learning_rate": 1.9975927087802822e-05, "loss": 0.5886, "step": 141 }, { "epoch": 0.1, "learning_rate": 1.9975558310017238e-05, "loss": 0.5499, "step": 142 }, { "epoch": 0.1, "learning_rate": 1.9975186732428102e-05, "loss": 0.5873, "step": 143 }, { "epoch": 0.1, "learning_rate": 1.99748123551397e-05, "loss": 0.588, "step": 144 }, { "epoch": 0.1, "learning_rate": 1.9974435178257114e-05, "loss": 0.5705, "step": 145 }, { "epoch": 0.1, "learning_rate": 1.99740552018862e-05, "loss": 0.5878, "step": 146 }, { "epoch": 0.1, "learning_rate": 1.997367242613361e-05, "loss": 0.5783, "step": 147 }, { "epoch": 0.1, "learning_rate": 1.997328685110677e-05, "loss": 0.6105, "step": 148 }, { "epoch": 0.1, "learning_rate": 1.9972898476913906e-05, "loss": 0.5822, "step": 149 }, { "epoch": 0.1, "learning_rate": 1.997250730366401e-05, "loss": 0.5816, "step": 150 }, { "epoch": 0.1, "learning_rate": 1.9972113331466883e-05, "loss": 0.6001, "step": 151 }, { "epoch": 0.1, "learning_rate": 1.997171656043309e-05, "loss": 0.567, "step": 152 }, { "epoch": 0.1, "learning_rate": 1.9971316990673997e-05, "loss": 0.5925, "step": 153 }, { "epoch": 0.1, "learning_rate": 1.997091462230175e-05, "loss": 0.5198, "step": 154 }, { "epoch": 0.1, "learning_rate": 1.997050945542928e-05, "loss": 0.6043, "step": 155 }, { "epoch": 0.11, "learning_rate": 1.99701014901703e-05, "loss": 0.5603, "step": 156 }, { "epoch": 0.11, "learning_rate": 1.996969072663931e-05, "loss": 0.544, "step": 157 }, { "epoch": 0.11, "learning_rate": 1.9969277164951612e-05, "loss": 0.6085, "step": 158 }, { "epoch": 0.11, "learning_rate": 1.9968860805223264e-05, "loss": 0.5777, "step": 159 }, { "epoch": 0.11, "learning_rate": 1.9968441647571124e-05, "loss": 0.5832, "step": 160 }, { "epoch": 0.11, "learning_rate": 1.996801969211285e-05, "loss": 0.5975, "step": 161 }, { "epoch": 0.11, "learning_rate": 1.9967594938966856e-05, "loss": 0.5982, "step": 162 }, { "epoch": 0.11, "learning_rate": 1.9967167388252358e-05, "loss": 0.5485, "step": 163 }, { "epoch": 0.11, "learning_rate": 1.9966737040089362e-05, "loss": 0.5663, "step": 164 }, { "epoch": 0.11, "learning_rate": 1.9966303894598645e-05, "loss": 0.5549, "step": 165 }, { "epoch": 0.11, "learning_rate": 1.9965867951901783e-05, "loss": 0.5951, "step": 166 }, { "epoch": 0.11, "learning_rate": 1.996542921212113e-05, "loss": 0.5974, "step": 167 }, { "epoch": 0.11, "learning_rate": 1.996498767537982e-05, "loss": 0.597, "step": 168 }, { "epoch": 0.11, "learning_rate": 1.9964543341801778e-05, "loss": 0.5683, "step": 169 }, { "epoch": 0.11, "learning_rate": 1.996409621151172e-05, "loss": 0.5452, "step": 170 }, { "epoch": 0.12, "learning_rate": 1.9963646284635134e-05, "loss": 0.6023, "step": 171 }, { "epoch": 0.12, "learning_rate": 1.99631935612983e-05, "loss": 0.598, "step": 172 }, { "epoch": 0.12, "learning_rate": 1.9962738041628286e-05, "loss": 0.6609, "step": 173 }, { "epoch": 0.12, "learning_rate": 1.996227972575294e-05, "loss": 0.5362, "step": 174 }, { "epoch": 0.12, "learning_rate": 1.9961818613800892e-05, "loss": 0.6304, "step": 175 }, { "epoch": 0.12, "learning_rate": 1.9961354705901567e-05, "loss": 0.573, "step": 176 }, { "epoch": 0.12, "learning_rate": 1.996088800218516e-05, "loss": 0.585, "step": 177 }, { "epoch": 0.12, "learning_rate": 1.9960418502782663e-05, "loss": 0.5762, "step": 178 }, { "epoch": 0.12, "learning_rate": 1.995994620782585e-05, "loss": 0.6077, "step": 179 }, { "epoch": 0.12, "learning_rate": 1.995947111744728e-05, "loss": 0.5582, "step": 180 }, { "epoch": 0.12, "learning_rate": 1.9958993231780294e-05, "loss": 0.5809, "step": 181 }, { "epoch": 0.12, "learning_rate": 1.9958512550959013e-05, "loss": 0.5751, "step": 182 }, { "epoch": 0.12, "learning_rate": 1.995802907511835e-05, "loss": 0.6164, "step": 183 }, { "epoch": 0.12, "learning_rate": 1.9957542804394008e-05, "loss": 0.5551, "step": 184 }, { "epoch": 0.12, "learning_rate": 1.995705373892246e-05, "loss": 0.5957, "step": 185 }, { "epoch": 0.13, "learning_rate": 1.9956561878840967e-05, "loss": 0.5508, "step": 186 }, { "epoch": 0.13, "learning_rate": 1.9956067224287585e-05, "loss": 0.5901, "step": 187 }, { "epoch": 0.13, "learning_rate": 1.9955569775401144e-05, "loss": 0.5543, "step": 188 }, { "epoch": 0.13, "learning_rate": 1.9955069532321257e-05, "loss": 0.6488, "step": 189 }, { "epoch": 0.13, "learning_rate": 1.9954566495188333e-05, "loss": 0.5791, "step": 190 }, { "epoch": 0.13, "learning_rate": 1.9954060664143555e-05, "loss": 0.5738, "step": 191 }, { "epoch": 0.13, "learning_rate": 1.995355203932889e-05, "loss": 0.6277, "step": 192 }, { "epoch": 0.13, "learning_rate": 1.9953040620887093e-05, "loss": 0.6511, "step": 193 }, { "epoch": 0.13, "learning_rate": 1.9952526408961703e-05, "loss": 0.6071, "step": 194 }, { "epoch": 0.13, "learning_rate": 1.995200940369704e-05, "loss": 0.5688, "step": 195 }, { "epoch": 0.13, "learning_rate": 1.9951489605238214e-05, "loss": 0.5631, "step": 196 }, { "epoch": 0.13, "learning_rate": 1.995096701373111e-05, "loss": 0.5913, "step": 197 }, { "epoch": 0.13, "learning_rate": 1.9950441629322407e-05, "loss": 0.5427, "step": 198 }, { "epoch": 0.13, "learning_rate": 1.9949913452159556e-05, "loss": 0.5748, "step": 199 }, { "epoch": 0.13, "learning_rate": 1.9949382482390803e-05, "loss": 0.5769, "step": 200 }, { "epoch": 0.14, "learning_rate": 1.9948848720165178e-05, "loss": 0.5554, "step": 201 }, { "epoch": 0.14, "learning_rate": 1.994831216563248e-05, "loss": 0.6145, "step": 202 }, { "epoch": 0.14, "learning_rate": 1.9947772818943308e-05, "loss": 0.5717, "step": 203 }, { "epoch": 0.14, "learning_rate": 1.9947230680249036e-05, "loss": 0.5828, "step": 204 }, { "epoch": 0.14, "learning_rate": 1.994668574970183e-05, "loss": 0.5667, "step": 205 }, { "epoch": 0.14, "learning_rate": 1.9946138027454624e-05, "loss": 0.5935, "step": 206 }, { "epoch": 0.14, "learning_rate": 1.994558751366115e-05, "loss": 0.5763, "step": 207 }, { "epoch": 0.14, "learning_rate": 1.9945034208475922e-05, "loss": 0.5388, "step": 208 }, { "epoch": 0.14, "learning_rate": 1.994447811205423e-05, "loss": 0.5679, "step": 209 }, { "epoch": 0.14, "learning_rate": 1.9943919224552154e-05, "loss": 0.5664, "step": 210 }, { "epoch": 0.14, "learning_rate": 1.9943357546126548e-05, "loss": 0.6093, "step": 211 }, { "epoch": 0.14, "learning_rate": 1.9942793076935067e-05, "loss": 0.6113, "step": 212 }, { "epoch": 0.14, "learning_rate": 1.9942225817136132e-05, "loss": 0.5914, "step": 213 }, { "epoch": 0.14, "learning_rate": 1.9941655766888956e-05, "loss": 0.549, "step": 214 }, { "epoch": 0.14, "learning_rate": 1.994108292635353e-05, "loss": 0.5554, "step": 215 }, { "epoch": 0.15, "learning_rate": 1.9940507295690632e-05, "loss": 0.5492, "step": 216 }, { "epoch": 0.15, "learning_rate": 1.9939928875061825e-05, "loss": 0.5747, "step": 217 }, { "epoch": 0.15, "learning_rate": 1.9939347664629453e-05, "loss": 0.5847, "step": 218 }, { "epoch": 0.15, "learning_rate": 1.9938763664556634e-05, "loss": 0.6064, "step": 219 }, { "epoch": 0.15, "learning_rate": 1.9938176875007284e-05, "loss": 0.5568, "step": 220 }, { "epoch": 0.15, "learning_rate": 1.9937587296146095e-05, "loss": 0.5476, "step": 221 }, { "epoch": 0.15, "learning_rate": 1.9936994928138543e-05, "loss": 0.5761, "step": 222 }, { "epoch": 0.15, "learning_rate": 1.993639977115088e-05, "loss": 0.5651, "step": 223 }, { "epoch": 0.15, "learning_rate": 1.993580182535015e-05, "loss": 0.5788, "step": 224 }, { "epoch": 0.15, "learning_rate": 1.9935201090904177e-05, "loss": 0.5649, "step": 225 }, { "epoch": 0.15, "learning_rate": 1.9934597567981567e-05, "loss": 0.564, "step": 226 }, { "epoch": 0.15, "learning_rate": 1.9933991256751707e-05, "loss": 0.5939, "step": 227 }, { "epoch": 0.15, "learning_rate": 1.993338215738477e-05, "loss": 0.5858, "step": 228 }, { "epoch": 0.15, "learning_rate": 1.9932770270051706e-05, "loss": 0.5521, "step": 229 }, { "epoch": 0.15, "learning_rate": 1.993215559492426e-05, "loss": 0.5711, "step": 230 }, { "epoch": 0.16, "learning_rate": 1.993153813217494e-05, "loss": 0.5948, "step": 231 }, { "epoch": 0.16, "learning_rate": 1.9930917881977053e-05, "loss": 0.5852, "step": 232 }, { "epoch": 0.16, "learning_rate": 1.9930294844504677e-05, "loss": 0.6058, "step": 233 }, { "epoch": 0.16, "learning_rate": 1.9929669019932686e-05, "loss": 0.603, "step": 234 }, { "epoch": 0.16, "learning_rate": 1.992904040843672e-05, "loss": 0.5764, "step": 235 }, { "epoch": 0.16, "learning_rate": 1.9928409010193213e-05, "loss": 0.5627, "step": 236 }, { "epoch": 0.16, "learning_rate": 1.992777482537938e-05, "loss": 0.5668, "step": 237 }, { "epoch": 0.16, "learning_rate": 1.992713785417321e-05, "loss": 0.5348, "step": 238 }, { "epoch": 0.16, "learning_rate": 1.992649809675348e-05, "loss": 0.59, "step": 239 }, { "epoch": 0.16, "learning_rate": 1.9925855553299755e-05, "loss": 0.5641, "step": 240 }, { "epoch": 0.16, "learning_rate": 1.9925210223992365e-05, "loss": 0.5417, "step": 241 }, { "epoch": 0.16, "learning_rate": 1.992456210901244e-05, "loss": 0.5635, "step": 242 }, { "epoch": 0.16, "learning_rate": 1.992391120854188e-05, "loss": 0.5527, "step": 243 }, { "epoch": 0.16, "learning_rate": 1.9923257522763373e-05, "loss": 0.5966, "step": 244 }, { "epoch": 0.16, "learning_rate": 1.9922601051860386e-05, "loss": 0.5538, "step": 245 }, { "epoch": 0.17, "learning_rate": 1.9921941796017168e-05, "loss": 0.6451, "step": 246 }, { "epoch": 0.17, "learning_rate": 1.992127975541875e-05, "loss": 0.5891, "step": 247 }, { "epoch": 0.17, "learning_rate": 1.9920614930250945e-05, "loss": 0.5767, "step": 248 }, { "epoch": 0.17, "learning_rate": 1.9919947320700346e-05, "loss": 0.573, "step": 249 }, { "epoch": 0.17, "learning_rate": 1.991927692695433e-05, "loss": 0.5795, "step": 250 }, { "epoch": 0.17, "learning_rate": 1.9918603749201053e-05, "loss": 0.5831, "step": 251 }, { "epoch": 0.17, "learning_rate": 1.9917927787629454e-05, "loss": 0.5439, "step": 252 }, { "epoch": 0.17, "learning_rate": 1.9917249042429253e-05, "loss": 0.5367, "step": 253 }, { "epoch": 0.17, "learning_rate": 1.9916567513790946e-05, "loss": 0.6075, "step": 254 }, { "epoch": 0.17, "learning_rate": 1.9915883201905824e-05, "loss": 0.5493, "step": 255 }, { "epoch": 0.17, "learning_rate": 1.9915196106965944e-05, "loss": 0.5906, "step": 256 }, { "epoch": 0.17, "learning_rate": 1.991450622916415e-05, "loss": 0.5685, "step": 257 }, { "epoch": 0.17, "learning_rate": 1.9913813568694076e-05, "loss": 0.583, "step": 258 }, { "epoch": 0.17, "learning_rate": 1.9913118125750115e-05, "loss": 0.5709, "step": 259 }, { "epoch": 0.18, "learning_rate": 1.9912419900527467e-05, "loss": 0.5741, "step": 260 }, { "epoch": 0.18, "learning_rate": 1.9911718893222088e-05, "loss": 0.5701, "step": 261 }, { "epoch": 0.18, "learning_rate": 1.991101510403074e-05, "loss": 0.5916, "step": 262 }, { "epoch": 0.18, "learning_rate": 1.9910308533150946e-05, "loss": 0.5826, "step": 263 }, { "epoch": 0.18, "learning_rate": 1.9909599180781016e-05, "loss": 0.5845, "step": 264 }, { "epoch": 0.18, "learning_rate": 1.9908887047120046e-05, "loss": 0.5835, "step": 265 }, { "epoch": 0.18, "learning_rate": 1.99081721323679e-05, "loss": 0.611, "step": 266 }, { "epoch": 0.18, "learning_rate": 1.9907454436725237e-05, "loss": 0.5917, "step": 267 }, { "epoch": 0.18, "learning_rate": 1.990673396039349e-05, "loss": 0.5403, "step": 268 }, { "epoch": 0.18, "learning_rate": 1.990601070357487e-05, "loss": 0.5883, "step": 269 }, { "epoch": 0.18, "learning_rate": 1.9905284666472374e-05, "loss": 0.5528, "step": 270 }, { "epoch": 0.18, "learning_rate": 1.990455584928977e-05, "loss": 0.5626, "step": 271 }, { "epoch": 0.18, "learning_rate": 1.990382425223162e-05, "loss": 0.5666, "step": 272 }, { "epoch": 0.18, "learning_rate": 1.9903089875503252e-05, "loss": 0.5835, "step": 273 }, { "epoch": 0.18, "learning_rate": 1.9902352719310784e-05, "loss": 0.5518, "step": 274 }, { "epoch": 0.19, "learning_rate": 1.9901612783861114e-05, "loss": 0.6029, "step": 275 }, { "epoch": 0.19, "learning_rate": 1.9900870069361914e-05, "loss": 0.5343, "step": 276 }, { "epoch": 0.19, "learning_rate": 1.9900124576021637e-05, "loss": 0.5709, "step": 277 }, { "epoch": 0.19, "learning_rate": 1.9899376304049516e-05, "loss": 0.5802, "step": 278 }, { "epoch": 0.19, "learning_rate": 1.9898625253655574e-05, "loss": 0.5744, "step": 279 }, { "epoch": 0.19, "learning_rate": 1.9897871425050598e-05, "loss": 0.6037, "step": 280 }, { "epoch": 0.19, "learning_rate": 1.989711481844617e-05, "loss": 0.568, "step": 281 }, { "epoch": 0.19, "learning_rate": 1.9896355434054636e-05, "loss": 0.5699, "step": 282 }, { "epoch": 0.19, "learning_rate": 1.9895593272089135e-05, "loss": 0.5644, "step": 283 }, { "epoch": 0.19, "learning_rate": 1.9894828332763573e-05, "loss": 0.5546, "step": 284 }, { "epoch": 0.19, "learning_rate": 1.989406061629265e-05, "loss": 0.5781, "step": 285 }, { "epoch": 0.19, "learning_rate": 1.989329012289184e-05, "loss": 0.5749, "step": 286 }, { "epoch": 0.19, "learning_rate": 1.989251685277739e-05, "loss": 0.5518, "step": 287 }, { "epoch": 0.19, "learning_rate": 1.989174080616633e-05, "loss": 0.6277, "step": 288 }, { "epoch": 0.19, "learning_rate": 1.9890961983276472e-05, "loss": 0.569, "step": 289 }, { "epoch": 0.2, "learning_rate": 1.9890180384326404e-05, "loss": 0.5386, "step": 290 }, { "epoch": 0.2, "learning_rate": 1.9889396009535496e-05, "loss": 0.5793, "step": 291 }, { "epoch": 0.2, "learning_rate": 1.9888608859123895e-05, "loss": 0.5583, "step": 292 }, { "epoch": 0.2, "learning_rate": 1.9887818933312532e-05, "loss": 0.5799, "step": 293 }, { "epoch": 0.2, "learning_rate": 1.9887026232323105e-05, "loss": 0.5621, "step": 294 }, { "epoch": 0.2, "learning_rate": 1.9886230756378102e-05, "loss": 0.5738, "step": 295 }, { "epoch": 0.2, "learning_rate": 1.9885432505700786e-05, "loss": 0.5704, "step": 296 }, { "epoch": 0.2, "learning_rate": 1.98846314805152e-05, "loss": 0.6115, "step": 297 }, { "epoch": 0.2, "learning_rate": 1.9883827681046167e-05, "loss": 0.5488, "step": 298 }, { "epoch": 0.2, "learning_rate": 1.988302110751928e-05, "loss": 0.5473, "step": 299 }, { "epoch": 0.2, "learning_rate": 1.9882211760160924e-05, "loss": 0.5707, "step": 300 }, { "epoch": 0.2, "learning_rate": 1.9881399639198248e-05, "loss": 0.5873, "step": 301 }, { "epoch": 0.2, "learning_rate": 1.9880584744859195e-05, "loss": 0.5659, "step": 302 }, { "epoch": 0.2, "learning_rate": 1.9879767077372474e-05, "loss": 0.5966, "step": 303 }, { "epoch": 0.2, "learning_rate": 1.9878946636967576e-05, "loss": 0.5478, "step": 304 }, { "epoch": 0.21, "learning_rate": 1.9878123423874777e-05, "loss": 0.5877, "step": 305 }, { "epoch": 0.21, "learning_rate": 1.9877297438325115e-05, "loss": 0.597, "step": 306 }, { "epoch": 0.21, "learning_rate": 1.9876468680550425e-05, "loss": 0.5697, "step": 307 }, { "epoch": 0.21, "learning_rate": 1.9875637150783307e-05, "loss": 0.5785, "step": 308 }, { "epoch": 0.21, "learning_rate": 1.9874802849257148e-05, "loss": 0.5755, "step": 309 }, { "epoch": 0.21, "learning_rate": 1.9873965776206103e-05, "loss": 0.5772, "step": 310 }, { "epoch": 0.21, "learning_rate": 1.9873125931865113e-05, "loss": 0.5743, "step": 311 }, { "epoch": 0.21, "learning_rate": 1.9872283316469893e-05, "loss": 0.5606, "step": 312 }, { "epoch": 0.21, "learning_rate": 1.987143793025694e-05, "loss": 0.598, "step": 313 }, { "epoch": 0.21, "learning_rate": 1.9870589773463517e-05, "loss": 0.5784, "step": 314 }, { "epoch": 0.21, "learning_rate": 1.9869738846327685e-05, "loss": 0.6226, "step": 315 }, { "epoch": 0.21, "learning_rate": 1.9868885149088262e-05, "loss": 0.571, "step": 316 }, { "epoch": 0.21, "learning_rate": 1.9868028681984857e-05, "loss": 0.5919, "step": 317 }, { "epoch": 0.21, "learning_rate": 1.9867169445257848e-05, "loss": 0.5657, "step": 318 }, { "epoch": 0.21, "learning_rate": 1.986630743914839e-05, "loss": 0.516, "step": 319 }, { "epoch": 0.22, "learning_rate": 1.986544266389843e-05, "loss": 0.568, "step": 320 }, { "epoch": 0.22, "learning_rate": 1.9864575119750677e-05, "loss": 0.603, "step": 321 }, { "epoch": 0.22, "learning_rate": 1.9863704806948612e-05, "loss": 0.5999, "step": 322 }, { "epoch": 0.22, "learning_rate": 1.986283172573652e-05, "loss": 0.5558, "step": 323 }, { "epoch": 0.22, "learning_rate": 1.9861955876359428e-05, "loss": 0.5507, "step": 324 }, { "epoch": 0.22, "learning_rate": 1.986107725906317e-05, "loss": 0.5849, "step": 325 }, { "epoch": 0.22, "learning_rate": 1.986019587409434e-05, "loss": 0.5876, "step": 326 }, { "epoch": 0.22, "learning_rate": 1.9859311721700306e-05, "loss": 0.5929, "step": 327 }, { "epoch": 0.22, "learning_rate": 1.9858424802129232e-05, "loss": 0.5592, "step": 328 }, { "epoch": 0.22, "learning_rate": 1.985753511563004e-05, "loss": 0.5555, "step": 329 }, { "epoch": 0.22, "learning_rate": 1.9856642662452437e-05, "loss": 0.6035, "step": 330 }, { "epoch": 0.22, "learning_rate": 1.9855747442846903e-05, "loss": 0.5433, "step": 331 }, { "epoch": 0.22, "learning_rate": 1.9854849457064697e-05, "loss": 0.5967, "step": 332 }, { "epoch": 0.22, "learning_rate": 1.985394870535785e-05, "loss": 0.5547, "step": 333 }, { "epoch": 0.22, "learning_rate": 1.9853045187979184e-05, "loss": 0.5452, "step": 334 }, { "epoch": 0.23, "learning_rate": 1.985213890518227e-05, "loss": 0.5866, "step": 335 }, { "epoch": 0.23, "learning_rate": 1.9851229857221483e-05, "loss": 0.5557, "step": 336 }, { "epoch": 0.23, "learning_rate": 1.9850318044351958e-05, "loss": 0.5449, "step": 337 }, { "epoch": 0.23, "learning_rate": 1.9849403466829607e-05, "loss": 0.6002, "step": 338 }, { "epoch": 0.23, "learning_rate": 1.9848486124911125e-05, "loss": 0.5653, "step": 339 }, { "epoch": 0.23, "learning_rate": 1.984756601885398e-05, "loss": 0.5496, "step": 340 }, { "epoch": 0.23, "learning_rate": 1.9846643148916414e-05, "loss": 0.5881, "step": 341 }, { "epoch": 0.23, "learning_rate": 1.9845717515357443e-05, "loss": 0.5904, "step": 342 }, { "epoch": 0.23, "learning_rate": 1.9844789118436863e-05, "loss": 0.5717, "step": 343 }, { "epoch": 0.23, "learning_rate": 1.9843857958415243e-05, "loss": 0.5834, "step": 344 }, { "epoch": 0.23, "learning_rate": 1.984292403555393e-05, "loss": 0.564, "step": 345 }, { "epoch": 0.23, "learning_rate": 1.9841987350115043e-05, "loss": 0.5871, "step": 346 }, { "epoch": 0.23, "learning_rate": 1.9841047902361475e-05, "loss": 0.5872, "step": 347 }, { "epoch": 0.23, "learning_rate": 1.984010569255691e-05, "loss": 0.5544, "step": 348 }, { "epoch": 0.23, "learning_rate": 1.983916072096578e-05, "loss": 0.5841, "step": 349 }, { "epoch": 0.24, "learning_rate": 1.9838212987853312e-05, "loss": 0.5427, "step": 350 }, { "epoch": 0.24, "learning_rate": 1.9837262493485505e-05, "loss": 0.5476, "step": 351 }, { "epoch": 0.24, "learning_rate": 1.9836309238129127e-05, "loss": 0.539, "step": 352 }, { "epoch": 0.24, "learning_rate": 1.9835353222051728e-05, "loss": 0.5847, "step": 353 }, { "epoch": 0.24, "learning_rate": 1.983439444552163e-05, "loss": 0.5792, "step": 354 }, { "epoch": 0.24, "learning_rate": 1.9833432908807927e-05, "loss": 0.5919, "step": 355 }, { "epoch": 0.24, "learning_rate": 1.983246861218049e-05, "loss": 0.5298, "step": 356 }, { "epoch": 0.24, "learning_rate": 1.9831501555909964e-05, "loss": 0.594, "step": 357 }, { "epoch": 0.24, "learning_rate": 1.9830531740267772e-05, "loss": 0.5643, "step": 358 }, { "epoch": 0.24, "learning_rate": 1.9829559165526103e-05, "loss": 0.5999, "step": 359 }, { "epoch": 0.24, "learning_rate": 1.9828583831957935e-05, "loss": 0.5895, "step": 360 }, { "epoch": 0.24, "learning_rate": 1.9827605739837004e-05, "loss": 0.5686, "step": 361 }, { "epoch": 0.24, "learning_rate": 1.982662488943783e-05, "loss": 0.612, "step": 362 }, { "epoch": 0.24, "learning_rate": 1.9825641281035707e-05, "loss": 0.5482, "step": 363 }, { "epoch": 0.25, "learning_rate": 1.9824654914906694e-05, "loss": 0.5584, "step": 364 }, { "epoch": 0.25, "learning_rate": 1.9823665791327635e-05, "loss": 0.5635, "step": 365 }, { "epoch": 0.25, "learning_rate": 1.9822673910576148e-05, "loss": 0.6189, "step": 366 }, { "epoch": 0.25, "learning_rate": 1.9821679272930614e-05, "loss": 0.5824, "step": 367 }, { "epoch": 0.25, "learning_rate": 1.9820681878670197e-05, "loss": 0.5695, "step": 368 }, { "epoch": 0.25, "learning_rate": 1.981968172807483e-05, "loss": 0.5872, "step": 369 }, { "epoch": 0.25, "learning_rate": 1.9818678821425227e-05, "loss": 0.556, "step": 370 }, { "epoch": 0.25, "learning_rate": 1.9817673159002864e-05, "loss": 0.5743, "step": 371 }, { "epoch": 0.25, "learning_rate": 1.981666474109e-05, "loss": 0.5832, "step": 372 }, { "epoch": 0.25, "learning_rate": 1.981565356796966e-05, "loss": 0.6243, "step": 373 }, { "epoch": 0.25, "learning_rate": 1.981463963992565e-05, "loss": 0.5777, "step": 374 }, { "epoch": 0.25, "learning_rate": 1.981362295724255e-05, "loss": 0.6033, "step": 375 }, { "epoch": 0.25, "learning_rate": 1.98126035202057e-05, "loss": 0.5583, "step": 376 }, { "epoch": 0.25, "learning_rate": 1.9811581329101224e-05, "loss": 0.578, "step": 377 }, { "epoch": 0.25, "learning_rate": 1.981055638421602e-05, "loss": 0.6008, "step": 378 }, { "epoch": 0.26, "learning_rate": 1.980952868583775e-05, "loss": 0.5858, "step": 379 }, { "epoch": 0.26, "learning_rate": 1.980849823425486e-05, "loss": 0.5801, "step": 380 }, { "epoch": 0.26, "learning_rate": 1.980746502975656e-05, "loss": 0.5443, "step": 381 }, { "epoch": 0.26, "learning_rate": 1.9806429072632835e-05, "loss": 0.5571, "step": 382 }, { "epoch": 0.26, "learning_rate": 1.9805390363174447e-05, "loss": 0.5739, "step": 383 }, { "epoch": 0.26, "learning_rate": 1.9804348901672922e-05, "loss": 0.5362, "step": 384 }, { "epoch": 0.26, "learning_rate": 1.9803304688420568e-05, "loss": 0.529, "step": 385 }, { "epoch": 0.26, "learning_rate": 1.980225772371046e-05, "loss": 0.5601, "step": 386 }, { "epoch": 0.26, "learning_rate": 1.9801208007836442e-05, "loss": 0.5871, "step": 387 }, { "epoch": 0.26, "learning_rate": 1.9800155541093135e-05, "loss": 0.5552, "step": 388 }, { "epoch": 0.26, "learning_rate": 1.9799100323775933e-05, "loss": 0.5537, "step": 389 }, { "epoch": 0.26, "learning_rate": 1.9798042356181e-05, "loss": 0.5929, "step": 390 }, { "epoch": 0.26, "learning_rate": 1.979698163860527e-05, "loss": 0.5586, "step": 391 }, { "epoch": 0.26, "learning_rate": 1.9795918171346448e-05, "loss": 0.5758, "step": 392 }, { "epoch": 0.26, "learning_rate": 1.9794851954703024e-05, "loss": 0.5652, "step": 393 }, { "epoch": 0.27, "learning_rate": 1.9793782988974233e-05, "loss": 0.5536, "step": 394 }, { "epoch": 0.27, "learning_rate": 1.9792711274460116e-05, "loss": 0.5974, "step": 395 }, { "epoch": 0.27, "learning_rate": 1.9791636811461454e-05, "loss": 0.543, "step": 396 }, { "epoch": 0.27, "learning_rate": 1.9790559600279816e-05, "loss": 0.5734, "step": 397 }, { "epoch": 0.27, "learning_rate": 1.9789479641217538e-05, "loss": 0.5894, "step": 398 }, { "epoch": 0.27, "learning_rate": 1.9788396934577733e-05, "loss": 0.587, "step": 399 }, { "epoch": 0.27, "learning_rate": 1.978731148066428e-05, "loss": 0.5684, "step": 400 }, { "epoch": 0.27, "learning_rate": 1.9786223279781823e-05, "loss": 0.5832, "step": 401 }, { "epoch": 0.27, "learning_rate": 1.9785132332235787e-05, "loss": 0.5801, "step": 402 }, { "epoch": 0.27, "learning_rate": 1.9784038638332363e-05, "loss": 0.5815, "step": 403 }, { "epoch": 0.27, "learning_rate": 1.9782942198378516e-05, "loss": 0.5693, "step": 404 }, { "epoch": 0.27, "learning_rate": 1.978184301268198e-05, "loss": 0.57, "step": 405 }, { "epoch": 0.27, "learning_rate": 1.978074108155126e-05, "loss": 0.5642, "step": 406 }, { "epoch": 0.27, "learning_rate": 1.9779636405295624e-05, "loss": 0.5553, "step": 407 }, { "epoch": 0.27, "learning_rate": 1.9778528984225126e-05, "loss": 0.5476, "step": 408 }, { "epoch": 0.28, "learning_rate": 1.977741881865058e-05, "loss": 0.5916, "step": 409 }, { "epoch": 0.28, "learning_rate": 1.977630590888357e-05, "loss": 0.586, "step": 410 }, { "epoch": 0.28, "learning_rate": 1.977519025523645e-05, "loss": 0.5014, "step": 411 }, { "epoch": 0.28, "learning_rate": 1.9774071858022354e-05, "loss": 0.5413, "step": 412 }, { "epoch": 0.28, "learning_rate": 1.9772950717555174e-05, "loss": 0.567, "step": 413 }, { "epoch": 0.28, "learning_rate": 1.9771826834149577e-05, "loss": 0.599, "step": 414 }, { "epoch": 0.28, "learning_rate": 1.9770700208120996e-05, "loss": 0.607, "step": 415 }, { "epoch": 0.28, "learning_rate": 1.9769570839785638e-05, "loss": 0.5516, "step": 416 }, { "epoch": 0.28, "learning_rate": 1.9768438729460487e-05, "loss": 0.5413, "step": 417 }, { "epoch": 0.28, "learning_rate": 1.9767303877463275e-05, "loss": 0.5653, "step": 418 }, { "epoch": 0.28, "learning_rate": 1.976616628411253e-05, "loss": 0.5838, "step": 419 }, { "epoch": 0.28, "learning_rate": 1.9765025949727526e-05, "loss": 0.5575, "step": 420 }, { "epoch": 0.28, "learning_rate": 1.9763882874628318e-05, "loss": 0.581, "step": 421 }, { "epoch": 0.28, "learning_rate": 1.9762737059135735e-05, "loss": 0.5922, "step": 422 }, { "epoch": 0.28, "learning_rate": 1.976158850357136e-05, "loss": 0.5656, "step": 423 }, { "epoch": 0.29, "learning_rate": 1.9760437208257564e-05, "loss": 0.5436, "step": 424 }, { "epoch": 0.29, "learning_rate": 1.975928317351747e-05, "loss": 0.5676, "step": 425 }, { "epoch": 0.29, "learning_rate": 1.9758126399674974e-05, "loss": 0.6124, "step": 426 }, { "epoch": 0.29, "learning_rate": 1.975696688705475e-05, "loss": 0.5773, "step": 427 }, { "epoch": 0.29, "learning_rate": 1.9755804635982233e-05, "loss": 0.5855, "step": 428 }, { "epoch": 0.29, "learning_rate": 1.9754639646783627e-05, "loss": 0.5294, "step": 429 }, { "epoch": 0.29, "learning_rate": 1.975347191978591e-05, "loss": 0.5977, "step": 430 }, { "epoch": 0.29, "learning_rate": 1.9752301455316808e-05, "loss": 0.5552, "step": 431 }, { "epoch": 0.29, "learning_rate": 1.9751128253704848e-05, "loss": 0.5413, "step": 432 }, { "epoch": 0.29, "learning_rate": 1.97499523152793e-05, "loss": 0.5817, "step": 433 }, { "epoch": 0.29, "learning_rate": 1.9748773640370212e-05, "loss": 0.5867, "step": 434 }, { "epoch": 0.29, "learning_rate": 1.97475922293084e-05, "loss": 0.5578, "step": 435 }, { "epoch": 0.29, "learning_rate": 1.9746408082425443e-05, "loss": 0.5483, "step": 436 }, { "epoch": 0.29, "learning_rate": 1.9745221200053694e-05, "loss": 0.5564, "step": 437 }, { "epoch": 0.29, "learning_rate": 1.974403158252627e-05, "loss": 0.5528, "step": 438 }, { "epoch": 0.3, "learning_rate": 1.9742839230177056e-05, "loss": 0.582, "step": 439 }, { "epoch": 0.3, "learning_rate": 1.9741644143340707e-05, "loss": 0.5711, "step": 440 }, { "epoch": 0.3, "learning_rate": 1.9740446322352643e-05, "loss": 0.6, "step": 441 }, { "epoch": 0.3, "learning_rate": 1.9739245767549048e-05, "loss": 0.5799, "step": 442 }, { "epoch": 0.3, "learning_rate": 1.9738042479266886e-05, "loss": 0.5483, "step": 443 }, { "epoch": 0.3, "learning_rate": 1.973683645784387e-05, "loss": 0.5807, "step": 444 }, { "epoch": 0.3, "learning_rate": 1.9735627703618494e-05, "loss": 0.596, "step": 445 }, { "epoch": 0.3, "learning_rate": 1.973441621693002e-05, "loss": 0.6182, "step": 446 }, { "epoch": 0.3, "learning_rate": 1.973320199811846e-05, "loss": 0.5435, "step": 447 }, { "epoch": 0.3, "learning_rate": 1.9731985047524612e-05, "loss": 0.569, "step": 448 }, { "epoch": 0.3, "learning_rate": 1.9730765365490033e-05, "loss": 0.5865, "step": 449 }, { "epoch": 0.3, "learning_rate": 1.9729542952357045e-05, "loss": 0.5882, "step": 450 }, { "epoch": 0.3, "learning_rate": 1.972831780846874e-05, "loss": 0.5532, "step": 451 }, { "epoch": 0.3, "learning_rate": 1.972708993416897e-05, "loss": 0.5525, "step": 452 }, { "epoch": 0.31, "learning_rate": 1.9725859329802363e-05, "loss": 0.6435, "step": 453 }, { "epoch": 0.31, "learning_rate": 1.9724625995714307e-05, "loss": 0.5676, "step": 454 }, { "epoch": 0.31, "learning_rate": 1.9723389932250955e-05, "loss": 0.5593, "step": 455 }, { "epoch": 0.31, "learning_rate": 1.9722151139759232e-05, "loss": 0.5657, "step": 456 }, { "epoch": 0.31, "learning_rate": 1.9720909618586824e-05, "loss": 0.5595, "step": 457 }, { "epoch": 0.31, "learning_rate": 1.971966536908218e-05, "loss": 0.5849, "step": 458 }, { "epoch": 0.31, "learning_rate": 1.9718418391594526e-05, "loss": 0.5468, "step": 459 }, { "epoch": 0.31, "learning_rate": 1.9717168686473845e-05, "loss": 0.6109, "step": 460 }, { "epoch": 0.31, "learning_rate": 1.9715916254070883e-05, "loss": 0.5672, "step": 461 }, { "epoch": 0.31, "learning_rate": 1.971466109473716e-05, "loss": 0.5713, "step": 462 }, { "epoch": 0.31, "learning_rate": 1.9713403208824957e-05, "loss": 0.5531, "step": 463 }, { "epoch": 0.31, "learning_rate": 1.9712142596687314e-05, "loss": 0.6088, "step": 464 }, { "epoch": 0.31, "learning_rate": 1.9710879258678045e-05, "loss": 0.542, "step": 465 }, { "epoch": 0.31, "learning_rate": 1.9709613195151736e-05, "loss": 0.579, "step": 466 }, { "epoch": 0.31, "learning_rate": 1.9708344406463714e-05, "loss": 0.5614, "step": 467 }, { "epoch": 0.32, "learning_rate": 1.9707072892970095e-05, "loss": 0.5719, "step": 468 }, { "epoch": 0.32, "learning_rate": 1.9705798655027747e-05, "loss": 0.592, "step": 469 }, { "epoch": 0.32, "learning_rate": 1.9704521692994305e-05, "loss": 0.5665, "step": 470 }, { "epoch": 0.32, "learning_rate": 1.9703242007228172e-05, "loss": 0.5465, "step": 471 }, { "epoch": 0.32, "learning_rate": 1.970195959808851e-05, "loss": 0.541, "step": 472 }, { "epoch": 0.32, "learning_rate": 1.970067446593525e-05, "loss": 0.5469, "step": 473 }, { "epoch": 0.32, "learning_rate": 1.9699386611129082e-05, "loss": 0.5905, "step": 474 }, { "epoch": 0.32, "learning_rate": 1.969809603403147e-05, "loss": 0.6295, "step": 475 }, { "epoch": 0.32, "learning_rate": 1.9696802735004627e-05, "loss": 0.5631, "step": 476 }, { "epoch": 0.32, "learning_rate": 1.9695506714411543e-05, "loss": 0.5421, "step": 477 }, { "epoch": 0.32, "learning_rate": 1.9694207972615967e-05, "loss": 0.5511, "step": 478 }, { "epoch": 0.32, "learning_rate": 1.9692906509982416e-05, "loss": 0.5908, "step": 479 }, { "epoch": 0.32, "learning_rate": 1.969160232687616e-05, "loss": 0.5496, "step": 480 }, { "epoch": 0.32, "learning_rate": 1.9690295423663244e-05, "loss": 0.5526, "step": 481 }, { "epoch": 0.32, "learning_rate": 1.968898580071047e-05, "loss": 0.592, "step": 482 }, { "epoch": 0.33, "learning_rate": 1.9687673458385406e-05, "loss": 0.5887, "step": 483 }, { "epoch": 0.33, "learning_rate": 1.968635839705638e-05, "loss": 0.5629, "step": 484 }, { "epoch": 0.33, "learning_rate": 1.968504061709249e-05, "loss": 0.5538, "step": 485 }, { "epoch": 0.33, "learning_rate": 1.968372011886359e-05, "loss": 0.5865, "step": 486 }, { "epoch": 0.33, "learning_rate": 1.9682396902740294e-05, "loss": 0.5421, "step": 487 }, { "epoch": 0.33, "learning_rate": 1.968107096909399e-05, "loss": 0.5903, "step": 488 }, { "epoch": 0.33, "learning_rate": 1.9679742318296827e-05, "loss": 0.6114, "step": 489 }, { "epoch": 0.33, "learning_rate": 1.96784109507217e-05, "loss": 0.5873, "step": 490 }, { "epoch": 0.33, "learning_rate": 1.9677076866742295e-05, "loss": 0.5646, "step": 491 }, { "epoch": 0.33, "learning_rate": 1.9675740066733035e-05, "loss": 0.6312, "step": 492 }, { "epoch": 0.33, "learning_rate": 1.967440055106911e-05, "loss": 0.5399, "step": 493 }, { "epoch": 0.33, "learning_rate": 1.967305832012649e-05, "loss": 0.5604, "step": 494 }, { "epoch": 0.33, "learning_rate": 1.9671713374281883e-05, "loss": 0.5732, "step": 495 }, { "epoch": 0.33, "learning_rate": 1.9670365713912776e-05, "loss": 0.5533, "step": 496 }, { "epoch": 0.33, "learning_rate": 1.966901533939741e-05, "loss": 0.5677, "step": 497 }, { "epoch": 0.34, "learning_rate": 1.9667662251114793e-05, "loss": 0.5533, "step": 498 }, { "epoch": 0.34, "learning_rate": 1.9666306449444682e-05, "loss": 0.5834, "step": 499 }, { "epoch": 0.34, "learning_rate": 1.9664947934767614e-05, "loss": 0.587, "step": 500 }, { "epoch": 0.34, "learning_rate": 1.9663586707464876e-05, "loss": 0.5544, "step": 501 }, { "epoch": 0.34, "learning_rate": 1.9662222767918518e-05, "loss": 0.5128, "step": 502 }, { "epoch": 0.34, "learning_rate": 1.9660856116511354e-05, "loss": 0.6291, "step": 503 }, { "epoch": 0.34, "learning_rate": 1.9659486753626954e-05, "loss": 0.6063, "step": 504 }, { "epoch": 0.34, "learning_rate": 1.965811467964965e-05, "loss": 0.5799, "step": 505 }, { "epoch": 0.34, "learning_rate": 1.9656739894964544e-05, "loss": 0.5628, "step": 506 }, { "epoch": 0.34, "learning_rate": 1.9655362399957486e-05, "loss": 0.5554, "step": 507 }, { "epoch": 0.34, "learning_rate": 1.9653982195015095e-05, "loss": 0.5484, "step": 508 }, { "epoch": 0.34, "learning_rate": 1.965259928052475e-05, "loss": 0.5388, "step": 509 }, { "epoch": 0.34, "learning_rate": 1.965121365687458e-05, "loss": 0.575, "step": 510 }, { "epoch": 0.34, "learning_rate": 1.964982532445349e-05, "loss": 0.5393, "step": 511 }, { "epoch": 0.34, "learning_rate": 1.9648434283651144e-05, "loss": 0.5949, "step": 512 }, { "epoch": 0.35, "learning_rate": 1.9647040534857948e-05, "loss": 0.5473, "step": 513 }, { "epoch": 0.35, "learning_rate": 1.9645644078465088e-05, "loss": 0.541, "step": 514 }, { "epoch": 0.35, "learning_rate": 1.9644244914864502e-05, "loss": 0.5781, "step": 515 }, { "epoch": 0.35, "learning_rate": 1.9642843044448886e-05, "loss": 0.642, "step": 516 }, { "epoch": 0.35, "learning_rate": 1.9641438467611696e-05, "loss": 0.5579, "step": 517 }, { "epoch": 0.35, "learning_rate": 1.9640031184747152e-05, "loss": 0.5438, "step": 518 }, { "epoch": 0.35, "learning_rate": 1.9638621196250232e-05, "loss": 0.5216, "step": 519 }, { "epoch": 0.35, "learning_rate": 1.9637208502516673e-05, "loss": 0.5954, "step": 520 }, { "epoch": 0.35, "learning_rate": 1.9635793103942964e-05, "loss": 0.5758, "step": 521 }, { "epoch": 0.35, "learning_rate": 1.9634375000926367e-05, "loss": 0.5904, "step": 522 }, { "epoch": 0.35, "learning_rate": 1.9632954193864894e-05, "loss": 0.5624, "step": 523 }, { "epoch": 0.35, "learning_rate": 1.9631530683157316e-05, "loss": 0.548, "step": 524 }, { "epoch": 0.35, "learning_rate": 1.9630104469203165e-05, "loss": 0.5552, "step": 525 }, { "epoch": 0.35, "learning_rate": 1.9628675552402734e-05, "loss": 0.5761, "step": 526 }, { "epoch": 0.35, "learning_rate": 1.962724393315707e-05, "loss": 0.5475, "step": 527 }, { "epoch": 0.36, "learning_rate": 1.9625809611867977e-05, "loss": 0.5524, "step": 528 }, { "epoch": 0.36, "learning_rate": 1.9624372588938025e-05, "loss": 0.5618, "step": 529 }, { "epoch": 0.36, "learning_rate": 1.9622932864770538e-05, "loss": 0.535, "step": 530 }, { "epoch": 0.36, "learning_rate": 1.9621490439769594e-05, "loss": 0.5327, "step": 531 }, { "epoch": 0.36, "learning_rate": 1.9620045314340037e-05, "loss": 0.5834, "step": 532 }, { "epoch": 0.36, "learning_rate": 1.9618597488887462e-05, "loss": 0.5423, "step": 533 }, { "epoch": 0.36, "learning_rate": 1.9617146963818233e-05, "loss": 0.5831, "step": 534 }, { "epoch": 0.36, "learning_rate": 1.9615693739539452e-05, "loss": 0.5654, "step": 535 }, { "epoch": 0.36, "learning_rate": 1.9614237816459e-05, "loss": 0.5795, "step": 536 }, { "epoch": 0.36, "learning_rate": 1.96127791949855e-05, "loss": 0.5691, "step": 537 }, { "epoch": 0.36, "learning_rate": 1.9611317875528338e-05, "loss": 0.537, "step": 538 }, { "epoch": 0.36, "learning_rate": 1.9609853858497655e-05, "loss": 0.5723, "step": 539 }, { "epoch": 0.36, "learning_rate": 1.9608387144304363e-05, "loss": 0.5443, "step": 540 }, { "epoch": 0.36, "learning_rate": 1.9606917733360106e-05, "loss": 0.5524, "step": 541 }, { "epoch": 0.36, "learning_rate": 1.9605445626077305e-05, "loss": 0.5858, "step": 542 }, { "epoch": 0.37, "learning_rate": 1.9603970822869125e-05, "loss": 0.573, "step": 543 }, { "epoch": 0.37, "learning_rate": 1.9602493324149502e-05, "loss": 0.5726, "step": 544 }, { "epoch": 0.37, "learning_rate": 1.960101313033312e-05, "loss": 0.5576, "step": 545 }, { "epoch": 0.37, "learning_rate": 1.9599530241835407e-05, "loss": 0.5967, "step": 546 }, { "epoch": 0.37, "learning_rate": 1.9598044659072573e-05, "loss": 0.5562, "step": 547 }, { "epoch": 0.37, "learning_rate": 1.9596556382461567e-05, "loss": 0.5736, "step": 548 }, { "epoch": 0.37, "learning_rate": 1.9595065412420097e-05, "loss": 0.5825, "step": 549 }, { "epoch": 0.37, "learning_rate": 1.959357174936663e-05, "loss": 0.5675, "step": 550 }, { "epoch": 0.37, "learning_rate": 1.9592075393720385e-05, "loss": 0.5587, "step": 551 }, { "epoch": 0.37, "learning_rate": 1.959057634590134e-05, "loss": 0.5538, "step": 552 }, { "epoch": 0.37, "learning_rate": 1.9589074606330227e-05, "loss": 0.5589, "step": 553 }, { "epoch": 0.37, "learning_rate": 1.9587570175428533e-05, "loss": 0.5796, "step": 554 }, { "epoch": 0.37, "learning_rate": 1.95860630536185e-05, "loss": 0.5732, "step": 555 }, { "epoch": 0.37, "learning_rate": 1.9584553241323133e-05, "loss": 0.5658, "step": 556 }, { "epoch": 0.38, "learning_rate": 1.9583040738966178e-05, "loss": 0.5565, "step": 557 }, { "epoch": 0.38, "learning_rate": 1.958152554697215e-05, "loss": 0.5791, "step": 558 }, { "epoch": 0.38, "learning_rate": 1.9580007665766306e-05, "loss": 0.6232, "step": 559 }, { "epoch": 0.38, "learning_rate": 1.9578487095774666e-05, "loss": 0.5555, "step": 560 }, { "epoch": 0.38, "learning_rate": 1.9576963837424e-05, "loss": 0.5638, "step": 561 }, { "epoch": 0.38, "learning_rate": 1.9575437891141843e-05, "loss": 0.5563, "step": 562 }, { "epoch": 0.38, "learning_rate": 1.9573909257356474e-05, "loss": 0.5773, "step": 563 }, { "epoch": 0.38, "learning_rate": 1.9572377936496926e-05, "loss": 0.5515, "step": 564 }, { "epoch": 0.38, "learning_rate": 1.957084392899299e-05, "loss": 0.5485, "step": 565 }, { "epoch": 0.38, "learning_rate": 1.9569307235275214e-05, "loss": 0.6222, "step": 566 }, { "epoch": 0.38, "learning_rate": 1.9567767855774892e-05, "loss": 0.5472, "step": 567 }, { "epoch": 0.38, "learning_rate": 1.956622579092408e-05, "loss": 0.6121, "step": 568 }, { "epoch": 0.38, "learning_rate": 1.9564681041155576e-05, "loss": 0.5712, "step": 569 }, { "epoch": 0.38, "learning_rate": 1.956313360690295e-05, "loss": 0.623, "step": 570 }, { "epoch": 0.38, "learning_rate": 1.956158348860051e-05, "loss": 0.5701, "step": 571 }, { "epoch": 0.39, "learning_rate": 1.9560030686683316e-05, "loss": 0.5747, "step": 572 }, { "epoch": 0.39, "learning_rate": 1.9558475201587198e-05, "loss": 0.6337, "step": 573 }, { "epoch": 0.39, "learning_rate": 1.955691703374872e-05, "loss": 0.5592, "step": 574 }, { "epoch": 0.39, "learning_rate": 1.955535618360521e-05, "loss": 0.6027, "step": 575 }, { "epoch": 0.39, "learning_rate": 1.955379265159475e-05, "loss": 0.5717, "step": 576 }, { "epoch": 0.39, "learning_rate": 1.955222643815616e-05, "loss": 0.6055, "step": 577 }, { "epoch": 0.39, "learning_rate": 1.9550657543729038e-05, "loss": 0.5543, "step": 578 }, { "epoch": 0.39, "learning_rate": 1.9549085968753707e-05, "loss": 0.5172, "step": 579 }, { "epoch": 0.39, "learning_rate": 1.9547511713671264e-05, "loss": 0.5725, "step": 580 }, { "epoch": 0.39, "learning_rate": 1.9545934778923545e-05, "loss": 0.5769, "step": 581 }, { "epoch": 0.39, "learning_rate": 1.9544355164953143e-05, "loss": 0.555, "step": 582 }, { "epoch": 0.39, "learning_rate": 1.9542772872203404e-05, "loss": 0.5751, "step": 583 }, { "epoch": 0.39, "learning_rate": 1.9541187901118423e-05, "loss": 0.6036, "step": 584 }, { "epoch": 0.39, "learning_rate": 1.953960025214305e-05, "loss": 0.6035, "step": 585 }, { "epoch": 0.39, "learning_rate": 1.953800992572288e-05, "loss": 0.5255, "step": 586 }, { "epoch": 0.4, "learning_rate": 1.953641692230427e-05, "loss": 0.5611, "step": 587 }, { "epoch": 0.4, "learning_rate": 1.953482124233432e-05, "loss": 0.5736, "step": 588 }, { "epoch": 0.4, "learning_rate": 1.953322288626088e-05, "loss": 0.5818, "step": 589 }, { "epoch": 0.4, "learning_rate": 1.9531621854532562e-05, "loss": 0.5095, "step": 590 }, { "epoch": 0.4, "learning_rate": 1.9530018147598717e-05, "loss": 0.5513, "step": 591 }, { "epoch": 0.4, "learning_rate": 1.9528411765909452e-05, "loss": 0.5914, "step": 592 }, { "epoch": 0.4, "learning_rate": 1.9526802709915623e-05, "loss": 0.5548, "step": 593 }, { "epoch": 0.4, "learning_rate": 1.9525190980068843e-05, "loss": 0.5366, "step": 594 }, { "epoch": 0.4, "learning_rate": 1.9523576576821463e-05, "loss": 0.5367, "step": 595 }, { "epoch": 0.4, "learning_rate": 1.95219595006266e-05, "loss": 0.5409, "step": 596 }, { "epoch": 0.4, "learning_rate": 1.9520339751938103e-05, "loss": 0.5539, "step": 597 }, { "epoch": 0.4, "learning_rate": 1.9518717331210594e-05, "loss": 0.5785, "step": 598 }, { "epoch": 0.4, "learning_rate": 1.951709223889942e-05, "loss": 0.5517, "step": 599 }, { "epoch": 0.4, "learning_rate": 1.9515464475460692e-05, "loss": 0.5972, "step": 600 }, { "epoch": 0.4, "learning_rate": 1.9513834041351277e-05, "loss": 0.5328, "step": 601 }, { "epoch": 0.41, "learning_rate": 1.9512200937028767e-05, "loss": 0.6033, "step": 602 }, { "epoch": 0.41, "learning_rate": 1.9510565162951538e-05, "loss": 0.5525, "step": 603 }, { "epoch": 0.41, "learning_rate": 1.9508926719578683e-05, "loss": 0.5965, "step": 604 }, { "epoch": 0.41, "learning_rate": 1.9507285607370065e-05, "loss": 0.5734, "step": 605 }, { "epoch": 0.41, "learning_rate": 1.9505641826786282e-05, "loss": 0.56, "step": 606 }, { "epoch": 0.41, "learning_rate": 1.9503995378288697e-05, "loss": 0.5585, "step": 607 }, { "epoch": 0.41, "learning_rate": 1.950234626233941e-05, "loss": 0.5904, "step": 608 }, { "epoch": 0.41, "learning_rate": 1.9500694479401266e-05, "loss": 0.5417, "step": 609 }, { "epoch": 0.41, "learning_rate": 1.949904002993787e-05, "loss": 0.5612, "step": 610 }, { "epoch": 0.41, "learning_rate": 1.949738291441357e-05, "loss": 0.5519, "step": 611 }, { "epoch": 0.41, "learning_rate": 1.9495723133293465e-05, "loss": 0.5326, "step": 612 }, { "epoch": 0.41, "learning_rate": 1.9494060687043393e-05, "loss": 0.6025, "step": 613 }, { "epoch": 0.41, "learning_rate": 1.9492395576129953e-05, "loss": 0.5566, "step": 614 }, { "epoch": 0.41, "learning_rate": 1.9490727801020485e-05, "loss": 0.5631, "step": 615 }, { "epoch": 0.41, "learning_rate": 1.9489057362183074e-05, "loss": 0.5973, "step": 616 }, { "epoch": 0.42, "learning_rate": 1.9487384260086557e-05, "loss": 0.5548, "step": 617 }, { "epoch": 0.42, "learning_rate": 1.9485708495200517e-05, "loss": 0.5686, "step": 618 }, { "epoch": 0.42, "learning_rate": 1.948403006799529e-05, "loss": 0.5612, "step": 619 }, { "epoch": 0.42, "learning_rate": 1.9482348978941947e-05, "loss": 0.6014, "step": 620 }, { "epoch": 0.42, "learning_rate": 1.9480665228512314e-05, "loss": 0.5584, "step": 621 }, { "epoch": 0.42, "learning_rate": 1.9478978817178967e-05, "loss": 0.5733, "step": 622 }, { "epoch": 0.42, "learning_rate": 1.9477289745415225e-05, "loss": 0.5554, "step": 623 }, { "epoch": 0.42, "learning_rate": 1.947559801369515e-05, "loss": 0.5889, "step": 624 }, { "epoch": 0.42, "learning_rate": 1.9473903622493554e-05, "loss": 0.517, "step": 625 }, { "epoch": 0.42, "learning_rate": 1.9472206572286003e-05, "loss": 0.5979, "step": 626 }, { "epoch": 0.42, "learning_rate": 1.947050686354879e-05, "loss": 0.5714, "step": 627 }, { "epoch": 0.42, "learning_rate": 1.946880449675898e-05, "loss": 0.5673, "step": 628 }, { "epoch": 0.42, "learning_rate": 1.9467099472394357e-05, "loss": 0.5075, "step": 629 }, { "epoch": 0.42, "learning_rate": 1.946539179093347e-05, "loss": 0.5806, "step": 630 }, { "epoch": 0.42, "learning_rate": 1.9463681452855607e-05, "loss": 0.5948, "step": 631 }, { "epoch": 0.43, "learning_rate": 1.9461968458640802e-05, "loss": 0.5642, "step": 632 }, { "epoch": 0.43, "learning_rate": 1.9460252808769836e-05, "loss": 0.5767, "step": 633 }, { "epoch": 0.43, "learning_rate": 1.9458534503724236e-05, "loss": 0.5629, "step": 634 }, { "epoch": 0.43, "learning_rate": 1.945681354398627e-05, "loss": 0.5585, "step": 635 }, { "epoch": 0.43, "learning_rate": 1.945508993003895e-05, "loss": 0.5811, "step": 636 }, { "epoch": 0.43, "learning_rate": 1.945336366236604e-05, "loss": 0.5724, "step": 637 }, { "epoch": 0.43, "learning_rate": 1.9451634741452047e-05, "loss": 0.5595, "step": 638 }, { "epoch": 0.43, "learning_rate": 1.944990316778222e-05, "loss": 0.5611, "step": 639 }, { "epoch": 0.43, "learning_rate": 1.944816894184255e-05, "loss": 0.5891, "step": 640 }, { "epoch": 0.43, "learning_rate": 1.944643206411978e-05, "loss": 0.5402, "step": 641 }, { "epoch": 0.43, "learning_rate": 1.944469253510139e-05, "loss": 0.5984, "step": 642 }, { "epoch": 0.43, "learning_rate": 1.9442950355275613e-05, "loss": 0.5142, "step": 643 }, { "epoch": 0.43, "learning_rate": 1.944120552513141e-05, "loss": 0.5582, "step": 644 }, { "epoch": 0.43, "learning_rate": 1.943945804515851e-05, "loss": 0.5764, "step": 645 }, { "epoch": 0.43, "learning_rate": 1.9437707915847355e-05, "loss": 0.558, "step": 646 }, { "epoch": 0.44, "learning_rate": 1.943595513768916e-05, "loss": 0.5538, "step": 647 }, { "epoch": 0.44, "learning_rate": 1.943419971117587e-05, "loss": 0.5488, "step": 648 }, { "epoch": 0.44, "learning_rate": 1.943244163680017e-05, "loss": 0.5556, "step": 649 }, { "epoch": 0.44, "learning_rate": 1.9430680915055492e-05, "loss": 0.5655, "step": 650 }, { "epoch": 0.44, "learning_rate": 1.9428917546436014e-05, "loss": 0.5852, "step": 651 }, { "epoch": 0.44, "learning_rate": 1.9427151531436652e-05, "loss": 0.5719, "step": 652 }, { "epoch": 0.44, "learning_rate": 1.942538287055307e-05, "loss": 0.5358, "step": 653 }, { "epoch": 0.44, "learning_rate": 1.942361156428167e-05, "loss": 0.5587, "step": 654 }, { "epoch": 0.44, "learning_rate": 1.9421837613119597e-05, "loss": 0.5437, "step": 655 }, { "epoch": 0.44, "learning_rate": 1.9420061017564743e-05, "loss": 0.5874, "step": 656 }, { "epoch": 0.44, "learning_rate": 1.941828177811573e-05, "loss": 0.5545, "step": 657 }, { "epoch": 0.44, "learning_rate": 1.941649989527194e-05, "loss": 0.5799, "step": 658 }, { "epoch": 0.44, "learning_rate": 1.9414715369533485e-05, "loss": 0.5957, "step": 659 }, { "epoch": 0.44, "learning_rate": 1.941292820140122e-05, "loss": 0.5477, "step": 660 }, { "epoch": 0.45, "learning_rate": 1.9411138391376742e-05, "loss": 0.5512, "step": 661 }, { "epoch": 0.45, "learning_rate": 1.9409345939962393e-05, "loss": 0.541, "step": 662 }, { "epoch": 0.45, "learning_rate": 1.9407550847661256e-05, "loss": 0.5706, "step": 663 }, { "epoch": 0.45, "learning_rate": 1.9405753114977145e-05, "loss": 0.5745, "step": 664 }, { "epoch": 0.45, "learning_rate": 1.940395274241463e-05, "loss": 0.6005, "step": 665 }, { "epoch": 0.45, "learning_rate": 1.940214973047901e-05, "loss": 0.5466, "step": 666 }, { "epoch": 0.45, "learning_rate": 1.9400344079676335e-05, "loss": 0.562, "step": 667 }, { "epoch": 0.45, "learning_rate": 1.939853579051339e-05, "loss": 0.5724, "step": 668 }, { "epoch": 0.45, "learning_rate": 1.9396724863497695e-05, "loss": 0.5713, "step": 669 }, { "epoch": 0.45, "learning_rate": 1.9394911299137522e-05, "loss": 0.6093, "step": 670 }, { "epoch": 0.45, "learning_rate": 1.9393095097941873e-05, "loss": 0.5525, "step": 671 }, { "epoch": 0.45, "learning_rate": 1.93912762604205e-05, "loss": 0.5629, "step": 672 }, { "epoch": 0.45, "learning_rate": 1.9389454787083884e-05, "loss": 0.604, "step": 673 }, { "epoch": 0.45, "learning_rate": 1.938763067844326e-05, "loss": 0.5516, "step": 674 }, { "epoch": 0.45, "learning_rate": 1.938580393501058e-05, "loss": 0.5371, "step": 675 }, { "epoch": 0.46, "learning_rate": 1.9383974557298563e-05, "loss": 0.557, "step": 676 }, { "epoch": 0.46, "learning_rate": 1.938214254582065e-05, "loss": 0.5684, "step": 677 }, { "epoch": 0.46, "learning_rate": 1.9380307901091014e-05, "loss": 0.5569, "step": 678 }, { "epoch": 0.46, "learning_rate": 1.9378470623624594e-05, "loss": 0.551, "step": 679 }, { "epoch": 0.46, "learning_rate": 1.9376630713937043e-05, "loss": 0.6101, "step": 680 }, { "epoch": 0.46, "learning_rate": 1.9374788172544765e-05, "loss": 0.569, "step": 681 }, { "epoch": 0.46, "learning_rate": 1.9372942999964895e-05, "loss": 0.5669, "step": 682 }, { "epoch": 0.46, "learning_rate": 1.9371095196715316e-05, "loss": 0.5921, "step": 683 }, { "epoch": 0.46, "learning_rate": 1.9369244763314645e-05, "loss": 0.5578, "step": 684 }, { "epoch": 0.46, "learning_rate": 1.9367391700282228e-05, "loss": 0.5318, "step": 685 }, { "epoch": 0.46, "learning_rate": 1.9365536008138172e-05, "loss": 0.557, "step": 686 }, { "epoch": 0.46, "learning_rate": 1.936367768740329e-05, "loss": 0.539, "step": 687 }, { "epoch": 0.46, "learning_rate": 1.936181673859916e-05, "loss": 0.6057, "step": 688 }, { "epoch": 0.46, "learning_rate": 1.935995316224809e-05, "loss": 0.5804, "step": 689 }, { "epoch": 0.46, "learning_rate": 1.9358086958873116e-05, "loss": 0.5773, "step": 690 }, { "epoch": 0.47, "learning_rate": 1.935621812899802e-05, "loss": 0.5493, "step": 691 }, { "epoch": 0.47, "learning_rate": 1.9354346673147323e-05, "loss": 0.5728, "step": 692 }, { "epoch": 0.47, "learning_rate": 1.9352472591846282e-05, "loss": 0.5789, "step": 693 }, { "epoch": 0.47, "learning_rate": 1.935059588562088e-05, "loss": 0.5603, "step": 694 }, { "epoch": 0.47, "learning_rate": 1.9348716554997854e-05, "loss": 0.5734, "step": 695 }, { "epoch": 0.47, "learning_rate": 1.9346834600504664e-05, "loss": 0.5711, "step": 696 }, { "epoch": 0.47, "learning_rate": 1.9344950022669507e-05, "loss": 0.5498, "step": 697 }, { "epoch": 0.47, "learning_rate": 1.9343062822021332e-05, "loss": 0.5745, "step": 698 }, { "epoch": 0.47, "learning_rate": 1.9341172999089805e-05, "loss": 0.5516, "step": 699 }, { "epoch": 0.47, "learning_rate": 1.9339280554405336e-05, "loss": 0.5672, "step": 700 }, { "epoch": 0.47, "learning_rate": 1.9337385488499074e-05, "loss": 0.5404, "step": 701 }, { "epoch": 0.47, "learning_rate": 1.9335487801902896e-05, "loss": 0.5741, "step": 702 }, { "epoch": 0.47, "learning_rate": 1.9333587495149418e-05, "loss": 0.5714, "step": 703 }, { "epoch": 0.47, "learning_rate": 1.9331684568772e-05, "loss": 0.5669, "step": 704 }, { "epoch": 0.47, "learning_rate": 1.9329779023304724e-05, "loss": 0.5737, "step": 705 }, { "epoch": 0.48, "learning_rate": 1.932787085928241e-05, "loss": 0.5828, "step": 706 }, { "epoch": 0.48, "learning_rate": 1.932596007724062e-05, "loss": 0.5557, "step": 707 }, { "epoch": 0.48, "learning_rate": 1.9324046677715644e-05, "loss": 0.5757, "step": 708 }, { "epoch": 0.48, "learning_rate": 1.9322130661244508e-05, "loss": 0.6158, "step": 709 }, { "epoch": 0.48, "learning_rate": 1.9320212028364976e-05, "loss": 0.531, "step": 710 }, { "epoch": 0.48, "learning_rate": 1.9318290779615545e-05, "loss": 0.5411, "step": 711 }, { "epoch": 0.48, "learning_rate": 1.931636691553544e-05, "loss": 0.5593, "step": 712 }, { "epoch": 0.48, "learning_rate": 1.9314440436664626e-05, "loss": 0.5658, "step": 713 }, { "epoch": 0.48, "learning_rate": 1.9312511343543802e-05, "loss": 0.5591, "step": 714 }, { "epoch": 0.48, "learning_rate": 1.9310579636714402e-05, "loss": 0.5617, "step": 715 }, { "epoch": 0.48, "learning_rate": 1.930864531671859e-05, "loss": 0.5429, "step": 716 }, { "epoch": 0.48, "learning_rate": 1.9306708384099258e-05, "loss": 0.5369, "step": 717 }, { "epoch": 0.48, "learning_rate": 1.9304768839400046e-05, "loss": 0.5455, "step": 718 }, { "epoch": 0.48, "learning_rate": 1.9302826683165315e-05, "loss": 0.5672, "step": 719 }, { "epoch": 0.48, "learning_rate": 1.9300881915940163e-05, "loss": 0.5997, "step": 720 }, { "epoch": 0.49, "learning_rate": 1.9298934538270423e-05, "loss": 0.5621, "step": 721 }, { "epoch": 0.49, "learning_rate": 1.9296984550702656e-05, "loss": 0.5844, "step": 722 }, { "epoch": 0.49, "learning_rate": 1.929503195378416e-05, "loss": 0.6057, "step": 723 }, { "epoch": 0.49, "learning_rate": 1.929307674806296e-05, "loss": 0.5534, "step": 724 }, { "epoch": 0.49, "learning_rate": 1.929111893408782e-05, "loss": 0.5135, "step": 725 }, { "epoch": 0.49, "learning_rate": 1.928915851240823e-05, "loss": 0.5553, "step": 726 }, { "epoch": 0.49, "learning_rate": 1.928719548357442e-05, "loss": 0.5783, "step": 727 }, { "epoch": 0.49, "learning_rate": 1.9285229848137335e-05, "loss": 0.5537, "step": 728 }, { "epoch": 0.49, "learning_rate": 1.9283261606648672e-05, "loss": 0.5447, "step": 729 }, { "epoch": 0.49, "learning_rate": 1.928129075966085e-05, "loss": 0.5886, "step": 730 }, { "epoch": 0.49, "learning_rate": 1.9279317307727012e-05, "loss": 0.5832, "step": 731 }, { "epoch": 0.49, "learning_rate": 1.9277341251401047e-05, "loss": 0.5591, "step": 732 }, { "epoch": 0.49, "learning_rate": 1.9275362591237564e-05, "loss": 0.5592, "step": 733 }, { "epoch": 0.49, "learning_rate": 1.927338132779191e-05, "loss": 0.536, "step": 734 }, { "epoch": 0.49, "learning_rate": 1.9271397461620154e-05, "loss": 0.5591, "step": 735 }, { "epoch": 0.5, "learning_rate": 1.9269410993279104e-05, "loss": 0.5325, "step": 736 }, { "epoch": 0.5, "learning_rate": 1.926742192332629e-05, "loss": 0.5783, "step": 737 }, { "epoch": 0.5, "learning_rate": 1.9265430252319984e-05, "loss": 0.5576, "step": 738 }, { "epoch": 0.5, "learning_rate": 1.9263435980819177e-05, "loss": 0.6244, "step": 739 }, { "epoch": 0.5, "learning_rate": 1.9261439109383594e-05, "loss": 0.5723, "step": 740 }, { "epoch": 0.5, "learning_rate": 1.9259439638573688e-05, "loss": 0.5856, "step": 741 }, { "epoch": 0.5, "learning_rate": 1.925743756895065e-05, "loss": 0.5135, "step": 742 }, { "epoch": 0.5, "learning_rate": 1.9255432901076386e-05, "loss": 0.5601, "step": 743 }, { "epoch": 0.5, "learning_rate": 1.9253425635513543e-05, "loss": 0.5737, "step": 744 }, { "epoch": 0.5, "learning_rate": 1.925141577282549e-05, "loss": 0.5594, "step": 745 }, { "epoch": 0.5, "learning_rate": 1.9249403313576328e-05, "loss": 0.5622, "step": 746 }, { "epoch": 0.5, "learning_rate": 1.9247388258330892e-05, "loss": 0.5669, "step": 747 }, { "epoch": 0.5, "learning_rate": 1.9245370607654733e-05, "loss": 0.5544, "step": 748 }, { "epoch": 0.5, "learning_rate": 1.9243350362114143e-05, "loss": 0.5297, "step": 749 }, { "epoch": 0.51, "learning_rate": 1.9241327522276133e-05, "loss": 0.5723, "step": 750 }, { "epoch": 0.51, "learning_rate": 1.9239302088708452e-05, "loss": 0.5341, "step": 751 }, { "epoch": 0.51, "learning_rate": 1.9237274061979564e-05, "loss": 0.6035, "step": 752 }, { "epoch": 0.51, "learning_rate": 1.923524344265867e-05, "loss": 0.5262, "step": 753 }, { "epoch": 0.51, "learning_rate": 1.9233210231315697e-05, "loss": 0.5501, "step": 754 }, { "epoch": 0.51, "learning_rate": 1.92311744285213e-05, "loss": 0.5783, "step": 755 }, { "epoch": 0.51, "learning_rate": 1.9229136034846858e-05, "loss": 0.5909, "step": 756 }, { "epoch": 0.51, "learning_rate": 1.9227095050864485e-05, "loss": 0.5896, "step": 757 }, { "epoch": 0.51, "learning_rate": 1.922505147714701e-05, "loss": 0.5653, "step": 758 }, { "epoch": 0.51, "learning_rate": 1.9223005314268e-05, "loss": 0.5263, "step": 759 }, { "epoch": 0.51, "learning_rate": 1.922095656280174e-05, "loss": 0.5538, "step": 760 }, { "epoch": 0.51, "learning_rate": 1.921890522332325e-05, "loss": 0.5532, "step": 761 }, { "epoch": 0.51, "learning_rate": 1.921685129640827e-05, "loss": 0.5551, "step": 762 }, { "epoch": 0.51, "learning_rate": 1.9214794782633264e-05, "loss": 0.5391, "step": 763 }, { "epoch": 0.51, "learning_rate": 1.921273568257543e-05, "loss": 0.5658, "step": 764 }, { "epoch": 0.52, "learning_rate": 1.9210673996812694e-05, "loss": 0.5353, "step": 765 }, { "epoch": 0.52, "learning_rate": 1.920860972592369e-05, "loss": 0.549, "step": 766 }, { "epoch": 0.52, "learning_rate": 1.92065428704878e-05, "loss": 0.5588, "step": 767 }, { "epoch": 0.52, "learning_rate": 1.9204473431085114e-05, "loss": 0.5849, "step": 768 }, { "epoch": 0.52, "learning_rate": 1.9202401408296457e-05, "loss": 0.5869, "step": 769 }, { "epoch": 0.52, "learning_rate": 1.9200326802703374e-05, "loss": 0.5734, "step": 770 }, { "epoch": 0.52, "learning_rate": 1.9198249614888138e-05, "loss": 0.5652, "step": 771 }, { "epoch": 0.52, "learning_rate": 1.919616984543375e-05, "loss": 0.6093, "step": 772 }, { "epoch": 0.52, "learning_rate": 1.919408749492392e-05, "loss": 0.562, "step": 773 }, { "epoch": 0.52, "learning_rate": 1.919200256394311e-05, "loss": 0.5512, "step": 774 }, { "epoch": 0.52, "learning_rate": 1.9189915053076472e-05, "loss": 0.5685, "step": 775 }, { "epoch": 0.52, "learning_rate": 1.9187824962909912e-05, "loss": 0.513, "step": 776 }, { "epoch": 0.52, "learning_rate": 1.918573229403005e-05, "loss": 0.5912, "step": 777 }, { "epoch": 0.52, "learning_rate": 1.9183637047024218e-05, "loss": 0.5767, "step": 778 }, { "epoch": 0.52, "learning_rate": 1.9181539222480484e-05, "loss": 0.5945, "step": 779 }, { "epoch": 0.53, "learning_rate": 1.9179438820987645e-05, "loss": 0.5604, "step": 780 }, { "epoch": 0.53, "learning_rate": 1.9177335843135202e-05, "loss": 0.5295, "step": 781 }, { "epoch": 0.53, "learning_rate": 1.91752302895134e-05, "loss": 0.5536, "step": 782 }, { "epoch": 0.53, "learning_rate": 1.9173122160713187e-05, "loss": 0.549, "step": 783 }, { "epoch": 0.53, "learning_rate": 1.917101145732625e-05, "loss": 0.5432, "step": 784 }, { "epoch": 0.53, "learning_rate": 1.9168898179944994e-05, "loss": 0.5829, "step": 785 }, { "epoch": 0.53, "learning_rate": 1.9166782329162538e-05, "loss": 0.5574, "step": 786 }, { "epoch": 0.53, "learning_rate": 1.9164663905572734e-05, "loss": 0.5803, "step": 787 }, { "epoch": 0.53, "learning_rate": 1.9162542909770153e-05, "loss": 0.5806, "step": 788 }, { "epoch": 0.53, "learning_rate": 1.916041934235009e-05, "loss": 0.5486, "step": 789 }, { "epoch": 0.53, "learning_rate": 1.9158293203908552e-05, "loss": 0.5411, "step": 790 }, { "epoch": 0.53, "learning_rate": 1.9156164495042283e-05, "loss": 0.5524, "step": 791 }, { "epoch": 0.53, "learning_rate": 1.9154033216348732e-05, "loss": 0.5782, "step": 792 }, { "epoch": 0.53, "learning_rate": 1.915189936842608e-05, "loss": 0.5995, "step": 793 }, { "epoch": 0.53, "learning_rate": 1.9149762951873224e-05, "loss": 0.5672, "step": 794 }, { "epoch": 0.54, "learning_rate": 1.914762396728979e-05, "loss": 0.5396, "step": 795 }, { "epoch": 0.54, "learning_rate": 1.9145482415276113e-05, "loss": 0.5665, "step": 796 }, { "epoch": 0.54, "learning_rate": 1.9143338296433262e-05, "loss": 0.5891, "step": 797 }, { "epoch": 0.54, "learning_rate": 1.914119161136301e-05, "loss": 0.5535, "step": 798 }, { "epoch": 0.54, "learning_rate": 1.9139042360667865e-05, "loss": 0.5399, "step": 799 }, { "epoch": 0.54, "learning_rate": 1.9136890544951046e-05, "loss": 0.555, "step": 800 }, { "epoch": 0.54, "learning_rate": 1.91347361648165e-05, "loss": 0.5976, "step": 801 }, { "epoch": 0.54, "learning_rate": 1.913257922086889e-05, "loss": 0.5292, "step": 802 }, { "epoch": 0.54, "learning_rate": 1.9130419713713588e-05, "loss": 0.5547, "step": 803 }, { "epoch": 0.54, "learning_rate": 1.9128257643956703e-05, "loss": 0.5403, "step": 804 }, { "epoch": 0.54, "learning_rate": 1.912609301220505e-05, "loss": 0.5898, "step": 805 }, { "epoch": 0.54, "learning_rate": 1.9123925819066172e-05, "loss": 0.5698, "step": 806 }, { "epoch": 0.54, "learning_rate": 1.9121756065148333e-05, "loss": 0.6113, "step": 807 }, { "epoch": 0.54, "learning_rate": 1.9119583751060495e-05, "loss": 0.5581, "step": 808 }, { "epoch": 0.54, "learning_rate": 1.9117408877412366e-05, "loss": 0.5537, "step": 809 }, { "epoch": 0.55, "learning_rate": 1.9115231444814356e-05, "loss": 0.5646, "step": 810 }, { "epoch": 0.55, "learning_rate": 1.9113051453877595e-05, "loss": 0.5092, "step": 811 }, { "epoch": 0.55, "learning_rate": 1.911086890521394e-05, "loss": 0.5775, "step": 812 }, { "epoch": 0.55, "learning_rate": 1.910868379943595e-05, "loss": 0.5368, "step": 813 }, { "epoch": 0.55, "learning_rate": 1.910649613715691e-05, "loss": 0.5578, "step": 814 }, { "epoch": 0.55, "learning_rate": 1.9104305918990832e-05, "loss": 0.5948, "step": 815 }, { "epoch": 0.55, "learning_rate": 1.9102113145552434e-05, "loss": 0.5245, "step": 816 }, { "epoch": 0.55, "learning_rate": 1.9099917817457152e-05, "loss": 0.5549, "step": 817 }, { "epoch": 0.55, "learning_rate": 1.9097719935321137e-05, "loss": 0.5862, "step": 818 }, { "epoch": 0.55, "learning_rate": 1.909551949976127e-05, "loss": 0.5024, "step": 819 }, { "epoch": 0.55, "learning_rate": 1.9093316511395128e-05, "loss": 0.5631, "step": 820 }, { "epoch": 0.55, "learning_rate": 1.9091110970841024e-05, "loss": 0.5404, "step": 821 }, { "epoch": 0.55, "learning_rate": 1.9088902878717978e-05, "loss": 0.5944, "step": 822 }, { "epoch": 0.55, "learning_rate": 1.908669223564572e-05, "loss": 0.5525, "step": 823 }, { "epoch": 0.55, "learning_rate": 1.9084479042244714e-05, "loss": 0.6176, "step": 824 }, { "epoch": 0.56, "learning_rate": 1.908226329913612e-05, "loss": 0.5513, "step": 825 }, { "epoch": 0.56, "learning_rate": 1.9080045006941828e-05, "loss": 0.5757, "step": 826 }, { "epoch": 0.56, "learning_rate": 1.9077824166284434e-05, "loss": 0.579, "step": 827 }, { "epoch": 0.56, "learning_rate": 1.9075600777787256e-05, "loss": 0.5777, "step": 828 }, { "epoch": 0.56, "learning_rate": 1.9073374842074323e-05, "loss": 0.5759, "step": 829 }, { "epoch": 0.56, "learning_rate": 1.9071146359770384e-05, "loss": 0.5795, "step": 830 }, { "epoch": 0.56, "learning_rate": 1.906891533150089e-05, "loss": 0.5528, "step": 831 }, { "epoch": 0.56, "learning_rate": 1.9066681757892026e-05, "loss": 0.5879, "step": 832 }, { "epoch": 0.56, "learning_rate": 1.9064445639570675e-05, "loss": 0.6238, "step": 833 }, { "epoch": 0.56, "learning_rate": 1.906220697716444e-05, "loss": 0.5294, "step": 834 }, { "epoch": 0.56, "learning_rate": 1.9059965771301644e-05, "loss": 0.5764, "step": 835 }, { "epoch": 0.56, "learning_rate": 1.905772202261131e-05, "loss": 0.5574, "step": 836 }, { "epoch": 0.56, "learning_rate": 1.9055475731723187e-05, "loss": 0.5675, "step": 837 }, { "epoch": 0.56, "learning_rate": 1.905322689926773e-05, "loss": 0.5536, "step": 838 }, { "epoch": 0.56, "learning_rate": 1.905097552587612e-05, "loss": 0.5418, "step": 839 }, { "epoch": 0.57, "learning_rate": 1.9048721612180232e-05, "loss": 0.5389, "step": 840 }, { "epoch": 0.57, "learning_rate": 1.9046465158812666e-05, "loss": 0.5604, "step": 841 }, { "epoch": 0.57, "learning_rate": 1.9044206166406734e-05, "loss": 0.5755, "step": 842 }, { "epoch": 0.57, "learning_rate": 1.904194463559646e-05, "loss": 0.5955, "step": 843 }, { "epoch": 0.57, "learning_rate": 1.9039680567016576e-05, "loss": 0.5437, "step": 844 }, { "epoch": 0.57, "learning_rate": 1.9037413961302534e-05, "loss": 0.5514, "step": 845 }, { "epoch": 0.57, "learning_rate": 1.903514481909049e-05, "loss": 0.544, "step": 846 }, { "epoch": 0.57, "learning_rate": 1.9032873141017316e-05, "loss": 0.5387, "step": 847 }, { "epoch": 0.57, "learning_rate": 1.9030598927720603e-05, "loss": 0.5393, "step": 848 }, { "epoch": 0.57, "learning_rate": 1.9028322179838636e-05, "loss": 0.518, "step": 849 }, { "epoch": 0.57, "learning_rate": 1.9026042898010428e-05, "loss": 0.5585, "step": 850 }, { "epoch": 0.57, "learning_rate": 1.9023761082875695e-05, "loss": 0.6034, "step": 851 }, { "epoch": 0.57, "learning_rate": 1.9021476735074865e-05, "loss": 0.5387, "step": 852 }, { "epoch": 0.57, "learning_rate": 1.9019189855249078e-05, "loss": 0.5533, "step": 853 }, { "epoch": 0.58, "learning_rate": 1.9016900444040184e-05, "loss": 0.5328, "step": 854 }, { "epoch": 0.58, "learning_rate": 1.9014608502090744e-05, "loss": 0.5882, "step": 855 }, { "epoch": 0.58, "learning_rate": 1.901231403004403e-05, "loss": 0.5415, "step": 856 }, { "epoch": 0.58, "learning_rate": 1.9010017028544023e-05, "loss": 0.6163, "step": 857 }, { "epoch": 0.58, "learning_rate": 1.9007717498235412e-05, "loss": 0.5451, "step": 858 }, { "epoch": 0.58, "learning_rate": 1.90054154397636e-05, "loss": 0.5427, "step": 859 }, { "epoch": 0.58, "learning_rate": 1.9003110853774694e-05, "loss": 0.6107, "step": 860 }, { "epoch": 0.58, "learning_rate": 1.9000803740915515e-05, "loss": 0.5687, "step": 861 }, { "epoch": 0.58, "learning_rate": 1.89984941018336e-05, "loss": 0.5573, "step": 862 }, { "epoch": 0.58, "learning_rate": 1.8996181937177173e-05, "loss": 0.5551, "step": 863 }, { "epoch": 0.58, "learning_rate": 1.899386724759519e-05, "loss": 0.5538, "step": 864 }, { "epoch": 0.58, "learning_rate": 1.89915500337373e-05, "loss": 0.5824, "step": 865 }, { "epoch": 0.58, "learning_rate": 1.898923029625388e-05, "loss": 0.6187, "step": 866 }, { "epoch": 0.58, "learning_rate": 1.8986908035795986e-05, "loss": 0.6308, "step": 867 }, { "epoch": 0.58, "learning_rate": 1.898458325301541e-05, "loss": 0.5597, "step": 868 }, { "epoch": 0.59, "learning_rate": 1.8982255948564635e-05, "loss": 0.5874, "step": 869 }, { "epoch": 0.59, "learning_rate": 1.8979926123096858e-05, "loss": 0.5634, "step": 870 }, { "epoch": 0.59, "learning_rate": 1.8977593777265978e-05, "loss": 0.5411, "step": 871 }, { "epoch": 0.59, "learning_rate": 1.8975258911726616e-05, "loss": 0.5591, "step": 872 }, { "epoch": 0.59, "learning_rate": 1.8972921527134085e-05, "loss": 0.5356, "step": 873 }, { "epoch": 0.59, "learning_rate": 1.8970581624144412e-05, "loss": 0.5493, "step": 874 }, { "epoch": 0.59, "learning_rate": 1.896823920341432e-05, "loss": 0.5647, "step": 875 }, { "epoch": 0.59, "learning_rate": 1.8965894265601263e-05, "loss": 0.6083, "step": 876 }, { "epoch": 0.59, "learning_rate": 1.8963546811363375e-05, "loss": 0.5754, "step": 877 }, { "epoch": 0.59, "learning_rate": 1.8961196841359507e-05, "loss": 0.5571, "step": 878 }, { "epoch": 0.59, "learning_rate": 1.8958844356249223e-05, "loss": 0.5892, "step": 879 }, { "epoch": 0.59, "learning_rate": 1.895648935669278e-05, "loss": 0.5377, "step": 880 }, { "epoch": 0.59, "learning_rate": 1.895413184335116e-05, "loss": 0.5311, "step": 881 }, { "epoch": 0.59, "learning_rate": 1.895177181688602e-05, "loss": 0.524, "step": 882 }, { "epoch": 0.59, "learning_rate": 1.894940927795975e-05, "loss": 0.5935, "step": 883 }, { "epoch": 0.6, "learning_rate": 1.894704422723543e-05, "loss": 0.5717, "step": 884 }, { "epoch": 0.6, "learning_rate": 1.8944676665376858e-05, "loss": 0.5819, "step": 885 }, { "epoch": 0.6, "learning_rate": 1.8942306593048522e-05, "loss": 0.5643, "step": 886 }, { "epoch": 0.6, "learning_rate": 1.8939934010915627e-05, "loss": 0.5349, "step": 887 }, { "epoch": 0.6, "learning_rate": 1.893755891964407e-05, "loss": 0.5445, "step": 888 }, { "epoch": 0.6, "learning_rate": 1.8935181319900467e-05, "loss": 0.526, "step": 889 }, { "epoch": 0.6, "learning_rate": 1.8932801212352124e-05, "loss": 0.5165, "step": 890 }, { "epoch": 0.6, "learning_rate": 1.893041859766706e-05, "loss": 0.5332, "step": 891 }, { "epoch": 0.6, "learning_rate": 1.8928033476513996e-05, "loss": 0.5564, "step": 892 }, { "epoch": 0.6, "learning_rate": 1.892564584956235e-05, "loss": 0.5446, "step": 893 }, { "epoch": 0.6, "learning_rate": 1.8923255717482256e-05, "loss": 0.5496, "step": 894 }, { "epoch": 0.6, "learning_rate": 1.8920863080944534e-05, "loss": 0.5787, "step": 895 }, { "epoch": 0.6, "learning_rate": 1.8918467940620728e-05, "loss": 0.5992, "step": 896 }, { "epoch": 0.6, "learning_rate": 1.8916070297183066e-05, "loss": 0.5864, "step": 897 }, { "epoch": 0.6, "learning_rate": 1.891367015130449e-05, "loss": 0.5299, "step": 898 }, { "epoch": 0.61, "learning_rate": 1.891126750365863e-05, "loss": 0.5238, "step": 899 }, { "epoch": 0.61, "learning_rate": 1.8908862354919843e-05, "loss": 0.5545, "step": 900 }, { "epoch": 0.61, "learning_rate": 1.8906454705763163e-05, "loss": 0.5887, "step": 901 }, { "epoch": 0.61, "learning_rate": 1.8904044556864343e-05, "loss": 0.5461, "step": 902 }, { "epoch": 0.61, "learning_rate": 1.8901631908899824e-05, "loss": 0.5636, "step": 903 }, { "epoch": 0.61, "learning_rate": 1.8899216762546758e-05, "loss": 0.5595, "step": 904 }, { "epoch": 0.61, "learning_rate": 1.8896799118482995e-05, "loss": 0.5476, "step": 905 }, { "epoch": 0.61, "learning_rate": 1.889437897738709e-05, "loss": 0.5804, "step": 906 }, { "epoch": 0.61, "learning_rate": 1.8891956339938287e-05, "loss": 0.5566, "step": 907 }, { "epoch": 0.61, "learning_rate": 1.8889531206816546e-05, "loss": 0.5321, "step": 908 }, { "epoch": 0.61, "learning_rate": 1.888710357870252e-05, "loss": 0.5781, "step": 909 }, { "epoch": 0.61, "learning_rate": 1.888467345627756e-05, "loss": 0.5637, "step": 910 }, { "epoch": 0.61, "learning_rate": 1.888224084022372e-05, "loss": 0.5695, "step": 911 }, { "epoch": 0.61, "learning_rate": 1.8879805731223752e-05, "loss": 0.5982, "step": 912 }, { "epoch": 0.61, "learning_rate": 1.8877368129961115e-05, "loss": 0.5444, "step": 913 }, { "epoch": 0.62, "learning_rate": 1.8874928037119953e-05, "loss": 0.5021, "step": 914 }, { "epoch": 0.62, "learning_rate": 1.8872485453385124e-05, "loss": 0.5272, "step": 915 }, { "epoch": 0.62, "learning_rate": 1.8870040379442175e-05, "loss": 0.5637, "step": 916 }, { "epoch": 0.62, "learning_rate": 1.8867592815977362e-05, "loss": 0.5697, "step": 917 }, { "epoch": 0.62, "learning_rate": 1.886514276367763e-05, "loss": 0.5661, "step": 918 }, { "epoch": 0.62, "learning_rate": 1.8862690223230625e-05, "loss": 0.5432, "step": 919 }, { "epoch": 0.62, "learning_rate": 1.8860235195324695e-05, "loss": 0.5486, "step": 920 }, { "epoch": 0.62, "learning_rate": 1.8857777680648884e-05, "loss": 0.5475, "step": 921 }, { "epoch": 0.62, "learning_rate": 1.885531767989293e-05, "loss": 0.5502, "step": 922 }, { "epoch": 0.62, "learning_rate": 1.8852855193747274e-05, "loss": 0.536, "step": 923 }, { "epoch": 0.62, "learning_rate": 1.8850390222903057e-05, "loss": 0.5224, "step": 924 }, { "epoch": 0.62, "learning_rate": 1.8847922768052105e-05, "loss": 0.5563, "step": 925 }, { "epoch": 0.62, "learning_rate": 1.8845452829886957e-05, "loss": 0.5282, "step": 926 }, { "epoch": 0.62, "learning_rate": 1.8842980409100845e-05, "loss": 0.5655, "step": 927 }, { "epoch": 0.62, "learning_rate": 1.8840505506387684e-05, "loss": 0.5653, "step": 928 }, { "epoch": 0.63, "learning_rate": 1.88380281224421e-05, "loss": 0.5747, "step": 929 }, { "epoch": 0.63, "learning_rate": 1.8835548257959413e-05, "loss": 0.5567, "step": 930 }, { "epoch": 0.63, "learning_rate": 1.8833065913635634e-05, "loss": 0.5576, "step": 931 }, { "epoch": 0.63, "learning_rate": 1.8830581090167477e-05, "loss": 0.5928, "step": 932 }, { "epoch": 0.63, "learning_rate": 1.8828093788252348e-05, "loss": 0.6024, "step": 933 }, { "epoch": 0.63, "learning_rate": 1.882560400858835e-05, "loss": 0.5754, "step": 934 }, { "epoch": 0.63, "learning_rate": 1.8823111751874277e-05, "loss": 0.6022, "step": 935 }, { "epoch": 0.63, "learning_rate": 1.882061701880962e-05, "loss": 0.5968, "step": 936 }, { "epoch": 0.63, "learning_rate": 1.8818119810094576e-05, "loss": 0.5366, "step": 937 }, { "epoch": 0.63, "learning_rate": 1.8815620126430016e-05, "loss": 0.5694, "step": 938 }, { "epoch": 0.63, "learning_rate": 1.8813117968517524e-05, "loss": 0.5477, "step": 939 }, { "epoch": 0.63, "learning_rate": 1.881061333705937e-05, "loss": 0.5917, "step": 940 }, { "epoch": 0.63, "learning_rate": 1.880810623275852e-05, "loss": 0.5322, "step": 941 }, { "epoch": 0.63, "learning_rate": 1.8805596656318635e-05, "loss": 0.5812, "step": 942 }, { "epoch": 0.63, "learning_rate": 1.8803084608444063e-05, "loss": 0.5655, "step": 943 }, { "epoch": 0.64, "learning_rate": 1.880057008983986e-05, "loss": 0.541, "step": 944 }, { "epoch": 0.64, "learning_rate": 1.879805310121176e-05, "loss": 0.5597, "step": 945 }, { "epoch": 0.64, "learning_rate": 1.87955336432662e-05, "loss": 0.5374, "step": 946 }, { "epoch": 0.64, "learning_rate": 1.8793011716710302e-05, "loss": 0.5676, "step": 947 }, { "epoch": 0.64, "learning_rate": 1.8790487322251896e-05, "loss": 0.5439, "step": 948 }, { "epoch": 0.64, "learning_rate": 1.8787960460599482e-05, "loss": 0.5994, "step": 949 }, { "epoch": 0.64, "learning_rate": 1.8785431132462278e-05, "loss": 0.531, "step": 950 }, { "epoch": 0.64, "learning_rate": 1.8782899338550175e-05, "loss": 0.5691, "step": 951 }, { "epoch": 0.64, "learning_rate": 1.878036507957376e-05, "loss": 0.5385, "step": 952 }, { "epoch": 0.64, "learning_rate": 1.8777828356244316e-05, "loss": 0.5474, "step": 953 }, { "epoch": 0.64, "learning_rate": 1.877528916927382e-05, "loss": 0.5952, "step": 954 }, { "epoch": 0.64, "learning_rate": 1.8772747519374927e-05, "loss": 0.5825, "step": 955 }, { "epoch": 0.64, "learning_rate": 1.8770203407261004e-05, "loss": 0.5566, "step": 956 }, { "epoch": 0.64, "learning_rate": 1.876765683364609e-05, "loss": 0.5683, "step": 957 }, { "epoch": 0.65, "learning_rate": 1.8765107799244927e-05, "loss": 0.5128, "step": 958 }, { "epoch": 0.65, "learning_rate": 1.8762556304772936e-05, "loss": 0.5252, "step": 959 }, { "epoch": 0.65, "learning_rate": 1.8760002350946244e-05, "loss": 0.5762, "step": 960 }, { "epoch": 0.65, "learning_rate": 1.8757445938481654e-05, "loss": 0.5178, "step": 961 }, { "epoch": 0.65, "learning_rate": 1.8754887068096672e-05, "loss": 0.5271, "step": 962 }, { "epoch": 0.65, "learning_rate": 1.875232574050948e-05, "loss": 0.5369, "step": 963 }, { "epoch": 0.65, "learning_rate": 1.8749761956438956e-05, "loss": 0.6264, "step": 964 }, { "epoch": 0.65, "learning_rate": 1.8747195716604675e-05, "loss": 0.5599, "step": 965 }, { "epoch": 0.65, "learning_rate": 1.8744627021726887e-05, "loss": 0.5862, "step": 966 }, { "epoch": 0.65, "learning_rate": 1.874205587252654e-05, "loss": 0.5156, "step": 967 }, { "epoch": 0.65, "learning_rate": 1.8739482269725272e-05, "loss": 0.5341, "step": 968 }, { "epoch": 0.65, "learning_rate": 1.8736906214045405e-05, "loss": 0.518, "step": 969 }, { "epoch": 0.65, "learning_rate": 1.873432770620995e-05, "loss": 0.5551, "step": 970 }, { "epoch": 0.65, "learning_rate": 1.8731746746942606e-05, "loss": 0.5534, "step": 971 }, { "epoch": 0.65, "learning_rate": 1.8729163336967767e-05, "loss": 0.5365, "step": 972 }, { "epoch": 0.66, "learning_rate": 1.8726577477010506e-05, "loss": 0.5266, "step": 973 }, { "epoch": 0.66, "learning_rate": 1.8723989167796585e-05, "loss": 0.5612, "step": 974 }, { "epoch": 0.66, "learning_rate": 1.872139841005246e-05, "loss": 0.5674, "step": 975 }, { "epoch": 0.66, "learning_rate": 1.8718805204505265e-05, "loss": 0.56, "step": 976 }, { "epoch": 0.66, "learning_rate": 1.8716209551882826e-05, "loss": 0.5646, "step": 977 }, { "epoch": 0.66, "learning_rate": 1.8713611452913657e-05, "loss": 0.5802, "step": 978 }, { "epoch": 0.66, "learning_rate": 1.8711010908326957e-05, "loss": 0.5417, "step": 979 }, { "epoch": 0.66, "learning_rate": 1.8708407918852608e-05, "loss": 0.5585, "step": 980 }, { "epoch": 0.66, "learning_rate": 1.870580248522119e-05, "loss": 0.5539, "step": 981 }, { "epoch": 0.66, "learning_rate": 1.870319460816395e-05, "loss": 0.5482, "step": 982 }, { "epoch": 0.66, "learning_rate": 1.8700584288412836e-05, "loss": 0.5743, "step": 983 }, { "epoch": 0.66, "learning_rate": 1.8697971526700482e-05, "loss": 0.5737, "step": 984 }, { "epoch": 0.66, "learning_rate": 1.8695356323760197e-05, "loss": 0.5375, "step": 985 }, { "epoch": 0.66, "learning_rate": 1.8692738680325976e-05, "loss": 0.5992, "step": 986 }, { "epoch": 0.66, "learning_rate": 1.8690118597132514e-05, "loss": 0.5642, "step": 987 }, { "epoch": 0.67, "learning_rate": 1.868749607491517e-05, "loss": 0.5322, "step": 988 }, { "epoch": 0.67, "learning_rate": 1.8684871114410006e-05, "loss": 0.5381, "step": 989 }, { "epoch": 0.67, "learning_rate": 1.8682243716353754e-05, "loss": 0.5528, "step": 990 }, { "epoch": 0.67, "learning_rate": 1.867961388148384e-05, "loss": 0.5554, "step": 991 }, { "epoch": 0.67, "learning_rate": 1.8676981610538366e-05, "loss": 0.5264, "step": 992 }, { "epoch": 0.67, "learning_rate": 1.867434690425613e-05, "loss": 0.5763, "step": 993 }, { "epoch": 0.67, "learning_rate": 1.8671709763376596e-05, "loss": 0.6016, "step": 994 }, { "epoch": 0.67, "learning_rate": 1.8669070188639924e-05, "loss": 0.5204, "step": 995 }, { "epoch": 0.67, "learning_rate": 1.8666428180786956e-05, "loss": 0.5293, "step": 996 }, { "epoch": 0.67, "learning_rate": 1.8663783740559214e-05, "loss": 0.5339, "step": 997 }, { "epoch": 0.67, "learning_rate": 1.8661136868698903e-05, "loss": 0.5588, "step": 998 }, { "epoch": 0.67, "learning_rate": 1.865848756594891e-05, "loss": 0.5394, "step": 999 }, { "epoch": 0.67, "learning_rate": 1.8655835833052808e-05, "loss": 0.5522, "step": 1000 }, { "epoch": 0.67, "learning_rate": 1.8653181670754845e-05, "loss": 0.6408, "step": 1001 }, { "epoch": 0.67, "learning_rate": 1.8650525079799955e-05, "loss": 0.6042, "step": 1002 }, { "epoch": 0.68, "learning_rate": 1.8647866060933756e-05, "loss": 0.5654, "step": 1003 }, { "epoch": 0.68, "learning_rate": 1.8645204614902548e-05, "loss": 0.5483, "step": 1004 }, { "epoch": 0.68, "learning_rate": 1.8642540742453302e-05, "loss": 0.5671, "step": 1005 }, { "epoch": 0.68, "learning_rate": 1.8639874444333687e-05, "loss": 0.5773, "step": 1006 }, { "epoch": 0.68, "learning_rate": 1.8637205721292033e-05, "loss": 0.5795, "step": 1007 }, { "epoch": 0.68, "learning_rate": 1.8634534574077365e-05, "loss": 0.5965, "step": 1008 }, { "epoch": 0.68, "learning_rate": 1.8631861003439388e-05, "loss": 0.5701, "step": 1009 }, { "epoch": 0.68, "learning_rate": 1.8629185010128478e-05, "loss": 0.5733, "step": 1010 }, { "epoch": 0.68, "learning_rate": 1.8626506594895696e-05, "loss": 0.5599, "step": 1011 }, { "epoch": 0.68, "learning_rate": 1.8623825758492788e-05, "loss": 0.5474, "step": 1012 }, { "epoch": 0.68, "learning_rate": 1.8621142501672175e-05, "loss": 0.5942, "step": 1013 }, { "epoch": 0.68, "learning_rate": 1.861845682518695e-05, "loss": 0.578, "step": 1014 }, { "epoch": 0.68, "learning_rate": 1.8615768729790893e-05, "loss": 0.5267, "step": 1015 }, { "epoch": 0.68, "learning_rate": 1.8613078216238468e-05, "loss": 0.548, "step": 1016 }, { "epoch": 0.68, "learning_rate": 1.8610385285284806e-05, "loss": 0.5534, "step": 1017 }, { "epoch": 0.69, "learning_rate": 1.8607689937685728e-05, "loss": 0.554, "step": 1018 }, { "epoch": 0.69, "learning_rate": 1.8604992174197716e-05, "loss": 0.5538, "step": 1019 }, { "epoch": 0.69, "learning_rate": 1.8602291995577957e-05, "loss": 0.5392, "step": 1020 }, { "epoch": 0.69, "learning_rate": 1.8599589402584288e-05, "loss": 0.552, "step": 1021 }, { "epoch": 0.69, "learning_rate": 1.8596884395975242e-05, "loss": 0.5482, "step": 1022 }, { "epoch": 0.69, "learning_rate": 1.859417697651002e-05, "loss": 0.5613, "step": 1023 }, { "epoch": 0.69, "learning_rate": 1.859146714494851e-05, "loss": 0.5874, "step": 1024 }, { "epoch": 0.69, "learning_rate": 1.8588754902051262e-05, "loss": 0.5678, "step": 1025 }, { "epoch": 0.69, "learning_rate": 1.8586040248579515e-05, "loss": 0.5738, "step": 1026 }, { "epoch": 0.69, "learning_rate": 1.858332318529518e-05, "loss": 0.5775, "step": 1027 }, { "epoch": 0.69, "learning_rate": 1.858060371296085e-05, "loss": 0.569, "step": 1028 }, { "epoch": 0.69, "learning_rate": 1.857788183233978e-05, "loss": 0.5369, "step": 1029 }, { "epoch": 0.69, "learning_rate": 1.857515754419592e-05, "loss": 0.5448, "step": 1030 }, { "epoch": 0.69, "learning_rate": 1.8572430849293882e-05, "loss": 0.5283, "step": 1031 }, { "epoch": 0.69, "learning_rate": 1.8569701748398954e-05, "loss": 0.5402, "step": 1032 }, { "epoch": 0.7, "learning_rate": 1.856697024227711e-05, "loss": 0.5652, "step": 1033 }, { "epoch": 0.7, "learning_rate": 1.8564236331694988e-05, "loss": 0.5265, "step": 1034 }, { "epoch": 0.7, "learning_rate": 1.8561500017419902e-05, "loss": 0.5431, "step": 1035 }, { "epoch": 0.7, "learning_rate": 1.8558761300219846e-05, "loss": 0.5301, "step": 1036 }, { "epoch": 0.7, "learning_rate": 1.8556020180863487e-05, "loss": 0.5852, "step": 1037 }, { "epoch": 0.7, "learning_rate": 1.8553276660120164e-05, "loss": 0.5673, "step": 1038 }, { "epoch": 0.7, "learning_rate": 1.855053073875989e-05, "loss": 0.5536, "step": 1039 }, { "epoch": 0.7, "learning_rate": 1.8547782417553355e-05, "loss": 0.5695, "step": 1040 }, { "epoch": 0.7, "learning_rate": 1.854503169727192e-05, "loss": 0.547, "step": 1041 }, { "epoch": 0.7, "learning_rate": 1.8542278578687614e-05, "loss": 0.5649, "step": 1042 }, { "epoch": 0.7, "learning_rate": 1.853952306257315e-05, "loss": 0.5144, "step": 1043 }, { "epoch": 0.7, "learning_rate": 1.853676514970191e-05, "loss": 0.5283, "step": 1044 }, { "epoch": 0.7, "learning_rate": 1.8534004840847943e-05, "loss": 0.5587, "step": 1045 }, { "epoch": 0.7, "learning_rate": 1.853124213678598e-05, "loss": 0.5484, "step": 1046 }, { "epoch": 0.7, "learning_rate": 1.8528477038291416e-05, "loss": 0.5718, "step": 1047 }, { "epoch": 0.71, "learning_rate": 1.8525709546140314e-05, "loss": 0.5829, "step": 1048 }, { "epoch": 0.71, "learning_rate": 1.852293966110943e-05, "loss": 0.6006, "step": 1049 }, { "epoch": 0.71, "learning_rate": 1.8520167383976168e-05, "loss": 0.5346, "step": 1050 }, { "epoch": 0.71, "learning_rate": 1.851739271551862e-05, "loss": 0.5626, "step": 1051 }, { "epoch": 0.71, "learning_rate": 1.8514615656515534e-05, "loss": 0.553, "step": 1052 }, { "epoch": 0.71, "learning_rate": 1.851183620774634e-05, "loss": 0.5863, "step": 1053 }, { "epoch": 0.71, "learning_rate": 1.8509054369991136e-05, "loss": 0.5691, "step": 1054 }, { "epoch": 0.71, "learning_rate": 1.850627014403069e-05, "loss": 0.5806, "step": 1055 }, { "epoch": 0.71, "learning_rate": 1.8503483530646443e-05, "loss": 0.6041, "step": 1056 }, { "epoch": 0.71, "learning_rate": 1.85006945306205e-05, "loss": 0.5592, "step": 1057 }, { "epoch": 0.71, "learning_rate": 1.8497903144735643e-05, "loss": 0.5467, "step": 1058 }, { "epoch": 0.71, "learning_rate": 1.8495109373775317e-05, "loss": 0.5648, "step": 1059 }, { "epoch": 0.71, "learning_rate": 1.849231321852364e-05, "loss": 0.5405, "step": 1060 }, { "epoch": 0.71, "learning_rate": 1.8489514679765402e-05, "loss": 0.5315, "step": 1061 }, { "epoch": 0.72, "learning_rate": 1.8486713758286052e-05, "loss": 0.5274, "step": 1062 }, { "epoch": 0.72, "learning_rate": 1.8483910454871724e-05, "loss": 0.5696, "step": 1063 }, { "epoch": 0.72, "learning_rate": 1.84811047703092e-05, "loss": 0.54, "step": 1064 }, { "epoch": 0.72, "learning_rate": 1.8478296705385953e-05, "loss": 0.5545, "step": 1065 }, { "epoch": 0.72, "learning_rate": 1.84754862608901e-05, "loss": 0.5261, "step": 1066 }, { "epoch": 0.72, "learning_rate": 1.8472673437610448e-05, "loss": 0.5648, "step": 1067 }, { "epoch": 0.72, "learning_rate": 1.8469858236336456e-05, "loss": 0.5383, "step": 1068 }, { "epoch": 0.72, "learning_rate": 1.846704065785826e-05, "loss": 0.5471, "step": 1069 }, { "epoch": 0.72, "learning_rate": 1.8464220702966656e-05, "loss": 0.5655, "step": 1070 }, { "epoch": 0.72, "learning_rate": 1.8461398372453113e-05, "loss": 0.56, "step": 1071 }, { "epoch": 0.72, "learning_rate": 1.8458573667109765e-05, "loss": 0.5315, "step": 1072 }, { "epoch": 0.72, "learning_rate": 1.845574658772941e-05, "loss": 0.5664, "step": 1073 }, { "epoch": 0.72, "learning_rate": 1.8452917135105513e-05, "loss": 0.6063, "step": 1074 }, { "epoch": 0.72, "learning_rate": 1.8450085310032206e-05, "loss": 0.5747, "step": 1075 }, { "epoch": 0.72, "learning_rate": 1.844725111330429e-05, "loss": 0.5459, "step": 1076 }, { "epoch": 0.73, "learning_rate": 1.844441454571723e-05, "loss": 0.5482, "step": 1077 }, { "epoch": 0.73, "learning_rate": 1.844157560806715e-05, "loss": 0.5597, "step": 1078 }, { "epoch": 0.73, "learning_rate": 1.8438734301150845e-05, "loss": 0.5262, "step": 1079 }, { "epoch": 0.73, "learning_rate": 1.8435890625765776e-05, "loss": 0.5731, "step": 1080 }, { "epoch": 0.73, "learning_rate": 1.8433044582710067e-05, "loss": 0.5268, "step": 1081 }, { "epoch": 0.73, "learning_rate": 1.8430196172782505e-05, "loss": 0.5854, "step": 1082 }, { "epoch": 0.73, "learning_rate": 1.8427345396782547e-05, "loss": 0.5544, "step": 1083 }, { "epoch": 0.73, "learning_rate": 1.8424492255510305e-05, "loss": 0.5715, "step": 1084 }, { "epoch": 0.73, "learning_rate": 1.8421636749766563e-05, "loss": 0.5682, "step": 1085 }, { "epoch": 0.73, "learning_rate": 1.841877888035276e-05, "loss": 0.561, "step": 1086 }, { "epoch": 0.73, "learning_rate": 1.8415918648071014e-05, "loss": 0.5562, "step": 1087 }, { "epoch": 0.73, "learning_rate": 1.8413056053724086e-05, "loss": 0.5626, "step": 1088 }, { "epoch": 0.73, "learning_rate": 1.841019109811541e-05, "loss": 0.5773, "step": 1089 }, { "epoch": 0.73, "learning_rate": 1.8407323782049093e-05, "loss": 0.5336, "step": 1090 }, { "epoch": 0.73, "learning_rate": 1.8404454106329886e-05, "loss": 0.516, "step": 1091 }, { "epoch": 0.74, "learning_rate": 1.840158207176321e-05, "loss": 0.5546, "step": 1092 }, { "epoch": 0.74, "learning_rate": 1.8398707679155154e-05, "loss": 0.5857, "step": 1093 }, { "epoch": 0.74, "learning_rate": 1.8395830929312457e-05, "loss": 0.5659, "step": 1094 }, { "epoch": 0.74, "learning_rate": 1.8392951823042525e-05, "loss": 0.5407, "step": 1095 }, { "epoch": 0.74, "learning_rate": 1.8390070361153436e-05, "loss": 0.562, "step": 1096 }, { "epoch": 0.74, "learning_rate": 1.838718654445391e-05, "loss": 0.5819, "step": 1097 }, { "epoch": 0.74, "learning_rate": 1.838430037375334e-05, "loss": 0.5558, "step": 1098 }, { "epoch": 0.74, "learning_rate": 1.838141184986178e-05, "loss": 0.576, "step": 1099 }, { "epoch": 0.74, "learning_rate": 1.8378520973589937e-05, "loss": 0.5851, "step": 1100 }, { "epoch": 0.74, "learning_rate": 1.8375627745749184e-05, "loss": 0.5467, "step": 1101 }, { "epoch": 0.74, "learning_rate": 1.8372732167151556e-05, "loss": 0.5627, "step": 1102 }, { "epoch": 0.74, "learning_rate": 1.836983423860974e-05, "loss": 0.5335, "step": 1103 }, { "epoch": 0.74, "learning_rate": 1.836693396093709e-05, "loss": 0.566, "step": 1104 }, { "epoch": 0.74, "learning_rate": 1.8364031334947612e-05, "loss": 0.5558, "step": 1105 }, { "epoch": 0.74, "learning_rate": 1.836112636145598e-05, "loss": 0.5288, "step": 1106 }, { "epoch": 0.75, "learning_rate": 1.8358219041277523e-05, "loss": 0.5459, "step": 1107 }, { "epoch": 0.75, "learning_rate": 1.8355309375228225e-05, "loss": 0.5884, "step": 1108 }, { "epoch": 0.75, "learning_rate": 1.835239736412473e-05, "loss": 0.5693, "step": 1109 }, { "epoch": 0.75, "learning_rate": 1.8349483008784346e-05, "loss": 0.5611, "step": 1110 }, { "epoch": 0.75, "learning_rate": 1.834656631002503e-05, "loss": 0.5495, "step": 1111 }, { "epoch": 0.75, "learning_rate": 1.8343647268665407e-05, "loss": 0.5381, "step": 1112 }, { "epoch": 0.75, "learning_rate": 1.8340725885524747e-05, "loss": 0.5539, "step": 1113 }, { "epoch": 0.75, "learning_rate": 1.8337802161422987e-05, "loss": 0.5514, "step": 1114 }, { "epoch": 0.75, "learning_rate": 1.833487609718072e-05, "loss": 0.5761, "step": 1115 }, { "epoch": 0.75, "learning_rate": 1.8331947693619195e-05, "loss": 0.5259, "step": 1116 }, { "epoch": 0.75, "learning_rate": 1.8329016951560307e-05, "loss": 0.5635, "step": 1117 }, { "epoch": 0.75, "learning_rate": 1.8326083871826626e-05, "loss": 0.5649, "step": 1118 }, { "epoch": 0.75, "learning_rate": 1.8323148455241363e-05, "loss": 0.5648, "step": 1119 }, { "epoch": 0.75, "learning_rate": 1.8320210702628397e-05, "loss": 0.5527, "step": 1120 }, { "epoch": 0.75, "learning_rate": 1.831727061481225e-05, "loss": 0.5816, "step": 1121 }, { "epoch": 0.76, "learning_rate": 1.831432819261811e-05, "loss": 0.5715, "step": 1122 }, { "epoch": 0.76, "learning_rate": 1.8311383436871814e-05, "loss": 0.5798, "step": 1123 }, { "epoch": 0.76, "learning_rate": 1.8308436348399855e-05, "loss": 0.5713, "step": 1124 }, { "epoch": 0.76, "learning_rate": 1.8305486928029383e-05, "loss": 0.5625, "step": 1125 }, { "epoch": 0.76, "learning_rate": 1.83025351765882e-05, "loss": 0.5452, "step": 1126 }, { "epoch": 0.76, "learning_rate": 1.8299581094904765e-05, "loss": 0.5403, "step": 1127 }, { "epoch": 0.76, "learning_rate": 1.829662468380819e-05, "loss": 0.5742, "step": 1128 }, { "epoch": 0.76, "learning_rate": 1.8293665944128237e-05, "loss": 0.5546, "step": 1129 }, { "epoch": 0.76, "learning_rate": 1.8290704876695325e-05, "loss": 0.544, "step": 1130 }, { "epoch": 0.76, "learning_rate": 1.828774148234053e-05, "loss": 0.5635, "step": 1131 }, { "epoch": 0.76, "learning_rate": 1.828477576189557e-05, "loss": 0.5788, "step": 1132 }, { "epoch": 0.76, "learning_rate": 1.828180771619283e-05, "loss": 0.5658, "step": 1133 }, { "epoch": 0.76, "learning_rate": 1.8278837346065336e-05, "loss": 0.5797, "step": 1134 }, { "epoch": 0.76, "learning_rate": 1.8275864652346772e-05, "loss": 0.5697, "step": 1135 }, { "epoch": 0.76, "learning_rate": 1.8272889635871475e-05, "loss": 0.5545, "step": 1136 }, { "epoch": 0.77, "learning_rate": 1.826991229747443e-05, "loss": 0.5676, "step": 1137 }, { "epoch": 0.77, "learning_rate": 1.8266932637991278e-05, "loss": 0.5892, "step": 1138 }, { "epoch": 0.77, "learning_rate": 1.8263950658258306e-05, "loss": 0.5726, "step": 1139 }, { "epoch": 0.77, "learning_rate": 1.826096635911246e-05, "loss": 0.5479, "step": 1140 }, { "epoch": 0.77, "learning_rate": 1.8257979741391326e-05, "loss": 0.5627, "step": 1141 }, { "epoch": 0.77, "learning_rate": 1.825499080593315e-05, "loss": 0.5281, "step": 1142 }, { "epoch": 0.77, "learning_rate": 1.825199955357683e-05, "loss": 0.5572, "step": 1143 }, { "epoch": 0.77, "learning_rate": 1.82490059851619e-05, "loss": 0.5584, "step": 1144 }, { "epoch": 0.77, "learning_rate": 1.8246010101528566e-05, "loss": 0.5633, "step": 1145 }, { "epoch": 0.77, "learning_rate": 1.8243011903517663e-05, "loss": 0.5161, "step": 1146 }, { "epoch": 0.77, "learning_rate": 1.8240011391970688e-05, "loss": 0.5349, "step": 1147 }, { "epoch": 0.77, "learning_rate": 1.8237008567729783e-05, "loss": 0.5578, "step": 1148 }, { "epoch": 0.77, "learning_rate": 1.8234003431637734e-05, "loss": 0.5572, "step": 1149 }, { "epoch": 0.77, "learning_rate": 1.823099598453799e-05, "loss": 0.5674, "step": 1150 }, { "epoch": 0.78, "learning_rate": 1.8227986227274638e-05, "loss": 0.56, "step": 1151 }, { "epoch": 0.78, "learning_rate": 1.822497416069241e-05, "loss": 0.5024, "step": 1152 }, { "epoch": 0.78, "learning_rate": 1.8221959785636703e-05, "loss": 0.5567, "step": 1153 }, { "epoch": 0.78, "learning_rate": 1.8218943102953545e-05, "loss": 0.5724, "step": 1154 }, { "epoch": 0.78, "learning_rate": 1.8215924113489613e-05, "loss": 0.5166, "step": 1155 }, { "epoch": 0.78, "learning_rate": 1.8212902818092236e-05, "loss": 0.5596, "step": 1156 }, { "epoch": 0.78, "learning_rate": 1.8209879217609394e-05, "loss": 0.5518, "step": 1157 }, { "epoch": 0.78, "learning_rate": 1.8206853312889713e-05, "loss": 0.5058, "step": 1158 }, { "epoch": 0.78, "learning_rate": 1.8203825104782457e-05, "loss": 0.5774, "step": 1159 }, { "epoch": 0.78, "learning_rate": 1.820079459413754e-05, "loss": 0.5464, "step": 1160 }, { "epoch": 0.78, "learning_rate": 1.8197761781805532e-05, "loss": 0.5645, "step": 1161 }, { "epoch": 0.78, "learning_rate": 1.8194726668637636e-05, "loss": 0.5986, "step": 1162 }, { "epoch": 0.78, "learning_rate": 1.8191689255485707e-05, "loss": 0.5357, "step": 1163 }, { "epoch": 0.78, "learning_rate": 1.8188649543202244e-05, "loss": 0.5441, "step": 1164 }, { "epoch": 0.78, "learning_rate": 1.8185607532640396e-05, "loss": 0.6002, "step": 1165 }, { "epoch": 0.79, "learning_rate": 1.8182563224653947e-05, "loss": 0.5582, "step": 1166 }, { "epoch": 0.79, "learning_rate": 1.817951662009734e-05, "loss": 0.5546, "step": 1167 }, { "epoch": 0.79, "learning_rate": 1.8176467719825645e-05, "loss": 0.5573, "step": 1168 }, { "epoch": 0.79, "learning_rate": 1.8173416524694595e-05, "loss": 0.5548, "step": 1169 }, { "epoch": 0.79, "learning_rate": 1.8170363035560544e-05, "loss": 0.5505, "step": 1170 }, { "epoch": 0.79, "learning_rate": 1.816730725328052e-05, "loss": 0.5436, "step": 1171 }, { "epoch": 0.79, "learning_rate": 1.816424917871217e-05, "loss": 0.5785, "step": 1172 }, { "epoch": 0.79, "learning_rate": 1.816118881271379e-05, "loss": 0.5582, "step": 1173 }, { "epoch": 0.79, "learning_rate": 1.815812615614433e-05, "loss": 0.5828, "step": 1174 }, { "epoch": 0.79, "learning_rate": 1.8155061209863368e-05, "loss": 0.5446, "step": 1175 }, { "epoch": 0.79, "learning_rate": 1.815199397473113e-05, "loss": 0.5474, "step": 1176 }, { "epoch": 0.79, "learning_rate": 1.814892445160849e-05, "loss": 0.5278, "step": 1177 }, { "epoch": 0.79, "learning_rate": 1.8145852641356963e-05, "loss": 0.5898, "step": 1178 }, { "epoch": 0.79, "learning_rate": 1.8142778544838695e-05, "loss": 0.5891, "step": 1179 }, { "epoch": 0.79, "learning_rate": 1.8139702162916485e-05, "loss": 0.5793, "step": 1180 }, { "epoch": 0.8, "learning_rate": 1.813662349645377e-05, "loss": 0.5586, "step": 1181 }, { "epoch": 0.8, "learning_rate": 1.8133542546314628e-05, "loss": 0.5638, "step": 1182 }, { "epoch": 0.8, "learning_rate": 1.813045931336378e-05, "loss": 0.5568, "step": 1183 }, { "epoch": 0.8, "learning_rate": 1.812737379846658e-05, "loss": 0.5527, "step": 1184 }, { "epoch": 0.8, "learning_rate": 1.8124286002489034e-05, "loss": 0.5288, "step": 1185 }, { "epoch": 0.8, "learning_rate": 1.812119592629778e-05, "loss": 0.5394, "step": 1186 }, { "epoch": 0.8, "learning_rate": 1.8118103570760097e-05, "loss": 0.5451, "step": 1187 }, { "epoch": 0.8, "learning_rate": 1.8115008936743908e-05, "loss": 0.5415, "step": 1188 }, { "epoch": 0.8, "learning_rate": 1.8111912025117774e-05, "loss": 0.5766, "step": 1189 }, { "epoch": 0.8, "learning_rate": 1.8108812836750887e-05, "loss": 0.537, "step": 1190 }, { "epoch": 0.8, "learning_rate": 1.810571137251309e-05, "loss": 0.5629, "step": 1191 }, { "epoch": 0.8, "learning_rate": 1.8102607633274863e-05, "loss": 0.6042, "step": 1192 }, { "epoch": 0.8, "learning_rate": 1.809950161990731e-05, "loss": 0.5561, "step": 1193 }, { "epoch": 0.8, "learning_rate": 1.8096393333282195e-05, "loss": 0.5187, "step": 1194 }, { "epoch": 0.8, "learning_rate": 1.8093282774271908e-05, "loss": 0.5787, "step": 1195 }, { "epoch": 0.81, "learning_rate": 1.8090169943749477e-05, "loss": 0.5906, "step": 1196 }, { "epoch": 0.81, "learning_rate": 1.8087054842588565e-05, "loss": 0.5603, "step": 1197 }, { "epoch": 0.81, "learning_rate": 1.808393747166348e-05, "loss": 0.5595, "step": 1198 }, { "epoch": 0.81, "learning_rate": 1.808081783184916e-05, "loss": 0.5911, "step": 1199 }, { "epoch": 0.81, "learning_rate": 1.807769592402119e-05, "loss": 0.5518, "step": 1200 }, { "epoch": 0.81, "learning_rate": 1.8074571749055778e-05, "loss": 0.5511, "step": 1201 }, { "epoch": 0.81, "learning_rate": 1.8071445307829775e-05, "loss": 0.5431, "step": 1202 }, { "epoch": 0.81, "learning_rate": 1.8068316601220667e-05, "loss": 0.5305, "step": 1203 }, { "epoch": 0.81, "learning_rate": 1.8065185630106583e-05, "loss": 0.5709, "step": 1204 }, { "epoch": 0.81, "learning_rate": 1.8062052395366275e-05, "loss": 0.5471, "step": 1205 }, { "epoch": 0.81, "learning_rate": 1.805891689787914e-05, "loss": 0.521, "step": 1206 }, { "epoch": 0.81, "learning_rate": 1.8055779138525205e-05, "loss": 0.565, "step": 1207 }, { "epoch": 0.81, "learning_rate": 1.8052639118185132e-05, "loss": 0.5485, "step": 1208 }, { "epoch": 0.81, "learning_rate": 1.8049496837740223e-05, "loss": 0.5483, "step": 1209 }, { "epoch": 0.81, "learning_rate": 1.8046352298072408e-05, "loss": 0.5808, "step": 1210 }, { "epoch": 0.82, "learning_rate": 1.804320550006425e-05, "loss": 0.5419, "step": 1211 }, { "epoch": 0.82, "learning_rate": 1.8040056444598958e-05, "loss": 0.5123, "step": 1212 }, { "epoch": 0.82, "learning_rate": 1.8036905132560362e-05, "loss": 0.5991, "step": 1213 }, { "epoch": 0.82, "learning_rate": 1.8033751564832926e-05, "loss": 0.5723, "step": 1214 }, { "epoch": 0.82, "learning_rate": 1.803059574230175e-05, "loss": 0.5062, "step": 1215 }, { "epoch": 0.82, "learning_rate": 1.8027437665852574e-05, "loss": 0.5779, "step": 1216 }, { "epoch": 0.82, "learning_rate": 1.8024277336371755e-05, "loss": 0.5962, "step": 1217 }, { "epoch": 0.82, "learning_rate": 1.80211147547463e-05, "loss": 0.575, "step": 1218 }, { "epoch": 0.82, "learning_rate": 1.8017949921863832e-05, "loss": 0.5319, "step": 1219 }, { "epoch": 0.82, "learning_rate": 1.8014782838612616e-05, "loss": 0.5337, "step": 1220 }, { "epoch": 0.82, "learning_rate": 1.8011613505881545e-05, "loss": 0.5396, "step": 1221 }, { "epoch": 0.82, "learning_rate": 1.800844192456015e-05, "loss": 0.5938, "step": 1222 }, { "epoch": 0.82, "learning_rate": 1.800526809553858e-05, "loss": 0.5384, "step": 1223 }, { "epoch": 0.82, "learning_rate": 1.800209201970762e-05, "loss": 0.5173, "step": 1224 }, { "epoch": 0.82, "learning_rate": 1.7998913697958693e-05, "loss": 0.5705, "step": 1225 }, { "epoch": 0.83, "learning_rate": 1.7995733131183846e-05, "loss": 0.5306, "step": 1226 }, { "epoch": 0.83, "learning_rate": 1.799255032027576e-05, "loss": 0.5502, "step": 1227 }, { "epoch": 0.83, "learning_rate": 1.7989365266127735e-05, "loss": 0.5615, "step": 1228 }, { "epoch": 0.83, "learning_rate": 1.7986177969633717e-05, "loss": 0.5426, "step": 1229 }, { "epoch": 0.83, "learning_rate": 1.7982988431688266e-05, "loss": 0.5941, "step": 1230 }, { "epoch": 0.83, "learning_rate": 1.797979665318658e-05, "loss": 0.5623, "step": 1231 }, { "epoch": 0.83, "learning_rate": 1.7976602635024485e-05, "loss": 0.5853, "step": 1232 }, { "epoch": 0.83, "learning_rate": 1.7973406378098434e-05, "loss": 0.4976, "step": 1233 }, { "epoch": 0.83, "learning_rate": 1.7970207883305512e-05, "loss": 0.5476, "step": 1234 }, { "epoch": 0.83, "learning_rate": 1.7967007151543425e-05, "loss": 0.5423, "step": 1235 }, { "epoch": 0.83, "learning_rate": 1.796380418371051e-05, "loss": 0.5634, "step": 1236 }, { "epoch": 0.83, "learning_rate": 1.7960598980705734e-05, "loss": 0.5151, "step": 1237 }, { "epoch": 0.83, "learning_rate": 1.795739154342869e-05, "loss": 0.5512, "step": 1238 }, { "epoch": 0.83, "learning_rate": 1.7954181872779598e-05, "loss": 0.586, "step": 1239 }, { "epoch": 0.83, "learning_rate": 1.7950969969659303e-05, "loss": 0.5718, "step": 1240 }, { "epoch": 0.84, "learning_rate": 1.794775583496928e-05, "loss": 0.5715, "step": 1241 }, { "epoch": 0.84, "learning_rate": 1.7944539469611625e-05, "loss": 0.5841, "step": 1242 }, { "epoch": 0.84, "learning_rate": 1.7941320874489065e-05, "loss": 0.5748, "step": 1243 }, { "epoch": 0.84, "learning_rate": 1.7938100050504953e-05, "loss": 0.5816, "step": 1244 }, { "epoch": 0.84, "learning_rate": 1.7934876998563263e-05, "loss": 0.5453, "step": 1245 }, { "epoch": 0.84, "learning_rate": 1.7931651719568603e-05, "loss": 0.5642, "step": 1246 }, { "epoch": 0.84, "learning_rate": 1.7928424214426196e-05, "loss": 0.5858, "step": 1247 }, { "epoch": 0.84, "learning_rate": 1.7925194484041893e-05, "loss": 0.6241, "step": 1248 }, { "epoch": 0.84, "learning_rate": 1.7921962529322173e-05, "loss": 0.5986, "step": 1249 }, { "epoch": 0.84, "learning_rate": 1.7918728351174136e-05, "loss": 0.587, "step": 1250 }, { "epoch": 0.84, "learning_rate": 1.791549195050551e-05, "loss": 0.5381, "step": 1251 }, { "epoch": 0.84, "learning_rate": 1.791225332822464e-05, "loss": 0.5746, "step": 1252 }, { "epoch": 0.84, "learning_rate": 1.7909012485240497e-05, "loss": 0.5194, "step": 1253 }, { "epoch": 0.84, "learning_rate": 1.790576942246268e-05, "loss": 0.5305, "step": 1254 }, { "epoch": 0.85, "learning_rate": 1.790252414080141e-05, "loss": 0.5204, "step": 1255 }, { "epoch": 0.85, "learning_rate": 1.7899276641167516e-05, "loss": 0.5574, "step": 1256 }, { "epoch": 0.85, "learning_rate": 1.7896026924472478e-05, "loss": 0.5867, "step": 1257 }, { "epoch": 0.85, "learning_rate": 1.789277499162837e-05, "loss": 0.5388, "step": 1258 }, { "epoch": 0.85, "learning_rate": 1.7889520843547908e-05, "loss": 0.5574, "step": 1259 }, { "epoch": 0.85, "learning_rate": 1.788626448114442e-05, "loss": 0.5374, "step": 1260 }, { "epoch": 0.85, "learning_rate": 1.7883005905331855e-05, "loss": 0.5887, "step": 1261 }, { "epoch": 0.85, "learning_rate": 1.787974511702479e-05, "loss": 0.5416, "step": 1262 }, { "epoch": 0.85, "learning_rate": 1.787648211713841e-05, "loss": 0.5464, "step": 1263 }, { "epoch": 0.85, "learning_rate": 1.7873216906588536e-05, "loss": 0.5829, "step": 1264 }, { "epoch": 0.85, "learning_rate": 1.7869949486291604e-05, "loss": 0.54, "step": 1265 }, { "epoch": 0.85, "learning_rate": 1.7866679857164663e-05, "loss": 0.5434, "step": 1266 }, { "epoch": 0.85, "learning_rate": 1.786340802012539e-05, "loss": 0.5229, "step": 1267 }, { "epoch": 0.85, "learning_rate": 1.7860133976092083e-05, "loss": 0.5692, "step": 1268 }, { "epoch": 0.85, "learning_rate": 1.7856857725983653e-05, "loss": 0.5863, "step": 1269 }, { "epoch": 0.86, "learning_rate": 1.7853579270719635e-05, "loss": 0.5444, "step": 1270 }, { "epoch": 0.86, "learning_rate": 1.785029861122018e-05, "loss": 0.5632, "step": 1271 }, { "epoch": 0.86, "learning_rate": 1.7847015748406055e-05, "loss": 0.5744, "step": 1272 }, { "epoch": 0.86, "learning_rate": 1.7843730683198658e-05, "loss": 0.5433, "step": 1273 }, { "epoch": 0.86, "learning_rate": 1.7840443416519985e-05, "loss": 0.5505, "step": 1274 }, { "epoch": 0.86, "learning_rate": 1.7837153949292674e-05, "loss": 0.5459, "step": 1275 }, { "epoch": 0.86, "learning_rate": 1.7833862282439956e-05, "loss": 0.5358, "step": 1276 }, { "epoch": 0.86, "learning_rate": 1.7830568416885697e-05, "loss": 0.5456, "step": 1277 }, { "epoch": 0.86, "learning_rate": 1.7827272353554376e-05, "loss": 0.556, "step": 1278 }, { "epoch": 0.86, "learning_rate": 1.782397409337108e-05, "loss": 0.5292, "step": 1279 }, { "epoch": 0.86, "learning_rate": 1.782067363726153e-05, "loss": 0.5772, "step": 1280 }, { "epoch": 0.86, "learning_rate": 1.7817370986152042e-05, "loss": 0.551, "step": 1281 }, { "epoch": 0.86, "learning_rate": 1.7814066140969565e-05, "loss": 0.5204, "step": 1282 }, { "epoch": 0.86, "learning_rate": 1.7810759102641657e-05, "loss": 0.5896, "step": 1283 }, { "epoch": 0.86, "learning_rate": 1.7807449872096492e-05, "loss": 0.5536, "step": 1284 }, { "epoch": 0.87, "learning_rate": 1.7804138450262862e-05, "loss": 0.577, "step": 1285 }, { "epoch": 0.87, "learning_rate": 1.7800824838070166e-05, "loss": 0.5369, "step": 1286 }, { "epoch": 0.87, "learning_rate": 1.779750903644843e-05, "loss": 0.5514, "step": 1287 }, { "epoch": 0.87, "learning_rate": 1.7794191046328283e-05, "loss": 0.5477, "step": 1288 }, { "epoch": 0.87, "learning_rate": 1.779087086864098e-05, "loss": 0.5724, "step": 1289 }, { "epoch": 0.87, "learning_rate": 1.7787548504318372e-05, "loss": 0.544, "step": 1290 }, { "epoch": 0.87, "learning_rate": 1.7784223954292944e-05, "loss": 0.5232, "step": 1291 }, { "epoch": 0.87, "learning_rate": 1.7780897219497783e-05, "loss": 0.5792, "step": 1292 }, { "epoch": 0.87, "learning_rate": 1.7777568300866587e-05, "loss": 0.5664, "step": 1293 }, { "epoch": 0.87, "learning_rate": 1.777423719933368e-05, "loss": 0.5499, "step": 1294 }, { "epoch": 0.87, "learning_rate": 1.7770903915833986e-05, "loss": 0.5658, "step": 1295 }, { "epoch": 0.87, "learning_rate": 1.7767568451303042e-05, "loss": 0.5446, "step": 1296 }, { "epoch": 0.87, "learning_rate": 1.7764230806677005e-05, "loss": 0.5919, "step": 1297 }, { "epoch": 0.87, "learning_rate": 1.7760890982892638e-05, "loss": 0.5649, "step": 1298 }, { "epoch": 0.87, "learning_rate": 1.775754898088732e-05, "loss": 0.5406, "step": 1299 }, { "epoch": 0.88, "learning_rate": 1.775420480159903e-05, "loss": 0.5525, "step": 1300 }, { "epoch": 0.88, "learning_rate": 1.775085844596638e-05, "loss": 0.5279, "step": 1301 }, { "epoch": 0.88, "learning_rate": 1.7747509914928568e-05, "loss": 0.5058, "step": 1302 }, { "epoch": 0.88, "learning_rate": 1.7744159209425416e-05, "loss": 0.5327, "step": 1303 }, { "epoch": 0.88, "learning_rate": 1.774080633039736e-05, "loss": 0.5858, "step": 1304 }, { "epoch": 0.88, "learning_rate": 1.7737451278785435e-05, "loss": 0.5439, "step": 1305 }, { "epoch": 0.88, "learning_rate": 1.773409405553129e-05, "loss": 0.5526, "step": 1306 }, { "epoch": 0.88, "learning_rate": 1.773073466157719e-05, "loss": 0.5348, "step": 1307 }, { "epoch": 0.88, "learning_rate": 1.7727373097866002e-05, "loss": 0.498, "step": 1308 }, { "epoch": 0.88, "learning_rate": 1.7724009365341204e-05, "loss": 0.5656, "step": 1309 }, { "epoch": 0.88, "learning_rate": 1.772064346494688e-05, "loss": 0.5645, "step": 1310 }, { "epoch": 0.88, "learning_rate": 1.7717275397627724e-05, "loss": 0.5431, "step": 1311 }, { "epoch": 0.88, "learning_rate": 1.7713905164329044e-05, "loss": 0.5695, "step": 1312 }, { "epoch": 0.88, "learning_rate": 1.771053276599675e-05, "loss": 0.5572, "step": 1313 }, { "epoch": 0.88, "learning_rate": 1.7707158203577356e-05, "loss": 0.5517, "step": 1314 }, { "epoch": 0.89, "learning_rate": 1.7703781478017995e-05, "loss": 0.5426, "step": 1315 }, { "epoch": 0.89, "learning_rate": 1.770040259026639e-05, "loss": 0.5556, "step": 1316 }, { "epoch": 0.89, "learning_rate": 1.7697021541270897e-05, "loss": 0.5778, "step": 1317 }, { "epoch": 0.89, "learning_rate": 1.769363833198045e-05, "loss": 0.5986, "step": 1318 }, { "epoch": 0.89, "learning_rate": 1.7690252963344606e-05, "loss": 0.5727, "step": 1319 }, { "epoch": 0.89, "learning_rate": 1.768686543631352e-05, "loss": 0.536, "step": 1320 }, { "epoch": 0.89, "learning_rate": 1.7683475751837963e-05, "loss": 0.5263, "step": 1321 }, { "epoch": 0.89, "learning_rate": 1.7680083910869305e-05, "loss": 0.5217, "step": 1322 }, { "epoch": 0.89, "learning_rate": 1.7676689914359517e-05, "loss": 0.5059, "step": 1323 }, { "epoch": 0.89, "learning_rate": 1.7673293763261182e-05, "loss": 0.5205, "step": 1324 }, { "epoch": 0.89, "learning_rate": 1.7669895458527487e-05, "loss": 0.5552, "step": 1325 }, { "epoch": 0.89, "learning_rate": 1.766649500111222e-05, "loss": 0.5728, "step": 1326 }, { "epoch": 0.89, "learning_rate": 1.7663092391969773e-05, "loss": 0.5808, "step": 1327 }, { "epoch": 0.89, "learning_rate": 1.765968763205515e-05, "loss": 0.5567, "step": 1328 }, { "epoch": 0.89, "learning_rate": 1.7656280722323945e-05, "loss": 0.5205, "step": 1329 }, { "epoch": 0.9, "learning_rate": 1.765287166373237e-05, "loss": 0.5659, "step": 1330 }, { "epoch": 0.9, "learning_rate": 1.7649460457237223e-05, "loss": 0.5307, "step": 1331 }, { "epoch": 0.9, "learning_rate": 1.7646047103795928e-05, "loss": 0.5007, "step": 1332 }, { "epoch": 0.9, "learning_rate": 1.7642631604366493e-05, "loss": 0.5241, "step": 1333 }, { "epoch": 0.9, "learning_rate": 1.7639213959907525e-05, "loss": 0.5569, "step": 1334 }, { "epoch": 0.9, "learning_rate": 1.7635794171378257e-05, "loss": 0.519, "step": 1335 }, { "epoch": 0.9, "learning_rate": 1.7632372239738497e-05, "loss": 0.5643, "step": 1336 }, { "epoch": 0.9, "learning_rate": 1.762894816594867e-05, "loss": 0.5979, "step": 1337 }, { "epoch": 0.9, "learning_rate": 1.7625521950969802e-05, "loss": 0.5538, "step": 1338 }, { "epoch": 0.9, "learning_rate": 1.762209359576351e-05, "loss": 0.5651, "step": 1339 }, { "epoch": 0.9, "learning_rate": 1.761866310129202e-05, "loss": 0.5428, "step": 1340 }, { "epoch": 0.9, "learning_rate": 1.7615230468518157e-05, "loss": 0.5413, "step": 1341 }, { "epoch": 0.9, "learning_rate": 1.761179569840535e-05, "loss": 0.5716, "step": 1342 }, { "epoch": 0.9, "learning_rate": 1.7608358791917615e-05, "loss": 0.549, "step": 1343 }, { "epoch": 0.9, "learning_rate": 1.760491975001958e-05, "loss": 0.5682, "step": 1344 }, { "epoch": 0.91, "learning_rate": 1.760147857367647e-05, "loss": 0.5648, "step": 1345 }, { "epoch": 0.91, "learning_rate": 1.7598035263854103e-05, "loss": 0.5652, "step": 1346 }, { "epoch": 0.91, "learning_rate": 1.7594589821518905e-05, "loss": 0.5656, "step": 1347 }, { "epoch": 0.91, "learning_rate": 1.7591142247637895e-05, "loss": 0.5546, "step": 1348 }, { "epoch": 0.91, "learning_rate": 1.7587692543178684e-05, "loss": 0.5348, "step": 1349 }, { "epoch": 0.91, "learning_rate": 1.7584240709109498e-05, "loss": 0.549, "step": 1350 }, { "epoch": 0.91, "learning_rate": 1.7580786746399146e-05, "loss": 0.5672, "step": 1351 }, { "epoch": 0.91, "learning_rate": 1.7577330656017037e-05, "loss": 0.5469, "step": 1352 }, { "epoch": 0.91, "learning_rate": 1.7573872438933183e-05, "loss": 0.5214, "step": 1353 }, { "epoch": 0.91, "learning_rate": 1.757041209611819e-05, "loss": 0.5863, "step": 1354 }, { "epoch": 0.91, "learning_rate": 1.7566949628543252e-05, "loss": 0.5102, "step": 1355 }, { "epoch": 0.91, "learning_rate": 1.7563485037180177e-05, "loss": 0.5402, "step": 1356 }, { "epoch": 0.91, "learning_rate": 1.7560018323001354e-05, "loss": 0.5629, "step": 1357 }, { "epoch": 0.91, "learning_rate": 1.7556549486979775e-05, "loss": 0.5137, "step": 1358 }, { "epoch": 0.92, "learning_rate": 1.7553078530089024e-05, "loss": 0.5946, "step": 1359 }, { "epoch": 0.92, "learning_rate": 1.754960545330328e-05, "loss": 0.5536, "step": 1360 }, { "epoch": 0.92, "learning_rate": 1.7546130257597322e-05, "loss": 0.5257, "step": 1361 }, { "epoch": 0.92, "learning_rate": 1.7542652943946523e-05, "loss": 0.5616, "step": 1362 }, { "epoch": 0.92, "learning_rate": 1.753917351332684e-05, "loss": 0.5994, "step": 1363 }, { "epoch": 0.92, "learning_rate": 1.753569196671484e-05, "loss": 0.588, "step": 1364 }, { "epoch": 0.92, "learning_rate": 1.753220830508767e-05, "loss": 0.5721, "step": 1365 }, { "epoch": 0.92, "learning_rate": 1.752872252942308e-05, "loss": 0.5262, "step": 1366 }, { "epoch": 0.92, "learning_rate": 1.752523464069941e-05, "loss": 0.5495, "step": 1367 }, { "epoch": 0.92, "learning_rate": 1.7521744639895593e-05, "loss": 0.5531, "step": 1368 }, { "epoch": 0.92, "learning_rate": 1.7518252527991154e-05, "loss": 0.58, "step": 1369 }, { "epoch": 0.92, "learning_rate": 1.7514758305966206e-05, "loss": 0.5573, "step": 1370 }, { "epoch": 0.92, "learning_rate": 1.751126197480147e-05, "loss": 0.5405, "step": 1371 }, { "epoch": 0.92, "learning_rate": 1.750776353547824e-05, "loss": 0.5244, "step": 1372 }, { "epoch": 0.92, "learning_rate": 1.7504262988978417e-05, "loss": 0.5468, "step": 1373 }, { "epoch": 0.93, "learning_rate": 1.750076033628448e-05, "loss": 0.517, "step": 1374 }, { "epoch": 0.93, "learning_rate": 1.7497255578379514e-05, "loss": 0.559, "step": 1375 }, { "epoch": 0.93, "learning_rate": 1.7493748716247174e-05, "loss": 0.5307, "step": 1376 }, { "epoch": 0.93, "learning_rate": 1.749023975087173e-05, "loss": 0.5896, "step": 1377 }, { "epoch": 0.93, "learning_rate": 1.7486728683238025e-05, "loss": 0.5933, "step": 1378 }, { "epoch": 0.93, "learning_rate": 1.74832155143315e-05, "loss": 0.4987, "step": 1379 }, { "epoch": 0.93, "learning_rate": 1.7479700245138184e-05, "loss": 0.5837, "step": 1380 }, { "epoch": 0.93, "learning_rate": 1.747618287664469e-05, "loss": 0.5429, "step": 1381 }, { "epoch": 0.93, "learning_rate": 1.7472663409838225e-05, "loss": 0.5695, "step": 1382 }, { "epoch": 0.93, "learning_rate": 1.746914184570659e-05, "loss": 0.5263, "step": 1383 }, { "epoch": 0.93, "learning_rate": 1.7465618185238167e-05, "loss": 0.561, "step": 1384 }, { "epoch": 0.93, "learning_rate": 1.746209242942193e-05, "loss": 0.5063, "step": 1385 }, { "epoch": 0.93, "learning_rate": 1.745856457924744e-05, "loss": 0.5745, "step": 1386 }, { "epoch": 0.93, "learning_rate": 1.7455034635704845e-05, "loss": 0.5506, "step": 1387 }, { "epoch": 0.93, "learning_rate": 1.745150259978488e-05, "loss": 0.6118, "step": 1388 }, { "epoch": 0.94, "learning_rate": 1.744796847247887e-05, "loss": 0.5525, "step": 1389 }, { "epoch": 0.94, "learning_rate": 1.7444432254778725e-05, "loss": 0.5814, "step": 1390 }, { "epoch": 0.94, "learning_rate": 1.7440893947676943e-05, "loss": 0.5578, "step": 1391 }, { "epoch": 0.94, "learning_rate": 1.743735355216661e-05, "loss": 0.5782, "step": 1392 }, { "epoch": 0.94, "learning_rate": 1.7433811069241392e-05, "loss": 0.5566, "step": 1393 }, { "epoch": 0.94, "learning_rate": 1.7430266499895547e-05, "loss": 0.5485, "step": 1394 }, { "epoch": 0.94, "learning_rate": 1.7426719845123914e-05, "loss": 0.5494, "step": 1395 }, { "epoch": 0.94, "learning_rate": 1.7423171105921924e-05, "loss": 0.5581, "step": 1396 }, { "epoch": 0.94, "learning_rate": 1.7419620283285585e-05, "loss": 0.5435, "step": 1397 }, { "epoch": 0.94, "learning_rate": 1.741606737821149e-05, "loss": 0.5617, "step": 1398 }, { "epoch": 0.94, "learning_rate": 1.741251239169683e-05, "loss": 0.5174, "step": 1399 }, { "epoch": 0.94, "learning_rate": 1.7408955324739363e-05, "loss": 0.5197, "step": 1400 }, { "epoch": 0.94, "learning_rate": 1.740539617833744e-05, "loss": 0.5342, "step": 1401 }, { "epoch": 0.94, "learning_rate": 1.7401834953489993e-05, "loss": 0.5741, "step": 1402 }, { "epoch": 0.94, "learning_rate": 1.7398271651196538e-05, "loss": 0.5477, "step": 1403 }, { "epoch": 0.95, "learning_rate": 1.739470627245717e-05, "loss": 0.5801, "step": 1404 }, { "epoch": 0.95, "learning_rate": 1.7391138818272578e-05, "loss": 0.5585, "step": 1405 }, { "epoch": 0.95, "learning_rate": 1.7387569289644025e-05, "loss": 0.5288, "step": 1406 }, { "epoch": 0.95, "learning_rate": 1.7383997687573354e-05, "loss": 0.5547, "step": 1407 }, { "epoch": 0.95, "learning_rate": 1.7380424013062996e-05, "loss": 0.5832, "step": 1408 }, { "epoch": 0.95, "learning_rate": 1.737684826711596e-05, "loss": 0.5243, "step": 1409 }, { "epoch": 0.95, "learning_rate": 1.737327045073584e-05, "loss": 0.545, "step": 1410 }, { "epoch": 0.95, "learning_rate": 1.7369690564926808e-05, "loss": 0.5605, "step": 1411 }, { "epoch": 0.95, "learning_rate": 1.736610861069361e-05, "loss": 0.567, "step": 1412 }, { "epoch": 0.95, "learning_rate": 1.7362524589041593e-05, "loss": 0.5514, "step": 1413 }, { "epoch": 0.95, "learning_rate": 1.7358938500976662e-05, "loss": 0.5287, "step": 1414 }, { "epoch": 0.95, "learning_rate": 1.7355350347505312e-05, "loss": 0.5304, "step": 1415 }, { "epoch": 0.95, "learning_rate": 1.735176012963462e-05, "loss": 0.4766, "step": 1416 }, { "epoch": 0.95, "learning_rate": 1.734816784837224e-05, "loss": 0.5207, "step": 1417 }, { "epoch": 0.95, "learning_rate": 1.7344573504726397e-05, "loss": 0.5648, "step": 1418 }, { "epoch": 0.96, "learning_rate": 1.7340977099705914e-05, "loss": 0.5315, "step": 1419 }, { "epoch": 0.96, "learning_rate": 1.7337378634320173e-05, "loss": 0.5466, "step": 1420 }, { "epoch": 0.96, "learning_rate": 1.7333778109579144e-05, "loss": 0.5923, "step": 1421 }, { "epoch": 0.96, "learning_rate": 1.7330175526493373e-05, "loss": 0.5792, "step": 1422 }, { "epoch": 0.96, "learning_rate": 1.7326570886073986e-05, "loss": 0.5649, "step": 1423 }, { "epoch": 0.96, "learning_rate": 1.7322964189332682e-05, "loss": 0.5476, "step": 1424 }, { "epoch": 0.96, "learning_rate": 1.7319355437281737e-05, "loss": 0.5452, "step": 1425 }, { "epoch": 0.96, "learning_rate": 1.7315744630934007e-05, "loss": 0.5462, "step": 1426 }, { "epoch": 0.96, "learning_rate": 1.731213177130293e-05, "loss": 0.5276, "step": 1427 }, { "epoch": 0.96, "learning_rate": 1.7308516859402507e-05, "loss": 0.5519, "step": 1428 }, { "epoch": 0.96, "learning_rate": 1.7304899896247328e-05, "loss": 0.5934, "step": 1429 }, { "epoch": 0.96, "learning_rate": 1.730128088285255e-05, "loss": 0.5544, "step": 1430 }, { "epoch": 0.96, "learning_rate": 1.7297659820233906e-05, "loss": 0.5212, "step": 1431 }, { "epoch": 0.96, "learning_rate": 1.7294036709407707e-05, "loss": 0.533, "step": 1432 }, { "epoch": 0.96, "learning_rate": 1.7290411551390845e-05, "loss": 0.5498, "step": 1433 }, { "epoch": 0.97, "learning_rate": 1.7286784347200768e-05, "loss": 0.5629, "step": 1434 }, { "epoch": 0.97, "learning_rate": 1.7283155097855525e-05, "loss": 0.5244, "step": 1435 }, { "epoch": 0.97, "learning_rate": 1.727952380437371e-05, "loss": 0.5323, "step": 1436 }, { "epoch": 0.97, "learning_rate": 1.7275890467774515e-05, "loss": 0.5459, "step": 1437 }, { "epoch": 0.97, "learning_rate": 1.727225508907769e-05, "loss": 0.5407, "step": 1438 }, { "epoch": 0.97, "learning_rate": 1.7268617669303564e-05, "loss": 0.535, "step": 1439 }, { "epoch": 0.97, "learning_rate": 1.7264978209473035e-05, "loss": 0.5439, "step": 1440 }, { "epoch": 0.97, "learning_rate": 1.7261336710607587e-05, "loss": 0.5545, "step": 1441 }, { "epoch": 0.97, "learning_rate": 1.7257693173729256e-05, "loss": 0.5662, "step": 1442 }, { "epoch": 0.97, "learning_rate": 1.725404759986067e-05, "loss": 0.5379, "step": 1443 }, { "epoch": 0.97, "learning_rate": 1.7250399990025005e-05, "loss": 0.5472, "step": 1444 }, { "epoch": 0.97, "learning_rate": 1.724675034524604e-05, "loss": 0.5184, "step": 1445 }, { "epoch": 0.97, "learning_rate": 1.7243098666548094e-05, "loss": 0.517, "step": 1446 }, { "epoch": 0.97, "learning_rate": 1.723944495495607e-05, "loss": 0.5782, "step": 1447 }, { "epoch": 0.98, "learning_rate": 1.7235789211495455e-05, "loss": 0.5171, "step": 1448 }, { "epoch": 0.98, "learning_rate": 1.723213143719228e-05, "loss": 0.5429, "step": 1449 }, { "epoch": 0.98, "learning_rate": 1.7228471633073164e-05, "loss": 0.4996, "step": 1450 }, { "epoch": 0.98, "learning_rate": 1.722480980016529e-05, "loss": 0.523, "step": 1451 }, { "epoch": 0.98, "learning_rate": 1.7221145939496416e-05, "loss": 0.604, "step": 1452 }, { "epoch": 0.98, "learning_rate": 1.7217480052094855e-05, "loss": 0.5223, "step": 1453 }, { "epoch": 0.98, "learning_rate": 1.7213812138989505e-05, "loss": 0.5559, "step": 1454 }, { "epoch": 0.98, "learning_rate": 1.7210142201209825e-05, "loss": 0.5616, "step": 1455 }, { "epoch": 0.98, "learning_rate": 1.7206470239785842e-05, "loss": 0.5441, "step": 1456 }, { "epoch": 0.98, "learning_rate": 1.7202796255748154e-05, "loss": 0.5509, "step": 1457 }, { "epoch": 0.98, "learning_rate": 1.7199120250127922e-05, "loss": 0.5611, "step": 1458 }, { "epoch": 0.98, "learning_rate": 1.7195442223956876e-05, "loss": 0.5476, "step": 1459 }, { "epoch": 0.98, "learning_rate": 1.719176217826732e-05, "loss": 0.565, "step": 1460 }, { "epoch": 0.98, "learning_rate": 1.718808011409211e-05, "loss": 0.5307, "step": 1461 }, { "epoch": 0.98, "learning_rate": 1.718439603246469e-05, "loss": 0.5496, "step": 1462 }, { "epoch": 0.99, "learning_rate": 1.7180709934419047e-05, "loss": 0.559, "step": 1463 }, { "epoch": 0.99, "learning_rate": 1.7177021820989746e-05, "loss": 0.5849, "step": 1464 }, { "epoch": 0.99, "learning_rate": 1.7173331693211922e-05, "loss": 0.5817, "step": 1465 }, { "epoch": 0.99, "learning_rate": 1.7169639552121268e-05, "loss": 0.5533, "step": 1466 }, { "epoch": 0.99, "learning_rate": 1.716594539875404e-05, "loss": 0.5608, "step": 1467 }, { "epoch": 0.99, "learning_rate": 1.7162249234147063e-05, "loss": 0.5828, "step": 1468 }, { "epoch": 0.99, "learning_rate": 1.715855105933773e-05, "loss": 0.546, "step": 1469 }, { "epoch": 0.99, "learning_rate": 1.7154850875363987e-05, "loss": 0.5544, "step": 1470 }, { "epoch": 0.99, "learning_rate": 1.7151148683264362e-05, "loss": 0.598, "step": 1471 }, { "epoch": 0.99, "learning_rate": 1.7147444484077928e-05, "loss": 0.5401, "step": 1472 }, { "epoch": 0.99, "learning_rate": 1.714373827884433e-05, "loss": 0.5325, "step": 1473 }, { "epoch": 0.99, "learning_rate": 1.7140030068603777e-05, "loss": 0.5642, "step": 1474 }, { "epoch": 0.99, "learning_rate": 1.7136319854397037e-05, "loss": 0.5241, "step": 1475 }, { "epoch": 0.99, "learning_rate": 1.7132607637265443e-05, "loss": 0.5673, "step": 1476 }, { "epoch": 0.99, "learning_rate": 1.7128893418250892e-05, "loss": 0.5445, "step": 1477 }, { "epoch": 1.0, "learning_rate": 1.7125177198395835e-05, "loss": 0.5406, "step": 1478 }, { "epoch": 1.0, "learning_rate": 1.712145897874329e-05, "loss": 0.5535, "step": 1479 }, { "epoch": 1.0, "learning_rate": 1.7117738760336846e-05, "loss": 0.5634, "step": 1480 }, { "epoch": 1.0, "learning_rate": 1.7114016544220633e-05, "loss": 0.5694, "step": 1481 }, { "epoch": 1.0, "learning_rate": 1.7110292331439353e-05, "loss": 0.5582, "step": 1482 }, { "epoch": 1.0, "learning_rate": 1.710656612303827e-05, "loss": 0.5437, "step": 1483 }, { "epoch": 1.0, "learning_rate": 1.7102837920063206e-05, "loss": 0.5281, "step": 1484 }, { "epoch": 1.0, "learning_rate": 1.7099107723560537e-05, "loss": 0.5499, "step": 1485 }, { "epoch": 1.0, "learning_rate": 1.7095375534577208e-05, "loss": 0.5285, "step": 1486 }, { "epoch": 1.0, "learning_rate": 1.709164135416072e-05, "loss": 0.5201, "step": 1487 }, { "epoch": 1.0, "learning_rate": 1.708790518335913e-05, "loss": 0.5495, "step": 1488 }, { "epoch": 1.0, "learning_rate": 1.7084167023221054e-05, "loss": 0.5412, "step": 1489 }, { "epoch": 1.0, "learning_rate": 1.7080426874795666e-05, "loss": 0.5768, "step": 1490 }, { "epoch": 1.0, "learning_rate": 1.7076684739132705e-05, "loss": 0.507, "step": 1491 }, { "epoch": 1.0, "learning_rate": 1.7072940617282463e-05, "loss": 0.565, "step": 1492 }, { "epoch": 1.01, "learning_rate": 1.7069194510295785e-05, "loss": 0.5262, "step": 1493 }, { "epoch": 1.01, "learning_rate": 1.7065446419224073e-05, "loss": 0.5473, "step": 1494 }, { "epoch": 1.01, "learning_rate": 1.7061696345119304e-05, "loss": 0.5041, "step": 1495 }, { "epoch": 1.01, "learning_rate": 1.7057944289033983e-05, "loss": 0.5967, "step": 1496 }, { "epoch": 1.01, "learning_rate": 1.7054190252021195e-05, "loss": 0.5512, "step": 1497 }, { "epoch": 1.01, "learning_rate": 1.705043423513457e-05, "loss": 0.5361, "step": 1498 }, { "epoch": 1.01, "learning_rate": 1.7046676239428287e-05, "loss": 0.5605, "step": 1499 }, { "epoch": 1.01, "learning_rate": 1.7042916265957107e-05, "loss": 0.5367, "step": 1500 }, { "epoch": 1.01, "learning_rate": 1.703915431577631e-05, "loss": 0.5324, "step": 1501 }, { "epoch": 1.01, "learning_rate": 1.7035390389941757e-05, "loss": 0.5222, "step": 1502 }, { "epoch": 1.01, "learning_rate": 1.7031624489509858e-05, "loss": 0.5858, "step": 1503 }, { "epoch": 1.01, "learning_rate": 1.702785661553757e-05, "loss": 0.5545, "step": 1504 }, { "epoch": 1.01, "learning_rate": 1.702408676908241e-05, "loss": 0.5632, "step": 1505 }, { "epoch": 1.01, "learning_rate": 1.7020314951202443e-05, "loss": 0.5266, "step": 1506 }, { "epoch": 1.01, "learning_rate": 1.7016541162956296e-05, "loss": 0.5519, "step": 1507 }, { "epoch": 1.02, "learning_rate": 1.701276540540315e-05, "loss": 0.5277, "step": 1508 }, { "epoch": 1.02, "learning_rate": 1.700898767960272e-05, "loss": 0.5308, "step": 1509 }, { "epoch": 1.02, "learning_rate": 1.7005207986615293e-05, "loss": 0.5625, "step": 1510 }, { "epoch": 1.02, "learning_rate": 1.7001426327501702e-05, "loss": 0.5516, "step": 1511 }, { "epoch": 1.0, "learning_rate": 1.699764270332333e-05, "loss": 0.5446, "step": 1512 }, { "epoch": 1.0, "learning_rate": 1.6993857115142117e-05, "loss": 0.4884, "step": 1513 }, { "epoch": 1.0, "learning_rate": 1.6990069564020548e-05, "loss": 0.494, "step": 1514 }, { "epoch": 1.0, "learning_rate": 1.698628005102166e-05, "loss": 0.502, "step": 1515 }, { "epoch": 1.0, "learning_rate": 1.698248857720904e-05, "loss": 0.4559, "step": 1516 }, { "epoch": 1.0, "learning_rate": 1.6978695143646827e-05, "loss": 0.5284, "step": 1517 }, { "epoch": 1.0, "learning_rate": 1.6974899751399722e-05, "loss": 0.5137, "step": 1518 }, { "epoch": 1.0, "learning_rate": 1.6971102401532946e-05, "loss": 0.4768, "step": 1519 }, { "epoch": 1.01, "learning_rate": 1.6967303095112297e-05, "loss": 0.5221, "step": 1520 }, { "epoch": 1.01, "learning_rate": 1.6963501833204112e-05, "loss": 0.4772, "step": 1521 }, { "epoch": 1.01, "learning_rate": 1.695969861687528e-05, "loss": 0.4521, "step": 1522 }, { "epoch": 1.01, "learning_rate": 1.6955893447193225e-05, "loss": 0.4778, "step": 1523 }, { "epoch": 1.01, "learning_rate": 1.6952086325225945e-05, "loss": 0.5071, "step": 1524 }, { "epoch": 1.01, "learning_rate": 1.6948277252041957e-05, "loss": 0.4821, "step": 1525 }, { "epoch": 1.01, "learning_rate": 1.694446622871035e-05, "loss": 0.5019, "step": 1526 }, { "epoch": 1.01, "learning_rate": 1.6940653256300745e-05, "loss": 0.4926, "step": 1527 }, { "epoch": 1.01, "learning_rate": 1.6936838335883316e-05, "loss": 0.5114, "step": 1528 }, { "epoch": 1.01, "learning_rate": 1.6933021468528786e-05, "loss": 0.5159, "step": 1529 }, { "epoch": 1.01, "learning_rate": 1.6929202655308414e-05, "loss": 0.5111, "step": 1530 }, { "epoch": 1.01, "learning_rate": 1.692538189729402e-05, "loss": 0.5076, "step": 1531 }, { "epoch": 1.01, "learning_rate": 1.6921559195557958e-05, "loss": 0.479, "step": 1532 }, { "epoch": 1.01, "learning_rate": 1.691773455117313e-05, "loss": 0.5125, "step": 1533 }, { "epoch": 1.01, "learning_rate": 1.691390796521299e-05, "loss": 0.4801, "step": 1534 }, { "epoch": 1.02, "learning_rate": 1.691007943875153e-05, "loss": 0.4866, "step": 1535 }, { "epoch": 1.02, "learning_rate": 1.6906248972863285e-05, "loss": 0.4519, "step": 1536 }, { "epoch": 1.02, "learning_rate": 1.6902416568623344e-05, "loss": 0.4643, "step": 1537 }, { "epoch": 1.02, "learning_rate": 1.6898582227107326e-05, "loss": 0.5046, "step": 1538 }, { "epoch": 1.02, "learning_rate": 1.689474594939141e-05, "loss": 0.4605, "step": 1539 }, { "epoch": 1.02, "learning_rate": 1.689090773655231e-05, "loss": 0.5053, "step": 1540 }, { "epoch": 1.02, "learning_rate": 1.6887067589667278e-05, "loss": 0.5014, "step": 1541 }, { "epoch": 1.02, "learning_rate": 1.688322550981411e-05, "loss": 0.4573, "step": 1542 }, { "epoch": 1.02, "learning_rate": 1.6879381498071163e-05, "loss": 0.4963, "step": 1543 }, { "epoch": 1.02, "learning_rate": 1.6875535555517302e-05, "loss": 0.4797, "step": 1544 }, { "epoch": 1.02, "learning_rate": 1.6871687683231975e-05, "loss": 0.4845, "step": 1545 }, { "epoch": 1.02, "learning_rate": 1.6867837882295135e-05, "loss": 0.4384, "step": 1546 }, { "epoch": 1.02, "learning_rate": 1.6863986153787297e-05, "loss": 0.4952, "step": 1547 }, { "epoch": 1.02, "learning_rate": 1.686013249878951e-05, "loss": 0.475, "step": 1548 }, { "epoch": 1.03, "learning_rate": 1.6856276918383368e-05, "loss": 0.4936, "step": 1549 }, { "epoch": 1.03, "learning_rate": 1.6852419413651003e-05, "loss": 0.4954, "step": 1550 }, { "epoch": 1.03, "learning_rate": 1.684855998567508e-05, "loss": 0.4921, "step": 1551 }, { "epoch": 1.03, "learning_rate": 1.684469863553882e-05, "loss": 0.5014, "step": 1552 }, { "epoch": 1.03, "learning_rate": 1.684083536432597e-05, "loss": 0.4892, "step": 1553 }, { "epoch": 1.03, "learning_rate": 1.6836970173120818e-05, "loss": 0.4969, "step": 1554 }, { "epoch": 1.03, "learning_rate": 1.6833103063008194e-05, "loss": 0.4983, "step": 1555 }, { "epoch": 1.03, "learning_rate": 1.6829234035073464e-05, "loss": 0.5034, "step": 1556 }, { "epoch": 1.03, "learning_rate": 1.682536309040254e-05, "loss": 0.4834, "step": 1557 }, { "epoch": 1.03, "learning_rate": 1.6821490230081864e-05, "loss": 0.464, "step": 1558 }, { "epoch": 1.03, "learning_rate": 1.6817615455198414e-05, "loss": 0.4887, "step": 1559 }, { "epoch": 1.03, "learning_rate": 1.681373876683971e-05, "loss": 0.4895, "step": 1560 }, { "epoch": 1.03, "learning_rate": 1.680986016609381e-05, "loss": 0.4953, "step": 1561 }, { "epoch": 1.03, "learning_rate": 1.6805979654049305e-05, "loss": 0.4882, "step": 1562 }, { "epoch": 1.03, "learning_rate": 1.6802097231795324e-05, "loss": 0.5271, "step": 1563 }, { "epoch": 1.04, "learning_rate": 1.6798212900421536e-05, "loss": 0.4953, "step": 1564 }, { "epoch": 1.04, "learning_rate": 1.6794326661018136e-05, "loss": 0.4628, "step": 1565 }, { "epoch": 1.04, "learning_rate": 1.6790438514675865e-05, "loss": 0.4971, "step": 1566 }, { "epoch": 1.04, "learning_rate": 1.6786548462485994e-05, "loss": 0.4879, "step": 1567 }, { "epoch": 1.04, "learning_rate": 1.678265650554033e-05, "loss": 0.4589, "step": 1568 }, { "epoch": 1.04, "learning_rate": 1.6778762644931213e-05, "loss": 0.4851, "step": 1569 }, { "epoch": 1.04, "learning_rate": 1.6774866881751518e-05, "loss": 0.4966, "step": 1570 }, { "epoch": 1.04, "learning_rate": 1.6770969217094657e-05, "loss": 0.5129, "step": 1571 }, { "epoch": 1.04, "learning_rate": 1.676706965205457e-05, "loss": 0.5025, "step": 1572 }, { "epoch": 1.04, "learning_rate": 1.6763168187725742e-05, "loss": 0.5246, "step": 1573 }, { "epoch": 1.04, "learning_rate": 1.6759264825203172e-05, "loss": 0.4962, "step": 1574 }, { "epoch": 1.04, "learning_rate": 1.6755359565582408e-05, "loss": 0.4804, "step": 1575 }, { "epoch": 1.04, "learning_rate": 1.6751452409959527e-05, "loss": 0.4765, "step": 1576 }, { "epoch": 1.04, "learning_rate": 1.674754335943113e-05, "loss": 0.5119, "step": 1577 }, { "epoch": 1.04, "learning_rate": 1.674363241509436e-05, "loss": 0.5444, "step": 1578 }, { "epoch": 1.05, "learning_rate": 1.6739719578046888e-05, "loss": 0.4934, "step": 1579 }, { "epoch": 1.05, "learning_rate": 1.6735804849386914e-05, "loss": 0.5107, "step": 1580 }, { "epoch": 1.05, "learning_rate": 1.6731888230213172e-05, "loss": 0.5445, "step": 1581 }, { "epoch": 1.05, "learning_rate": 1.6727969721624923e-05, "loss": 0.5188, "step": 1582 }, { "epoch": 1.05, "learning_rate": 1.672404932472196e-05, "loss": 0.5017, "step": 1583 }, { "epoch": 1.05, "learning_rate": 1.672012704060461e-05, "loss": 0.5127, "step": 1584 }, { "epoch": 1.05, "learning_rate": 1.6716202870373726e-05, "loss": 0.4733, "step": 1585 }, { "epoch": 1.05, "learning_rate": 1.6712276815130688e-05, "loss": 0.5149, "step": 1586 }, { "epoch": 1.05, "learning_rate": 1.670834887597741e-05, "loss": 0.4659, "step": 1587 }, { "epoch": 1.05, "learning_rate": 1.670441905401633e-05, "loss": 0.4702, "step": 1588 }, { "epoch": 1.05, "learning_rate": 1.6700487350350416e-05, "loss": 0.4714, "step": 1589 }, { "epoch": 1.05, "learning_rate": 1.6696553766083167e-05, "loss": 0.48, "step": 1590 }, { "epoch": 1.05, "learning_rate": 1.669261830231861e-05, "loss": 0.4869, "step": 1591 }, { "epoch": 1.05, "learning_rate": 1.6688680960161292e-05, "loss": 0.4932, "step": 1592 }, { "epoch": 1.05, "learning_rate": 1.66847417407163e-05, "loss": 0.5002, "step": 1593 }, { "epoch": 1.06, "learning_rate": 1.668080064508923e-05, "loss": 0.4699, "step": 1594 }, { "epoch": 1.06, "learning_rate": 1.667685767438622e-05, "loss": 0.4734, "step": 1595 }, { "epoch": 1.06, "learning_rate": 1.6672912829713936e-05, "loss": 0.506, "step": 1596 }, { "epoch": 1.06, "learning_rate": 1.666896611217955e-05, "loss": 0.5154, "step": 1597 }, { "epoch": 1.06, "learning_rate": 1.6665017522890786e-05, "loss": 0.4628, "step": 1598 }, { "epoch": 1.06, "learning_rate": 1.6661067062955868e-05, "loss": 0.4881, "step": 1599 }, { "epoch": 1.06, "learning_rate": 1.6657114733483564e-05, "loss": 0.5222, "step": 1600 }, { "epoch": 1.06, "learning_rate": 1.665316053558316e-05, "loss": 0.4997, "step": 1601 }, { "epoch": 1.06, "learning_rate": 1.6649204470364467e-05, "loss": 0.4897, "step": 1602 }, { "epoch": 1.06, "learning_rate": 1.6645246538937815e-05, "loss": 0.4963, "step": 1603 }, { "epoch": 1.06, "learning_rate": 1.6641286742414066e-05, "loss": 0.4787, "step": 1604 }, { "epoch": 1.06, "learning_rate": 1.6637325081904595e-05, "loss": 0.4995, "step": 1605 }, { "epoch": 1.06, "learning_rate": 1.663336155852132e-05, "loss": 0.5118, "step": 1606 }, { "epoch": 1.06, "learning_rate": 1.6629396173376656e-05, "loss": 0.5022, "step": 1607 }, { "epoch": 1.06, "learning_rate": 1.662542892758356e-05, "loss": 0.5062, "step": 1608 }, { "epoch": 1.07, "learning_rate": 1.66214598222555e-05, "loss": 0.5097, "step": 1609 }, { "epoch": 1.07, "learning_rate": 1.6617488858506478e-05, "loss": 0.4886, "step": 1610 }, { "epoch": 1.07, "learning_rate": 1.6613516037451e-05, "loss": 0.4908, "step": 1611 }, { "epoch": 1.07, "learning_rate": 1.6609541360204108e-05, "loss": 0.4705, "step": 1612 }, { "epoch": 1.07, "learning_rate": 1.660556482788136e-05, "loss": 0.5641, "step": 1613 }, { "epoch": 1.07, "learning_rate": 1.6601586441598834e-05, "loss": 0.501, "step": 1614 }, { "epoch": 1.07, "learning_rate": 1.659760620247313e-05, "loss": 0.4935, "step": 1615 }, { "epoch": 1.07, "learning_rate": 1.659362411162137e-05, "loss": 0.5196, "step": 1616 }, { "epoch": 1.07, "learning_rate": 1.6589640170161188e-05, "loss": 0.4981, "step": 1617 }, { "epoch": 1.07, "learning_rate": 1.6585654379210745e-05, "loss": 0.5202, "step": 1618 }, { "epoch": 1.07, "learning_rate": 1.6581666739888715e-05, "loss": 0.4837, "step": 1619 }, { "epoch": 1.07, "learning_rate": 1.65776772533143e-05, "loss": 0.4857, "step": 1620 }, { "epoch": 1.07, "learning_rate": 1.657368592060721e-05, "loss": 0.4868, "step": 1621 }, { "epoch": 1.07, "learning_rate": 1.656969274288768e-05, "loss": 0.5207, "step": 1622 }, { "epoch": 1.07, "learning_rate": 1.656569772127646e-05, "loss": 0.5117, "step": 1623 }, { "epoch": 1.08, "learning_rate": 1.6561700856894817e-05, "loss": 0.5011, "step": 1624 }, { "epoch": 1.08, "learning_rate": 1.6557702150864538e-05, "loss": 0.4876, "step": 1625 }, { "epoch": 1.08, "learning_rate": 1.6553701604307924e-05, "loss": 0.5139, "step": 1626 }, { "epoch": 1.08, "learning_rate": 1.6549699218347796e-05, "loss": 0.4967, "step": 1627 }, { "epoch": 1.08, "learning_rate": 1.654569499410749e-05, "loss": 0.4945, "step": 1628 }, { "epoch": 1.08, "learning_rate": 1.654168893271085e-05, "loss": 0.498, "step": 1629 }, { "epoch": 1.08, "learning_rate": 1.6537681035282247e-05, "loss": 0.4923, "step": 1630 }, { "epoch": 1.08, "learning_rate": 1.6533671302946566e-05, "loss": 0.5004, "step": 1631 }, { "epoch": 1.08, "learning_rate": 1.6529659736829197e-05, "loss": 0.5208, "step": 1632 }, { "epoch": 1.08, "learning_rate": 1.652564633805606e-05, "loss": 0.4744, "step": 1633 }, { "epoch": 1.08, "learning_rate": 1.6521631107753575e-05, "loss": 0.5152, "step": 1634 }, { "epoch": 1.08, "learning_rate": 1.6517614047048683e-05, "loss": 0.5014, "step": 1635 }, { "epoch": 1.08, "learning_rate": 1.6513595157068837e-05, "loss": 0.4904, "step": 1636 }, { "epoch": 1.08, "learning_rate": 1.650957443894201e-05, "loss": 0.4434, "step": 1637 }, { "epoch": 1.09, "learning_rate": 1.6505551893796673e-05, "loss": 0.481, "step": 1638 }, { "epoch": 1.09, "learning_rate": 1.6501527522761828e-05, "loss": 0.497, "step": 1639 }, { "epoch": 1.09, "learning_rate": 1.6497501326966974e-05, "loss": 0.5048, "step": 1640 }, { "epoch": 1.09, "learning_rate": 1.6493473307542132e-05, "loss": 0.509, "step": 1641 }, { "epoch": 1.09, "learning_rate": 1.6489443465617832e-05, "loss": 0.4771, "step": 1642 }, { "epoch": 1.09, "learning_rate": 1.648541180232511e-05, "loss": 0.5047, "step": 1643 }, { "epoch": 1.09, "learning_rate": 1.6481378318795528e-05, "loss": 0.4742, "step": 1644 }, { "epoch": 1.09, "learning_rate": 1.6477343016161138e-05, "loss": 0.4916, "step": 1645 }, { "epoch": 1.09, "learning_rate": 1.6473305895554522e-05, "loss": 0.4914, "step": 1646 }, { "epoch": 1.09, "learning_rate": 1.6469266958108757e-05, "loss": 0.4415, "step": 1647 }, { "epoch": 1.09, "learning_rate": 1.6465226204957444e-05, "loss": 0.4514, "step": 1648 }, { "epoch": 1.09, "learning_rate": 1.646118363723468e-05, "loss": 0.5003, "step": 1649 }, { "epoch": 1.09, "learning_rate": 1.6457139256075084e-05, "loss": 0.4971, "step": 1650 }, { "epoch": 1.09, "learning_rate": 1.6453093062613774e-05, "loss": 0.4783, "step": 1651 }, { "epoch": 1.09, "learning_rate": 1.6449045057986376e-05, "loss": 0.4858, "step": 1652 }, { "epoch": 1.1, "learning_rate": 1.644499524332904e-05, "loss": 0.5081, "step": 1653 }, { "epoch": 1.1, "learning_rate": 1.6440943619778403e-05, "loss": 0.5069, "step": 1654 }, { "epoch": 1.1, "learning_rate": 1.6436890188471622e-05, "loss": 0.5034, "step": 1655 }, { "epoch": 1.1, "learning_rate": 1.643283495054636e-05, "loss": 0.4961, "step": 1656 }, { "epoch": 1.1, "learning_rate": 1.642877790714078e-05, "loss": 0.4751, "step": 1657 }, { "epoch": 1.1, "learning_rate": 1.642471905939357e-05, "loss": 0.4604, "step": 1658 }, { "epoch": 1.1, "learning_rate": 1.6420658408443904e-05, "loss": 0.4928, "step": 1659 }, { "epoch": 1.1, "learning_rate": 1.6416595955431468e-05, "loss": 0.4956, "step": 1660 }, { "epoch": 1.1, "learning_rate": 1.641253170149646e-05, "loss": 0.5046, "step": 1661 }, { "epoch": 1.1, "learning_rate": 1.6408465647779578e-05, "loss": 0.4758, "step": 1662 }, { "epoch": 1.1, "learning_rate": 1.6404397795422024e-05, "loss": 0.4873, "step": 1663 }, { "epoch": 1.1, "learning_rate": 1.640032814556551e-05, "loss": 0.4867, "step": 1664 }, { "epoch": 1.1, "learning_rate": 1.6396256699352252e-05, "loss": 0.4854, "step": 1665 }, { "epoch": 1.1, "learning_rate": 1.6392183457924967e-05, "loss": 0.4952, "step": 1666 }, { "epoch": 1.1, "learning_rate": 1.6388108422426873e-05, "loss": 0.4483, "step": 1667 }, { "epoch": 1.11, "learning_rate": 1.6384031594001698e-05, "loss": 0.4868, "step": 1668 }, { "epoch": 1.11, "learning_rate": 1.637995297379367e-05, "loss": 0.5239, "step": 1669 }, { "epoch": 1.11, "learning_rate": 1.6375872562947516e-05, "loss": 0.5117, "step": 1670 }, { "epoch": 1.11, "learning_rate": 1.637179036260848e-05, "loss": 0.5197, "step": 1671 }, { "epoch": 1.11, "learning_rate": 1.636770637392229e-05, "loss": 0.4781, "step": 1672 }, { "epoch": 1.11, "learning_rate": 1.636362059803519e-05, "loss": 0.4938, "step": 1673 }, { "epoch": 1.11, "learning_rate": 1.6359533036093915e-05, "loss": 0.5139, "step": 1674 }, { "epoch": 1.11, "learning_rate": 1.635544368924571e-05, "loss": 0.5368, "step": 1675 }, { "epoch": 1.11, "learning_rate": 1.6351352558638313e-05, "loss": 0.5025, "step": 1676 }, { "epoch": 1.11, "learning_rate": 1.6347259645419966e-05, "loss": 0.4668, "step": 1677 }, { "epoch": 1.11, "learning_rate": 1.6343164950739417e-05, "loss": 0.4899, "step": 1678 }, { "epoch": 1.11, "learning_rate": 1.633906847574591e-05, "loss": 0.5279, "step": 1679 }, { "epoch": 1.11, "learning_rate": 1.6334970221589182e-05, "loss": 0.4785, "step": 1680 }, { "epoch": 1.11, "learning_rate": 1.6330870189419475e-05, "loss": 0.5061, "step": 1681 }, { "epoch": 1.11, "learning_rate": 1.6326768380387538e-05, "loss": 0.4853, "step": 1682 }, { "epoch": 1.12, "learning_rate": 1.6322664795644604e-05, "loss": 0.495, "step": 1683 }, { "epoch": 1.12, "learning_rate": 1.631855943634241e-05, "loss": 0.4349, "step": 1684 }, { "epoch": 1.12, "learning_rate": 1.6314452303633193e-05, "loss": 0.5487, "step": 1685 }, { "epoch": 1.12, "learning_rate": 1.6310343398669693e-05, "loss": 0.4936, "step": 1686 }, { "epoch": 1.12, "learning_rate": 1.6306232722605133e-05, "loss": 0.4722, "step": 1687 }, { "epoch": 1.12, "learning_rate": 1.6302120276593248e-05, "loss": 0.5042, "step": 1688 }, { "epoch": 1.12, "learning_rate": 1.6298006061788264e-05, "loss": 0.5306, "step": 1689 }, { "epoch": 1.12, "learning_rate": 1.6293890079344892e-05, "loss": 0.5045, "step": 1690 }, { "epoch": 1.12, "learning_rate": 1.6289772330418365e-05, "loss": 0.4998, "step": 1691 }, { "epoch": 1.12, "learning_rate": 1.6285652816164384e-05, "loss": 0.5169, "step": 1692 }, { "epoch": 1.12, "learning_rate": 1.6281531537739163e-05, "loss": 0.5023, "step": 1693 }, { "epoch": 1.12, "learning_rate": 1.6277408496299406e-05, "loss": 0.4869, "step": 1694 }, { "epoch": 1.12, "learning_rate": 1.6273283693002312e-05, "loss": 0.5068, "step": 1695 }, { "epoch": 1.12, "learning_rate": 1.6269157129005573e-05, "loss": 0.4735, "step": 1696 }, { "epoch": 1.12, "learning_rate": 1.626502880546738e-05, "loss": 0.48, "step": 1697 }, { "epoch": 1.13, "learning_rate": 1.6260898723546416e-05, "loss": 0.5318, "step": 1698 }, { "epoch": 1.13, "learning_rate": 1.625676688440185e-05, "loss": 0.5035, "step": 1699 }, { "epoch": 1.13, "learning_rate": 1.625263328919335e-05, "loss": 0.4679, "step": 1700 }, { "epoch": 1.13, "learning_rate": 1.6248497939081082e-05, "loss": 0.5333, "step": 1701 }, { "epoch": 1.13, "learning_rate": 1.6244360835225698e-05, "loss": 0.4487, "step": 1702 }, { "epoch": 1.13, "learning_rate": 1.624022197878834e-05, "loss": 0.4921, "step": 1703 }, { "epoch": 1.13, "learning_rate": 1.623608137093065e-05, "loss": 0.4767, "step": 1704 }, { "epoch": 1.13, "learning_rate": 1.6231939012814758e-05, "loss": 0.4884, "step": 1705 }, { "epoch": 1.13, "learning_rate": 1.622779490560328e-05, "loss": 0.4936, "step": 1706 }, { "epoch": 1.13, "learning_rate": 1.6223649050459337e-05, "loss": 0.5619, "step": 1707 }, { "epoch": 1.13, "learning_rate": 1.621950144854652e-05, "loss": 0.5088, "step": 1708 }, { "epoch": 1.13, "learning_rate": 1.6215352101028926e-05, "loss": 0.5222, "step": 1709 }, { "epoch": 1.13, "learning_rate": 1.6211201009071134e-05, "loss": 0.4876, "step": 1710 }, { "epoch": 1.13, "learning_rate": 1.6207048173838226e-05, "loss": 0.4759, "step": 1711 }, { "epoch": 1.13, "learning_rate": 1.620289359649575e-05, "loss": 0.5471, "step": 1712 }, { "epoch": 1.14, "learning_rate": 1.6198737278209763e-05, "loss": 0.4909, "step": 1713 }, { "epoch": 1.14, "learning_rate": 1.6194579220146806e-05, "loss": 0.5141, "step": 1714 }, { "epoch": 1.14, "learning_rate": 1.6190419423473897e-05, "loss": 0.5057, "step": 1715 }, { "epoch": 1.14, "learning_rate": 1.6186257889358557e-05, "loss": 0.5086, "step": 1716 }, { "epoch": 1.14, "learning_rate": 1.6182094618968793e-05, "loss": 0.4666, "step": 1717 }, { "epoch": 1.14, "learning_rate": 1.6177929613473088e-05, "loss": 0.5046, "step": 1718 }, { "epoch": 1.14, "learning_rate": 1.617376287404042e-05, "loss": 0.4704, "step": 1719 }, { "epoch": 1.14, "learning_rate": 1.6169594401840255e-05, "loss": 0.4911, "step": 1720 }, { "epoch": 1.14, "learning_rate": 1.6165424198042542e-05, "loss": 0.4914, "step": 1721 }, { "epoch": 1.14, "learning_rate": 1.6161252263817715e-05, "loss": 0.4775, "step": 1722 }, { "epoch": 1.14, "learning_rate": 1.6157078600336693e-05, "loss": 0.4704, "step": 1723 }, { "epoch": 1.14, "learning_rate": 1.6152903208770888e-05, "loss": 0.4702, "step": 1724 }, { "epoch": 1.14, "learning_rate": 1.6148726090292196e-05, "loss": 0.5048, "step": 1725 }, { "epoch": 1.14, "learning_rate": 1.6144547246072984e-05, "loss": 0.4896, "step": 1726 }, { "epoch": 1.14, "learning_rate": 1.614036667728612e-05, "loss": 0.506, "step": 1727 }, { "epoch": 1.15, "learning_rate": 1.6136184385104945e-05, "loss": 0.4839, "step": 1728 }, { "epoch": 1.15, "learning_rate": 1.6132000370703286e-05, "loss": 0.4753, "step": 1729 }, { "epoch": 1.15, "learning_rate": 1.6127814635255462e-05, "loss": 0.4943, "step": 1730 }, { "epoch": 1.15, "learning_rate": 1.6123627179936262e-05, "loss": 0.5303, "step": 1731 }, { "epoch": 1.15, "learning_rate": 1.6119438005920968e-05, "loss": 0.4707, "step": 1732 }, { "epoch": 1.15, "learning_rate": 1.611524711438533e-05, "loss": 0.5114, "step": 1733 }, { "epoch": 1.15, "learning_rate": 1.6111054506505607e-05, "loss": 0.4873, "step": 1734 }, { "epoch": 1.15, "learning_rate": 1.6106860183458514e-05, "loss": 0.4573, "step": 1735 }, { "epoch": 1.15, "learning_rate": 1.6102664146421255e-05, "loss": 0.4946, "step": 1736 }, { "epoch": 1.15, "learning_rate": 1.6098466396571514e-05, "loss": 0.5046, "step": 1737 }, { "epoch": 1.15, "learning_rate": 1.6094266935087467e-05, "loss": 0.5017, "step": 1738 }, { "epoch": 1.15, "learning_rate": 1.6090065763147755e-05, "loss": 0.5371, "step": 1739 }, { "epoch": 1.15, "learning_rate": 1.608586288193151e-05, "loss": 0.4745, "step": 1740 }, { "epoch": 1.15, "learning_rate": 1.608165829261833e-05, "loss": 0.4906, "step": 1741 }, { "epoch": 1.16, "learning_rate": 1.6077451996388314e-05, "loss": 0.4589, "step": 1742 }, { "epoch": 1.16, "learning_rate": 1.6073243994422018e-05, "loss": 0.4641, "step": 1743 }, { "epoch": 1.16, "learning_rate": 1.6069034287900493e-05, "loss": 0.4957, "step": 1744 }, { "epoch": 1.16, "learning_rate": 1.6064822878005262e-05, "loss": 0.479, "step": 1745 }, { "epoch": 1.16, "learning_rate": 1.606060976591832e-05, "loss": 0.5074, "step": 1746 }, { "epoch": 1.16, "learning_rate": 1.605639495282215e-05, "loss": 0.4891, "step": 1747 }, { "epoch": 1.16, "learning_rate": 1.6052178439899712e-05, "loss": 0.488, "step": 1748 }, { "epoch": 1.16, "learning_rate": 1.6047960228334428e-05, "loss": 0.4882, "step": 1749 }, { "epoch": 1.16, "learning_rate": 1.6043740319310218e-05, "loss": 0.4828, "step": 1750 }, { "epoch": 1.16, "learning_rate": 1.6039518714011465e-05, "loss": 0.4835, "step": 1751 }, { "epoch": 1.16, "learning_rate": 1.6035295413623032e-05, "loss": 0.5265, "step": 1752 }, { "epoch": 1.16, "learning_rate": 1.6031070419330258e-05, "loss": 0.4545, "step": 1753 }, { "epoch": 1.16, "learning_rate": 1.6026843732318958e-05, "loss": 0.4948, "step": 1754 }, { "epoch": 1.16, "learning_rate": 1.602261535377542e-05, "loss": 0.5852, "step": 1755 }, { "epoch": 1.16, "learning_rate": 1.60183852848864e-05, "loss": 0.4922, "step": 1756 }, { "epoch": 1.17, "learning_rate": 1.601415352683915e-05, "loss": 0.5126, "step": 1757 }, { "epoch": 1.17, "learning_rate": 1.6009920080821365e-05, "loss": 0.4729, "step": 1758 }, { "epoch": 1.17, "learning_rate": 1.6005684948021248e-05, "loss": 0.5047, "step": 1759 }, { "epoch": 1.17, "learning_rate": 1.600144812962745e-05, "loss": 0.4839, "step": 1760 }, { "epoch": 1.17, "learning_rate": 1.5997209626829105e-05, "loss": 0.4733, "step": 1761 }, { "epoch": 1.17, "learning_rate": 1.5992969440815813e-05, "loss": 0.4462, "step": 1762 }, { "epoch": 1.17, "learning_rate": 1.598872757277766e-05, "loss": 0.4996, "step": 1763 }, { "epoch": 1.17, "learning_rate": 1.5984484023905186e-05, "loss": 0.4953, "step": 1764 }, { "epoch": 1.17, "learning_rate": 1.5980238795389424e-05, "loss": 0.4991, "step": 1765 }, { "epoch": 1.17, "learning_rate": 1.5975991888421857e-05, "loss": 0.4656, "step": 1766 }, { "epoch": 1.17, "learning_rate": 1.597174330419445e-05, "loss": 0.4827, "step": 1767 }, { "epoch": 1.17, "learning_rate": 1.5967493043899644e-05, "loss": 0.5212, "step": 1768 }, { "epoch": 1.17, "learning_rate": 1.5963241108730342e-05, "loss": 0.5176, "step": 1769 }, { "epoch": 1.17, "learning_rate": 1.595898749987991e-05, "loss": 0.478, "step": 1770 }, { "epoch": 1.17, "learning_rate": 1.5954732218542207e-05, "loss": 0.4943, "step": 1771 }, { "epoch": 1.18, "learning_rate": 1.5950475265911537e-05, "loss": 0.5017, "step": 1772 }, { "epoch": 1.18, "learning_rate": 1.5946216643182685e-05, "loss": 0.5064, "step": 1773 }, { "epoch": 1.18, "learning_rate": 1.5941956351550908e-05, "loss": 0.4653, "step": 1774 }, { "epoch": 1.18, "learning_rate": 1.5937694392211923e-05, "loss": 0.5459, "step": 1775 }, { "epoch": 1.18, "learning_rate": 1.593343076636192e-05, "loss": 0.5163, "step": 1776 }, { "epoch": 1.18, "learning_rate": 1.5929165475197553e-05, "loss": 0.4803, "step": 1777 }, { "epoch": 1.18, "learning_rate": 1.5924898519915947e-05, "loss": 0.4616, "step": 1778 }, { "epoch": 1.18, "learning_rate": 1.592062990171469e-05, "loss": 0.5039, "step": 1779 }, { "epoch": 1.18, "learning_rate": 1.5916359621791847e-05, "loss": 0.4811, "step": 1780 }, { "epoch": 1.18, "learning_rate": 1.5912087681345934e-05, "loss": 0.488, "step": 1781 }, { "epoch": 1.18, "learning_rate": 1.5907814081575943e-05, "loss": 0.4915, "step": 1782 }, { "epoch": 1.18, "learning_rate": 1.590353882368133e-05, "loss": 0.4763, "step": 1783 }, { "epoch": 1.18, "learning_rate": 1.589926190886202e-05, "loss": 0.5249, "step": 1784 }, { "epoch": 1.18, "learning_rate": 1.5894983338318396e-05, "loss": 0.4959, "step": 1785 }, { "epoch": 1.18, "learning_rate": 1.5890703113251305e-05, "loss": 0.4974, "step": 1786 }, { "epoch": 1.19, "learning_rate": 1.5886421234862072e-05, "loss": 0.4823, "step": 1787 }, { "epoch": 1.19, "learning_rate": 1.5882137704352466e-05, "loss": 0.5042, "step": 1788 }, { "epoch": 1.19, "learning_rate": 1.5877852522924733e-05, "loss": 0.5092, "step": 1789 }, { "epoch": 1.19, "learning_rate": 1.587356569178158e-05, "loss": 0.5036, "step": 1790 }, { "epoch": 1.19, "learning_rate": 1.586927721212618e-05, "loss": 0.4688, "step": 1791 }, { "epoch": 1.19, "learning_rate": 1.5864987085162155e-05, "loss": 0.5075, "step": 1792 }, { "epoch": 1.19, "learning_rate": 1.5860695312093608e-05, "loss": 0.4574, "step": 1793 }, { "epoch": 1.19, "learning_rate": 1.5856401894125095e-05, "loss": 0.49, "step": 1794 }, { "epoch": 1.19, "learning_rate": 1.585210683246163e-05, "loss": 0.4857, "step": 1795 }, { "epoch": 1.19, "learning_rate": 1.5847810128308695e-05, "loss": 0.5273, "step": 1796 }, { "epoch": 1.19, "learning_rate": 1.5843511782872226e-05, "loss": 0.4779, "step": 1797 }, { "epoch": 1.19, "learning_rate": 1.583921179735863e-05, "loss": 0.5096, "step": 1798 }, { "epoch": 1.19, "learning_rate": 1.5834910172974767e-05, "loss": 0.5149, "step": 1799 }, { "epoch": 1.19, "learning_rate": 1.5830606910927956e-05, "loss": 0.5018, "step": 1800 }, { "epoch": 1.19, "learning_rate": 1.5826302012425977e-05, "loss": 0.5022, "step": 1801 }, { "epoch": 1.2, "learning_rate": 1.582199547867707e-05, "loss": 0.4903, "step": 1802 }, { "epoch": 1.2, "learning_rate": 1.581768731088994e-05, "loss": 0.4917, "step": 1803 }, { "epoch": 1.2, "learning_rate": 1.581337751027374e-05, "loss": 0.4503, "step": 1804 }, { "epoch": 1.2, "learning_rate": 1.5809066078038082e-05, "loss": 0.4935, "step": 1805 }, { "epoch": 1.2, "learning_rate": 1.5804753015393045e-05, "loss": 0.4633, "step": 1806 }, { "epoch": 1.2, "learning_rate": 1.5800438323549167e-05, "loss": 0.4962, "step": 1807 }, { "epoch": 1.2, "learning_rate": 1.5796122003717424e-05, "loss": 0.4695, "step": 1808 }, { "epoch": 1.2, "learning_rate": 1.5791804057109266e-05, "loss": 0.4752, "step": 1809 }, { "epoch": 1.2, "learning_rate": 1.57874844849366e-05, "loss": 0.5203, "step": 1810 }, { "epoch": 1.2, "learning_rate": 1.578316328841178e-05, "loss": 0.5127, "step": 1811 }, { "epoch": 1.2, "learning_rate": 1.5778840468747628e-05, "loss": 0.4468, "step": 1812 }, { "epoch": 1.2, "learning_rate": 1.57745160271574e-05, "loss": 0.4872, "step": 1813 }, { "epoch": 1.2, "learning_rate": 1.5770189964854834e-05, "loss": 0.4886, "step": 1814 }, { "epoch": 1.2, "learning_rate": 1.5765862283054105e-05, "loss": 0.5162, "step": 1815 }, { "epoch": 1.2, "learning_rate": 1.576153298296985e-05, "loss": 0.4946, "step": 1816 }, { "epoch": 1.21, "learning_rate": 1.575720206581715e-05, "loss": 0.5295, "step": 1817 }, { "epoch": 1.21, "learning_rate": 1.5752869532811555e-05, "loss": 0.513, "step": 1818 }, { "epoch": 1.21, "learning_rate": 1.5748535385169062e-05, "loss": 0.5128, "step": 1819 }, { "epoch": 1.21, "learning_rate": 1.5744199624106115e-05, "loss": 0.482, "step": 1820 }, { "epoch": 1.21, "learning_rate": 1.5739862250839623e-05, "loss": 0.4437, "step": 1821 }, { "epoch": 1.21, "learning_rate": 1.5735523266586935e-05, "loss": 0.5026, "step": 1822 }, { "epoch": 1.21, "learning_rate": 1.5731182672565865e-05, "loss": 0.4651, "step": 1823 }, { "epoch": 1.21, "learning_rate": 1.5726840469994658e-05, "loss": 0.5238, "step": 1824 }, { "epoch": 1.21, "learning_rate": 1.572249666009204e-05, "loss": 0.4972, "step": 1825 }, { "epoch": 1.21, "learning_rate": 1.5718151244077162e-05, "loss": 0.4722, "step": 1826 }, { "epoch": 1.21, "learning_rate": 1.571380422316964e-05, "loss": 0.5043, "step": 1827 }, { "epoch": 1.21, "learning_rate": 1.5709455598589537e-05, "loss": 0.436, "step": 1828 }, { "epoch": 1.21, "learning_rate": 1.5705105371557362e-05, "loss": 0.4849, "step": 1829 }, { "epoch": 1.21, "learning_rate": 1.570075354329408e-05, "loss": 0.4757, "step": 1830 }, { "epoch": 1.21, "learning_rate": 1.5696400115021102e-05, "loss": 0.4852, "step": 1831 }, { "epoch": 1.22, "learning_rate": 1.5692045087960294e-05, "loss": 0.4869, "step": 1832 }, { "epoch": 1.22, "learning_rate": 1.5687688463333954e-05, "loss": 0.5459, "step": 1833 }, { "epoch": 1.22, "learning_rate": 1.568333024236485e-05, "loss": 0.4779, "step": 1834 }, { "epoch": 1.22, "learning_rate": 1.5678970426276186e-05, "loss": 0.508, "step": 1835 }, { "epoch": 1.22, "learning_rate": 1.5674609016291613e-05, "loss": 0.4839, "step": 1836 }, { "epoch": 1.22, "learning_rate": 1.5670246013635232e-05, "loss": 0.4917, "step": 1837 }, { "epoch": 1.22, "learning_rate": 1.5665881419531593e-05, "loss": 0.5003, "step": 1838 }, { "epoch": 1.22, "learning_rate": 1.566151523520569e-05, "loss": 0.4802, "step": 1839 }, { "epoch": 1.22, "learning_rate": 1.5657147461882965e-05, "loss": 0.49, "step": 1840 }, { "epoch": 1.22, "learning_rate": 1.5652778100789304e-05, "loss": 0.4841, "step": 1841 }, { "epoch": 1.22, "learning_rate": 1.564840715315104e-05, "loss": 0.4745, "step": 1842 }, { "epoch": 1.22, "learning_rate": 1.5644034620194953e-05, "loss": 0.5214, "step": 1843 }, { "epoch": 1.22, "learning_rate": 1.563966050314826e-05, "loss": 0.4715, "step": 1844 }, { "epoch": 1.22, "learning_rate": 1.5635284803238632e-05, "loss": 0.4653, "step": 1845 }, { "epoch": 1.23, "learning_rate": 1.5630907521694184e-05, "loss": 0.4968, "step": 1846 }, { "epoch": 1.23, "learning_rate": 1.5626528659743466e-05, "loss": 0.507, "step": 1847 }, { "epoch": 1.23, "learning_rate": 1.562214821861548e-05, "loss": 0.4981, "step": 1848 }, { "epoch": 1.23, "learning_rate": 1.561776619953967e-05, "loss": 0.4915, "step": 1849 }, { "epoch": 1.23, "learning_rate": 1.5613382603745918e-05, "loss": 0.5093, "step": 1850 }, { "epoch": 1.23, "learning_rate": 1.560899743246455e-05, "loss": 0.49, "step": 1851 }, { "epoch": 1.23, "learning_rate": 1.5604610686926346e-05, "loss": 0.4908, "step": 1852 }, { "epoch": 1.23, "learning_rate": 1.5600222368362506e-05, "loss": 0.5194, "step": 1853 }, { "epoch": 1.23, "learning_rate": 1.5595832478004685e-05, "loss": 0.4776, "step": 1854 }, { "epoch": 1.23, "learning_rate": 1.559144101708499e-05, "loss": 0.4756, "step": 1855 }, { "epoch": 1.23, "learning_rate": 1.5587047986835942e-05, "loss": 0.4733, "step": 1856 }, { "epoch": 1.23, "learning_rate": 1.558265338849052e-05, "loss": 0.5164, "step": 1857 }, { "epoch": 1.23, "learning_rate": 1.5578257223282146e-05, "loss": 0.5059, "step": 1858 }, { "epoch": 1.23, "learning_rate": 1.5573859492444672e-05, "loss": 0.4619, "step": 1859 }, { "epoch": 1.23, "learning_rate": 1.556946019721239e-05, "loss": 0.4917, "step": 1860 }, { "epoch": 1.24, "learning_rate": 1.556505933882004e-05, "loss": 0.5133, "step": 1861 }, { "epoch": 1.24, "learning_rate": 1.5560656918502787e-05, "loss": 0.5143, "step": 1862 }, { "epoch": 1.24, "learning_rate": 1.555625293749625e-05, "loss": 0.4908, "step": 1863 }, { "epoch": 1.24, "learning_rate": 1.5551847397036476e-05, "loss": 0.465, "step": 1864 }, { "epoch": 1.24, "learning_rate": 1.5547440298359948e-05, "loss": 0.5334, "step": 1865 }, { "epoch": 1.24, "learning_rate": 1.5543031642703594e-05, "loss": 0.495, "step": 1866 }, { "epoch": 1.24, "learning_rate": 1.553862143130478e-05, "loss": 0.4917, "step": 1867 }, { "epoch": 1.24, "learning_rate": 1.553420966540129e-05, "loss": 0.5022, "step": 1868 }, { "epoch": 1.24, "learning_rate": 1.5529796346231376e-05, "loss": 0.4532, "step": 1869 }, { "epoch": 1.24, "learning_rate": 1.5525381475033692e-05, "loss": 0.4638, "step": 1870 }, { "epoch": 1.24, "learning_rate": 1.5520965053047353e-05, "loss": 0.5026, "step": 1871 }, { "epoch": 1.24, "learning_rate": 1.55165470815119e-05, "loss": 0.4906, "step": 1872 }, { "epoch": 1.24, "learning_rate": 1.5512127561667304e-05, "loss": 0.4698, "step": 1873 }, { "epoch": 1.24, "learning_rate": 1.550770649475398e-05, "loss": 0.4757, "step": 1874 }, { "epoch": 1.24, "learning_rate": 1.550328388201277e-05, "loss": 0.4805, "step": 1875 }, { "epoch": 1.25, "learning_rate": 1.5498859724684953e-05, "loss": 0.4983, "step": 1876 }, { "epoch": 1.25, "learning_rate": 1.5494434024012247e-05, "loss": 0.5818, "step": 1877 }, { "epoch": 1.25, "learning_rate": 1.5490006781236785e-05, "loss": 0.4802, "step": 1878 }, { "epoch": 1.25, "learning_rate": 1.5485577997601158e-05, "loss": 0.4864, "step": 1879 }, { "epoch": 1.25, "learning_rate": 1.5481147674348366e-05, "loss": 0.4753, "step": 1880 }, { "epoch": 1.25, "learning_rate": 1.547671581272186e-05, "loss": 0.5381, "step": 1881 }, { "epoch": 1.25, "learning_rate": 1.5472282413965508e-05, "loss": 0.5472, "step": 1882 }, { "epoch": 1.25, "learning_rate": 1.5467847479323622e-05, "loss": 0.5016, "step": 1883 }, { "epoch": 1.25, "learning_rate": 1.546341101004093e-05, "loss": 0.4962, "step": 1884 }, { "epoch": 1.25, "learning_rate": 1.545897300736261e-05, "loss": 0.514, "step": 1885 }, { "epoch": 1.25, "learning_rate": 1.5454533472534253e-05, "loss": 0.5045, "step": 1886 }, { "epoch": 1.25, "learning_rate": 1.5450092406801892e-05, "loss": 0.464, "step": 1887 }, { "epoch": 1.25, "learning_rate": 1.544564981141198e-05, "loss": 0.4967, "step": 1888 }, { "epoch": 1.25, "learning_rate": 1.5441205687611403e-05, "loss": 0.5189, "step": 1889 }, { "epoch": 1.25, "learning_rate": 1.5436760036647485e-05, "loss": 0.4891, "step": 1890 }, { "epoch": 1.26, "learning_rate": 1.5432312859767963e-05, "loss": 0.483, "step": 1891 }, { "epoch": 1.26, "learning_rate": 1.5427864158221015e-05, "loss": 0.5056, "step": 1892 }, { "epoch": 1.26, "learning_rate": 1.5423413933255237e-05, "loss": 0.4923, "step": 1893 }, { "epoch": 1.26, "learning_rate": 1.541896218611966e-05, "loss": 0.497, "step": 1894 }, { "epoch": 1.26, "learning_rate": 1.541450891806374e-05, "loss": 0.5341, "step": 1895 }, { "epoch": 1.26, "learning_rate": 1.5410054130337358e-05, "loss": 0.5075, "step": 1896 }, { "epoch": 1.26, "learning_rate": 1.5405597824190822e-05, "loss": 0.5031, "step": 1897 }, { "epoch": 1.26, "learning_rate": 1.5401140000874873e-05, "loss": 0.5042, "step": 1898 }, { "epoch": 1.26, "learning_rate": 1.5396680661640667e-05, "loss": 0.4972, "step": 1899 }, { "epoch": 1.26, "learning_rate": 1.539221980773979e-05, "loss": 0.487, "step": 1900 }, { "epoch": 1.26, "learning_rate": 1.538775744042426e-05, "loss": 0.4908, "step": 1901 }, { "epoch": 1.26, "learning_rate": 1.5383293560946505e-05, "loss": 0.5037, "step": 1902 }, { "epoch": 1.26, "learning_rate": 1.5378828170559387e-05, "loss": 0.5053, "step": 1903 }, { "epoch": 1.26, "learning_rate": 1.5374361270516197e-05, "loss": 0.467, "step": 1904 }, { "epoch": 1.26, "learning_rate": 1.5369892862070636e-05, "loss": 0.5163, "step": 1905 }, { "epoch": 1.27, "learning_rate": 1.5365422946476842e-05, "loss": 0.4656, "step": 1906 }, { "epoch": 1.27, "learning_rate": 1.5360951524989367e-05, "loss": 0.4932, "step": 1907 }, { "epoch": 1.27, "learning_rate": 1.5356478598863187e-05, "loss": 0.4975, "step": 1908 }, { "epoch": 1.27, "learning_rate": 1.5352004169353706e-05, "loss": 0.4963, "step": 1909 }, { "epoch": 1.27, "learning_rate": 1.5347528237716742e-05, "loss": 0.453, "step": 1910 }, { "epoch": 1.27, "learning_rate": 1.5343050805208543e-05, "loss": 0.4866, "step": 1911 }, { "epoch": 1.27, "learning_rate": 1.533857187308577e-05, "loss": 0.4702, "step": 1912 }, { "epoch": 1.27, "learning_rate": 1.533409144260551e-05, "loss": 0.5095, "step": 1913 }, { "epoch": 1.27, "learning_rate": 1.5329609515025262e-05, "loss": 0.4992, "step": 1914 }, { "epoch": 1.27, "learning_rate": 1.5325126091602965e-05, "loss": 0.4683, "step": 1915 }, { "epoch": 1.27, "learning_rate": 1.532064117359696e-05, "loss": 0.4832, "step": 1916 }, { "epoch": 1.27, "learning_rate": 1.5316154762266008e-05, "loss": 0.4995, "step": 1917 }, { "epoch": 1.27, "learning_rate": 1.5311666858869296e-05, "loss": 0.4624, "step": 1918 }, { "epoch": 1.27, "learning_rate": 1.530717746466643e-05, "loss": 0.4787, "step": 1919 }, { "epoch": 1.27, "learning_rate": 1.5302686580917428e-05, "loss": 0.4613, "step": 1920 }, { "epoch": 1.28, "learning_rate": 1.5298194208882735e-05, "loss": 0.4764, "step": 1921 }, { "epoch": 1.28, "learning_rate": 1.5293700349823203e-05, "loss": 0.4643, "step": 1922 }, { "epoch": 1.28, "learning_rate": 1.5289205005000113e-05, "loss": 0.4731, "step": 1923 }, { "epoch": 1.28, "learning_rate": 1.5284708175675153e-05, "loss": 0.485, "step": 1924 }, { "epoch": 1.28, "learning_rate": 1.528020986311043e-05, "loss": 0.4789, "step": 1925 }, { "epoch": 1.28, "learning_rate": 1.5275710068568477e-05, "loss": 0.4775, "step": 1926 }, { "epoch": 1.28, "learning_rate": 1.5271208793312226e-05, "loss": 0.4853, "step": 1927 }, { "epoch": 1.28, "learning_rate": 1.5266706038605038e-05, "loss": 0.5434, "step": 1928 }, { "epoch": 1.28, "learning_rate": 1.5262201805710683e-05, "loss": 0.5327, "step": 1929 }, { "epoch": 1.28, "learning_rate": 1.525769609589335e-05, "loss": 0.4669, "step": 1930 }, { "epoch": 1.28, "learning_rate": 1.5253188910417636e-05, "loss": 0.466, "step": 1931 }, { "epoch": 1.28, "learning_rate": 1.5248680250548558e-05, "loss": 0.4469, "step": 1932 }, { "epoch": 1.28, "learning_rate": 1.524417011755155e-05, "loss": 0.5053, "step": 1933 }, { "epoch": 1.28, "learning_rate": 1.5239658512692447e-05, "loss": 0.4796, "step": 1934 }, { "epoch": 1.28, "learning_rate": 1.523514543723751e-05, "loss": 0.4903, "step": 1935 }, { "epoch": 1.29, "learning_rate": 1.5230630892453407e-05, "loss": 0.4798, "step": 1936 }, { "epoch": 1.29, "learning_rate": 1.5226114879607215e-05, "loss": 0.4407, "step": 1937 }, { "epoch": 1.29, "learning_rate": 1.522159739996643e-05, "loss": 0.5244, "step": 1938 }, { "epoch": 1.29, "learning_rate": 1.5217078454798952e-05, "loss": 0.4863, "step": 1939 }, { "epoch": 1.29, "learning_rate": 1.5212558045373106e-05, "loss": 0.473, "step": 1940 }, { "epoch": 1.29, "learning_rate": 1.5208036172957612e-05, "loss": 0.5057, "step": 1941 }, { "epoch": 1.29, "learning_rate": 1.5203512838821609e-05, "loss": 0.4693, "step": 1942 }, { "epoch": 1.29, "learning_rate": 1.5198988044234644e-05, "loss": 0.503, "step": 1943 }, { "epoch": 1.29, "learning_rate": 1.5194461790466674e-05, "loss": 0.502, "step": 1944 }, { "epoch": 1.29, "learning_rate": 1.5189934078788069e-05, "loss": 0.493, "step": 1945 }, { "epoch": 1.29, "learning_rate": 1.5185404910469604e-05, "loss": 0.4808, "step": 1946 }, { "epoch": 1.29, "learning_rate": 1.5180874286782464e-05, "loss": 0.4626, "step": 1947 }, { "epoch": 1.29, "learning_rate": 1.517634220899824e-05, "loss": 0.4917, "step": 1948 }, { "epoch": 1.29, "learning_rate": 1.5171808678388934e-05, "loss": 0.4725, "step": 1949 }, { "epoch": 1.3, "learning_rate": 1.5167273696226965e-05, "loss": 0.4932, "step": 1950 }, { "epoch": 1.3, "learning_rate": 1.516273726378514e-05, "loss": 0.4965, "step": 1951 }, { "epoch": 1.3, "learning_rate": 1.5158199382336678e-05, "loss": 0.5235, "step": 1952 }, { "epoch": 1.3, "learning_rate": 1.5153660053155227e-05, "loss": 0.5067, "step": 1953 }, { "epoch": 1.3, "learning_rate": 1.5149119277514808e-05, "loss": 0.4946, "step": 1954 }, { "epoch": 1.3, "learning_rate": 1.5144577056689872e-05, "loss": 0.5235, "step": 1955 }, { "epoch": 1.3, "learning_rate": 1.5140033391955266e-05, "loss": 0.4944, "step": 1956 }, { "epoch": 1.3, "learning_rate": 1.5135488284586241e-05, "loss": 0.4987, "step": 1957 }, { "epoch": 1.3, "learning_rate": 1.513094173585846e-05, "loss": 0.5327, "step": 1958 }, { "epoch": 1.3, "learning_rate": 1.5126393747047983e-05, "loss": 0.5004, "step": 1959 }, { "epoch": 1.3, "learning_rate": 1.512184431943128e-05, "loss": 0.4803, "step": 1960 }, { "epoch": 1.3, "learning_rate": 1.5117293454285217e-05, "loss": 0.5076, "step": 1961 }, { "epoch": 1.3, "learning_rate": 1.5112741152887078e-05, "loss": 0.5194, "step": 1962 }, { "epoch": 1.3, "learning_rate": 1.5108187416514533e-05, "loss": 0.4837, "step": 1963 }, { "epoch": 1.3, "learning_rate": 1.5103632246445666e-05, "loss": 0.4671, "step": 1964 }, { "epoch": 1.31, "learning_rate": 1.5099075643958959e-05, "loss": 0.5293, "step": 1965 }, { "epoch": 1.31, "learning_rate": 1.5094517610333294e-05, "loss": 0.5148, "step": 1966 }, { "epoch": 1.31, "learning_rate": 1.5089958146847965e-05, "loss": 0.5091, "step": 1967 }, { "epoch": 1.31, "learning_rate": 1.5085397254782655e-05, "loss": 0.4895, "step": 1968 }, { "epoch": 1.31, "learning_rate": 1.508083493541745e-05, "loss": 0.5222, "step": 1969 }, { "epoch": 1.31, "learning_rate": 1.5076271190032845e-05, "loss": 0.488, "step": 1970 }, { "epoch": 1.31, "learning_rate": 1.507170601990973e-05, "loss": 0.492, "step": 1971 }, { "epoch": 1.31, "learning_rate": 1.5067139426329389e-05, "loss": 0.466, "step": 1972 }, { "epoch": 1.31, "learning_rate": 1.5062571410573515e-05, "loss": 0.5013, "step": 1973 }, { "epoch": 1.31, "learning_rate": 1.5058001973924197e-05, "loss": 0.5407, "step": 1974 }, { "epoch": 1.31, "learning_rate": 1.5053431117663922e-05, "loss": 0.5054, "step": 1975 }, { "epoch": 1.31, "learning_rate": 1.5048858843075573e-05, "loss": 0.4888, "step": 1976 }, { "epoch": 1.31, "learning_rate": 1.5044285151442437e-05, "loss": 0.4669, "step": 1977 }, { "epoch": 1.31, "learning_rate": 1.5039710044048192e-05, "loss": 0.5075, "step": 1978 }, { "epoch": 1.31, "learning_rate": 1.5035133522176916e-05, "loss": 0.5005, "step": 1979 }, { "epoch": 1.32, "learning_rate": 1.5030555587113091e-05, "loss": 0.5604, "step": 1980 }, { "epoch": 1.32, "learning_rate": 1.5025976240141585e-05, "loss": 0.5057, "step": 1981 }, { "epoch": 1.32, "learning_rate": 1.5021395482547665e-05, "loss": 0.494, "step": 1982 }, { "epoch": 1.32, "learning_rate": 1.5016813315616998e-05, "loss": 0.4726, "step": 1983 }, { "epoch": 1.32, "learning_rate": 1.5012229740635644e-05, "loss": 0.4818, "step": 1984 }, { "epoch": 1.32, "learning_rate": 1.5007644758890059e-05, "loss": 0.4719, "step": 1985 }, { "epoch": 1.32, "learning_rate": 1.5003058371667087e-05, "loss": 0.52, "step": 1986 }, { "epoch": 1.32, "learning_rate": 1.4998470580253981e-05, "loss": 0.4593, "step": 1987 }, { "epoch": 1.32, "learning_rate": 1.4993881385938376e-05, "loss": 0.5065, "step": 1988 }, { "epoch": 1.32, "learning_rate": 1.4989290790008304e-05, "loss": 0.4921, "step": 1989 }, { "epoch": 1.32, "learning_rate": 1.4984698793752193e-05, "loss": 0.5069, "step": 1990 }, { "epoch": 1.32, "learning_rate": 1.498010539845886e-05, "loss": 0.4687, "step": 1991 }, { "epoch": 1.32, "learning_rate": 1.4975510605417514e-05, "loss": 0.4661, "step": 1992 }, { "epoch": 1.32, "learning_rate": 1.4970914415917764e-05, "loss": 0.5055, "step": 1993 }, { "epoch": 1.32, "learning_rate": 1.4966316831249601e-05, "loss": 0.4628, "step": 1994 }, { "epoch": 1.33, "learning_rate": 1.4961717852703417e-05, "loss": 0.5247, "step": 1995 }, { "epoch": 1.33, "learning_rate": 1.4957117481569987e-05, "loss": 0.4945, "step": 1996 }, { "epoch": 1.33, "learning_rate": 1.4952515719140482e-05, "loss": 0.5304, "step": 1997 }, { "epoch": 1.33, "learning_rate": 1.4947912566706459e-05, "loss": 0.5036, "step": 1998 }, { "epoch": 1.33, "learning_rate": 1.4943308025559871e-05, "loss": 0.488, "step": 1999 }, { "epoch": 1.33, "learning_rate": 1.4938702096993057e-05, "loss": 0.4976, "step": 2000 }, { "epoch": 1.33, "learning_rate": 1.4934094782298747e-05, "loss": 0.4887, "step": 2001 }, { "epoch": 1.33, "learning_rate": 1.4929486082770059e-05, "loss": 0.5295, "step": 2002 }, { "epoch": 1.33, "learning_rate": 1.4924875999700499e-05, "loss": 0.4886, "step": 2003 }, { "epoch": 1.33, "learning_rate": 1.4920264534383962e-05, "loss": 0.4646, "step": 2004 }, { "epoch": 1.33, "learning_rate": 1.4915651688114733e-05, "loss": 0.5137, "step": 2005 }, { "epoch": 1.33, "learning_rate": 1.491103746218748e-05, "loss": 0.4779, "step": 2006 }, { "epoch": 1.33, "learning_rate": 1.490642185789726e-05, "loss": 0.4943, "step": 2007 }, { "epoch": 1.33, "learning_rate": 1.4901804876539522e-05, "loss": 0.52, "step": 2008 }, { "epoch": 1.33, "learning_rate": 1.4897186519410095e-05, "loss": 0.4914, "step": 2009 }, { "epoch": 1.34, "learning_rate": 1.48925667878052e-05, "loss": 0.4862, "step": 2010 }, { "epoch": 1.34, "learning_rate": 1.4887945683021436e-05, "loss": 0.5101, "step": 2011 }, { "epoch": 1.34, "learning_rate": 1.4883323206355791e-05, "loss": 0.4578, "step": 2012 }, { "epoch": 1.34, "learning_rate": 1.4878699359105641e-05, "loss": 0.4595, "step": 2013 }, { "epoch": 1.34, "learning_rate": 1.4874074142568741e-05, "loss": 0.5145, "step": 2014 }, { "epoch": 1.34, "learning_rate": 1.486944755804324e-05, "loss": 0.4861, "step": 2015 }, { "epoch": 1.34, "learning_rate": 1.4864819606827664e-05, "loss": 0.5072, "step": 2016 }, { "epoch": 1.34, "learning_rate": 1.4860190290220913e-05, "loss": 0.5119, "step": 2017 }, { "epoch": 1.34, "learning_rate": 1.4855559609522292e-05, "loss": 0.4695, "step": 2018 }, { "epoch": 1.34, "learning_rate": 1.4850927566031472e-05, "loss": 0.4962, "step": 2019 }, { "epoch": 1.34, "learning_rate": 1.484629416104851e-05, "loss": 0.4925, "step": 2020 }, { "epoch": 1.34, "learning_rate": 1.4841659395873852e-05, "loss": 0.4892, "step": 2021 }, { "epoch": 1.34, "learning_rate": 1.4837023271808317e-05, "loss": 0.5147, "step": 2022 }, { "epoch": 1.34, "learning_rate": 1.483238579015311e-05, "loss": 0.4766, "step": 2023 }, { "epoch": 1.34, "learning_rate": 1.4827746952209816e-05, "loss": 0.5117, "step": 2024 }, { "epoch": 1.35, "learning_rate": 1.4823106759280404e-05, "loss": 0.4925, "step": 2025 }, { "epoch": 1.35, "learning_rate": 1.4818465212667213e-05, "loss": 0.5164, "step": 2026 }, { "epoch": 1.35, "learning_rate": 1.4813822313672974e-05, "loss": 0.4803, "step": 2027 }, { "epoch": 1.35, "learning_rate": 1.480917806360079e-05, "loss": 0.4733, "step": 2028 }, { "epoch": 1.35, "learning_rate": 1.4804532463754148e-05, "loss": 0.4636, "step": 2029 }, { "epoch": 1.35, "learning_rate": 1.4799885515436912e-05, "loss": 0.4766, "step": 2030 }, { "epoch": 1.35, "learning_rate": 1.4795237219953323e-05, "loss": 0.497, "step": 2031 }, { "epoch": 1.35, "learning_rate": 1.4790587578607998e-05, "loss": 0.5008, "step": 2032 }, { "epoch": 1.35, "learning_rate": 1.4785936592705938e-05, "loss": 0.4939, "step": 2033 }, { "epoch": 1.35, "learning_rate": 1.478128426355252e-05, "loss": 0.5068, "step": 2034 }, { "epoch": 1.35, "learning_rate": 1.4776630592453492e-05, "loss": 0.4975, "step": 2035 }, { "epoch": 1.35, "learning_rate": 1.4771975580714986e-05, "loss": 0.5264, "step": 2036 }, { "epoch": 1.35, "learning_rate": 1.4767319229643506e-05, "loss": 0.4815, "step": 2037 }, { "epoch": 1.35, "learning_rate": 1.4762661540545932e-05, "loss": 0.528, "step": 2038 }, { "epoch": 1.36, "learning_rate": 1.4758002514729524e-05, "loss": 0.4937, "step": 2039 }, { "epoch": 1.36, "learning_rate": 1.4753342153501913e-05, "loss": 0.48, "step": 2040 }, { "epoch": 1.36, "learning_rate": 1.4748680458171099e-05, "loss": 0.4883, "step": 2041 }, { "epoch": 1.36, "learning_rate": 1.4744017430045473e-05, "loss": 0.4962, "step": 2042 }, { "epoch": 1.36, "learning_rate": 1.4739353070433784e-05, "loss": 0.5208, "step": 2043 }, { "epoch": 1.36, "learning_rate": 1.473468738064516e-05, "loss": 0.4529, "step": 2044 }, { "epoch": 1.36, "learning_rate": 1.4730020361989108e-05, "loss": 0.5103, "step": 2045 }, { "epoch": 1.36, "learning_rate": 1.47253520157755e-05, "loss": 0.5091, "step": 2046 }, { "epoch": 1.36, "learning_rate": 1.472068234331458e-05, "loss": 0.5402, "step": 2047 }, { "epoch": 1.36, "learning_rate": 1.4716011345916976e-05, "loss": 0.5004, "step": 2048 }, { "epoch": 1.36, "learning_rate": 1.4711339024893674e-05, "loss": 0.4977, "step": 2049 }, { "epoch": 1.36, "learning_rate": 1.470666538155604e-05, "loss": 0.4738, "step": 2050 }, { "epoch": 1.36, "learning_rate": 1.4701990417215807e-05, "loss": 0.4804, "step": 2051 }, { "epoch": 1.36, "learning_rate": 1.4697314133185083e-05, "loss": 0.4805, "step": 2052 }, { "epoch": 1.36, "learning_rate": 1.4692636530776336e-05, "loss": 0.5, "step": 2053 }, { "epoch": 1.37, "learning_rate": 1.468795761130242e-05, "loss": 0.5291, "step": 2054 }, { "epoch": 1.37, "learning_rate": 1.4683277376076548e-05, "loss": 0.5034, "step": 2055 }, { "epoch": 1.37, "learning_rate": 1.4678595826412303e-05, "loss": 0.4834, "step": 2056 }, { "epoch": 1.37, "learning_rate": 1.4673912963623637e-05, "loss": 0.5055, "step": 2057 }, { "epoch": 1.37, "learning_rate": 1.4669228789024877e-05, "loss": 0.4725, "step": 2058 }, { "epoch": 1.37, "learning_rate": 1.466454330393071e-05, "loss": 0.4926, "step": 2059 }, { "epoch": 1.37, "learning_rate": 1.4659856509656194e-05, "loss": 0.5579, "step": 2060 }, { "epoch": 1.37, "learning_rate": 1.4655168407516754e-05, "loss": 0.5123, "step": 2061 }, { "epoch": 1.37, "learning_rate": 1.465047899882818e-05, "loss": 0.4895, "step": 2062 }, { "epoch": 1.37, "learning_rate": 1.4645788284906639e-05, "loss": 0.5141, "step": 2063 }, { "epoch": 1.37, "learning_rate": 1.464109626706865e-05, "loss": 0.4875, "step": 2064 }, { "epoch": 1.37, "learning_rate": 1.4636402946631108e-05, "loss": 0.5015, "step": 2065 }, { "epoch": 1.37, "learning_rate": 1.4631708324911269e-05, "loss": 0.4835, "step": 2066 }, { "epoch": 1.37, "learning_rate": 1.4627012403226752e-05, "loss": 0.5068, "step": 2067 }, { "epoch": 1.37, "learning_rate": 1.462231518289555e-05, "loss": 0.5033, "step": 2068 }, { "epoch": 1.38, "learning_rate": 1.461761666523601e-05, "loss": 0.5302, "step": 2069 }, { "epoch": 1.38, "learning_rate": 1.4612916851566851e-05, "loss": 0.4861, "step": 2070 }, { "epoch": 1.38, "learning_rate": 1.4608215743207153e-05, "loss": 0.4787, "step": 2071 }, { "epoch": 1.38, "learning_rate": 1.4603513341476354e-05, "loss": 0.5375, "step": 2072 }, { "epoch": 1.38, "learning_rate": 1.4598809647694266e-05, "loss": 0.509, "step": 2073 }, { "epoch": 1.38, "learning_rate": 1.4594104663181052e-05, "loss": 0.4946, "step": 2074 }, { "epoch": 1.38, "learning_rate": 1.4589398389257246e-05, "loss": 0.5032, "step": 2075 }, { "epoch": 1.38, "learning_rate": 1.458469082724374e-05, "loss": 0.4754, "step": 2076 }, { "epoch": 1.38, "learning_rate": 1.4579981978461792e-05, "loss": 0.4844, "step": 2077 }, { "epoch": 1.38, "learning_rate": 1.457527184423301e-05, "loss": 0.4729, "step": 2078 }, { "epoch": 1.38, "learning_rate": 1.4570560425879377e-05, "loss": 0.4808, "step": 2079 }, { "epoch": 1.38, "learning_rate": 1.4565847724723225e-05, "loss": 0.4565, "step": 2080 }, { "epoch": 1.38, "learning_rate": 1.4561133742087253e-05, "loss": 0.4898, "step": 2081 }, { "epoch": 1.38, "learning_rate": 1.4556418479294514e-05, "loss": 0.5159, "step": 2082 }, { "epoch": 1.38, "learning_rate": 1.455170193766843e-05, "loss": 0.5023, "step": 2083 }, { "epoch": 1.39, "learning_rate": 1.454698411853277e-05, "loss": 0.5002, "step": 2084 }, { "epoch": 1.39, "learning_rate": 1.454226502321167e-05, "loss": 0.4662, "step": 2085 }, { "epoch": 1.39, "learning_rate": 1.453754465302962e-05, "loss": 0.4774, "step": 2086 }, { "epoch": 1.39, "learning_rate": 1.453282300931147e-05, "loss": 0.5091, "step": 2087 }, { "epoch": 1.39, "learning_rate": 1.4528100093382422e-05, "loss": 0.4902, "step": 2088 }, { "epoch": 1.39, "learning_rate": 1.4523375906568048e-05, "loss": 0.4714, "step": 2089 }, { "epoch": 1.39, "learning_rate": 1.4518650450194261e-05, "loss": 0.4562, "step": 2090 }, { "epoch": 1.39, "learning_rate": 1.451392372558734e-05, "loss": 0.5023, "step": 2091 }, { "epoch": 1.39, "learning_rate": 1.4509195734073917e-05, "loss": 0.5238, "step": 2092 }, { "epoch": 1.39, "learning_rate": 1.4504466476980983e-05, "loss": 0.4943, "step": 2093 }, { "epoch": 1.39, "learning_rate": 1.4499735955635882e-05, "loss": 0.5044, "step": 2094 }, { "epoch": 1.39, "learning_rate": 1.4495004171366302e-05, "loss": 0.4935, "step": 2095 }, { "epoch": 1.39, "learning_rate": 1.4490271125500306e-05, "loss": 0.5045, "step": 2096 }, { "epoch": 1.39, "learning_rate": 1.4485536819366299e-05, "loss": 0.4973, "step": 2097 }, { "epoch": 1.39, "learning_rate": 1.448080125429304e-05, "loss": 0.5382, "step": 2098 }, { "epoch": 1.4, "learning_rate": 1.4476064431609641e-05, "loss": 0.4492, "step": 2099 }, { "epoch": 1.4, "learning_rate": 1.4471326352645573e-05, "loss": 0.4704, "step": 2100 }, { "epoch": 1.4, "learning_rate": 1.4466587018730654e-05, "loss": 0.4742, "step": 2101 }, { "epoch": 1.4, "learning_rate": 1.4461846431195053e-05, "loss": 0.4793, "step": 2102 }, { "epoch": 1.4, "learning_rate": 1.4457104591369293e-05, "loss": 0.4901, "step": 2103 }, { "epoch": 1.4, "learning_rate": 1.4452361500584252e-05, "loss": 0.4717, "step": 2104 }, { "epoch": 1.4, "learning_rate": 1.4447617160171154e-05, "loss": 0.486, "step": 2105 }, { "epoch": 1.4, "learning_rate": 1.444287157146158e-05, "loss": 0.4732, "step": 2106 }, { "epoch": 1.4, "learning_rate": 1.443812473578745e-05, "loss": 0.504, "step": 2107 }, { "epoch": 1.4, "learning_rate": 1.4433376654481046e-05, "loss": 0.4926, "step": 2108 }, { "epoch": 1.4, "learning_rate": 1.442862732887499e-05, "loss": 0.4825, "step": 2109 }, { "epoch": 1.4, "learning_rate": 1.4423876760302266e-05, "loss": 0.5016, "step": 2110 }, { "epoch": 1.4, "learning_rate": 1.4419124950096192e-05, "loss": 0.4784, "step": 2111 }, { "epoch": 1.4, "learning_rate": 1.4414371899590445e-05, "loss": 0.5022, "step": 2112 }, { "epoch": 1.4, "learning_rate": 1.4409617610119041e-05, "loss": 0.539, "step": 2113 }, { "epoch": 1.41, "learning_rate": 1.4404862083016355e-05, "loss": 0.4778, "step": 2114 }, { "epoch": 1.41, "learning_rate": 1.4400105319617102e-05, "loss": 0.4949, "step": 2115 }, { "epoch": 1.41, "learning_rate": 1.439534732125634e-05, "loss": 0.5068, "step": 2116 }, { "epoch": 1.41, "learning_rate": 1.4390588089269488e-05, "loss": 0.5148, "step": 2117 }, { "epoch": 1.41, "learning_rate": 1.4385827624992297e-05, "loss": 0.4851, "step": 2118 }, { "epoch": 1.41, "learning_rate": 1.4381065929760867e-05, "loss": 0.4907, "step": 2119 }, { "epoch": 1.41, "learning_rate": 1.4376303004911654e-05, "loss": 0.4712, "step": 2120 }, { "epoch": 1.41, "learning_rate": 1.437153885178144e-05, "loss": 0.4832, "step": 2121 }, { "epoch": 1.41, "learning_rate": 1.4366773471707368e-05, "loss": 0.4634, "step": 2122 }, { "epoch": 1.41, "learning_rate": 1.4362006866026921e-05, "loss": 0.4701, "step": 2123 }, { "epoch": 1.41, "learning_rate": 1.4357239036077924e-05, "loss": 0.4728, "step": 2124 }, { "epoch": 1.41, "learning_rate": 1.4352469983198542e-05, "loss": 0.4946, "step": 2125 }, { "epoch": 1.41, "learning_rate": 1.4347699708727299e-05, "loss": 0.4711, "step": 2126 }, { "epoch": 1.41, "learning_rate": 1.4342928214003038e-05, "loss": 0.4568, "step": 2127 }, { "epoch": 1.41, "learning_rate": 1.433815550036496e-05, "loss": 0.4914, "step": 2128 }, { "epoch": 1.42, "learning_rate": 1.4333381569152612e-05, "loss": 0.4917, "step": 2129 }, { "epoch": 1.42, "learning_rate": 1.4328606421705868e-05, "loss": 0.4972, "step": 2130 }, { "epoch": 1.42, "learning_rate": 1.4323830059364953e-05, "loss": 0.4808, "step": 2131 }, { "epoch": 1.42, "learning_rate": 1.4319052483470437e-05, "loss": 0.495, "step": 2132 }, { "epoch": 1.42, "learning_rate": 1.4314273695363216e-05, "loss": 0.4679, "step": 2133 }, { "epoch": 1.42, "learning_rate": 1.4309493696384543e-05, "loss": 0.5073, "step": 2134 }, { "epoch": 1.42, "learning_rate": 1.4304712487875999e-05, "loss": 0.5183, "step": 2135 }, { "epoch": 1.42, "learning_rate": 1.4299930071179505e-05, "loss": 0.5257, "step": 2136 }, { "epoch": 1.42, "learning_rate": 1.4295146447637325e-05, "loss": 0.4997, "step": 2137 }, { "epoch": 1.42, "learning_rate": 1.4290361618592071e-05, "loss": 0.4862, "step": 2138 }, { "epoch": 1.42, "learning_rate": 1.4285575585386673e-05, "loss": 0.4498, "step": 2139 }, { "epoch": 1.42, "learning_rate": 1.4280788349364414e-05, "loss": 0.5013, "step": 2140 }, { "epoch": 1.42, "learning_rate": 1.4275999911868912e-05, "loss": 0.4696, "step": 2141 }, { "epoch": 1.42, "learning_rate": 1.4271210274244114e-05, "loss": 0.5032, "step": 2142 }, { "epoch": 1.43, "learning_rate": 1.4266419437834312e-05, "loss": 0.4575, "step": 2143 }, { "epoch": 1.43, "learning_rate": 1.4261627403984136e-05, "loss": 0.5013, "step": 2144 }, { "epoch": 1.43, "learning_rate": 1.4256834174038545e-05, "loss": 0.4897, "step": 2145 }, { "epoch": 1.43, "learning_rate": 1.4252039749342844e-05, "loss": 0.4883, "step": 2146 }, { "epoch": 1.43, "learning_rate": 1.4247244131242656e-05, "loss": 0.5088, "step": 2147 }, { "epoch": 1.43, "learning_rate": 1.4242447321083959e-05, "loss": 0.4663, "step": 2148 }, { "epoch": 1.43, "learning_rate": 1.4237649320213053e-05, "loss": 0.4839, "step": 2149 }, { "epoch": 1.43, "learning_rate": 1.4232850129976573e-05, "loss": 0.5, "step": 2150 }, { "epoch": 1.43, "learning_rate": 1.422804975172149e-05, "loss": 0.477, "step": 2151 }, { "epoch": 1.43, "learning_rate": 1.4223248186795116e-05, "loss": 0.4794, "step": 2152 }, { "epoch": 1.43, "learning_rate": 1.4218445436545081e-05, "loss": 0.4905, "step": 2153 }, { "epoch": 1.43, "learning_rate": 1.4213641502319358e-05, "loss": 0.4793, "step": 2154 }, { "epoch": 1.43, "learning_rate": 1.420883638546625e-05, "loss": 0.459, "step": 2155 }, { "epoch": 1.43, "learning_rate": 1.4204030087334391e-05, "loss": 0.4852, "step": 2156 }, { "epoch": 1.43, "learning_rate": 1.4199222609272747e-05, "loss": 0.4884, "step": 2157 }, { "epoch": 1.44, "learning_rate": 1.4194413952630615e-05, "loss": 0.5076, "step": 2158 }, { "epoch": 1.44, "learning_rate": 1.4189604118757626e-05, "loss": 0.4751, "step": 2159 }, { "epoch": 1.44, "learning_rate": 1.4184793109003734e-05, "loss": 0.5153, "step": 2160 }, { "epoch": 1.44, "learning_rate": 1.4179980924719231e-05, "loss": 0.5292, "step": 2161 }, { "epoch": 1.44, "learning_rate": 1.4175167567254735e-05, "loss": 0.5047, "step": 2162 }, { "epoch": 1.44, "learning_rate": 1.4170353037961193e-05, "loss": 0.53, "step": 2163 }, { "epoch": 1.44, "learning_rate": 1.416553733818988e-05, "loss": 0.4982, "step": 2164 }, { "epoch": 1.44, "learning_rate": 1.4160720469292402e-05, "loss": 0.4829, "step": 2165 }, { "epoch": 1.44, "learning_rate": 1.4155902432620692e-05, "loss": 0.4918, "step": 2166 }, { "epoch": 1.44, "learning_rate": 1.415108322952701e-05, "loss": 0.4779, "step": 2167 }, { "epoch": 1.44, "learning_rate": 1.4146262861363945e-05, "loss": 0.5003, "step": 2168 }, { "epoch": 1.44, "learning_rate": 1.4141441329484414e-05, "loss": 0.5143, "step": 2169 }, { "epoch": 1.44, "learning_rate": 1.4136618635241655e-05, "loss": 0.5223, "step": 2170 }, { "epoch": 1.44, "learning_rate": 1.4131794779989233e-05, "loss": 0.4915, "step": 2171 }, { "epoch": 1.44, "learning_rate": 1.4126969765081055e-05, "loss": 0.4968, "step": 2172 }, { "epoch": 1.45, "learning_rate": 1.4122143591871327e-05, "loss": 0.5001, "step": 2173 }, { "epoch": 1.45, "learning_rate": 1.41173162617146e-05, "loss": 0.4703, "step": 2174 }, { "epoch": 1.45, "learning_rate": 1.4112487775965741e-05, "loss": 0.5016, "step": 2175 }, { "epoch": 1.45, "learning_rate": 1.4107658135979944e-05, "loss": 0.4968, "step": 2176 }, { "epoch": 1.45, "learning_rate": 1.4102827343112725e-05, "loss": 0.507, "step": 2177 }, { "epoch": 1.45, "learning_rate": 1.4097995398719929e-05, "loss": 0.4762, "step": 2178 }, { "epoch": 1.45, "learning_rate": 1.4093162304157715e-05, "loss": 0.4871, "step": 2179 }, { "epoch": 1.45, "learning_rate": 1.4088328060782573e-05, "loss": 0.5035, "step": 2180 }, { "epoch": 1.45, "learning_rate": 1.4083492669951312e-05, "loss": 0.5147, "step": 2181 }, { "epoch": 1.45, "learning_rate": 1.4078656133021065e-05, "loss": 0.4859, "step": 2182 }, { "epoch": 1.45, "learning_rate": 1.4073818451349283e-05, "loss": 0.5278, "step": 2183 }, { "epoch": 1.45, "learning_rate": 1.406897962629374e-05, "loss": 0.5022, "step": 2184 }, { "epoch": 1.45, "learning_rate": 1.4064139659212534e-05, "loss": 0.4675, "step": 2185 }, { "epoch": 1.45, "learning_rate": 1.4059298551464077e-05, "loss": 0.5268, "step": 2186 }, { "epoch": 1.45, "learning_rate": 1.4054456304407111e-05, "loss": 0.5141, "step": 2187 }, { "epoch": 1.46, "learning_rate": 1.4049612919400687e-05, "loss": 0.5162, "step": 2188 }, { "epoch": 1.46, "learning_rate": 1.4044768397804181e-05, "loss": 0.494, "step": 2189 }, { "epoch": 1.46, "learning_rate": 1.403992274097729e-05, "loss": 0.492, "step": 2190 }, { "epoch": 1.46, "learning_rate": 1.4035075950280021e-05, "loss": 0.4818, "step": 2191 }, { "epoch": 1.46, "learning_rate": 1.403022802707271e-05, "loss": 0.4826, "step": 2192 }, { "epoch": 1.46, "learning_rate": 1.4025378972716004e-05, "loss": 0.5209, "step": 2193 }, { "epoch": 1.46, "learning_rate": 1.4020528788570871e-05, "loss": 0.4911, "step": 2194 }, { "epoch": 1.46, "learning_rate": 1.4015677475998595e-05, "loss": 0.4891, "step": 2195 }, { "epoch": 1.46, "learning_rate": 1.4010825036360771e-05, "loss": 0.5338, "step": 2196 }, { "epoch": 1.46, "learning_rate": 1.400597147101932e-05, "loss": 0.4825, "step": 2197 }, { "epoch": 1.46, "learning_rate": 1.4001116781336475e-05, "loss": 0.471, "step": 2198 }, { "epoch": 1.46, "learning_rate": 1.399626096867478e-05, "loss": 0.4719, "step": 2199 }, { "epoch": 1.46, "learning_rate": 1.3991404034397102e-05, "loss": 0.4844, "step": 2200 }, { "epoch": 1.46, "learning_rate": 1.398654597986662e-05, "loss": 0.4912, "step": 2201 }, { "epoch": 1.46, "learning_rate": 1.3981686806446822e-05, "loss": 0.509, "step": 2202 }, { "epoch": 1.47, "learning_rate": 1.3976826515501518e-05, "loss": 0.5028, "step": 2203 }, { "epoch": 1.47, "learning_rate": 1.3971965108394827e-05, "loss": 0.4754, "step": 2204 }, { "epoch": 1.47, "learning_rate": 1.3967102586491179e-05, "loss": 0.5052, "step": 2205 }, { "epoch": 1.47, "learning_rate": 1.3962238951155325e-05, "loss": 0.4803, "step": 2206 }, { "epoch": 1.47, "learning_rate": 1.3957374203752325e-05, "loss": 0.5064, "step": 2207 }, { "epoch": 1.47, "learning_rate": 1.3952508345647546e-05, "loss": 0.5004, "step": 2208 }, { "epoch": 1.47, "learning_rate": 1.3947641378206671e-05, "loss": 0.5101, "step": 2209 }, { "epoch": 1.47, "learning_rate": 1.3942773302795697e-05, "loss": 0.4889, "step": 2210 }, { "epoch": 1.47, "learning_rate": 1.3937904120780925e-05, "loss": 0.509, "step": 2211 }, { "epoch": 1.47, "learning_rate": 1.3933033833528971e-05, "loss": 0.5285, "step": 2212 }, { "epoch": 1.47, "learning_rate": 1.3928162442406765e-05, "loss": 0.5387, "step": 2213 }, { "epoch": 1.47, "learning_rate": 1.3923289948781538e-05, "loss": 0.5058, "step": 2214 }, { "epoch": 1.47, "learning_rate": 1.3918416354020836e-05, "loss": 0.4837, "step": 2215 }, { "epoch": 1.47, "learning_rate": 1.3913541659492516e-05, "loss": 0.4815, "step": 2216 }, { "epoch": 1.47, "learning_rate": 1.3908665866564736e-05, "loss": 0.4681, "step": 2217 }, { "epoch": 1.48, "learning_rate": 1.3903788976605972e-05, "loss": 0.506, "step": 2218 }, { "epoch": 1.48, "learning_rate": 1.3898910990985e-05, "loss": 0.5058, "step": 2219 }, { "epoch": 1.48, "learning_rate": 1.3894031911070904e-05, "loss": 0.4847, "step": 2220 }, { "epoch": 1.48, "learning_rate": 1.3889151738233087e-05, "loss": 0.4951, "step": 2221 }, { "epoch": 1.48, "learning_rate": 1.3884270473841235e-05, "loss": 0.4927, "step": 2222 }, { "epoch": 1.48, "learning_rate": 1.3879388119265366e-05, "loss": 0.4699, "step": 2223 }, { "epoch": 1.48, "learning_rate": 1.3874504675875791e-05, "loss": 0.5349, "step": 2224 }, { "epoch": 1.48, "learning_rate": 1.3869620145043123e-05, "loss": 0.4957, "step": 2225 }, { "epoch": 1.48, "learning_rate": 1.3864734528138288e-05, "loss": 0.4657, "step": 2226 }, { "epoch": 1.48, "learning_rate": 1.3859847826532518e-05, "loss": 0.511, "step": 2227 }, { "epoch": 1.48, "learning_rate": 1.385496004159734e-05, "loss": 0.5271, "step": 2228 }, { "epoch": 1.48, "learning_rate": 1.3850071174704596e-05, "loss": 0.4646, "step": 2229 }, { "epoch": 1.48, "learning_rate": 1.3845181227226423e-05, "loss": 0.4827, "step": 2230 }, { "epoch": 1.48, "learning_rate": 1.3840290200535265e-05, "loss": 0.4871, "step": 2231 }, { "epoch": 1.48, "learning_rate": 1.3835398096003869e-05, "loss": 0.4928, "step": 2232 }, { "epoch": 1.49, "learning_rate": 1.3830504915005285e-05, "loss": 0.4904, "step": 2233 }, { "epoch": 1.49, "learning_rate": 1.3825610658912863e-05, "loss": 0.4594, "step": 2234 }, { "epoch": 1.49, "learning_rate": 1.3820715329100256e-05, "loss": 0.4881, "step": 2235 }, { "epoch": 1.49, "learning_rate": 1.3815818926941422e-05, "loss": 0.4578, "step": 2236 }, { "epoch": 1.49, "learning_rate": 1.3810921453810611e-05, "loss": 0.4822, "step": 2237 }, { "epoch": 1.49, "learning_rate": 1.380602291108238e-05, "loss": 0.5512, "step": 2238 }, { "epoch": 1.49, "learning_rate": 1.3801123300131587e-05, "loss": 0.5119, "step": 2239 }, { "epoch": 1.49, "learning_rate": 1.3796222622333389e-05, "loss": 0.4778, "step": 2240 }, { "epoch": 1.49, "learning_rate": 1.379132087906324e-05, "loss": 0.4658, "step": 2241 }, { "epoch": 1.49, "learning_rate": 1.378641807169689e-05, "loss": 0.4784, "step": 2242 }, { "epoch": 1.49, "learning_rate": 1.3781514201610398e-05, "loss": 0.4842, "step": 2243 }, { "epoch": 1.49, "learning_rate": 1.3776609270180118e-05, "loss": 0.4689, "step": 2244 }, { "epoch": 1.49, "learning_rate": 1.3771703278782689e-05, "loss": 0.4997, "step": 2245 }, { "epoch": 1.49, "learning_rate": 1.3766796228795064e-05, "loss": 0.5217, "step": 2246 }, { "epoch": 1.5, "learning_rate": 1.3761888121594487e-05, "loss": 0.4725, "step": 2247 }, { "epoch": 1.5, "learning_rate": 1.3756978958558496e-05, "loss": 0.4786, "step": 2248 }, { "epoch": 1.5, "learning_rate": 1.375206874106493e-05, "loss": 0.4814, "step": 2249 }, { "epoch": 1.5, "learning_rate": 1.3747157470491923e-05, "loss": 0.4969, "step": 2250 }, { "epoch": 1.5, "learning_rate": 1.3742245148217901e-05, "loss": 0.4791, "step": 2251 }, { "epoch": 1.5, "learning_rate": 1.3737331775621588e-05, "loss": 0.4764, "step": 2252 }, { "epoch": 1.5, "learning_rate": 1.3732417354082002e-05, "loss": 0.4959, "step": 2253 }, { "epoch": 1.5, "learning_rate": 1.3727501884978456e-05, "loss": 0.4977, "step": 2254 }, { "epoch": 1.5, "learning_rate": 1.372258536969056e-05, "loss": 0.5343, "step": 2255 }, { "epoch": 1.5, "learning_rate": 1.371766780959821e-05, "loss": 0.4699, "step": 2256 }, { "epoch": 1.5, "learning_rate": 1.37127492060816e-05, "loss": 0.503, "step": 2257 }, { "epoch": 1.5, "learning_rate": 1.3707829560521219e-05, "loss": 0.4918, "step": 2258 }, { "epoch": 1.5, "learning_rate": 1.3702908874297846e-05, "loss": 0.4999, "step": 2259 }, { "epoch": 1.5, "learning_rate": 1.3697987148792546e-05, "loss": 0.5087, "step": 2260 }, { "epoch": 1.5, "learning_rate": 1.369306438538669e-05, "loss": 0.4966, "step": 2261 }, { "epoch": 1.51, "learning_rate": 1.368814058546193e-05, "loss": 0.4839, "step": 2262 }, { "epoch": 1.51, "learning_rate": 1.3683215750400207e-05, "loss": 0.4963, "step": 2263 }, { "epoch": 1.51, "learning_rate": 1.3678289881583759e-05, "loss": 0.4981, "step": 2264 }, { "epoch": 1.51, "learning_rate": 1.3673362980395115e-05, "loss": 0.4385, "step": 2265 }, { "epoch": 1.51, "learning_rate": 1.3668435048217083e-05, "loss": 0.5018, "step": 2266 }, { "epoch": 1.51, "learning_rate": 1.3663506086432774e-05, "loss": 0.4818, "step": 2267 }, { "epoch": 1.51, "learning_rate": 1.3658576096425578e-05, "loss": 0.4613, "step": 2268 }, { "epoch": 1.51, "learning_rate": 1.3653645079579178e-05, "loss": 0.4918, "step": 2269 }, { "epoch": 1.51, "learning_rate": 1.364871303727755e-05, "loss": 0.5107, "step": 2270 }, { "epoch": 1.51, "learning_rate": 1.3643779970904942e-05, "loss": 0.5079, "step": 2271 }, { "epoch": 1.51, "learning_rate": 1.3638845881845909e-05, "loss": 0.5121, "step": 2272 }, { "epoch": 1.51, "learning_rate": 1.3633910771485276e-05, "loss": 0.4599, "step": 2273 }, { "epoch": 1.51, "learning_rate": 1.3628974641208168e-05, "loss": 0.488, "step": 2274 }, { "epoch": 1.51, "learning_rate": 1.3624037492399991e-05, "loss": 0.5232, "step": 2275 }, { "epoch": 1.51, "learning_rate": 1.3619099326446434e-05, "loss": 0.5337, "step": 2276 }, { "epoch": 1.52, "learning_rate": 1.3614160144733475e-05, "loss": 0.4987, "step": 2277 }, { "epoch": 1.52, "learning_rate": 1.3609219948647377e-05, "loss": 0.4844, "step": 2278 }, { "epoch": 1.52, "learning_rate": 1.3604278739574683e-05, "loss": 0.5125, "step": 2279 }, { "epoch": 1.52, "learning_rate": 1.3599336518902228e-05, "loss": 0.5085, "step": 2280 }, { "epoch": 1.52, "learning_rate": 1.3594393288017122e-05, "loss": 0.4843, "step": 2281 }, { "epoch": 1.52, "learning_rate": 1.3589449048306773e-05, "loss": 0.4926, "step": 2282 }, { "epoch": 1.52, "learning_rate": 1.3584503801158854e-05, "loss": 0.491, "step": 2283 }, { "epoch": 1.52, "learning_rate": 1.3579557547961335e-05, "loss": 0.4813, "step": 2284 }, { "epoch": 1.52, "learning_rate": 1.3574610290102462e-05, "loss": 0.498, "step": 2285 }, { "epoch": 1.52, "learning_rate": 1.3569662028970759e-05, "loss": 0.4855, "step": 2286 }, { "epoch": 1.52, "learning_rate": 1.3564712765955038e-05, "loss": 0.4984, "step": 2287 }, { "epoch": 1.52, "learning_rate": 1.3559762502444396e-05, "loss": 0.5005, "step": 2288 }, { "epoch": 1.52, "learning_rate": 1.3554811239828198e-05, "loss": 0.5207, "step": 2289 }, { "epoch": 1.52, "learning_rate": 1.3549858979496104e-05, "loss": 0.4841, "step": 2290 }, { "epoch": 1.52, "learning_rate": 1.3544905722838042e-05, "loss": 0.5085, "step": 2291 }, { "epoch": 1.53, "learning_rate": 1.3539951471244224e-05, "loss": 0.4746, "step": 2292 }, { "epoch": 1.53, "learning_rate": 1.3534996226105144e-05, "loss": 0.4904, "step": 2293 }, { "epoch": 1.53, "learning_rate": 1.3530039988811573e-05, "loss": 0.5245, "step": 2294 }, { "epoch": 1.53, "learning_rate": 1.3525082760754557e-05, "loss": 0.4997, "step": 2295 }, { "epoch": 1.53, "learning_rate": 1.352012454332543e-05, "loss": 0.4993, "step": 2296 }, { "epoch": 1.53, "learning_rate": 1.3515165337915788e-05, "loss": 0.5101, "step": 2297 }, { "epoch": 1.53, "learning_rate": 1.3510205145917517e-05, "loss": 0.4784, "step": 2298 }, { "epoch": 1.53, "learning_rate": 1.350524396872278e-05, "loss": 0.4903, "step": 2299 }, { "epoch": 1.53, "learning_rate": 1.3500281807724003e-05, "loss": 0.4962, "step": 2300 }, { "epoch": 1.53, "learning_rate": 1.3495318664313904e-05, "loss": 0.475, "step": 2301 }, { "epoch": 1.53, "learning_rate": 1.3490354539885473e-05, "loss": 0.4922, "step": 2302 }, { "epoch": 1.53, "learning_rate": 1.3485389435831966e-05, "loss": 0.5075, "step": 2303 }, { "epoch": 1.53, "learning_rate": 1.3480423353546926e-05, "loss": 0.5166, "step": 2304 }, { "epoch": 1.53, "learning_rate": 1.3475456294424163e-05, "loss": 0.4945, "step": 2305 }, { "epoch": 1.53, "learning_rate": 1.3470488259857763e-05, "loss": 0.523, "step": 2306 }, { "epoch": 1.54, "learning_rate": 1.3465519251242085e-05, "loss": 0.5399, "step": 2307 }, { "epoch": 1.54, "learning_rate": 1.3460549269971765e-05, "loss": 0.4964, "step": 2308 }, { "epoch": 1.54, "learning_rate": 1.345557831744171e-05, "loss": 0.5056, "step": 2309 }, { "epoch": 1.54, "learning_rate": 1.3450606395047094e-05, "loss": 0.4744, "step": 2310 }, { "epoch": 1.54, "learning_rate": 1.3445633504183376e-05, "loss": 0.5038, "step": 2311 }, { "epoch": 1.54, "learning_rate": 1.3440659646246275e-05, "loss": 0.5182, "step": 2312 }, { "epoch": 1.54, "learning_rate": 1.3435684822631783e-05, "loss": 0.4954, "step": 2313 }, { "epoch": 1.54, "learning_rate": 1.3430709034736169e-05, "loss": 0.5193, "step": 2314 }, { "epoch": 1.54, "learning_rate": 1.3425732283955968e-05, "loss": 0.5161, "step": 2315 }, { "epoch": 1.54, "learning_rate": 1.3420754571687989e-05, "loss": 0.4841, "step": 2316 }, { "epoch": 1.54, "learning_rate": 1.3415775899329306e-05, "loss": 0.5132, "step": 2317 }, { "epoch": 1.54, "learning_rate": 1.3410796268277264e-05, "loss": 0.4653, "step": 2318 }, { "epoch": 1.54, "learning_rate": 1.340581567992948e-05, "loss": 0.5226, "step": 2319 }, { "epoch": 1.54, "learning_rate": 1.3400834135683836e-05, "loss": 0.5066, "step": 2320 }, { "epoch": 1.54, "learning_rate": 1.3395851636938484e-05, "loss": 0.5038, "step": 2321 }, { "epoch": 1.55, "learning_rate": 1.3390868185091844e-05, "loss": 0.4913, "step": 2322 }, { "epoch": 1.55, "learning_rate": 1.3385883781542601e-05, "loss": 0.5052, "step": 2323 }, { "epoch": 1.55, "learning_rate": 1.3380898427689717e-05, "loss": 0.4717, "step": 2324 }, { "epoch": 1.55, "learning_rate": 1.3375912124932406e-05, "loss": 0.4844, "step": 2325 }, { "epoch": 1.55, "learning_rate": 1.3370924874670155e-05, "loss": 0.5088, "step": 2326 }, { "epoch": 1.55, "learning_rate": 1.3365936678302722e-05, "loss": 0.5426, "step": 2327 }, { "epoch": 1.55, "learning_rate": 1.3360947537230123e-05, "loss": 0.506, "step": 2328 }, { "epoch": 1.55, "learning_rate": 1.335595745285264e-05, "loss": 0.4665, "step": 2329 }, { "epoch": 1.55, "learning_rate": 1.3350966426570825e-05, "loss": 0.4601, "step": 2330 }, { "epoch": 1.55, "learning_rate": 1.3345974459785494e-05, "loss": 0.4649, "step": 2331 }, { "epoch": 1.55, "learning_rate": 1.3340981553897719e-05, "loss": 0.4749, "step": 2332 }, { "epoch": 1.55, "learning_rate": 1.333598771030884e-05, "loss": 0.4608, "step": 2333 }, { "epoch": 1.55, "learning_rate": 1.3330992930420466e-05, "loss": 0.5071, "step": 2334 }, { "epoch": 1.55, "learning_rate": 1.3325997215634457e-05, "loss": 0.4862, "step": 2335 }, { "epoch": 1.56, "learning_rate": 1.3321000567352944e-05, "loss": 0.4901, "step": 2336 }, { "epoch": 1.56, "learning_rate": 1.3316002986978326e-05, "loss": 0.5135, "step": 2337 }, { "epoch": 1.56, "learning_rate": 1.3311004475913247e-05, "loss": 0.5051, "step": 2338 }, { "epoch": 1.56, "learning_rate": 1.3306005035560623e-05, "loss": 0.4989, "step": 2339 }, { "epoch": 1.56, "learning_rate": 1.330100466732363e-05, "loss": 0.4646, "step": 2340 }, { "epoch": 1.56, "learning_rate": 1.32960033726057e-05, "loss": 0.4715, "step": 2341 }, { "epoch": 1.56, "learning_rate": 1.329100115281053e-05, "loss": 0.4844, "step": 2342 }, { "epoch": 1.56, "learning_rate": 1.3285998009342076e-05, "loss": 0.5051, "step": 2343 }, { "epoch": 1.56, "learning_rate": 1.328099394360455e-05, "loss": 0.5271, "step": 2344 }, { "epoch": 1.56, "learning_rate": 1.3275988957002429e-05, "loss": 0.4777, "step": 2345 }, { "epoch": 1.56, "learning_rate": 1.3270983050940435e-05, "loss": 0.4982, "step": 2346 }, { "epoch": 1.56, "learning_rate": 1.3265976226823569e-05, "loss": 0.4717, "step": 2347 }, { "epoch": 1.56, "learning_rate": 1.3260968486057068e-05, "loss": 0.4823, "step": 2348 }, { "epoch": 1.56, "learning_rate": 1.3255959830046446e-05, "loss": 0.496, "step": 2349 }, { "epoch": 1.56, "learning_rate": 1.3250950260197455e-05, "loss": 0.4878, "step": 2350 }, { "epoch": 1.57, "learning_rate": 1.324593977791612e-05, "loss": 0.4919, "step": 2351 }, { "epoch": 1.57, "learning_rate": 1.3240928384608712e-05, "loss": 0.4997, "step": 2352 }, { "epoch": 1.57, "learning_rate": 1.3235916081681757e-05, "loss": 0.4921, "step": 2353 }, { "epoch": 1.57, "learning_rate": 1.3230902870542046e-05, "loss": 0.5137, "step": 2354 }, { "epoch": 1.57, "learning_rate": 1.3225888752596614e-05, "loss": 0.4929, "step": 2355 }, { "epoch": 1.57, "learning_rate": 1.3220873729252752e-05, "loss": 0.4413, "step": 2356 }, { "epoch": 1.57, "learning_rate": 1.321585780191802e-05, "loss": 0.4459, "step": 2357 }, { "epoch": 1.57, "learning_rate": 1.3210840972000209e-05, "loss": 0.4613, "step": 2358 }, { "epoch": 1.57, "learning_rate": 1.3205823240907381e-05, "loss": 0.5138, "step": 2359 }, { "epoch": 1.57, "learning_rate": 1.3200804610047842e-05, "loss": 0.5126, "step": 2360 }, { "epoch": 1.57, "learning_rate": 1.319578508083015e-05, "loss": 0.5372, "step": 2361 }, { "epoch": 1.57, "learning_rate": 1.3190764654663122e-05, "loss": 0.496, "step": 2362 }, { "epoch": 1.57, "learning_rate": 1.3185743332955818e-05, "loss": 0.4629, "step": 2363 }, { "epoch": 1.57, "learning_rate": 1.3180721117117562e-05, "loss": 0.5045, "step": 2364 }, { "epoch": 1.57, "learning_rate": 1.3175698008557914e-05, "loss": 0.4811, "step": 2365 }, { "epoch": 1.58, "learning_rate": 1.3170674008686692e-05, "loss": 0.4964, "step": 2366 }, { "epoch": 1.58, "learning_rate": 1.3165649118913968e-05, "loss": 0.5173, "step": 2367 }, { "epoch": 1.58, "learning_rate": 1.3160623340650058e-05, "loss": 0.4976, "step": 2368 }, { "epoch": 1.58, "learning_rate": 1.3155596675305522e-05, "loss": 0.4787, "step": 2369 }, { "epoch": 1.58, "learning_rate": 1.3150569124291186e-05, "loss": 0.4784, "step": 2370 }, { "epoch": 1.58, "learning_rate": 1.314554068901811e-05, "loss": 0.5282, "step": 2371 }, { "epoch": 1.58, "learning_rate": 1.3140511370897607e-05, "loss": 0.5015, "step": 2372 }, { "epoch": 1.58, "learning_rate": 1.3135481171341233e-05, "loss": 0.4699, "step": 2373 }, { "epoch": 1.58, "learning_rate": 1.3130450091760804e-05, "loss": 0.4843, "step": 2374 }, { "epoch": 1.58, "learning_rate": 1.3125418133568369e-05, "loss": 0.4747, "step": 2375 }, { "epoch": 1.58, "learning_rate": 1.312038529817623e-05, "loss": 0.5123, "step": 2376 }, { "epoch": 1.58, "learning_rate": 1.3115351586996937e-05, "loss": 0.503, "step": 2377 }, { "epoch": 1.58, "learning_rate": 1.3110317001443278e-05, "loss": 0.4729, "step": 2378 }, { "epoch": 1.58, "learning_rate": 1.3105281542928296e-05, "loss": 0.497, "step": 2379 }, { "epoch": 1.58, "learning_rate": 1.3100245212865279e-05, "loss": 0.5098, "step": 2380 }, { "epoch": 1.59, "learning_rate": 1.3095208012667747e-05, "loss": 0.5135, "step": 2381 }, { "epoch": 1.59, "learning_rate": 1.3090169943749475e-05, "loss": 0.4688, "step": 2382 }, { "epoch": 1.59, "learning_rate": 1.3085131007524483e-05, "loss": 0.4836, "step": 2383 }, { "epoch": 1.59, "learning_rate": 1.3080091205407027e-05, "loss": 0.4571, "step": 2384 }, { "epoch": 1.59, "learning_rate": 1.3075050538811611e-05, "loss": 0.4976, "step": 2385 }, { "epoch": 1.59, "learning_rate": 1.3070009009152984e-05, "loss": 0.5032, "step": 2386 }, { "epoch": 1.59, "learning_rate": 1.3064966617846126e-05, "loss": 0.4935, "step": 2387 }, { "epoch": 1.59, "learning_rate": 1.3059923366306272e-05, "loss": 0.5023, "step": 2388 }, { "epoch": 1.59, "learning_rate": 1.3054879255948896e-05, "loss": 0.4684, "step": 2389 }, { "epoch": 1.59, "learning_rate": 1.3049834288189702e-05, "loss": 0.5184, "step": 2390 }, { "epoch": 1.59, "learning_rate": 1.3044788464444648e-05, "loss": 0.4983, "step": 2391 }, { "epoch": 1.59, "learning_rate": 1.3039741786129927e-05, "loss": 0.514, "step": 2392 }, { "epoch": 1.59, "learning_rate": 1.3034694254661971e-05, "loss": 0.4756, "step": 2393 }, { "epoch": 1.59, "learning_rate": 1.302964587145745e-05, "loss": 0.4788, "step": 2394 }, { "epoch": 1.59, "learning_rate": 1.3024596637933277e-05, "loss": 0.4821, "step": 2395 }, { "epoch": 1.6, "learning_rate": 1.3019546555506603e-05, "loss": 0.5358, "step": 2396 }, { "epoch": 1.6, "learning_rate": 1.3014495625594815e-05, "loss": 0.5028, "step": 2397 }, { "epoch": 1.6, "learning_rate": 1.3009443849615538e-05, "loss": 0.48, "step": 2398 }, { "epoch": 1.6, "learning_rate": 1.3004391228986637e-05, "loss": 0.4808, "step": 2399 }, { "epoch": 1.6, "learning_rate": 1.2999337765126217e-05, "loss": 0.4664, "step": 2400 }, { "epoch": 1.6, "learning_rate": 1.2994283459452606e-05, "loss": 0.5013, "step": 2401 }, { "epoch": 1.6, "learning_rate": 1.2989228313384383e-05, "loss": 0.497, "step": 2402 }, { "epoch": 1.6, "learning_rate": 1.2984172328340357e-05, "loss": 0.5056, "step": 2403 }, { "epoch": 1.6, "learning_rate": 1.2979115505739573e-05, "loss": 0.4779, "step": 2404 }, { "epoch": 1.6, "learning_rate": 1.2974057847001306e-05, "loss": 0.4619, "step": 2405 }, { "epoch": 1.6, "learning_rate": 1.2968999353545082e-05, "loss": 0.4889, "step": 2406 }, { "epoch": 1.6, "learning_rate": 1.296394002679064e-05, "loss": 0.5278, "step": 2407 }, { "epoch": 1.6, "learning_rate": 1.2958879868157966e-05, "loss": 0.501, "step": 2408 }, { "epoch": 1.6, "learning_rate": 1.2953818879067276e-05, "loss": 0.481, "step": 2409 }, { "epoch": 1.6, "learning_rate": 1.2948757060939019e-05, "loss": 0.5152, "step": 2410 }, { "epoch": 1.61, "learning_rate": 1.2943694415193875e-05, "loss": 0.5048, "step": 2411 }, { "epoch": 1.61, "learning_rate": 1.2938630943252765e-05, "loss": 0.5087, "step": 2412 }, { "epoch": 1.61, "learning_rate": 1.293356664653683e-05, "loss": 0.5102, "step": 2413 }, { "epoch": 1.61, "learning_rate": 1.2928501526467448e-05, "loss": 0.4895, "step": 2414 }, { "epoch": 1.61, "learning_rate": 1.2923435584466228e-05, "loss": 0.5067, "step": 2415 }, { "epoch": 1.61, "learning_rate": 1.291836882195501e-05, "loss": 0.5439, "step": 2416 }, { "epoch": 1.61, "learning_rate": 1.2913301240355861e-05, "loss": 0.473, "step": 2417 }, { "epoch": 1.61, "learning_rate": 1.2908232841091088e-05, "loss": 0.4874, "step": 2418 }, { "epoch": 1.61, "learning_rate": 1.2903163625583213e-05, "loss": 0.5007, "step": 2419 }, { "epoch": 1.61, "learning_rate": 1.2898093595254998e-05, "loss": 0.5122, "step": 2420 }, { "epoch": 1.61, "learning_rate": 1.2893022751529425e-05, "loss": 0.4922, "step": 2421 }, { "epoch": 1.61, "learning_rate": 1.2887951095829713e-05, "loss": 0.4741, "step": 2422 }, { "epoch": 1.61, "learning_rate": 1.288287862957931e-05, "loss": 0.4794, "step": 2423 }, { "epoch": 1.61, "learning_rate": 1.2877805354201876e-05, "loss": 0.5161, "step": 2424 }, { "epoch": 1.61, "learning_rate": 1.2872731271121315e-05, "loss": 0.4921, "step": 2425 }, { "epoch": 1.62, "learning_rate": 1.286765638176175e-05, "loss": 0.4682, "step": 2426 }, { "epoch": 1.62, "learning_rate": 1.2862580687547534e-05, "loss": 0.5074, "step": 2427 }, { "epoch": 1.62, "learning_rate": 1.2857504189903241e-05, "loss": 0.5238, "step": 2428 }, { "epoch": 1.62, "learning_rate": 1.2852426890253676e-05, "loss": 0.4943, "step": 2429 }, { "epoch": 1.62, "learning_rate": 1.2847348790023858e-05, "loss": 0.4989, "step": 2430 }, { "epoch": 1.62, "learning_rate": 1.284226989063905e-05, "loss": 0.4927, "step": 2431 }, { "epoch": 1.62, "learning_rate": 1.283719019352472e-05, "loss": 0.5004, "step": 2432 }, { "epoch": 1.62, "learning_rate": 1.2832109700106572e-05, "loss": 0.4837, "step": 2433 }, { "epoch": 1.62, "learning_rate": 1.2827028411810527e-05, "loss": 0.4997, "step": 2434 }, { "epoch": 1.62, "learning_rate": 1.2821946330062738e-05, "loss": 0.4895, "step": 2435 }, { "epoch": 1.62, "learning_rate": 1.2816863456289565e-05, "loss": 0.5072, "step": 2436 }, { "epoch": 1.62, "learning_rate": 1.2811779791917605e-05, "loss": 0.4691, "step": 2437 }, { "epoch": 1.62, "learning_rate": 1.2806695338373669e-05, "loss": 0.4917, "step": 2438 }, { "epoch": 1.62, "learning_rate": 1.2801610097084796e-05, "loss": 0.4832, "step": 2439 }, { "epoch": 1.63, "learning_rate": 1.2796524069478243e-05, "loss": 0.5057, "step": 2440 }, { "epoch": 1.63, "learning_rate": 1.2791437256981479e-05, "loss": 0.4779, "step": 2441 }, { "epoch": 1.63, "learning_rate": 1.2786349661022205e-05, "loss": 0.5197, "step": 2442 }, { "epoch": 1.63, "learning_rate": 1.2781261283028344e-05, "loss": 0.4913, "step": 2443 }, { "epoch": 1.63, "learning_rate": 1.2776172124428023e-05, "loss": 0.4637, "step": 2444 }, { "epoch": 1.63, "learning_rate": 1.27710821866496e-05, "loss": 0.4851, "step": 2445 }, { "epoch": 1.63, "learning_rate": 1.2765991471121655e-05, "loss": 0.4761, "step": 2446 }, { "epoch": 1.63, "learning_rate": 1.2760899979272977e-05, "loss": 0.4816, "step": 2447 }, { "epoch": 1.63, "learning_rate": 1.275580771253257e-05, "loss": 0.4659, "step": 2448 }, { "epoch": 1.63, "learning_rate": 1.2750714672329676e-05, "loss": 0.463, "step": 2449 }, { "epoch": 1.63, "learning_rate": 1.2745620860093726e-05, "loss": 0.4985, "step": 2450 }, { "epoch": 1.63, "learning_rate": 1.274052627725439e-05, "loss": 0.4847, "step": 2451 }, { "epoch": 1.63, "learning_rate": 1.2735430925241543e-05, "loss": 0.4654, "step": 2452 }, { "epoch": 1.63, "learning_rate": 1.273033480548528e-05, "loss": 0.4958, "step": 2453 }, { "epoch": 1.63, "learning_rate": 1.272523791941591e-05, "loss": 0.5283, "step": 2454 }, { "epoch": 1.64, "learning_rate": 1.2720140268463958e-05, "loss": 0.4846, "step": 2455 }, { "epoch": 1.64, "learning_rate": 1.2715041854060161e-05, "loss": 0.502, "step": 2456 }, { "epoch": 1.64, "learning_rate": 1.2709942677635474e-05, "loss": 0.4987, "step": 2457 }, { "epoch": 1.64, "learning_rate": 1.2704842740621061e-05, "loss": 0.48, "step": 2458 }, { "epoch": 1.64, "learning_rate": 1.2699742044448311e-05, "loss": 0.45, "step": 2459 }, { "epoch": 1.64, "learning_rate": 1.2694640590548808e-05, "loss": 0.5029, "step": 2460 }, { "epoch": 1.64, "learning_rate": 1.2689538380354367e-05, "loss": 0.4948, "step": 2461 }, { "epoch": 1.64, "learning_rate": 1.2684435415297e-05, "loss": 0.5004, "step": 2462 }, { "epoch": 1.64, "learning_rate": 1.267933169680894e-05, "loss": 0.518, "step": 2463 }, { "epoch": 1.64, "learning_rate": 1.2674227226322633e-05, "loss": 0.4908, "step": 2464 }, { "epoch": 1.64, "learning_rate": 1.2669122005270724e-05, "loss": 0.4942, "step": 2465 }, { "epoch": 1.64, "learning_rate": 1.2664016035086081e-05, "loss": 0.4937, "step": 2466 }, { "epoch": 1.64, "learning_rate": 1.2658909317201782e-05, "loss": 0.512, "step": 2467 }, { "epoch": 1.64, "learning_rate": 1.2653801853051105e-05, "loss": 0.4871, "step": 2468 }, { "epoch": 1.64, "learning_rate": 1.2648693644067545e-05, "loss": 0.4573, "step": 2469 }, { "epoch": 1.65, "learning_rate": 1.2643584691684802e-05, "loss": 0.5149, "step": 2470 }, { "epoch": 1.65, "learning_rate": 1.2638474997336794e-05, "loss": 0.4781, "step": 2471 }, { "epoch": 1.65, "learning_rate": 1.2633364562457632e-05, "loss": 0.4893, "step": 2472 }, { "epoch": 1.65, "learning_rate": 1.262825338848165e-05, "loss": 0.4776, "step": 2473 }, { "epoch": 1.65, "learning_rate": 1.262314147684338e-05, "loss": 0.5026, "step": 2474 }, { "epoch": 1.65, "learning_rate": 1.2618028828977563e-05, "loss": 0.5018, "step": 2475 }, { "epoch": 1.65, "learning_rate": 1.2612915446319148e-05, "loss": 0.5081, "step": 2476 }, { "epoch": 1.65, "learning_rate": 1.2607801330303287e-05, "loss": 0.4972, "step": 2477 }, { "epoch": 1.65, "learning_rate": 1.2602686482365343e-05, "loss": 0.5139, "step": 2478 }, { "epoch": 1.65, "learning_rate": 1.2597570903940879e-05, "loss": 0.5296, "step": 2479 }, { "epoch": 1.65, "learning_rate": 1.259245459646567e-05, "loss": 0.4634, "step": 2480 }, { "epoch": 1.65, "learning_rate": 1.258733756137569e-05, "loss": 0.5005, "step": 2481 }, { "epoch": 1.65, "learning_rate": 1.2582219800107113e-05, "loss": 0.5058, "step": 2482 }, { "epoch": 1.65, "learning_rate": 1.257710131409633e-05, "loss": 0.4789, "step": 2483 }, { "epoch": 1.65, "learning_rate": 1.2571982104779927e-05, "loss": 0.497, "step": 2484 }, { "epoch": 1.66, "learning_rate": 1.2566862173594689e-05, "loss": 0.5065, "step": 2485 }, { "epoch": 1.66, "learning_rate": 1.256174152197761e-05, "loss": 0.526, "step": 2486 }, { "epoch": 1.66, "learning_rate": 1.2556620151365886e-05, "loss": 0.5361, "step": 2487 }, { "epoch": 1.66, "learning_rate": 1.2551498063196914e-05, "loss": 0.4911, "step": 2488 }, { "epoch": 1.66, "learning_rate": 1.254637525890829e-05, "loss": 0.4768, "step": 2489 }, { "epoch": 1.66, "learning_rate": 1.2541251739937814e-05, "loss": 0.4887, "step": 2490 }, { "epoch": 1.66, "learning_rate": 1.2536127507723486e-05, "loss": 0.4734, "step": 2491 }, { "epoch": 1.66, "learning_rate": 1.2531002563703502e-05, "loss": 0.453, "step": 2492 }, { "epoch": 1.66, "learning_rate": 1.2525876909316263e-05, "loss": 0.4697, "step": 2493 }, { "epoch": 1.66, "learning_rate": 1.252075054600037e-05, "loss": 0.4998, "step": 2494 }, { "epoch": 1.66, "learning_rate": 1.2515623475194623e-05, "loss": 0.4756, "step": 2495 }, { "epoch": 1.66, "learning_rate": 1.251049569833801e-05, "loss": 0.4939, "step": 2496 }, { "epoch": 1.66, "learning_rate": 1.250536721686973e-05, "loss": 0.5076, "step": 2497 }, { "epoch": 1.66, "learning_rate": 1.2500238032229177e-05, "loss": 0.5119, "step": 2498 }, { "epoch": 1.66, "learning_rate": 1.2495108145855938e-05, "loss": 0.512, "step": 2499 }, { "epoch": 1.67, "learning_rate": 1.2489977559189796e-05, "loss": 0.5, "step": 2500 }, { "epoch": 1.67, "learning_rate": 1.2484846273670745e-05, "loss": 0.514, "step": 2501 }, { "epoch": 1.67, "learning_rate": 1.2479714290738953e-05, "loss": 0.4768, "step": 2502 }, { "epoch": 1.67, "learning_rate": 1.24745816118348e-05, "loss": 0.4813, "step": 2503 }, { "epoch": 1.67, "learning_rate": 1.2469448238398859e-05, "loss": 0.4583, "step": 2504 }, { "epoch": 1.67, "learning_rate": 1.2464314171871888e-05, "loss": 0.5163, "step": 2505 }, { "epoch": 1.67, "learning_rate": 1.2459179413694849e-05, "loss": 0.4898, "step": 2506 }, { "epoch": 1.67, "learning_rate": 1.24540439653089e-05, "loss": 0.4777, "step": 2507 }, { "epoch": 1.67, "learning_rate": 1.2448907828155383e-05, "loss": 0.5537, "step": 2508 }, { "epoch": 1.67, "learning_rate": 1.2443771003675842e-05, "loss": 0.4661, "step": 2509 }, { "epoch": 1.67, "learning_rate": 1.2438633493312016e-05, "loss": 0.5147, "step": 2510 }, { "epoch": 1.67, "learning_rate": 1.243349529850582e-05, "loss": 0.4929, "step": 2511 }, { "epoch": 1.67, "learning_rate": 1.2428356420699378e-05, "loss": 0.4882, "step": 2512 }, { "epoch": 1.67, "learning_rate": 1.2423216861335004e-05, "loss": 0.5021, "step": 2513 }, { "epoch": 1.67, "learning_rate": 1.2418076621855191e-05, "loss": 0.4877, "step": 2514 }, { "epoch": 1.68, "learning_rate": 1.241293570370264e-05, "loss": 0.4847, "step": 2515 }, { "epoch": 1.68, "learning_rate": 1.2407794108320229e-05, "loss": 0.478, "step": 2516 }, { "epoch": 1.68, "learning_rate": 1.2402651837151029e-05, "loss": 0.5041, "step": 2517 }, { "epoch": 1.68, "learning_rate": 1.2397508891638308e-05, "loss": 0.4899, "step": 2518 }, { "epoch": 1.68, "learning_rate": 1.2392365273225513e-05, "loss": 0.4775, "step": 2519 }, { "epoch": 1.68, "learning_rate": 1.2387220983356283e-05, "loss": 0.4811, "step": 2520 }, { "epoch": 1.68, "learning_rate": 1.2382076023474455e-05, "loss": 0.5075, "step": 2521 }, { "epoch": 1.68, "learning_rate": 1.2376930395024039e-05, "loss": 0.4862, "step": 2522 }, { "epoch": 1.68, "learning_rate": 1.2371784099449242e-05, "loss": 0.5041, "step": 2523 }, { "epoch": 1.68, "learning_rate": 1.236663713819446e-05, "loss": 0.5246, "step": 2524 }, { "epoch": 1.68, "learning_rate": 1.2361489512704264e-05, "loss": 0.508, "step": 2525 }, { "epoch": 1.68, "learning_rate": 1.2356341224423422e-05, "loss": 0.4847, "step": 2526 }, { "epoch": 1.68, "learning_rate": 1.235119227479689e-05, "loss": 0.4775, "step": 2527 }, { "epoch": 1.68, "learning_rate": 1.2346042665269799e-05, "loss": 0.5094, "step": 2528 }, { "epoch": 1.68, "learning_rate": 1.2340892397287475e-05, "loss": 0.5002, "step": 2529 }, { "epoch": 1.69, "learning_rate": 1.2335741472295426e-05, "loss": 0.4888, "step": 2530 }, { "epoch": 1.69, "learning_rate": 1.2330589891739338e-05, "loss": 0.4987, "step": 2531 }, { "epoch": 1.69, "learning_rate": 1.2325437657065089e-05, "loss": 0.4593, "step": 2532 }, { "epoch": 1.69, "learning_rate": 1.2320284769718739e-05, "loss": 0.495, "step": 2533 }, { "epoch": 1.69, "learning_rate": 1.2315131231146526e-05, "loss": 0.4926, "step": 2534 }, { "epoch": 1.69, "learning_rate": 1.230997704279488e-05, "loss": 0.4855, "step": 2535 }, { "epoch": 1.69, "learning_rate": 1.2304822206110409e-05, "loss": 0.4989, "step": 2536 }, { "epoch": 1.69, "learning_rate": 1.2299666722539897e-05, "loss": 0.5409, "step": 2537 }, { "epoch": 1.69, "learning_rate": 1.2294510593530318e-05, "loss": 0.4749, "step": 2538 }, { "epoch": 1.69, "learning_rate": 1.2289353820528825e-05, "loss": 0.4767, "step": 2539 }, { "epoch": 1.69, "learning_rate": 1.2284196404982746e-05, "loss": 0.5033, "step": 2540 }, { "epoch": 1.69, "learning_rate": 1.2279038348339595e-05, "loss": 0.4805, "step": 2541 }, { "epoch": 1.69, "learning_rate": 1.227387965204707e-05, "loss": 0.4998, "step": 2542 }, { "epoch": 1.69, "learning_rate": 1.226872031755304e-05, "loss": 0.4781, "step": 2543 }, { "epoch": 1.7, "learning_rate": 1.2263560346305556e-05, "loss": 0.4978, "step": 2544 }, { "epoch": 1.7, "learning_rate": 1.2258399739752848e-05, "loss": 0.484, "step": 2545 }, { "epoch": 1.7, "learning_rate": 1.2253238499343328e-05, "loss": 0.4384, "step": 2546 }, { "epoch": 1.7, "learning_rate": 1.2248076626525578e-05, "loss": 0.462, "step": 2547 }, { "epoch": 1.7, "learning_rate": 1.2242914122748363e-05, "loss": 0.496, "step": 2548 }, { "epoch": 1.7, "learning_rate": 1.223775098946063e-05, "loss": 0.4909, "step": 2549 }, { "epoch": 1.7, "learning_rate": 1.223258722811149e-05, "loss": 0.4811, "step": 2550 }, { "epoch": 1.7, "learning_rate": 1.2227422840150238e-05, "loss": 0.5114, "step": 2551 }, { "epoch": 1.7, "learning_rate": 1.2222257827026345e-05, "loss": 0.4714, "step": 2552 }, { "epoch": 1.7, "learning_rate": 1.221709219018946e-05, "loss": 0.4699, "step": 2553 }, { "epoch": 1.7, "learning_rate": 1.2211925931089394e-05, "loss": 0.4859, "step": 2554 }, { "epoch": 1.7, "learning_rate": 1.2206759051176151e-05, "loss": 0.4844, "step": 2555 }, { "epoch": 1.7, "learning_rate": 1.2201591551899898e-05, "loss": 0.4592, "step": 2556 }, { "epoch": 1.7, "learning_rate": 1.2196423434710978e-05, "loss": 0.5196, "step": 2557 }, { "epoch": 1.7, "learning_rate": 1.2191254701059904e-05, "loss": 0.4786, "step": 2558 }, { "epoch": 1.71, "learning_rate": 1.2186085352397374e-05, "loss": 0.4673, "step": 2559 }, { "epoch": 1.71, "learning_rate": 1.218091539017424e-05, "loss": 0.4997, "step": 2560 }, { "epoch": 1.71, "learning_rate": 1.2175744815841543e-05, "loss": 0.5088, "step": 2561 }, { "epoch": 1.71, "learning_rate": 1.2170573630850486e-05, "loss": 0.476, "step": 2562 }, { "epoch": 1.71, "learning_rate": 1.216540183665245e-05, "loss": 0.4563, "step": 2563 }, { "epoch": 1.71, "learning_rate": 1.2160229434698984e-05, "loss": 0.4688, "step": 2564 }, { "epoch": 1.71, "learning_rate": 1.2155056426441803e-05, "loss": 0.5236, "step": 2565 }, { "epoch": 1.71, "learning_rate": 1.2149882813332796e-05, "loss": 0.486, "step": 2566 }, { "epoch": 1.71, "learning_rate": 1.2144708596824027e-05, "loss": 0.4992, "step": 2567 }, { "epoch": 1.71, "learning_rate": 1.213953377836772e-05, "loss": 0.5141, "step": 2568 }, { "epoch": 1.71, "learning_rate": 1.2134358359416277e-05, "loss": 0.4961, "step": 2569 }, { "epoch": 1.71, "learning_rate": 1.212918234142226e-05, "loss": 0.4442, "step": 2570 }, { "epoch": 1.71, "learning_rate": 1.2124005725838404e-05, "loss": 0.5016, "step": 2571 }, { "epoch": 1.71, "learning_rate": 1.2118828514117611e-05, "loss": 0.5002, "step": 2572 }, { "epoch": 1.71, "learning_rate": 1.2113650707712952e-05, "loss": 0.486, "step": 2573 }, { "epoch": 1.72, "learning_rate": 1.2108472308077661e-05, "loss": 0.4673, "step": 2574 }, { "epoch": 1.72, "learning_rate": 1.2103293316665137e-05, "loss": 0.4508, "step": 2575 }, { "epoch": 1.72, "learning_rate": 1.2098113734928957e-05, "loss": 0.4631, "step": 2576 }, { "epoch": 1.72, "learning_rate": 1.209293356432285e-05, "loss": 0.4737, "step": 2577 }, { "epoch": 1.72, "learning_rate": 1.2087752806300715e-05, "loss": 0.4912, "step": 2578 }, { "epoch": 1.72, "learning_rate": 1.208257146231662e-05, "loss": 0.4755, "step": 2579 }, { "epoch": 1.72, "learning_rate": 1.2077389533824789e-05, "loss": 0.5224, "step": 2580 }, { "epoch": 1.72, "learning_rate": 1.2072207022279618e-05, "loss": 0.4868, "step": 2581 }, { "epoch": 1.72, "learning_rate": 1.2067023929135664e-05, "loss": 0.5011, "step": 2582 }, { "epoch": 1.72, "learning_rate": 1.2061840255847645e-05, "loss": 0.5079, "step": 2583 }, { "epoch": 1.72, "learning_rate": 1.2056656003870448e-05, "loss": 0.4912, "step": 2584 }, { "epoch": 1.72, "learning_rate": 1.2051471174659116e-05, "loss": 0.5013, "step": 2585 }, { "epoch": 1.72, "learning_rate": 1.204628576966885e-05, "loss": 0.4873, "step": 2586 }, { "epoch": 1.72, "learning_rate": 1.2041099790355025e-05, "loss": 0.4853, "step": 2587 }, { "epoch": 1.72, "learning_rate": 1.203591323817317e-05, "loss": 0.481, "step": 2588 }, { "epoch": 1.73, "learning_rate": 1.2030726114578974e-05, "loss": 0.5212, "step": 2589 }, { "epoch": 1.73, "learning_rate": 1.2025538421028293e-05, "loss": 0.4934, "step": 2590 }, { "epoch": 1.73, "learning_rate": 1.2020350158977131e-05, "loss": 0.5001, "step": 2591 }, { "epoch": 1.73, "learning_rate": 1.2015161329881663e-05, "loss": 0.4796, "step": 2592 }, { "epoch": 1.73, "learning_rate": 1.200997193519822e-05, "loss": 0.4958, "step": 2593 }, { "epoch": 1.73, "learning_rate": 1.2004781976383286e-05, "loss": 0.5291, "step": 2594 }, { "epoch": 1.73, "learning_rate": 1.1999591454893511e-05, "loss": 0.5025, "step": 2595 }, { "epoch": 1.73, "learning_rate": 1.1994400372185698e-05, "loss": 0.4827, "step": 2596 }, { "epoch": 1.73, "learning_rate": 1.1989208729716808e-05, "loss": 0.4829, "step": 2597 }, { "epoch": 1.73, "learning_rate": 1.1984016528943962e-05, "loss": 0.485, "step": 2598 }, { "epoch": 1.73, "learning_rate": 1.1978823771324439e-05, "loss": 0.5281, "step": 2599 }, { "epoch": 1.73, "learning_rate": 1.1973630458315667e-05, "loss": 0.4634, "step": 2600 }, { "epoch": 1.73, "learning_rate": 1.1968436591375236e-05, "loss": 0.4909, "step": 2601 }, { "epoch": 1.73, "learning_rate": 1.196324217196089e-05, "loss": 0.5222, "step": 2602 }, { "epoch": 1.73, "learning_rate": 1.1958047201530527e-05, "loss": 0.4826, "step": 2603 }, { "epoch": 1.74, "learning_rate": 1.1952851681542201e-05, "loss": 0.4825, "step": 2604 }, { "epoch": 1.74, "learning_rate": 1.194765561345412e-05, "loss": 0.475, "step": 2605 }, { "epoch": 1.74, "learning_rate": 1.1942458998724642e-05, "loss": 0.5081, "step": 2606 }, { "epoch": 1.74, "learning_rate": 1.1937261838812286e-05, "loss": 0.4609, "step": 2607 }, { "epoch": 1.74, "learning_rate": 1.1932064135175723e-05, "loss": 0.5037, "step": 2608 }, { "epoch": 1.74, "learning_rate": 1.1926865889273762e-05, "loss": 0.4807, "step": 2609 }, { "epoch": 1.74, "learning_rate": 1.1921667102565384e-05, "loss": 0.492, "step": 2610 }, { "epoch": 1.74, "learning_rate": 1.1916467776509717e-05, "loss": 0.5251, "step": 2611 }, { "epoch": 1.74, "learning_rate": 1.1911267912566027e-05, "loss": 0.4634, "step": 2612 }, { "epoch": 1.74, "learning_rate": 1.1906067512193748e-05, "loss": 0.4855, "step": 2613 }, { "epoch": 1.74, "learning_rate": 1.190086657685246e-05, "loss": 0.4897, "step": 2614 }, { "epoch": 1.74, "learning_rate": 1.1895665108001879e-05, "loss": 0.4853, "step": 2615 }, { "epoch": 1.74, "learning_rate": 1.1890463107101891e-05, "loss": 0.4809, "step": 2616 }, { "epoch": 1.74, "learning_rate": 1.188526057561252e-05, "loss": 0.5015, "step": 2617 }, { "epoch": 1.74, "learning_rate": 1.1880057514993944e-05, "loss": 0.5009, "step": 2618 }, { "epoch": 1.75, "learning_rate": 1.1874853926706486e-05, "loss": 0.4776, "step": 2619 }, { "epoch": 1.75, "learning_rate": 1.1869649812210618e-05, "loss": 0.5275, "step": 2620 }, { "epoch": 1.75, "learning_rate": 1.1864445172966956e-05, "loss": 0.4887, "step": 2621 }, { "epoch": 1.75, "learning_rate": 1.1859240010436272e-05, "loss": 0.5075, "step": 2622 }, { "epoch": 1.75, "learning_rate": 1.185403432607948e-05, "loss": 0.4988, "step": 2623 }, { "epoch": 1.75, "learning_rate": 1.1848828121357637e-05, "loss": 0.4991, "step": 2624 }, { "epoch": 1.75, "learning_rate": 1.1843621397731954e-05, "loss": 0.5394, "step": 2625 }, { "epoch": 1.75, "learning_rate": 1.1838414156663778e-05, "loss": 0.5116, "step": 2626 }, { "epoch": 1.75, "learning_rate": 1.183320639961461e-05, "loss": 0.4602, "step": 2627 }, { "epoch": 1.75, "learning_rate": 1.1827998128046091e-05, "loss": 0.4982, "step": 2628 }, { "epoch": 1.75, "learning_rate": 1.1822789343420008e-05, "loss": 0.4962, "step": 2629 }, { "epoch": 1.75, "learning_rate": 1.1817580047198287e-05, "loss": 0.4969, "step": 2630 }, { "epoch": 1.75, "learning_rate": 1.1812370240843012e-05, "loss": 0.4923, "step": 2631 }, { "epoch": 1.75, "learning_rate": 1.1807159925816389e-05, "loss": 0.5061, "step": 2632 }, { "epoch": 1.75, "learning_rate": 1.1801949103580788e-05, "loss": 0.5, "step": 2633 }, { "epoch": 1.76, "learning_rate": 1.1796737775598708e-05, "loss": 0.5, "step": 2634 }, { "epoch": 1.76, "learning_rate": 1.179152594333279e-05, "loss": 0.5057, "step": 2635 }, { "epoch": 1.76, "learning_rate": 1.1786313608245823e-05, "loss": 0.4722, "step": 2636 }, { "epoch": 1.76, "learning_rate": 1.1781100771800733e-05, "loss": 0.4782, "step": 2637 }, { "epoch": 1.76, "learning_rate": 1.1775887435460588e-05, "loss": 0.502, "step": 2638 }, { "epoch": 1.76, "learning_rate": 1.17706736006886e-05, "loss": 0.4728, "step": 2639 }, { "epoch": 1.76, "learning_rate": 1.1765459268948111e-05, "loss": 0.4592, "step": 2640 }, { "epoch": 1.76, "learning_rate": 1.1760244441702612e-05, "loss": 0.4893, "step": 2641 }, { "epoch": 1.76, "learning_rate": 1.1755029120415728e-05, "loss": 0.4818, "step": 2642 }, { "epoch": 1.76, "learning_rate": 1.1749813306551221e-05, "loss": 0.4849, "step": 2643 }, { "epoch": 1.76, "learning_rate": 1.1744597001573002e-05, "loss": 0.5335, "step": 2644 }, { "epoch": 1.76, "learning_rate": 1.1739380206945108e-05, "loss": 0.4826, "step": 2645 }, { "epoch": 1.76, "learning_rate": 1.1734162924131719e-05, "loss": 0.4944, "step": 2646 }, { "epoch": 1.76, "learning_rate": 1.1728945154597149e-05, "loss": 0.4772, "step": 2647 }, { "epoch": 1.77, "learning_rate": 1.1723726899805851e-05, "loss": 0.4972, "step": 2648 }, { "epoch": 1.77, "learning_rate": 1.1718508161222415e-05, "loss": 0.4766, "step": 2649 }, { "epoch": 1.77, "learning_rate": 1.1713288940311562e-05, "loss": 0.4663, "step": 2650 }, { "epoch": 1.77, "learning_rate": 1.1708069238538153e-05, "loss": 0.4894, "step": 2651 }, { "epoch": 1.77, "learning_rate": 1.1702849057367185e-05, "loss": 0.468, "step": 2652 }, { "epoch": 1.77, "learning_rate": 1.1697628398263785e-05, "loss": 0.5206, "step": 2653 }, { "epoch": 1.77, "learning_rate": 1.1692407262693219e-05, "loss": 0.4589, "step": 2654 }, { "epoch": 1.77, "learning_rate": 1.168718565212088e-05, "loss": 0.496, "step": 2655 }, { "epoch": 1.77, "learning_rate": 1.16819635680123e-05, "loss": 0.4686, "step": 2656 }, { "epoch": 1.77, "learning_rate": 1.1676741011833142e-05, "loss": 0.4786, "step": 2657 }, { "epoch": 1.77, "learning_rate": 1.1671517985049204e-05, "loss": 0.4619, "step": 2658 }, { "epoch": 1.77, "learning_rate": 1.1666294489126411e-05, "loss": 0.5025, "step": 2659 }, { "epoch": 1.77, "learning_rate": 1.1661070525530827e-05, "loss": 0.4791, "step": 2660 }, { "epoch": 1.77, "learning_rate": 1.1655846095728638e-05, "loss": 0.4788, "step": 2661 }, { "epoch": 1.77, "learning_rate": 1.1650621201186165e-05, "loss": 0.4947, "step": 2662 }, { "epoch": 1.78, "learning_rate": 1.1645395843369867e-05, "loss": 0.4753, "step": 2663 }, { "epoch": 1.78, "learning_rate": 1.1640170023746315e-05, "loss": 0.4809, "step": 2664 }, { "epoch": 1.78, "learning_rate": 1.1634943743782235e-05, "loss": 0.4601, "step": 2665 }, { "epoch": 1.78, "learning_rate": 1.1629717004944455e-05, "loss": 0.4867, "step": 2666 }, { "epoch": 1.78, "learning_rate": 1.162448980869995e-05, "loss": 0.5017, "step": 2667 }, { "epoch": 1.78, "learning_rate": 1.161926215651582e-05, "loss": 0.461, "step": 2668 }, { "epoch": 1.78, "learning_rate": 1.1614034049859289e-05, "loss": 0.4891, "step": 2669 }, { "epoch": 1.78, "learning_rate": 1.160880549019771e-05, "loss": 0.4865, "step": 2670 }, { "epoch": 1.78, "learning_rate": 1.1603576478998561e-05, "loss": 0.4866, "step": 2671 }, { "epoch": 1.78, "learning_rate": 1.1598347017729457e-05, "loss": 0.4877, "step": 2672 }, { "epoch": 1.78, "learning_rate": 1.1593117107858127e-05, "loss": 0.5105, "step": 2673 }, { "epoch": 1.78, "learning_rate": 1.1587886750852434e-05, "loss": 0.4858, "step": 2674 }, { "epoch": 1.78, "learning_rate": 1.1582655948180357e-05, "loss": 0.4396, "step": 2675 }, { "epoch": 1.78, "learning_rate": 1.157742470131001e-05, "loss": 0.5135, "step": 2676 }, { "epoch": 1.78, "learning_rate": 1.1572193011709627e-05, "loss": 0.4782, "step": 2677 }, { "epoch": 1.79, "learning_rate": 1.1566960880847572e-05, "loss": 0.5253, "step": 2678 }, { "epoch": 1.79, "learning_rate": 1.156172831019232e-05, "loss": 0.4964, "step": 2679 }, { "epoch": 1.79, "learning_rate": 1.1556495301212485e-05, "loss": 0.4945, "step": 2680 }, { "epoch": 1.79, "learning_rate": 1.1551261855376792e-05, "loss": 0.5082, "step": 2681 }, { "epoch": 1.79, "learning_rate": 1.1546027974154095e-05, "loss": 0.4884, "step": 2682 }, { "epoch": 1.79, "learning_rate": 1.154079365901337e-05, "loss": 0.4731, "step": 2683 }, { "epoch": 1.79, "learning_rate": 1.1535558911423706e-05, "loss": 0.4869, "step": 2684 }, { "epoch": 1.79, "learning_rate": 1.1530323732854326e-05, "loss": 0.5016, "step": 2685 }, { "epoch": 1.79, "learning_rate": 1.152508812477457e-05, "loss": 0.5469, "step": 2686 }, { "epoch": 1.79, "learning_rate": 1.1519852088653895e-05, "loss": 0.5322, "step": 2687 }, { "epoch": 1.79, "learning_rate": 1.1514615625961877e-05, "loss": 0.4848, "step": 2688 }, { "epoch": 1.79, "learning_rate": 1.1509378738168224e-05, "loss": 0.4862, "step": 2689 }, { "epoch": 1.79, "learning_rate": 1.1504141426742744e-05, "loss": 0.4539, "step": 2690 }, { "epoch": 1.79, "learning_rate": 1.149890369315538e-05, "loss": 0.5259, "step": 2691 }, { "epoch": 1.79, "learning_rate": 1.1493665538876182e-05, "loss": 0.5112, "step": 2692 }, { "epoch": 1.8, "learning_rate": 1.148842696537533e-05, "loss": 0.5013, "step": 2693 }, { "epoch": 1.8, "learning_rate": 1.1483187974123116e-05, "loss": 0.5112, "step": 2694 }, { "epoch": 1.8, "learning_rate": 1.147794856658994e-05, "loss": 0.4677, "step": 2695 }, { "epoch": 1.8, "learning_rate": 1.1472708744246334e-05, "loss": 0.5068, "step": 2696 }, { "epoch": 1.8, "learning_rate": 1.1467468508562943e-05, "loss": 0.5124, "step": 2697 }, { "epoch": 1.8, "learning_rate": 1.1462227861010513e-05, "loss": 0.478, "step": 2698 }, { "epoch": 1.8, "learning_rate": 1.1456986803059927e-05, "loss": 0.4768, "step": 2699 }, { "epoch": 1.8, "learning_rate": 1.1451745336182173e-05, "loss": 0.4729, "step": 2700 }, { "epoch": 1.8, "learning_rate": 1.1446503461848353e-05, "loss": 0.4853, "step": 2701 }, { "epoch": 1.8, "learning_rate": 1.1441261181529684e-05, "loss": 0.5183, "step": 2702 }, { "epoch": 1.8, "learning_rate": 1.14360184966975e-05, "loss": 0.4719, "step": 2703 }, { "epoch": 1.8, "learning_rate": 1.1430775408823243e-05, "loss": 0.5446, "step": 2704 }, { "epoch": 1.8, "learning_rate": 1.1425531919378469e-05, "loss": 0.5112, "step": 2705 }, { "epoch": 1.8, "learning_rate": 1.1420288029834855e-05, "loss": 0.4901, "step": 2706 }, { "epoch": 1.8, "learning_rate": 1.1415043741664184e-05, "loss": 0.5144, "step": 2707 }, { "epoch": 1.81, "learning_rate": 1.140979905633835e-05, "loss": 0.4611, "step": 2708 }, { "epoch": 1.81, "learning_rate": 1.1404553975329357e-05, "loss": 0.533, "step": 2709 }, { "epoch": 1.81, "learning_rate": 1.1399308500109326e-05, "loss": 0.4642, "step": 2710 }, { "epoch": 1.81, "learning_rate": 1.1394062632150483e-05, "loss": 0.4878, "step": 2711 }, { "epoch": 1.81, "learning_rate": 1.1388816372925166e-05, "loss": 0.5049, "step": 2712 }, { "epoch": 1.81, "learning_rate": 1.138356972390583e-05, "loss": 0.4818, "step": 2713 }, { "epoch": 1.81, "learning_rate": 1.1378322686565026e-05, "loss": 0.5138, "step": 2714 }, { "epoch": 1.81, "learning_rate": 1.1373075262375421e-05, "loss": 0.4992, "step": 2715 }, { "epoch": 1.81, "learning_rate": 1.1367827452809794e-05, "loss": 0.487, "step": 2716 }, { "epoch": 1.81, "learning_rate": 1.1362579259341029e-05, "loss": 0.4777, "step": 2717 }, { "epoch": 1.81, "learning_rate": 1.1357330683442114e-05, "loss": 0.4653, "step": 2718 }, { "epoch": 1.81, "learning_rate": 1.1352081726586144e-05, "loss": 0.4782, "step": 2719 }, { "epoch": 1.81, "learning_rate": 1.1346832390246334e-05, "loss": 0.4701, "step": 2720 }, { "epoch": 1.81, "learning_rate": 1.134158267589599e-05, "loss": 0.5024, "step": 2721 }, { "epoch": 1.81, "learning_rate": 1.1336332585008532e-05, "loss": 0.501, "step": 2722 }, { "epoch": 1.82, "learning_rate": 1.1331082119057485e-05, "loss": 0.5276, "step": 2723 }, { "epoch": 1.82, "learning_rate": 1.1325831279516475e-05, "loss": 0.5013, "step": 2724 }, { "epoch": 1.82, "learning_rate": 1.132058006785924e-05, "loss": 0.4997, "step": 2725 }, { "epoch": 1.82, "learning_rate": 1.131532848555961e-05, "loss": 0.4982, "step": 2726 }, { "epoch": 1.82, "learning_rate": 1.1310076534091539e-05, "loss": 0.4904, "step": 2727 }, { "epoch": 1.82, "learning_rate": 1.1304824214929064e-05, "loss": 0.5161, "step": 2728 }, { "epoch": 1.82, "learning_rate": 1.1299571529546342e-05, "loss": 0.4886, "step": 2729 }, { "epoch": 1.82, "learning_rate": 1.1294318479417618e-05, "loss": 0.508, "step": 2730 }, { "epoch": 1.82, "learning_rate": 1.1289065066017249e-05, "loss": 0.4876, "step": 2731 }, { "epoch": 1.82, "learning_rate": 1.1283811290819693e-05, "loss": 0.4646, "step": 2732 }, { "epoch": 1.82, "learning_rate": 1.1278557155299506e-05, "loss": 0.5344, "step": 2733 }, { "epoch": 1.82, "learning_rate": 1.1273302660931345e-05, "loss": 0.522, "step": 2734 }, { "epoch": 1.82, "learning_rate": 1.1268047809189976e-05, "loss": 0.4928, "step": 2735 }, { "epoch": 1.82, "learning_rate": 1.1262792601550254e-05, "loss": 0.4954, "step": 2736 }, { "epoch": 1.83, "learning_rate": 1.1257537039487141e-05, "loss": 0.498, "step": 2737 }, { "epoch": 1.83, "learning_rate": 1.1252281124475695e-05, "loss": 0.5206, "step": 2738 }, { "epoch": 1.83, "learning_rate": 1.1247024857991075e-05, "loss": 0.4763, "step": 2739 }, { "epoch": 1.83, "learning_rate": 1.1241768241508537e-05, "loss": 0.4737, "step": 2740 }, { "epoch": 1.83, "learning_rate": 1.123651127650344e-05, "loss": 0.4942, "step": 2741 }, { "epoch": 1.83, "learning_rate": 1.1231253964451235e-05, "loss": 0.4861, "step": 2742 }, { "epoch": 1.83, "learning_rate": 1.1225996306827471e-05, "loss": 0.4762, "step": 2743 }, { "epoch": 1.83, "learning_rate": 1.12207383051078e-05, "loss": 0.4883, "step": 2744 }, { "epoch": 1.83, "learning_rate": 1.1215479960767958e-05, "loss": 0.5143, "step": 2745 }, { "epoch": 1.83, "learning_rate": 1.1210221275283794e-05, "loss": 0.4959, "step": 2746 }, { "epoch": 1.83, "learning_rate": 1.120496225013124e-05, "loss": 0.5203, "step": 2747 }, { "epoch": 1.83, "learning_rate": 1.1199702886786327e-05, "loss": 0.4639, "step": 2748 }, { "epoch": 1.83, "learning_rate": 1.1194443186725186e-05, "loss": 0.5363, "step": 2749 }, { "epoch": 1.83, "learning_rate": 1.118918315142403e-05, "loss": 0.5006, "step": 2750 }, { "epoch": 1.83, "learning_rate": 1.118392278235918e-05, "loss": 0.5088, "step": 2751 }, { "epoch": 1.84, "learning_rate": 1.1178662081007044e-05, "loss": 0.4637, "step": 2752 }, { "epoch": 1.84, "learning_rate": 1.117340104884412e-05, "loss": 0.4974, "step": 2753 }, { "epoch": 1.84, "learning_rate": 1.1168139687347003e-05, "loss": 0.4883, "step": 2754 }, { "epoch": 1.84, "learning_rate": 1.1162877997992389e-05, "loss": 0.5157, "step": 2755 }, { "epoch": 1.84, "learning_rate": 1.1157615982257047e-05, "loss": 0.5178, "step": 2756 }, { "epoch": 1.84, "learning_rate": 1.115235364161785e-05, "loss": 0.5356, "step": 2757 }, { "epoch": 1.84, "learning_rate": 1.1147090977551764e-05, "loss": 0.4933, "step": 2758 }, { "epoch": 1.84, "learning_rate": 1.114182799153584e-05, "loss": 0.5038, "step": 2759 }, { "epoch": 1.84, "learning_rate": 1.1136564685047213e-05, "loss": 0.472, "step": 2760 }, { "epoch": 1.84, "learning_rate": 1.1131301059563129e-05, "loss": 0.4746, "step": 2761 }, { "epoch": 1.84, "learning_rate": 1.1126037116560905e-05, "loss": 0.469, "step": 2762 }, { "epoch": 1.84, "learning_rate": 1.1120772857517947e-05, "loss": 0.4833, "step": 2763 }, { "epoch": 1.84, "learning_rate": 1.1115508283911767e-05, "loss": 0.4831, "step": 2764 }, { "epoch": 1.84, "learning_rate": 1.1110243397219945e-05, "loss": 0.4783, "step": 2765 }, { "epoch": 1.84, "learning_rate": 1.1104978198920158e-05, "loss": 0.4802, "step": 2766 }, { "epoch": 1.85, "learning_rate": 1.1099712690490172e-05, "loss": 0.4894, "step": 2767 }, { "epoch": 1.85, "learning_rate": 1.1094446873407838e-05, "loss": 0.502, "step": 2768 }, { "epoch": 1.85, "learning_rate": 1.1089180749151098e-05, "loss": 0.5038, "step": 2769 }, { "epoch": 1.85, "learning_rate": 1.1083914319197967e-05, "loss": 0.5063, "step": 2770 }, { "epoch": 1.85, "learning_rate": 1.107864758502656e-05, "loss": 0.4997, "step": 2771 }, { "epoch": 1.85, "learning_rate": 1.1073380548115074e-05, "loss": 0.4957, "step": 2772 }, { "epoch": 1.85, "learning_rate": 1.106811320994178e-05, "loss": 0.4841, "step": 2773 }, { "epoch": 1.85, "learning_rate": 1.1062845571985052e-05, "loss": 0.4778, "step": 2774 }, { "epoch": 1.85, "learning_rate": 1.1057577635723337e-05, "loss": 0.4859, "step": 2775 }, { "epoch": 1.85, "learning_rate": 1.1052309402635164e-05, "loss": 0.4793, "step": 2776 }, { "epoch": 1.85, "learning_rate": 1.1047040874199151e-05, "loss": 0.4921, "step": 2777 }, { "epoch": 1.85, "learning_rate": 1.1041772051894e-05, "loss": 0.4774, "step": 2778 }, { "epoch": 1.85, "learning_rate": 1.1036502937198484e-05, "loss": 0.5116, "step": 2779 }, { "epoch": 1.85, "learning_rate": 1.1031233531591471e-05, "loss": 0.4943, "step": 2780 }, { "epoch": 1.85, "learning_rate": 1.1025963836551907e-05, "loss": 0.4552, "step": 2781 }, { "epoch": 1.86, "learning_rate": 1.1020693853558815e-05, "loss": 0.4817, "step": 2782 }, { "epoch": 1.86, "learning_rate": 1.1015423584091306e-05, "loss": 0.4796, "step": 2783 }, { "epoch": 1.86, "learning_rate": 1.1010153029628563e-05, "loss": 0.4945, "step": 2784 }, { "epoch": 1.86, "learning_rate": 1.1004882191649857e-05, "loss": 0.4959, "step": 2785 }, { "epoch": 1.86, "learning_rate": 1.099961107163453e-05, "loss": 0.5319, "step": 2786 }, { "epoch": 1.86, "learning_rate": 1.0994339671062012e-05, "loss": 0.5032, "step": 2787 }, { "epoch": 1.86, "learning_rate": 1.0989067991411808e-05, "loss": 0.4865, "step": 2788 }, { "epoch": 1.86, "learning_rate": 1.09837960341635e-05, "loss": 0.4712, "step": 2789 }, { "epoch": 1.86, "learning_rate": 1.0978523800796747e-05, "loss": 0.5002, "step": 2790 }, { "epoch": 1.86, "learning_rate": 1.0973251292791292e-05, "loss": 0.5202, "step": 2791 }, { "epoch": 1.86, "learning_rate": 1.0967978511626947e-05, "loss": 0.4463, "step": 2792 }, { "epoch": 1.86, "learning_rate": 1.0962705458783605e-05, "loss": 0.498, "step": 2793 }, { "epoch": 1.86, "learning_rate": 1.095743213574123e-05, "loss": 0.4906, "step": 2794 }, { "epoch": 1.86, "learning_rate": 1.0952158543979878e-05, "loss": 0.4979, "step": 2795 }, { "epoch": 1.86, "learning_rate": 1.0946884684979659e-05, "loss": 0.483, "step": 2796 }, { "epoch": 1.87, "learning_rate": 1.094161056022077e-05, "loss": 0.4596, "step": 2797 }, { "epoch": 1.87, "learning_rate": 1.0936336171183484e-05, "loss": 0.497, "step": 2798 }, { "epoch": 1.87, "learning_rate": 1.0931061519348139e-05, "loss": 0.5555, "step": 2799 }, { "epoch": 1.87, "learning_rate": 1.0925786606195153e-05, "loss": 0.4815, "step": 2800 }, { "epoch": 1.87, "learning_rate": 1.092051143320502e-05, "loss": 0.4933, "step": 2801 }, { "epoch": 1.87, "learning_rate": 1.09152360018583e-05, "loss": 0.5072, "step": 2802 }, { "epoch": 1.87, "learning_rate": 1.0909960313635632e-05, "loss": 0.4998, "step": 2803 }, { "epoch": 1.87, "learning_rate": 1.0904684370017726e-05, "loss": 0.4853, "step": 2804 }, { "epoch": 1.87, "learning_rate": 1.0899408172485357e-05, "loss": 0.4921, "step": 2805 }, { "epoch": 1.87, "learning_rate": 1.0894131722519376e-05, "loss": 0.4851, "step": 2806 }, { "epoch": 1.87, "learning_rate": 1.0888855021600711e-05, "loss": 0.4909, "step": 2807 }, { "epoch": 1.87, "learning_rate": 1.0883578071210348e-05, "loss": 0.5264, "step": 2808 }, { "epoch": 1.87, "learning_rate": 1.087830087282935e-05, "loss": 0.4648, "step": 2809 }, { "epoch": 1.87, "learning_rate": 1.0873023427938855e-05, "loss": 0.4745, "step": 2810 }, { "epoch": 1.87, "learning_rate": 1.0867745738020058e-05, "loss": 0.4899, "step": 2811 }, { "epoch": 1.88, "learning_rate": 1.0862467804554231e-05, "loss": 0.4364, "step": 2812 }, { "epoch": 1.88, "learning_rate": 1.0857189629022713e-05, "loss": 0.496, "step": 2813 }, { "epoch": 1.88, "learning_rate": 1.0851911212906909e-05, "loss": 0.4562, "step": 2814 }, { "epoch": 1.88, "learning_rate": 1.0846632557688295e-05, "loss": 0.4876, "step": 2815 }, { "epoch": 1.88, "learning_rate": 1.0841353664848406e-05, "loss": 0.4894, "step": 2816 }, { "epoch": 1.88, "learning_rate": 1.0836074535868857e-05, "loss": 0.5028, "step": 2817 }, { "epoch": 1.88, "learning_rate": 1.0830795172231322e-05, "loss": 0.5217, "step": 2818 }, { "epoch": 1.88, "learning_rate": 1.082551557541753e-05, "loss": 0.4886, "step": 2819 }, { "epoch": 1.88, "learning_rate": 1.08202357469093e-05, "loss": 0.485, "step": 2820 }, { "epoch": 1.88, "learning_rate": 1.081495568818849e-05, "loss": 0.4923, "step": 2821 }, { "epoch": 1.88, "learning_rate": 1.0809675400737045e-05, "loss": 0.4748, "step": 2822 }, { "epoch": 1.88, "learning_rate": 1.0804394886036959e-05, "loss": 0.5513, "step": 2823 }, { "epoch": 1.88, "learning_rate": 1.0799114145570298e-05, "loss": 0.4667, "step": 2824 }, { "epoch": 1.88, "learning_rate": 1.0793833180819183e-05, "loss": 0.5125, "step": 2825 }, { "epoch": 1.88, "learning_rate": 1.0788551993265804e-05, "loss": 0.4919, "step": 2826 }, { "epoch": 1.89, "learning_rate": 1.0783270584392418e-05, "loss": 0.4575, "step": 2827 }, { "epoch": 1.89, "learning_rate": 1.0777988955681331e-05, "loss": 0.4957, "step": 2828 }, { "epoch": 1.89, "learning_rate": 1.0772707108614923e-05, "loss": 0.5028, "step": 2829 }, { "epoch": 1.89, "learning_rate": 1.0767425044675634e-05, "loss": 0.5076, "step": 2830 }, { "epoch": 1.89, "learning_rate": 1.0762142765345955e-05, "loss": 0.508, "step": 2831 }, { "epoch": 1.89, "learning_rate": 1.075686027210845e-05, "loss": 0.525, "step": 2832 }, { "epoch": 1.89, "learning_rate": 1.0751577566445732e-05, "loss": 0.497, "step": 2833 }, { "epoch": 1.89, "learning_rate": 1.0746294649840481e-05, "loss": 0.4831, "step": 2834 }, { "epoch": 1.89, "learning_rate": 1.0741011523775433e-05, "loss": 0.4795, "step": 2835 }, { "epoch": 1.89, "learning_rate": 1.0735728189733386e-05, "loss": 0.4814, "step": 2836 }, { "epoch": 1.89, "learning_rate": 1.0730444649197191e-05, "loss": 0.497, "step": 2837 }, { "epoch": 1.89, "learning_rate": 1.0725160903649765e-05, "loss": 0.4884, "step": 2838 }, { "epoch": 1.89, "learning_rate": 1.0719876954574071e-05, "loss": 0.4925, "step": 2839 }, { "epoch": 1.89, "learning_rate": 1.0714592803453138e-05, "loss": 0.5302, "step": 2840 }, { "epoch": 1.9, "learning_rate": 1.0709308451770053e-05, "loss": 0.4754, "step": 2841 }, { "epoch": 1.9, "learning_rate": 1.070402390100795e-05, "loss": 0.4796, "step": 2842 }, { "epoch": 1.9, "learning_rate": 1.0698739152650031e-05, "loss": 0.4843, "step": 2843 }, { "epoch": 1.9, "learning_rate": 1.0693454208179544e-05, "loss": 0.4846, "step": 2844 }, { "epoch": 1.9, "learning_rate": 1.0688169069079793e-05, "loss": 0.4903, "step": 2845 }, { "epoch": 1.9, "learning_rate": 1.068288373683414e-05, "loss": 0.5108, "step": 2846 }, { "epoch": 1.9, "learning_rate": 1.0677598212926001e-05, "loss": 0.5432, "step": 2847 }, { "epoch": 1.9, "learning_rate": 1.0672312498838844e-05, "loss": 0.4934, "step": 2848 }, { "epoch": 1.9, "learning_rate": 1.0667026596056186e-05, "loss": 0.5237, "step": 2849 }, { "epoch": 1.9, "learning_rate": 1.0661740506061616e-05, "loss": 0.5274, "step": 2850 }, { "epoch": 1.9, "learning_rate": 1.065645423033875e-05, "loss": 0.5007, "step": 2851 }, { "epoch": 1.9, "learning_rate": 1.0651167770371267e-05, "loss": 0.5235, "step": 2852 }, { "epoch": 1.9, "learning_rate": 1.0645881127642907e-05, "loss": 0.4926, "step": 2853 }, { "epoch": 1.9, "learning_rate": 1.0640594303637444e-05, "loss": 0.5101, "step": 2854 }, { "epoch": 1.9, "learning_rate": 1.0635307299838715e-05, "loss": 0.4413, "step": 2855 }, { "epoch": 1.91, "learning_rate": 1.0630020117730606e-05, "loss": 0.4903, "step": 2856 }, { "epoch": 1.91, "learning_rate": 1.062473275879705e-05, "loss": 0.4597, "step": 2857 }, { "epoch": 1.91, "learning_rate": 1.0619445224522027e-05, "loss": 0.528, "step": 2858 }, { "epoch": 1.91, "learning_rate": 1.0614157516389579e-05, "loss": 0.4897, "step": 2859 }, { "epoch": 1.91, "learning_rate": 1.0608869635883776e-05, "loss": 0.4912, "step": 2860 }, { "epoch": 1.91, "learning_rate": 1.0603581584488755e-05, "loss": 0.4837, "step": 2861 }, { "epoch": 1.91, "learning_rate": 1.0598293363688694e-05, "loss": 0.5153, "step": 2862 }, { "epoch": 1.91, "learning_rate": 1.0593004974967817e-05, "loss": 0.4801, "step": 2863 }, { "epoch": 1.91, "learning_rate": 1.05877164198104e-05, "loss": 0.508, "step": 2864 }, { "epoch": 1.91, "learning_rate": 1.0582427699700759e-05, "loss": 0.5199, "step": 2865 }, { "epoch": 1.91, "learning_rate": 1.057713881612326e-05, "loss": 0.4843, "step": 2866 }, { "epoch": 1.91, "learning_rate": 1.0571849770562316e-05, "loss": 0.4659, "step": 2867 }, { "epoch": 1.91, "learning_rate": 1.0566560564502384e-05, "loss": 0.5031, "step": 2868 }, { "epoch": 1.91, "learning_rate": 1.0561271199427965e-05, "loss": 0.4719, "step": 2869 }, { "epoch": 1.91, "learning_rate": 1.0555981676823606e-05, "loss": 0.5044, "step": 2870 }, { "epoch": 1.92, "learning_rate": 1.0550691998173897e-05, "loss": 0.4975, "step": 2871 }, { "epoch": 1.92, "learning_rate": 1.0545402164963476e-05, "loss": 0.4609, "step": 2872 }, { "epoch": 1.92, "learning_rate": 1.0540112178677022e-05, "loss": 0.5131, "step": 2873 }, { "epoch": 1.92, "learning_rate": 1.053482204079925e-05, "loss": 0.4446, "step": 2874 }, { "epoch": 1.92, "learning_rate": 1.0529531752814928e-05, "loss": 0.4816, "step": 2875 }, { "epoch": 1.92, "learning_rate": 1.052424131620886e-05, "loss": 0.4872, "step": 2876 }, { "epoch": 1.92, "learning_rate": 1.0518950732465895e-05, "loss": 0.52, "step": 2877 }, { "epoch": 1.92, "learning_rate": 1.0513660003070924e-05, "loss": 0.5158, "step": 2878 }, { "epoch": 1.92, "learning_rate": 1.0508369129508876e-05, "loss": 0.4921, "step": 2879 }, { "epoch": 1.92, "learning_rate": 1.0503078113264715e-05, "loss": 0.4898, "step": 2880 }, { "epoch": 1.92, "learning_rate": 1.0497786955823457e-05, "loss": 0.4705, "step": 2881 }, { "epoch": 1.92, "learning_rate": 1.0492495658670151e-05, "loss": 0.4777, "step": 2882 }, { "epoch": 1.92, "learning_rate": 1.0487204223289882e-05, "loss": 0.502, "step": 2883 }, { "epoch": 1.92, "learning_rate": 1.0481912651167784e-05, "loss": 0.4947, "step": 2884 }, { "epoch": 1.92, "learning_rate": 1.0476620943789021e-05, "loss": 0.5034, "step": 2885 }, { "epoch": 1.93, "learning_rate": 1.0471329102638799e-05, "loss": 0.5059, "step": 2886 }, { "epoch": 1.93, "learning_rate": 1.0466037129202356e-05, "loss": 0.4792, "step": 2887 }, { "epoch": 1.93, "learning_rate": 1.046074502496497e-05, "loss": 0.4651, "step": 2888 }, { "epoch": 1.93, "learning_rate": 1.0455452791411962e-05, "loss": 0.521, "step": 2889 }, { "epoch": 1.93, "learning_rate": 1.0450160430028679e-05, "loss": 0.4928, "step": 2890 }, { "epoch": 1.93, "learning_rate": 1.0444867942300512e-05, "loss": 0.4638, "step": 2891 }, { "epoch": 1.93, "learning_rate": 1.0439575329712883e-05, "loss": 0.4608, "step": 2892 }, { "epoch": 1.93, "learning_rate": 1.043428259375125e-05, "loss": 0.4649, "step": 2893 }, { "epoch": 1.93, "learning_rate": 1.0428989735901104e-05, "loss": 0.4693, "step": 2894 }, { "epoch": 1.93, "learning_rate": 1.0423696757647977e-05, "loss": 0.5068, "step": 2895 }, { "epoch": 1.93, "learning_rate": 1.0418403660477425e-05, "loss": 0.4996, "step": 2896 }, { "epoch": 1.93, "learning_rate": 1.0413110445875046e-05, "loss": 0.4853, "step": 2897 }, { "epoch": 1.93, "learning_rate": 1.0407817115326463e-05, "loss": 0.4898, "step": 2898 }, { "epoch": 1.93, "learning_rate": 1.0402523670317343e-05, "loss": 0.5222, "step": 2899 }, { "epoch": 1.93, "learning_rate": 1.039723011233337e-05, "loss": 0.4907, "step": 2900 }, { "epoch": 1.94, "learning_rate": 1.0391936442860271e-05, "loss": 0.5157, "step": 2901 }, { "epoch": 1.94, "learning_rate": 1.0386642663383802e-05, "loss": 0.4984, "step": 2902 }, { "epoch": 1.94, "learning_rate": 1.0381348775389745e-05, "loss": 0.4725, "step": 2903 }, { "epoch": 1.94, "learning_rate": 1.0376054780363917e-05, "loss": 0.4468, "step": 2904 }, { "epoch": 1.94, "learning_rate": 1.0370760679792173e-05, "loss": 0.4809, "step": 2905 }, { "epoch": 1.94, "learning_rate": 1.0365466475160377e-05, "loss": 0.4874, "step": 2906 }, { "epoch": 1.94, "learning_rate": 1.0360172167954439e-05, "loss": 0.4814, "step": 2907 }, { "epoch": 1.94, "learning_rate": 1.0354877759660296e-05, "loss": 0.4635, "step": 2908 }, { "epoch": 1.94, "learning_rate": 1.0349583251763905e-05, "loss": 0.5117, "step": 2909 }, { "epoch": 1.94, "learning_rate": 1.0344288645751257e-05, "loss": 0.4821, "step": 2910 }, { "epoch": 1.94, "learning_rate": 1.033899394310837e-05, "loss": 0.4777, "step": 2911 }, { "epoch": 1.94, "learning_rate": 1.0333699145321294e-05, "loss": 0.4786, "step": 2912 }, { "epoch": 1.94, "learning_rate": 1.0328404253876096e-05, "loss": 0.4784, "step": 2913 }, { "epoch": 1.94, "learning_rate": 1.0323109270258873e-05, "loss": 0.5113, "step": 2914 }, { "epoch": 1.94, "learning_rate": 1.031781419595575e-05, "loss": 0.4958, "step": 2915 }, { "epoch": 1.95, "learning_rate": 1.0312519032452877e-05, "loss": 0.4805, "step": 2916 }, { "epoch": 1.95, "learning_rate": 1.0307223781236424e-05, "loss": 0.4957, "step": 2917 }, { "epoch": 1.95, "learning_rate": 1.0301928443792598e-05, "loss": 0.4952, "step": 2918 }, { "epoch": 1.95, "learning_rate": 1.0296633021607617e-05, "loss": 0.4686, "step": 2919 }, { "epoch": 1.95, "learning_rate": 1.0291337516167725e-05, "loss": 0.4754, "step": 2920 }, { "epoch": 1.95, "learning_rate": 1.0286041928959197e-05, "loss": 0.4714, "step": 2921 }, { "epoch": 1.95, "learning_rate": 1.0280746261468326e-05, "loss": 0.4981, "step": 2922 }, { "epoch": 1.95, "learning_rate": 1.0275450515181424e-05, "loss": 0.5099, "step": 2923 }, { "epoch": 1.95, "learning_rate": 1.0270154691584833e-05, "loss": 0.5042, "step": 2924 }, { "epoch": 1.95, "learning_rate": 1.0264858792164908e-05, "loss": 0.5074, "step": 2925 }, { "epoch": 1.95, "learning_rate": 1.0259562818408033e-05, "loss": 0.4982, "step": 2926 }, { "epoch": 1.95, "learning_rate": 1.0254266771800609e-05, "loss": 0.5062, "step": 2927 }, { "epoch": 1.95, "learning_rate": 1.0248970653829063e-05, "loss": 0.5191, "step": 2928 }, { "epoch": 1.95, "learning_rate": 1.0243674465979825e-05, "loss": 0.541, "step": 2929 }, { "epoch": 1.95, "learning_rate": 1.0238378209739366e-05, "loss": 0.5299, "step": 2930 }, { "epoch": 1.96, "learning_rate": 1.0233081886594165e-05, "loss": 0.4885, "step": 2931 }, { "epoch": 1.96, "learning_rate": 1.0227785498030722e-05, "loss": 0.484, "step": 2932 }, { "epoch": 1.96, "learning_rate": 1.0222489045535553e-05, "loss": 0.4875, "step": 2933 }, { "epoch": 1.96, "learning_rate": 1.0217192530595196e-05, "loss": 0.5089, "step": 2934 }, { "epoch": 1.96, "learning_rate": 1.0211895954696204e-05, "loss": 0.4779, "step": 2935 }, { "epoch": 1.96, "learning_rate": 1.0206599319325148e-05, "loss": 0.4459, "step": 2936 }, { "epoch": 1.96, "learning_rate": 1.0201302625968616e-05, "loss": 0.5009, "step": 2937 }, { "epoch": 1.96, "learning_rate": 1.0196005876113209e-05, "loss": 0.4415, "step": 2938 }, { "epoch": 1.96, "learning_rate": 1.0190709071245547e-05, "loss": 0.5037, "step": 2939 }, { "epoch": 1.96, "learning_rate": 1.0185412212852268e-05, "loss": 0.4729, "step": 2940 }, { "epoch": 1.96, "learning_rate": 1.0180115302420019e-05, "loss": 0.4909, "step": 2941 }, { "epoch": 1.96, "learning_rate": 1.0174818341435466e-05, "loss": 0.51, "step": 2942 }, { "epoch": 1.96, "learning_rate": 1.0169521331385287e-05, "loss": 0.5205, "step": 2943 }, { "epoch": 1.96, "learning_rate": 1.0164224273756172e-05, "loss": 0.4777, "step": 2944 }, { "epoch": 1.97, "learning_rate": 1.0158927170034831e-05, "loss": 0.4962, "step": 2945 }, { "epoch": 1.97, "learning_rate": 1.0153630021707982e-05, "loss": 0.5115, "step": 2946 }, { "epoch": 1.97, "learning_rate": 1.0148332830262352e-05, "loss": 0.4814, "step": 2947 }, { "epoch": 1.97, "learning_rate": 1.0143035597184691e-05, "loss": 0.4373, "step": 2948 }, { "epoch": 1.97, "learning_rate": 1.013773832396175e-05, "loss": 0.491, "step": 2949 }, { "epoch": 1.97, "learning_rate": 1.0132441012080296e-05, "loss": 0.5256, "step": 2950 }, { "epoch": 1.97, "learning_rate": 1.0127143663027106e-05, "loss": 0.5094, "step": 2951 }, { "epoch": 1.97, "learning_rate": 1.012184627828897e-05, "loss": 0.5509, "step": 2952 }, { "epoch": 1.97, "learning_rate": 1.0116548859352682e-05, "loss": 0.4828, "step": 2953 }, { "epoch": 1.97, "learning_rate": 1.0111251407705052e-05, "loss": 0.502, "step": 2954 }, { "epoch": 1.97, "learning_rate": 1.0105953924832894e-05, "loss": 0.5207, "step": 2955 }, { "epoch": 1.97, "learning_rate": 1.0100656412223035e-05, "loss": 0.518, "step": 2956 }, { "epoch": 1.97, "learning_rate": 1.009535887136231e-05, "loss": 0.4915, "step": 2957 }, { "epoch": 1.97, "learning_rate": 1.0090061303737555e-05, "loss": 0.4972, "step": 2958 }, { "epoch": 1.97, "learning_rate": 1.0084763710835624e-05, "loss": 0.5316, "step": 2959 }, { "epoch": 1.98, "learning_rate": 1.0079466094143373e-05, "loss": 0.4677, "step": 2960 }, { "epoch": 1.98, "learning_rate": 1.0074168455147662e-05, "loss": 0.5158, "step": 2961 }, { "epoch": 1.98, "learning_rate": 1.006887079533536e-05, "loss": 0.5006, "step": 2962 }, { "epoch": 1.98, "learning_rate": 1.0063573116193346e-05, "loss": 0.4925, "step": 2963 }, { "epoch": 1.98, "learning_rate": 1.0058275419208496e-05, "loss": 0.5025, "step": 2964 }, { "epoch": 1.98, "learning_rate": 1.0052977705867697e-05, "loss": 0.5037, "step": 2965 }, { "epoch": 1.98, "learning_rate": 1.0047679977657836e-05, "loss": 0.5, "step": 2966 }, { "epoch": 1.98, "learning_rate": 1.0042382236065814e-05, "loss": 0.5016, "step": 2967 }, { "epoch": 1.98, "learning_rate": 1.0037084482578523e-05, "loss": 0.4967, "step": 2968 }, { "epoch": 1.98, "learning_rate": 1.0031786718682866e-05, "loss": 0.457, "step": 2969 }, { "epoch": 1.98, "learning_rate": 1.0026488945865744e-05, "loss": 0.4925, "step": 2970 }, { "epoch": 1.98, "learning_rate": 1.002119116561407e-05, "loss": 0.4812, "step": 2971 }, { "epoch": 1.98, "learning_rate": 1.0015893379414744e-05, "loss": 0.4598, "step": 2972 }, { "epoch": 1.98, "learning_rate": 1.0010595588754683e-05, "loss": 0.506, "step": 2973 }, { "epoch": 1.98, "learning_rate": 1.00052977951208e-05, "loss": 0.4896, "step": 2974 }, { "epoch": 1.99, "learning_rate": 1e-05, "loss": 0.5029, "step": 2975 }, { "epoch": 1.99, "learning_rate": 9.994702204879203e-06, "loss": 0.4762, "step": 2976 }, { "epoch": 1.99, "learning_rate": 9.989404411245316e-06, "loss": 0.4894, "step": 2977 }, { "epoch": 1.99, "learning_rate": 9.984106620585258e-06, "loss": 0.5094, "step": 2978 }, { "epoch": 1.99, "learning_rate": 9.978808834385936e-06, "loss": 0.4987, "step": 2979 }, { "epoch": 1.99, "learning_rate": 9.973511054134259e-06, "loss": 0.5028, "step": 2980 }, { "epoch": 1.99, "learning_rate": 9.96821328131714e-06, "loss": 0.4956, "step": 2981 }, { "epoch": 1.99, "learning_rate": 9.96291551742148e-06, "loss": 0.5067, "step": 2982 }, { "epoch": 1.99, "learning_rate": 9.957617763934188e-06, "loss": 0.5024, "step": 2983 }, { "epoch": 1.99, "learning_rate": 9.952320022342165e-06, "loss": 0.5173, "step": 2984 }, { "epoch": 1.99, "learning_rate": 9.947022294132306e-06, "loss": 0.4876, "step": 2985 }, { "epoch": 1.99, "learning_rate": 9.941724580791507e-06, "loss": 0.4784, "step": 2986 }, { "epoch": 1.99, "learning_rate": 9.936426883806657e-06, "loss": 0.4727, "step": 2987 }, { "epoch": 1.99, "learning_rate": 9.93112920466464e-06, "loss": 0.48, "step": 2988 }, { "epoch": 1.99, "learning_rate": 9.92583154485234e-06, "loss": 0.4937, "step": 2989 }, { "epoch": 2.0, "learning_rate": 9.920533905856634e-06, "loss": 0.522, "step": 2990 }, { "epoch": 2.0, "learning_rate": 9.915236289164381e-06, "loss": 0.4809, "step": 2991 }, { "epoch": 2.0, "learning_rate": 9.909938696262447e-06, "loss": 0.4693, "step": 2992 }, { "epoch": 2.0, "learning_rate": 9.904641128637693e-06, "loss": 0.4934, "step": 2993 }, { "epoch": 2.0, "learning_rate": 9.899343587776966e-06, "loss": 0.5, "step": 2994 }, { "epoch": 2.0, "learning_rate": 9.894046075167106e-06, "loss": 0.4683, "step": 2995 }, { "epoch": 2.0, "learning_rate": 9.888748592294953e-06, "loss": 0.4824, "step": 2996 }, { "epoch": 2.0, "learning_rate": 9.883451140647323e-06, "loss": 0.49, "step": 2997 }, { "epoch": 2.0, "learning_rate": 9.878153721711034e-06, "loss": 0.5117, "step": 2998 }, { "epoch": 2.0, "learning_rate": 9.872856336972896e-06, "loss": 0.4748, "step": 2999 }, { "epoch": 2.0, "learning_rate": 9.867558987919704e-06, "loss": 0.5105, "step": 3000 }, { "epoch": 2.0, "learning_rate": 9.862261676038254e-06, "loss": 0.4925, "step": 3001 }, { "epoch": 2.0, "learning_rate": 9.856964402815312e-06, "loss": 0.5202, "step": 3002 }, { "epoch": 2.0, "learning_rate": 9.851667169737651e-06, "loss": 0.4646, "step": 3003 }, { "epoch": 2.0, "learning_rate": 9.846369978292022e-06, "loss": 0.4966, "step": 3004 }, { "epoch": 2.01, "learning_rate": 9.841072829965172e-06, "loss": 0.4671, "step": 3005 }, { "epoch": 2.01, "learning_rate": 9.83577572624383e-06, "loss": 0.5169, "step": 3006 }, { "epoch": 2.01, "learning_rate": 9.830478668614718e-06, "loss": 0.4912, "step": 3007 }, { "epoch": 2.01, "learning_rate": 9.825181658564539e-06, "loss": 0.4678, "step": 3008 }, { "epoch": 2.01, "learning_rate": 9.819884697579984e-06, "loss": 0.5064, "step": 3009 }, { "epoch": 2.01, "learning_rate": 9.814587787147735e-06, "loss": 0.48, "step": 3010 }, { "epoch": 2.01, "learning_rate": 9.809290928754454e-06, "loss": 0.501, "step": 3011 }, { "epoch": 2.01, "learning_rate": 9.803994123886793e-06, "loss": 0.4783, "step": 3012 }, { "epoch": 2.01, "learning_rate": 9.798697374031388e-06, "loss": 0.4799, "step": 3013 }, { "epoch": 2.01, "learning_rate": 9.793400680674853e-06, "loss": 0.4619, "step": 3014 }, { "epoch": 2.01, "learning_rate": 9.788104045303797e-06, "loss": 0.4934, "step": 3015 }, { "epoch": 2.01, "learning_rate": 9.782807469404805e-06, "loss": 0.5111, "step": 3016 }, { "epoch": 2.01, "learning_rate": 9.777510954464448e-06, "loss": 0.5035, "step": 3017 }, { "epoch": 2.01, "learning_rate": 9.77221450196928e-06, "loss": 0.5074, "step": 3018 }, { "epoch": 2.01, "learning_rate": 9.766918113405838e-06, "loss": 0.4702, "step": 3019 }, { "epoch": 2.02, "learning_rate": 9.761621790260636e-06, "loss": 0.456, "step": 3020 }, { "epoch": 2.02, "learning_rate": 9.756325534020177e-06, "loss": 0.469, "step": 3021 }, { "epoch": 2.02, "learning_rate": 9.751029346170942e-06, "loss": 0.4753, "step": 3022 }, { "epoch": 2.02, "learning_rate": 9.74573322819939e-06, "loss": 0.4956, "step": 3023 }, { "epoch": 2.0, "learning_rate": 9.740437181591967e-06, "loss": 0.437, "step": 3024 }, { "epoch": 2.0, "learning_rate": 9.735141207835095e-06, "loss": 0.4146, "step": 3025 }, { "epoch": 2.0, "learning_rate": 9.72984530841517e-06, "loss": 0.4427, "step": 3026 }, { "epoch": 2.0, "learning_rate": 9.724549484818578e-06, "loss": 0.4721, "step": 3027 }, { "epoch": 2.0, "learning_rate": 9.719253738531676e-06, "loss": 0.439, "step": 3028 }, { "epoch": 2.0, "learning_rate": 9.713958071040803e-06, "loss": 0.4536, "step": 3029 }, { "epoch": 2.0, "learning_rate": 9.708662483832279e-06, "loss": 0.4351, "step": 3030 }, { "epoch": 2.01, "learning_rate": 9.703366978392388e-06, "loss": 0.4254, "step": 3031 }, { "epoch": 2.01, "learning_rate": 9.698071556207407e-06, "loss": 0.4023, "step": 3032 }, { "epoch": 2.01, "learning_rate": 9.69277621876358e-06, "loss": 0.4288, "step": 3033 }, { "epoch": 2.01, "learning_rate": 9.687480967547127e-06, "loss": 0.4396, "step": 3034 }, { "epoch": 2.01, "learning_rate": 9.682185804044252e-06, "loss": 0.4131, "step": 3035 }, { "epoch": 2.01, "learning_rate": 9.676890729741134e-06, "loss": 0.4208, "step": 3036 }, { "epoch": 2.01, "learning_rate": 9.67159574612391e-06, "loss": 0.4434, "step": 3037 }, { "epoch": 2.01, "learning_rate": 9.66630085467871e-06, "loss": 0.4389, "step": 3038 }, { "epoch": 2.01, "learning_rate": 9.661006056891631e-06, "loss": 0.4429, "step": 3039 }, { "epoch": 2.01, "learning_rate": 9.655711354248747e-06, "loss": 0.4069, "step": 3040 }, { "epoch": 2.01, "learning_rate": 9.650416748236099e-06, "loss": 0.4562, "step": 3041 }, { "epoch": 2.01, "learning_rate": 9.645122240339709e-06, "loss": 0.4161, "step": 3042 }, { "epoch": 2.01, "learning_rate": 9.639827832045564e-06, "loss": 0.4611, "step": 3043 }, { "epoch": 2.01, "learning_rate": 9.634533524839626e-06, "loss": 0.4519, "step": 3044 }, { "epoch": 2.01, "learning_rate": 9.62923932020783e-06, "loss": 0.4385, "step": 3045 }, { "epoch": 2.02, "learning_rate": 9.623945219636081e-06, "loss": 0.4605, "step": 3046 }, { "epoch": 2.02, "learning_rate": 9.618651224610257e-06, "loss": 0.4065, "step": 3047 }, { "epoch": 2.02, "learning_rate": 9.613357336616203e-06, "loss": 0.4553, "step": 3048 }, { "epoch": 2.02, "learning_rate": 9.608063557139732e-06, "loss": 0.4096, "step": 3049 }, { "epoch": 2.02, "learning_rate": 9.602769887666633e-06, "loss": 0.4256, "step": 3050 }, { "epoch": 2.02, "learning_rate": 9.59747632968266e-06, "loss": 0.4498, "step": 3051 }, { "epoch": 2.02, "learning_rate": 9.592182884673536e-06, "loss": 0.4204, "step": 3052 }, { "epoch": 2.02, "learning_rate": 9.586889554124957e-06, "loss": 0.4516, "step": 3053 }, { "epoch": 2.02, "learning_rate": 9.581596339522576e-06, "loss": 0.4111, "step": 3054 }, { "epoch": 2.02, "learning_rate": 9.576303242352025e-06, "loss": 0.4505, "step": 3055 }, { "epoch": 2.02, "learning_rate": 9.571010264098897e-06, "loss": 0.4205, "step": 3056 }, { "epoch": 2.02, "learning_rate": 9.565717406248752e-06, "loss": 0.417, "step": 3057 }, { "epoch": 2.02, "learning_rate": 9.560424670287119e-06, "loss": 0.441, "step": 3058 }, { "epoch": 2.02, "learning_rate": 9.555132057699493e-06, "loss": 0.41, "step": 3059 }, { "epoch": 2.02, "learning_rate": 9.549839569971323e-06, "loss": 0.4201, "step": 3060 }, { "epoch": 2.03, "learning_rate": 9.54454720858804e-06, "loss": 0.403, "step": 3061 }, { "epoch": 2.03, "learning_rate": 9.539254975035031e-06, "loss": 0.3843, "step": 3062 }, { "epoch": 2.03, "learning_rate": 9.533962870797646e-06, "loss": 0.413, "step": 3063 }, { "epoch": 2.03, "learning_rate": 9.528670897361203e-06, "loss": 0.4564, "step": 3064 }, { "epoch": 2.03, "learning_rate": 9.523379056210982e-06, "loss": 0.4398, "step": 3065 }, { "epoch": 2.03, "learning_rate": 9.518087348832219e-06, "loss": 0.4408, "step": 3066 }, { "epoch": 2.03, "learning_rate": 9.512795776710122e-06, "loss": 0.4469, "step": 3067 }, { "epoch": 2.03, "learning_rate": 9.507504341329852e-06, "loss": 0.4652, "step": 3068 }, { "epoch": 2.03, "learning_rate": 9.502213044176545e-06, "loss": 0.4121, "step": 3069 }, { "epoch": 2.03, "learning_rate": 9.496921886735287e-06, "loss": 0.4466, "step": 3070 }, { "epoch": 2.03, "learning_rate": 9.491630870491131e-06, "loss": 0.4442, "step": 3071 }, { "epoch": 2.03, "learning_rate": 9.486339996929079e-06, "loss": 0.4356, "step": 3072 }, { "epoch": 2.03, "learning_rate": 9.481049267534106e-06, "loss": 0.442, "step": 3073 }, { "epoch": 2.03, "learning_rate": 9.475758683791142e-06, "loss": 0.4661, "step": 3074 }, { "epoch": 2.03, "learning_rate": 9.470468247185076e-06, "loss": 0.4441, "step": 3075 }, { "epoch": 2.04, "learning_rate": 9.465177959200756e-06, "loss": 0.4245, "step": 3076 }, { "epoch": 2.04, "learning_rate": 9.459887821322983e-06, "loss": 0.4399, "step": 3077 }, { "epoch": 2.04, "learning_rate": 9.454597835036527e-06, "loss": 0.4381, "step": 3078 }, { "epoch": 2.04, "learning_rate": 9.449308001826104e-06, "loss": 0.4601, "step": 3079 }, { "epoch": 2.04, "learning_rate": 9.444018323176399e-06, "loss": 0.4666, "step": 3080 }, { "epoch": 2.04, "learning_rate": 9.43872880057204e-06, "loss": 0.4482, "step": 3081 }, { "epoch": 2.04, "learning_rate": 9.433439435497621e-06, "loss": 0.4147, "step": 3082 }, { "epoch": 2.04, "learning_rate": 9.428150229437689e-06, "loss": 0.4019, "step": 3083 }, { "epoch": 2.04, "learning_rate": 9.422861183876742e-06, "loss": 0.4396, "step": 3084 }, { "epoch": 2.04, "learning_rate": 9.417572300299244e-06, "loss": 0.4445, "step": 3085 }, { "epoch": 2.04, "learning_rate": 9.412283580189601e-06, "loss": 0.4139, "step": 3086 }, { "epoch": 2.04, "learning_rate": 9.406995025032183e-06, "loss": 0.4168, "step": 3087 }, { "epoch": 2.04, "learning_rate": 9.40170663631131e-06, "loss": 0.4385, "step": 3088 }, { "epoch": 2.04, "learning_rate": 9.396418415511248e-06, "loss": 0.4235, "step": 3089 }, { "epoch": 2.04, "learning_rate": 9.391130364116226e-06, "loss": 0.4682, "step": 3090 }, { "epoch": 2.05, "learning_rate": 9.385842483610426e-06, "loss": 0.4274, "step": 3091 }, { "epoch": 2.05, "learning_rate": 9.380554775477974e-06, "loss": 0.4169, "step": 3092 }, { "epoch": 2.05, "learning_rate": 9.375267241202952e-06, "loss": 0.3905, "step": 3093 }, { "epoch": 2.05, "learning_rate": 9.369979882269397e-06, "loss": 0.4587, "step": 3094 }, { "epoch": 2.05, "learning_rate": 9.364692700161287e-06, "loss": 0.4684, "step": 3095 }, { "epoch": 2.05, "learning_rate": 9.35940569636256e-06, "loss": 0.4058, "step": 3096 }, { "epoch": 2.05, "learning_rate": 9.354118872357096e-06, "loss": 0.4391, "step": 3097 }, { "epoch": 2.05, "learning_rate": 9.348832229628733e-06, "loss": 0.4388, "step": 3098 }, { "epoch": 2.05, "learning_rate": 9.343545769661252e-06, "loss": 0.4554, "step": 3099 }, { "epoch": 2.05, "learning_rate": 9.33825949393839e-06, "loss": 0.412, "step": 3100 }, { "epoch": 2.05, "learning_rate": 9.332973403943815e-06, "loss": 0.448, "step": 3101 }, { "epoch": 2.05, "learning_rate": 9.327687501161158e-06, "loss": 0.4424, "step": 3102 }, { "epoch": 2.05, "learning_rate": 9.322401787074e-06, "loss": 0.4538, "step": 3103 }, { "epoch": 2.05, "learning_rate": 9.317116263165862e-06, "loss": 0.4263, "step": 3104 }, { "epoch": 2.06, "learning_rate": 9.311830930920214e-06, "loss": 0.4219, "step": 3105 }, { "epoch": 2.06, "learning_rate": 9.306545791820461e-06, "loss": 0.4664, "step": 3106 }, { "epoch": 2.06, "learning_rate": 9.301260847349974e-06, "loss": 0.4363, "step": 3107 }, { "epoch": 2.06, "learning_rate": 9.295976098992053e-06, "loss": 0.4299, "step": 3108 }, { "epoch": 2.06, "learning_rate": 9.29069154822995e-06, "loss": 0.4489, "step": 3109 }, { "epoch": 2.06, "learning_rate": 9.285407196546862e-06, "loss": 0.4411, "step": 3110 }, { "epoch": 2.06, "learning_rate": 9.280123045425936e-06, "loss": 0.4333, "step": 3111 }, { "epoch": 2.06, "learning_rate": 9.274839096350241e-06, "loss": 0.4369, "step": 3112 }, { "epoch": 2.06, "learning_rate": 9.269555350802812e-06, "loss": 0.435, "step": 3113 }, { "epoch": 2.06, "learning_rate": 9.264271810266618e-06, "loss": 0.4532, "step": 3114 }, { "epoch": 2.06, "learning_rate": 9.25898847622457e-06, "loss": 0.4396, "step": 3115 }, { "epoch": 2.06, "learning_rate": 9.253705350159522e-06, "loss": 0.4343, "step": 3116 }, { "epoch": 2.06, "learning_rate": 9.248422433554273e-06, "loss": 0.4988, "step": 3117 }, { "epoch": 2.06, "learning_rate": 9.243139727891554e-06, "loss": 0.4265, "step": 3118 }, { "epoch": 2.06, "learning_rate": 9.237857234654048e-06, "loss": 0.4469, "step": 3119 }, { "epoch": 2.07, "learning_rate": 9.232574955324369e-06, "loss": 0.4516, "step": 3120 }, { "epoch": 2.07, "learning_rate": 9.227292891385078e-06, "loss": 0.4299, "step": 3121 }, { "epoch": 2.07, "learning_rate": 9.22201104431867e-06, "loss": 0.4532, "step": 3122 }, { "epoch": 2.07, "learning_rate": 9.216729415607588e-06, "loss": 0.4405, "step": 3123 }, { "epoch": 2.07, "learning_rate": 9.211448006734199e-06, "loss": 0.4121, "step": 3124 }, { "epoch": 2.07, "learning_rate": 9.206166819180822e-06, "loss": 0.4103, "step": 3125 }, { "epoch": 2.07, "learning_rate": 9.200885854429706e-06, "loss": 0.4354, "step": 3126 }, { "epoch": 2.07, "learning_rate": 9.195605113963042e-06, "loss": 0.4422, "step": 3127 }, { "epoch": 2.07, "learning_rate": 9.190324599262957e-06, "loss": 0.4214, "step": 3128 }, { "epoch": 2.07, "learning_rate": 9.185044311811511e-06, "loss": 0.4335, "step": 3129 }, { "epoch": 2.07, "learning_rate": 9.179764253090703e-06, "loss": 0.4208, "step": 3130 }, { "epoch": 2.07, "learning_rate": 9.174484424582471e-06, "loss": 0.4444, "step": 3131 }, { "epoch": 2.07, "learning_rate": 9.169204827768683e-06, "loss": 0.4358, "step": 3132 }, { "epoch": 2.07, "learning_rate": 9.163925464131143e-06, "loss": 0.4224, "step": 3133 }, { "epoch": 2.07, "learning_rate": 9.158646335151598e-06, "loss": 0.4395, "step": 3134 }, { "epoch": 2.08, "learning_rate": 9.153367442311712e-06, "loss": 0.4383, "step": 3135 }, { "epoch": 2.08, "learning_rate": 9.148088787093093e-06, "loss": 0.4676, "step": 3136 }, { "epoch": 2.08, "learning_rate": 9.142810370977289e-06, "loss": 0.4364, "step": 3137 }, { "epoch": 2.08, "learning_rate": 9.137532195445769e-06, "loss": 0.4792, "step": 3138 }, { "epoch": 2.08, "learning_rate": 9.132254261979943e-06, "loss": 0.4508, "step": 3139 }, { "epoch": 2.08, "learning_rate": 9.12697657206115e-06, "loss": 0.45, "step": 3140 }, { "epoch": 2.08, "learning_rate": 9.121699127170652e-06, "loss": 0.4413, "step": 3141 }, { "epoch": 2.08, "learning_rate": 9.116421928789655e-06, "loss": 0.4299, "step": 3142 }, { "epoch": 2.08, "learning_rate": 9.111144978399292e-06, "loss": 0.4424, "step": 3143 }, { "epoch": 2.08, "learning_rate": 9.105868277480622e-06, "loss": 0.4621, "step": 3144 }, { "epoch": 2.08, "learning_rate": 9.100591827514643e-06, "loss": 0.445, "step": 3145 }, { "epoch": 2.08, "learning_rate": 9.09531562998228e-06, "loss": 0.4271, "step": 3146 }, { "epoch": 2.08, "learning_rate": 9.09003968636437e-06, "loss": 0.4619, "step": 3147 }, { "epoch": 2.08, "learning_rate": 9.084763998141703e-06, "loss": 0.4598, "step": 3148 }, { "epoch": 2.08, "learning_rate": 9.079488566794984e-06, "loss": 0.4174, "step": 3149 }, { "epoch": 2.09, "learning_rate": 9.07421339380485e-06, "loss": 0.4865, "step": 3150 }, { "epoch": 2.09, "learning_rate": 9.068938480651868e-06, "loss": 0.4655, "step": 3151 }, { "epoch": 2.09, "learning_rate": 9.063663828816523e-06, "loss": 0.4233, "step": 3152 }, { "epoch": 2.09, "learning_rate": 9.058389439779233e-06, "loss": 0.4591, "step": 3153 }, { "epoch": 2.09, "learning_rate": 9.053115315020344e-06, "loss": 0.4367, "step": 3154 }, { "epoch": 2.09, "learning_rate": 9.047841456020125e-06, "loss": 0.4348, "step": 3155 }, { "epoch": 2.09, "learning_rate": 9.042567864258768e-06, "loss": 0.4011, "step": 3156 }, { "epoch": 2.09, "learning_rate": 9.0372945412164e-06, "loss": 0.4606, "step": 3157 }, { "epoch": 2.09, "learning_rate": 9.032021488373058e-06, "loss": 0.3948, "step": 3158 }, { "epoch": 2.09, "learning_rate": 9.026748707208712e-06, "loss": 0.492, "step": 3159 }, { "epoch": 2.09, "learning_rate": 9.021476199203255e-06, "loss": 0.4212, "step": 3160 }, { "epoch": 2.09, "learning_rate": 9.016203965836503e-06, "loss": 0.4398, "step": 3161 }, { "epoch": 2.09, "learning_rate": 9.010932008588194e-06, "loss": 0.4554, "step": 3162 }, { "epoch": 2.09, "learning_rate": 9.00566032893799e-06, "loss": 0.4753, "step": 3163 }, { "epoch": 2.09, "learning_rate": 9.000388928365473e-06, "loss": 0.4076, "step": 3164 }, { "epoch": 2.1, "learning_rate": 8.995117808350146e-06, "loss": 0.4389, "step": 3165 }, { "epoch": 2.1, "learning_rate": 8.989846970371438e-06, "loss": 0.4405, "step": 3166 }, { "epoch": 2.1, "learning_rate": 8.984576415908696e-06, "loss": 0.4117, "step": 3167 }, { "epoch": 2.1, "learning_rate": 8.979306146441185e-06, "loss": 0.4344, "step": 3168 }, { "epoch": 2.1, "learning_rate": 8.974036163448098e-06, "loss": 0.4781, "step": 3169 }, { "epoch": 2.1, "learning_rate": 8.968766468408532e-06, "loss": 0.4679, "step": 3170 }, { "epoch": 2.1, "learning_rate": 8.96349706280152e-06, "loss": 0.4317, "step": 3171 }, { "epoch": 2.1, "learning_rate": 8.958227948106005e-06, "loss": 0.4041, "step": 3172 }, { "epoch": 2.1, "learning_rate": 8.95295912580085e-06, "loss": 0.4189, "step": 3173 }, { "epoch": 2.1, "learning_rate": 8.947690597364836e-06, "loss": 0.4271, "step": 3174 }, { "epoch": 2.1, "learning_rate": 8.942422364276668e-06, "loss": 0.4367, "step": 3175 }, { "epoch": 2.1, "learning_rate": 8.937154428014951e-06, "loss": 0.4442, "step": 3176 }, { "epoch": 2.1, "learning_rate": 8.931886790058223e-06, "loss": 0.4431, "step": 3177 }, { "epoch": 2.1, "learning_rate": 8.92661945188493e-06, "loss": 0.469, "step": 3178 }, { "epoch": 2.1, "learning_rate": 8.921352414973441e-06, "loss": 0.4279, "step": 3179 }, { "epoch": 2.11, "learning_rate": 8.916085680802038e-06, "loss": 0.4393, "step": 3180 }, { "epoch": 2.11, "learning_rate": 8.910819250848907e-06, "loss": 0.4693, "step": 3181 }, { "epoch": 2.11, "learning_rate": 8.905553126592164e-06, "loss": 0.4108, "step": 3182 }, { "epoch": 2.11, "learning_rate": 8.900287309509831e-06, "loss": 0.4587, "step": 3183 }, { "epoch": 2.11, "learning_rate": 8.895021801079846e-06, "loss": 0.4543, "step": 3184 }, { "epoch": 2.11, "learning_rate": 8.889756602780059e-06, "loss": 0.4242, "step": 3185 }, { "epoch": 2.11, "learning_rate": 8.884491716088238e-06, "loss": 0.4155, "step": 3186 }, { "epoch": 2.11, "learning_rate": 8.879227142482055e-06, "loss": 0.433, "step": 3187 }, { "epoch": 2.11, "learning_rate": 8.8739628834391e-06, "loss": 0.4293, "step": 3188 }, { "epoch": 2.11, "learning_rate": 8.868698940436874e-06, "loss": 0.4508, "step": 3189 }, { "epoch": 2.11, "learning_rate": 8.863435314952787e-06, "loss": 0.4317, "step": 3190 }, { "epoch": 2.11, "learning_rate": 8.858172008464164e-06, "loss": 0.4433, "step": 3191 }, { "epoch": 2.11, "learning_rate": 8.852909022448239e-06, "loss": 0.4192, "step": 3192 }, { "epoch": 2.11, "learning_rate": 8.847646358382153e-06, "loss": 0.4316, "step": 3193 }, { "epoch": 2.11, "learning_rate": 8.842384017742956e-06, "loss": 0.4411, "step": 3194 }, { "epoch": 2.12, "learning_rate": 8.837122002007614e-06, "loss": 0.412, "step": 3195 }, { "epoch": 2.12, "learning_rate": 8.831860312652995e-06, "loss": 0.4164, "step": 3196 }, { "epoch": 2.12, "learning_rate": 8.82659895115588e-06, "loss": 0.4399, "step": 3197 }, { "epoch": 2.12, "learning_rate": 8.821337918992961e-06, "loss": 0.4652, "step": 3198 }, { "epoch": 2.12, "learning_rate": 8.816077217640822e-06, "loss": 0.4325, "step": 3199 }, { "epoch": 2.12, "learning_rate": 8.810816848575971e-06, "loss": 0.4143, "step": 3200 }, { "epoch": 2.12, "learning_rate": 8.805556813274817e-06, "loss": 0.4277, "step": 3201 }, { "epoch": 2.12, "learning_rate": 8.800297113213673e-06, "loss": 0.4151, "step": 3202 }, { "epoch": 2.12, "learning_rate": 8.795037749868764e-06, "loss": 0.4328, "step": 3203 }, { "epoch": 2.12, "learning_rate": 8.789778724716209e-06, "loss": 0.4202, "step": 3204 }, { "epoch": 2.12, "learning_rate": 8.784520039232044e-06, "loss": 0.4346, "step": 3205 }, { "epoch": 2.12, "learning_rate": 8.779261694892205e-06, "loss": 0.4752, "step": 3206 }, { "epoch": 2.12, "learning_rate": 8.77400369317253e-06, "loss": 0.4587, "step": 3207 }, { "epoch": 2.12, "learning_rate": 8.768746035548767e-06, "loss": 0.4165, "step": 3208 }, { "epoch": 2.13, "learning_rate": 8.763488723496565e-06, "loss": 0.4291, "step": 3209 }, { "epoch": 2.13, "learning_rate": 8.758231758491467e-06, "loss": 0.4129, "step": 3210 }, { "epoch": 2.13, "learning_rate": 8.752975142008928e-06, "loss": 0.4158, "step": 3211 }, { "epoch": 2.13, "learning_rate": 8.747718875524307e-06, "loss": 0.4357, "step": 3212 }, { "epoch": 2.13, "learning_rate": 8.74246296051286e-06, "loss": 0.4941, "step": 3213 }, { "epoch": 2.13, "learning_rate": 8.737207398449746e-06, "loss": 0.4942, "step": 3214 }, { "epoch": 2.13, "learning_rate": 8.731952190810029e-06, "loss": 0.4383, "step": 3215 }, { "epoch": 2.13, "learning_rate": 8.726697339068657e-06, "loss": 0.4342, "step": 3216 }, { "epoch": 2.13, "learning_rate": 8.721442844700499e-06, "loss": 0.4577, "step": 3217 }, { "epoch": 2.13, "learning_rate": 8.71618870918031e-06, "loss": 0.4291, "step": 3218 }, { "epoch": 2.13, "learning_rate": 8.71093493398275e-06, "loss": 0.4078, "step": 3219 }, { "epoch": 2.13, "learning_rate": 8.705681520582382e-06, "loss": 0.4073, "step": 3220 }, { "epoch": 2.13, "learning_rate": 8.700428470453663e-06, "loss": 0.4608, "step": 3221 }, { "epoch": 2.13, "learning_rate": 8.695175785070938e-06, "loss": 0.413, "step": 3222 }, { "epoch": 2.13, "learning_rate": 8.689923465908464e-06, "loss": 0.4612, "step": 3223 }, { "epoch": 2.14, "learning_rate": 8.684671514440391e-06, "loss": 0.4355, "step": 3224 }, { "epoch": 2.14, "learning_rate": 8.679419932140765e-06, "loss": 0.4477, "step": 3225 }, { "epoch": 2.14, "learning_rate": 8.67416872048353e-06, "loss": 0.4294, "step": 3226 }, { "epoch": 2.14, "learning_rate": 8.66891788094252e-06, "loss": 0.4199, "step": 3227 }, { "epoch": 2.14, "learning_rate": 8.66366741499147e-06, "loss": 0.4324, "step": 3228 }, { "epoch": 2.14, "learning_rate": 8.658417324104011e-06, "loss": 0.421, "step": 3229 }, { "epoch": 2.14, "learning_rate": 8.653167609753667e-06, "loss": 0.4279, "step": 3230 }, { "epoch": 2.14, "learning_rate": 8.647918273413856e-06, "loss": 0.4307, "step": 3231 }, { "epoch": 2.14, "learning_rate": 8.642669316557893e-06, "loss": 0.4243, "step": 3232 }, { "epoch": 2.14, "learning_rate": 8.637420740658976e-06, "loss": 0.4207, "step": 3233 }, { "epoch": 2.14, "learning_rate": 8.632172547190208e-06, "loss": 0.4335, "step": 3234 }, { "epoch": 2.14, "learning_rate": 8.62692473762458e-06, "loss": 0.4559, "step": 3235 }, { "epoch": 2.14, "learning_rate": 8.621677313434977e-06, "loss": 0.469, "step": 3236 }, { "epoch": 2.14, "learning_rate": 8.616430276094172e-06, "loss": 0.4419, "step": 3237 }, { "epoch": 2.14, "learning_rate": 8.611183627074835e-06, "loss": 0.4237, "step": 3238 }, { "epoch": 2.15, "learning_rate": 8.60593736784952e-06, "loss": 0.4596, "step": 3239 }, { "epoch": 2.15, "learning_rate": 8.600691499890677e-06, "loss": 0.4473, "step": 3240 }, { "epoch": 2.15, "learning_rate": 8.595446024670644e-06, "loss": 0.4466, "step": 3241 }, { "epoch": 2.15, "learning_rate": 8.590200943661652e-06, "loss": 0.4342, "step": 3242 }, { "epoch": 2.15, "learning_rate": 8.584956258335816e-06, "loss": 0.4702, "step": 3243 }, { "epoch": 2.15, "learning_rate": 8.579711970165148e-06, "loss": 0.4397, "step": 3244 }, { "epoch": 2.15, "learning_rate": 8.574468080621533e-06, "loss": 0.4034, "step": 3245 }, { "epoch": 2.15, "learning_rate": 8.56922459117676e-06, "loss": 0.4269, "step": 3246 }, { "epoch": 2.15, "learning_rate": 8.563981503302503e-06, "loss": 0.4427, "step": 3247 }, { "epoch": 2.15, "learning_rate": 8.558738818470317e-06, "loss": 0.4177, "step": 3248 }, { "epoch": 2.15, "learning_rate": 8.553496538151647e-06, "loss": 0.4476, "step": 3249 }, { "epoch": 2.15, "learning_rate": 8.54825466381783e-06, "loss": 0.4454, "step": 3250 }, { "epoch": 2.15, "learning_rate": 8.543013196940075e-06, "loss": 0.4845, "step": 3251 }, { "epoch": 2.15, "learning_rate": 8.53777213898949e-06, "loss": 0.4478, "step": 3252 }, { "epoch": 2.15, "learning_rate": 8.532531491437062e-06, "loss": 0.4501, "step": 3253 }, { "epoch": 2.16, "learning_rate": 8.527291255753666e-06, "loss": 0.4996, "step": 3254 }, { "epoch": 2.16, "learning_rate": 8.522051433410064e-06, "loss": 0.4275, "step": 3255 }, { "epoch": 2.16, "learning_rate": 8.516812025876891e-06, "loss": 0.3928, "step": 3256 }, { "epoch": 2.16, "learning_rate": 8.511573034624673e-06, "loss": 0.4554, "step": 3257 }, { "epoch": 2.16, "learning_rate": 8.50633446112382e-06, "loss": 0.4638, "step": 3258 }, { "epoch": 2.16, "learning_rate": 8.501096306844624e-06, "loss": 0.421, "step": 3259 }, { "epoch": 2.16, "learning_rate": 8.495858573257258e-06, "loss": 0.4767, "step": 3260 }, { "epoch": 2.16, "learning_rate": 8.490621261831781e-06, "loss": 0.4308, "step": 3261 }, { "epoch": 2.16, "learning_rate": 8.485384374038124e-06, "loss": 0.4348, "step": 3262 }, { "epoch": 2.16, "learning_rate": 8.480147911346108e-06, "loss": 0.4315, "step": 3263 }, { "epoch": 2.16, "learning_rate": 8.474911875225432e-06, "loss": 0.4272, "step": 3264 }, { "epoch": 2.16, "learning_rate": 8.469676267145674e-06, "loss": 0.4492, "step": 3265 }, { "epoch": 2.16, "learning_rate": 8.464441088576296e-06, "loss": 0.4666, "step": 3266 }, { "epoch": 2.16, "learning_rate": 8.459206340986637e-06, "loss": 0.4391, "step": 3267 }, { "epoch": 2.16, "learning_rate": 8.453972025845908e-06, "loss": 0.4728, "step": 3268 }, { "epoch": 2.17, "learning_rate": 8.448738144623212e-06, "loss": 0.4391, "step": 3269 }, { "epoch": 2.17, "learning_rate": 8.443504698787517e-06, "loss": 0.4268, "step": 3270 }, { "epoch": 2.17, "learning_rate": 8.438271689807682e-06, "loss": 0.4355, "step": 3271 }, { "epoch": 2.17, "learning_rate": 8.43303911915243e-06, "loss": 0.4216, "step": 3272 }, { "epoch": 2.17, "learning_rate": 8.427806988290374e-06, "loss": 0.44, "step": 3273 }, { "epoch": 2.17, "learning_rate": 8.422575298689993e-06, "loss": 0.4194, "step": 3274 }, { "epoch": 2.17, "learning_rate": 8.417344051819646e-06, "loss": 0.4382, "step": 3275 }, { "epoch": 2.17, "learning_rate": 8.412113249147571e-06, "loss": 0.4844, "step": 3276 }, { "epoch": 2.17, "learning_rate": 8.406882892141875e-06, "loss": 0.4389, "step": 3277 }, { "epoch": 2.17, "learning_rate": 8.401652982270548e-06, "loss": 0.4001, "step": 3278 }, { "epoch": 2.17, "learning_rate": 8.39642352100144e-06, "loss": 0.4499, "step": 3279 }, { "epoch": 2.17, "learning_rate": 8.391194509802294e-06, "loss": 0.4405, "step": 3280 }, { "epoch": 2.17, "learning_rate": 8.385965950140714e-06, "loss": 0.4058, "step": 3281 }, { "epoch": 2.17, "learning_rate": 8.380737843484181e-06, "loss": 0.4037, "step": 3282 }, { "epoch": 2.17, "learning_rate": 8.37551019130005e-06, "loss": 0.4304, "step": 3283 }, { "epoch": 2.18, "learning_rate": 8.37028299505555e-06, "loss": 0.4666, "step": 3284 }, { "epoch": 2.18, "learning_rate": 8.365056256217772e-06, "loss": 0.4508, "step": 3285 }, { "epoch": 2.18, "learning_rate": 8.359829976253687e-06, "loss": 0.4296, "step": 3286 }, { "epoch": 2.18, "learning_rate": 8.354604156630136e-06, "loss": 0.4795, "step": 3287 }, { "epoch": 2.18, "learning_rate": 8.349378798813835e-06, "loss": 0.4067, "step": 3288 }, { "epoch": 2.18, "learning_rate": 8.344153904271363e-06, "loss": 0.438, "step": 3289 }, { "epoch": 2.18, "learning_rate": 8.338929474469177e-06, "loss": 0.4478, "step": 3290 }, { "epoch": 2.18, "learning_rate": 8.33370551087359e-06, "loss": 0.4555, "step": 3291 }, { "epoch": 2.18, "learning_rate": 8.328482014950798e-06, "loss": 0.4411, "step": 3292 }, { "epoch": 2.18, "learning_rate": 8.32325898816686e-06, "loss": 0.4222, "step": 3293 }, { "epoch": 2.18, "learning_rate": 8.318036431987703e-06, "loss": 0.4431, "step": 3294 }, { "epoch": 2.18, "learning_rate": 8.312814347879121e-06, "loss": 0.4481, "step": 3295 }, { "epoch": 2.18, "learning_rate": 8.307592737306786e-06, "loss": 0.4643, "step": 3296 }, { "epoch": 2.18, "learning_rate": 8.302371601736218e-06, "loss": 0.4421, "step": 3297 }, { "epoch": 2.19, "learning_rate": 8.297150942632818e-06, "loss": 0.4551, "step": 3298 }, { "epoch": 2.19, "learning_rate": 8.29193076146185e-06, "loss": 0.4231, "step": 3299 }, { "epoch": 2.19, "learning_rate": 8.286711059688441e-06, "loss": 0.4674, "step": 3300 }, { "epoch": 2.19, "learning_rate": 8.28149183877759e-06, "loss": 0.4272, "step": 3301 }, { "epoch": 2.19, "learning_rate": 8.276273100194154e-06, "loss": 0.4478, "step": 3302 }, { "epoch": 2.19, "learning_rate": 8.271054845402855e-06, "loss": 0.4773, "step": 3303 }, { "epoch": 2.19, "learning_rate": 8.265837075868283e-06, "loss": 0.4381, "step": 3304 }, { "epoch": 2.19, "learning_rate": 8.260619793054894e-06, "loss": 0.4767, "step": 3305 }, { "epoch": 2.19, "learning_rate": 8.255402998427e-06, "loss": 0.4362, "step": 3306 }, { "epoch": 2.19, "learning_rate": 8.250186693448782e-06, "loss": 0.4714, "step": 3307 }, { "epoch": 2.19, "learning_rate": 8.244970879584277e-06, "loss": 0.4361, "step": 3308 }, { "epoch": 2.19, "learning_rate": 8.239755558297392e-06, "loss": 0.4301, "step": 3309 }, { "epoch": 2.19, "learning_rate": 8.234540731051892e-06, "loss": 0.4509, "step": 3310 }, { "epoch": 2.19, "learning_rate": 8.229326399311403e-06, "loss": 0.4405, "step": 3311 }, { "epoch": 2.19, "learning_rate": 8.224112564539413e-06, "loss": 0.4602, "step": 3312 }, { "epoch": 2.2, "learning_rate": 8.21889922819927e-06, "loss": 0.464, "step": 3313 }, { "epoch": 2.2, "learning_rate": 8.21368639175418e-06, "loss": 0.4464, "step": 3314 }, { "epoch": 2.2, "learning_rate": 8.208474056667212e-06, "loss": 0.4818, "step": 3315 }, { "epoch": 2.2, "learning_rate": 8.203262224401295e-06, "loss": 0.4171, "step": 3316 }, { "epoch": 2.2, "learning_rate": 8.198050896419214e-06, "loss": 0.4621, "step": 3317 }, { "epoch": 2.2, "learning_rate": 8.19284007418361e-06, "loss": 0.4563, "step": 3318 }, { "epoch": 2.2, "learning_rate": 8.187629759156994e-06, "loss": 0.4325, "step": 3319 }, { "epoch": 2.2, "learning_rate": 8.182419952801716e-06, "loss": 0.4425, "step": 3320 }, { "epoch": 2.2, "learning_rate": 8.177210656579996e-06, "loss": 0.4053, "step": 3321 }, { "epoch": 2.2, "learning_rate": 8.172001871953912e-06, "loss": 0.4533, "step": 3322 }, { "epoch": 2.2, "learning_rate": 8.166793600385391e-06, "loss": 0.4385, "step": 3323 }, { "epoch": 2.2, "learning_rate": 8.161585843336227e-06, "loss": 0.448, "step": 3324 }, { "epoch": 2.2, "learning_rate": 8.15637860226805e-06, "loss": 0.3988, "step": 3325 }, { "epoch": 2.2, "learning_rate": 8.151171878642365e-06, "loss": 0.3908, "step": 3326 }, { "epoch": 2.2, "learning_rate": 8.145965673920523e-06, "loss": 0.4152, "step": 3327 }, { "epoch": 2.21, "learning_rate": 8.14075998956373e-06, "loss": 0.4291, "step": 3328 }, { "epoch": 2.21, "learning_rate": 8.135554827033044e-06, "loss": 0.4169, "step": 3329 }, { "epoch": 2.21, "learning_rate": 8.130350187789387e-06, "loss": 0.4492, "step": 3330 }, { "epoch": 2.21, "learning_rate": 8.125146073293517e-06, "loss": 0.4439, "step": 3331 }, { "epoch": 2.21, "learning_rate": 8.119942485006058e-06, "loss": 0.4429, "step": 3332 }, { "epoch": 2.21, "learning_rate": 8.114739424387481e-06, "loss": 0.4352, "step": 3333 }, { "epoch": 2.21, "learning_rate": 8.10953689289811e-06, "loss": 0.436, "step": 3334 }, { "epoch": 2.21, "learning_rate": 8.104334891998124e-06, "loss": 0.4581, "step": 3335 }, { "epoch": 2.21, "learning_rate": 8.099133423147547e-06, "loss": 0.458, "step": 3336 }, { "epoch": 2.21, "learning_rate": 8.093932487806254e-06, "loss": 0.4513, "step": 3337 }, { "epoch": 2.21, "learning_rate": 8.088732087433975e-06, "loss": 0.4382, "step": 3338 }, { "epoch": 2.21, "learning_rate": 8.083532223490288e-06, "loss": 0.3819, "step": 3339 }, { "epoch": 2.21, "learning_rate": 8.078332897434617e-06, "loss": 0.4486, "step": 3340 }, { "epoch": 2.21, "learning_rate": 8.07313411072624e-06, "loss": 0.4267, "step": 3341 }, { "epoch": 2.21, "learning_rate": 8.067935864824283e-06, "loss": 0.452, "step": 3342 }, { "epoch": 2.22, "learning_rate": 8.062738161187716e-06, "loss": 0.4281, "step": 3343 }, { "epoch": 2.22, "learning_rate": 8.05754100127536e-06, "loss": 0.4447, "step": 3344 }, { "epoch": 2.22, "learning_rate": 8.052344386545882e-06, "loss": 0.465, "step": 3345 }, { "epoch": 2.22, "learning_rate": 8.0471483184578e-06, "loss": 0.433, "step": 3346 }, { "epoch": 2.22, "learning_rate": 8.041952798469473e-06, "loss": 0.4692, "step": 3347 }, { "epoch": 2.22, "learning_rate": 8.036757828039112e-06, "loss": 0.4626, "step": 3348 }, { "epoch": 2.22, "learning_rate": 8.031563408624767e-06, "loss": 0.4259, "step": 3349 }, { "epoch": 2.22, "learning_rate": 8.026369541684334e-06, "loss": 0.4502, "step": 3350 }, { "epoch": 2.22, "learning_rate": 8.021176228675563e-06, "loss": 0.4452, "step": 3351 }, { "epoch": 2.22, "learning_rate": 8.015983471056038e-06, "loss": 0.4518, "step": 3352 }, { "epoch": 2.22, "learning_rate": 8.010791270283197e-06, "loss": 0.4352, "step": 3353 }, { "epoch": 2.22, "learning_rate": 8.00559962781431e-06, "loss": 0.4588, "step": 3354 }, { "epoch": 2.22, "learning_rate": 8.000408545106492e-06, "loss": 0.4438, "step": 3355 }, { "epoch": 2.22, "learning_rate": 7.995218023616715e-06, "loss": 0.4948, "step": 3356 }, { "epoch": 2.22, "learning_rate": 7.990028064801781e-06, "loss": 0.432, "step": 3357 }, { "epoch": 2.23, "learning_rate": 7.984838670118335e-06, "loss": 0.4285, "step": 3358 }, { "epoch": 2.23, "learning_rate": 7.979649841022872e-06, "loss": 0.4376, "step": 3359 }, { "epoch": 2.23, "learning_rate": 7.97446157897171e-06, "loss": 0.4321, "step": 3360 }, { "epoch": 2.23, "learning_rate": 7.969273885421027e-06, "loss": 0.3945, "step": 3361 }, { "epoch": 2.23, "learning_rate": 7.964086761826832e-06, "loss": 0.4434, "step": 3362 }, { "epoch": 2.23, "learning_rate": 7.958900209644975e-06, "loss": 0.4359, "step": 3363 }, { "epoch": 2.23, "learning_rate": 7.953714230331152e-06, "loss": 0.4034, "step": 3364 }, { "epoch": 2.23, "learning_rate": 7.948528825340891e-06, "loss": 0.4401, "step": 3365 }, { "epoch": 2.23, "learning_rate": 7.943343996129555e-06, "loss": 0.4259, "step": 3366 }, { "epoch": 2.23, "learning_rate": 7.938159744152357e-06, "loss": 0.4389, "step": 3367 }, { "epoch": 2.23, "learning_rate": 7.932976070864338e-06, "loss": 0.4086, "step": 3368 }, { "epoch": 2.23, "learning_rate": 7.927792977720384e-06, "loss": 0.4316, "step": 3369 }, { "epoch": 2.23, "learning_rate": 7.922610466175213e-06, "loss": 0.4205, "step": 3370 }, { "epoch": 2.23, "learning_rate": 7.917428537683386e-06, "loss": 0.4412, "step": 3371 }, { "epoch": 2.23, "learning_rate": 7.912247193699288e-06, "loss": 0.443, "step": 3372 }, { "epoch": 2.24, "learning_rate": 7.907066435677154e-06, "loss": 0.4321, "step": 3373 }, { "epoch": 2.24, "learning_rate": 7.901886265071046e-06, "loss": 0.4128, "step": 3374 }, { "epoch": 2.24, "learning_rate": 7.896706683334863e-06, "loss": 0.4476, "step": 3375 }, { "epoch": 2.24, "learning_rate": 7.891527691922346e-06, "loss": 0.4417, "step": 3376 }, { "epoch": 2.24, "learning_rate": 7.886349292287052e-06, "loss": 0.4693, "step": 3377 }, { "epoch": 2.24, "learning_rate": 7.881171485882392e-06, "loss": 0.4304, "step": 3378 }, { "epoch": 2.24, "learning_rate": 7.8759942741616e-06, "loss": 0.4458, "step": 3379 }, { "epoch": 2.24, "learning_rate": 7.870817658577743e-06, "loss": 0.4159, "step": 3380 }, { "epoch": 2.24, "learning_rate": 7.865641640583725e-06, "loss": 0.4161, "step": 3381 }, { "epoch": 2.24, "learning_rate": 7.860466221632282e-06, "loss": 0.4145, "step": 3382 }, { "epoch": 2.24, "learning_rate": 7.855291403175976e-06, "loss": 0.4316, "step": 3383 }, { "epoch": 2.24, "learning_rate": 7.850117186667206e-06, "loss": 0.4341, "step": 3384 }, { "epoch": 2.24, "learning_rate": 7.844943573558202e-06, "loss": 0.4535, "step": 3385 }, { "epoch": 2.24, "learning_rate": 7.839770565301019e-06, "loss": 0.4604, "step": 3386 }, { "epoch": 2.24, "learning_rate": 7.83459816334755e-06, "loss": 0.4484, "step": 3387 }, { "epoch": 2.25, "learning_rate": 7.829426369149518e-06, "loss": 0.498, "step": 3388 }, { "epoch": 2.25, "learning_rate": 7.82425518415846e-06, "loss": 0.4422, "step": 3389 }, { "epoch": 2.25, "learning_rate": 7.819084609825762e-06, "loss": 0.474, "step": 3390 }, { "epoch": 2.25, "learning_rate": 7.81391464760263e-06, "loss": 0.4473, "step": 3391 }, { "epoch": 2.25, "learning_rate": 7.808745298940096e-06, "loss": 0.459, "step": 3392 }, { "epoch": 2.25, "learning_rate": 7.803576565289023e-06, "loss": 0.405, "step": 3393 }, { "epoch": 2.25, "learning_rate": 7.798408448100106e-06, "loss": 0.4761, "step": 3394 }, { "epoch": 2.25, "learning_rate": 7.793240948823852e-06, "loss": 0.457, "step": 3395 }, { "epoch": 2.25, "learning_rate": 7.788074068910609e-06, "loss": 0.4435, "step": 3396 }, { "epoch": 2.25, "learning_rate": 7.782907809810542e-06, "loss": 0.4636, "step": 3397 }, { "epoch": 2.25, "learning_rate": 7.777742172973655e-06, "loss": 0.4407, "step": 3398 }, { "epoch": 2.25, "learning_rate": 7.772577159849767e-06, "loss": 0.4257, "step": 3399 }, { "epoch": 2.25, "learning_rate": 7.767412771888515e-06, "loss": 0.4465, "step": 3400 }, { "epoch": 2.25, "learning_rate": 7.762249010539372e-06, "loss": 0.4883, "step": 3401 }, { "epoch": 2.26, "learning_rate": 7.757085877251638e-06, "loss": 0.4491, "step": 3402 }, { "epoch": 2.26, "learning_rate": 7.751923373474425e-06, "loss": 0.3983, "step": 3403 }, { "epoch": 2.26, "learning_rate": 7.746761500656676e-06, "loss": 0.4486, "step": 3404 }, { "epoch": 2.26, "learning_rate": 7.741600260247155e-06, "loss": 0.4098, "step": 3405 }, { "epoch": 2.26, "learning_rate": 7.73643965369445e-06, "loss": 0.4879, "step": 3406 }, { "epoch": 2.26, "learning_rate": 7.731279682446964e-06, "loss": 0.4209, "step": 3407 }, { "epoch": 2.26, "learning_rate": 7.726120347952932e-06, "loss": 0.4568, "step": 3408 }, { "epoch": 2.26, "learning_rate": 7.720961651660406e-06, "loss": 0.4212, "step": 3409 }, { "epoch": 2.26, "learning_rate": 7.715803595017257e-06, "loss": 0.4247, "step": 3410 }, { "epoch": 2.26, "learning_rate": 7.71064617947118e-06, "loss": 0.4458, "step": 3411 }, { "epoch": 2.26, "learning_rate": 7.705489406469684e-06, "loss": 0.4574, "step": 3412 }, { "epoch": 2.26, "learning_rate": 7.700333277460104e-06, "loss": 0.4325, "step": 3413 }, { "epoch": 2.26, "learning_rate": 7.695177793889593e-06, "loss": 0.392, "step": 3414 }, { "epoch": 2.26, "learning_rate": 7.69002295720512e-06, "loss": 0.4819, "step": 3415 }, { "epoch": 2.26, "learning_rate": 7.684868768853472e-06, "loss": 0.4743, "step": 3416 }, { "epoch": 2.27, "learning_rate": 7.679715230281265e-06, "loss": 0.432, "step": 3417 }, { "epoch": 2.27, "learning_rate": 7.674562342934914e-06, "loss": 0.4601, "step": 3418 }, { "epoch": 2.27, "learning_rate": 7.669410108260664e-06, "loss": 0.4758, "step": 3419 }, { "epoch": 2.27, "learning_rate": 7.664258527704576e-06, "loss": 0.4441, "step": 3420 }, { "epoch": 2.27, "learning_rate": 7.659107602712524e-06, "loss": 0.4276, "step": 3421 }, { "epoch": 2.27, "learning_rate": 7.6539573347302e-06, "loss": 0.4361, "step": 3422 }, { "epoch": 2.27, "learning_rate": 7.648807725203112e-06, "loss": 0.441, "step": 3423 }, { "epoch": 2.27, "learning_rate": 7.64365877557658e-06, "loss": 0.4189, "step": 3424 }, { "epoch": 2.27, "learning_rate": 7.638510487295738e-06, "loss": 0.4356, "step": 3425 }, { "epoch": 2.27, "learning_rate": 7.633362861805544e-06, "loss": 0.4215, "step": 3426 }, { "epoch": 2.27, "learning_rate": 7.628215900550758e-06, "loss": 0.4181, "step": 3427 }, { "epoch": 2.27, "learning_rate": 7.623069604975966e-06, "loss": 0.4294, "step": 3428 }, { "epoch": 2.27, "learning_rate": 7.617923976525549e-06, "loss": 0.443, "step": 3429 }, { "epoch": 2.27, "learning_rate": 7.61277901664372e-06, "loss": 0.4298, "step": 3430 }, { "epoch": 2.27, "learning_rate": 7.607634726774491e-06, "loss": 0.4353, "step": 3431 }, { "epoch": 2.28, "learning_rate": 7.602491108361695e-06, "loss": 0.4528, "step": 3432 }, { "epoch": 2.28, "learning_rate": 7.597348162848972e-06, "loss": 0.4203, "step": 3433 }, { "epoch": 2.28, "learning_rate": 7.592205891679777e-06, "loss": 0.4689, "step": 3434 }, { "epoch": 2.28, "learning_rate": 7.587064296297364e-06, "loss": 0.4104, "step": 3435 }, { "epoch": 2.28, "learning_rate": 7.5819233781448105e-06, "loss": 0.4375, "step": 3436 }, { "epoch": 2.28, "learning_rate": 7.576783138665e-06, "loss": 0.4357, "step": 3437 }, { "epoch": 2.28, "learning_rate": 7.571643579300622e-06, "loss": 0.4485, "step": 3438 }, { "epoch": 2.28, "learning_rate": 7.5665047014941805e-06, "loss": 0.4608, "step": 3439 }, { "epoch": 2.28, "learning_rate": 7.56136650668799e-06, "loss": 0.4587, "step": 3440 }, { "epoch": 2.28, "learning_rate": 7.55622899632416e-06, "loss": 0.4229, "step": 3441 }, { "epoch": 2.28, "learning_rate": 7.55109217184462e-06, "loss": 0.4576, "step": 3442 }, { "epoch": 2.28, "learning_rate": 7.545956034691104e-06, "loss": 0.4087, "step": 3443 }, { "epoch": 2.28, "learning_rate": 7.540820586305153e-06, "loss": 0.4538, "step": 3444 }, { "epoch": 2.28, "learning_rate": 7.535685828128117e-06, "loss": 0.4152, "step": 3445 }, { "epoch": 2.28, "learning_rate": 7.530551761601147e-06, "loss": 0.4219, "step": 3446 }, { "epoch": 2.29, "learning_rate": 7.525418388165202e-06, "loss": 0.4365, "step": 3447 }, { "epoch": 2.29, "learning_rate": 7.520285709261049e-06, "loss": 0.4476, "step": 3448 }, { "epoch": 2.29, "learning_rate": 7.5151537263292575e-06, "loss": 0.4355, "step": 3449 }, { "epoch": 2.29, "learning_rate": 7.510022440810203e-06, "loss": 0.4305, "step": 3450 }, { "epoch": 2.29, "learning_rate": 7.504891854144066e-06, "loss": 0.436, "step": 3451 }, { "epoch": 2.29, "learning_rate": 7.499761967770827e-06, "loss": 0.4559, "step": 3452 }, { "epoch": 2.29, "learning_rate": 7.494632783130271e-06, "loss": 0.4461, "step": 3453 }, { "epoch": 2.29, "learning_rate": 7.489504301661992e-06, "loss": 0.4525, "step": 3454 }, { "epoch": 2.29, "learning_rate": 7.48437652480538e-06, "loss": 0.4435, "step": 3455 }, { "epoch": 2.29, "learning_rate": 7.4792494539996285e-06, "loss": 0.4356, "step": 3456 }, { "epoch": 2.29, "learning_rate": 7.474123090683738e-06, "loss": 0.431, "step": 3457 }, { "epoch": 2.29, "learning_rate": 7.468997436296501e-06, "loss": 0.4583, "step": 3458 }, { "epoch": 2.29, "learning_rate": 7.4638724922765185e-06, "loss": 0.3965, "step": 3459 }, { "epoch": 2.29, "learning_rate": 7.458748260062187e-06, "loss": 0.4467, "step": 3460 }, { "epoch": 2.29, "learning_rate": 7.453624741091712e-06, "loss": 0.4076, "step": 3461 }, { "epoch": 2.3, "learning_rate": 7.448501936803087e-06, "loss": 0.438, "step": 3462 }, { "epoch": 2.3, "learning_rate": 7.443379848634118e-06, "loss": 0.4097, "step": 3463 }, { "epoch": 2.3, "learning_rate": 7.438258478022393e-06, "loss": 0.4785, "step": 3464 }, { "epoch": 2.3, "learning_rate": 7.433137826405314e-06, "loss": 0.4716, "step": 3465 }, { "epoch": 2.3, "learning_rate": 7.428017895220076e-06, "loss": 0.4587, "step": 3466 }, { "epoch": 2.3, "learning_rate": 7.42289868590367e-06, "loss": 0.4538, "step": 3467 }, { "epoch": 2.3, "learning_rate": 7.4177801998928864e-06, "loss": 0.4278, "step": 3468 }, { "epoch": 2.3, "learning_rate": 7.412662438624316e-06, "loss": 0.4138, "step": 3469 }, { "epoch": 2.3, "learning_rate": 7.407545403534334e-06, "loss": 0.4657, "step": 3470 }, { "epoch": 2.3, "learning_rate": 7.402429096059125e-06, "loss": 0.4418, "step": 3471 }, { "epoch": 2.3, "learning_rate": 7.397313517634659e-06, "loss": 0.4446, "step": 3472 }, { "epoch": 2.3, "learning_rate": 7.392198669696715e-06, "loss": 0.4365, "step": 3473 }, { "epoch": 2.3, "learning_rate": 7.387084553680859e-06, "loss": 0.4334, "step": 3474 }, { "epoch": 2.3, "learning_rate": 7.381971171022442e-06, "loss": 0.4196, "step": 3475 }, { "epoch": 2.3, "learning_rate": 7.376858523156624e-06, "loss": 0.4396, "step": 3476 }, { "epoch": 2.31, "learning_rate": 7.371746611518352e-06, "loss": 0.4371, "step": 3477 }, { "epoch": 2.31, "learning_rate": 7.366635437542368e-06, "loss": 0.436, "step": 3478 }, { "epoch": 2.31, "learning_rate": 7.361525002663209e-06, "loss": 0.412, "step": 3479 }, { "epoch": 2.31, "learning_rate": 7.356415308315201e-06, "loss": 0.4762, "step": 3480 }, { "epoch": 2.31, "learning_rate": 7.35130635593246e-06, "loss": 0.4307, "step": 3481 }, { "epoch": 2.31, "learning_rate": 7.3461981469489e-06, "loss": 0.4331, "step": 3482 }, { "epoch": 2.31, "learning_rate": 7.341090682798222e-06, "loss": 0.4192, "step": 3483 }, { "epoch": 2.31, "learning_rate": 7.335983964913919e-06, "loss": 0.4222, "step": 3484 }, { "epoch": 2.31, "learning_rate": 7.3308779947292776e-06, "loss": 0.4051, "step": 3485 }, { "epoch": 2.31, "learning_rate": 7.325772773677372e-06, "loss": 0.4489, "step": 3486 }, { "epoch": 2.31, "learning_rate": 7.320668303191062e-06, "loss": 0.4374, "step": 3487 }, { "epoch": 2.31, "learning_rate": 7.315564584703002e-06, "loss": 0.4563, "step": 3488 }, { "epoch": 2.31, "learning_rate": 7.310461619645634e-06, "loss": 0.4302, "step": 3489 }, { "epoch": 2.31, "learning_rate": 7.305359409451192e-06, "loss": 0.4415, "step": 3490 }, { "epoch": 2.31, "learning_rate": 7.300257955551691e-06, "loss": 0.4531, "step": 3491 }, { "epoch": 2.32, "learning_rate": 7.29515725937894e-06, "loss": 0.4458, "step": 3492 }, { "epoch": 2.32, "learning_rate": 7.29005732236453e-06, "loss": 0.4209, "step": 3493 }, { "epoch": 2.32, "learning_rate": 7.284958145939842e-06, "loss": 0.4142, "step": 3494 }, { "epoch": 2.32, "learning_rate": 7.279859731536045e-06, "loss": 0.4405, "step": 3495 }, { "epoch": 2.32, "learning_rate": 7.274762080584091e-06, "loss": 0.4735, "step": 3496 }, { "epoch": 2.32, "learning_rate": 7.269665194514721e-06, "loss": 0.4689, "step": 3497 }, { "epoch": 2.32, "learning_rate": 7.26456907475846e-06, "loss": 0.4187, "step": 3498 }, { "epoch": 2.32, "learning_rate": 7.2594737227456125e-06, "loss": 0.4564, "step": 3499 }, { "epoch": 2.32, "learning_rate": 7.2543791399062755e-06, "loss": 0.4354, "step": 3500 }, { "epoch": 2.32, "learning_rate": 7.2492853276703275e-06, "loss": 0.4134, "step": 3501 }, { "epoch": 2.32, "learning_rate": 7.244192287467429e-06, "loss": 0.4516, "step": 3502 }, { "epoch": 2.32, "learning_rate": 7.23910002072703e-06, "loss": 0.489, "step": 3503 }, { "epoch": 2.32, "learning_rate": 7.2340085288783504e-06, "loss": 0.4368, "step": 3504 }, { "epoch": 2.32, "learning_rate": 7.228917813350404e-06, "loss": 0.45, "step": 3505 }, { "epoch": 2.33, "learning_rate": 7.223827875571981e-06, "loss": 0.4866, "step": 3506 }, { "epoch": 2.33, "learning_rate": 7.21873871697166e-06, "loss": 0.4565, "step": 3507 }, { "epoch": 2.33, "learning_rate": 7.213650338977795e-06, "loss": 0.4598, "step": 3508 }, { "epoch": 2.33, "learning_rate": 7.208562743018525e-06, "loss": 0.4587, "step": 3509 }, { "epoch": 2.33, "learning_rate": 7.203475930521764e-06, "loss": 0.4419, "step": 3510 }, { "epoch": 2.33, "learning_rate": 7.198389902915206e-06, "loss": 0.4119, "step": 3511 }, { "epoch": 2.33, "learning_rate": 7.193304661626333e-06, "loss": 0.4405, "step": 3512 }, { "epoch": 2.33, "learning_rate": 7.188220208082398e-06, "loss": 0.4706, "step": 3513 }, { "epoch": 2.33, "learning_rate": 7.183136543710436e-06, "loss": 0.4529, "step": 3514 }, { "epoch": 2.33, "learning_rate": 7.1780536699372685e-06, "loss": 0.4359, "step": 3515 }, { "epoch": 2.33, "learning_rate": 7.172971588189475e-06, "loss": 0.4417, "step": 3516 }, { "epoch": 2.33, "learning_rate": 7.167890299893432e-06, "loss": 0.4259, "step": 3517 }, { "epoch": 2.33, "learning_rate": 7.162809806475283e-06, "loss": 0.4407, "step": 3518 }, { "epoch": 2.33, "learning_rate": 7.157730109360954e-06, "loss": 0.4335, "step": 3519 }, { "epoch": 2.33, "learning_rate": 7.1526512099761424e-06, "loss": 0.4171, "step": 3520 }, { "epoch": 2.34, "learning_rate": 7.147573109746331e-06, "loss": 0.4404, "step": 3521 }, { "epoch": 2.34, "learning_rate": 7.142495810096762e-06, "loss": 0.4391, "step": 3522 }, { "epoch": 2.34, "learning_rate": 7.137419312452469e-06, "loss": 0.4373, "step": 3523 }, { "epoch": 2.34, "learning_rate": 7.132343618238251e-06, "loss": 0.4633, "step": 3524 }, { "epoch": 2.34, "learning_rate": 7.127268728878687e-06, "loss": 0.4483, "step": 3525 }, { "epoch": 2.34, "learning_rate": 7.122194645798128e-06, "loss": 0.3956, "step": 3526 }, { "epoch": 2.34, "learning_rate": 7.117121370420695e-06, "loss": 0.412, "step": 3527 }, { "epoch": 2.34, "learning_rate": 7.112048904170288e-06, "loss": 0.4419, "step": 3528 }, { "epoch": 2.34, "learning_rate": 7.106977248470577e-06, "loss": 0.4381, "step": 3529 }, { "epoch": 2.34, "learning_rate": 7.101906404745006e-06, "loss": 0.4403, "step": 3530 }, { "epoch": 2.34, "learning_rate": 7.096836374416789e-06, "loss": 0.4569, "step": 3531 }, { "epoch": 2.34, "learning_rate": 7.091767158908916e-06, "loss": 0.4104, "step": 3532 }, { "epoch": 2.34, "learning_rate": 7.0866987596441394e-06, "loss": 0.4476, "step": 3533 }, { "epoch": 2.34, "learning_rate": 7.081631178044992e-06, "loss": 0.4346, "step": 3534 }, { "epoch": 2.34, "learning_rate": 7.076564415533774e-06, "loss": 0.4343, "step": 3535 }, { "epoch": 2.35, "learning_rate": 7.071498473532554e-06, "loss": 0.4373, "step": 3536 }, { "epoch": 2.35, "learning_rate": 7.066433353463171e-06, "loss": 0.452, "step": 3537 }, { "epoch": 2.35, "learning_rate": 7.061369056747239e-06, "loss": 0.4414, "step": 3538 }, { "epoch": 2.35, "learning_rate": 7.056305584806127e-06, "loss": 0.4686, "step": 3539 }, { "epoch": 2.35, "learning_rate": 7.0512429390609825e-06, "loss": 0.4706, "step": 3540 }, { "epoch": 2.35, "learning_rate": 7.046181120932726e-06, "loss": 0.4547, "step": 3541 }, { "epoch": 2.35, "learning_rate": 7.041120131842035e-06, "loss": 0.4455, "step": 3542 }, { "epoch": 2.35, "learning_rate": 7.03605997320936e-06, "loss": 0.4433, "step": 3543 }, { "epoch": 2.35, "learning_rate": 7.031000646454922e-06, "loss": 0.4661, "step": 3544 }, { "epoch": 2.35, "learning_rate": 7.0259421529986946e-06, "loss": 0.4327, "step": 3545 }, { "epoch": 2.35, "learning_rate": 7.020884494260431e-06, "loss": 0.4314, "step": 3546 }, { "epoch": 2.35, "learning_rate": 7.015827671659647e-06, "loss": 0.4583, "step": 3547 }, { "epoch": 2.35, "learning_rate": 7.010771686615618e-06, "loss": 0.4411, "step": 3548 }, { "epoch": 2.35, "learning_rate": 7.005716540547399e-06, "loss": 0.4061, "step": 3549 }, { "epoch": 2.35, "learning_rate": 7.00066223487379e-06, "loss": 0.4522, "step": 3550 }, { "epoch": 2.36, "learning_rate": 6.995608771013365e-06, "loss": 0.4227, "step": 3551 }, { "epoch": 2.36, "learning_rate": 6.990556150384464e-06, "loss": 0.426, "step": 3552 }, { "epoch": 2.36, "learning_rate": 6.985504374405188e-06, "loss": 0.4526, "step": 3553 }, { "epoch": 2.36, "learning_rate": 6.9804534444934005e-06, "loss": 0.4475, "step": 3554 }, { "epoch": 2.36, "learning_rate": 6.975403362066727e-06, "loss": 0.4351, "step": 3555 }, { "epoch": 2.36, "learning_rate": 6.970354128542553e-06, "loss": 0.4514, "step": 3556 }, { "epoch": 2.36, "learning_rate": 6.965305745338033e-06, "loss": 0.4737, "step": 3557 }, { "epoch": 2.36, "learning_rate": 6.960258213870075e-06, "loss": 0.4709, "step": 3558 }, { "epoch": 2.36, "learning_rate": 6.955211535555353e-06, "loss": 0.4494, "step": 3559 }, { "epoch": 2.36, "learning_rate": 6.9501657118102994e-06, "loss": 0.4359, "step": 3560 }, { "epoch": 2.36, "learning_rate": 6.945120744051108e-06, "loss": 0.4241, "step": 3561 }, { "epoch": 2.36, "learning_rate": 6.9400766336937286e-06, "loss": 0.4718, "step": 3562 }, { "epoch": 2.36, "learning_rate": 6.935033382153875e-06, "loss": 0.4306, "step": 3563 }, { "epoch": 2.36, "learning_rate": 6.92999099084702e-06, "loss": 0.4353, "step": 3564 }, { "epoch": 2.36, "learning_rate": 6.92494946118839e-06, "loss": 0.449, "step": 3565 }, { "epoch": 2.37, "learning_rate": 6.919908794592973e-06, "loss": 0.4529, "step": 3566 }, { "epoch": 2.37, "learning_rate": 6.914868992475521e-06, "loss": 0.445, "step": 3567 }, { "epoch": 2.37, "learning_rate": 6.909830056250527e-06, "loss": 0.459, "step": 3568 }, { "epoch": 2.37, "learning_rate": 6.904791987332256e-06, "loss": 0.4115, "step": 3569 }, { "epoch": 2.37, "learning_rate": 6.899754787134725e-06, "loss": 0.4252, "step": 3570 }, { "epoch": 2.37, "learning_rate": 6.894718457071703e-06, "loss": 0.4236, "step": 3571 }, { "epoch": 2.37, "learning_rate": 6.889682998556724e-06, "loss": 0.4085, "step": 3572 }, { "epoch": 2.37, "learning_rate": 6.88464841300307e-06, "loss": 0.4205, "step": 3573 }, { "epoch": 2.37, "learning_rate": 6.8796147018237734e-06, "loss": 0.421, "step": 3574 }, { "epoch": 2.37, "learning_rate": 6.874581866431633e-06, "loss": 0.4488, "step": 3575 }, { "epoch": 2.37, "learning_rate": 6.869549908239198e-06, "loss": 0.4126, "step": 3576 }, { "epoch": 2.37, "learning_rate": 6.864518828658767e-06, "loss": 0.4676, "step": 3577 }, { "epoch": 2.37, "learning_rate": 6.8594886291024e-06, "loss": 0.4283, "step": 3578 }, { "epoch": 2.37, "learning_rate": 6.854459310981894e-06, "loss": 0.4777, "step": 3579 }, { "epoch": 2.37, "learning_rate": 6.849430875708818e-06, "loss": 0.4633, "step": 3580 }, { "epoch": 2.38, "learning_rate": 6.84440332469448e-06, "loss": 0.4546, "step": 3581 }, { "epoch": 2.38, "learning_rate": 6.839376659349945e-06, "loss": 0.4471, "step": 3582 }, { "epoch": 2.38, "learning_rate": 6.8343508810860325e-06, "loss": 0.4466, "step": 3583 }, { "epoch": 2.38, "learning_rate": 6.8293259913133115e-06, "loss": 0.4365, "step": 3584 }, { "epoch": 2.38, "learning_rate": 6.82430199144209e-06, "loss": 0.4517, "step": 3585 }, { "epoch": 2.38, "learning_rate": 6.819278882882442e-06, "loss": 0.4448, "step": 3586 }, { "epoch": 2.38, "learning_rate": 6.8142566670441835e-06, "loss": 0.4237, "step": 3587 }, { "epoch": 2.38, "learning_rate": 6.809235345336882e-06, "loss": 0.4277, "step": 3588 }, { "epoch": 2.38, "learning_rate": 6.8042149191698526e-06, "loss": 0.492, "step": 3589 }, { "epoch": 2.38, "learning_rate": 6.799195389952163e-06, "loss": 0.4245, "step": 3590 }, { "epoch": 2.38, "learning_rate": 6.794176759092622e-06, "loss": 0.4467, "step": 3591 }, { "epoch": 2.38, "learning_rate": 6.789159027999792e-06, "loss": 0.4284, "step": 3592 }, { "epoch": 2.38, "learning_rate": 6.784142198081983e-06, "loss": 0.4248, "step": 3593 }, { "epoch": 2.38, "learning_rate": 6.779126270747247e-06, "loss": 0.4149, "step": 3594 }, { "epoch": 2.38, "learning_rate": 6.77411124740339e-06, "loss": 0.4378, "step": 3595 }, { "epoch": 2.39, "learning_rate": 6.7690971294579596e-06, "loss": 0.4382, "step": 3596 }, { "epoch": 2.39, "learning_rate": 6.764083918318246e-06, "loss": 0.4614, "step": 3597 }, { "epoch": 2.39, "learning_rate": 6.759071615391293e-06, "loss": 0.4236, "step": 3598 }, { "epoch": 2.39, "learning_rate": 6.754060222083883e-06, "loss": 0.4642, "step": 3599 }, { "epoch": 2.39, "learning_rate": 6.7490497398025444e-06, "loss": 0.4343, "step": 3600 }, { "epoch": 2.39, "learning_rate": 6.744040169953559e-06, "loss": 0.462, "step": 3601 }, { "epoch": 2.39, "learning_rate": 6.739031513942933e-06, "loss": 0.4474, "step": 3602 }, { "epoch": 2.39, "learning_rate": 6.734023773176434e-06, "loss": 0.4472, "step": 3603 }, { "epoch": 2.39, "learning_rate": 6.729016949059566e-06, "loss": 0.4427, "step": 3604 }, { "epoch": 2.39, "learning_rate": 6.724011042997576e-06, "loss": 0.4836, "step": 3605 }, { "epoch": 2.39, "learning_rate": 6.719006056395452e-06, "loss": 0.4896, "step": 3606 }, { "epoch": 2.39, "learning_rate": 6.7140019906579305e-06, "loss": 0.4458, "step": 3607 }, { "epoch": 2.39, "learning_rate": 6.708998847189474e-06, "loss": 0.4098, "step": 3608 }, { "epoch": 2.39, "learning_rate": 6.703996627394303e-06, "loss": 0.4333, "step": 3609 }, { "epoch": 2.4, "learning_rate": 6.698995332676375e-06, "loss": 0.4708, "step": 3610 }, { "epoch": 2.4, "learning_rate": 6.693994964439379e-06, "loss": 0.4483, "step": 3611 }, { "epoch": 2.4, "learning_rate": 6.688995524086754e-06, "loss": 0.4463, "step": 3612 }, { "epoch": 2.4, "learning_rate": 6.683997013021679e-06, "loss": 0.4482, "step": 3613 }, { "epoch": 2.4, "learning_rate": 6.678999432647057e-06, "loss": 0.4296, "step": 3614 }, { "epoch": 2.4, "learning_rate": 6.674002784365547e-06, "loss": 0.4126, "step": 3615 }, { "epoch": 2.4, "learning_rate": 6.669007069579538e-06, "loss": 0.4426, "step": 3616 }, { "epoch": 2.4, "learning_rate": 6.664012289691162e-06, "loss": 0.4256, "step": 3617 }, { "epoch": 2.4, "learning_rate": 6.659018446102283e-06, "loss": 0.4596, "step": 3618 }, { "epoch": 2.4, "learning_rate": 6.65402554021451e-06, "loss": 0.4325, "step": 3619 }, { "epoch": 2.4, "learning_rate": 6.649033573429178e-06, "loss": 0.4374, "step": 3620 }, { "epoch": 2.4, "learning_rate": 6.644042547147362e-06, "loss": 0.4303, "step": 3621 }, { "epoch": 2.4, "learning_rate": 6.639052462769881e-06, "loss": 0.4271, "step": 3622 }, { "epoch": 2.4, "learning_rate": 6.634063321697282e-06, "loss": 0.403, "step": 3623 }, { "epoch": 2.4, "learning_rate": 6.629075125329849e-06, "loss": 0.428, "step": 3624 }, { "epoch": 2.41, "learning_rate": 6.6240878750676e-06, "loss": 0.4293, "step": 3625 }, { "epoch": 2.41, "learning_rate": 6.619101572310287e-06, "loss": 0.4489, "step": 3626 }, { "epoch": 2.41, "learning_rate": 6.6141162184574e-06, "loss": 0.4596, "step": 3627 }, { "epoch": 2.41, "learning_rate": 6.6091318149081595e-06, "loss": 0.4322, "step": 3628 }, { "epoch": 2.41, "learning_rate": 6.60414836306152e-06, "loss": 0.442, "step": 3629 }, { "epoch": 2.41, "learning_rate": 6.5991658643161696e-06, "loss": 0.4025, "step": 3630 }, { "epoch": 2.41, "learning_rate": 6.594184320070525e-06, "loss": 0.4346, "step": 3631 }, { "epoch": 2.41, "learning_rate": 6.58920373172274e-06, "loss": 0.4604, "step": 3632 }, { "epoch": 2.41, "learning_rate": 6.584224100670698e-06, "loss": 0.4403, "step": 3633 }, { "epoch": 2.41, "learning_rate": 6.5792454283120136e-06, "loss": 0.4435, "step": 3634 }, { "epoch": 2.41, "learning_rate": 6.574267716044033e-06, "loss": 0.431, "step": 3635 }, { "epoch": 2.41, "learning_rate": 6.569290965263835e-06, "loss": 0.4122, "step": 3636 }, { "epoch": 2.41, "learning_rate": 6.564315177368222e-06, "loss": 0.4402, "step": 3637 }, { "epoch": 2.41, "learning_rate": 6.559340353753729e-06, "loss": 0.4413, "step": 3638 }, { "epoch": 2.41, "learning_rate": 6.5543664958166266e-06, "loss": 0.4892, "step": 3639 }, { "epoch": 2.42, "learning_rate": 6.549393604952906e-06, "loss": 0.4502, "step": 3640 }, { "epoch": 2.42, "learning_rate": 6.544421682558293e-06, "loss": 0.4277, "step": 3641 }, { "epoch": 2.42, "learning_rate": 6.539450730028238e-06, "loss": 0.4171, "step": 3642 }, { "epoch": 2.42, "learning_rate": 6.534480748757917e-06, "loss": 0.4482, "step": 3643 }, { "epoch": 2.42, "learning_rate": 6.52951174014224e-06, "loss": 0.4598, "step": 3644 }, { "epoch": 2.42, "learning_rate": 6.524543705575839e-06, "loss": 0.4523, "step": 3645 }, { "epoch": 2.42, "learning_rate": 6.519576646453075e-06, "loss": 0.4233, "step": 3646 }, { "epoch": 2.42, "learning_rate": 6.514610564168034e-06, "loss": 0.4775, "step": 3647 }, { "epoch": 2.42, "learning_rate": 6.509645460114532e-06, "loss": 0.418, "step": 3648 }, { "epoch": 2.42, "learning_rate": 6.504681335686098e-06, "loss": 0.4211, "step": 3649 }, { "epoch": 2.42, "learning_rate": 6.499718192275999e-06, "loss": 0.42, "step": 3650 }, { "epoch": 2.42, "learning_rate": 6.494756031277224e-06, "loss": 0.485, "step": 3651 }, { "epoch": 2.42, "learning_rate": 6.489794854082483e-06, "loss": 0.4125, "step": 3652 }, { "epoch": 2.42, "learning_rate": 6.484834662084217e-06, "loss": 0.4401, "step": 3653 }, { "epoch": 2.42, "learning_rate": 6.479875456674576e-06, "loss": 0.4453, "step": 3654 }, { "epoch": 2.43, "learning_rate": 6.474917239245445e-06, "loss": 0.4371, "step": 3655 }, { "epoch": 2.43, "learning_rate": 6.469960011188431e-06, "loss": 0.4473, "step": 3656 }, { "epoch": 2.43, "learning_rate": 6.465003773894859e-06, "loss": 0.4259, "step": 3657 }, { "epoch": 2.43, "learning_rate": 6.460048528755778e-06, "loss": 0.4485, "step": 3658 }, { "epoch": 2.43, "learning_rate": 6.455094277161964e-06, "loss": 0.4598, "step": 3659 }, { "epoch": 2.43, "learning_rate": 6.450141020503902e-06, "loss": 0.4259, "step": 3660 }, { "epoch": 2.43, "learning_rate": 6.445188760171806e-06, "loss": 0.4785, "step": 3661 }, { "epoch": 2.43, "learning_rate": 6.4402374975556085e-06, "loss": 0.4534, "step": 3662 }, { "epoch": 2.43, "learning_rate": 6.435287234044965e-06, "loss": 0.45, "step": 3663 }, { "epoch": 2.43, "learning_rate": 6.4303379710292445e-06, "loss": 0.4328, "step": 3664 }, { "epoch": 2.43, "learning_rate": 6.425389709897543e-06, "loss": 0.4413, "step": 3665 }, { "epoch": 2.43, "learning_rate": 6.420442452038668e-06, "loss": 0.3978, "step": 3666 }, { "epoch": 2.43, "learning_rate": 6.415496198841147e-06, "loss": 0.424, "step": 3667 }, { "epoch": 2.43, "learning_rate": 6.410550951693228e-06, "loss": 0.4175, "step": 3668 }, { "epoch": 2.43, "learning_rate": 6.405606711982877e-06, "loss": 0.4138, "step": 3669 }, { "epoch": 2.44, "learning_rate": 6.400663481097774e-06, "loss": 0.4631, "step": 3670 }, { "epoch": 2.44, "learning_rate": 6.395721260425321e-06, "loss": 0.4291, "step": 3671 }, { "epoch": 2.44, "learning_rate": 6.390780051352627e-06, "loss": 0.4527, "step": 3672 }, { "epoch": 2.44, "learning_rate": 6.385839855266527e-06, "loss": 0.4537, "step": 3673 }, { "epoch": 2.44, "learning_rate": 6.380900673553567e-06, "loss": 0.421, "step": 3674 }, { "epoch": 2.44, "learning_rate": 6.375962507600009e-06, "loss": 0.4492, "step": 3675 }, { "epoch": 2.44, "learning_rate": 6.371025358791834e-06, "loss": 0.5156, "step": 3676 }, { "epoch": 2.44, "learning_rate": 6.366089228514727e-06, "loss": 0.4256, "step": 3677 }, { "epoch": 2.44, "learning_rate": 6.361154118154096e-06, "loss": 0.4096, "step": 3678 }, { "epoch": 2.44, "learning_rate": 6.356220029095061e-06, "loss": 0.4158, "step": 3679 }, { "epoch": 2.44, "learning_rate": 6.3512869627224535e-06, "loss": 0.4658, "step": 3680 }, { "epoch": 2.44, "learning_rate": 6.346354920420822e-06, "loss": 0.4171, "step": 3681 }, { "epoch": 2.44, "learning_rate": 6.341423903574426e-06, "loss": 0.4592, "step": 3682 }, { "epoch": 2.44, "learning_rate": 6.33649391356723e-06, "loss": 0.4661, "step": 3683 }, { "epoch": 2.44, "learning_rate": 6.331564951782918e-06, "loss": 0.4568, "step": 3684 }, { "epoch": 2.45, "learning_rate": 6.326637019604888e-06, "loss": 0.399, "step": 3685 }, { "epoch": 2.45, "learning_rate": 6.32171011841624e-06, "loss": 0.4534, "step": 3686 }, { "epoch": 2.45, "learning_rate": 6.316784249599792e-06, "loss": 0.4481, "step": 3687 }, { "epoch": 2.45, "learning_rate": 6.311859414538075e-06, "loss": 0.4416, "step": 3688 }, { "epoch": 2.45, "learning_rate": 6.306935614613312e-06, "loss": 0.4382, "step": 3689 }, { "epoch": 2.45, "learning_rate": 6.302012851207455e-06, "loss": 0.4625, "step": 3690 }, { "epoch": 2.45, "learning_rate": 6.297091125702157e-06, "loss": 0.4316, "step": 3691 }, { "epoch": 2.45, "learning_rate": 6.292170439478782e-06, "loss": 0.4556, "step": 3692 }, { "epoch": 2.45, "learning_rate": 6.2872507939184e-06, "loss": 0.4212, "step": 3693 }, { "epoch": 2.45, "learning_rate": 6.282332190401794e-06, "loss": 0.4267, "step": 3694 }, { "epoch": 2.45, "learning_rate": 6.277414630309444e-06, "loss": 0.4399, "step": 3695 }, { "epoch": 2.45, "learning_rate": 6.272498115021546e-06, "loss": 0.4302, "step": 3696 }, { "epoch": 2.45, "learning_rate": 6.267582645918001e-06, "loss": 0.4348, "step": 3697 }, { "epoch": 2.45, "learning_rate": 6.2626682243784155e-06, "loss": 0.4467, "step": 3698 }, { "epoch": 2.46, "learning_rate": 6.257754851782104e-06, "loss": 0.4496, "step": 3699 }, { "epoch": 2.46, "learning_rate": 6.252842529508081e-06, "loss": 0.4466, "step": 3700 }, { "epoch": 2.46, "learning_rate": 6.247931258935074e-06, "loss": 0.4406, "step": 3701 }, { "epoch": 2.46, "learning_rate": 6.243021041441508e-06, "loss": 0.456, "step": 3702 }, { "epoch": 2.46, "learning_rate": 6.238111878405518e-06, "loss": 0.4616, "step": 3703 }, { "epoch": 2.46, "learning_rate": 6.23320377120494e-06, "loss": 0.3853, "step": 3704 }, { "epoch": 2.46, "learning_rate": 6.228296721217317e-06, "loss": 0.4379, "step": 3705 }, { "epoch": 2.46, "learning_rate": 6.223390729819889e-06, "loss": 0.4125, "step": 3706 }, { "epoch": 2.46, "learning_rate": 6.218485798389604e-06, "loss": 0.414, "step": 3707 }, { "epoch": 2.46, "learning_rate": 6.213581928303112e-06, "loss": 0.4495, "step": 3708 }, { "epoch": 2.46, "learning_rate": 6.208679120936765e-06, "loss": 0.4467, "step": 3709 }, { "epoch": 2.46, "learning_rate": 6.2037773776666134e-06, "loss": 0.4587, "step": 3710 }, { "epoch": 2.46, "learning_rate": 6.198876699868415e-06, "loss": 0.4626, "step": 3711 }, { "epoch": 2.46, "learning_rate": 6.193977088917622e-06, "loss": 0.4559, "step": 3712 }, { "epoch": 2.46, "learning_rate": 6.189078546189393e-06, "loss": 0.4426, "step": 3713 }, { "epoch": 2.47, "learning_rate": 6.184181073058582e-06, "loss": 0.4434, "step": 3714 }, { "epoch": 2.47, "learning_rate": 6.179284670899745e-06, "loss": 0.4431, "step": 3715 }, { "epoch": 2.47, "learning_rate": 6.174389341087138e-06, "loss": 0.4622, "step": 3716 }, { "epoch": 2.47, "learning_rate": 6.1694950849947186e-06, "loss": 0.4141, "step": 3717 }, { "epoch": 2.47, "learning_rate": 6.164601903996134e-06, "loss": 0.4155, "step": 3718 }, { "epoch": 2.47, "learning_rate": 6.159709799464737e-06, "loss": 0.434, "step": 3719 }, { "epoch": 2.47, "learning_rate": 6.154818772773579e-06, "loss": 0.4769, "step": 3720 }, { "epoch": 2.47, "learning_rate": 6.149928825295406e-06, "loss": 0.4362, "step": 3721 }, { "epoch": 2.47, "learning_rate": 6.14503995840266e-06, "loss": 0.4318, "step": 3722 }, { "epoch": 2.47, "learning_rate": 6.140152173467486e-06, "loss": 0.4583, "step": 3723 }, { "epoch": 2.47, "learning_rate": 6.1352654718617156e-06, "loss": 0.4347, "step": 3724 }, { "epoch": 2.47, "learning_rate": 6.130379854956879e-06, "loss": 0.4383, "step": 3725 }, { "epoch": 2.47, "learning_rate": 6.125495324124212e-06, "loss": 0.4309, "step": 3726 }, { "epoch": 2.47, "learning_rate": 6.120611880734635e-06, "loss": 0.4379, "step": 3727 }, { "epoch": 2.47, "learning_rate": 6.115729526158769e-06, "loss": 0.4644, "step": 3728 }, { "epoch": 2.48, "learning_rate": 6.110848261766919e-06, "loss": 0.418, "step": 3729 }, { "epoch": 2.48, "learning_rate": 6.105968088929098e-06, "loss": 0.4124, "step": 3730 }, { "epoch": 2.48, "learning_rate": 6.1010890090150045e-06, "loss": 0.4599, "step": 3731 }, { "epoch": 2.48, "learning_rate": 6.0962110233940304e-06, "loss": 0.4161, "step": 3732 }, { "epoch": 2.48, "learning_rate": 6.091334133435263e-06, "loss": 0.4202, "step": 3733 }, { "epoch": 2.48, "learning_rate": 6.086458340507488e-06, "loss": 0.4018, "step": 3734 }, { "epoch": 2.48, "learning_rate": 6.081583645979168e-06, "loss": 0.4603, "step": 3735 }, { "epoch": 2.48, "learning_rate": 6.076710051218467e-06, "loss": 0.4286, "step": 3736 }, { "epoch": 2.48, "learning_rate": 6.071837557593239e-06, "loss": 0.4514, "step": 3737 }, { "epoch": 2.48, "learning_rate": 6.066966166471031e-06, "loss": 0.4489, "step": 3738 }, { "epoch": 2.48, "learning_rate": 6.062095879219079e-06, "loss": 0.4607, "step": 3739 }, { "epoch": 2.48, "learning_rate": 6.057226697204308e-06, "loss": 0.4171, "step": 3740 }, { "epoch": 2.48, "learning_rate": 6.052358621793333e-06, "loss": 0.4348, "step": 3741 }, { "epoch": 2.48, "learning_rate": 6.047491654352458e-06, "loss": 0.4199, "step": 3742 }, { "epoch": 2.48, "learning_rate": 6.042625796247678e-06, "loss": 0.4575, "step": 3743 }, { "epoch": 2.49, "learning_rate": 6.037761048844675e-06, "loss": 0.4436, "step": 3744 }, { "epoch": 2.49, "learning_rate": 6.032897413508822e-06, "loss": 0.4454, "step": 3745 }, { "epoch": 2.49, "learning_rate": 6.028034891605179e-06, "loss": 0.4889, "step": 3746 }, { "epoch": 2.49, "learning_rate": 6.023173484498486e-06, "loss": 0.4478, "step": 3747 }, { "epoch": 2.49, "learning_rate": 6.018313193553181e-06, "loss": 0.413, "step": 3748 }, { "epoch": 2.49, "learning_rate": 6.013454020133382e-06, "loss": 0.4513, "step": 3749 }, { "epoch": 2.49, "learning_rate": 6.0085959656028994e-06, "loss": 0.4412, "step": 3750 }, { "epoch": 2.49, "learning_rate": 6.003739031325223e-06, "loss": 0.4786, "step": 3751 }, { "epoch": 2.49, "learning_rate": 5.998883218663529e-06, "loss": 0.4556, "step": 3752 }, { "epoch": 2.49, "learning_rate": 5.994028528980682e-06, "loss": 0.4328, "step": 3753 }, { "epoch": 2.49, "learning_rate": 5.989174963639231e-06, "loss": 0.4722, "step": 3754 }, { "epoch": 2.49, "learning_rate": 5.984322524001409e-06, "loss": 0.4108, "step": 3755 }, { "epoch": 2.49, "learning_rate": 5.97947121142913e-06, "loss": 0.4435, "step": 3756 }, { "epoch": 2.49, "learning_rate": 5.974621027284e-06, "loss": 0.4526, "step": 3757 }, { "epoch": 2.49, "learning_rate": 5.969771972927294e-06, "loss": 0.451, "step": 3758 }, { "epoch": 2.5, "learning_rate": 5.96492404971998e-06, "loss": 0.4491, "step": 3759 }, { "epoch": 2.5, "learning_rate": 5.960077259022713e-06, "loss": 0.4062, "step": 3760 }, { "epoch": 2.5, "learning_rate": 5.955231602195819e-06, "loss": 0.4243, "step": 3761 }, { "epoch": 2.5, "learning_rate": 5.9503870805993135e-06, "loss": 0.4772, "step": 3762 }, { "epoch": 2.5, "learning_rate": 5.9455436955928924e-06, "loss": 0.4362, "step": 3763 }, { "epoch": 2.5, "learning_rate": 5.9407014485359236e-06, "loss": 0.4642, "step": 3764 }, { "epoch": 2.5, "learning_rate": 5.9358603407874695e-06, "loss": 0.4353, "step": 3765 }, { "epoch": 2.5, "learning_rate": 5.931020373706263e-06, "loss": 0.4557, "step": 3766 }, { "epoch": 2.5, "learning_rate": 5.926181548650718e-06, "loss": 0.4088, "step": 3767 }, { "epoch": 2.5, "learning_rate": 5.921343866978935e-06, "loss": 0.4143, "step": 3768 }, { "epoch": 2.5, "learning_rate": 5.916507330048691e-06, "loss": 0.4344, "step": 3769 }, { "epoch": 2.5, "learning_rate": 5.9116719392174304e-06, "loss": 0.4211, "step": 3770 }, { "epoch": 2.5, "learning_rate": 5.906837695842289e-06, "loss": 0.4212, "step": 3771 }, { "epoch": 2.5, "learning_rate": 5.902004601280076e-06, "loss": 0.4682, "step": 3772 }, { "epoch": 2.5, "learning_rate": 5.897172656887278e-06, "loss": 0.4228, "step": 3773 }, { "epoch": 2.51, "learning_rate": 5.892341864020062e-06, "loss": 0.4392, "step": 3774 }, { "epoch": 2.51, "learning_rate": 5.887512224034263e-06, "loss": 0.452, "step": 3775 }, { "epoch": 2.51, "learning_rate": 5.882683738285404e-06, "loss": 0.4441, "step": 3776 }, { "epoch": 2.51, "learning_rate": 5.877856408128675e-06, "loss": 0.442, "step": 3777 }, { "epoch": 2.51, "learning_rate": 5.873030234918948e-06, "loss": 0.4472, "step": 3778 }, { "epoch": 2.51, "learning_rate": 5.868205220010766e-06, "loss": 0.4814, "step": 3779 }, { "epoch": 2.51, "learning_rate": 5.8633813647583505e-06, "loss": 0.4582, "step": 3780 }, { "epoch": 2.51, "learning_rate": 5.858558670515591e-06, "loss": 0.447, "step": 3781 }, { "epoch": 2.51, "learning_rate": 5.853737138636058e-06, "loss": 0.4183, "step": 3782 }, { "epoch": 2.51, "learning_rate": 5.848916770472992e-06, "loss": 0.4319, "step": 3783 }, { "epoch": 2.51, "learning_rate": 5.84409756737931e-06, "loss": 0.4229, "step": 3784 }, { "epoch": 2.51, "learning_rate": 5.8392795307076e-06, "loss": 0.4262, "step": 3785 }, { "epoch": 2.51, "learning_rate": 5.834462661810123e-06, "loss": 0.4874, "step": 3786 }, { "epoch": 2.51, "learning_rate": 5.829646962038812e-06, "loss": 0.4452, "step": 3787 }, { "epoch": 2.51, "learning_rate": 5.824832432745267e-06, "loss": 0.4797, "step": 3788 }, { "epoch": 2.52, "learning_rate": 5.820019075280772e-06, "loss": 0.4496, "step": 3789 }, { "epoch": 2.52, "learning_rate": 5.815206890996267e-06, "loss": 0.4279, "step": 3790 }, { "epoch": 2.52, "learning_rate": 5.810395881242378e-06, "loss": 0.4347, "step": 3791 }, { "epoch": 2.52, "learning_rate": 5.805586047369389e-06, "loss": 0.4187, "step": 3792 }, { "epoch": 2.52, "learning_rate": 5.8007773907272565e-06, "loss": 0.4372, "step": 3793 }, { "epoch": 2.52, "learning_rate": 5.795969912665615e-06, "loss": 0.3969, "step": 3794 }, { "epoch": 2.52, "learning_rate": 5.791163614533753e-06, "loss": 0.4203, "step": 3795 }, { "epoch": 2.52, "learning_rate": 5.786358497680647e-06, "loss": 0.4339, "step": 3796 }, { "epoch": 2.52, "learning_rate": 5.7815545634549205e-06, "loss": 0.4204, "step": 3797 }, { "epoch": 2.52, "learning_rate": 5.776751813204887e-06, "loss": 0.4482, "step": 3798 }, { "epoch": 2.52, "learning_rate": 5.771950248278513e-06, "loss": 0.4435, "step": 3799 }, { "epoch": 2.52, "learning_rate": 5.76714987002343e-06, "loss": 0.4183, "step": 3800 }, { "epoch": 2.52, "learning_rate": 5.7623506797869525e-06, "loss": 0.4705, "step": 3801 }, { "epoch": 2.52, "learning_rate": 5.757552678916042e-06, "loss": 0.4252, "step": 3802 }, { "epoch": 2.53, "learning_rate": 5.752755868757345e-06, "loss": 0.4253, "step": 3803 }, { "epoch": 2.53, "learning_rate": 5.7479602506571615e-06, "loss": 0.4645, "step": 3804 }, { "epoch": 2.53, "learning_rate": 5.743165825961454e-06, "loss": 0.4407, "step": 3805 }, { "epoch": 2.53, "learning_rate": 5.738372596015867e-06, "loss": 0.4389, "step": 3806 }, { "epoch": 2.53, "learning_rate": 5.733580562165688e-06, "loss": 0.4458, "step": 3807 }, { "epoch": 2.53, "learning_rate": 5.72878972575589e-06, "loss": 0.4327, "step": 3808 }, { "epoch": 2.53, "learning_rate": 5.7240000881310945e-06, "loss": 0.4501, "step": 3809 }, { "epoch": 2.53, "learning_rate": 5.719211650635586e-06, "loss": 0.4477, "step": 3810 }, { "epoch": 2.53, "learning_rate": 5.714424414613329e-06, "loss": 0.4432, "step": 3811 }, { "epoch": 2.53, "learning_rate": 5.709638381407929e-06, "loss": 0.4247, "step": 3812 }, { "epoch": 2.53, "learning_rate": 5.704853552362674e-06, "loss": 0.4022, "step": 3813 }, { "epoch": 2.53, "learning_rate": 5.700069928820496e-06, "loss": 0.4089, "step": 3814 }, { "epoch": 2.53, "learning_rate": 5.695287512124011e-06, "loss": 0.464, "step": 3815 }, { "epoch": 2.53, "learning_rate": 5.690506303615461e-06, "loss": 0.4569, "step": 3816 }, { "epoch": 2.53, "learning_rate": 5.685726304636787e-06, "loss": 0.4443, "step": 3817 }, { "epoch": 2.54, "learning_rate": 5.680947516529566e-06, "loss": 0.4363, "step": 3818 }, { "epoch": 2.54, "learning_rate": 5.676169940635045e-06, "loss": 0.41, "step": 3819 }, { "epoch": 2.54, "learning_rate": 5.671393578294133e-06, "loss": 0.449, "step": 3820 }, { "epoch": 2.54, "learning_rate": 5.6666184308473915e-06, "loss": 0.455, "step": 3821 }, { "epoch": 2.54, "learning_rate": 5.661844499635043e-06, "loss": 0.4161, "step": 3822 }, { "epoch": 2.54, "learning_rate": 5.657071785996966e-06, "loss": 0.4271, "step": 3823 }, { "epoch": 2.54, "learning_rate": 5.652300291272707e-06, "loss": 0.4935, "step": 3824 }, { "epoch": 2.54, "learning_rate": 5.647530016801457e-06, "loss": 0.4375, "step": 3825 }, { "epoch": 2.54, "learning_rate": 5.6427609639220804e-06, "loss": 0.4461, "step": 3826 }, { "epoch": 2.54, "learning_rate": 5.637993133973083e-06, "loss": 0.4122, "step": 3827 }, { "epoch": 2.54, "learning_rate": 5.633226528292635e-06, "loss": 0.4563, "step": 3828 }, { "epoch": 2.54, "learning_rate": 5.628461148218565e-06, "loss": 0.4445, "step": 3829 }, { "epoch": 2.54, "learning_rate": 5.62369699508835e-06, "loss": 0.4412, "step": 3830 }, { "epoch": 2.54, "learning_rate": 5.618934070239135e-06, "loss": 0.4524, "step": 3831 }, { "epoch": 2.54, "learning_rate": 5.6141723750077095e-06, "loss": 0.4851, "step": 3832 }, { "epoch": 2.55, "learning_rate": 5.6094119107305145e-06, "loss": 0.4015, "step": 3833 }, { "epoch": 2.55, "learning_rate": 5.604652678743663e-06, "loss": 0.4392, "step": 3834 }, { "epoch": 2.55, "learning_rate": 5.5998946803829e-06, "loss": 0.444, "step": 3835 }, { "epoch": 2.55, "learning_rate": 5.595137916983647e-06, "loss": 0.4508, "step": 3836 }, { "epoch": 2.55, "learning_rate": 5.5903823898809586e-06, "loss": 0.4389, "step": 3837 }, { "epoch": 2.55, "learning_rate": 5.585628100409557e-06, "loss": 0.3999, "step": 3838 }, { "epoch": 2.55, "learning_rate": 5.580875049903811e-06, "loss": 0.4794, "step": 3839 }, { "epoch": 2.55, "learning_rate": 5.576123239697735e-06, "loss": 0.4045, "step": 3840 }, { "epoch": 2.55, "learning_rate": 5.57137267112501e-06, "loss": 0.4505, "step": 3841 }, { "epoch": 2.55, "learning_rate": 5.5666233455189556e-06, "loss": 0.4002, "step": 3842 }, { "epoch": 2.55, "learning_rate": 5.561875264212553e-06, "loss": 0.4334, "step": 3843 }, { "epoch": 2.55, "learning_rate": 5.557128428538425e-06, "loss": 0.4159, "step": 3844 }, { "epoch": 2.55, "learning_rate": 5.552382839828847e-06, "loss": 0.4338, "step": 3845 }, { "epoch": 2.55, "learning_rate": 5.547638499415751e-06, "loss": 0.459, "step": 3846 }, { "epoch": 2.55, "learning_rate": 5.5428954086307085e-06, "loss": 0.4299, "step": 3847 }, { "epoch": 2.56, "learning_rate": 5.538153568804951e-06, "loss": 0.4158, "step": 3848 }, { "epoch": 2.56, "learning_rate": 5.53341298126935e-06, "loss": 0.4511, "step": 3849 }, { "epoch": 2.56, "learning_rate": 5.528673647354432e-06, "loss": 0.4259, "step": 3850 }, { "epoch": 2.56, "learning_rate": 5.52393556839036e-06, "loss": 0.4301, "step": 3851 }, { "epoch": 2.56, "learning_rate": 5.51919874570696e-06, "loss": 0.4522, "step": 3852 }, { "epoch": 2.56, "learning_rate": 5.514463180633702e-06, "loss": 0.4308, "step": 3853 }, { "epoch": 2.56, "learning_rate": 5.509728874499692e-06, "loss": 0.4333, "step": 3854 }, { "epoch": 2.56, "learning_rate": 5.504995828633704e-06, "loss": 0.4707, "step": 3855 }, { "epoch": 2.56, "learning_rate": 5.500264044364124e-06, "loss": 0.4298, "step": 3856 }, { "epoch": 2.56, "learning_rate": 5.495533523019021e-06, "loss": 0.4242, "step": 3857 }, { "epoch": 2.56, "learning_rate": 5.490804265926084e-06, "loss": 0.4345, "step": 3858 }, { "epoch": 2.56, "learning_rate": 5.486076274412664e-06, "loss": 0.4507, "step": 3859 }, { "epoch": 2.56, "learning_rate": 5.481349549805741e-06, "loss": 0.4404, "step": 3860 }, { "epoch": 2.56, "learning_rate": 5.476624093431956e-06, "loss": 0.4434, "step": 3861 }, { "epoch": 2.56, "learning_rate": 5.471899906617581e-06, "loss": 0.4121, "step": 3862 }, { "epoch": 2.57, "learning_rate": 5.467176990688534e-06, "loss": 0.4289, "step": 3863 }, { "epoch": 2.57, "learning_rate": 5.462455346970383e-06, "loss": 0.433, "step": 3864 }, { "epoch": 2.57, "learning_rate": 5.457734976788331e-06, "loss": 0.4319, "step": 3865 }, { "epoch": 2.57, "learning_rate": 5.4530158814672315e-06, "loss": 0.448, "step": 3866 }, { "epoch": 2.57, "learning_rate": 5.448298062331574e-06, "loss": 0.4342, "step": 3867 }, { "epoch": 2.57, "learning_rate": 5.443581520705485e-06, "loss": 0.4472, "step": 3868 }, { "epoch": 2.57, "learning_rate": 5.438866257912751e-06, "loss": 0.4287, "step": 3869 }, { "epoch": 2.57, "learning_rate": 5.434152275276776e-06, "loss": 0.4306, "step": 3870 }, { "epoch": 2.57, "learning_rate": 5.429439574120627e-06, "loss": 0.4117, "step": 3871 }, { "epoch": 2.57, "learning_rate": 5.424728155766994e-06, "loss": 0.4475, "step": 3872 }, { "epoch": 2.57, "learning_rate": 5.420018021538211e-06, "loss": 0.4138, "step": 3873 }, { "epoch": 2.57, "learning_rate": 5.415309172756263e-06, "loss": 0.4391, "step": 3874 }, { "epoch": 2.57, "learning_rate": 5.410601610742754e-06, "loss": 0.4368, "step": 3875 }, { "epoch": 2.57, "learning_rate": 5.4058953368189515e-06, "loss": 0.4226, "step": 3876 }, { "epoch": 2.57, "learning_rate": 5.401190352305736e-06, "loss": 0.4685, "step": 3877 }, { "epoch": 2.58, "learning_rate": 5.396486658523647e-06, "loss": 0.4473, "step": 3878 }, { "epoch": 2.58, "learning_rate": 5.391784256792851e-06, "loss": 0.437, "step": 3879 }, { "epoch": 2.58, "learning_rate": 5.38708314843315e-06, "loss": 0.415, "step": 3880 }, { "epoch": 2.58, "learning_rate": 5.382383334763992e-06, "loss": 0.4128, "step": 3881 }, { "epoch": 2.58, "learning_rate": 5.377684817104451e-06, "loss": 0.4946, "step": 3882 }, { "epoch": 2.58, "learning_rate": 5.372987596773249e-06, "loss": 0.4345, "step": 3883 }, { "epoch": 2.58, "learning_rate": 5.368291675088736e-06, "loss": 0.457, "step": 3884 }, { "epoch": 2.58, "learning_rate": 5.363597053368897e-06, "loss": 0.4408, "step": 3885 }, { "epoch": 2.58, "learning_rate": 5.3589037329313534e-06, "loss": 0.453, "step": 3886 }, { "epoch": 2.58, "learning_rate": 5.354211715093361e-06, "loss": 0.4598, "step": 3887 }, { "epoch": 2.58, "learning_rate": 5.34952100117182e-06, "loss": 0.4323, "step": 3888 }, { "epoch": 2.58, "learning_rate": 5.344831592483249e-06, "loss": 0.4242, "step": 3889 }, { "epoch": 2.58, "learning_rate": 5.340143490343813e-06, "loss": 0.4336, "step": 3890 }, { "epoch": 2.58, "learning_rate": 5.3354566960692945e-06, "loss": 0.4291, "step": 3891 }, { "epoch": 2.58, "learning_rate": 5.3307712109751274e-06, "loss": 0.4126, "step": 3892 }, { "epoch": 2.59, "learning_rate": 5.3260870363763635e-06, "loss": 0.4418, "step": 3893 }, { "epoch": 2.59, "learning_rate": 5.321404173587696e-06, "loss": 0.4345, "step": 3894 }, { "epoch": 2.59, "learning_rate": 5.316722623923454e-06, "loss": 0.4443, "step": 3895 }, { "epoch": 2.59, "learning_rate": 5.312042388697582e-06, "loss": 0.4251, "step": 3896 }, { "epoch": 2.59, "learning_rate": 5.307363469223667e-06, "loss": 0.4331, "step": 3897 }, { "epoch": 2.59, "learning_rate": 5.3026858668149205e-06, "loss": 0.4175, "step": 3898 }, { "epoch": 2.59, "learning_rate": 5.298009582784196e-06, "loss": 0.4254, "step": 3899 }, { "epoch": 2.59, "learning_rate": 5.293334618443962e-06, "loss": 0.4211, "step": 3900 }, { "epoch": 2.59, "learning_rate": 5.2886609751063275e-06, "loss": 0.4537, "step": 3901 }, { "epoch": 2.59, "learning_rate": 5.283988654083029e-06, "loss": 0.4242, "step": 3902 }, { "epoch": 2.59, "learning_rate": 5.279317656685421e-06, "loss": 0.3906, "step": 3903 }, { "epoch": 2.59, "learning_rate": 5.274647984224506e-06, "loss": 0.4283, "step": 3904 }, { "epoch": 2.59, "learning_rate": 5.269979638010893e-06, "loss": 0.4255, "step": 3905 }, { "epoch": 2.59, "learning_rate": 5.265312619354843e-06, "loss": 0.4389, "step": 3906 }, { "epoch": 2.6, "learning_rate": 5.2606469295662224e-06, "loss": 0.4016, "step": 3907 }, { "epoch": 2.6, "learning_rate": 5.25598256995453e-06, "loss": 0.4627, "step": 3908 }, { "epoch": 2.6, "learning_rate": 5.2513195418289035e-06, "loss": 0.416, "step": 3909 }, { "epoch": 2.6, "learning_rate": 5.24665784649809e-06, "loss": 0.4259, "step": 3910 }, { "epoch": 2.6, "learning_rate": 5.241997485270478e-06, "loss": 0.4351, "step": 3911 }, { "epoch": 2.6, "learning_rate": 5.237338459454067e-06, "loss": 0.4337, "step": 3912 }, { "epoch": 2.6, "learning_rate": 5.232680770356495e-06, "loss": 0.4478, "step": 3913 }, { "epoch": 2.6, "learning_rate": 5.2280244192850185e-06, "loss": 0.4425, "step": 3914 }, { "epoch": 2.6, "learning_rate": 5.223369407546509e-06, "loss": 0.4473, "step": 3915 }, { "epoch": 2.6, "learning_rate": 5.218715736447484e-06, "loss": 0.4829, "step": 3916 }, { "epoch": 2.6, "learning_rate": 5.2140634072940624e-06, "loss": 0.4156, "step": 3917 }, { "epoch": 2.6, "learning_rate": 5.209412421392005e-06, "loss": 0.423, "step": 3918 }, { "epoch": 2.6, "learning_rate": 5.204762780046682e-06, "loss": 0.4634, "step": 3919 }, { "epoch": 2.6, "learning_rate": 5.2001144845630906e-06, "loss": 0.4425, "step": 3920 }, { "epoch": 2.6, "learning_rate": 5.195467536245855e-06, "loss": 0.4434, "step": 3921 }, { "epoch": 2.61, "learning_rate": 5.19082193639921e-06, "loss": 0.4177, "step": 3922 }, { "epoch": 2.61, "learning_rate": 5.186177686327029e-06, "loss": 0.4291, "step": 3923 }, { "epoch": 2.61, "learning_rate": 5.181534787332791e-06, "loss": 0.4606, "step": 3924 }, { "epoch": 2.61, "learning_rate": 5.176893240719602e-06, "loss": 0.3969, "step": 3925 }, { "epoch": 2.61, "learning_rate": 5.172253047790187e-06, "loss": 0.4138, "step": 3926 }, { "epoch": 2.61, "learning_rate": 5.16761420984689e-06, "loss": 0.4162, "step": 3927 }, { "epoch": 2.61, "learning_rate": 5.162976728191685e-06, "loss": 0.4464, "step": 3928 }, { "epoch": 2.61, "learning_rate": 5.158340604126148e-06, "loss": 0.4454, "step": 3929 }, { "epoch": 2.61, "learning_rate": 5.153705838951495e-06, "loss": 0.4015, "step": 3930 }, { "epoch": 2.61, "learning_rate": 5.149072433968533e-06, "loss": 0.4529, "step": 3931 }, { "epoch": 2.61, "learning_rate": 5.144440390477714e-06, "loss": 0.4596, "step": 3932 }, { "epoch": 2.61, "learning_rate": 5.139809709779089e-06, "loss": 0.4108, "step": 3933 }, { "epoch": 2.61, "learning_rate": 5.135180393172343e-06, "loss": 0.4344, "step": 3934 }, { "epoch": 2.61, "learning_rate": 5.1305524419567595e-06, "loss": 0.4245, "step": 3935 }, { "epoch": 2.61, "learning_rate": 5.12592585743126e-06, "loss": 0.41, "step": 3936 }, { "epoch": 2.62, "learning_rate": 5.1213006408943645e-06, "loss": 0.4678, "step": 3937 }, { "epoch": 2.62, "learning_rate": 5.116676793644212e-06, "loss": 0.4478, "step": 3938 }, { "epoch": 2.62, "learning_rate": 5.11205431697857e-06, "loss": 0.4218, "step": 3939 }, { "epoch": 2.62, "learning_rate": 5.107433212194801e-06, "loss": 0.4355, "step": 3940 }, { "epoch": 2.62, "learning_rate": 5.102813480589905e-06, "loss": 0.4411, "step": 3941 }, { "epoch": 2.62, "learning_rate": 5.098195123460481e-06, "loss": 0.4242, "step": 3942 }, { "epoch": 2.62, "learning_rate": 5.093578142102742e-06, "loss": 0.4505, "step": 3943 }, { "epoch": 2.62, "learning_rate": 5.0889625378125255e-06, "loss": 0.4334, "step": 3944 }, { "epoch": 2.62, "learning_rate": 5.08434831188527e-06, "loss": 0.4675, "step": 3945 }, { "epoch": 2.62, "learning_rate": 5.079735465616041e-06, "loss": 0.4508, "step": 3946 }, { "epoch": 2.62, "learning_rate": 5.075124000299506e-06, "loss": 0.4567, "step": 3947 }, { "epoch": 2.62, "learning_rate": 5.070513917229944e-06, "loss": 0.4502, "step": 3948 }, { "epoch": 2.62, "learning_rate": 5.065905217701257e-06, "loss": 0.3799, "step": 3949 }, { "epoch": 2.62, "learning_rate": 5.061297903006943e-06, "loss": 0.4242, "step": 3950 }, { "epoch": 2.62, "learning_rate": 5.056691974440132e-06, "loss": 0.4438, "step": 3951 }, { "epoch": 2.63, "learning_rate": 5.052087433293542e-06, "loss": 0.4284, "step": 3952 }, { "epoch": 2.63, "learning_rate": 5.0474842808595226e-06, "loss": 0.4535, "step": 3953 }, { "epoch": 2.63, "learning_rate": 5.042882518430018e-06, "loss": 0.4418, "step": 3954 }, { "epoch": 2.63, "learning_rate": 5.038282147296585e-06, "loss": 0.4433, "step": 3955 }, { "epoch": 2.63, "learning_rate": 5.033683168750401e-06, "loss": 0.4453, "step": 3956 }, { "epoch": 2.63, "learning_rate": 5.029085584082238e-06, "loss": 0.4176, "step": 3957 }, { "epoch": 2.63, "learning_rate": 5.024489394582488e-06, "loss": 0.4295, "step": 3958 }, { "epoch": 2.63, "learning_rate": 5.019894601541144e-06, "loss": 0.4894, "step": 3959 }, { "epoch": 2.63, "learning_rate": 5.015301206247813e-06, "loss": 0.4646, "step": 3960 }, { "epoch": 2.63, "learning_rate": 5.010709209991698e-06, "loss": 0.4669, "step": 3961 }, { "epoch": 2.63, "learning_rate": 5.0061186140616235e-06, "loss": 0.4535, "step": 3962 }, { "epoch": 2.63, "learning_rate": 5.0015294197460204e-06, "loss": 0.4321, "step": 3963 }, { "epoch": 2.63, "learning_rate": 4.996941628332912e-06, "loss": 0.4394, "step": 3964 }, { "epoch": 2.63, "learning_rate": 4.992355241109949e-06, "loss": 0.4583, "step": 3965 }, { "epoch": 2.63, "learning_rate": 4.987770259364359e-06, "loss": 0.4509, "step": 3966 }, { "epoch": 2.64, "learning_rate": 4.983186684383007e-06, "loss": 0.3877, "step": 3967 }, { "epoch": 2.64, "learning_rate": 4.978604517452338e-06, "loss": 0.3827, "step": 3968 }, { "epoch": 2.64, "learning_rate": 4.974023759858416e-06, "loss": 0.4354, "step": 3969 }, { "epoch": 2.64, "learning_rate": 4.96944441288691e-06, "loss": 0.427, "step": 3970 }, { "epoch": 2.64, "learning_rate": 4.9648664778230856e-06, "loss": 0.4127, "step": 3971 }, { "epoch": 2.64, "learning_rate": 4.960289955951813e-06, "loss": 0.4374, "step": 3972 }, { "epoch": 2.64, "learning_rate": 4.9557148485575665e-06, "loss": 0.4438, "step": 3973 }, { "epoch": 2.64, "learning_rate": 4.951141156924432e-06, "loss": 0.4572, "step": 3974 }, { "epoch": 2.64, "learning_rate": 4.94656888233608e-06, "loss": 0.4247, "step": 3975 }, { "epoch": 2.64, "learning_rate": 4.941998026075806e-06, "loss": 0.416, "step": 3976 }, { "epoch": 2.64, "learning_rate": 4.937428589426489e-06, "loss": 0.4632, "step": 3977 }, { "epoch": 2.64, "learning_rate": 4.932860573670614e-06, "loss": 0.4342, "step": 3978 }, { "epoch": 2.64, "learning_rate": 4.9282939800902764e-06, "loss": 0.4363, "step": 3979 }, { "epoch": 2.64, "learning_rate": 4.923728809967156e-06, "loss": 0.4296, "step": 3980 }, { "epoch": 2.64, "learning_rate": 4.919165064582553e-06, "loss": 0.4325, "step": 3981 }, { "epoch": 2.65, "learning_rate": 4.914602745217352e-06, "loss": 0.4414, "step": 3982 }, { "epoch": 2.65, "learning_rate": 4.910041853152038e-06, "loss": 0.4372, "step": 3983 }, { "epoch": 2.65, "learning_rate": 4.905482389666708e-06, "loss": 0.4409, "step": 3984 }, { "epoch": 2.65, "learning_rate": 4.900924356041044e-06, "loss": 0.4433, "step": 3985 }, { "epoch": 2.65, "learning_rate": 4.896367753554336e-06, "loss": 0.4482, "step": 3986 }, { "epoch": 2.65, "learning_rate": 4.891812583485467e-06, "loss": 0.4383, "step": 3987 }, { "epoch": 2.65, "learning_rate": 4.887258847112923e-06, "loss": 0.4165, "step": 3988 }, { "epoch": 2.65, "learning_rate": 4.882706545714783e-06, "loss": 0.4243, "step": 3989 }, { "epoch": 2.65, "learning_rate": 4.878155680568721e-06, "loss": 0.4543, "step": 3990 }, { "epoch": 2.65, "learning_rate": 4.87360625295202e-06, "loss": 0.4425, "step": 3991 }, { "epoch": 2.65, "learning_rate": 4.869058264141541e-06, "loss": 0.4241, "step": 3992 }, { "epoch": 2.65, "learning_rate": 4.864511715413761e-06, "loss": 0.4387, "step": 3993 }, { "epoch": 2.65, "learning_rate": 4.8599666080447395e-06, "loss": 0.4028, "step": 3994 }, { "epoch": 2.65, "learning_rate": 4.855422943310129e-06, "loss": 0.4479, "step": 3995 }, { "epoch": 2.65, "learning_rate": 4.850880722485195e-06, "loss": 0.4545, "step": 3996 }, { "epoch": 2.66, "learning_rate": 4.846339946844776e-06, "loss": 0.4222, "step": 3997 }, { "epoch": 2.66, "learning_rate": 4.841800617663322e-06, "loss": 0.4789, "step": 3998 }, { "epoch": 2.66, "learning_rate": 4.8372627362148675e-06, "loss": 0.4321, "step": 3999 }, { "epoch": 2.66, "learning_rate": 4.832726303773042e-06, "loss": 0.4266, "step": 4000 }, { "epoch": 2.66, "learning_rate": 4.8281913216110665e-06, "loss": 0.4097, "step": 4001 }, { "epoch": 2.66, "learning_rate": 4.823657791001766e-06, "loss": 0.4569, "step": 4002 }, { "epoch": 2.66, "learning_rate": 4.819125713217541e-06, "loss": 0.4252, "step": 4003 }, { "epoch": 2.66, "learning_rate": 4.814595089530396e-06, "loss": 0.4612, "step": 4004 }, { "epoch": 2.66, "learning_rate": 4.810065921211936e-06, "loss": 0.4303, "step": 4005 }, { "epoch": 2.66, "learning_rate": 4.805538209533328e-06, "loss": 0.4585, "step": 4006 }, { "epoch": 2.66, "learning_rate": 4.801011955765361e-06, "loss": 0.4431, "step": 4007 }, { "epoch": 2.66, "learning_rate": 4.796487161178394e-06, "loss": 0.4408, "step": 4008 }, { "epoch": 2.66, "learning_rate": 4.791963827042392e-06, "loss": 0.4571, "step": 4009 }, { "epoch": 2.66, "learning_rate": 4.787441954626895e-06, "loss": 0.435, "step": 4010 }, { "epoch": 2.67, "learning_rate": 4.782921545201049e-06, "loss": 0.4716, "step": 4011 }, { "epoch": 2.67, "learning_rate": 4.7784026000335755e-06, "loss": 0.4543, "step": 4012 }, { "epoch": 2.67, "learning_rate": 4.773885120392788e-06, "loss": 0.4447, "step": 4013 }, { "epoch": 2.67, "learning_rate": 4.769369107546598e-06, "loss": 0.4332, "step": 4014 }, { "epoch": 2.67, "learning_rate": 4.764854562762491e-06, "loss": 0.4549, "step": 4015 }, { "epoch": 2.67, "learning_rate": 4.7603414873075545e-06, "loss": 0.4357, "step": 4016 }, { "epoch": 2.67, "learning_rate": 4.755829882448455e-06, "loss": 0.423, "step": 4017 }, { "epoch": 2.67, "learning_rate": 4.751319749451443e-06, "loss": 0.4224, "step": 4018 }, { "epoch": 2.67, "learning_rate": 4.746811089582368e-06, "loss": 0.3962, "step": 4019 }, { "epoch": 2.67, "learning_rate": 4.742303904106653e-06, "loss": 0.4106, "step": 4020 }, { "epoch": 2.67, "learning_rate": 4.73779819428932e-06, "loss": 0.4429, "step": 4021 }, { "epoch": 2.67, "learning_rate": 4.733293961394967e-06, "loss": 0.4902, "step": 4022 }, { "epoch": 2.67, "learning_rate": 4.728791206687776e-06, "loss": 0.467, "step": 4023 }, { "epoch": 2.67, "learning_rate": 4.7242899314315275e-06, "loss": 0.4164, "step": 4024 }, { "epoch": 2.67, "learning_rate": 4.719790136889569e-06, "loss": 0.4542, "step": 4025 }, { "epoch": 2.68, "learning_rate": 4.71529182432485e-06, "loss": 0.403, "step": 4026 }, { "epoch": 2.68, "learning_rate": 4.710794994999888e-06, "loss": 0.4358, "step": 4027 }, { "epoch": 2.68, "learning_rate": 4.706299650176802e-06, "loss": 0.4522, "step": 4028 }, { "epoch": 2.68, "learning_rate": 4.701805791117269e-06, "loss": 0.4088, "step": 4029 }, { "epoch": 2.68, "learning_rate": 4.697313419082573e-06, "loss": 0.451, "step": 4030 }, { "epoch": 2.68, "learning_rate": 4.692822535333575e-06, "loss": 0.4132, "step": 4031 }, { "epoch": 2.68, "learning_rate": 4.688333141130705e-06, "loss": 0.4412, "step": 4032 }, { "epoch": 2.68, "learning_rate": 4.683845237733996e-06, "loss": 0.457, "step": 4033 }, { "epoch": 2.68, "learning_rate": 4.679358826403045e-06, "loss": 0.431, "step": 4034 }, { "epoch": 2.68, "learning_rate": 4.674873908397039e-06, "loss": 0.4441, "step": 4035 }, { "epoch": 2.68, "learning_rate": 4.6703904849747376e-06, "loss": 0.4424, "step": 4036 }, { "epoch": 2.68, "learning_rate": 4.665908557394492e-06, "loss": 0.4337, "step": 4037 }, { "epoch": 2.68, "learning_rate": 4.661428126914233e-06, "loss": 0.4703, "step": 4038 }, { "epoch": 2.68, "learning_rate": 4.6569491947914555e-06, "loss": 0.4479, "step": 4039 }, { "epoch": 2.68, "learning_rate": 4.65247176228326e-06, "loss": 0.4456, "step": 4040 }, { "epoch": 2.69, "learning_rate": 4.647995830646296e-06, "loss": 0.5017, "step": 4041 }, { "epoch": 2.69, "learning_rate": 4.643521401136816e-06, "loss": 0.4032, "step": 4042 }, { "epoch": 2.69, "learning_rate": 4.639048475010636e-06, "loss": 0.4391, "step": 4043 }, { "epoch": 2.69, "learning_rate": 4.634577053523162e-06, "loss": 0.4689, "step": 4044 }, { "epoch": 2.69, "learning_rate": 4.630107137929365e-06, "loss": 0.4273, "step": 4045 }, { "epoch": 2.69, "learning_rate": 4.625638729483808e-06, "loss": 0.4457, "step": 4046 }, { "epoch": 2.69, "learning_rate": 4.621171829440617e-06, "loss": 0.4647, "step": 4047 }, { "epoch": 2.69, "learning_rate": 4.6167064390535e-06, "loss": 0.4494, "step": 4048 }, { "epoch": 2.69, "learning_rate": 4.612242559575746e-06, "loss": 0.4335, "step": 4049 }, { "epoch": 2.69, "learning_rate": 4.6077801922602105e-06, "loss": 0.4495, "step": 4050 }, { "epoch": 2.69, "learning_rate": 4.603319338359335e-06, "loss": 0.459, "step": 4051 }, { "epoch": 2.69, "learning_rate": 4.598859999125132e-06, "loss": 0.449, "step": 4052 }, { "epoch": 2.69, "learning_rate": 4.594402175809179e-06, "loss": 0.489, "step": 4053 }, { "epoch": 2.69, "learning_rate": 4.589945869662647e-06, "loss": 0.4142, "step": 4054 }, { "epoch": 2.69, "learning_rate": 4.585491081936263e-06, "loss": 0.4366, "step": 4055 }, { "epoch": 2.7, "learning_rate": 4.581037813880344e-06, "loss": 0.4595, "step": 4056 }, { "epoch": 2.7, "learning_rate": 4.5765860667447685e-06, "loss": 0.4137, "step": 4057 }, { "epoch": 2.7, "learning_rate": 4.572135841778989e-06, "loss": 0.4234, "step": 4058 }, { "epoch": 2.7, "learning_rate": 4.56768714023204e-06, "loss": 0.4251, "step": 4059 }, { "epoch": 2.7, "learning_rate": 4.563239963352517e-06, "loss": 0.441, "step": 4060 }, { "epoch": 2.7, "learning_rate": 4.558794312388598e-06, "loss": 0.4508, "step": 4061 }, { "epoch": 2.7, "learning_rate": 4.554350188588021e-06, "loss": 0.4518, "step": 4062 }, { "epoch": 2.7, "learning_rate": 4.549907593198111e-06, "loss": 0.445, "step": 4063 }, { "epoch": 2.7, "learning_rate": 4.54546652746575e-06, "loss": 0.4511, "step": 4064 }, { "epoch": 2.7, "learning_rate": 4.5410269926373905e-06, "loss": 0.3977, "step": 4065 }, { "epoch": 2.7, "learning_rate": 4.536588989959071e-06, "loss": 0.4401, "step": 4066 }, { "epoch": 2.7, "learning_rate": 4.5321525206763805e-06, "loss": 0.4592, "step": 4067 }, { "epoch": 2.7, "learning_rate": 4.527717586034494e-06, "loss": 0.4071, "step": 4068 }, { "epoch": 2.7, "learning_rate": 4.523284187278144e-06, "loss": 0.4383, "step": 4069 }, { "epoch": 2.7, "learning_rate": 4.518852325651638e-06, "loss": 0.5033, "step": 4070 }, { "epoch": 2.71, "learning_rate": 4.514422002398846e-06, "loss": 0.4573, "step": 4071 }, { "epoch": 2.71, "learning_rate": 4.5099932187632146e-06, "loss": 0.4646, "step": 4072 }, { "epoch": 2.71, "learning_rate": 4.505565975987757e-06, "loss": 0.437, "step": 4073 }, { "epoch": 2.71, "learning_rate": 4.501140275315049e-06, "loss": 0.4392, "step": 4074 }, { "epoch": 2.71, "learning_rate": 4.496716117987234e-06, "loss": 0.4408, "step": 4075 }, { "epoch": 2.71, "learning_rate": 4.492293505246024e-06, "loss": 0.4576, "step": 4076 }, { "epoch": 2.71, "learning_rate": 4.4878724383327e-06, "loss": 0.4386, "step": 4077 }, { "epoch": 2.71, "learning_rate": 4.483452918488104e-06, "loss": 0.4628, "step": 4078 }, { "epoch": 2.71, "learning_rate": 4.479034946952646e-06, "loss": 0.4258, "step": 4079 }, { "epoch": 2.71, "learning_rate": 4.474618524966313e-06, "loss": 0.439, "step": 4080 }, { "epoch": 2.71, "learning_rate": 4.47020365376863e-06, "loss": 0.4353, "step": 4081 }, { "epoch": 2.71, "learning_rate": 4.465790334598712e-06, "loss": 0.4503, "step": 4082 }, { "epoch": 2.71, "learning_rate": 4.461378568695225e-06, "loss": 0.435, "step": 4083 }, { "epoch": 2.71, "learning_rate": 4.456968357296408e-06, "loss": 0.4563, "step": 4084 }, { "epoch": 2.71, "learning_rate": 4.452559701640053e-06, "loss": 0.4357, "step": 4085 }, { "epoch": 2.72, "learning_rate": 4.448152602963528e-06, "loss": 0.4659, "step": 4086 }, { "epoch": 2.72, "learning_rate": 4.4437470625037535e-06, "loss": 0.4486, "step": 4087 }, { "epoch": 2.72, "learning_rate": 4.439343081497214e-06, "loss": 0.4612, "step": 4088 }, { "epoch": 2.72, "learning_rate": 4.434940661179965e-06, "loss": 0.4041, "step": 4089 }, { "epoch": 2.72, "learning_rate": 4.43053980278761e-06, "loss": 0.4262, "step": 4090 }, { "epoch": 2.72, "learning_rate": 4.426140507555331e-06, "loss": 0.4269, "step": 4091 }, { "epoch": 2.72, "learning_rate": 4.421742776717857e-06, "loss": 0.4871, "step": 4092 }, { "epoch": 2.72, "learning_rate": 4.41734661150948e-06, "loss": 0.4581, "step": 4093 }, { "epoch": 2.72, "learning_rate": 4.412952013164062e-06, "loss": 0.4301, "step": 4094 }, { "epoch": 2.72, "learning_rate": 4.4085589829150125e-06, "loss": 0.4401, "step": 4095 }, { "epoch": 2.72, "learning_rate": 4.404167521995315e-06, "loss": 0.4588, "step": 4096 }, { "epoch": 2.72, "learning_rate": 4.3997776316374995e-06, "loss": 0.4712, "step": 4097 }, { "epoch": 2.72, "learning_rate": 4.395389313073659e-06, "loss": 0.4159, "step": 4098 }, { "epoch": 2.72, "learning_rate": 4.3910025675354515e-06, "loss": 0.4163, "step": 4099 }, { "epoch": 2.73, "learning_rate": 4.386617396254085e-06, "loss": 0.4337, "step": 4100 }, { "epoch": 2.73, "learning_rate": 4.3822338004603336e-06, "loss": 0.4797, "step": 4101 }, { "epoch": 2.73, "learning_rate": 4.37785178138452e-06, "loss": 0.4334, "step": 4102 }, { "epoch": 2.73, "learning_rate": 4.373471340256539e-06, "loss": 0.4503, "step": 4103 }, { "epoch": 2.73, "learning_rate": 4.36909247830582e-06, "loss": 0.4302, "step": 4104 }, { "epoch": 2.73, "learning_rate": 4.364715196761368e-06, "loss": 0.4162, "step": 4105 }, { "epoch": 2.73, "learning_rate": 4.360339496851742e-06, "loss": 0.4677, "step": 4106 }, { "epoch": 2.73, "learning_rate": 4.355965379805048e-06, "loss": 0.446, "step": 4107 }, { "epoch": 2.73, "learning_rate": 4.351592846848961e-06, "loss": 0.4509, "step": 4108 }, { "epoch": 2.73, "learning_rate": 4.347221899210698e-06, "loss": 0.4808, "step": 4109 }, { "epoch": 2.73, "learning_rate": 4.342852538117039e-06, "loss": 0.4909, "step": 4110 }, { "epoch": 2.73, "learning_rate": 4.338484764794312e-06, "loss": 0.4291, "step": 4111 }, { "epoch": 2.73, "learning_rate": 4.334118580468411e-06, "loss": 0.4433, "step": 4112 }, { "epoch": 2.73, "learning_rate": 4.32975398636477e-06, "loss": 0.3994, "step": 4113 }, { "epoch": 2.73, "learning_rate": 4.325390983708388e-06, "loss": 0.4079, "step": 4114 }, { "epoch": 2.74, "learning_rate": 4.32102957372382e-06, "loss": 0.4606, "step": 4115 }, { "epoch": 2.74, "learning_rate": 4.316669757635153e-06, "loss": 0.4422, "step": 4116 }, { "epoch": 2.74, "learning_rate": 4.31231153666605e-06, "loss": 0.4542, "step": 4117 }, { "epoch": 2.74, "learning_rate": 4.30795491203971e-06, "loss": 0.4131, "step": 4118 }, { "epoch": 2.74, "learning_rate": 4.3035998849789e-06, "loss": 0.417, "step": 4119 }, { "epoch": 2.74, "learning_rate": 4.299246456705921e-06, "loss": 0.4232, "step": 4120 }, { "epoch": 2.74, "learning_rate": 4.2948946284426405e-06, "loss": 0.447, "step": 4121 }, { "epoch": 2.74, "learning_rate": 4.290544401410468e-06, "loss": 0.4466, "step": 4122 }, { "epoch": 2.74, "learning_rate": 4.286195776830362e-06, "loss": 0.4679, "step": 4123 }, { "epoch": 2.74, "learning_rate": 4.281848755922842e-06, "loss": 0.4319, "step": 4124 }, { "epoch": 2.74, "learning_rate": 4.277503339907961e-06, "loss": 0.4105, "step": 4125 }, { "epoch": 2.74, "learning_rate": 4.273159530005343e-06, "loss": 0.4393, "step": 4126 }, { "epoch": 2.74, "learning_rate": 4.268817327434141e-06, "loss": 0.419, "step": 4127 }, { "epoch": 2.74, "learning_rate": 4.2644767334130656e-06, "loss": 0.4209, "step": 4128 }, { "epoch": 2.74, "learning_rate": 4.26013774916038e-06, "loss": 0.4325, "step": 4129 }, { "epoch": 2.75, "learning_rate": 4.255800375893885e-06, "loss": 0.4335, "step": 4130 }, { "epoch": 2.75, "learning_rate": 4.2514646148309415e-06, "loss": 0.4273, "step": 4131 }, { "epoch": 2.75, "learning_rate": 4.247130467188448e-06, "loss": 0.4356, "step": 4132 }, { "epoch": 2.75, "learning_rate": 4.242797934182853e-06, "loss": 0.4615, "step": 4133 }, { "epoch": 2.75, "learning_rate": 4.238467017030156e-06, "loss": 0.4425, "step": 4134 }, { "epoch": 2.75, "learning_rate": 4.234137716945897e-06, "loss": 0.4223, "step": 4135 }, { "epoch": 2.75, "learning_rate": 4.229810035145168e-06, "loss": 0.4459, "step": 4136 }, { "epoch": 2.75, "learning_rate": 4.225483972842598e-06, "loss": 0.4132, "step": 4137 }, { "epoch": 2.75, "learning_rate": 4.221159531252375e-06, "loss": 0.423, "step": 4138 }, { "epoch": 2.75, "learning_rate": 4.21683671158822e-06, "loss": 0.4428, "step": 4139 }, { "epoch": 2.75, "learning_rate": 4.212515515063399e-06, "loss": 0.4683, "step": 4140 }, { "epoch": 2.75, "learning_rate": 4.2081959428907344e-06, "loss": 0.4144, "step": 4141 }, { "epoch": 2.75, "learning_rate": 4.203877996282577e-06, "loss": 0.4614, "step": 4142 }, { "epoch": 2.75, "learning_rate": 4.199561676450837e-06, "loss": 0.4072, "step": 4143 }, { "epoch": 2.75, "learning_rate": 4.195246984606957e-06, "loss": 0.4281, "step": 4144 }, { "epoch": 2.76, "learning_rate": 4.1909339219619225e-06, "loss": 0.3942, "step": 4145 }, { "epoch": 2.76, "learning_rate": 4.186622489726265e-06, "loss": 0.4136, "step": 4146 }, { "epoch": 2.76, "learning_rate": 4.182312689110062e-06, "loss": 0.4671, "step": 4147 }, { "epoch": 2.76, "learning_rate": 4.1780045213229316e-06, "loss": 0.4197, "step": 4148 }, { "epoch": 2.76, "learning_rate": 4.173697987574028e-06, "loss": 0.4237, "step": 4149 }, { "epoch": 2.76, "learning_rate": 4.16939308907205e-06, "loss": 0.4691, "step": 4150 }, { "epoch": 2.76, "learning_rate": 4.165089827025236e-06, "loss": 0.4331, "step": 4151 }, { "epoch": 2.76, "learning_rate": 4.160788202641373e-06, "loss": 0.4336, "step": 4152 }, { "epoch": 2.76, "learning_rate": 4.156488217127774e-06, "loss": 0.4559, "step": 4153 }, { "epoch": 2.76, "learning_rate": 4.152189871691306e-06, "loss": 0.4417, "step": 4154 }, { "epoch": 2.76, "learning_rate": 4.147893167538375e-06, "loss": 0.4707, "step": 4155 }, { "epoch": 2.76, "learning_rate": 4.143598105874908e-06, "loss": 0.4617, "step": 4156 }, { "epoch": 2.76, "learning_rate": 4.139304687906395e-06, "loss": 0.4391, "step": 4157 }, { "epoch": 2.76, "learning_rate": 4.135012914837846e-06, "loss": 0.4303, "step": 4158 }, { "epoch": 2.76, "learning_rate": 4.130722787873825e-06, "loss": 0.4488, "step": 4159 }, { "epoch": 2.77, "learning_rate": 4.126434308218421e-06, "loss": 0.4475, "step": 4160 }, { "epoch": 2.77, "learning_rate": 4.12214747707527e-06, "loss": 0.4476, "step": 4161 }, { "epoch": 2.77, "learning_rate": 4.117862295647539e-06, "loss": 0.4312, "step": 4162 }, { "epoch": 2.77, "learning_rate": 4.113578765137931e-06, "loss": 0.4398, "step": 4163 }, { "epoch": 2.77, "learning_rate": 4.109296886748695e-06, "loss": 0.4868, "step": 4164 }, { "epoch": 2.77, "learning_rate": 4.105016661681605e-06, "loss": 0.4264, "step": 4165 }, { "epoch": 2.77, "learning_rate": 4.10073809113798e-06, "loss": 0.4519, "step": 4166 }, { "epoch": 2.77, "learning_rate": 4.096461176318671e-06, "loss": 0.4329, "step": 4167 }, { "epoch": 2.77, "learning_rate": 4.092185918424057e-06, "loss": 0.4167, "step": 4168 }, { "epoch": 2.77, "learning_rate": 4.087912318654071e-06, "loss": 0.4253, "step": 4169 }, { "epoch": 2.77, "learning_rate": 4.083640378208156e-06, "loss": 0.436, "step": 4170 }, { "epoch": 2.77, "learning_rate": 4.079370098285311e-06, "loss": 0.4489, "step": 4171 }, { "epoch": 2.77, "learning_rate": 4.075101480084058e-06, "loss": 0.4391, "step": 4172 }, { "epoch": 2.77, "learning_rate": 4.07083452480245e-06, "loss": 0.4768, "step": 4173 }, { "epoch": 2.77, "learning_rate": 4.066569233638083e-06, "loss": 0.4319, "step": 4174 }, { "epoch": 2.78, "learning_rate": 4.0623056077880775e-06, "loss": 0.4375, "step": 4175 }, { "epoch": 2.78, "learning_rate": 4.0580436484490925e-06, "loss": 0.451, "step": 4176 }, { "epoch": 2.78, "learning_rate": 4.053783356817313e-06, "loss": 0.4474, "step": 4177 }, { "epoch": 2.78, "learning_rate": 4.0495247340884684e-06, "loss": 0.4197, "step": 4178 }, { "epoch": 2.78, "learning_rate": 4.045267781457797e-06, "loss": 0.4391, "step": 4179 }, { "epoch": 2.78, "learning_rate": 4.04101250012009e-06, "loss": 0.4508, "step": 4180 }, { "epoch": 2.78, "learning_rate": 4.036758891269663e-06, "loss": 0.4634, "step": 4181 }, { "epoch": 2.78, "learning_rate": 4.032506956100356e-06, "loss": 0.4542, "step": 4182 }, { "epoch": 2.78, "learning_rate": 4.0282566958055505e-06, "loss": 0.4723, "step": 4183 }, { "epoch": 2.78, "learning_rate": 4.024008111578147e-06, "loss": 0.4217, "step": 4184 }, { "epoch": 2.78, "learning_rate": 4.0197612046105815e-06, "loss": 0.4617, "step": 4185 }, { "epoch": 2.78, "learning_rate": 4.015515976094815e-06, "loss": 0.4487, "step": 4186 }, { "epoch": 2.78, "learning_rate": 4.011272427222345e-06, "loss": 0.4489, "step": 4187 }, { "epoch": 2.78, "learning_rate": 4.0070305591841885e-06, "loss": 0.4625, "step": 4188 }, { "epoch": 2.78, "learning_rate": 4.002790373170896e-06, "loss": 0.4363, "step": 4189 }, { "epoch": 2.79, "learning_rate": 3.998551870372554e-06, "loss": 0.4572, "step": 4190 }, { "epoch": 2.79, "learning_rate": 3.994315051978753e-06, "loss": 0.4542, "step": 4191 }, { "epoch": 2.79, "learning_rate": 3.990079919178636e-06, "loss": 0.4342, "step": 4192 }, { "epoch": 2.79, "learning_rate": 3.9858464731608545e-06, "loss": 0.4327, "step": 4193 }, { "epoch": 2.79, "learning_rate": 3.981614715113602e-06, "loss": 0.4045, "step": 4194 }, { "epoch": 2.79, "learning_rate": 3.977384646224584e-06, "loss": 0.4705, "step": 4195 }, { "epoch": 2.79, "learning_rate": 3.973156267681044e-06, "loss": 0.4499, "step": 4196 }, { "epoch": 2.79, "learning_rate": 3.968929580669743e-06, "loss": 0.4813, "step": 4197 }, { "epoch": 2.79, "learning_rate": 3.9647045863769685e-06, "loss": 0.4303, "step": 4198 }, { "epoch": 2.79, "learning_rate": 3.960481285988538e-06, "loss": 0.4538, "step": 4199 }, { "epoch": 2.79, "learning_rate": 3.956259680689784e-06, "loss": 0.4587, "step": 4200 }, { "epoch": 2.79, "learning_rate": 3.952039771665575e-06, "loss": 0.4618, "step": 4201 }, { "epoch": 2.79, "learning_rate": 3.9478215601002955e-06, "loss": 0.4581, "step": 4202 }, { "epoch": 2.79, "learning_rate": 3.943605047177852e-06, "loss": 0.4384, "step": 4203 }, { "epoch": 2.8, "learning_rate": 3.9393902340816835e-06, "loss": 0.4719, "step": 4204 }, { "epoch": 2.8, "learning_rate": 3.935177121994741e-06, "loss": 0.4446, "step": 4205 }, { "epoch": 2.8, "learning_rate": 3.930965712099508e-06, "loss": 0.3973, "step": 4206 }, { "epoch": 2.8, "learning_rate": 3.926756005577984e-06, "loss": 0.4523, "step": 4207 }, { "epoch": 2.8, "learning_rate": 3.922548003611688e-06, "loss": 0.4173, "step": 4208 }, { "epoch": 2.8, "learning_rate": 3.918341707381672e-06, "loss": 0.4462, "step": 4209 }, { "epoch": 2.8, "learning_rate": 3.9141371180684925e-06, "loss": 0.4445, "step": 4210 }, { "epoch": 2.8, "learning_rate": 3.909934236852246e-06, "loss": 0.4312, "step": 4211 }, { "epoch": 2.8, "learning_rate": 3.9057330649125325e-06, "loss": 0.4166, "step": 4212 }, { "epoch": 2.8, "learning_rate": 3.901533603428489e-06, "loss": 0.4373, "step": 4213 }, { "epoch": 2.8, "learning_rate": 3.897335853578748e-06, "loss": 0.4164, "step": 4214 }, { "epoch": 2.8, "learning_rate": 3.893139816541487e-06, "loss": 0.4386, "step": 4215 }, { "epoch": 2.8, "learning_rate": 3.888945493494393e-06, "loss": 0.4224, "step": 4216 }, { "epoch": 2.8, "learning_rate": 3.884752885614665e-06, "loss": 0.454, "step": 4217 }, { "epoch": 2.8, "learning_rate": 3.8805619940790355e-06, "loss": 0.4281, "step": 4218 }, { "epoch": 2.81, "learning_rate": 3.876372820063741e-06, "loss": 0.4241, "step": 4219 }, { "epoch": 2.81, "learning_rate": 3.872185364744543e-06, "loss": 0.464, "step": 4220 }, { "epoch": 2.81, "learning_rate": 3.867999629296715e-06, "loss": 0.4272, "step": 4221 }, { "epoch": 2.81, "learning_rate": 3.863815614895056e-06, "loss": 0.418, "step": 4222 }, { "epoch": 2.81, "learning_rate": 3.859633322713882e-06, "loss": 0.4392, "step": 4223 }, { "epoch": 2.81, "learning_rate": 3.855452753927018e-06, "loss": 0.4264, "step": 4224 }, { "epoch": 2.81, "learning_rate": 3.851273909707809e-06, "loss": 0.4696, "step": 4225 }, { "epoch": 2.81, "learning_rate": 3.847096791229111e-06, "loss": 0.3982, "step": 4226 }, { "epoch": 2.81, "learning_rate": 3.842921399663309e-06, "loss": 0.4344, "step": 4227 }, { "epoch": 2.81, "learning_rate": 3.838747736182289e-06, "loss": 0.4585, "step": 4228 }, { "epoch": 2.81, "learning_rate": 3.834575801957463e-06, "loss": 0.408, "step": 4229 }, { "epoch": 2.81, "learning_rate": 3.8304055981597495e-06, "loss": 0.3865, "step": 4230 }, { "epoch": 2.81, "learning_rate": 3.826237125959582e-06, "loss": 0.4493, "step": 4231 }, { "epoch": 2.81, "learning_rate": 3.822070386526916e-06, "loss": 0.4511, "step": 4232 }, { "epoch": 2.81, "learning_rate": 3.817905381031208e-06, "loss": 0.4405, "step": 4233 }, { "epoch": 2.82, "learning_rate": 3.813742110641443e-06, "loss": 0.4229, "step": 4234 }, { "epoch": 2.82, "learning_rate": 3.809580576526104e-06, "loss": 0.4621, "step": 4235 }, { "epoch": 2.82, "learning_rate": 3.8054207798531995e-06, "loss": 0.407, "step": 4236 }, { "epoch": 2.82, "learning_rate": 3.8012627217902408e-06, "loss": 0.4144, "step": 4237 }, { "epoch": 2.82, "learning_rate": 3.7971064035042515e-06, "loss": 0.4219, "step": 4238 }, { "epoch": 2.82, "learning_rate": 3.7929518261617794e-06, "loss": 0.4361, "step": 4239 }, { "epoch": 2.82, "learning_rate": 3.7887989909288648e-06, "loss": 0.451, "step": 4240 }, { "epoch": 2.82, "learning_rate": 3.7846478989710776e-06, "loss": 0.4412, "step": 4241 }, { "epoch": 2.82, "learning_rate": 3.7804985514534853e-06, "loss": 0.4464, "step": 4242 }, { "epoch": 2.82, "learning_rate": 3.776350949540666e-06, "loss": 0.457, "step": 4243 }, { "epoch": 2.82, "learning_rate": 3.7722050943967203e-06, "loss": 0.4425, "step": 4244 }, { "epoch": 2.82, "learning_rate": 3.7680609871852436e-06, "loss": 0.4208, "step": 4245 }, { "epoch": 2.82, "learning_rate": 3.763918629069352e-06, "loss": 0.4589, "step": 4246 }, { "epoch": 2.82, "learning_rate": 3.7597780212116653e-06, "loss": 0.4631, "step": 4247 }, { "epoch": 2.82, "learning_rate": 3.7556391647743074e-06, "loss": 0.478, "step": 4248 }, { "epoch": 2.83, "learning_rate": 3.7515020609189234e-06, "loss": 0.4755, "step": 4249 }, { "epoch": 2.83, "learning_rate": 3.7473667108066524e-06, "loss": 0.4718, "step": 4250 }, { "epoch": 2.83, "learning_rate": 3.743233115598156e-06, "loss": 0.4399, "step": 4251 }, { "epoch": 2.83, "learning_rate": 3.739101276453586e-06, "loss": 0.4437, "step": 4252 }, { "epoch": 2.83, "learning_rate": 3.7349711945326238e-06, "loss": 0.4421, "step": 4253 }, { "epoch": 2.83, "learning_rate": 3.730842870994428e-06, "loss": 0.4646, "step": 4254 }, { "epoch": 2.83, "learning_rate": 3.726716306997692e-06, "loss": 0.4549, "step": 4255 }, { "epoch": 2.83, "learning_rate": 3.7225915037005966e-06, "loss": 0.4529, "step": 4256 }, { "epoch": 2.83, "learning_rate": 3.718468462260838e-06, "loss": 0.4405, "step": 4257 }, { "epoch": 2.83, "learning_rate": 3.7143471838356182e-06, "loss": 0.4525, "step": 4258 }, { "epoch": 2.83, "learning_rate": 3.7102276695816397e-06, "loss": 0.4476, "step": 4259 }, { "epoch": 2.83, "learning_rate": 3.70610992065511e-06, "loss": 0.4759, "step": 4260 }, { "epoch": 2.83, "learning_rate": 3.7019939382117397e-06, "loss": 0.4314, "step": 4261 }, { "epoch": 2.83, "learning_rate": 3.6978797234067533e-06, "loss": 0.4395, "step": 4262 }, { "epoch": 2.83, "learning_rate": 3.6937672773948663e-06, "loss": 0.4323, "step": 4263 }, { "epoch": 2.84, "learning_rate": 3.689656601330307e-06, "loss": 0.4383, "step": 4264 }, { "epoch": 2.84, "learning_rate": 3.68554769636681e-06, "loss": 0.4169, "step": 4265 }, { "epoch": 2.84, "learning_rate": 3.6814405636575935e-06, "loss": 0.4528, "step": 4266 }, { "epoch": 2.84, "learning_rate": 3.677335204355401e-06, "loss": 0.4464, "step": 4267 }, { "epoch": 2.84, "learning_rate": 3.673231619612464e-06, "loss": 0.4339, "step": 4268 }, { "epoch": 2.84, "learning_rate": 3.669129810580525e-06, "loss": 0.4291, "step": 4269 }, { "epoch": 2.84, "learning_rate": 3.665029778410819e-06, "loss": 0.459, "step": 4270 }, { "epoch": 2.84, "learning_rate": 3.660931524254092e-06, "loss": 0.4409, "step": 4271 }, { "epoch": 2.84, "learning_rate": 3.656835049260584e-06, "loss": 0.4234, "step": 4272 }, { "epoch": 2.84, "learning_rate": 3.6527403545800344e-06, "loss": 0.4002, "step": 4273 }, { "epoch": 2.84, "learning_rate": 3.6486474413616913e-06, "loss": 0.4492, "step": 4274 }, { "epoch": 2.84, "learning_rate": 3.6445563107542925e-06, "loss": 0.4307, "step": 4275 }, { "epoch": 2.84, "learning_rate": 3.6404669639060875e-06, "loss": 0.417, "step": 4276 }, { "epoch": 2.84, "learning_rate": 3.6363794019648147e-06, "loss": 0.4121, "step": 4277 }, { "epoch": 2.84, "learning_rate": 3.632293626077711e-06, "loss": 0.4349, "step": 4278 }, { "epoch": 2.85, "learning_rate": 3.628209637391523e-06, "loss": 0.4208, "step": 4279 }, { "epoch": 2.85, "learning_rate": 3.624127437052484e-06, "loss": 0.4198, "step": 4280 }, { "epoch": 2.85, "learning_rate": 3.620047026206335e-06, "loss": 0.4568, "step": 4281 }, { "epoch": 2.85, "learning_rate": 3.615968405998308e-06, "loss": 0.4398, "step": 4282 }, { "epoch": 2.85, "learning_rate": 3.61189157757313e-06, "loss": 0.4163, "step": 4283 }, { "epoch": 2.85, "learning_rate": 3.6078165420750366e-06, "loss": 0.4107, "step": 4284 }, { "epoch": 2.85, "learning_rate": 3.6037433006477475e-06, "loss": 0.4602, "step": 4285 }, { "epoch": 2.85, "learning_rate": 3.59967185443449e-06, "loss": 0.4634, "step": 4286 }, { "epoch": 2.85, "learning_rate": 3.595602204577975e-06, "loss": 0.4582, "step": 4287 }, { "epoch": 2.85, "learning_rate": 3.5915343522204284e-06, "loss": 0.4493, "step": 4288 }, { "epoch": 2.85, "learning_rate": 3.5874682985035437e-06, "loss": 0.4612, "step": 4289 }, { "epoch": 2.85, "learning_rate": 3.5834040445685325e-06, "loss": 0.4329, "step": 4290 }, { "epoch": 2.85, "learning_rate": 3.579341591556099e-06, "loss": 0.4494, "step": 4291 }, { "epoch": 2.85, "learning_rate": 3.575280940606429e-06, "loss": 0.46, "step": 4292 }, { "epoch": 2.85, "learning_rate": 3.571222092859218e-06, "loss": 0.4479, "step": 4293 }, { "epoch": 2.86, "learning_rate": 3.567165049453644e-06, "loss": 0.431, "step": 4294 }, { "epoch": 2.86, "learning_rate": 3.5631098115283833e-06, "loss": 0.4712, "step": 4295 }, { "epoch": 2.86, "learning_rate": 3.5590563802216004e-06, "loss": 0.4498, "step": 4296 }, { "epoch": 2.86, "learning_rate": 3.5550047566709646e-06, "loss": 0.4396, "step": 4297 }, { "epoch": 2.86, "learning_rate": 3.5509549420136235e-06, "loss": 0.4477, "step": 4298 }, { "epoch": 2.86, "learning_rate": 3.5469069373862296e-06, "loss": 0.4312, "step": 4299 }, { "epoch": 2.86, "learning_rate": 3.5428607439249197e-06, "loss": 0.437, "step": 4300 }, { "epoch": 2.86, "learning_rate": 3.5388163627653203e-06, "loss": 0.4517, "step": 4301 }, { "epoch": 2.86, "learning_rate": 3.5347737950425587e-06, "loss": 0.44, "step": 4302 }, { "epoch": 2.86, "learning_rate": 3.5307330418912424e-06, "loss": 0.4242, "step": 4303 }, { "epoch": 2.86, "learning_rate": 3.5266941044454816e-06, "loss": 0.4391, "step": 4304 }, { "epoch": 2.86, "learning_rate": 3.5226569838388647e-06, "loss": 0.4229, "step": 4305 }, { "epoch": 2.86, "learning_rate": 3.518621681204475e-06, "loss": 0.4624, "step": 4306 }, { "epoch": 2.86, "learning_rate": 3.514588197674891e-06, "loss": 0.4474, "step": 4307 }, { "epoch": 2.87, "learning_rate": 3.510556534382169e-06, "loss": 0.4697, "step": 4308 }, { "epoch": 2.87, "learning_rate": 3.50652669245787e-06, "loss": 0.4441, "step": 4309 }, { "epoch": 2.87, "learning_rate": 3.502498673033026e-06, "loss": 0.3996, "step": 4310 }, { "epoch": 2.87, "learning_rate": 3.498472477238174e-06, "loss": 0.4416, "step": 4311 }, { "epoch": 2.87, "learning_rate": 3.4944481062033287e-06, "loss": 0.4109, "step": 4312 }, { "epoch": 2.87, "learning_rate": 3.490425561057993e-06, "loss": 0.4199, "step": 4313 }, { "epoch": 2.87, "learning_rate": 3.4864048429311647e-06, "loss": 0.4525, "step": 4314 }, { "epoch": 2.87, "learning_rate": 3.482385952951318e-06, "loss": 0.4347, "step": 4315 }, { "epoch": 2.87, "learning_rate": 3.4783688922464275e-06, "loss": 0.429, "step": 4316 }, { "epoch": 2.87, "learning_rate": 3.4743536619439446e-06, "loss": 0.4719, "step": 4317 }, { "epoch": 2.87, "learning_rate": 3.470340263170804e-06, "loss": 0.419, "step": 4318 }, { "epoch": 2.87, "learning_rate": 3.466328697053438e-06, "loss": 0.431, "step": 4319 }, { "epoch": 2.87, "learning_rate": 3.4623189647177533e-06, "loss": 0.4558, "step": 4320 }, { "epoch": 2.87, "learning_rate": 3.458311067289153e-06, "loss": 0.457, "step": 4321 }, { "epoch": 2.87, "learning_rate": 3.454305005892515e-06, "loss": 0.4181, "step": 4322 }, { "epoch": 2.88, "learning_rate": 3.450300781652208e-06, "loss": 0.4683, "step": 4323 }, { "epoch": 2.88, "learning_rate": 3.446298395692077e-06, "loss": 0.4823, "step": 4324 }, { "epoch": 2.88, "learning_rate": 3.442297849135462e-06, "loss": 0.4603, "step": 4325 }, { "epoch": 2.88, "learning_rate": 3.4382991431051847e-06, "loss": 0.4328, "step": 4326 }, { "epoch": 2.88, "learning_rate": 3.43430227872354e-06, "loss": 0.4391, "step": 4327 }, { "epoch": 2.88, "learning_rate": 3.430307257112324e-06, "loss": 0.4487, "step": 4328 }, { "epoch": 2.88, "learning_rate": 3.4263140793927917e-06, "loss": 0.454, "step": 4329 }, { "epoch": 2.88, "learning_rate": 3.4223227466857045e-06, "loss": 0.4178, "step": 4330 }, { "epoch": 2.88, "learning_rate": 3.418333260111286e-06, "loss": 0.4356, "step": 4331 }, { "epoch": 2.88, "learning_rate": 3.4143456207892555e-06, "loss": 0.4359, "step": 4332 }, { "epoch": 2.88, "learning_rate": 3.410359829838814e-06, "loss": 0.4786, "step": 4333 }, { "epoch": 2.88, "learning_rate": 3.4063758883786334e-06, "loss": 0.4715, "step": 4334 }, { "epoch": 2.88, "learning_rate": 3.4023937975268728e-06, "loss": 0.4947, "step": 4335 }, { "epoch": 2.88, "learning_rate": 3.398413558401168e-06, "loss": 0.4878, "step": 4336 }, { "epoch": 2.88, "learning_rate": 3.394435172118644e-06, "loss": 0.4288, "step": 4337 }, { "epoch": 2.89, "learning_rate": 3.390458639795895e-06, "loss": 0.4409, "step": 4338 }, { "epoch": 2.89, "learning_rate": 3.386483962549004e-06, "loss": 0.4346, "step": 4339 }, { "epoch": 2.89, "learning_rate": 3.3825111414935287e-06, "loss": 0.4387, "step": 4340 }, { "epoch": 2.89, "learning_rate": 3.3785401777445003e-06, "loss": 0.4444, "step": 4341 }, { "epoch": 2.89, "learning_rate": 3.3745710724164437e-06, "loss": 0.4477, "step": 4342 }, { "epoch": 2.89, "learning_rate": 3.3706038266233456e-06, "loss": 0.4481, "step": 4343 }, { "epoch": 2.89, "learning_rate": 3.3666384414786834e-06, "loss": 0.464, "step": 4344 }, { "epoch": 2.89, "learning_rate": 3.3626749180954033e-06, "loss": 0.467, "step": 4345 }, { "epoch": 2.89, "learning_rate": 3.3587132575859383e-06, "loss": 0.4718, "step": 4346 }, { "epoch": 2.89, "learning_rate": 3.354753461062189e-06, "loss": 0.4453, "step": 4347 }, { "epoch": 2.89, "learning_rate": 3.3507955296355364e-06, "loss": 0.4207, "step": 4348 }, { "epoch": 2.89, "learning_rate": 3.346839464416842e-06, "loss": 0.425, "step": 4349 }, { "epoch": 2.89, "learning_rate": 3.342885266516436e-06, "loss": 0.4642, "step": 4350 }, { "epoch": 2.89, "learning_rate": 3.338932937044135e-06, "loss": 0.4285, "step": 4351 }, { "epoch": 2.89, "learning_rate": 3.33498247710922e-06, "loss": 0.4545, "step": 4352 }, { "epoch": 2.9, "learning_rate": 3.3310338878204507e-06, "loss": 0.4381, "step": 4353 }, { "epoch": 2.9, "learning_rate": 3.3270871702860686e-06, "loss": 0.4506, "step": 4354 }, { "epoch": 2.9, "learning_rate": 3.3231423256137784e-06, "loss": 0.4668, "step": 4355 }, { "epoch": 2.9, "learning_rate": 3.3191993549107725e-06, "loss": 0.4344, "step": 4356 }, { "epoch": 2.9, "learning_rate": 3.3152582592837058e-06, "loss": 0.4408, "step": 4357 }, { "epoch": 2.9, "learning_rate": 3.3113190398387076e-06, "loss": 0.432, "step": 4358 }, { "epoch": 2.9, "learning_rate": 3.307381697681392e-06, "loss": 0.426, "step": 4359 }, { "epoch": 2.9, "learning_rate": 3.3034462339168317e-06, "loss": 0.439, "step": 4360 }, { "epoch": 2.9, "learning_rate": 3.2995126496495857e-06, "loss": 0.4136, "step": 4361 }, { "epoch": 2.9, "learning_rate": 3.295580945983671e-06, "loss": 0.4472, "step": 4362 }, { "epoch": 2.9, "learning_rate": 3.2916511240225958e-06, "loss": 0.4354, "step": 4363 }, { "epoch": 2.9, "learning_rate": 3.2877231848693134e-06, "loss": 0.47, "step": 4364 }, { "epoch": 2.9, "learning_rate": 3.283797129626274e-06, "loss": 0.4369, "step": 4365 }, { "epoch": 2.9, "learning_rate": 3.2798729593953903e-06, "loss": 0.4427, "step": 4366 }, { "epoch": 2.9, "learning_rate": 3.275950675278039e-06, "loss": 0.4411, "step": 4367 }, { "epoch": 2.91, "learning_rate": 3.272030278375079e-06, "loss": 0.4469, "step": 4368 }, { "epoch": 2.91, "learning_rate": 3.2681117697868325e-06, "loss": 0.4536, "step": 4369 }, { "epoch": 2.91, "learning_rate": 3.264195150613091e-06, "loss": 0.4256, "step": 4370 }, { "epoch": 2.91, "learning_rate": 3.260280421953115e-06, "loss": 0.4578, "step": 4371 }, { "epoch": 2.91, "learning_rate": 3.256367584905643e-06, "loss": 0.4293, "step": 4372 }, { "epoch": 2.91, "learning_rate": 3.2524566405688716e-06, "loss": 0.4374, "step": 4373 }, { "epoch": 2.91, "learning_rate": 3.248547590040477e-06, "loss": 0.4327, "step": 4374 }, { "epoch": 2.91, "learning_rate": 3.244640434417595e-06, "loss": 0.4799, "step": 4375 }, { "epoch": 2.91, "learning_rate": 3.24073517479683e-06, "loss": 0.4319, "step": 4376 }, { "epoch": 2.91, "learning_rate": 3.2368318122742628e-06, "loss": 0.4631, "step": 4377 }, { "epoch": 2.91, "learning_rate": 3.232930347945429e-06, "loss": 0.4231, "step": 4378 }, { "epoch": 2.91, "learning_rate": 3.2290307829053456e-06, "loss": 0.4184, "step": 4379 }, { "epoch": 2.91, "learning_rate": 3.2251331182484868e-06, "loss": 0.4542, "step": 4380 }, { "epoch": 2.91, "learning_rate": 3.2212373550687903e-06, "loss": 0.4319, "step": 4381 }, { "epoch": 2.91, "learning_rate": 3.2173434944596747e-06, "loss": 0.4312, "step": 4382 }, { "epoch": 2.92, "learning_rate": 3.213451537514007e-06, "loss": 0.4563, "step": 4383 }, { "epoch": 2.92, "learning_rate": 3.2095614853241376e-06, "loss": 0.4362, "step": 4384 }, { "epoch": 2.92, "learning_rate": 3.205673338981865e-06, "loss": 0.449, "step": 4385 }, { "epoch": 2.92, "learning_rate": 3.201787099578467e-06, "loss": 0.4091, "step": 4386 }, { "epoch": 2.92, "learning_rate": 3.197902768204678e-06, "loss": 0.4509, "step": 4387 }, { "epoch": 2.92, "learning_rate": 3.194020345950697e-06, "loss": 0.4645, "step": 4388 }, { "epoch": 2.92, "learning_rate": 3.190139833906193e-06, "loss": 0.4458, "step": 4389 }, { "epoch": 2.92, "learning_rate": 3.1862612331602906e-06, "loss": 0.4272, "step": 4390 }, { "epoch": 2.92, "learning_rate": 3.182384544801589e-06, "loss": 0.4513, "step": 4391 }, { "epoch": 2.92, "learning_rate": 3.17850976991814e-06, "loss": 0.4219, "step": 4392 }, { "epoch": 2.92, "learning_rate": 3.1746369095974594e-06, "loss": 0.4353, "step": 4393 }, { "epoch": 2.92, "learning_rate": 3.1707659649265367e-06, "loss": 0.4773, "step": 4394 }, { "epoch": 2.92, "learning_rate": 3.166896936991808e-06, "loss": 0.4513, "step": 4395 }, { "epoch": 2.92, "learning_rate": 3.163029826879186e-06, "loss": 0.4285, "step": 4396 }, { "epoch": 2.93, "learning_rate": 3.159164635674035e-06, "loss": 0.4718, "step": 4397 }, { "epoch": 2.93, "learning_rate": 3.155301364461184e-06, "loss": 0.4337, "step": 4398 }, { "epoch": 2.93, "learning_rate": 3.1514400143249203e-06, "loss": 0.4778, "step": 4399 }, { "epoch": 2.93, "learning_rate": 3.147580586348998e-06, "loss": 0.4533, "step": 4400 }, { "epoch": 2.93, "learning_rate": 3.143723081616633e-06, "loss": 0.467, "step": 4401 }, { "epoch": 2.93, "learning_rate": 3.1398675012104885e-06, "loss": 0.4248, "step": 4402 }, { "epoch": 2.93, "learning_rate": 3.1360138462127075e-06, "loss": 0.4543, "step": 4403 }, { "epoch": 2.93, "learning_rate": 3.132162117704869e-06, "loss": 0.4144, "step": 4404 }, { "epoch": 2.93, "learning_rate": 3.1283123167680306e-06, "loss": 0.4495, "step": 4405 }, { "epoch": 2.93, "learning_rate": 3.1244644444826975e-06, "loss": 0.454, "step": 4406 }, { "epoch": 2.93, "learning_rate": 3.1206185019288414e-06, "loss": 0.4439, "step": 4407 }, { "epoch": 2.93, "learning_rate": 3.116774490185891e-06, "loss": 0.4164, "step": 4408 }, { "epoch": 2.93, "learning_rate": 3.1129324103327284e-06, "loss": 0.4221, "step": 4409 }, { "epoch": 2.93, "learning_rate": 3.1090922634476963e-06, "loss": 0.4739, "step": 4410 }, { "epoch": 2.93, "learning_rate": 3.1052540506085903e-06, "loss": 0.44, "step": 4411 }, { "epoch": 2.94, "learning_rate": 3.1014177728926755e-06, "loss": 0.4639, "step": 4412 }, { "epoch": 2.94, "learning_rate": 3.0975834313766594e-06, "loss": 0.4563, "step": 4413 }, { "epoch": 2.94, "learning_rate": 3.0937510271367177e-06, "loss": 0.4436, "step": 4414 }, { "epoch": 2.94, "learning_rate": 3.089920561248476e-06, "loss": 0.4189, "step": 4415 }, { "epoch": 2.94, "learning_rate": 3.0860920347870127e-06, "loss": 0.4294, "step": 4416 }, { "epoch": 2.94, "learning_rate": 3.0822654488268733e-06, "loss": 0.4462, "step": 4417 }, { "epoch": 2.94, "learning_rate": 3.0784408044420456e-06, "loss": 0.4727, "step": 4418 }, { "epoch": 2.94, "learning_rate": 3.074618102705984e-06, "loss": 0.4233, "step": 4419 }, { "epoch": 2.94, "learning_rate": 3.0707973446915863e-06, "loss": 0.4338, "step": 4420 }, { "epoch": 2.94, "learning_rate": 3.0669785314712173e-06, "loss": 0.4456, "step": 4421 }, { "epoch": 2.94, "learning_rate": 3.063161664116686e-06, "loss": 0.4378, "step": 4422 }, { "epoch": 2.94, "learning_rate": 3.0593467436992565e-06, "loss": 0.4272, "step": 4423 }, { "epoch": 2.94, "learning_rate": 3.0555337712896527e-06, "loss": 0.466, "step": 4424 }, { "epoch": 2.94, "learning_rate": 3.0517227479580425e-06, "loss": 0.4391, "step": 4425 }, { "epoch": 2.94, "learning_rate": 3.047913674774059e-06, "loss": 0.426, "step": 4426 }, { "epoch": 2.95, "learning_rate": 3.044106552806777e-06, "loss": 0.4233, "step": 4427 }, { "epoch": 2.95, "learning_rate": 3.0403013831247243e-06, "loss": 0.4555, "step": 4428 }, { "epoch": 2.95, "learning_rate": 3.0364981667958903e-06, "loss": 0.4368, "step": 4429 }, { "epoch": 2.95, "learning_rate": 3.0326969048877032e-06, "loss": 0.4209, "step": 4430 }, { "epoch": 2.95, "learning_rate": 3.0288975984670564e-06, "loss": 0.4147, "step": 4431 }, { "epoch": 2.95, "learning_rate": 3.0251002486002843e-06, "loss": 0.4626, "step": 4432 }, { "epoch": 2.95, "learning_rate": 3.0213048563531713e-06, "loss": 0.4124, "step": 4433 }, { "epoch": 2.95, "learning_rate": 3.017511422790963e-06, "loss": 0.4729, "step": 4434 }, { "epoch": 2.95, "learning_rate": 3.013719948978342e-06, "loss": 0.4275, "step": 4435 }, { "epoch": 2.95, "learning_rate": 3.0099304359794536e-06, "loss": 0.4516, "step": 4436 }, { "epoch": 2.95, "learning_rate": 3.006142884857881e-06, "loss": 0.4363, "step": 4437 }, { "epoch": 2.95, "learning_rate": 3.002357296676672e-06, "loss": 0.4255, "step": 4438 }, { "epoch": 2.95, "learning_rate": 2.9985736724982995e-06, "loss": 0.4323, "step": 4439 }, { "epoch": 2.95, "learning_rate": 2.9947920133847108e-06, "loss": 0.4297, "step": 4440 }, { "epoch": 2.95, "learning_rate": 2.991012320397283e-06, "loss": 0.457, "step": 4441 }, { "epoch": 2.96, "learning_rate": 2.9872345945968528e-06, "loss": 0.4242, "step": 4442 }, { "epoch": 2.96, "learning_rate": 2.9834588370437035e-06, "loss": 0.4239, "step": 4443 }, { "epoch": 2.96, "learning_rate": 2.9796850487975595e-06, "loss": 0.4534, "step": 4444 }, { "epoch": 2.96, "learning_rate": 2.975913230917595e-06, "loss": 0.4305, "step": 4445 }, { "epoch": 2.96, "learning_rate": 2.9721433844624327e-06, "loss": 0.4283, "step": 4446 }, { "epoch": 2.96, "learning_rate": 2.968375510490146e-06, "loss": 0.4348, "step": 4447 }, { "epoch": 2.96, "learning_rate": 2.9646096100582423e-06, "loss": 0.4003, "step": 4448 }, { "epoch": 2.96, "learning_rate": 2.9608456842236923e-06, "loss": 0.4318, "step": 4449 }, { "epoch": 2.96, "learning_rate": 2.9570837340428994e-06, "loss": 0.473, "step": 4450 }, { "epoch": 2.96, "learning_rate": 2.9533237605717125e-06, "loss": 0.4205, "step": 4451 }, { "epoch": 2.96, "learning_rate": 2.9495657648654364e-06, "loss": 0.4145, "step": 4452 }, { "epoch": 2.96, "learning_rate": 2.9458097479788073e-06, "loss": 0.4234, "step": 4453 }, { "epoch": 2.96, "learning_rate": 2.9420557109660197e-06, "loss": 0.4268, "step": 4454 }, { "epoch": 2.96, "learning_rate": 2.938303654880702e-06, "loss": 0.4549, "step": 4455 } ], "logging_steps": 1, "max_steps": 5940, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 1485, "total_flos": 7210953804349440.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }